# Source code for geocoding.writers

import pandas as pd
import csv
import os

from geocoding import clf_utilities as clf_ut
from geocoding.config import Config


def write_feats_space(fpath):
    """
    Writes the features configuration in *fpath*.

    The output is a csv file with a ``feature,normalized`` header followed \
    by one row per entry of ``Config.included_features``. The second column \
    tells whether that entry also appears in ``Config.normalized_features``.

    Args:
        fpath (str): Path to write

    Returns:
        None
    """
    # newline='' hands line-ending control to the csv module; without it
    # each row is followed by an empty line on platforms that translate
    # line endings on write.
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['feature', 'normalized'])
        # The membership test already yields a bool, so no conditional
        # expression is needed to produce True/False.
        writer.writerows(
            [feat, feat in Config.normalized_features]
            for feat in Config.included_features
        )


def write_clf_space(fpath, clf_name, best_params=None):
    """
    Writes *clf_name* classifier configuration in *fpath*. If *best_params* \
    is given then writes the best performing configuration of *clf_name*.

    The output is a csv file with a ``classifier,parameters`` header and a \
    single data row. When *best_params* is None the parameters cell holds \
    the first element of ``clf_ut.clf_hparams_map[clf_name]``.

    Args:
        fpath (str): Path to write
        clf_name (str): Name of classifier to consider
        best_params (dict, optional): Has hyperparameters as keys and the \
            corresponding values as values

    Returns:
        None

    Raises:
        KeyError: If *best_params* is None and *clf_name* is not a key of \
            ``clf_ut.clf_hparams_map``.
    """
    # Resolve what to write before touching the file, so an unknown
    # classifier name does not leave a truncated, header-only file behind.
    if best_params is None:
        params = clf_ut.clf_hparams_map[clf_name][0]
    else:
        params = best_params

    # newline='' hands line-ending control to the csv module (avoids blank
    # rows on platforms that translate line endings on write).
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['classifier', 'parameters'])
        writer.writerow([clf_name, params])


def write_results(results_path, results, step):
    """
    Writes full and averaged experiment results.

    Two csv files are created inside *results_path*:

    * ``all_results.csv``: one row per entry of *results*, restricted to \
      the fold, the grouping column and the four metric columns.
    * ``results_by_<col>.csv``: the numeric columns averaged per value of \
      the grouping column (fold excluded), sorted by descending accuracy.

    The grouping column is ``classifier`` when *step* is \
    ``'algorithm_selection'`` and ``clf_params`` otherwise.

    Args:
        results_path (str): Path to write
        results (dict): Contains metrics as keys and the corresponding values \
            as values
        step (str): Defines the experiment step

    Returns:
        None
    """
    col = 'classifier' if step == 'algorithm_selection' else 'clf_params'
    metrics = ['accuracy', 'f1_macro', 'f1_micro', 'f1_weighted']

    all_results_df = pd.DataFrame(results)
    all_results_df.to_csv(
        os.path.join(results_path, 'all_results.csv'),
        columns=['fold', col] + metrics,
        index=False
    )

    avg_results_df = (
        all_results_df.groupby(col)
        # numeric_only=True: since pandas 2.0 a plain mean() raises
        # TypeError as soon as any non-grouping column is non-numeric.
        .mean(numeric_only=True)
        # The fold number is not a metric. It is already absent if it was
        # non-numeric, hence errors='ignore'.
        .drop(columns='fold', errors='ignore')
        .sort_values(by='accuracy', ascending=False)
    )
    avg_results_df.to_csv(os.path.join(results_path, f'results_by_{col}.csv'))


def write_predictions(fpath, df, preds):
    """
    Creates a csv file to present the predictions (in (predicted label, \
    score) pairs).

    The file has an ``address,predictions`` header and one row per row of \
    *df*. The predictions cell is the list of ``len(Config.services)`` \
    consecutive entries of *preds* that belong to that row.

    Args:
        fpath (str): File path to write
        df (pandas.DataFrame): Contains the data points to which the \
            predictions refer to; must have an ``address`` column
        preds (list): Contains (predicted label, score) pairs

    Returns:
        None
    """
    n_services = len(Config.services)
    # newline='' hands line-ending control to the csv module (avoids blank
    # rows on platforms that translate line endings on write).
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['address', 'predictions'])
        for row in df.itertuples():
            # NOTE(review): the offset is derived from the row's index label,
            # not its position, so this presumably expects *df* to carry a
            # default 0..n-1 integer index -- confirm against callers.
            start = row.Index * n_services
            writer.writerow([
                row.address,
                list(preds[start:start + n_services]),
            ])