"""Source code for writers: csv writers for search spaces, results and predictions."""

import csv
import os
from itertools import product

import pandas as pd

import features_utilities as feat_ut
import clf_utilities as clf_ut
from config import config


def write_feature_params_info(fpath, params_names, params_vals):
    """
    Creates a csv file defining the features parameters used in each of the \
    features sets.

    One row is written per combination in the cartesian product of \
    *params_vals*. The i-th combination is labelled \
    ``X_train_i, X_test_i`` and is paired with a dict that maps every name \
    in *params_names* to its value in that combination.

    Args:
        fpath (str): File path to write
        params_names (list): Contains the features parameters names
        params_vals (list): Contains a list with values for each feature \
            parameter

    Returns:
        None
    """
    # Build every row up front so that a failure while combining the
    # parameters does not leave a truncated file behind.
    rows = [
        (f'X_train_{idx}, X_test_{idx}', dict(zip(params_names, params)))
        for idx, params in enumerate(product(*params_vals))
    ]
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate newlines end up with blank lines between rows.
    with open(fpath, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Feature sets', 'Feature parameters combination'])
        writer.writerows(rows)


def write_feature_space(fpath, features_info=None, best_params=None):
    """
    Creates a csv file containing the features configuration to be searched \
    (namely included features as well as their parameters to be finetuned and \
    whether normalization should be used or not). If *best_params* is given, \
    then the csv will present the finetuned features parameters values instead.

    When *features_info* is None, the features are read from \
    ``config.included_adjacency_features`` and \
    ``config.included_textual_features``, the normalized flag is derived \
    from ``config.normalized_features`` and the parameter values are the \
    ones stored in ``config``. Otherwise the given (feature, normalized) \
    pairs are written, with values taken from *best_params* when it is \
    provided and from ``config`` when it is not.

    Args:
        fpath (str): File path to write
        features_info (list, optional): Contains (features, normalized or \
            not) pairs
        best_params (dict, optional): Contains features as keys and the \
            corresponding finetuned values as values

    Returns:
        None
    """
    if features_info is None:
        included_features = config.included_adjacency_features + config.included_textual_features
        pairs = [(feat, feat in config.normalized_features) for feat in included_features]
        use_config_values = True
    else:
        # Only the first two items of every entry are used.
        pairs = [(info[0], info[1]) for info in features_info]
        # Without finetuned values there is nothing to index into, so fall
        # back to the values held in config instead of raising TypeError.
        use_config_values = best_params is None

    # newline='' hands line-ending control to the csv module.
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Feature', 'Parameter', 'Parameter values', 'Normalized'])
        for feat, norm in pairs:
            if feat not in feat_ut.features_params_map:
                # Features without a tunable parameter get placeholder cells.
                writer.writerow([feat, '-', '-', norm])
                continue
            param_name = feat_ut.features_params_map[feat]
            if use_config_values:
                param_value = getattr(config, param_name)
            else:
                param_value = best_params[param_name]
            writer.writerow([feat, param_name, param_value, norm])


def write_classifier_space(fpath, clf_name, best_params=None):
    """
    Creates a csv file containing the chosen classifier's name as well as the \
    hyperparameters space to be searched. If *best_params* is given, then the \
    csv will present the finetuned hyperparameters values instead.

    Args:
        fpath (str): File path to write
        clf_name (str): Name of the utilized classifier
        best_params (dict, optional): Contains hyperparameters names as keys \
            and the corresponding finetuned values as values

    Returns:
        None

    Raises:
        KeyError: If *best_params* is None and *clf_name* has no entry in \
            ``clf_ut.clf_hyperparams_map`` (assuming the map is a dict).
    """
    # Resolve what to write before touching the file, so a failed lookup
    # does not leave a header-only csv behind.
    if best_params is None:
        params = clf_ut.clf_hyperparams_map[clf_name]
    else:
        params = best_params

    # newline='' hands line-ending control to the csv module.
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Classifier', 'Parameters'])
        writer.writerow([clf_name, params])


def write_evaluation_space(fpath):
    """
    Creates a csv file that contains all the included classifiers as well as \
    the hyperparameters space to be searched for each of them.

    The classifiers are read from ``config.included_classifiers``. A \
    classifier without an entry in ``clf_ut.clf_hyperparams_map`` gets \
    ``-`` in the parameters column.

    Args:
        fpath (str): File path to write

    Returns:
        None
    """
    # newline='' hands line-ending control to the csv module.
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Classifier', 'Parameters'])
        for clf in config.included_classifiers:
            if clf in clf_ut.clf_hyperparams_map:
                params = clf_ut.clf_hyperparams_map[clf]
            else:
                params = '-'
            writer.writerow([clf, params])


def write_evaluation_results(results_path, results_dict):
    """
    Creates three csv files that present (a) full results (b) results per \
    fold and (c) results per classifier of the algorithm_selection step.

    The files are written inside *results_path* as ``all_results.csv``, \
    ``results_by_fold.csv`` and ``results_by_classifier.csv``. The last one \
    is sorted by ``f1_weighted`` in descending order.

    Args:
        results_path (str): Path to write
        results_dict (dict): Contains the algorithm_selection step's results

    Returns:
        None
    """
    all_results_df = pd.DataFrame(results_dict)
    all_results_df.to_csv(
        os.path.join(results_path, 'all_results.csv'),
        columns=['fold', 'feature_set', 'classifier',
                 'top_1_accuracy', 'top_5_accuracy', 'top_10_accuracy',
                 'f1_macro', 'f1_micro', 'f1_weighted',
                 'precision_weighted', 'recall_weighted'],
        index=False)

    # numeric_only=True keeps non-numeric columns (e.g. a textual
    # feature_set) out of the average; recent pandas versions raise on them
    # instead of silently dropping them.
    fold_results_df = all_results_df.groupby(['fold', 'classifier']).mean(numeric_only=True)
    fold_results_df.to_csv(os.path.join(results_path, 'results_by_fold.csv'))

    # Regroup on the (fold, classifier) index, then average over the
    # classifier level (level 1) to get one row per classifier.
    clf_results_df = fold_results_df.groupby(['fold', 'classifier']).sum().groupby(level=1).mean()
    clf_results_df.sort_values(by=['f1_weighted'], ascending=False).to_csv(
        os.path.join(results_path, 'results_by_classifier.csv'))


def write_finetuning_results(results_path, results_dict):
    """
    Creates two csv files that present (a) full results and (b) results per \
    feature set and classifier hyperparameters used of the model_selection \
    step.

    The files are written inside *results_path* as ``all_results.csv`` and \
    ``results_by_feature_and_clf_params.csv``. The latter holds the metrics \
    averaged over folds and is sorted by ``f1_weighted`` in descending order.

    Args:
        results_path (str): Path to write
        results_dict (dict): Contains the model_selection step's results

    Returns:
        None
    """
    all_results_df = pd.DataFrame(results_dict)
    all_results_df.to_csv(
        os.path.join(results_path, 'all_results.csv'),
        columns=['fold', 'feature_set', 'clf_params',
                 'top_1_accuracy', 'top_5_accuracy', 'top_10_accuracy',
                 'f1_macro', 'f1_micro', 'f1_weighted',
                 'precision_weighted', 'recall_weighted'],
        index=False)

    avg_results_df = all_results_df.groupby(['feature_set', 'clf_params']).mean(numeric_only=True)
    # The averaged fold number carries no meaning. The keyword form is used
    # because recent pandas versions no longer accept axis positionally.
    avg_results_df = avg_results_df.drop(columns='fold')
    avg_results_df.sort_values(by=['f1_weighted'], ascending=False).to_csv(
        os.path.join(results_path, 'results_by_feature_and_clf_params.csv'))


def write_predictions(fpath, poi_gdf, k_preds):
    """
    Creates a csv file to present the top k predictions (in (predicted label, \
    score) pairs).

    Each row holds the poi's ``config.id_col`` and ``config.name_col`` \
    values followed by the list of its ``config.k_preds`` entries taken \
    from *k_preds*.

    Args:
        fpath (str): File path to write
        poi_gdf (geopandas.GeoDataFrame): Contains the pois to which the \
            predictions refer to
        k_preds (list): Contains (predicted label, score) pairs

    Returns:
        None
    """
    k = config.k_preds
    # newline='' hands line-ending control to the csv module.
    with open(fpath, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([
            config.id_col,
            config.name_col,
            f'top_{k}_predictions'])
        for poi in poi_gdf.itertuples():
            # k_preds is flat: the entries of one poi occupy k consecutive
            # positions starting at Index * k.
            # NOTE(review): the offset comes from the row's index label, so
            # this presumably expects a default 0..n-1 index on poi_gdf -
            # confirm against callers.
            start = poi.Index * k
            writer.writerow([
                getattr(poi, config.id_col),
                getattr(poi, config.name_col),
                list(k_preds[start:start + k]),
            ])