Source code for writers

import pandas as pd
from itertools import product
import csv

import features_utilities as feat_ut
import clf_utilities as clf_ut
from config import config


def write_feature_params_info(fpath, params_names, params_vals):
    """
    Creates a csv file defining the features parameters used in each of the
    features sets.

    One row is written per element of the cartesian product of
    *params_vals*; the row label is ``'X_train_<idx>, X_test_<idx>'`` and the
    row value is the dict mapping each parameter name to its value in that
    combination.

    Args:
        fpath (str): File path to write
        params_names (list): Contains the features parameters names
        params_vals (list): Contains a list with values for each feature
            parameter

    Returns:
        None
    """
    # Build every row before touching the file so that malformed input
    # raises without truncating an existing file.
    rows = [
        (f'X_train_{idx}, X_test_{idx}', dict(zip(params_names, params)))
        for idx, params in enumerate(product(*params_vals))
    ]

    # newline='' hands line-ending control to the csv module; without it
    # platforms that translate '\n' emit '\r\r\n' (a blank line per row).
    with open(fpath, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Feature sets', 'Feature parameters combination'])
        writer.writerows(rows)
def write_feature_space(fpath, features_info=None, best_params=None):
    """
    Creates a csv file containing the features configuration.

    When *features_info* is None, the csv lists every feature included in
    the configuration (adjacency features followed by textual ones), the
    parameter attached to it (if any) together with that parameter's value
    as read from ``config``, and whether the feature is normalized.

    When *features_info* is given, the csv lists those features instead and
    the parameter values are taken from *best_params*.

    Args:
        fpath (str): File path to write
        features_info (list, optional): Contains (features, normalized or
            not) pairs
        best_params (dict, optional): Contains features parameters names as
            keys and the corresponding finetuned values as values. It is
            looked up whenever *features_info* contains a feature that has
            a parameter.

    Returns:
        None
    """
    # newline='' hands line-ending control to the csv module; without it
    # platforms that translate '\n' emit '\r\r\n' (a blank line per row).
    with open(fpath, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Feature', 'Parameter', 'Parameter values', 'Normalized'])

        if features_info is None:
            included_features = (
                config.included_adjacency_features
                + config.included_textual_features
            )
            for feat in included_features:
                norm = feat in config.normalized_features
                if feat not in feat_ut.features_params_map:
                    # Parameter-free feature: placeholders keep columns aligned.
                    writer.writerow([feat, '-', '-', norm])
                else:
                    param_name = feat_ut.features_params_map[feat]
                    writer.writerow(
                        [feat, param_name, getattr(config, param_name), norm])
        else:
            for info in features_info:
                feat, norm = info[0], info[1]
                if feat not in feat_ut.features_params_map:
                    writer.writerow([feat, '-', '-', norm])
                else:
                    param_name = feat_ut.features_params_map[feat]
                    writer.writerow(
                        [feat, param_name, best_params[param_name], norm])
def write_classifier_space(fpath, clf_name, best_params=None):
    """
    Creates a csv file containing the chosen classifier's name as well as
    the hyperparameters space to be searched. If *best_params* is given,
    then the csv will present the finetuned hyperparameters values instead.

    Args:
        fpath (str): File path to write
        clf_name (str): Name of the utilized classifier
        best_params (dict, optional): Contains hyperparameters names as keys
            and the corresponding finetuned values as values

    Returns:
        None
    """
    # Only None selects the search space; an empty dict is written as-is.
    if best_params is None:
        params = clf_ut.clf_hyperparams_map[clf_name]
    else:
        params = best_params

    # newline='' hands line-ending control to the csv module; without it
    # platforms that translate '\n' emit '\r\r\n' (a blank line per row).
    with open(fpath, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Classifier', 'Parameters'])
        writer.writerow([clf_name, params])
def write_evaluation_space(fpath):
    """
    Creates a csv file that contains all the included classifiers as well as
    the hyperparameters space to be searched for each of them.

    Classifiers without an entry in the hyperparameters map are written with
    ``'-'`` in the parameters column.

    Args:
        fpath (str): File path to write

    Returns:
        None
    """
    # newline='' hands line-ending control to the csv module; without it
    # platforms that translate '\n' emit '\r\r\n' (a blank line per row).
    with open(fpath, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Classifier', 'Parameters'])
        for clf in config.included_classifiers:
            if clf in clf_ut.clf_hyperparams_map:
                params = clf_ut.clf_hyperparams_map[clf]
            else:
                params = '-'
            writer.writerow([clf, params])
def write_evaluation_results(results_path, results_dict):
    """
    Creates three csv files that present (a) full results (b) results per
    fold and (c) results per classifier of the algorithm_selection step.

    Files written inside *results_path*: ``all_results.csv``,
    ``results_by_fold.csv`` and ``results_by_classifier.csv`` (the latter
    sorted by descending ``f1_weighted``).

    Args:
        results_path (str): Path to write
        results_dict (dict): Contains the algorithm_selection step's results

    Returns:
        None
    """
    all_results_df = pd.DataFrame(results_dict)
    all_results_df.to_csv(
        results_path + '/all_results.csv',
        columns=[
            'fold', 'feature_set', 'classifier',
            'top_1_accuracy', 'top_5_accuracy', 'top_10_accuracy',
            'f1_macro', 'f1_micro', 'f1_weighted',
            'precision_weighted', 'recall_weighted',
        ],
        index=False)

    # numeric_only=True: the frame still carries non-grouped columns such as
    # 'feature_set'. Older pandas silently dropped non-numeric ones from the
    # mean; pandas >= 2.0 raises TypeError unless this is requested.
    fold_results_df = all_results_df.groupby(
        ['fold', 'classifier']).mean(numeric_only=True)
    fold_results_df.to_csv(results_path + '/results_by_fold.csv')

    # Average the per-fold scores of each classifier (index level 1).
    clf_results_df = (
        fold_results_df
        .groupby(['fold', 'classifier']).sum()
        .groupby(level=1).mean()
    )
    clf_results_df.sort_values(by=['f1_weighted'], ascending=False).to_csv(
        results_path + '/results_by_classifier.csv')
def write_finetuning_results(results_path, results_dict):
    """
    Creates two csv files that present (a) full results and (b) results per
    feature set and classifier hyperparameters used of the model_selection
    step.

    Files written inside *results_path*: ``all_results.csv`` and
    ``results_by_feature_and_clf_params.csv`` (scores averaged over folds and
    sorted by descending ``f1_weighted``).

    Args:
        results_path (str): Path to write
        results_dict (dict): Contains the model_selection step's results

    Returns:
        None
    """
    all_results_df = pd.DataFrame(results_dict)
    all_results_df.to_csv(
        results_path + '/all_results.csv',
        columns=[
            'fold', 'feature_set', 'clf_params',
            'top_1_accuracy', 'top_5_accuracy', 'top_10_accuracy',
            'f1_macro', 'f1_micro', 'f1_weighted',
            'precision_weighted', 'recall_weighted',
        ],
        index=False)

    # numeric_only=True keeps the historical behaviour of ignoring any
    # non-numeric column, which pandas >= 2.0 no longer does implicitly.
    avg_results_df = all_results_df.groupby(
        ['feature_set', 'clf_params']).mean(numeric_only=True)
    # The averaged fold number is meaningless. `columns=` replaces the
    # positional axis argument, which pandas 2.0 removed.
    avg_results_df = avg_results_df.drop(columns='fold')
    avg_results_df.sort_values(by=['f1_weighted'], ascending=False).to_csv(
        results_path + '/results_by_feature_and_clf_params.csv')
def write_predictions(fpath, poi_gdf, k_preds):
    """
    Creates a csv file to present the top k predictions (in (predicted
    label, score) pairs).

    Each row holds the poi's id, its name and the list of its
    ``config.k_preds`` predictions, sliced out of the flat *k_preds*
    sequence.

    Args:
        fpath (str): File path to write
        poi_gdf (geopandas.GeoDataFrame): Contains the pois to which the
            predictions refer to
        k_preds (list): Contains (predicted label, score) pairs

    Returns:
        None
    """
    k = config.k_preds

    # newline='' hands line-ending control to the csv module; without it
    # platforms that translate '\n' emit '\r\r\n' (a blank line per row).
    with open(fpath, 'w', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([config.id_col, config.name_col, f'top_{k}_predictions'])

        for poi in poi_gdf.itertuples():
            # NOTE(review): the index label is used as a position into
            # k_preds, so this assumes poi_gdf carries a 0-based consecutive
            # integer index aligned with k_preds -- confirm against callers.
            start = poi.Index * k
            writer.writerow([
                getattr(poi, config.id_col),
                getattr(poi, config.name_col),
                list(k_preds[start:start + k]),
            ])