import pandas as pd
import csv
import os
from geocoding import clf_utilities as clf_ut
from geocoding.config import Config
def write_feats_space(fpath):
    """
    Writes the features configuration in *fpath*.

    The file gets a ``feature,normalized`` header followed by one row per
    entry of ``Config.included_features``. The second column tells whether
    that feature is also listed in ``Config.normalized_features``.

    Args:
        fpath (str): Path of the csv file to write

    Returns:
        None
    """
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' emit a stray '\r' after every row.
    with open(fpath, 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out)
        writer.writerow(['feature', 'normalized'])
        writer.writerows(
            # The membership test already evaluates to a bool.
            [feat, feat in Config.normalized_features]
            for feat in Config.included_features
        )
def write_clf_space(fpath, clf_name, best_params=None):
    """
    Writes *clf_name* classifier configuration in *fpath*. If *best_params*
    is given then writes the best performing configuration of *clf_name*.

    The file gets a ``classifier,parameters`` header and a single data row.
    When *best_params* is None the parameters column holds the first entry
    of ``clf_ut.clf_hparams_map[clf_name]``.

    Args:
        fpath (str): Path of the csv file to write
        clf_name (str): Name of classifier to consider
        best_params (dict, optional): Has hyperparameters as keys and the
            corresponding values as values

    Returns:
        None

    Raises:
        KeyError: If *best_params* is None and *clf_name* is not a key of
            ``clf_ut.clf_hparams_map``.
    """
    # Resolve the parameters before touching the file, so a failed lookup
    # does not leave a truncated, header-only csv behind.
    if best_params is None:
        params = clf_ut.clf_hparams_map[clf_name][0]
    else:
        params = best_params

    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' emit a stray '\r' after every row.
    with open(fpath, 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out)
        writer.writerow(['classifier', 'parameters'])
        writer.writerow([clf_name, params])
def write_results(results_path, results, step):
    """
    Writes full and averaged experiment results.

    Two csv files are created inside *results_path*:

    * ``all_results.csv`` holds one row per entry of *results*, restricted
      to the fold, the grouping column and the four metric columns.
    * ``results_by_<column>.csv`` holds the per-group mean of every numeric
      column except ``fold``, sorted by descending accuracy.

    The grouping column is ``classifier`` when *step* equals
    ``'algorithm_selection'`` and ``clf_params`` otherwise.

    Args:
        results_path (str): Directory in which the files are written
        results (dict): Contains metrics as keys and the corresponding
            values as values
        step (str): Defines the experiment step

    Returns:
        None
    """
    col = 'classifier' if step == 'algorithm_selection' else 'clf_params'
    metrics = ['accuracy', 'f1_macro', 'f1_micro', 'f1_weighted']

    all_results_df = pd.DataFrame(results)
    all_results_df.to_csv(
        os.path.join(results_path, 'all_results.csv'),
        columns=['fold', col, *metrics],
        index=False,
    )

    avg_results_df = (
        all_results_df
        .groupby(col)
        # numeric_only=True keeps non-numeric columns out of the averages;
        # pandas >= 2.0 raises TypeError on them by default.
        .mean(numeric_only=True)
        .drop(columns='fold')
        .sort_values(by='accuracy', ascending=False)
    )
    avg_results_df.to_csv(os.path.join(results_path, f'results_by_{col}.csv'))
def write_predictions(fpath, df, preds):
    """
    Creates a csv file to present the predictions (in (predicted label,
    score) pairs).

    The file gets an ``address,predictions`` header and one row per row of
    *df*. Each row pairs the address with the ``len(Config.services)``
    consecutive entries of *preds* that belong to it.

    Args:
        fpath (str): File path to write
        df (pandas.DataFrame): Contains the data points to which the
            predictions refer to; must expose an ``address`` column
        preds (list): Contains (predicted label, score) pairs, stored as
            one flat sequence with ``len(Config.services)`` entries per
            data point

    Returns:
        None
    """
    n_services = len(Config.services)
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' emit a stray '\r' after every row and
    # addresses containing line breaks are not written back faithfully.
    with open(fpath, 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out)
        writer.writerow(['address', 'predictions'])
        for row in df.itertuples():
            # NOTE(review): the slice is located through the index label,
            # which assumes *df* carries a default 0..n-1 integer index --
            # confirm against callers.
            start = row.Index * n_services
            writer.writerow([
                row.address,
                list(preds[start:start + n_services]),
            ])