import pandas as pd
import argparse
import os
import shutil
import pickle
import time
from geocoding import features_utilities as feat_ut, clf_utilities as clf_ut, writers as wrtrs
from geocoding.config import Config
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(review): unpickling can execute arbitrary code. Only point this at
    # artifacts written by this pipeline's own earlier steps.
    with open(path, 'rb') as f:
        return pickle.load(f)


def main():
    """
    Implements the fifth step of the experiment pipeline. This step loads a
    pickled trained model from the previous step and deploys it in order to
    make predictions on a test dataset.

    Command line arguments:
        -fpath: Path of the test dataset, relative to ``Config.base_dir``.
        -experiment_path: Experiment folder name, resolved under
            ``<Config.base_dir>/experiments``.

    The experiment folder must already contain ``features_extraction_results``
    and ``model_training_results``; if either is missing a message is printed
    and the function returns without doing anything else. Otherwise the
    ``model_deployment_results`` folder is recreated from scratch and the
    predictions are written to ``predictions.csv`` inside it.

    Returns:
        None
    """
    # Construct argument parser and parse arguments
    ap = argparse.ArgumentParser()
    ap.add_argument('-fpath', required=True)
    ap.add_argument('-experiment_path', required=True)
    args = vars(ap.parse_args())

    experiment_dir = os.path.join(Config.base_dir, 'experiments', args['experiment_path'])
    features_path = os.path.join(experiment_dir, 'features_extraction_results')
    model_training_path = os.path.join(experiment_dir, 'model_training_results')

    # Both earlier pipeline steps must have produced their output folders.
    for path in [features_path, model_training_path]:
        if not os.path.exists(path):
            print('No such file:', path)
            return

    t1 = time.time()

    # Start from a clean results folder so stale outputs never mix with new ones.
    results_path = os.path.join(experiment_dir, 'model_deployment_results')
    if os.path.exists(results_path):
        shutil.rmtree(results_path)
    # makedirs also creates the intermediate ``results_path`` directory.
    os.makedirs(os.path.join(results_path, 'features'))

    encoder_path = os.path.join(features_path, 'encoder.pkl')

    df = feat_ut.load_points_df(os.path.join(Config.base_dir, args['fpath']))
    df, _ = feat_ut.encode_labels(df, _load_pickle(encoder_path))

    features = list(pd.read_csv(os.path.join(features_path, 'included_features.csv'))['feature'])
    feat_ut.get_required_external_files(df, results_path, features)
    X_test = feat_ut.create_test_features(
        df,
        results_path,
        os.path.join(model_training_path, 'pickled_objects'),
        results_path,
        features,
    )

    model = _load_pickle(os.path.join(model_training_path, 'model.pkl'))
    preds = clf_ut.get_predictions(model, X_test)

    # The encoder is loaded again rather than reused, exactly as before.
    # NOTE(review): presumably a guard against encode_labels altering the
    # instance passed to it - confirm, and reuse a single encoder if it is safe.
    encoder = _load_pickle(encoder_path)
    preds = clf_ut.inverse_transform_labels(encoder, preds)

    wrtrs.write_predictions(os.path.join(results_path, 'predictions.csv'), df, preds)
    print(f'Model deployment done in {time.time() - t1:.3f} sec.')
# Script entry point: run the deployment step only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()