Source code for adjacency_features

import numpy as np
from shapely.geometry import Point
import pickle

from sklearn.neighbors import KDTree


def create_poi_index(poi_gdf, path):
    """
    Creates spatial index containing the pois given.

    A KD-tree is built over the ``lon`` / ``lat`` columns of *poi_gdf* and
    pickled to *path*, overwriting any existing file.

    Args:
        poi_gdf (geopandas.GeoDataFrame) : Contains pois to be stored in the \
            index
        path (str): Path to save the index

    Returns:
        None
    """
    poi_coords = poi_gdf[['lon', 'lat']].values
    poi_index = KDTree(poi_coords)
    # The context manager guarantees the file is flushed and closed, even if
    # pickling raises half-way through.
    with open(path, 'wb') as index_file:
        pickle.dump(poi_index, index_file)


def get_classes_in_radius_bln(poi_gdf, poi_index_path, nlabels, label_map, thr):
    """
    Creates a features array. For each poi *p* (each row) the array will
    contain 1 (True) in column *c*, if there is at least one poi of category *c*
    inside *p*'s defined radius.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        poi_index_path (str): Path to the stored index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois; \
            entry *i* must be an integer in ``[0, nlabels)`` and belong to \
            the *i*-th poi stored in the index
        thr (float): Radius to be searched (in meters). NOTE(review): the \
            index is queried with raw lon/lat values, so this presumably \
            requires projected, metric coordinates - confirm.

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    # NOTE: unpickling can execute arbitrary code; only load index files that
    # come from a trusted location.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index (e.g. a subset of a larger frame) still
    # fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # query_radius returns one index array per query point; a single
        # point is queried here, hence the [0].
        result_pois_idxs = poi_index.query_radius(poi_coords, r=thr)[0]
        for rpi in result_pois_idxs:
            X[row, label_map[rpi]] = 1
    return X


def get_classes_in_radius_cnt(poi_gdf, poi_index_path, nlabels, label_map, thr):
    """
    Creates a features array. For each poi *p* (each row) the array will
    contain an integer in column *c*, representing the number of pois of
    category *c* inside *p*'s defined radius.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        poi_index_path (str): Path to the stored index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois; \
            entry *i* must be an integer in ``[0, nlabels)`` and belong to \
            the *i*-th poi stored in the index
        thr (float): Radius to be searched (in meters). NOTE(review): the \
            index is queried with raw lon/lat values, so this presumably \
            requires projected, metric coordinates - confirm.

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    # NOTE: unpickling can execute arbitrary code; only load index files that
    # come from a trusted location.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # query_radius returns one index array per query point; a single
        # point is queried here, hence the [0].
        result_pois_idxs = poi_index.query_radius(poi_coords, r=thr)[0]
        # X starts at zero, so the counts can be accumulated in place.
        for rpi in result_pois_idxs:
            X[row, label_map[rpi]] += 1
    return X


def get_classes_in_street_and_radius_bln(poi_gdf, street_gdf, pois_by_street, nlabels, label_map, geometry_map, thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is \
    identified and the pois of this street are kept. These pois are then \
    filtered and only those which are inside *p*'s defined radius are \
    considered (e.g. a set of pois *P*). Finally, for each poi *p* (each row) \
    the array will contain 1 (True) in column *c*, if there is at least one \
    poi of category *c* among pois in *P*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        street_gdf (geopandas.GeoDataFrame): Contains all streets extracted \
            from OSM, along with their geometries
        pois_by_street (dict): Has streets ids as keys and a list containing \
            the pois which belong to each street as values. It is looked up \
            with the positional street index returned by the spatial index, \
            and a missing key raises KeyError.
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train pois
        thr (float): Radius to be searched (in meters). NOTE(review): \
            distances are computed on the raw geometries, so this presumably \
            requires a projected, metric CRS - confirm.

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    street_index = street_gdf.sindex
    # Fetch the geometry column once instead of materialising a whole row for
    # every candidate street.
    street_geoms = street_gdf['geometry']
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)
        # NOTE(review): relies on the spatial index accepting a coordinate
        # tuple and yielding positional street indices - confirm against the
        # installed geopandas / rtree version.
        candidates = list(street_index.nearest(poi_coords))
        # The index may return several candidates; keep the one whose actual
        # geometry is closest to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_geoms.iloc[c]) for c in candidates
        ])]
        for poi_idx in pois_by_street[nearest]:
            if poi.geometry.distance(geometry_map[poi_idx]) < thr:
                X[row, label_map[poi_idx]] = 1
    return X


def get_classes_in_street_and_radius_cnt(poi_gdf, street_gdf, pois_by_street, nlabels, label_map, geometry_map, thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is \
    identified and the pois of this street are kept. These pois are then \
    filtered and only those which are inside *p*'s defined radius are \
    considered (e.g. a set of pois *P*). Finally, for each poi *p* (each row) \
    the array will contain an integer in column *c*, representing the number \
    of pois of category *c* among pois in *P*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        street_gdf (geopandas.GeoDataFrame): Contains all streets extracted \
            from OSM, along with their geometries
        pois_by_street (dict): Has streets ids as keys and a list containing \
            the pois which belong to each street as values. It is looked up \
            with the positional street index returned by the spatial index, \
            and a missing key raises KeyError.
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train pois
        thr (float): Radius to be searched (in meters). NOTE(review): \
            distances are computed on the raw geometries, so this presumably \
            requires a projected, metric CRS - confirm.

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    street_index = street_gdf.sindex
    # Fetch the geometry column once instead of materialising a whole row for
    # every candidate street.
    street_geoms = street_gdf['geometry']
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)
        # NOTE(review): relies on the spatial index accepting a coordinate
        # tuple and yielding positional street indices - confirm against the
        # installed geopandas / rtree version.
        candidates = list(street_index.nearest(poi_coords))
        # The index may return several candidates; keep the one whose actual
        # geometry is closest to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_geoms.iloc[c]) for c in candidates
        ])]
        # X starts at zero, so the counts can be accumulated in place.
        for poi_idx in pois_by_street[nearest]:
            if poi.geometry.distance(geometry_map[poi_idx]) < thr:
                X[row, label_map[poi_idx]] += 1
    return X


def get_classes_in_neighbors_bln(poi_gdf, poi_index_path, nlabels, label_map, k):
    """
    Creates a features array. For each poi *p* (each row) the array will \
    contain 1 (True) in column *c*, if there is at least one poi of category \
    *c* among the *k* nearest neighbors of *p*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        poi_index_path (str): Path to the stored index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois; \
            entry *i* must be an integer in ``[0, nlabels)`` and belong to \
            the *i*-th poi stored in the index
        k (int): Number of nearest neighbors to take into account

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    # NOTE: unpickling can execute arbitrary code; only load index files that
    # come from a trusted location.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # query returns (distances, indices); take the indices of the single
        # query point.
        result_pois_idxs = poi_index.query(poi_coords, k=k)[1][0]
        for rpi in result_pois_idxs:
            X[row, label_map[rpi]] = 1
    return X


def get_classes_in_neighbors_cnt(poi_gdf, poi_index_path, nlabels, label_map, k):
    """
    Creates a features array. For each poi *p* (each row) the array will \
    contain an integer in column *c*, representing the number of pois of \
    category *c* among the *k* nearest neighbors of *p*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        poi_index_path (str): Path to the stored index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois; \
            entry *i* must be an integer in ``[0, nlabels)`` and belong to \
            the *i*-th poi stored in the index
        k (int): Number of nearest neighbors to take into account

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf and sums to the number of neighbors returned.
    """
    # NOTE: unpickling can execute arbitrary code; only load index files that
    # come from a trusted location.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # query returns (distances, indices); take the indices of the single
        # query point.
        result_pois_idxs = poi_index.query(poi_coords, k=k)[1][0]
        # Accumulate straight into X (it starts at zero). No loop variable
        # may rebind the parameter ``k``: every poi must be queried with the
        # caller's neighbor count.
        for rpi in result_pois_idxs:
            X[row, label_map[rpi]] += 1
    return X


def get_classes_in_street_radius_bln(poi_gdf, street_gdf, nlabels, label_map, geometry_map, thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is \
    identified. Every train poi whose distance to that street's geometry is \
    smaller than *thr* is considered (e.g. a set of pois *P*). Finally, for \
    each poi *p* (each row) the array will contain 1 (True) in column *c*, \
    if there is at least one poi of category *c* among pois in *P*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        street_gdf (geopandas.GeoDataFrame): Contains the streets along \
            with their geometries (``geometry`` column)
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train \
            pois, aligned with label_map
        thr (float): Maximum distance between the street and a train poi. \
            NOTE(review): distances are computed on the raw geometries, so \
            the unit is that of their coordinates - confirm.

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    street_index = street_gdf.sindex
    # Fetch the geometry column once instead of materialising a whole row for
    # every lookup.
    street_geoms = street_gdf['geometry']
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)
        # NOTE(review): relies on the spatial index accepting a coordinate
        # tuple and yielding positional street indices - confirm against the
        # installed geopandas / rtree version.
        candidates = list(street_index.nearest(poi_coords))
        # The index may return several candidates; keep the one whose actual
        # geometry is closest to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_geoms.iloc[c]) for c in candidates
        ])]
        # Resolved once per poi; it does not change while the train pois are
        # scanned.
        nearest_geom = street_geoms.iloc[nearest]
        for i, geom in enumerate(geometry_map):
            if nearest_geom.distance(geom) < thr:
                X[row, label_map[i]] = 1
    return X


def get_classes_in_street_radius_cnt(poi_gdf, street_gdf, nlabels, label_map, geometry_map, thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is \
    identified. Every train poi whose distance to that street's geometry is \
    smaller than *thr* is considered (e.g. a set of pois *P*). Finally, for \
    each poi *p* (each row) the array will contain an integer in column *c*, \
    representing the number of pois of category *c* among pois in *P*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the \
            features will be created
        street_gdf (geopandas.GeoDataFrame): Contains the streets along \
            with their geometries (``geometry`` column)
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train \
            pois, aligned with label_map
        thr (float): Maximum distance between the street and a train poi. \
            NOTE(review): distances are computed on the raw geometries, so \
            the unit is that of their coordinates - confirm.

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features), \
            here (len(poi_gdf), nlabels). Row *i* belongs to the *i*-th row \
            of poi_gdf.
    """
    street_index = street_gdf.sindex
    # Fetch the geometry column once instead of materialising a whole row for
    # every lookup.
    street_geoms = street_gdf['geometry']
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position rather than by ``poi.Index`` so that a
    # frame with a non-default index still fills rows 0..len(poi_gdf)-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)
        # NOTE(review): relies on the spatial index accepting a coordinate
        # tuple and yielding positional street indices - confirm against the
        # installed geopandas / rtree version.
        candidates = list(street_index.nearest(poi_coords))
        # The index may return several candidates; keep the one whose actual
        # geometry is closest to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_geoms.iloc[c]) for c in candidates
        ])]
        # Resolved once per poi; it does not change while the train pois are
        # scanned.
        nearest_geom = street_geoms.iloc[nearest]
        # X starts at zero, so the counts can be accumulated in place.
        for i, geom in enumerate(geometry_map):
            if nearest_geom.distance(geom) < thr:
                X[row, label_map[i]] += 1
    return X