import numpy as np
from shapely.geometry import Point
import pickle
from sklearn.neighbors import KDTree
def create_poi_index(poi_gdf, path):
    """
    Build a KD-tree over the given pois and pickle it to disk.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains the pois to be stored in
            the index; its 'lon' and 'lat' columns are used as coordinates
        path (str): Path to save the index

    Returns:
        None
    """
    poi_coords = poi_gdf[['lon', 'lat']].values
    poi_index = KDTree(poi_coords)
    # Use a context manager so the file handle is flushed and closed even if
    # pickling fails part-way through.
    with open(path, 'wb') as index_file:
        pickle.dump(poi_index, index_file)
def get_classes_in_radius_bln(poi_gdf, poi_index_path, nlabels, label_map,
                              thr):
    """
    Creates a features array. For each poi *p* (each row) the array will
    contain 1 (True) in column *c*, if there is at least one indexed poi of
    category *c* inside *p*'s defined radius.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon' and 'lat' are read per row
        poi_index_path (str): Path to the stored (pickled) index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois,
            addressed by the positions returned from the index
        thr (float): Radius to be searched, expressed in the units of the
            indexed coordinates (documented upstream as meters - TODO
            confirm that 'lon'/'lat' hold projected coordinates)

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at index files written by a trusted process.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # A single point is queried, so only the first result is relevant.
        result_pois_idxs = poi_index.query_radius(poi_coords, r=thr)[0]
        for rpi in result_pois_idxs:
            X[row, label_map[rpi]] = 1
    return X
def get_classes_in_radius_cnt(poi_gdf, poi_index_path, nlabels, label_map,
                              thr):
    """
    Creates a features array. For each poi *p* (each row) the array will
    contain a number in column *c*, representing the number of indexed pois
    of category *c* inside *p*'s defined radius.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon' and 'lat' are read per row
        poi_index_path (str): Path to the stored (pickled) index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois,
            addressed by the positions returned from the index
        thr (float): Radius to be searched, expressed in the units of the
            indexed coordinates (documented upstream as meters - TODO
            confirm that 'lon'/'lat' hold projected coordinates)

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at index files written by a trusted process.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        class_cnt = dict.fromkeys(range(nlabels), 0)
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # A single point is queried, so only the first result is relevant.
        result_pois_idxs = poi_index.query_radius(poi_coords, r=thr)[0]
        for rpi in result_pois_idxs:
            class_cnt[label_map[rpi]] += 1
        for label, count in class_cnt.items():
            X[row, label] = count
    return X
def get_classes_in_street_and_radius_bln(poi_gdf, street_gdf, pois_by_street,
                                         nlabels, label_map, geometry_map,
                                         thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is
    identified and the pois of this street are kept. These pois are then
    filtered and only those whose distance to *p* is strictly below *thr* are
    considered (e.g. a set of pois *P*). Finally, for each poi *p* (each row)
    the array will contain 1 (True) in column *c*, if there is at least one
    poi of category *c* among pois in *P*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon', 'lat' and 'geometry' are read
        street_gdf (geopandas.GeoDataFrame): Contains all streets extracted
            from OSM, along with their geometries
        pois_by_street (dict): Has streets ids as keys and a list containing
            the pois which belong to each street as values; a nearest street
            that is missing from the dict raises KeyError
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train
            pois
        thr (float): Radius to be searched, in the units of the geometries
            (documented upstream as meters - TODO confirm the CRS)

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    street_index = street_gdf.sindex
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)  # built once, reused per candidate
        # NOTE(review): presumably the rtree-style sindex.nearest(coords) API
        # that yields positional street ids - confirm against the installed
        # geopandas version.
        candidates = list(street_index.nearest(poi_coords))
        # Re-rank the candidates by exact geometric distance to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_gdf.iloc[c]['geometry'])
            for c in candidates
        ])]
        for poi_idx in pois_by_street[nearest]:
            if poi.geometry.distance(geometry_map[poi_idx]) < thr:
                X[row, label_map[poi_idx]] = 1
    return X
def get_classes_in_street_and_radius_cnt(poi_gdf, street_gdf, pois_by_street,
                                         nlabels, label_map, geometry_map,
                                         thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is
    identified and the pois of this street are kept. These pois are then
    filtered and only those whose distance to *p* is strictly below *thr* are
    considered (e.g. a set of pois *P*). Finally, for each poi *p* (each row)
    the array will contain a number in column *c*, representing the number
    of pois of category *c* among pois in *P*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon', 'lat' and 'geometry' are read
        street_gdf (geopandas.GeoDataFrame): Contains all streets extracted
            from OSM, along with their geometries
        pois_by_street (dict): Has streets ids as keys and a list containing
            the pois which belong to each street as values; a nearest street
            that is missing from the dict raises KeyError
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train
            pois
        thr (float): Radius to be searched, in the units of the geometries
            (documented upstream as meters - TODO confirm the CRS)

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    street_index = street_gdf.sindex
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)  # built once, reused per candidate
        # NOTE(review): presumably the rtree-style sindex.nearest(coords) API
        # that yields positional street ids - confirm against the installed
        # geopandas version.
        candidates = list(street_index.nearest(poi_coords))
        # Re-rank the candidates by exact geometric distance to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_gdf.iloc[c]['geometry'])
            for c in candidates
        ])]
        class_cnt = dict.fromkeys(range(nlabels), 0)
        for poi_idx in pois_by_street[nearest]:
            if poi.geometry.distance(geometry_map[poi_idx]) < thr:
                class_cnt[label_map[poi_idx]] += 1
        for label, count in class_cnt.items():
            X[row, label] = count
    return X
def get_classes_in_neighbors_bln(poi_gdf, poi_index_path, nlabels, label_map,
                                 k):
    """
    Creates a features array. For each poi *p* (each row) the array will
    contain 1 (True) in column *c*, if there is at least one poi of category
    *c* among the *k* nearest indexed neighbors of *p*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon' and 'lat' are read per row
        poi_index_path (str): Path to the stored (pickled) index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois,
            addressed by the positions returned from the index
        k (int): Number of nearest neighbors to take into account

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at index files written by a trusted process.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # query returns (distances, indices); take the indices of the single
        # queried point.
        result_pois_idxs = poi_index.query(poi_coords, k=k)[1][0]
        for rpi in result_pois_idxs:
            X[row, label_map[rpi]] = 1
    return X
def get_classes_in_neighbors_cnt(poi_gdf, poi_index_path, nlabels, label_map,
                                 k):
    """
    Creates a features array. For each poi *p* (each row) the array will
    contain a number in column *c*, representing the number of pois of
    category *c* among the *k* nearest indexed neighbors of *p*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon' and 'lat' are read per row
        poi_index_path (str): Path to the stored (pickled) index
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois,
            addressed by the positions returned from the index
        k (int): Number of nearest neighbors to take into account

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at index files written by a trusted process.
    with open(poi_index_path, 'rb') as index_file:
        poi_index = pickle.load(index_file)
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        class_cnt = dict.fromkeys(range(nlabels), 0)
        poi_coords = np.array([poi.lon, poi.lat]).reshape(1, -1)
        # query returns (distances, indices); take the indices of the single
        # queried point.
        result_pois_idxs = poi_index.query(poi_coords, k=k)[1][0]
        for rpi in result_pois_idxs:
            class_cnt[label_map[rpi]] += 1
        # The loop variables must not be called `k`: rebinding the parameter
        # would change the neighbor count used for every later poi.
        for label, count in class_cnt.items():
            X[row, label] = count
    return X
def get_classes_in_street_radius_bln(poi_gdf, street_gdf, nlabels, label_map,
                                     geometry_map, thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is
    identified; every train poi whose geometry lies at a distance strictly
    below *thr* from that street (measured from the street geometry, not
    from *p*) is considered. For each poi *p* (each row) the array will
    contain 1 (True) in column *c*, if at least one considered poi has
    category *c*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon' and 'lat' are read per row
        street_gdf (geopandas.GeoDataFrame): Contains the streets along with
            their geometries
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train
            pois, aligned position-by-position with label_map
        thr (float): Maximum distance from the street, in the units of the
            geometries (presumably meters - TODO confirm the CRS)

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    street_index = street_gdf.sindex
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)  # built once, reused per candidate
        # NOTE(review): presumably the rtree-style sindex.nearest(coords) API
        # that yields positional street ids - confirm against the installed
        # geopandas version.
        candidates = list(street_index.nearest(poi_coords))
        # Re-rank the candidates by exact geometric distance to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_gdf.iloc[c]['geometry'])
            for c in candidates
        ])]
        # Look the street geometry up once instead of once per train poi.
        street_geom = street_gdf.iloc[nearest]['geometry']
        for idx, geom in enumerate(geometry_map):
            if street_geom.distance(geom) < thr:
                X[row, label_map[idx]] = 1
    return X
def get_classes_in_street_radius_cnt(poi_gdf, street_gdf, nlabels, label_map,
                                     geometry_map, thr):
    """
    Creates a features array. For each poi *p*, the nearest street to *p* is
    identified; every train poi whose geometry lies at a distance strictly
    below *thr* from that street (measured from the street geometry, not
    from *p*) is considered. For each poi *p* (each row) the array will
    contain a number in column *c*, representing how many considered pois
    have category *c*.

    Args:
        poi_gdf (geopandas.GeoDataFrame): Contains pois for which the
            features will be created; 'lon' and 'lat' are read per row
        street_gdf (geopandas.GeoDataFrame): Contains the streets along with
            their geometries
        nlabels (int): Number of poi categories
        label_map (list): A list containing the labels of the train pois
        geometry_map (list): A list containing the geometries of the train
            pois, aligned position-by-position with label_map
        thr (float): Maximum distance from the street, in the units of the
            geometries (presumably meters - TODO confirm the CRS)

    Returns:
        numpy.ndarray: The features array of shape (n_samples, n_features),
        here (len(poi_gdf), nlabels)
    """
    street_index = street_gdf.sindex
    X = np.zeros((len(poi_gdf), nlabels))
    # Rows are addressed by position (enumerate) rather than by poi.Index,
    # which is the index *label* and only matches the row number when the
    # frame's index is exactly 0..n-1.
    for row, poi in enumerate(poi_gdf.itertuples()):
        poi_coords = (poi.lon, poi.lat)
        poi_point = Point(poi_coords)  # built once, reused per candidate
        # NOTE(review): presumably the rtree-style sindex.nearest(coords) API
        # that yields positional street ids - confirm against the installed
        # geopandas version.
        candidates = list(street_index.nearest(poi_coords))
        # Re-rank the candidates by exact geometric distance to the poi.
        nearest = candidates[np.argmin([
            poi_point.distance(street_gdf.iloc[c]['geometry'])
            for c in candidates
        ])]
        class_cnt = dict.fromkeys(range(nlabels), 0)
        # Look the street geometry up once instead of once per train poi.
        street_geom = street_gdf.iloc[nearest]['geometry']
        for idx, geom in enumerate(geometry_map):
            if street_geom.distance(geom) < thr:
                class_cnt[label_map[idx]] += 1
        for label, count in class_cnt.items():
            X[row, label] = count
    return X