From 404250df425df4f89d9edfe2357fc0cb7b8b77e6 Mon Sep 17 00:00:00 2001 From: AdeB Date: Thu, 16 Jul 2015 10:36:11 -0400 Subject: Update the heatmap script. --- data_analysis/maps.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'data_analysis') diff --git a/data_analysis/maps.py b/data_analysis/maps.py index 991f279..e951f23 100644 --- a/data_analysis/maps.py +++ b/data_analysis/maps.py @@ -1,13 +1,17 @@ import cPickle -import scipy import numpy as np import matplotlib.pyplot as plt +from fuel.schemes import ConstantScheme +from fuel.streams import DataStream + import data +from data.hdf5 import TaxiDataset, TaxiStream def compute_number_coordinates(): - train_it = data.train_it() + stream = TaxiDataset('train').get_example_stream() + train_it = stream.get_epoch_iterator() # Count the number of coordinates n_coordinates = 0 @@ -24,16 +28,20 @@ def extract_coordinates(n_coordinates=None): if n_coordinates is None: n_coordinates = compute_number_coordinates() + dataset = TaxiDataset('train') + stream = DataStream(dataset, iteration_scheme=ConstantScheme(1, dataset.num_examples)) + coordinates = np.zeros((n_coordinates, 2), dtype="float32") - train_it = data.train_it() + train_it = stream.get_epoch_iterator() c = 0 for ride in train_it: - for point in ride[-1]: + for point in zip(ride[2], ride[3]): coordinates[c] = point c += 1 + print c - cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb")) + cPickle.dump(coordinates, open(data.path + "/coordinates_array.pkl", "wb")) def draw_map(coordinates, xrg, yrg): @@ -47,9 +55,9 @@ def draw_map(coordinates, xrg, yrg): if __name__ == "__main__": - # extract_coordinates(n_coordinates=83360928) + extract_coordinates(n_coordinates=32502730) - coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb")) + coordinates = cPickle.load(open(data.path + "/coordinates_array.pkl", "rb")) xrg = [-8.75, -8.55] yrg = [41.05, 41.25] draw_map(coordinates, xrg, yrg) -- cgit v1.2.3 From 97e9ac0e15b890076af0cf469efbead89f8eb804 Mon Sep 17 00:00:00 2001 From: AdeB Date: Thu, 16 Jul 2015 10:38:45 -0400 Subject: Update number of coordinates function --- data_analysis/maps.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'data_analysis') diff --git a/data_analysis/maps.py b/data_analysis/maps.py index e951f23..d5db182 100644 --- a/data_analysis/maps.py +++ b/data_analysis/maps.py @@ -10,13 +10,14 @@ from data.hdf5 import TaxiDataset, TaxiStream def compute_number_coordinates(): - stream = TaxiDataset('train').get_example_stream() + dataset = TaxiDataset('train') + stream = DataStream(dataset, iteration_scheme=ConstantScheme(1, dataset.num_examples)) train_it = stream.get_epoch_iterator() # Count the number of coordinates n_coordinates = 0 for ride in train_it: - n_coordinates += len(ride[-1]) + n_coordinates += len(ride[2]) print n_coordinates return n_coordinates @@ -51,7 +52,7 @@ def draw_map(coordinates, xrg, yrg): hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg]) plt.imshow(np.log(hist)) - plt.savefig(data.DATA_PATH + "/analysis/xyhmap2.png") + plt.savefig(data.path + "/analysis/xyhmap2.png") if __name__ == "__main__": -- cgit v1.2.3 From b6566c010be7c871a5b6c199feaf1dfda0910ade Mon Sep 17 00:00:00 2001 From: AdeB Date: Thu, 16 Jul 2015 18:59:50 -0400 Subject: Fix a bug in the heatmap generation --- data_analysis/maps.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'data_analysis') diff --git a/data_analysis/maps.py b/data_analysis/maps.py index d5db182..2912c8d 100644 --- a/data_analysis/maps.py +++ b/data_analysis/maps.py @@ -2,22 +2,16 @@ import cPickle import numpy as np import matplotlib.pyplot as plt -from fuel.schemes import ConstantScheme -from fuel.streams import DataStream - import data -from data.hdf5 import TaxiDataset, TaxiStream +from data.hdf5 import taxi_it def compute_number_coordinates(): - dataset = TaxiDataset('train') - stream = DataStream(dataset, iteration_scheme=ConstantScheme(1, dataset.num_examples)) - train_it = stream.get_epoch_iterator() # Count the number of coordinates n_coordinates = 0 - for ride in train_it: - n_coordinates += len(ride[2]) + for ride in taxi_it('train'): + n_coordinates += len(ride['latitude']) print n_coordinates return n_coordinates @@ -29,17 +23,14 @@ def extract_coordinates(n_coordinates=None): if n_coordinates is None: n_coordinates = compute_number_coordinates() - dataset = TaxiDataset('train') - stream = DataStream(dataset, iteration_scheme=ConstantScheme(1, dataset.num_examples)) - coordinates = np.zeros((n_coordinates, 2), dtype="float32") - train_it = stream.get_epoch_iterator() c = 0 - for ride in train_it: - for point in zip(ride[2], ride[3]): + for ride in taxi_it('train'): + for point in zip(ride['latitude'], ride['longitude']): coordinates[c] = point c += 1 + print c cPickle.dump(coordinates, open(data.path + "/coordinates_array.pkl", "wb")) @@ -52,13 +43,14 @@ def draw_map(coordinates, xrg, yrg): hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg]) plt.imshow(np.log(hist)) + plt.gca().invert_yaxis() plt.savefig(data.path + "/analysis/xyhmap2.png") if __name__ == "__main__": - extract_coordinates(n_coordinates=32502730) + extract_coordinates(n_coordinates=83409386) coordinates = cPickle.load(open(data.path + "/coordinates_array.pkl", "rb")) - xrg = [-8.75, -8.55] - yrg = [41.05, 41.25] + xrg = [41.05, 41.25] + yrg = [-8.75, -8.55] draw_map(coordinates, xrg, yrg) -- cgit v1.2.3