"""Coordinate extraction and density-map drawing for the ride dataset.

Origin: ``data_analysis/maps.py`` as created by commit
6a0b47a2fc7c4e800f14212ae81dbd56de17fa94 (AdeB, Sat, 25 Apr 2015):
"Data analysis updated for the new Dataset class. Coordinates are saved in
a light numpy array for fast/light retrieval."

Workflow implemented here:

1. ``extract_coordinates`` walks the training iterator once and stores every
   point in a single ``(n, 2)`` float32 array, pickled under
   ``data.DATA_PATH``.
2. ``draw_map`` bins those points into a 2000x2000 2-D histogram and saves
   the log-scaled histogram as images.
"""

import cPickle
import scipy
import scipy.misc  # `import scipy` alone does not guarantee the submodule is loaded
import numpy as np
import matplotlib.pyplot as plt

import data


def compute_number_coordinates():
    """Count the points contained in all rides of the training set.

    Iterates over ``data.train_it()`` and sums the length of the last
    element of each ride (the sequence that ``extract_coordinates`` later
    copies point by point).

    Returns:
        The total number of points, which is also printed.
    """
    train_it = data.train_it()

    # Count the number of coordinates
    n_coordinates = 0
    for ride in train_it:
        n_coordinates += len(ride[-1])
    # Parenthesised single argument: prints the same under Python 2 and 3.
    print(n_coordinates)

    return n_coordinates


def extract_coordinates(n_coordinates=None):
    """Extract coordinates from the dataset and store them in a numpy array.

    The array is written to ``data.DATA_PATH + "/coordinates_array.pkl"``.

    Args:
        n_coordinates: total number of points in the training set. When
            ``None`` it is computed with ``compute_number_coordinates``,
            which costs one extra pass over the data. If the value is
            smaller than the real count, filling the array raises
            ``IndexError``; if it is larger, the surplus rows stay at zero.
    """
    if n_coordinates is None:
        n_coordinates = compute_number_coordinates()

    # Pre-allocate once; each row receives one point of ride[-1].
    coordinates = np.zeros((n_coordinates, 2), dtype="float32")
    train_it = data.train_it()

    c = 0
    for ride in train_it:
        for point in ride[-1]:
            coordinates[c] = point
            c += 1

    # Context manager guarantees the file is flushed and closed. The binary
    # pickle protocol avoids the text escaping of protocol 0 for the array
    # buffer; cPickle.load detects the protocol automatically.
    with open(data.DATA_PATH + "/coordinates_array.pkl", "wb") as pkl_file:
        cPickle.dump(coordinates, pkl_file, cPickle.HIGHEST_PROTOCOL)


def draw_map(coordinates, xrg, yrg):
    """Render a log-scaled 2-D histogram of the coordinates to disk.

    Writes ``/analysis/xyhmap.pdf`` (matplotlib figure) and
    ``/analysis/xymap.png`` (raw image) under ``data.DATA_PATH``.

    Args:
        coordinates: 2-D array; column 0 is binned against ``xrg`` and
            column 1 against ``yrg``.
        xrg: ``[min, max]`` range kept for column 0.
        yrg: ``[min, max]`` range kept for column 1.
    """
    hist, xx, yy = np.histogram2d(
        coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])

    # Computed once and reused for both outputs.
    # NOTE(review): empty bins give log(0) = -inf (with a RuntimeWarning);
    # confirm that both writers render those pixels as intended.
    log_hist = np.log(hist)

    plt.imshow(log_hist)
    plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf")

    # NOTE(review): scipy.misc.imsave is not available in recent SciPy
    # releases - confirm the SciPy version this script is run with.
    scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", log_hist)


if __name__ == "__main__":
    # Uncomment to (re)build the coordinates pickle; passing the known count
    # skips the counting pass done by compute_number_coordinates().
    # extract_coordinates(n_coordinates=83360928)

    with open(data.DATA_PATH + "/coordinates_array.pkl", "rb") as pkl_file:
        coordinates = cPickle.load(pkl_file)
    # Bounding box of the map: x range for column 0, y range for column 1
    # (presumably longitude / latitude - verify against the dataset).
    xrg = [-8.75, -8.55]
    yrg = [41.05, 41.25]
    draw_map(coordinates, xrg, yrg)