aboutsummaryrefslogtreecommitdiff
path: root/data_analysis/maps.py
diff options
context:
space:
mode:
author: AdeB <adbrebs@gmail.com> 2015-04-25 10:09:01 -0400
committer: AdeB <adbrebs@gmail.com> 2015-04-25 10:09:01 -0400
commit: 6a0b47a2fc7c4e800f14212ae81dbd56de17fa94 (patch)
tree: 3840e421fb77ad2218721d46ff662efa46e107f2 /data_analysis/maps.py
parent: 676af1086b141a7803626b040e7da03526b95406 (diff)
download: taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.tar.gz
taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.zip
Data analysis updated for the new Dataset class. Coordinates are saved in a light numpy array for fast/light retrieval.
Diffstat (limited to 'data_analysis/maps.py')
-rw-r--r-- data_analysis/maps.py 55
1 files changed, 55 insertions, 0 deletions
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
new file mode 100644
index 0000000..0b37f37
--- /dev/null
+++ b/data_analysis/maps.py
@@ -0,0 +1,55 @@
+import cPickle
+import scipy
+import numpy as np
+import matplotlib.pyplot as plt
+
+import data
+
+
+def compute_number_coordinates():
+ train_it = data.train_it()
+
+ # Count the number of coordinates
+ n_coordinates = 0
+ for ride in train_it:
+ n_coordinates += len(ride[-1])
+ print n_coordinates
+
+ return n_coordinates
+
+
+def extract_coordinates(n_coordinates=None):
+ """Extract coordinates from the dataset and store them in a numpy array"""
+
+ if n_coordinates is None:
+ n_coordinates = compute_number_coordinates()
+
+ coordinates = np.zeros((n_coordinates, 2), dtype="float32")
+ train_it = data.train_it()
+
+ c = 0
+ for ride in train_it:
+ for point in ride[-1]:
+ coordinates[c] = point
+ c += 1
+
+ cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb"))
+
+
+def draw_map(coordinates, xrg, yrg):
+
+ hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])
+
+ plt.imshow(np.log(hist))
+ plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf")
+
+ scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", np.log(hist))
+
+
+if __name__ == "__main__":
+ # extract_coordinates(n_coordinates=83360928)
+
+ coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb"))
+ xrg = [-8.75, -8.55]
+ yrg = [41.05, 41.25]
+ draw_map(coordinates, xrg, yrg)