aboutsummaryrefslogtreecommitdiff
path: root/data_analysis
diff options
context:
space:
mode:
author: AdeB <adbrebs@gmail.com> 2015-04-25 10:09:01 -0400
committer: AdeB <adbrebs@gmail.com> 2015-04-25 10:09:01 -0400
commit: 6a0b47a2fc7c4e800f14212ae81dbd56de17fa94 (patch)
tree: 3840e421fb77ad2218721d46ff662efa46e107f2 /data_analysis
parent: 676af1086b141a7803626b040e7da03526b95406 (diff)
download: taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.tar.gz
taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.zip
Data analysis updated for the new Dataset class. Coordinates are saved in a light numpy array for fast/light retrieval.
Diffstat (limited to 'data_analysis')
-rw-r--r-- data_analysis/maps.py 55
-rw-r--r-- data_analysis/maps_old.py 29
2 files changed, 84 insertions, 0 deletions
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
new file mode 100644
index 0000000..0b37f37
--- /dev/null
+++ b/data_analysis/maps.py
@@ -0,0 +1,55 @@
+import cPickle
+import scipy
+import numpy as np
+import matplotlib.pyplot as plt
+
+import data
+
+
+def compute_number_coordinates():
+    """Print and return the total number of points over all training rides.
+
+    The last field of every ride yielded by data.train_it() is taken to be
+    that ride's sequence of points.
+    """
+    total = sum(len(ride[-1]) for ride in data.train_it())
+    print total
+
+    return total
+
+
+def extract_coordinates(n_coordinates=None):
+    """Extract coordinates from the dataset and store them in a numpy array.
+
+    n_coordinates is the total number of points to allocate room for; when it
+    is None it is obtained from compute_number_coordinates(), at the cost of
+    one extra pass over the training set. The resulting (n_coordinates, 2)
+    float32 array is pickled to DATA_PATH/coordinates_array.pkl.
+    """
+    if n_coordinates is None:
+        n_coordinates = compute_number_coordinates()
+
+    coordinates = np.zeros((n_coordinates, 2), dtype="float32")
+
+    c = 0
+    for ride in data.train_it():
+        for point in ride[-1]:
+            coordinates[c] = point
+            c += 1
+
+    # Binary pickle protocol: the default ASCII protocol 0 is far slower and
+    # bulkier for a large array. cPickle.load detects the protocol on its own.
+    # The with-block closes the file even if dump raises.
+    with open(data.DATA_PATH + "/coordinates_array.pkl", "wb") as f:
+        cPickle.dump(coordinates, f, cPickle.HIGHEST_PROTOCOL)
+
+
+def draw_map(coordinates, xrg, yrg, bins=2000):
+    """Render a log-scaled 2D histogram of the coordinates as PDF and PNG.
+
+    coordinates: array indexed as [:, 0] and [:, 1] for the two axes.
+    xrg, yrg: [min, max] histogram range for column 0 and column 1.
+    bins: bins per axis, forwarded to np.histogram2d (default 2000).
+
+    Writes analysis/xyhmap.pdf and analysis/xymap.png under data.DATA_PATH.
+    """
+    hist = np.histogram2d(coordinates[:, 0], coordinates[:, 1],
+                          bins=bins, range=[xrg, yrg])[0]
+
+    # Computed once and shared by both outputs.
+    # NOTE(review): empty bins give log(0) = -inf; confirm both writers cope.
+    log_hist = np.log(hist)
+
+    plt.imshow(log_hist)
+    plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf")
+
+    scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", log_hist)
+
+
+if __name__ == "__main__":
+    # One-off step that (re)builds coordinates_array.pkl; uncomment to run.
+    # extract_coordinates(n_coordinates=83360928)
+
+    # Load inside a with-block so the pickle file is closed once it is read.
+    with open(data.DATA_PATH + "/coordinates_array.pkl", "rb") as f:
+        coordinates = cPickle.load(f)
+
+    # Histogram window handed to draw_map for column 0 and column 1
+    # (presumably longitude / latitude bounds -- confirm).
+    xrg = [-8.75, -8.55]
+    yrg = [41.05, 41.25]
+    draw_map(coordinates, xrg, yrg)
diff --git a/data_analysis/maps_old.py b/data_analysis/maps_old.py
new file mode 100644
index 0000000..adfe26c
--- /dev/null
+++ b/data_analysis/maps_old.py
@@ -0,0 +1,29 @@
+import matplotlib.pyplot as plt
+import numpy
+import cPickle
+import scipy
+
+print "Loading data..."
+# Pickle files should be read in binary mode.
+with open("../train_normal.pkl", "rb") as f:
+    normal = cPickle.load(f)
+
+print "Extracting x and y"
+# The last field of each record is iterated as a sequence of points;
+# element 0 of a point goes to xes, element 1 to yes.
+xes = [c[0] for l in normal for c in l[-1]]
+yes = [c[1] for l in normal for c in l[-1]]
+
+# Histogram window for xes and yes respectively.
+xrg = [-8.75, -8.55]
+yrg = [41.05, 41.25]
+
+print "Doing 1d histogram"
+#plt.clf(); plt.hist(xes, bins=1000, range=xrg); plt.savefig("xhist.pdf")
+#plt.clf(); plt.hist(yes, bins=1000, range=yrg); plt.savefig("yhist.pdf")
+
+print "Doing 2d histogram"
+#plt.clf(); plt.hist2d(xes, yes, bins=500, range=[xrg, yrg]); plt.savefig("xymap.pdf")
+
+hist, xx, yy = numpy.histogram2d(xes, yes, bins=2000, range=[xrg, yrg])
+
+# Log-scaled counts, written to the current directory as a PDF figure and
+# as a raw PNG.
+plt.clf()
+plt.imshow(numpy.log(hist))
+plt.savefig("xyhmap.pdf")
+
+scipy.misc.imsave("xymap.png", numpy.log(hist))