aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdeB <adbrebs@gmail.com>2015-04-25 10:09:01 -0400
committerAdeB <adbrebs@gmail.com>2015-04-25 10:09:01 -0400
commit6a0b47a2fc7c4e800f14212ae81dbd56de17fa94 (patch)
tree3840e421fb77ad2218721d46ff662efa46e107f2
parent676af1086b141a7803626b040e7da03526b95406 (diff)
downloadtaxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.tar.gz
taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.zip
Data analysis updated for the new Dataset class. Coordinates are saved in a lightweight NumPy array for fast, low-memory retrieval.
-rw-r--r--.gitignore2
-rw-r--r--data_analysis/maps.py55
-rw-r--r--data_analysis/maps_old.py (renamed from alex/plots.py)2
3 files changed, 58 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 3b04a69..179523a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+.idea/*
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
new file mode 100644
index 0000000..0b37f37
--- /dev/null
+++ b/data_analysis/maps.py
@@ -0,0 +1,55 @@
+import cPickle
+import scipy
+import numpy as np
+import matplotlib.pyplot as plt
+
+import data
+
+
+def compute_number_coordinates():
+ train_it = data.train_it()
+
+ # Count the number of coordinates
+ n_coordinates = 0
+ for ride in train_it:
+ n_coordinates += len(ride[-1])
+ print n_coordinates
+
+ return n_coordinates
+
+
+def extract_coordinates(n_coordinates=None):
+ """Extract coordinates from the dataset and store them in a numpy array"""
+
+ if n_coordinates is None:
+ n_coordinates = compute_number_coordinates()
+
+ coordinates = np.zeros((n_coordinates, 2), dtype="float32")
+ train_it = data.train_it()
+
+ c = 0
+ for ride in train_it:
+ for point in ride[-1]:
+ coordinates[c] = point
+ c += 1
+
+ cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb"))
+
+
+def draw_map(coordinates, xrg, yrg):
+
+ hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])
+
+ plt.imshow(np.log(hist))
+ plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf")
+
+ scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", np.log(hist))
+
+
+if __name__ == "__main__":
+ # extract_coordinates(n_coordinates=83360928)
+
+ coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb"))
+ xrg = [-8.75, -8.55]
+ yrg = [41.05, 41.25]
+ draw_map(coordinates, xrg, yrg)
diff --git a/alex/plots.py b/data_analysis/maps_old.py
index e405480..adfe26c 100644
--- a/alex/plots.py
+++ b/data_analysis/maps_old.py
@@ -4,7 +4,7 @@ import cPickle
import scipy
print "Loading data..."
-with open("train_normal.pkl") as f: normal = cPickle.load(f)
+with open("../train_normal.pkl") as f: normal = cPickle.load(f)
print "Extracting x and y"
xes = [c[0] for l in normal for c in l[-1]]