diff options
author | AdeB <adbrebs@gmail.com> | 2015-04-25 10:09:01 -0400 |
---|---|---|
committer | AdeB <adbrebs@gmail.com> | 2015-04-25 10:09:01 -0400 |
commit | 6a0b47a2fc7c4e800f14212ae81dbd56de17fa94 (patch) | |
tree | 3840e421fb77ad2218721d46ff662efa46e107f2 | |
parent | 676af1086b141a7803626b040e7da03526b95406 (diff) | |
download | taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.tar.gz taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.zip |
Data analysis updated for the new Dataset class. Coordinates are saved in a compact NumPy array for fast, lightweight retrieval.
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | data_analysis/maps.py | 55 | ||||
-rw-r--r-- | data_analysis/maps_old.py (renamed from alex/plots.py) | 2 |
3 files changed, 58 insertions, 1 deletion
@@ -1,3 +1,5 @@ +.idea/* + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/data_analysis/maps.py b/data_analysis/maps.py new file mode 100644 index 0000000..0b37f37 --- /dev/null +++ b/data_analysis/maps.py @@ -0,0 +1,55 @@ +import cPickle +import scipy +import numpy as np +import matplotlib.pyplot as plt + +import data + + +def compute_number_coordinates(): + train_it = data.train_it() + + # Count the number of coordinates + n_coordinates = 0 + for ride in train_it: + n_coordinates += len(ride[-1]) + print n_coordinates + + return n_coordinates + + +def extract_coordinates(n_coordinates=None): + """Extract coordinates from the dataset and store them in a numpy array""" + + if n_coordinates is None: + n_coordinates = compute_number_coordinates() + + coordinates = np.zeros((n_coordinates, 2), dtype="float32") + train_it = data.train_it() + + c = 0 + for ride in train_it: + for point in ride[-1]: + coordinates[c] = point + c += 1 + + cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb")) + + +def draw_map(coordinates, xrg, yrg): + + hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg]) + + plt.imshow(np.log(hist)) + plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf") + + scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", np.log(hist)) + + +if __name__ == "__main__": + # extract_coordinates(n_coordinates=83360928) + + coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb")) + xrg = [-8.75, -8.55] + yrg = [41.05, 41.25] + draw_map(coordinates, xrg, yrg) diff --git a/alex/plots.py b/data_analysis/maps_old.py index e405480..adfe26c 100644 --- a/alex/plots.py +++ b/data_analysis/maps_old.py @@ -4,7 +4,7 @@ import cPickle import scipy print "Loading data..." 
-with open("train_normal.pkl") as f: normal = cPickle.load(f) +with open("../train_normal.pkl") as f: normal = cPickle.load(f) print "Extracting x and y" xes = [c[0] for l in normal for c in l[-1]] |