diff options
author | AdeB <adbrebs@gmail.com> | 2015-04-25 10:09:01 -0400 |
---|---|---|
committer | AdeB <adbrebs@gmail.com> | 2015-04-25 10:09:01 -0400 |
commit | 6a0b47a2fc7c4e800f14212ae81dbd56de17fa94 (patch) | |
tree | 3840e421fb77ad2218721d46ff662efa46e107f2 /data_analysis | |
parent | 676af1086b141a7803626b040e7da03526b95406 (diff) | |
download | taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.tar.gz taxi-6a0b47a2fc7c4e800f14212ae81dbd56de17fa94.zip |
Data analysis updated for the new Dataset class. Coordinates are saved in a light numpy array for fast/light retrieval.
Diffstat (limited to 'data_analysis')
-rw-r--r-- | data_analysis/maps.py | 55 | ||||
-rw-r--r-- | data_analysis/maps_old.py | 29 |
2 files changed, 84 insertions, 0 deletions
```diff
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
new file mode 100644
index 0000000..0b37f37
--- /dev/null
+++ b/data_analysis/maps.py
@@ -0,0 +1,55 @@
+import cPickle
+import scipy
+import numpy as np
+import matplotlib.pyplot as plt
+
+import data
+
+
+def compute_number_coordinates():
+    train_it = data.train_it()
+
+    # Count the number of coordinates
+    n_coordinates = 0
+    for ride in train_it:
+        n_coordinates += len(ride[-1])
+    print n_coordinates
+
+    return n_coordinates
+
+
+def extract_coordinates(n_coordinates=None):
+    """Extract coordinates from the dataset and store them in a numpy array"""
+
+    if n_coordinates is None:
+        n_coordinates = compute_number_coordinates()
+
+    coordinates = np.zeros((n_coordinates, 2), dtype="float32")
+    train_it = data.train_it()
+
+    c = 0
+    for ride in train_it:
+        for point in ride[-1]:
+            coordinates[c] = point
+            c += 1
+
+    cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb"))
+
+
+def draw_map(coordinates, xrg, yrg):
+
+    hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])
+
+    plt.imshow(np.log(hist))
+    plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf")
+
+    scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", np.log(hist))
+
+
+if __name__ == "__main__":
+    # extract_coordinates(n_coordinates=83360928)
+
+    coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb"))
+    xrg = [-8.75, -8.55]
+    yrg = [41.05, 41.25]
+    draw_map(coordinates, xrg, yrg)
diff --git a/data_analysis/maps_old.py b/data_analysis/maps_old.py
new file mode 100644
index 0000000..adfe26c
--- /dev/null
+++ b/data_analysis/maps_old.py
@@ -0,0 +1,29 @@
+import matplotlib.pyplot as plt
+import numpy
+import cPickle
+import scipy
+
+print "Loading data..."
+with open("../train_normal.pkl") as f: normal = cPickle.load(f)
+
+print "Extracting x and y"
+xes = [c[0] for l in normal for c in l[-1]]
+yes = [c[1] for l in normal for c in l[-1]]
+
+xrg = [-8.75, -8.55]
+yrg = [41.05, 41.25]
+
+print "Doing 1d histogram"
+#plt.clf(); plt.hist(xes, bins=1000, range=xrg); plt.savefig("xhist.pdf")
+#plt.clf(); plt.hist(yes, bins=1000, range=yrg); plt.savefig("yhist.pdf")
+
+print "Doing 2d histogram"
+#plt.clf(); plt.hist2d(xes, yes, bins=500, range=[xrg, yrg]); plt.savefig("xymap.pdf")
+
+hist, xx, yy = numpy.histogram2d(xes, yes, bins=2000, range=[xrg, yrg])
+
+import ipdb; ipdb.set_trace()
+
+plt.clf(); plt.imshow(numpy.log(hist)); plt.savefig("xyhmap.pdf")
+
+scipy.misc.imsave("xymap.png", numpy.log(hist))
```