about | summary | refs | log | tree | commit | diff
path: root/data_analysis
diff options
context:
space:
mode:
Diffstat (limited to 'data_analysis')
-rw-r--r-- data_analysis/maps.py 28
1 files changed, 10 insertions, 18 deletions
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
index d5db182..2912c8d 100644
--- a/data_analysis/maps.py
+++ b/data_analysis/maps.py
@@ -2,22 +2,16 @@ import cPickle
import numpy as np
import matplotlib.pyplot as plt
-from fuel.schemes import ConstantScheme
-from fuel.streams import DataStream
-
import data
-from data.hdf5 import TaxiDataset, TaxiStream
+from data.hdf5 import taxi_it
def compute_number_coordinates():
- dataset = TaxiDataset('train')
- stream = DataStream(dataset, iteration_scheme=ConstantScheme(1, dataset.num_examples))
- train_it = stream.get_epoch_iterator()
# Count the number of coordinates
n_coordinates = 0
- for ride in train_it:
- n_coordinates += len(ride[2])
+ for ride in taxi_it('train'):
+ n_coordinates += len(ride['latitude'])
print n_coordinates
return n_coordinates
@@ -29,17 +23,14 @@ def extract_coordinates(n_coordinates=None):
if n_coordinates is None:
n_coordinates = compute_number_coordinates()
- dataset = TaxiDataset('train')
- stream = DataStream(dataset, iteration_scheme=ConstantScheme(1, dataset.num_examples))
-
coordinates = np.zeros((n_coordinates, 2), dtype="float32")
- train_it = stream.get_epoch_iterator()
c = 0
- for ride in train_it:
- for point in zip(ride[2], ride[3]):
+ for ride in taxi_it('train'):
+ for point in zip(ride['latitude'], ride['longitude']):
coordinates[c] = point
c += 1
+
print c
cPickle.dump(coordinates, open(data.path + "/coordinates_array.pkl", "wb"))
@@ -52,13 +43,14 @@ def draw_map(coordinates, xrg, yrg):
hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])
plt.imshow(np.log(hist))
+ plt.gca().invert_yaxis()
plt.savefig(data.path + "/analysis/xyhmap2.png")
if __name__ == "__main__":
- extract_coordinates(n_coordinates=32502730)
+ extract_coordinates(n_coordinates=83409386)
coordinates = cPickle.load(open(data.path + "/coordinates_array.pkl", "rb"))
- xrg = [-8.75, -8.55]
- yrg = [41.05, 41.25]
+ xrg = [41.05, 41.25]
+ yrg = [-8.75, -8.55]
draw_map(coordinates, xrg, yrg)