Merge branch 'master' of github.com:adbrebs/taxi

author: Alex Auvolat <alex.auvolat@ens.fr> 2015-07-20 17:40:28 -0400
committer: Alex Auvolat <alex.auvolat@ens.fr> 2015-07-20 17:40:28 -0400
commit: 58dcf7b17e9db6af53808994a7d39a759fcc5028 (patch)
tree: 04da88df5cc94c62fc450101180ae6d7c2dbe9e2
parent: 3a694dde577103f269ff888c19c820712fbab96a (diff)
parent: b6566c010be7c871a5b6c199feaf1dfda0910ade (diff)
download: taxi-58dcf7b17e9db6af53808994a7d39a759fcc5028.tar.gz
taxi-58dcf7b17e9db6af53808994a7d39a759fcc5028.zip
3 files changed, 16 insertions, 15 deletions
diff --git a/README.md b/README.md
index 05cd7ed..ef46106 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,6 @@ Note that some script expect the repository to be in your PYTHONPATH (go to the
 6. Create a folder `model_data` and a folder `output` (next to the training script), which will receive respectively a regular save of the model parameters and many submission files generated from the model at a regular interval.
 7. Run `./train.py dest_mlp_tgtcls_1_cswdtx_alexandre` to train the model. Output solutions are generated in `output/` every 1000 iterations. Interrupt the model with three consecutive Ctrl+C at any times. The training script is set to stop training after 10 000 000 iterations, but a result file produced after less than 2 000 000 iterations is already the winning solution. We trained our model on a GeForce GTX 680 card and it took about an afternoon to generate the winning solution.
    When running the training script, set the following Theano flags environment variable to exploit GPU parallelism:
-   `THEANO_FLAGS=floatX=float32,device=gpu,optimizer=FAST_RUN`
+   `THEANO_FLAGS=floatX=float32,device=gpu,optimizer=fast_run`
 
 *More information in this pdf: https://github.com/adbrebs/taxi/blob/master/doc/short_report.pdf*
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
index 991f279..2912c8d 100644
--- a/data_analysis/maps.py
+++ b/data_analysis/maps.py
@@ -1,18 +1,17 @@
 import cPickle
-import scipy
 import numpy as np
 import matplotlib.pyplot as plt
 
 import data
+from data.hdf5 import taxi_it
 
 
 def compute_number_coordinates():
-    train_it = data.train_it()
 
     # Count the number of coordinates
     n_coordinates = 0
-    for ride in train_it:
-        n_coordinates += len(ride[-1])
+    for ride in taxi_it('train'):
+        n_coordinates += len(ride['latitude'])
     print n_coordinates
 
     return n_coordinates
@@ -25,15 +24,16 @@ def extract_coordinates(n_coordinates=None):
         n_coordinates = compute_number_coordinates()
 
     coordinates = np.zeros((n_coordinates, 2), dtype="float32")
-    train_it = data.train_it()
 
     c = 0
-    for ride in train_it:
-        for point in ride[-1]:
+    for ride in taxi_it('train'):
+        for point in zip(ride['latitude'], ride['longitude']):
             coordinates[c] = point
             c += 1
 
-    cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb"))
+    print c
+
+    cPickle.dump(coordinates, open(data.path + "/coordinates_array.pkl", "wb"))
 
 
 def draw_map(coordinates, xrg, yrg):
@@ -43,13 +43,14 @@ def draw_map(coordinates, xrg, yrg):
     hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])
 
     plt.imshow(np.log(hist))
-    plt.savefig(data.DATA_PATH + "/analysis/xyhmap2.png")
+    plt.gca().invert_yaxis()
+    plt.savefig(data.path + "/analysis/xyhmap2.png")
 
 
 if __name__ == "__main__":
-    # extract_coordinates(n_coordinates=83360928)
+    extract_coordinates(n_coordinates=83409386)
 
-    coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb"))
-    xrg = [-8.75, -8.55]
-    yrg = [41.05, 41.25]
+    coordinates = cPickle.load(open(data.path + "/coordinates_array.pkl", "rb"))
+    xrg = [41.05, 41.25]
+    yrg = [-8.75, -8.55]
     draw_map(coordinates, xrg, yrg)
diff --git a/prepare.sh b/prepare.sh
index 7559167..ccaca1b 100755
--- a/prepare.sh
+++ b/prepare.sh
@@ -121,4 +121,4 @@ echo -n "${YELLOW}mkdir output... $RESET"; mkdir output; echo "${GREEN}ok"
 
 echo -e "\n$GREEN${BOLD}The data was successfully prepared"
 echo "${YELLOW}To train the winning model on gpu, you can now run the following command:"
-echo "${YELLOW}THEANO_FLAGS=floatX=float32,device=gpu,optimizer=FAST_RUN python2 train.py dest_mlp_tgtcls_1_cswdtx_alexandre"
+echo "${YELLOW}THEANO_FLAGS=floatX=float32,device=gpu,optimizer=fast_run python2 train.py dest_mlp_tgtcls_1_cswdtx_alexandre"
author	Alex Auvolat <alex.auvolat@ens.fr>	2015-07-20 17:40:28 -0400
committer	Alex Auvolat <alex.auvolat@ens.fr>	2015-07-20 17:40:28 -0400
commit	58dcf7b17e9db6af53808994a7d39a759fcc5028 (patch)
tree	04da88df5cc94c62fc450101180ae6d7c2dbe9e2
parent	3a694dde577103f269ff888c19c820712fbab96a (diff)
parent	b6566c010be7c871a5b6c199feaf1dfda0910ade (diff)
download	taxi-58dcf7b17e9db6af53808994a7d39a759fcc5028.tar.gz taxi-58dcf7b17e9db6af53808994a7d39a759fcc5028.zip