about | summary | refs | log | tree | commit | diff
path: root/data
diff options
context:
space:
mode:
author Alex Auvolat <alex.auvolat@ens.fr> 2015-07-20 17:40:20 -0400
committer Alex Auvolat <alex.auvolat@ens.fr> 2015-07-20 17:40:20 -0400
commit 3a694dde577103f269ff888c19c820712fbab96a (patch)
tree 06b02111d4dfa21a17536ca21e0b4ddf4d72772b /data
parent 1e8da55c32746e7bf898717c032144b056256d3c (diff)
download taxi-3a694dde577103f269ff888c19c820712fbab96a.tar.gz
taxi-3a694dde577103f269ff888c19c820712fbab96a.zip
Large validation set
Diffstat (limited to 'data')
-rw-r--r-- data/cuts/large_valid.py 9
-rwxr-xr-x data/make_valid_cut.py 5
2 files changed, 14 insertions, 0 deletions
diff --git a/data/cuts/large_valid.py b/data/cuts/large_valid.py
new file mode 100644
index 0000000..d0767be
--- /dev/null
+++ b/data/cuts/large_valid.py
@@ -0,0 +1,9 @@
+import random
+
+begin = 1372636853
+end = 1404172787
+
+random.seed(1234)
+cuts = []
+for i in range(500):
+ cuts.append(random.randrange(begin, end))
diff --git a/data/make_valid_cut.py b/data/make_valid_cut.py
index f986f1d..4724fda 100755
--- a/data/make_valid_cut.py
+++ b/data/make_valid_cut.py
@@ -17,6 +17,8 @@ _fields = ['trip_id', 'call_type', 'origin_call', 'origin_stand', 'taxi_id', 'ti
def make_valid(cutfile, outpath):
cuts = importlib.import_module('.%s' % cutfile, 'data.cuts').cuts
+ print "Number of cuts:", len(cuts)
+
valid = []
for line in taxi_it('train'):
@@ -39,6 +41,9 @@ def make_valid(cutfile, outpath):
'travel_time': 15 * (len(latitude)-1)
})
valid.append(line)
+ break
+
+ print "Number of trips in validation set:", len(valid)
file = h5py.File(outpath, 'a')
clen = file['trip_id'].shape[0]