diff options
author | Alex Auvolat <alex.auvolat@ens.fr> | 2015-05-04 13:15:33 -0400 |
---|---|---|
committer | Alex Auvolat <alex.auvolat@ens.fr> | 2015-05-04 13:15:33 -0400 |
commit | 71bb4d90da2bad933fdca48d1879886fe7aa9bc8 (patch) | |
tree | a1979f2ab037f273479d66fc1d4198631124c48b /make_valid_cut.py | |
parent | de76aae44b6c0cbe9ab42c7ae215c3ae9e4e4055 (diff) | |
download | taxi-71bb4d90da2bad933fdca48d1879886fe7aa9bc8.tar.gz taxi-71bb4d90da2bad933fdca48d1879886fe7aa9bc8.zip |
Add make_valid_cut
Diffstat (limited to 'make_valid_cut.py')
-rw-r--r-- | make_valid_cut.py | 40 |
1 files changed, 40 insertions, 0 deletions
# Cuts the training dataset at the following timestamps :

import random
import csv
import ast

cuts = [
    1376503200,
    1380616200,
    1381167900,
    1383364800,
    1387722600,
]

# Index of the CSV column parsed as the trip's integer start time.
TIMESTAMP_COLUMN = 5

# Seconds assumed between two consecutive polyline points. The value comes
# from the arithmetic of the original script -- TODO confirm against the
# dataset description.
SAMPLE_PERIOD = 15


def cut_trip(row, cut_timestamps=None, period=SAMPLE_PERIOD):
    """Return the truncated output rows produced by one input CSV row.

    The last field of *row* is parsed as a Python literal list of points
    (the polyline).  For every cut timestamp that falls inside the interval
    ``[start, start + period * (len(polyline) - 1)]`` one output row is
    built: the original fields minus the polyline, followed by

    * the string form of the polyline prefix recorded up to the cut,
    * the first and second components of the final polyline point,
    * the total span ``period * (len(polyline) - 1)``.

    Args:
        row: list of string fields as yielded by ``csv.reader``.
        cut_timestamps: iterable of integer timestamps; defaults to the
            module-level ``cuts``.
        period: seconds between two consecutive polyline points.

    Returns:
        A list of output rows (possibly empty).  Rows with an empty
        polyline never produce output.
    """
    if cut_timestamps is None:
        cut_timestamps = cuts

    # literal_eval only accepts Python literals, so it is safe on file data.
    polyline = ast.literal_eval(row[-1])
    if not polyline:
        return []

    start = int(row[TIMESTAMP_COLUMN])
    span = period * (len(polyline) - 1)
    last_point = polyline[-1]
    prefix_fields = row[:-1]

    rows = []
    for ts in cut_timestamps:
        if start <= ts <= start + span:
            # Floor division keeps the slice index an int on Python 3 too;
            # "+ 1" includes the point sampled at (or just before) the cut.
            n = (ts - start) // period + 1
            rows.append(prefix_fields + [
                str(polyline[:n]),
                last_point[0],
                last_point[1],
                span,
            ])
    return rows


def main(src="train.csv", dst="cutvalid.csv"):
    """Read *src*, cut every trip at the configured timestamps, write *dst*.

    Each generated row is also printed to standard output, as the original
    script did.  Both files are closed even if a row fails to parse.
    """
    with open(src) as f, open(dst, "w") as g:
        fr = csv.reader(f)
        gw = csv.writer(g)
        next(fr, None)  # skip the header line (tolerates an empty file)
        for fields in fr:
            for row in cut_trip(fields):
                print(row)
                gw.writerow(row)


if __name__ == "__main__":
    main()