diff options
author | Étienne Simon <esimon@esimon.eu> | 2015-05-05 21:55:13 -0400 |
---|---|---|
committer | Étienne Simon <esimon@esimon.eu> | 2015-05-05 22:05:21 -0400 |
commit | 1f2ff96e6480a62089fcac35154a956c218ed678 (patch) | |
tree | d0bb7a2a6d7ba6ae512a2ce3729b1ccbdc21c822 /make_valid_cut.py | |
parent | 54613c1f9cf510ca7a71d6619418f2247515aec6 (diff) | |
download | taxi-1f2ff96e6480a62089fcac35154a956c218ed678.tar.gz taxi-1f2ff96e6480a62089fcac35154a956c218ed678.zip |
Clean data module and generalize use of hdf5.
Diffstat (limited to 'make_valid_cut.py')
-rw-r--r-- | make_valid_cut.py | 40 |
1 files changed, 0 insertions, 40 deletions
# make_valid_cut.py (blob 2698af8, deleted by commit 1f2ff96).
#
# Cuts the training dataset at the timestamps listed in `cuts`: every trip
# that is in progress at one of those timestamps is written to the output
# CSV, truncated to the part of its polyline recorded up to the cut.

import ast
import csv
import random  # NOTE: not used by this script; kept so its imports are unchanged

cuts = [
    1376503200,
    1380616200,
    1381167900,
    1383364800,
    1387722600,
]

# NOTE(review): presumably the time between two consecutive polyline points,
# in the same unit as the timestamps -- inferred from the `15 * (len - 1)`
# arithmetic of the original script; confirm against the dataset description.
SAMPLE_PERIOD = 15


def _cut_rows(row, cut_timestamps):
    """Yield one truncated output row per cut timestamp falling inside the trip.

    `row` is one CSV record: column 5 holds the trip start timestamp and the
    last column holds the polyline as a Python-literal list of points.
    """
    # literal_eval only accepts literals, so arbitrary code in the CSV cannot run.
    polyline = ast.literal_eval(row[-1])
    if not polyline:
        return
    start = int(row[5])
    duration = SAMPLE_PERIOD * (len(polyline) - 1)
    for ts in cut_timestamps:
        if start <= ts <= start + duration:
            # Floor division keeps the slice bound an int on Python 3 as well.
            kept = (ts - start) // SAMPLE_PERIOD + 1
            yield row[:-1] + [
                str(polyline[:kept]),
                polyline[-1][0],
                polyline[-1][1],
                duration,
            ]


def make_valid_cut(train_path="train.csv", out_path="cutvalid.csv",
                   cut_timestamps=None):
    """Write the cut version of every trip of `train_path` to `out_path`.

    The first line of the input is treated as a header and skipped; no header
    is written to the output.  Each output row is the input row without its
    polyline column, followed by the truncated polyline (as a string), the
    two coordinates of the last point of the full polyline, and the trip
    duration.  A trip spanning several cuts produces one row per cut.  Every
    emitted row is also printed to stdout.

    `cut_timestamps` defaults to the module-level `cuts` list.
    """
    if cut_timestamps is None:
        cut_timestamps = cuts
    # `with` guarantees both files are closed even if a row fails to parse.
    with open(train_path) as src, open(out_path, "w") as dst:
        reader = csv.reader(src)
        writer = csv.writer(dst)
        next(reader, None)  # skip the header; tolerate a completely empty file
        for row in reader:
            for out_row in _cut_rows(row, cut_timestamps):
                print(out_row)
                writer.writerow(out_row)


if __name__ == "__main__":
    make_valid_cut()