about | summary | refs | log | tree | commit | diff
path: root/make_valid.py
diff options
context:
space:
mode:
author: Étienne Simon <esimon@esimon.eu>, 2015-05-05 21:55:13 -0400
committer: Étienne Simon <esimon@esimon.eu>, 2015-05-05 22:05:21 -0400
commit: 1f2ff96e6480a62089fcac35154a956c218ed678 (patch)
tree: d0bb7a2a6d7ba6ae512a2ce3729b1ccbdc21c822 /make_valid.py
parent: 54613c1f9cf510ca7a71d6619418f2247515aec6 (diff)
download: taxi-1f2ff96e6480a62089fcac35154a956c218ed678.tar.gz
          taxi-1f2ff96e6480a62089fcac35154a956c218ed678.zip
Clean data module and generalize use of hdf5.
Diffstat (limited to 'make_valid.py')
-rw-r--r-- make_valid.py 37
1 files changed, 0 insertions, 37 deletions
diff --git a/make_valid.py b/make_valid.py
deleted file mode 100644
index d5e147d..0000000
--- a/make_valid.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Takes valid-full.csv which is a subset of the lines of train.csv, formatted in the
-# exact same way
-# Outputs valid.csv which contains the polylines cut at an arbitrary location, and three
-# new columns containing the destination point and the length in seconds of the original polyline
-# (see contest definition for the time taken by a taxi along a polyline)
-
-import random
-import csv
-import ast
-
-with open("valid-full.csv") as f:
- vlines = [l for l in csv.reader(f)]
-
-def make_valid_item(l):
- polyline = ast.literal_eval(l[-1])
- last = polyline[-1]
- cut_idx = random.randrange(len(polyline)+1)
- cut = polyline[:cut_idx]
- return l[:-1] + [
- cut.__str__(),
- last[0],
- last[1],
- 15 * (len(polyline)-1),
- ]
-
-vlines = map(make_valid_item, filter(lambda l: (len(ast.literal_eval(l[-1])) > 0), vlines))
-
-with open("valid.csv", "w") as f:
- wr = csv.writer(f)
- for r in vlines:
- wr.writerow(r)
-
-with open("valid-solution.csv", "w") as f:
- wr = csv.writer(f)
- wr.writerow(["TRIP_ID", "LATITUDE", "LONGITUDE"])
- for r in vlines:
- wr.writerow([r[0], r[-2], r[-3]])