diff options
author | Étienne Simon <esimon@esimon.eu> | 2015-05-05 21:55:13 -0400 |
---|---|---|
committer | Étienne Simon <esimon@esimon.eu> | 2015-05-05 22:05:21 -0400 |
commit | 1f2ff96e6480a62089fcac35154a956c218ed678 (patch) | |
tree | d0bb7a2a6d7ba6ae512a2ce3729b1ccbdc21c822 /make_valid.py | |
parent | 54613c1f9cf510ca7a71d6619418f2247515aec6 (diff) | |
download | taxi-1f2ff96e6480a62089fcac35154a956c218ed678.tar.gz taxi-1f2ff96e6480a62089fcac35154a956c218ed678.zip |
Clean data module and generalize use of hdf5.
Diffstat (limited to 'make_valid.py')
-rw-r--r-- | make_valid.py | 37 |
1 files changed, 0 insertions, 37 deletions
```diff
diff --git a/make_valid.py b/make_valid.py
deleted file mode 100644
index d5e147d..0000000
--- a/make_valid.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Takes valid-full.csv which is a subset of the lines of train.csv, formatted in the
-# exact same way
-# Outputs valid.csv which contains the polylines cut at an arbitrary location, and three
-# new columns containing the destination point and the length in seconds of the original polyline
-# (see contest definition for the time taken by a taxi along a polyline)
-
-import random
-import csv
-import ast
-
-with open("valid-full.csv") as f:
-    vlines = [l for l in csv.reader(f)]
-
-def make_valid_item(l):
-    polyline = ast.literal_eval(l[-1])
-    last = polyline[-1]
-    cut_idx = random.randrange(len(polyline)+1)
-    cut = polyline[:cut_idx]
-    return l[:-1] + [
-        cut.__str__(),
-        last[0],
-        last[1],
-        15 * (len(polyline)-1),
-    ]
-
-vlines = map(make_valid_item, filter(lambda l: (len(ast.literal_eval(l[-1])) > 0), vlines))
-
-with open("valid.csv", "w") as f:
-    wr = csv.writer(f)
-    for r in vlines:
-        wr.writerow(r)
-
-with open("valid-solution.csv", "w") as f:
-    wr = csv.writer(f)
-    wr.writerow(["TRIP_ID", "LATITUDE", "LONGITUDE"])
-    for r in vlines:
-        wr.writerow([r[0], r[-2], r[-3]])
```