From 3a694dde577103f269ff888c19c820712fbab96a Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Mon, 20 Jul 2015 17:40:20 -0400
Subject: Large validation set

---
 data/cuts/large_valid.py | 9 +++++++++
 data/make_valid_cut.py | 5 +++++
 2 files changed, 14 insertions(+)
 create mode 100644 data/cuts/large_valid.py

(limited to 'data')

diff --git a/data/cuts/large_valid.py b/data/cuts/large_valid.py
new file mode 100644
index 0000000..d0767be
--- /dev/null
+++ b/data/cuts/large_valid.py
@@ -0,0 +1,9 @@
+import random
+
+begin = 1372636853
+end = 1404172787
+
+random.seed(1234)
+cuts = []
+for i in range(500):
+    cuts.append(random.randrange(begin, end))
diff --git a/data/make_valid_cut.py b/data/make_valid_cut.py
index f986f1d..4724fda 100755
--- a/data/make_valid_cut.py
+++ b/data/make_valid_cut.py
@@ -17,6 +17,8 @@ _fields = ['trip_id', 'call_type', 'origin_call', 'origin_stand', 'taxi_id', 'ti
 def make_valid(cutfile, outpath):
     cuts = importlib.import_module('.%s' % cutfile, 'data.cuts').cuts
 
+    print "Number of cuts:", len(cuts)
+
     valid = []
 
     for line in taxi_it('train'):
@@ -39,6 +41,9 @@ def make_valid(cutfile, outpath):
                     'travel_time': 15 * (len(latitude)-1)
                 })
                 valid.append(line)
+                break
+
+    print "Number of trips in validation set:", len(valid)
 
     file = h5py.File(outpath, 'a')
     clen = file['trip_id'].shape[0]
-- 
cgit v1.2.3