diff options
Diffstat (limited to 'data')
-rw-r--r-- | data/cuts/large_valid.py | 9 +++++++++
-rwxr-xr-x | data/make_valid_cut.py | 5 +++++
2 files changed, 14 insertions, 0 deletions
diff --git a/data/cuts/large_valid.py b/data/cuts/large_valid.py
new file mode 100644
index 0000000..d0767be
--- /dev/null
+++ b/data/cuts/large_valid.py
@@ -0,0 +1,9 @@
+import random
+
+begin = 1372636853
+end = 1404172787
+
+random.seed(1234)
+cuts = []
+for i in range(500):
+    cuts.append(random.randrange(begin, end))
diff --git a/data/make_valid_cut.py b/data/make_valid_cut.py
index f986f1d..4724fda 100755
--- a/data/make_valid_cut.py
+++ b/data/make_valid_cut.py
@@ -17,6 +17,8 @@ _fields = ['trip_id', 'call_type', 'origin_call', 'origin_stand', 'taxi_id', 'ti
 def make_valid(cutfile, outpath):
     cuts = importlib.import_module('.%s' % cutfile, 'data.cuts').cuts
 
+    print "Number of cuts:", len(cuts)
+
     valid = []
 
     for line in taxi_it('train'):
@@ -39,6 +41,9 @@ def make_valid(cutfile, outpath):
                     'travel_time': 15 * (len(latitude)-1)
                 })
                 valid.append(line)
+                break
+
+    print "Number of trips in validation set:", len(valid)
 
     file = h5py.File(outpath, 'a')
     clen = file['trip_id'].shape[0]