aboutsummaryrefslogtreecommitdiff
path: root/data/init_valid.py
diff options
context:
space:
mode:
Diffstat (limited to 'data/init_valid.py')
-rwxr-xr-xdata/init_valid.py61
1 files changed, 61 insertions, 0 deletions
diff --git a/data/init_valid.py b/data/init_valid.py
new file mode 100755
index 0000000..14a854c
--- /dev/null
+++ b/data/init_valid.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# Initialize the valid hdf5
+
+import os
+import sys
+
+import h5py
+import numpy
+import theano
+
+import data
+
+
+_fields = {
+ 'trip_id': 'S19',
+ 'call_type': numpy.uint8,
+ 'origin_call': numpy.uint32,
+ 'origin_stand': numpy.uint8,
+ 'taxi_id': numpy.uint16,
+ 'timestamp': numpy.uint32,
+ 'day_type': numpy.uint8,
+ 'missing_data': numpy.bool,
+ 'latitude': data.Polyline,
+ 'longitude': data.Polyline,
+ 'destination_latitude': theano.config.floatX,
+ 'destination_longitude': theano.config.floatX,
+ 'travel_time': numpy.uint32,
+}
+
+
+def init_valid(path):
+ h5file = h5py.File(path, 'w')
+
+ for k, v in _fields.items():
+ h5file.create_dataset(k, (0,), dtype=v, maxshape=(None,))
+
+ split_array = numpy.empty(len(_fields), dtype=numpy.dtype([
+ ('split', 'a', 64),
+ ('source', 'a', 21),
+ ('start', numpy.int64, 1),
+ ('stop', numpy.int64, 1),
+ ('available', numpy.bool, 1),
+ ('comment', 'a', 1)]))
+
+ split_array[:]['split'] = 'dummy'.encode('utf8')
+ for (i, k) in enumerate(_fields.keys()):
+ split_array[i] = k.encode('utf8')
+ split_array[:]['start'] = 0
+ split_array[:]['stop'] = 0
+ split_array[:]['available'] = False
+ split_array[:]['comment'] = '.'.encode('utf8')
+ h5file.attrs['split'] = split_array
+
+ h5file.flush()
+ h5file.close()
+
+if __name__ == '__main__':
+ if len(sys.argv) > 2:
+ print >> sys.stderr, 'Usage: %s [file]' % sys.argv[0]
+ sys.exit(1)
+ init_valid(sys.argv[1] if len(sys.argv) == 2 else os.path.join(data.path, 'valid.hdf5'))