aboutsummaryrefslogtreecommitdiff
path: root/data/csv_to_hdf5.py
diff options
context:
space:
mode:
author: Étienne Simon <esimon@esimon.eu> 2015-05-17 14:56:34 -0400
committer: Étienne Simon <esimon@esimon.eu> 2015-05-17 14:58:46 -0400
commit: 1e6d08b0c9ac5983691b182631c71e9d46ee71cc (patch)
tree: 2e9518793de3cae1a6d9914cd2d2ef6991cb7508 /data/csv_to_hdf5.py
parent: 9ff3d163609707c0138c0de731eec40449bd1815 (diff)
download: taxi-1e6d08b0c9ac5983691b182631c71e9d46ee71cc.tar.gz
taxi-1e6d08b0c9ac5983691b182631c71e9d46ee71cc.zip
Use signed integer.
Diffstat (limited to 'data/csv_to_hdf5.py')
-rwxr-xr-x data/csv_to_hdf5.py 16
1 files changed, 8 insertions, 8 deletions
diff --git a/data/csv_to_hdf5.py b/data/csv_to_hdf5.py
index 97cf428..b011b52 100755
--- a/data/csv_to_hdf5.py
+++ b/data/csv_to_hdf5.py
@@ -51,12 +51,12 @@ def read_taxis(input_directory, h5file, dataset):
print >> sys.stderr, 'read %s: begin' % dataset
size=getattr(data, '%s_size'%dataset)
trip_id = numpy.empty(shape=(size,), dtype='S19')
- call_type = numpy.empty(shape=(size,), dtype=numpy.uint8)
- origin_call = numpy.empty(shape=(size,), dtype=numpy.uint32)
- origin_stand = numpy.empty(shape=(size,), dtype=numpy.uint8)
- taxi_id = numpy.empty(shape=(size,), dtype=numpy.uint16)
- timestamp = numpy.empty(shape=(size,), dtype=numpy.uint32)
- day_type = numpy.empty(shape=(size,), dtype=numpy.uint8)
+ call_type = numpy.empty(shape=(size,), dtype=numpy.int8)
+ origin_call = numpy.empty(shape=(size,), dtype=numpy.int32)
+ origin_stand = numpy.empty(shape=(size,), dtype=numpy.int8)
+ taxi_id = numpy.empty(shape=(size,), dtype=numpy.int16)
+ timestamp = numpy.empty(shape=(size,), dtype=numpy.int32)
+ day_type = numpy.empty(shape=(size,), dtype=numpy.int8)
missing_data = numpy.empty(shape=(size,), dtype=numpy.bool)
latitude = numpy.empty(shape=(size,), dtype=data.Polyline)
longitude = numpy.empty(shape=(size,), dtype=data.Polyline)
@@ -87,12 +87,12 @@ def read_taxis(input_directory, h5file, dataset):
return splits
def unique(h5file):
- unique_taxi_id = numpy.empty(shape=(data.taxi_id_size,), dtype=numpy.uint32)
+ unique_taxi_id = numpy.empty(shape=(data.taxi_id_size,), dtype=numpy.int32)
assert len(taxi_id_dict) == data.taxi_id_size
for k, v in taxi_id_dict.items():
unique_taxi_id[v] = k
- unique_origin_call = numpy.empty(shape=(data.origin_call_size,), dtype=numpy.uint32)
+ unique_origin_call = numpy.empty(shape=(data.origin_call_size,), dtype=numpy.int32)
assert len(origin_call_dict) == data.origin_call_size
for k, v in origin_call_dict.items():
unique_origin_call[v] = k