diff options
author | Étienne Simon <esimon@esimon.eu> | 2015-05-17 14:56:34 -0400 |
---|---|---|
committer | Étienne Simon <esimon@esimon.eu> | 2015-05-17 14:58:46 -0400 |
commit | 1e6d08b0c9ac5983691b182631c71e9d46ee71cc (patch) | |
tree | 2e9518793de3cae1a6d9914cd2d2ef6991cb7508 /data/csv_to_hdf5.py | |
parent | 9ff3d163609707c0138c0de731eec40449bd1815 (diff) | |
download | taxi-1e6d08b0c9ac5983691b182631c71e9d46ee71cc.tar.gz taxi-1e6d08b0c9ac5983691b182631c71e9d46ee71cc.zip |
Use signed integer.
Diffstat (limited to 'data/csv_to_hdf5.py')
-rwxr-xr-x | data/csv_to_hdf5.py | 16 |
1 file changed, 8 insertions, 8 deletions
```diff
diff --git a/data/csv_to_hdf5.py b/data/csv_to_hdf5.py
index 97cf428..b011b52 100755
--- a/data/csv_to_hdf5.py
+++ b/data/csv_to_hdf5.py
@@ -51,12 +51,12 @@ def read_taxis(input_directory, h5file, dataset):
     print >> sys.stderr, 'read %s: begin' % dataset
     size=getattr(data, '%s_size'%dataset)
     trip_id = numpy.empty(shape=(size,), dtype='S19')
-    call_type = numpy.empty(shape=(size,), dtype=numpy.uint8)
-    origin_call = numpy.empty(shape=(size,), dtype=numpy.uint32)
-    origin_stand = numpy.empty(shape=(size,), dtype=numpy.uint8)
-    taxi_id = numpy.empty(shape=(size,), dtype=numpy.uint16)
-    timestamp = numpy.empty(shape=(size,), dtype=numpy.uint32)
-    day_type = numpy.empty(shape=(size,), dtype=numpy.uint8)
+    call_type = numpy.empty(shape=(size,), dtype=numpy.int8)
+    origin_call = numpy.empty(shape=(size,), dtype=numpy.int32)
+    origin_stand = numpy.empty(shape=(size,), dtype=numpy.int8)
+    taxi_id = numpy.empty(shape=(size,), dtype=numpy.int16)
+    timestamp = numpy.empty(shape=(size,), dtype=numpy.int32)
+    day_type = numpy.empty(shape=(size,), dtype=numpy.int8)
     missing_data = numpy.empty(shape=(size,), dtype=numpy.bool)
     latitude = numpy.empty(shape=(size,), dtype=data.Polyline)
     longitude = numpy.empty(shape=(size,), dtype=data.Polyline)
@@ -87,12 +87,12 @@ def read_taxis(input_directory, h5file, dataset):
     return splits
 
 def unique(h5file):
-    unique_taxi_id = numpy.empty(shape=(data.taxi_id_size,), dtype=numpy.uint32)
+    unique_taxi_id = numpy.empty(shape=(data.taxi_id_size,), dtype=numpy.int32)
     assert len(taxi_id_dict) == data.taxi_id_size
     for k, v in taxi_id_dict.items():
         unique_taxi_id[v] = k
 
-    unique_origin_call = numpy.empty(shape=(data.origin_call_size,), dtype=numpy.uint32)
+    unique_origin_call = numpy.empty(shape=(data.origin_call_size,), dtype=numpy.int32)
     assert len(origin_call_dict) == data.origin_call_size
     for k, v in origin_call_dict.items():
         unique_origin_call[v] = k
```