diff options
author | Étienne Simon <esimon@esimon.eu> | 2015-04-29 15:40:05 -0400 |
---|---|---|
committer | Étienne Simon <esimon@esimon.eu> | 2015-04-29 15:40:51 -0400 |
commit | f768d3e770216d4227ffd989cf98f1628fc476a3 (patch) | |
tree | b5620a34eaebac5290b74882018bab16f4658e27 /transformers.py | |
parent | 61e0d47b6c6a570feebb43d474138020b13495aa (diff) | |
download | taxi-f768d3e770216d4227ffd989cf98f1628fc476a3.tar.gz taxi-f768d3e770216d4227ffd989cf98f1628fc476a3.zip |
Adapt model to hdf5 dataset. WIP
Diffstat (limited to 'transformers.py')
-rw-r--r-- | transformers.py | 69 |
1 file changed, 35 insertions, 34 deletions
diff --git a/transformers.py b/transformers.py
index c60d362..13852ac 100644
--- a/transformers.py
+++ b/transformers.py
@@ -3,15 +3,15 @@ import numpy
 import theano
 import random
 
-def at_least_k(k, pl, pad_at_begin):
-    if len(pl) == 0:
-        pl = [[ -8.61612, 41.1573]]
-    if len(pl) < k:
+def at_least_k(k, v, pad_at_begin, is_longitude):
+    if len(v) == 0:  # empty input: substitute a one-element default array
+        v = numpy.array([-8.61612 if is_longitude else 41.1573], dtype=theano.config.floatX)
+    if len(v) < k:  # pad up to length k by repeating the first or last value
         if pad_at_begin:
-            pl = [pl[0]] * (k - len(pl)) + pl
+            v = numpy.concatenate((numpy.full((k - len(v),), v[0]), v))
         else:
-            pl = pl + [pl[-1]] * (k - len(pl))
-    return pl
+            v = numpy.concatenate((v, numpy.full((k - len(v),), v[-1])))
+    return v
 
 
 class Select(Transformer):
@@ -27,38 +27,39 @@ class Select(Transformer):
         return [data[id] for id in self.ids]
 
 def add_first_k(k, stream):
-    id_polyline=stream.sources.index('polyline')
-    def first_k(x):
-        pl = at_least_k(k, x[id_polyline], False)
-        return (numpy.array(pl[:k], dtype=theano.config.floatX).flatten(),)
-    stream = Mapping(stream, first_k, ('first_k',))
-    return stream
+    id_latitude = stream.sources.index('latitude')
+    id_longitude = stream.sources.index('longitude')
+    return Mapping(stream,
+        lambda data:
+            (numpy.array(at_least_k(k, data[id_latitude], False, False)[:k], dtype=theano.config.floatX),
+             numpy.array(at_least_k(k, data[id_longitude], False, True)[:k], dtype=theano.config.floatX)),
+        ('first_k_latitude', 'first_k_longitude'))
 
 def add_random_k(k, stream):
-    id_polyline=stream.sources.index('polyline')
+    id_latitude = stream.sources.index('latitude')
+    id_longitude = stream.sources.index('longitude')
     def random_k(x):
-        pl = at_least_k(k, x[id_polyline], True)
-        loc = random.randrange(len(pl)-k+1)
-        return (numpy.array(pl[loc:loc+k], dtype=theano.config.floatX).flatten(),)
-    stream = Mapping(stream, random_k, ('last_k',))
-    return stream
+        lat = at_least_k(k, x[id_latitude], True, False)
+        lon = at_least_k(k, x[id_longitude], True, True)
+        loc = random.randrange(len(lat)-k+1)  # same window start for both coordinates
+        return (numpy.array(lat[loc:loc+k], dtype=theano.config.floatX),
+                numpy.array(lon[loc:loc+k], dtype=theano.config.floatX))
+    return Mapping(stream, random_k, ('last_k_latitude', 'last_k_longitude'))
 
 def add_last_k(k, stream):
-    id_polyline=stream.sources.index('polyline')
-    def last_k(x):
-        pl = at_least_k(k, x[id_polyline], True)
-        return (numpy.array(pl[-k:], dtype=theano.config.floatX).flatten(),)
-    stream = Mapping(stream, last_k, ('last_k',))
-    return stream
+    id_latitude = stream.sources.index('latitude')
+    id_longitude = stream.sources.index('longitude')
+    return Mapping(stream,
+        lambda data:
+            (numpy.array(at_least_k(k, data[id_latitude], True, False)[-k:], dtype=theano.config.floatX),
+             numpy.array(at_least_k(k, data[id_longitude], True, True)[-k:], dtype=theano.config.floatX)),
+        ('last_k_latitude', 'last_k_longitude'))
 
 def add_destination(stream):
-    id_polyline=stream.sources.index('polyline')
+    id_latitude = stream.sources.index('latitude')
+    id_longitude = stream.sources.index('longitude')
     return Mapping(stream,
-        lambda x:
-            (numpy.array(at_least_k(1, x[id_polyline], True)[-1], dtype=theano.config.floatX),),
-        ('destination',))
-
-def concat_destination_xy(stream):
-    id_dx=stream.sources.index('destination_x')
-    id_dy=stream.sources.index('destination_y')
-    return Mapping(stream, lambda x: (numpy.array([x[id_dx], x[id_dy]], dtype=theano.config.floatX),), ('destination',))
+        lambda data:
+            (numpy.array(at_least_k(1, data[id_latitude], True, False)[-1], dtype=theano.config.floatX),
+             numpy.array(at_least_k(1, data[id_longitude], True, True)[-1], dtype=theano.config.floatX)),
+        ('destination_latitude', 'destination_longitude'))