from fuel.transformers import Transformer, Filter, Mapping
import numpy
import theano
import random
import data
def at_least_k(k, v, pad_at_begin, is_longitude):
    """Return ``v`` extended by repetition so that it holds at least ``k`` values.

    :param k: minimum number of elements the result must contain.
    :param v: sequence of coordinates; may be empty.
    :param pad_at_begin: if true, repeat the first element in front of ``v``;
        otherwise repeat the last element after it.
    :param is_longitude: selects which entry of ``data.porto_center`` is used
        as the fallback value for an empty ``v`` (index 1 when true, else 0).
    :returns: ``v`` itself when it is already long enough (and non-empty),
        otherwise a new numpy array of length ``k``.
    """
    seq = v
    if len(seq) == 0:
        # Nothing to repeat: seed the sequence with the matching coordinate
        # of data.porto_center (presumably the city centre -- confirm in data).
        coord = 1 if is_longitude else 0
        seq = numpy.array([data.porto_center[coord]], dtype=theano.config.floatX)

    missing = k - len(seq)
    if missing <= 0:
        return seq

    if pad_at_begin:
        return numpy.concatenate((numpy.full((missing,), seq[0]), seq))
    return numpy.concatenate((seq, numpy.full((missing,), seq[-1])))
class Select(Transformer):
    """Transformer that keeps only the requested sources of its child stream.

    :param data_stream: the wrapped stream; ``data_stream.sources`` must
        contain every name listed in ``sources``.
    :param sources: names of the sources to keep, in output order.
    """

    def __init__(self, data_stream, sources):
        super(Select, self).__init__(data_stream)
        # Resolve each requested name to its position once, up front.
        available = data_stream.sources
        self.ids = [available.index(name) for name in sources]
        self.sources = sources

    def get_data(self, request=None):
        """Return the selected entries of the next child item as a list.

        :raises ValueError: if ``request`` is not ``None``.
        """
        if request is not None:
            raise ValueError
        item = next(self.child_epoch_iterator)
        return [item[position] for position in self.ids]
class TaxiGenerateSplits(Transformer):
    """Turn each trajectory of the child stream into random prefixes.

    For every item pulled from the child stream, the prefix lengths
    ``1 .. len(longitude)`` are shuffled and (optionally) truncated to
    ``max_splits`` entries.  Each call to :meth:`get_data` then emits one
    prefix of the latitude/longitude sequences together with the last point
    of the *full* trajectory as ``destination_latitude`` and
    ``destination_longitude``.

    :param data_stream: wrapped stream; must provide ``'latitude'`` and
        ``'longitude'`` sources.
    :param max_splits: maximum number of prefixes emitted per trajectory;
        ``-1`` means no limit.
    """

    def __init__(self, data_stream, max_splits=-1):
        super(TaxiGenerateSplits, self).__init__(data_stream)
        self.sources = data_stream.sources + ('destination_latitude', 'destination_longitude')
        self.max_splits = max_splits
        # Current child item and the (shuffled) prefix end indices still to
        # be emitted for it; isplit is the cursor into self.splits.
        self.data = None
        self.splits = []
        self.isplit = 0
        self.id_latitude = data_stream.sources.index('latitude')
        self.id_longitude = data_stream.sources.index('longitude')

    def get_data(self, request=None):
        """Return the next prefix example as a tuple.

        The tuple holds the child item's fields (latitude and longitude cut
        to the chosen prefix) followed by the destination latitude and
        longitude of the full trajectory.

        :raises ValueError: if ``request`` is not ``None``.
        """
        if request is not None:
            raise ValueError

        # Fetch child items until one yields at least one split; an item with
        # an empty longitude sequence produces no splits and is skipped.
        while self.isplit >= len(self.splits):
            self.data = next(self.child_epoch_iterator)
            # Materialise as a list: random.shuffle needs a mutable sequence,
            # and on Python 3 a bare range object is immutable.
            self.splits = list(range(len(self.data[self.id_longitude])))
            random.shuffle(self.splits)
            if self.max_splits != -1 and len(self.splits) > self.max_splits:
                self.splits = self.splits[:self.max_splits]
            self.isplit = 0

        i = self.isplit
        self.isplit += 1
        # splits holds zero-based end indices, so the prefix length is +1.
        n = self.splits[i] + 1

        r = list(self.data)
        r[self.id_latitude] = numpy.array(r[self.id_latitude][:n], dtype=theano.config.floatX)
        r[self.id_longitude] = numpy.array(r[self.id_longitude][:n], dtype=theano.config.floatX)

        # The destination is always the final point of the untruncated
        # trajectory; note it is cast to float32 irrespective of floatX.
        dlat = numpy.float32(self.data[self.id_latitude][-1])
        dlon = numpy.float32(self.data[self.id_longitude][-1])

        return tuple(r + [dlat, dlon])
class first_k(object):
    """Callable returning the first ``k`` latitude and longitude values.

    :param k: number of points to keep.
    :param id_latitude: index of the latitude sequence in an item.
    :param id_longitude: index of the longitude sequence in an item.
    """

    def __init__(self, k, id_latitude, id_longitude):
        self.k = k
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude

    def __call__(self, data):
        """Return ``(latitudes, longitudes)``, each an array of length ``k``.

        Sequences shorter than ``k`` are padded at the end by ``at_least_k``.
        """
        count = self.k
        # Pad at the end (pad_at_begin=False) so the leading points are kept.
        lat = at_least_k(count, data[self.id_latitude], False, False)
        head_lat = numpy.array(lat[:count], dtype=theano.config.floatX)
        lon = at_least_k(count, data[self.id_longitude], False, True)
        head_lon = numpy.array(lon[:count], dtype=theano.config.floatX)
        return (head_lat, head_lon)
def add_first_k(k, stream):
    """Wrap ``stream`` in a ``Mapping`` that applies ``first_k``.

    The two names ``'first_k_latitude'`` and ``'first_k_longitude'`` are
    passed to ``Mapping`` as its third argument.
    """
    names = stream.sources
    mapper = first_k(k, names.index('latitude'), names.index('longitude'))
    return Mapping(stream, mapper, ('first_k_latitude', 'first_k_longitude'))
class random_k(object):
    """Callable returning a randomly positioned window of ``k`` points.

    :param k: window length.
    :param id_latitude: index of the latitude sequence in an item.
    :param id_longitude: index of the longitude sequence in an item.
    """

    def __init__(self, k, id_latitude, id_longitude):
        self.k = k
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude

    def __call__(self, x):
        """Return ``(latitudes, longitudes)`` for one random window.

        Both sequences are first padded at the beginning to at least ``k``
        values; the same window offset is used for both.
        """
        width = self.k
        lat = at_least_k(width, x[self.id_latitude], True, False)
        lon = at_least_k(width, x[self.id_longitude], True, True)
        # The offset range is derived from the latitude length only.
        start = random.randrange(len(lat) - width + 1)
        stop = start + width
        window_lat = numpy.array(lat[start:stop], dtype=theano.config.floatX)
        window_lon = numpy.array(lon[start:stop], dtype=theano.config.floatX)
        return (window_lat, window_lon)
def add_random_k(k, stream):
    """Wrap ``stream`` in a ``Mapping`` that applies ``random_k``.

    The two names ``'last_k_latitude'`` and ``'last_k_longitude'`` are
    passed to ``Mapping`` as its third argument.
    """
    # NOTE(review): the names are 'last_k_*', identical to add_last_k --
    # presumably so random_k can stand in for last_k; confirm with callers.
    names = stream.sources
    mapper = random_k(k, names.index('latitude'), names.index('longitude'))
    return Mapping(stream, mapper, ('last_k_latitude', 'last_k_longitude'))
class last_k(object):
    """Callable returning the last ``k`` latitude and longitude values.

    :param k: number of points to keep.
    :param id_latitude: index of the latitude sequence in an item.
    :param id_longitude: index of the longitude sequence in an item.
    """

    def __init__(self, k, id_latitude, id_longitude):
        self.k = k
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude

    def __call__(self, data):
        """Return ``(latitudes, longitudes)``, each an array of length ``k``.

        Sequences shorter than ``k`` are padded at the beginning by
        ``at_least_k``.
        """
        count = self.k
        # Pad at the beginning (pad_at_begin=True) so the trailing points are kept.
        lat = at_least_k(count, data[self.id_latitude], True, False)
        tail_lat = numpy.array(lat[-count:], dtype=theano.config.floatX)
        lon = at_least_k(count, data[self.id_longitude], True, True)
        tail_lon = numpy.array(lon[-count:], dtype=theano.config.floatX)
        return (tail_lat, tail_lon)
def add_last_k(k, stream):
    """Wrap ``stream`` in a ``Mapping`` that applies ``last_k``.

    The two names ``'last_k_latitude'`` and ``'last_k_longitude'`` are
    passed to ``Mapping`` as its third argument.
    """
    names = stream.sources
    mapper = last_k(k, names.index('latitude'), names.index('longitude'))
    return Mapping(stream, mapper, ('last_k_latitude', 'last_k_longitude'))
class destination(object):
    """Callable returning the final latitude and longitude of an item.

    :param id_latitude: index of the latitude sequence in an item.
    :param id_longitude: index of the longitude sequence in an item.
    """

    def __init__(self, id_latitude, id_longitude):
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude

    def __call__(self, data):
        """Return ``(latitude, longitude)`` of the last point as numpy scalars.

        ``at_least_k`` with ``k=1`` guarantees a value even when the
        sequence is empty.
        """
        lat = at_least_k(1, data[self.id_latitude], True, False)
        final_lat = numpy.array(lat[-1], dtype=theano.config.floatX)
        lon = at_least_k(1, data[self.id_longitude], True, True)
        final_lon = numpy.array(lon[-1], dtype=theano.config.floatX)
        return (final_lat, final_lon)
def add_destination(stream):
    """Wrap ``stream`` in a ``Mapping`` that applies ``destination``.

    The two names ``'destination_latitude'`` and ``'destination_longitude'``
    are passed to ``Mapping`` as its third argument.
    """
    names = stream.sources
    mapper = destination(names.index('latitude'), names.index('longitude'))
    return Mapping(stream, mapper, ('destination_latitude', 'destination_longitude'))
class trip_filter(object):
    """Predicate that rejects items whose trip id is in an exclusion set.

    :param id_trip_id: index of the trip id within an item.
    :param exclude: container of trip ids to reject (tested with ``in``).
    """

    def __init__(self, id_trip_id, exclude):
        self.id_trip_id = id_trip_id
        self.exclude = exclude

    def __call__(self, data):
        """Return ``True`` to keep the item, ``False`` if its trip id is excluded."""
        return data[self.id_trip_id] not in self.exclude
def filter_out_trips(exclude_trips, stream):
    """Wrap ``stream`` in a ``Filter`` using a ``trip_filter`` predicate.

    The predicate is built from the position of ``'trip_id'`` in
    ``stream.sources`` and the given ``exclude_trips`` container.
    """
    predicate = trip_filter(stream.sources.index('trip_id'), exclude_trips)
    return Filter(stream, predicate)