aboutsummaryrefslogtreecommitdiff
path: root/transformers.py
blob: 876cee20d7eeb40c65cc96f43702752a086fa494 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from fuel.transformers import Transformer, Filter, Mapping
import numpy
import theano
import random
import data

def at_least_k(k, v, pad_at_begin, is_longitude):
    if len(v) == 0:
        v = numpy.array([data.porto_center[1 if is_longitude else 0]], dtype=theano.config.floatX)
    if len(v) < k:
        if pad_at_begin:
            v = numpy.concatenate((numpy.full((k - len(v),), v[0]), v))
        else:
            v = numpy.concatenate((v, numpy.full((k - len(v),), v[-1])))
    return v


class Select(Transformer):
    def __init__(self, data_stream, sources):
        super(Select, self).__init__(data_stream)
        self.ids = [data_stream.sources.index(source) for source in sources]
        self.sources=sources

    def get_data(self, request=None):
        if request is not None:
            raise ValueError
        data=next(self.child_epoch_iterator)
        return [data[id] for id in self.ids]
        
class TaxiGenerateSplits(Transformer):
    def __init__(self, data_stream, max_splits=-1):
        super(TaxiGenerateSplits, self).__init__(data_stream)
        self.sources = data_stream.sources + ('destination_latitude', 'destination_longitude')
        self.max_splits = max_splits
        self.data = None
        self.splits = []
        self.isplit = 0
        self.id_latitude = data_stream.sources.index('latitude')
        self.id_longitude = data_stream.sources.index('longitude')

    def get_data(self, request=None):
        if request is not None:
            raise ValueError
        while self.isplit >= len(self.splits):
            self.data = next(self.child_epoch_iterator)
            self.splits = range(len(self.data[self.id_longitude]))
            random.shuffle(self.splits)
            if self.max_splits != -1 and len(self.splits) > self.max_splits:
                self.splits = self.splits[:self.max_splits]
            self.isplit = 0
        
        i = self.isplit
        self.isplit += 1
        n = self.splits[i]+1

        r = list(self.data)

        r[self.id_latitude] = numpy.array(r[self.id_latitude][:n], dtype=theano.config.floatX)
        r[self.id_longitude] = numpy.array(r[self.id_longitude][:n], dtype=theano.config.floatX)

        dlat = numpy.float32(self.data[self.id_latitude][-1])
        dlon = numpy.float32(self.data[self.id_longitude][-1])

        return tuple(r + [dlat, dlon])


class first_k(object):
    def __init__(self, k, id_latitude, id_longitude):
        self.k = k
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude
    def __call__(self, data): 
        return (numpy.array(at_least_k(self.k, data[self.id_latitude], False, False)[:self.k],
                            dtype=theano.config.floatX),
                numpy.array(at_least_k(self.k, data[self.id_longitude], False, True)[:self.k],
                            dtype=theano.config.floatX))
def add_first_k(k, stream):
    id_latitude = stream.sources.index('latitude')
    id_longitude = stream.sources.index('longitude')
    return Mapping(stream, first_k(k, id_latitude, id_longitude), ('first_k_latitude', 'first_k_longitude'))

class random_k(object):
    def __init__(self, k, id_latitude, id_longitude):
        self.k = k
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude
    def __call__(self, x):
        lat = at_least_k(self.k, x[self.id_latitude], True, False)
        lon = at_least_k(self.k, x[self.id_longitude], True, True)
        loc = random.randrange(len(lat)-self.k+1)
        return (numpy.array(lat[loc:loc+self.k], dtype=theano.config.floatX),
                numpy.array(lon[loc:loc+self.k], dtype=theano.config.floatX))
def add_random_k(k, stream):
    id_latitude = stream.sources.index('latitude')
    id_longitude = stream.sources.index('longitude')
    return Mapping(stream, random_k(k, id_latitude, id_longitude), ('last_k_latitude', 'last_k_longitude'))

class last_k(object):
    def __init__(self, k, id_latitude, id_longitude):
        self.k = k
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude
    def __call__(self, data):
        return (numpy.array(at_least_k(self.k, data[self.id_latitude], True, False)[-self.k:],
                            dtype=theano.config.floatX),
                numpy.array(at_least_k(self.k, data[self.id_longitude], True, True)[-self.k:],
                            dtype=theano.config.floatX))
def add_last_k(k, stream):
    id_latitude = stream.sources.index('latitude')
    id_longitude = stream.sources.index('longitude')
    return Mapping(stream, last_k(k, id_latitude, id_longitude), ('last_k_latitude', 'last_k_longitude'))

class destination(object):
    def __init__(self, id_latitude, id_longitude):
        self.id_latitude = id_latitude
        self.id_longitude = id_longitude
    def __call__(self, data):
        return (numpy.array(at_least_k(1, data[self.id_latitude], True, False)[-1],
                            dtype=theano.config.floatX),
                numpy.array(at_least_k(1, data[self.id_longitude], True, True)[-1],
                            dtype=theano.config.floatX))
def add_destination(stream):
    id_latitude = stream.sources.index('latitude')
    id_longitude = stream.sources.index('longitude')
    return Mapping(stream, destination(id_latitude, id_longitude), ('destination_latitude', 'destination_longitude'))


class trip_filter(object):
    def __init__(self, id_trip_id, exclude):
        self.id_trip_id = id_trip_id
        self.exclude = exclude
    def __call__(self, data):
        if data[self.id_trip_id] in self.exclude:
            return False
        else:
            return True
def filter_out_trips(exclude_trips, stream):
    id_trip_id = stream.sources.index('trip_id')
    return Filter(stream, trip_filter(id_trip_id, exclude_trips))