about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 3
-rw-r--r-- model.py 7
-rw-r--r-- transformers.py 67
3 files changed, 52 insertions, 25 deletions
diff --git a/.gitignore b/.gitignore
index 0d15c6e..abfc4f9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,3 +63,6 @@ target/
# Random datafiles
*.csv
+
+# saved params
+taxi_model/*
diff --git a/model.py b/model.py
index c5c75d3..c89809f 100644
--- a/model.py
+++ b/model.py
@@ -30,7 +30,7 @@ from blocks.algorithms import GradientDescent, Scale, AdaDelta, Momentum
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.extensions import Printing
-from blocks.extensions.saveload import Dump, LoadFromDump
+from blocks.extensions.saveload import Dump, LoadFromDump, Checkpoint
from blocks.extensions.monitoring import DataStreamMonitoring
import data
@@ -149,8 +149,9 @@ def main():
prefix='valid',
every_n_batches=1000),
Printing(every_n_batches=1000),
- # Dump('taxi_model', every_n_batches=100),
- # LoadFromDump('taxi_model'),
+ # Checkpoint('model.pkl', every_n_batches=100),
+ Dump('taxi_model', every_n_batches=100),
+ LoadFromDump('taxi_model'),
]
main_loop = MainLoop(
diff --git a/transformers.py b/transformers.py
index c5b8d87..5ad9a87 100644
--- a/transformers.py
+++ b/transformers.py
@@ -26,41 +26,64 @@ class Select(Transformer):
raise ValueError
data=next(self.child_epoch_iterator)
return [data[id] for id in self.ids]
+
+class first_k(object):
+ def __init__(self, k, id_latitude, id_longitude):
+ self.k = k
+ self.id_latitude = id_latitude
+ self.id_longitude = id_longitude
+ def __call__(self, data):
+ return (numpy.array(at_least_k(self.k, data[self.id_latitude], False, False)[:self.k],
+ dtype=theano.config.floatX),
+ numpy.array(at_least_k(self.k, data[self.id_longitude], False, True)[:self.k],
+ dtype=theano.config.floatX))
def add_first_k(k, stream):
id_latitude = stream.sources.index('latitude')
id_longitude = stream.sources.index('longitude')
- return Mapping(stream,
- lambda data:
- (numpy.array(at_least_k(k, data[id_latitude], False, False)[:k], dtype=theano.config.floatX),
- numpy.array(at_least_k(k, data[id_longitude], False, True)[:k], dtype=theano.config.floatX)),
- ('first_k_latitude', 'first_k_longitude'))
+ return Mapping(stream, first_k(k, id_latitude, id_longitude), ('first_k_latitude', 'first_k_longitude'))
+class random_k(object):
+ def __init__(self, k, id_latitude, id_longitude):
+ self.k = k
+ self.id_latitude = id_latitude
+ self.id_longitude = id_longitude
+ def __call__(self, x):
+ lat = at_least_k(self.k, x[self.id_latitude], True, False)
+ lon = at_least_k(self.k, x[self.id_longitude], True, True)
+ loc = random.randrange(len(lat)-self.k+1)
+ return (numpy.array(lat[loc:loc+self.k], dtype=theano.config.floatX),
+ numpy.array(lon[loc:loc+self.k], dtype=theano.config.floatX))
def add_random_k(k, stream):
id_latitude = stream.sources.index('latitude')
id_longitude = stream.sources.index('longitude')
- def random_k(x):
- lat = at_least_k(k, x[id_latitude], True, False)
- lon = at_least_k(k, x[id_longitude], True, True)
- loc = random.randrange(len(lat)-k+1)
- return (numpy.array(lat[loc:loc+k], dtype=theano.config.floatX),
- numpy.array(lon[loc:loc+k], dtype=theano.config.floatX))
- return Mapping(stream, random_k, ('last_k_latitude', 'last_k_longitude'))
+ return Mapping(stream, random_k(k, id_latitude, id_longitude), ('last_k_latitude', 'last_k_longitude'))
+class last_k(object):
+ def __init__(self, k, id_latitude, id_longitude):
+ self.k = k
+ self.id_latitude = id_latitude
+ self.id_longitude = id_longitude
+ def __call__(self, data):
+ return (numpy.array(at_least_k(self.k, data[self.id_latitude], True, False)[-self.k:],
+ dtype=theano.config.floatX),
+ numpy.array(at_least_k(self.k, data[self.id_longitude], True, True)[-self.k:],
+ dtype=theano.config.floatX))
def add_last_k(k, stream):
id_latitude = stream.sources.index('latitude')
id_longitude = stream.sources.index('longitude')
- return Mapping(stream,
- lambda data:
- (numpy.array(at_least_k(k, data[id_latitude], True, False)[-k:], dtype=theano.config.floatX),
- numpy.array(at_least_k(k, data[id_longitude], True, True)[-k:], dtype=theano.config.floatX)),
- ('last_k_latitude', 'last_k_longitude'))
+ return Mapping(stream, last_k(k, id_latitude, id_longitude), ('last_k_latitude', 'last_k_longitude'))
+class destination(object):
+ def __init__(self, id_latitude, id_longitude):
+ self.id_latitude = id_latitude
+ self.id_longitude = id_longitude
+ def __call__(self, data):
+ return (numpy.array(at_least_k(1, data[self.id_latitude], True, False)[-1],
+ dtype=theano.config.floatX),
+ numpy.array(at_least_k(1, data[self.id_longitude], True, True)[-1],
+ dtype=theano.config.floatX))
def add_destination(stream):
id_latitude = stream.sources.index('latitude')
id_longitude = stream.sources.index('longitude')
- return Mapping(stream,
- lambda data:
- (numpy.array(at_least_k(1, data[id_latitude], True, False)[-1], dtype=theano.config.floatX),
- numpy.array(at_least_k(1, data[id_longitude], True, True)[-1], dtype=theano.config.floatX)),
- ('destination_latitude', 'destination_longitude'))
+ return Mapping(stream, destination(id_latitude, id_longitude), ('destination_latitude', 'destination_longitude'))