From 27a0e0949c6ca3f7bd18569a23ddd0e1b3e9a64e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 10 Jul 2015 17:16:20 -0400 Subject: Batch shuffling --- config/dest_mlp_tgtcls_1_cswdtx_batchshuffle.py | 4 ++-- model/mlp.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/config/dest_mlp_tgtcls_1_cswdtx_batchshuffle.py b/config/dest_mlp_tgtcls_1_cswdtx_batchshuffle.py index a4db33c..b816930 100644 --- a/config/dest_mlp_tgtcls_1_cswdtx_batchshuffle.py +++ b/config/dest_mlp_tgtcls_1_cswdtx_batchshuffle.py @@ -23,14 +23,14 @@ dim_embeddings = [ ] dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) -dim_hidden = [1000] +dim_hidden = [500] dim_output = tgtcls.shape[0] embed_weights_init = IsotropicGaussian(0.01) mlp_weights_init = IsotropicGaussian(0.1) mlp_biases_init = Constant(0.01) -step_rule = Momentum(learning_rate=0.01, momentum=0.9) +step_rule = Momentum(learning_rate=0.001, momentum=0.99) batch_size = 200 diff --git a/model/mlp.py b/model/mlp.py index 1f53e8c..7d04c82 100644 --- a/model/mlp.py +++ b/model/mlp.py @@ -52,6 +52,12 @@ class FFMLP(Initializable): def predict_inputs(self): return self.inputs +class UniformGenerator(object): + def __init__(self): + self.rng = numpy.random.RandomState(123) + def __call__(self, *args): + return float(self.rng.uniform()) + class Stream(object): def __init__(self, config): self.config = config @@ -69,17 +75,15 @@ class Stream(object): stream = transformers.TaxiExcludeTrips(stream, valid_trips_ids) stream = transformers.TaxiGenerateSplits(stream, max_splits=self.config.max_splits) - stream = transformers.add_destination(stream) - - stream = transformers.taxi_add_datetime(stream) - stream = transformers.taxi_add_first_last_len(stream, self.config.n_begin_end_pts) - stream = transformers.Select(stream, tuple(req_vars)) if hasattr(self.config, 'shuffle_batch_size'): stream = transformers.Batch(stream, iteration_scheme=ConstantScheme(self.config.shuffle_batch_size)) - rng = numpy.random.RandomState(123) - stream = Mapping(stream, SortMapping(lambda x: float(rng.uniform()))) + stream = Mapping(stream, SortMapping(key=UniformGenerator())) stream = Unpack(stream) + + stream = transformers.taxi_add_datetime(stream) + stream = transformers.taxi_add_first_last_len(stream, self.config.n_begin_end_pts) + stream = transformers.Select(stream, tuple(req_vars)) stream = Batch(stream, iteration_scheme=ConstantScheme(self.config.batch_size)) -- cgit v1.2.3