diff options
author | Alex Auvolat <alex@adnab.me> | 2016-04-26 16:33:02 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2016-04-26 16:33:02 +0200 |
commit | b5584610a14578d0f3ebf9eea3067a0284f67288 (patch) | |
tree | cc23207cf82a6ee0d0bc121d1f0f6bd6e2a8e531 | |
parent | 760587b5d9771257160fac216dfcfff852de3ccc (diff) | |
download | text-rnn-b5584610a14578d0f3ebf9eea3067a0284f67288.tar.gz text-rnn-b5584610a14578d0f3ebf9eea3067a0284f67288.zip |
Xreg hyperparameter change
Mode | File | Lines changed |
---|---|---|
-rw-r--r-- | REMARKS | 1 |
-rw-r--r-- | config/lstm-xreg-relu.py | 48 |
-rw-r--r-- | config/lstm-xreg.py | 4 |
-rw-r--r-- | model/lstm.py | 6 |

4 files changed, 55 insertions, 4 deletions
@@ -0,0 +1 @@
+- lstm-xreg-relu : does not converge at all, cost is stuck around 3.6 and error rate is 86%
diff --git a/config/lstm-xreg-relu.py b/config/lstm-xreg-relu.py
new file mode 100644
index 0000000..7697558
--- /dev/null
+++ b/config/lstm-xreg-relu.py
@@ -0,0 +1,48 @@
+from blocks.algorithms import AdaDelta
+from blocks.bricks import Tanh, Rectifier
+
+from model.lstm import Model
+
+dataset = 'data/logcompil.txt'
+io_dim = 256
+
+# An epoch will be composed of 'num_seqs' sequences of len 'seq_len'
+# divided in chunks of lengh 'seq_div_size'
+num_seqs = 50
+seq_len = 5000
+seq_div_size = 200
+
+layers = [
+    {'dim': 1024,
+     'xreg': (768, 0.1, 50, 5, 5)
+    },
+    {'dim': 1024,
+     'xreg': (768, 0.1, 50, 5, 5)
+    },
+    {'dim': 1024,
+    },
+]
+activation_function = Rectifier()
+
+i2h_all = True # input to all hidden layers or only first layer
+h2o_all = True # all hiden layers to output or only last layer
+
+w_noise_std = 0.02
+i_dropout = 0.5
+
+l1_reg = 0
+
+step_rule = AdaDelta()
+
+# parameter saving freq (number of batches)
+monitor_freq = 100
+save_freq = 100
+
+# used for sample generation and IRC mode
+sample_temperature = 0.7 #0.5
+
+# do we want to generate samples at times during training?
+sample_len = 1000
+sample_freq = 100
+sample_init = '\nalex\ttu crois?\n'
+
diff --git a/config/lstm-xreg.py b/config/lstm-xreg.py
index 66f7c51..f8c5094 100644
--- a/config/lstm-xreg.py
+++ b/config/lstm-xreg.py
@@ -14,10 +14,10 @@ seq_div_size = 200
 
 layers = [
     {'dim': 1024,
-     'xreg': (768, 0.1, 10, 10, 6)
+     'xreg': (768, 0.1, 10, 10, 10, 2)
     },
     {'dim': 1024,
-     'xreg': (768, 0.1, 10, 10, 6)
+     'xreg': (768, 0.1, 10, 10, 10, 5)
     },
     {'dim': 1024,
     },
diff --git a/model/lstm.py b/model/lstm.py
index d928c88..10b090c 100644
--- a/model/lstm.py
+++ b/model/lstm.py
@@ -57,14 +57,16 @@ class Model():
             states.append((init_cell, new_cells[-1, :, :]))
 
             if 'xreg' in p and p['xreg'] is not None:
-                n, s, w1, w2, w3 = p['xreg']
+                n, s, w1, w2, w3, w4 = p['xreg']
                 cost_x1 = w1 * ((new_hidden.mean(axis=2) - s)**2).mean()
                 cost_x2 = w2 * ((new_hidden.mean(axis=(0,1)) - s)**2).mean()
                 cost_x3 = -w3 * abs(new_hidden - s).mean()
+                cost_x4 = w4 * abs(new_hidden[:-1,:,:]-new_hidden[1:,:,:]).mean()
                 cost_x1.name = 'cost_x1_%d'%i
                 cost_x2.name = 'cost_x2_%d'%i
                 cost_x3.name = 'cost_x3_%d'%i
-                costs_xreg += [cost_x1, cost_x2, cost_x3]
+                cost_x4.name = 'cost_x4_%d'%i
+                costs_xreg += [cost_x1, cost_x2, cost_x3, cost_x4]
 
             dims.append(p['dim'])
             hidden.append(new_hidden)