-rw-r--r--   .gitignore                                     2
-rw-r--r--   config/__init__.py                             0
-rw-r--r--   config/hpc-lstm-1.py                          37
-rw-r--r--   config/lstm-frigo-irc.py                      39
-rw-r--r--   datastream.py                                  1
-rw-r--r--   gentext.py                                     2
-rw-r--r--   irc.py                                       173
-rw-r--r--   ircext.py                                      4
-rw-r--r--   model/__init__.py                              0
-rw-r--r--   model/cchlstm.py (renamed from cchlstm.py)     0
-rw-r--r--   model/dgsrnn.py (renamed from dgsrnn.py)       0
-rw-r--r--   model/gfgru.py (renamed from gfgru.py)         0
-rw-r--r--   model/hpc_lstm.py                            115
-rw-r--r--   model/lstm.py (renamed from lstm.py)         107
-rwxr-xr-x   train.py                                     120
15 files changed, 449 insertions, 151 deletions
diff --git a/.gitignore b/.gitignore
index 3d6982a..9dd37c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
*.pyc
*.swp
data/*
-model_data/*
+params/*
diff --git a/config/__init__.py b/config/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/config/__init__.py
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py
new file mode 100644
index 0000000..e4009d5
--- /dev/null
+++ b/config/hpc-lstm-1.py
@@ -0,0 +1,37 @@
+from blocks.algorithms import AdaDelta, Momentum
+from blocks.bricks import Tanh, Rectifier
+
+from model.hpc_lstm import Model
+
+dataset = 'data/logcompil-2016-03-07.txt'
+io_dim = 256
+
+# An epoch will be composed of 'num_seqs' sequences of length 'seq_len',
+# divided into chunks of length 'seq_div_size'
+num_seqs = 100
+seq_len = 2000
+seq_div_size = 100
+
+hidden_dims = [128, 128, 256, 512]
+cost_factors = [1., 1., 1., 1.]
+hidden_q = [0.02, 0.02, 0.05, 0.05]
+activation_function = Tanh()
+
+out_hidden = [512]
+out_hidden_act = [Rectifier]
+
+step_rule = AdaDelta()
+#step_rule = Momentum(learning_rate=0.0001, momentum=0.99)
+
+# parameter saving freq (number of batches)
+monitor_freq = 10
+save_freq = 100
+
+# used for sample generation and IRC mode
+sample_temperature = 0.7 #0.5
+
+# do we want to generate samples at times during training?
+sample_len = 1000
+sample_freq = 100
+sample_init = '\nalex\ttu crois?\n'
+
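
An aside on the epoch layout defined above: each epoch draws num_seqs random byte sequences of seq_len characters, and every sequence is fed through truncated BPTT in chunks of seq_div_size steps, the recurrent state being carried across chunks (the shared initial states of shape (num_seqs, dim) in the models suggest all sequences advance together as one batch). A minimal sketch of the arithmetic, illustrative only:

    num_seqs, seq_len, seq_div_size = 100, 2000, 100

    chunks_per_seq = seq_len // seq_div_size   # 20 truncated-BPTT chunks per sequence
    batches_per_epoch = chunks_per_seq         # one batch = one chunk of all num_seqs sequences
    bytes_per_epoch = num_seqs * seq_len       # 200000 bytes seen per epoch
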
diff --git a/config/lstm-frigo-irc.py b/config/lstm-frigo-irc.py
new file mode 100644
index 0000000..2d0bf3a
--- /dev/null
+++ b/config/lstm-frigo-irc.py
@@ -0,0 +1,39 @@
+from blocks.algorithms import AdaDelta
+from blocks.bricks import Tanh
+
+from model.lstm import Model
+
+dataset = 'data/logcompil-2016-03-07.txt'
+io_dim = 256
+
+# An epoch will be composed of 'num_seqs' sequences of length 'seq_len',
+# divided into chunks of length 'seq_div_size'
+num_seqs = 100
+seq_len = 2000
+seq_div_size = 100
+
+hidden_dims = [1024, 1024, 1024]
+activation_function = Tanh()
+
+i2h_all = True  # input to all hidden layers or only the first layer
+h2o_all = True  # all hidden layers to output or only the last layer
+
+w_noise_std = 0.02
+i_dropout = 0.5
+
+l1_reg = 0
+
+step_rule = AdaDelta()
+
+# parameter saving freq (number of batches)
+monitor_freq = 10
+save_freq = 100
+
+# used for sample generation and IRC mode
+sample_temperature = 0.7 #0.5
+
+# do we want to generate samples at times during training?
+sample_len = 1000
+sample_freq = 100
+sample_init = '\nalex\ttu crois?\n'
+
diff --git a/datastream.py b/datastream.py
index 8025945..b7aae25 100644
--- a/datastream.py
+++ b/datastream.py
@@ -38,6 +38,7 @@ class BinaryFileDataset(Dataset):
return os.fstat(self.f.fileno()).st_size
class RandomBlockIterator(IterationScheme):
+    requests_examples = True
def __init__(self, item_range, seq_len, num_seqs_per_epoch, **kwargs):
self.seq_len = seq_len
self.num_seqs = num_seqs_per_epoch
diff --git a/gentext.py b/gentext.py
index b8a27bf..2079602 100644
--- a/gentext.py
+++ b/gentext.py
@@ -46,7 +46,7 @@ class GenText(SimpleExtension):
sys.stdout.write(self.init_text)
while v.shape[1] < self.max_bytes:
prob = prob / 1.00001
- pred = numpy.random.multinomial(1, prob[0, :]).nonzero()[0][0]
+ pred = numpy.random.multinomial(1, prob[0, :]).nonzero()[0][0].astype('int16')
v = numpy.concatenate([v, pred[None, None]], axis=1)
sys.stdout.write(chr(int(pred)))
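
Context for the sampling loop above: prob is the model's softmax output for the next byte, already sharpened by sample_temperature, and the division by 1.00001 keeps the probabilities summing to strictly less than 1, since numpy.random.multinomial raises ValueError when float rounding pushes the sum above 1. A rough standalone sketch of the same draw (function name hypothetical):

    import numpy

    def sample_byte(logits, temperature=0.7):
        # temperature < 1 sharpens the distribution, > 1 flattens it
        p = numpy.exp(logits / temperature)
        p = p / p.sum()
        p = p / 1.00001  # guard against sum > 1 from float rounding
        return numpy.random.multinomial(1, p).nonzero()[0][0]
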
diff --git a/irc.py b/irc.py
new file mode 100644
index 0000000..f8ca125
--- /dev/null
+++ b/irc.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python2
+
+import logging
+import sys
+import importlib
+
+import theano
+
+from blocks.extensions import Printing, SimpleExtension, FinishAfter
+from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
+
+from blocks.graph import ComputationGraph
+from blocks.main_loop import MainLoop
+from blocks.model import Model
+from blocks.algorithms import GradientDescent
+
+try:
+ from blocks.extras.extensions.plot import Plot
+    plot_avail = True
+except ImportError:
+ plot_avail = False
+
+
+import datastream
+from paramsaveload import SaveLoadParams
+from gentext import GenText
+from ircext import IRCClientExt
+
+logging.basicConfig(level='INFO')
+logger = logging.getLogger(__name__)
+
+sys.setrecursionlimit(500000)
+
+
+class ResetStates(SimpleExtension):
+ def __init__(self, state_vars, **kwargs):
+ super(ResetStates, self).__init__(**kwargs)
+
+ self.f = theano.function(
+ inputs=[], outputs=[],
+ updates=[(v, v.zeros_like()) for v in state_vars])
+
+ def do(self, which_callback, *args):
+ self.f()
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print >> sys.stderr, 'Usage: %s [options] config' % sys.argv[0]
+ sys.exit(1)
+ model_name = sys.argv[-1]
+    config = importlib.import_module('.%s' % model_name, 'config')
+
+
+ # Build datastream
+    train_stream = datastream.setup_datastream(config.dataset,
+ config.num_seqs,
+ config.seq_len,
+ config.seq_div_size)
+
+ # Build model
+    m = config.Model(config)
+ m.pred.name = 'pred'
+
+ # Define the model
+ model = Model(m.sgd_cost)
+
+ # Train the model
+
+ cg = ComputationGraph(m.sgd_cost)
+ algorithm = GradientDescent(cost=m.sgd_cost,
+ step_rule=config.step_rule,
+ parameters=cg.parameters)
+
+ algorithm.add_updates(m.states)
+
+ monitor_vars = [v for p in m.monitor_vars for v in p]
+ extensions = [
+ TrainingDataMonitoring(
+ monitor_vars,
+ prefix='train', every_n_epochs=1),
+ Printing(every_n_epochs=1, after_epoch=False),
+
+ ResetStates([v for v, _ in m.states], after_epoch=True)
+ ]
+ if plot_avail:
+ plot_channels = [['train_' + v.name for v in p] for p in m.monitor_vars]
+ extensions.append(
+ Plot(document='text_'+model_name,
+ channels=plot_channels,
+ server_url='http://localhost:5006',
+ every_n_epochs=1, after_epoch=False)
+ )
+    if config.save_freq is not None:
+        extensions.append(
+            SaveLoadParams(path='params/%s.pkl' % model_name,
+ model=model,
+ before_training=True,
+ after_training=True,
+ after_epoch=False,
+ every_n_epochs=config.save_freq)
+ )
+ if config.sample_freq is not None:
+ extensions.append(
+ GenText(m, '\nalex\ttu crois ?\n',
+ config.sample_len, config.sample_temperature,
+ every_n_epochs=config.sample_freq,
+ after_epoch=False, before_training=True)
+ )
+    irc = IRCClientExt(m, config.sample_temperature,
+                       server='clipper.ens.fr',
+                       port=6667,
+                       nick='frigo',
+                       channels=['#frigotest', '#courssysteme'],
+                       after_batch=True)
+    irc.do('before_training')
+    extensions.append(irc)
+
+ main_loop = MainLoop(
+ model=model,
+ data_stream=train_stream,
+ algorithm=algorithm,
+ extensions=extensions
+ )
+
+    # IRC mode : just load the parameters and run the IRC client
+    if '--irc' in sys.argv:
+        try:
+            extensions.append(FinishAfter(before_training=True, after_n_batches=1))
+            print "Initializing main loop"
+            main_loop.run()
+            print "Jumping into IRC"
+            irc.run_forever()
+        except KeyboardInterrupt:
+            pass
+        sys.exit(0)
+
+    main_loop.run()
+
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
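
Note the trick in the --irc branch above: FinishAfter(before_training=True, after_n_batches=1) makes the main loop stop before any training happens, so main_loop.run() only fires the before_training extensions, notably SaveLoadParams, which loads the trained parameters from disk; irc.run_forever() then takes over. Presumably the script is invoked as:

    python2 irc.py --irc lstm-frigo-irc
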
diff --git a/ircext.py b/ircext.py
index 1af2ba8..d8580ad 100644
--- a/ircext.py
+++ b/ircext.py
@@ -125,5 +125,9 @@ class IRCClientExt(SimpleExtension):
def do(self, which_callback, *args):
logger.info('Polling...')
self.irc.reactor.process_once()
+
+ def run_forever(self):
+ self.irc.reactor.process_forever()
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
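
The split between process_once and process_forever mirrors the two modes: during training the extension runs after every batch and pumps the IRC event loop once without blocking, while run_forever hands control to the reactor permanently. A minimal sketch of the underlying irc-library calls (connection details hypothetical, API as of the irc package circa 2016):

    import irc.client

    reactor = irc.client.Reactor()
    conn = reactor.server().connect('irc.example.org', 6667, 'frigo')
    reactor.process_once(timeout=0)   # non-blocking pump, as done after each batch
    # reactor.process_forever()       # blocking loop, as done in IRC mode
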
diff --git a/model/__init__.py b/model/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/model/__init__.py
diff --git a/cchlstm.py b/model/cchlstm.py
index 78c9a1f..78c9a1f 100644
--- a/cchlstm.py
+++ b/model/cchlstm.py
diff --git a/dgsrnn.py b/model/dgsrnn.py
index d6d93ff..d6d93ff 100644
--- a/dgsrnn.py
+++ b/model/dgsrnn.py
diff --git a/gfgru.py b/model/gfgru.py
index 29d4398..29d4398 100644
--- a/gfgru.py
+++ b/model/gfgru.py
diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py
new file mode 100644
index 0000000..8c9cd90
--- /dev/null
+++ b/model/hpc_lstm.py
@@ -0,0 +1,115 @@
+# HPC-LSTM : Hierarchical Predictive Coding LSTM
+
+import theano
+from theano import tensor
+import numpy
+
+from blocks.bricks import Softmax, Tanh, Logistic, Linear, MLP, Identity
+from blocks.bricks.recurrent import LSTM
+from blocks.initialization import IsotropicGaussian, Constant
+
+from blocks.filter import VariableFilter
+from blocks.roles import WEIGHT
+from blocks.graph import ComputationGraph, apply_noise, apply_dropout
+
+
+class Model():
+ def __init__(self, config):
+ inp = tensor.imatrix('bytes')
+
+ in_onehot = tensor.eq(tensor.arange(config.io_dim, dtype='int16').reshape((1, 1, config.io_dim)),
+ inp[:, :, None])
+ in_onehot.name = 'in_onehot'
+
+ bricks = []
+ states = []
+
+ # Construct predictive LSTM hierarchy
+ hidden = []
+ costs = []
+ next_target = in_onehot.dimshuffle(1, 0, 2)
+ for i, (hdim, cf, q) in enumerate(zip(config.hidden_dims, config.cost_factors, config.hidden_q)):
+ init_state = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
+ name='st0_%d'%i)
+ init_cell = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
+ name='cell0_%d'%i)
+
+ linear = Linear(input_dim=config.io_dim, output_dim=4*hdim,
+ name="lstm_in_%d"%i)
+ lstm = LSTM(dim=hdim, activation=config.activation_function,
+ name="lstm_rec_%d"%i)
+ linear2 = Linear(input_dim=hdim, output_dim=config.io_dim, name='lstm_out_%d'%i)
+ tanh = Tanh('lstm_out_tanh_%d'%i)
+ bricks += [linear, lstm, linear2, tanh]
+
+ inter = linear.apply(theano.gradient.disconnected_grad(next_target))
+ new_hidden, new_cells = lstm.apply(inter,
+ states=init_state,
+ cells=init_cell)
+ states.append((init_state, new_hidden[-1, :, :]))
+ states.append((init_cell, new_cells[-1, :, :]))
+
+ hidden += [tensor.concatenate([init_state[None,:,:], new_hidden[:-1,:,:]],axis=0)]
+ pred = tanh.apply(linear2.apply(hidden[-1]))
+ diff = next_target - pred
+ costs += [numpy.float32(cf) * ((abs(next_target)+q)*(diff**2)).sum(axis=2).mean()]
+ next_target = diff
+
+
+ # Construct output from hidden states
+ hidden = [s.dimshuffle(1, 0, 2) for s in hidden]
+
+ out_parts = []
+ out_dims = config.out_hidden + [config.io_dim]
+ for i, (dim, state) in enumerate(zip(config.hidden_dims, hidden)):
+ pred_linear = Linear(input_dim=dim, output_dim=out_dims[0],
+ name='pred_linear_%d'%i)
+ bricks.append(pred_linear)
+ out_parts.append(pred_linear.apply(theano.gradient.disconnected_grad(state)))
+
+ # Do prediction and calculate cost
+ out = sum(out_parts)
+
+ if len(out_dims) > 1:
+ out = config.out_hidden_act[0](name='out_act0').apply(out)
+ mlp = MLP(dims=out_dims,
+ activations=[x(name='out_act%d'%i) for i, x in enumerate(config.out_hidden_act[1:])]
+ +[Identity()],
+ name='out_mlp')
+ bricks.append(mlp)
+ out = mlp.apply(out.reshape((inp.shape[0]*inp.shape[1],-1))).reshape((inp.shape[0],inp.shape[1],-1))
+
+ pred = out.argmax(axis=2)
+
+ cost = Softmax().categorical_cross_entropy(inp.flatten(),
+ out.reshape((inp.shape[0]*inp.shape[1],
+ config.io_dim))).mean()
+ error_rate = tensor.neq(inp.flatten(), pred.flatten()).mean()
+
+ sgd_cost = cost + sum(costs)
+
+ # Initialize all bricks
+ for brick in bricks:
+ brick.weights_init = IsotropicGaussian(0.1)
+ brick.biases_init = Constant(0.)
+ brick.initialize()
+
+
+    # put stuff into self that is useful for training or extensions
+ self.sgd_cost = sgd_cost
+
+ sgd_cost.name = 'sgd_cost'
+ for i in range(len(costs)):
+ costs[i].name = 'pred_cost_%d'%i
+ cost.name = 'cost'
+ error_rate.name = 'error_rate'
+ self.monitor_vars = [costs, [cost],
+ [error_rate]]
+
+ self.out = out
+ self.pred = pred
+
+ self.states = states
+
+
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
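
The loop above is the predictive-coding core named in the header comment: each level runs an LSTM over a gradient-blocked copy of its input sequence, predicts that same input one step ahead, and forwards its prediction error to the next level, so higher layers only model what lower layers failed to predict; the disconnected_grad calls keep each level's cost from leaking gradients into the levels below. A toy numpy rendering of the residual chain (shapes and callables illustrative, ignoring the one-step time shift):

    import numpy

    def hpc_forward(target, predictors):
        # target: (seq_len, batch, io_dim); predictors: one callable per
        # level, standing in for the Linear -> LSTM -> Linear -> Tanh stacks
        costs = []
        for predict in predictors:
            pred = predict(target)       # level tries to predict its own input
            diff = target - pred         # prediction error
            costs.append((diff ** 2).sum(axis=2).mean())
            target = diff                # next level models the residual
        return costs
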
diff --git a/lstm.py b/model/lstm.py
index 1750d58..abd44e0 100644
--- a/lstm.py
+++ b/model/lstm.py
@@ -2,8 +2,7 @@ import theano
from theano import tensor
import numpy
-from blocks.algorithms import Momentum, AdaDelta, RMSProp
-from blocks.bricks import Tanh, Softmax, Linear, MLP
+from blocks.bricks import Softmax, Linear
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant
@@ -11,75 +10,24 @@ from blocks.filter import VariableFilter
from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph, apply_noise, apply_dropout
-# An epoch will be composed of 'num_seqs' sequences of len 'seq_len'
-# divided in chunks of lengh 'seq_div_size'
-num_seqs = 20
-seq_len = 5000
-seq_div_size = 200
-
-io_dim = 256
-
-hidden_dims = [1024, 1024, 1024]
-activation_function = Tanh()
-
-i2h_all = True # input to all hidden layers or only first layer
-h2o_all = True # all hiden layers to output or only last layer
-
-w_noise_std = 0.02
-i_dropout = 0.5
-
-l1_reg = 0
-
-step_rule = 'adadelta'
-learning_rate = 0.1
-momentum = 0.9
-
-
-param_desc = '%s-%sIH,%sHO-n%s-d%s-l1r%s-%dx%d(%d)-%s' % (
- repr(hidden_dims),
- 'all' if i2h_all else 'first',
- 'all' if h2o_all else 'last',
- repr(w_noise_std),
- repr(i_dropout),
- repr(l1_reg),
- num_seqs, seq_len, seq_div_size,
- step_rule
- )
-
-save_freq = 5
-on_irc = True
-
-# parameters for sample generation
-sample_len = 1000
-sample_temperature = 0.7 #0.5
-sample_freq = None
-
-if step_rule == 'rmsprop':
- step_rule = RMSProp()
-elif step_rule == 'adadelta':
- step_rule = AdaDelta()
-elif step_rule == 'momentum':
- step_rule = Momentum(learning_rate=learning_rate, momentum=momentum)
-else:
- assert(False)
class Model():
- def __init__(self):
- inp = tensor.lmatrix('bytes')
+ def __init__(self, config):
+ inp = tensor.imatrix('bytes')
- in_onehot = tensor.eq(tensor.arange(io_dim, dtype='int16').reshape((1, 1, io_dim)),
+ in_onehot = tensor.eq(tensor.arange(config.io_dim, dtype='int16').reshape((1, 1, config.io_dim)),
inp[:, :, None])
in_onehot.name = 'in_onehot'
# Construct hidden states
- dims = [io_dim] + hidden_dims
+ dims = [config.io_dim] + config.hidden_dims
hidden = [in_onehot.dimshuffle(1, 0, 2)]
bricks = []
states = []
for i in xrange(1, len(dims)):
- init_state = theano.shared(numpy.zeros((num_seqs, dims[i])).astype(theano.config.floatX),
+ init_state = theano.shared(numpy.zeros((config.num_seqs, dims[i])).astype(theano.config.floatX),
name='st0_%d'%i)
- init_cell = theano.shared(numpy.zeros((num_seqs, dims[i])).astype(theano.config.floatX),
+ init_cell = theano.shared(numpy.zeros((config.num_seqs, dims[i])).astype(theano.config.floatX),
name='cell0_%d'%i)
linear = Linear(input_dim=dims[i-1], output_dim=4*dims[i],
@@ -87,14 +35,14 @@ class Model():
bricks.append(linear)
inter = linear.apply(hidden[-1])
- if i2h_all and i > 1:
+ if config.i2h_all and i > 1:
linear2 = Linear(input_dim=dims[0], output_dim=4*dims[i],
name="lstm_in0_%d"%i)
bricks.append(linear2)
inter = inter + linear2.apply(hidden[0])
inter.name = 'inter_bis_%d'%i
- lstm = LSTM(dim=dims[i], activation=activation_function,
+ lstm = LSTM(dim=dims[i], activation=config.activation_function,
name="lstm_rec_%d"%i)
bricks.append(lstm)
@@ -111,10 +59,10 @@ class Model():
# Construct output from hidden states
out = None
layers = zip(dims, hidden)[1:]
- if not h2o_all:
+ if not config.h2o_all:
layers = [layers[-1]]
for i, (dim, state) in enumerate(layers):
- top_linear = Linear(input_dim=dim, output_dim=io_dim,
+ top_linear = Linear(input_dim=dim, output_dim=config.io_dim,
name='top_linear_%d'%i)
bricks.append(top_linear)
out_i = top_linear.apply(state)
@@ -126,7 +74,7 @@ class Model():
cost = Softmax().categorical_cross_entropy(inp[:, 1:].flatten(),
out[:, :-1, :].reshape((inp.shape[0]*(inp.shape[1]-1),
- io_dim)))
+ config.io_dim))).mean()
error_rate = tensor.neq(inp[:, 1:].flatten(), pred[:, :-1].flatten()).mean()
# Initialize all bricks
@@ -137,24 +85,35 @@ class Model():
# Apply noise and dropout
cg = ComputationGraph([cost, error_rate])
- if w_noise_std > 0:
+ if config.w_noise_std > 0:
noise_vars = VariableFilter(roles=[WEIGHT])(cg)
- cg = apply_noise(cg, noise_vars, w_noise_std)
- if i_dropout > 0:
- cg = apply_dropout(cg, hidden[1:], i_dropout)
+ cg = apply_noise(cg, noise_vars, config.w_noise_std)
+ if config.i_dropout > 0:
+ cg = apply_dropout(cg, hidden[1:], config.i_dropout)
[cost_reg, error_rate_reg] = cg.outputs
# add l1 regularization
- if l1_reg > 0:
+ if config.l1_reg > 0:
l1pen = sum(abs(st).mean() for st in hidden[1:])
- cost_reg = cost_reg + l1_reg * l1pen
+ cost_reg = cost_reg + config.l1_reg * l1pen
+
+ cost_reg += 1e-10 # so that it is not the same Theano variable
+ error_rate_reg += 1e-10
+
+    # put stuff into self that is useful for training or extensions
+ self.sgd_cost = cost_reg
+
+ cost.name = 'cost'
+ cost_reg.name = 'cost_reg'
+ error_rate.name = 'error_rate'
+ error_rate_reg.name = 'error_rate_reg'
+ self.monitor_vars = [[cost, cost_reg],
+ [error_rate, error_rate_reg]]
- self.cost = cost
- self.error_rate = error_rate
- self.cost_reg = cost_reg
- self.error_rate_reg = error_rate_reg
self.out = out
self.pred = pred
self.states = states
+
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
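
The 'cost_reg += 1e-10' line above deserves a note: with noise, dropout and l1_reg all disabled, cost_reg would otherwise be the very same Theano variable as cost, and the monitoring channels would collide on one variable carrying two names. Adding a constant creates a distinct symbolic variable with the same value. A minimal illustration:

    from theano import tensor

    cost = tensor.scalar()
    cost.name = 'cost'
    cost_reg = cost + 1e-10      # new symbolic variable, numerically identical
    cost_reg.name = 'cost_reg'
    assert cost_reg is not cost  # both channels can now be monitored
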
diff --git a/train.py b/train.py
index 58bff1e..09555f2 100755
--- a/train.py
+++ b/train.py
@@ -1,58 +1,37 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
import logging
-import numpy
import sys
import importlib
-from contextlib import closing
+logging.basicConfig(level='INFO')
+logger = logging.getLogger(__name__)
import theano
-from theano import tensor
-from theano.tensor.shared_randomstreams import RandomStreams
-from blocks.serialization import load_parameter_values, secure_dump, BRICK_DELIMITER
-from blocks.extensions import Printing, SimpleExtension
+from blocks.extensions import Printing, SimpleExtension, FinishAfter, ProgressBar
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
-from blocks.extensions.saveload import Checkpoint, Load
+
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.model import Model
-from blocks.algorithms import GradientDescent, StepRule, CompositeRule
+from blocks.algorithms import GradientDescent
try:
from blocks.extras.extensions.plot import Plot
plot_avail = True
except ImportError:
plot_avail = False
+ logger.warning('Plotting extension not available')
+
import datastream
from paramsaveload import SaveLoadParams
from gentext import GenText
-from ircext import IRCClientExt
-logging.basicConfig(level='INFO')
-logger = logging.getLogger(__name__)
sys.setrecursionlimit(500000)
-if __name__ == "__main__":
- if len(sys.argv) != 2:
- print >> sys.stderr, 'Usage: %s config' % sys.argv[0]
- sys.exit(1)
- model_name = sys.argv[1]
- config = importlib.import_module('%s' % model_name)
-
-
-class ElementwiseRemoveNotFinite(StepRule):
- def __init__(self, scaler=0.1):
- self.scaler = scaler
-
- def compute_step(self, param, previous_step):
- not_finite = tensor.isnan(previous_step) + tensor.isinf(previous_step)
- step = tensor.switch(not_finite, self.scaler * param, previous_step)
-
- return step, []
class ResetStates(SimpleExtension):
def __init__(self, state_vars, **kwargs):
@@ -65,84 +44,75 @@ class ResetStates(SimpleExtension):
def do(self, which_callback, *args):
self.f()
-def train_model(m, train_stream, dump_path=None):
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print >> sys.stderr, 'Usage: %s [options] config' % sys.argv[0]
+ sys.exit(1)
+ model_name = sys.argv[-1]
+ config = importlib.import_module('.%s' % model_name, 'config')
+
+ # Build datastream
+ train_stream = datastream.setup_datastream(config.dataset,
+ config.num_seqs,
+ config.seq_len,
+ config.seq_div_size)
- # Define the model
- model = Model(m.sgd_cost)
+ # Build model
+ m = config.Model(config)
- cg = ComputationGraph(m.sgd_cost)
+    # Build the model / computation graph used for training
+ cg = Model(m.sgd_cost)
algorithm = GradientDescent(cost=m.sgd_cost,
- step_rule=CompositeRule([
- ElementwiseRemoveNotFinite(),
- config.step_rule]),
+ step_rule=config.step_rule,
parameters=cg.parameters)
algorithm.add_updates(m.states)
- monitor_vars = [v for p in m.monitor_vars for v in p]
+ monitor_vars = list(set(v for p in m.monitor_vars for v in p))
extensions = [
TrainingDataMonitoring(
monitor_vars,
- prefix='train', every_n_epochs=1),
- Printing(every_n_epochs=1, after_epoch=False),
+ prefix='train', every_n_batches=config.monitor_freq),
+ Printing(every_n_batches=config.monitor_freq, after_epoch=False),
+ ProgressBar(),
ResetStates([v for v, _ in m.states], after_epoch=True)
]
if plot_avail:
plot_channels = [['train_' + v.name for v in p] for p in m.monitor_vars]
extensions.append(
- Plot(document='text_'+model_name+'_'+config.param_desc,
+ Plot(document='text_'+model_name,
channels=plot_channels,
- server_url='http://eos6:5006/',
- every_n_epochs=1, after_epoch=False)
+ # server_url='http://localhost:5006',
+ every_n_batches=config.monitor_freq)
)
- if config.save_freq is not None and dump_path is not None:
+
+ if config.save_freq is not None and not '--nosave' in sys.argv:
extensions.append(
- SaveLoadParams(path=dump_path+'.pkl',
- model=model,
- before_training=True,
+ SaveLoadParams(path='params/%s.pkl'%model_name,
+ model=cg,
+ before_training=(not '--noload' in sys.argv),
after_training=True,
- after_epoch=False,
- every_n_epochs=config.save_freq)
+ every_n_batches=config.save_freq)
)
+
if config.sample_freq is not None:
extensions.append(
- GenText(m, '\nalex\ttu crois ?\n',
+ GenText(m, config.sample_init,
config.sample_len, config.sample_temperature,
- every_n_epochs=config.sample_freq,
- after_epoch=False, before_training=True)
- )
- if config.on_irc:
- extensions.append(
- IRCClientExt(m, config.sample_temperature,
- server='irc.ulminfo.fr',
- port=6667,
- nick='frigo',
- channels=['#frigotest', '#courssysteme'],
- after_batch=True)
+ before_training=True,
+ every_n_batches=config.sample_freq)
)
main_loop = MainLoop(
- model=model,
+ model=cg,
data_stream=train_stream,
algorithm=algorithm,
extensions=extensions
)
main_loop.run()
+ main_loop.profile.report()
-if __name__ == "__main__":
- # Build datastream
- train_stream = datastream.setup_datastream('data/logcompil.txt',
- config.num_seqs,
- config.seq_len,
- config.seq_div_size)
-
- # Build model
- m = config.Model()
- m.pred.name = 'pred'
-
- # Train the model
- saveloc = 'model_data/%s-%s' % (model_name, config.param_desc)
- train_model(m, train_stream, dump_path=saveloc)
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
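
With the configs now living under config/ and parameters under params/, training is presumably launched with the bare config module name plus the new optional flags:

    python2 train.py hpc-lstm-1            # train, checkpointing to params/hpc-lstm-1.pkl
    python2 train.py --noload hpc-lstm-1   # ignore any existing checkpoint
    python2 train.py --nosave hpc-lstm-1   # never write a checkpoint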