import theano
from theano import tensor

from blocks.algorithms import Momentum, AdaDelta
from blocks.bricks import Tanh, Softmax, Linear, MLP
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant

from blocks.filter import VariableFilter
from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph, apply_noise


chars_per_seq = 100
seqs_per_epoch = 1

io_dim = 256                # byte-level model: one class per byte value

hidden_dims = [200, 500]
activation_function = Tanh()

w_noise_std = 0.01

step_rule = AdaDelta()
pt_freq = 1

param_desc = ''  # TODO


class Model(object):
    def __init__(self):
        inp = tensor.lvector('bytes')

        # One-hot encode the byte sequence: shape (seq_len, io_dim)
        in_onehot = tensor.eq(tensor.arange(io_dim, dtype='int16')
                                    .reshape((1, io_dim)),
                              inp[:, None])

        # Build the LSTM stack. Blocks recurrent bricks expect input of
        # shape (time, batch, features), so add a batch axis of size one.
        dims = [io_dim] + hidden_dims
        prev = in_onehot[:, None, :]
        bricks = []
        for i in xrange(1, len(dims)):
            # Each layer is a Linear brick feeding the LSTM's four gates
            # (hence output_dim=4*dims[i]), followed by the LSTM itself.
            linear = Linear(input_dim=dims[i-1], output_dim=4*dims[i],
                            name="lstm_in_%d" % i)
            lstm = LSTM(dim=dims[i], activation=activation_function,
                        name="lstm_rec_%d" % i)
            # lstm.apply returns (states, cells); keep the hidden states
            prev = lstm.apply(linear.apply(prev))[0]
            bricks = bricks + [linear, lstm]

        # Output layer producing raw energies: the MLP stays linear
        # because Softmax().categorical_cross_entropy below applies the
        # softmax itself (a Softmax activation here would apply it twice)
        top_linear = MLP(dims=[hidden_dims[-1], io_dim],
                         activations=[None],
                         name="pred_mlp")
        bricks.append(top_linear)

        out = top_linear.apply(prev.reshape((inp.shape[0],
                                             hidden_dims[-1])))

        pred = out.argmax(axis=1)

        # out[t] is produced after reading bytes 0..t, so it predicts
        # byte t+1: align predictions out[:-1] with targets inp[1:]
        cost = Softmax().categorical_cross_entropy(inp[1:], out[:-1])
        error_rate = tensor.neq(inp[1:], pred[:-1]).mean()

        # Initialize the parameters
        for brick in bricks:
            brick.weights_init = IsotropicGaussian(0.1)
            brick.biases_init = Constant(0.)
            brick.initialize()

        # Apply Gaussian noise to the weights as a regularizer; the noisy
        # graph is used for training, the clean one for monitoring
        cg = ComputationGraph([cost, error_rate])
        noise_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, noise_vars, w_noise_std)
        [cost_reg, error_rate_reg] = cg.outputs

        self.cost = cost
        self.error_rate = error_rate
        self.cost_reg = cost_reg
        self.error_rate_reg = error_rate_reg
        self.pred = pred
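

# --- Usage sketch (an illustration, not part of the original file) ---
# A minimal way to exercise the model: recover the 'bytes' input variable
# from the computation graph (Model does not keep it on the instance),
# compile Theano functions for prediction and cost, and run them on an
# arbitrary byte sequence. The random sample below is purely illustrative.
if __name__ == '__main__':
    import numpy

    model = Model()

    # The ComputationGraph of any output exposes its free inputs;
    # here the only one is the 'bytes' lvector.
    bytes_var = ComputationGraph(model.pred).inputs[0]

    predict = theano.function([bytes_var], model.pred)
    compute_cost = theano.function([bytes_var], model.cost)

    sample = numpy.random.randint(0, io_dim,
                                  size=(chars_per_seq,)).astype('int64')
    print 'predicted next bytes:', predict(sample)
    print 'mean per-byte cross-entropy:', compute_cost(sample).mean()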