diff options
author | Thomas Mesnard <thomas.mesnard@ens.fr> | 2015-12-28 20:35:38 +0100 |
---|---|---|
committer | Thomas Mesnard <thomas.mesnard@ens.fr> | 2015-12-28 20:35:38 +0100 |
commit | e8e37dee0c5c846b1aa2dd24dc99095191f72a9b (patch) | |
tree | d033f04eaca8178ada7ee966c4d8e56df45a6ace | |
parent | c9ba2abc7172b4657216e0fcc638098060d7f753 (diff) | |
download | pgm-ctc-e8e37dee0c5c846b1aa2dd24dc99095191f72a9b.tar.gz pgm-ctc-e8e37dee0c5c846b1aa2dd24dc99095191f72a9b.zip |
Kind of works
-rw-r--r-- | ctc.py | 23 | ||||
-rw-r--r-- | edit_distance.py | 24 | ||||
-rw-r--r-- | main.py | 31 |
3 files changed, 57 insertions, 21 deletions
@@ -28,14 +28,14 @@ class CTC(Brick): - Return the probability found at the end of that sequence """ T = probs.shape[0] + B = probs.shape[1] C = probs.shape[2]-1 L = l.shape[0] S = 2*L+1 - B = l.shape[1] # l_blk = l with interleaved blanks l_blk = C * tensor.ones((S, B), dtype='int32') - l_blk = tensor.set_subtensor(l_blk[1::2,:],l) + l_blk = tensor.set_subtensor(l_blk[1::2,:], l) l_blk = l_blk.T # now l_blk is B x S # dimension of alpha (corresponds to alpha hat in the paper) : @@ -43,13 +43,10 @@ class CTC(Brick): # dimension of c : # T x B # first value of alpha (size B x S) - alpha0 = tensor.concatenate([ - probs[0, :, C][:,None], - probs[0][tensor.arange(B), l[0]][:,None], - tensor.zeros((B, S-2)) + alpha0 = tensor.concatenate([ tensor.ones((B, 1)), + tensor.zeros((B, S-1)) ], axis=1) - c0 = alpha0.sum(axis=1) - alpha0 = alpha0 / c0[:,None] + c0 = tensor.ones((B,)) # recursion l_blk_2 = tensor.concatenate([-tensor.ones((B,2)), l_blk[:,:-2]], axis=1) @@ -76,8 +73,11 @@ class CTC(Brick): sequences=[probs, probs_mask], outputs_info=[alpha0, c0]) + prob = tensor.log(c).sum(axis=0) + tensor.log(alpha[-1][tensor.arange(B), 2*l_len.astype('int32')-1] + + alpha[-1][tensor.arange(B), 2*l_len.astype('int32')]) + # return the log probability of the labellings - return tensor.log(c).sum(axis=0) + return -prob def best_path_decoding(self, probs, probs_mask=None): @@ -89,7 +89,8 @@ class CTC(Brick): maxprob = probs.argmax(axis=2) is_double = tensor.eq(maxprob[:-1], maxprob[1:]) maxprob = tensor.switch(tensor.concatenate([tensor.zeros((1,B)), is_double]), - maxprob, C*tensor.ones_like(maxprob)) + C*tensor.ones_like(maxprob), maxprob) + # maxprob = theano.printing.Print('maxprob')(maxprob.T).T # returns two values : # label : (T x) T x B @@ -105,7 +106,7 @@ class CTC(Brick): [label_length, label], _ = scan(fn=recursion, sequences=[maxprob, probs_mask], - outputs_info=[tensor.zeros((B,),dtype='int32'),tensor.zeros((T,B))]) + outputs_info=[tensor.zeros((B,),dtype='int32'),-tensor.ones((T,B))]) return label[-1], label_length[-1] diff --git a/edit_distance.py b/edit_distance.py new file mode 100644 index 0000000..d76cc00 --- /dev/null +++ b/edit_distance.py @@ -0,0 +1,24 @@ +import numpy +import theano +from theano import tensor + +@theano.compile.ops.as_op(itypes=[tensor.imatrix, tensor.ivector, tensor.imatrix, tensor.ivector], + otypes=[tensor.ivector]) +def batch_edit_distance(a, a_len, b, b_len): + B = a.shape[0] + assert b.shape[0] == B + + q = max(a.shape[1], b.shape[1]) * numpy.ones((B, a.shape[1]+1, b.shape[1]+1), dtype='int32') + q[:, 0, 0] = 0 + + for i in range(a.shape[1]+1): + for j in range(b.shape[1]+1): + if i > 0: + q[:, i, j] = numpy.minimum(q[:, i, j], q[:, i-1, j]+1) + if j > 0: + q[:, i, j] = numpy.minimum(q[:, i, j], q[:, i, j-1]+1) + if i > 0 and j > 0: + q[:, i, j] = numpy.minimum(q[:, i, j], q[:, i-1, j-1]+numpy.not_equal(a[:, i-1], b[:, j-1])) + return q[numpy.arange(B), a_len, b_len] + +# vim: set sts=4 ts=4 sw=4 tw=0 et : @@ -8,7 +8,7 @@ from ctc import CTC from blocks.initialization import IsotropicGaussian, Constant from fuel.datasets import IterableDataset from fuel.streams import DataStream -from blocks.algorithms import (GradientDescent, Scale, +from blocks.algorithms import (GradientDescent, Scale, AdaDelta, RemoveNotFinite, StepClipping, CompositeRule) from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring from blocks.main_loop import MainLoop @@ -18,10 +18,12 @@ from blocks.graph import ComputationGraph from dummy_dataset import setup_datastream +from edit_distance import batch_edit_distance + floatX = theano.config.floatX -n_epochs = 200 +n_epochs = 10000 num_input_classes = 5 h_dim = 20 rec_dim = 20 @@ -63,6 +65,10 @@ y_hat = tensor.nnet.softmax( ).reshape((y_hat_pre.shape[0], y_hat_pre.shape[1], -1)) y_hat.name = 'y_hat' +#y_hat = theano.printing.Print('y_hat')(y_hat) +#y = theano.printing.Print('y')(y) +#y_mask = theano.printing.Print('y_mask')(y_mask) + y_hat_mask = input_mask # Cost @@ -71,6 +77,10 @@ cost = CTC().apply(y, y_hat, y_len, y_hat_mask).mean() cost.name = 'CTC' dl, dl_length = CTC().best_path_decoding(y_hat, y_hat_mask) + +edit_distance = batch_edit_distance(dl.T.astype('int32'), dl_length, y.T.astype('int32'), y_len.astype('int32')).mean() +edit_distance.name = 'edit_distance' + L = y.shape[0] B = y.shape[1] dl = dl[:L, :] @@ -80,6 +90,7 @@ is_error = tensor.switch(is_error.sum(axis=0), tensor.ones((B,)), tensor.neq(y_l error_rate = is_error.mean() error_rate.name = 'error_rate' + # Initialization for brick in [input_to_h, pre_lstm, lstm, rec_to_o]: brick.weights_init = IsotropicGaussian(0.01) @@ -87,23 +98,23 @@ for brick in [input_to_h, pre_lstm, lstm, rec_to_o]: brick.initialize() print('Bulding DataStream ...') -ds, stream = setup_datastream(batch_size=10, - nb_examples=1000, rng_seed=123, - min_out_len=10, max_out_len=20) -valid_ds, valid_stream = setup_datastream(batch_size=10, +ds, stream = setup_datastream(batch_size=100, + nb_examples=10000, rng_seed=123, + min_out_len=5, max_out_len=10) +valid_ds, valid_stream = setup_datastream(batch_size=100, nb_examples=1000, rng_seed=456, - min_out_len=10, max_out_len=20) + min_out_len=5, max_out_len=10) print('Bulding training process...') algorithm = GradientDescent(cost=cost, parameters=ComputationGraph(cost).parameters, - step_rule=CompositeRule([StepClipping(10.0), - Scale(0.02)])) + step_rule=CompositeRule([RemoveNotFinite(), AdaDelta()])) + # CompositeRule([StepClipping(10.0), Scale(0.02)])) monitor_cost = TrainingDataMonitoring([cost, error_rate], prefix="train", after_epoch=True) -monitor_valid = DataStreamMonitoring([cost, error_rate], +monitor_valid = DataStreamMonitoring([cost, error_rate, edit_distance], data_stream=valid_stream, prefix="valid", after_epoch=True) |