import numpy as np

import theano
import theano.tensor as T

from lasagne.layers.base import Layer

###############################################################################################################

class NormalisationLayer(Layer):
    """
    This layer applies a simple mean-and-std normalisation to input data.
    This allows you to "learn" the mean+std from training data and then apply it "live" to any future incoming data.

    NOTE: the parameters are NOT learnt during training, but must be initialised BEFORE training using the set_normalisation() function.
    """

    def __init__(self, incoming, numbins, **kwargs):
        "numbins is the number of frequency bins in the spectrograms we're going to be normalising"
        super(NormalisationLayer, self).__init__(incoming, **kwargs)
        self.numbins = numbins
        self._output_shape = None
        self.initialised = False  # flipped to True by set_normalisation()
        # Per-frequency-bin normalisation: we "sub" the mean and then "mul" by 1/std
        # (stored as a multiplier rather than a divisor because mul is often more efficient).
        # Shapes are (1, 1, numbins, 1): broadcastable over batch, channel and time axes,
        # per-bin along the frequency axis.
        self.normn_sub = theano.shared(
            np.zeros((1, 1, numbins, 1), dtype=theano.config.floatX),
            borrow=True, name='norm_sub', broadcastable=(True, True, False, True))
        self.normn_mul = theano.shared(
            np.ones((1, 1, numbins, 1), dtype=theano.config.floatX),
            borrow=True, name='norm_mul', broadcastable=(True, True, False, True))
        # A separately-compiled theano function so the normalisation can be applied
        # "manually" to concrete data, outside of any larger network graph.
        inputdata = T.tensor4('inputdata')
        self.transform_some_data = theano.function(
            [inputdata], (inputdata - self.normn_sub) * self.normn_mul)

    def get_output_shape_for(self, input_shape):
        # Normalisation is elementwise, so the output shape equals the input shape.
        return input_shape

    def get_output_for(self, inputdata, **kwargs):
        # NOTE(review): before set_normalisation() is called this silently applies
        # the identity transform (sub 0, mul 1) rather than warning.
        return (inputdata - self.normn_sub) * self.normn_mul

    def set_normalisation(self, databatches):
        """
        Estimate the per-frequency-bin mean and 1/std from `databatches` and store
        them in the layer's shared variables. Must be called before training.

        `databatches` is assumed to be a nested array of batches of 4D spectrogram
        data whose frequency axis has self.numbins bins -- TODO confirm against caller.
        """
        numbins = self.numbins
        # We first collapse the data batches, essentially into one very long
        # spectrogram of shape (numbins, total_time).
        data = np.concatenate(np.vstack(np.vstack(databatches)), axis=-1)

        centre = np.mean(data, axis=1)
        self.normn_sub.set_value(
            centre.astype(theano.config.floatX).reshape((1, 1, numbins, 1)), borrow=True)
        # BUGFIX: cast to floatX exactly as done for normn_sub above. Without the
        # cast, 1./std is float64 and set_value() on a floatX=float32 shared
        # variable raises a TypeError (Theano does not downcast implicitly).
        # NOTE(review): a zero-variance bin still yields inf here, unchanged from
        # the original behaviour -- confirm whether a floor on std is wanted.
        self.normn_mul.set_value(
            (1. / data.std(axis=1)).astype(theano.config.floatX).reshape((1, 1, numbins, 1)),
            borrow=True)

        self.initialised = True