embeddings.py (1735B)
#!/usr/bin/env python2

import numpy
import theano
import theano.tensor as T
import theano.sparse as S


class Embeddings(object):
    """ Embeddings matrix class.

    This class has one parameter:
    E -- the "Embeddings", a Theano shared matrix of shape
         (number, dimension), each row initialised to unit L2 norm.
    """

    def __init__(self, rng, number, dimension, tag):
        """ Initialise the parameter.

        Keyword arguments:
        rng -- module for random number generation
        number -- number of embeddings
        dimension -- dimension of the embeddings
        tag -- name of the embeddings for parameter declaration
        """
        self.number = number
        self.dimension = dimension

        # Sample uniformly in [-sqrt(6/dimension), +sqrt(6/dimension)],
        # then rescale every row to unit L2 norm.
        bound = numpy.sqrt(6. / dimension)
        initial = rng.uniform(low=-bound, high=bound, size=(number, dimension))
        row_norms = numpy.sqrt(numpy.sum(initial ** 2, axis=1))
        initial = initial / row_norms[:, numpy.newaxis]

        self.E = theano.shared(
            name=tag,
            value=numpy.asarray(initial, dtype=theano.config.floatX))

        self.parameters = [self.E]

    def embed(self, entities):
        """ Embed given entities.

        Keyword arguments:
        entities -- a sparse matrix of size ('x', self.number)
        """
        # Sparse-dense product selects (or mixes) rows of E.
        return S.dot(entities, self.E)

    def updates(self, cost, learning_rate):
        """ Compute the updates to perform a SGD step w.r.t. a given cost.

        Keyword arguments:
        cost -- The cost to optimise.
        learning_rate -- The learning rate used for gradient descent.
        """
        gradient = T.grad(cost=cost, wrt=self.E)
        return [(self.E, self.E - learning_rate * gradient)]

    def normalise_updates(self):
        """ Normalise the embeddings' L2 norm to 1. """
        # dimshuffle(0, 'x') turns the per-row norms into a broadcastable
        # column so the division applies row-wise.
        norms = T.sqrt(T.sum(self.E ** 2, axis=1)).dimshuffle(0, 'x')
        return [(self.E, self.E / norms)]