tntn

Theano reimplementation of Neural Tensor Networks (NTN)
git clone https://esimon.eu/repos/tntn.git
Log | Files | Refs | README

model.py (11079B)


      1 #!/usr/bin/env python2
      2 
      3 import numpy
      4 import scipy
      5 import theano
      6 import theano.tensor as T
      7 import theano.sparse as S
      8 
      9 class Relation(object):
     10     """ Relation class.
     11 
     12     This class has four parameters:
     13     W -- a (1,2)-tensor "Slices of Tensor Layer"
     14     V -- a (1,1)-tensor "Standard Layer"
     15     b -- a (1,0)-tensor "Bias"
     16     u -- a (0,1)-tensor "Linear Layer"
     17     """
     18 
     19     def __init__(self, rng, act, n_in, n_hid, tag):
     20         """ Initialise the parameters.
     21 
     22         Keyword arguments:
     23         rng -- numpy.random module for number generation
     24         act -- activation function
     25         n_in -- dimension of the embeddings
     26         n_hid -- size of the hidden layer
     27         tag -- name of the relation for parameter declaration
     28         """
     29 
     30         wbound = numpy.sqrt(6./(n_in**2 + n_hid))
     31         vbound = numpy.sqrt(6./(n_in*2 + n_hid))
     32         ubound = numpy.sqrt(6./(n_hid + 1))
     33 
     34         self.act = act
     35         if act==T.nnet.sigmoid:
     36             wbound, vbound, ubound = (4*i for i in (wbound, vbound, ubound))
     37 
     38         def ip(name, size, bound):
     39             return theano.shared(name=name, value=numpy.asarray(rng.uniform(low=-bound, high=bound, size=size), dtype=theano.config.floatX))
     40 
     41         self.W = ip(tag+".W", (n_in, n_in, n_hid), wbound)
     42         self.V = ip(tag+".V", (n_hid, n_in*2), vbound)
     43         self.u = ip(tag+".u", (n_hid,), ubound)
     44         self.b = theano.shared(name=tag+".b", value=numpy.zeros(shape=(n_hid,), dtype=theano.config.floatX))
     45         self.params = [ self.W, self.V, self.u, self.b ]
     46 
     47     def score(self, inputl, inputr):
     48         """ Compute the score on given embeddings. """
     49         bilinear = ((inputr.transpose().reshape((inputr.shape[1], inputr.shape[0], 1))) * T.tensordot(inputl, self.W, axes=([0], [0]))).sum(1).transpose()
     50         linear = T.dot(self.V, T.concatenate([inputl, inputr]))
     51         bias = self.b.dimshuffle(0, 'x')
     52         return T.dot(self.u, self.act(bilinear + linear + bias))
     53 
     54     def regularizer(self):
     55         """ Compute the squared L2-norm of the relation's parameters. """
     56         return sum(T.sum(x**2) for x in [self.u, self.V, self.W, self.b])
     57     
     58     def contrast(self, posl, posr, negl, negr):
     59         """ Compute the contrast on a given set of valid and corrupted embeddings. """
     60         dist = 1 - self.score(posl, posr) + self.score(negl, negr)
     61         return T.mean((dist>0)*dist)
     62 
     63     def updates(self, cost, learning_rate):
     64         """ Compute the updates to perform w.r.t. a given cost."""
     65         return [ (param, param - learning_rate * T.grad(cost=cost, wrt=param)) for param in self.params ]
     66 
     67 class Embeddings(object):
     68     """ Embeddings matrix class.
     69 
     70     This class has one parameter:
     71     E -- a set of (1,0)-tensor "Embeddings"
     72     """
     73     def __init__(self, rng, number, dimension, tag):
     74         """ Initialise the parameter.
     75 
     76         Keyword arguments:
     77         rng -- numpy.random module for number generation
     78         number -- number of embeddings
     79         dimension -- dimension of the embeddings
     80         tag -- name of the embeddings for parameter declaration
     81         """
     82 
     83         self.number = number
     84         self.dimension = dimension
     85 
     86         Ebound = numpy.sqrt(6. / dimension)
     87         E_values = rng.uniform(low=-Ebound, high=Ebound, size=(dimension, number))
     88         E_values = E_values / numpy.sqrt(numpy.sum(E_values **2, axis=0))
     89         self.E = theano.shared(name=tag, value=numpy.asarray(E_values, dtype=theano.config.floatX))
     90 
     91     def embed(self, entity):
     92         """ Embed an entity. """
     93         return S.dot(self.E, entity)
     94 
     95     def regularizer(self):
     96         """ Compute the squared L2-norm of the embeddings parameter. """
     97         return T.sum(self.E**2)
     98 
     99     def updates(self, cost, learning_rate):
    100         """ Compute the updates to perform w.r.t. a given cost."""
    101         return [(self.E, self.E - learning_rate * T.grad(cost=cost, wrt=self.E))]
    102 
    103 class NTN(object):
    104     """ Neural Tensor Network class.
    105 
    106     This model has two parameters:
    107     E -- the embeddings
    108     R -- the relations
    109     """
    110 
    111     def __init__(self, rng, n_embedding, d_embedding, n_relation, act, n_hid, tag):
    112         """ Initialise the parameters.
    113 
    114         Keyword arguments:
    115         rng -- numpy.random module for number generation
    116         n_embedding -- number of embeddings
    117         d_embedding -- dimension of the embeddings
    118         n_relation -- number of relations
    119         act -- activation function
    120         n_hid -- size of the hidden layer ("number of slices")
    121         tag -- name of the model for parameter declaration
    122         """
    123         self.n_embedding = n_embedding
    124         self.d_embedding = d_embedding
    125         self.n_relation = n_relation
    126 
    127         self.E = Embeddings(rng, n_embedding, d_embedding, tag+".E")
    128         self.R = [ Relation(rng, act, d_embedding, n_hid, tag+".R"+str(r)) for r in xrange(n_relation) ]
    129 
    130     def updates(self, relation, cost, learning_rate):
    131         """ Compute the updates to perform w.r.t. a given cost."""
    132         return self.R[relation].updates(cost, learning_rate) + self.E.updates(cost, learning_rate)
    133 
    134     def train(self, relation, regularization, learning_rate):
    135         """ Construct the training function for a given relation
    136 
    137         Keyword arguments:
    138         relation -- The relation for which the model will be trained.
    139         regularization -- The regularization weight hyperparameter.
    140         learning_rate -- The learning rate hyperparameter.
    141 
    142         Returned Theano function:
    143         (left_positive, right_positive, left_negative, right_negative) -> objective
    144         The four arguments must have the same shape: (self.n_embedding, N) for any N.
    145         """
    146         R = self.R[relation]
    147 
    148         inputs = tuple(S.csc_matrix() for _ in xrange(4))
    149         X = map((lambda var: self.E.embed(var)), inputs)
    150 
    151         objective = R.contrast(*X) + regularization * (self.E.regularizer() + R.regularizer())
    152         updates = self.updates(relation, objective, learning_rate)
    153         return theano.function(inputs=list(inputs), outputs=objective, updates=updates)
    154 
    155     def score(self, relation):
    156         """ Construct the scoring function for a given relation
    157 
    158         Keyword arguments:
    159         relation -- The relation for which the model will be trained.
    160 
    161         Returned Theano function:
    162         (left, right) -> objective
    163         The two arguments must have the same shape: (self.n_embedding, N) for any N.
    164         """
    165         inputs = tuple(S.csc_matrix() for _ in xrange(2))
    166         X = map((lambda var: self.E.embed(var)), inputs)
    167         g = self.R[relation].score(*X)
    168 
    169         return theano.function(inputs=list(inputs), outputs=g)
    170 
    171     def test(self, relation):
    172         """ Construct the testing function for a given relation
    173 
    174         Keyword arguments:
    175         relation -- The relation for which the model will be tested.
    176 
    177         Returned Theano function:
    178         (left, right, Y, threshold) -> score
    179         The first two arguments must have the same shape: (self.n_embedding, N) for any N.
    180         The third argument is the expected result, its shape must be (1, N) for the same N as left and right.
    181         The fourth argument is the threshold at which a relation is considered to hold.
    182         """
    183         entities = (S.csc_matrix(), S.csc_matrix())
    184         X = map((lambda var: self.E.embed(var)), entities)
    185         Y = T.vector()
    186         threshold = T.scalar()
    187         R=self.R[relation]
    188 
    189         error = T.mean(T.neq(R.score(*X) >= threshold, Y))
    190         return theano.function(inputs=list(entities)+[Y, threshold], outputs=error)
    191 
    192 def test_ntn():
    193     n_embedding = 1000
    194     d_embedding = 100
    195     n_hid = 3
    196     learning_rate = 0.1 # FIXME
    197     regularization = 0.0001
    198     n_epoch = 500
    199     n_batches = 10
    200     threshold_precision = 10000
    201     rng=numpy.random
    202     batch_size = (2 * n_embedding) / n_batches
    203 
    204     print '... Constructing dataset'
    205     def rand_embedding(number):
    206         coo_row = rng.permutation(n_embedding)[0:number]
    207         coo_col = range(number)
    208         coo_data = numpy.ones(number, dtype=theano.config.floatX)
    209         randommat = scipy.sparse.coo_matrix((coo_data, (coo_row, coo_col)), shape=(n_embedding, number))
    210         return scipy.sparse.csc_matrix(randommat)
    211 
    212     left = rand_embedding(n_embedding)
    213     right = rand_embedding(n_embedding)
    214 
    215     print '... Building model'
    216     model = NTN(rng, n_embedding, d_embedding, 1, T.tanh, n_hid, "NTN")
    217     train = model.train(0, regularization, learning_rate)
    218     test = model.test(0)
    219     score = model.score(0)
    220 
    221     print '... Training model'
    222     for epoch in xrange(n_epoch):
    223         order = rng.permutation(n_embedding)
    224         left_positive = scipy.sparse.hstack([left[:, order], left[:, order]], dtype=theano.config.floatX, format='csc')
    225         right_positive = scipy.sparse.hstack([right[:, order], right[:, order]], dtype=theano.config.floatX, format='csc')
    226         left_negative = scipy.sparse.hstack([rand_embedding(n_embedding), left[:, order]], dtype=theano.config.floatX, format='csc')
    227         right_negative = scipy.sparse.hstack([right[:, order], rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
    228 
    229         obj=0.
    230         for batch in xrange(n_batches):
    231             lpt = left_positive[:, batch*batch_size:(batch+1)*batch_size]
    232             rpt = right_positive[:, batch*batch_size:(batch+1)*batch_size]
    233             lnt = left_negative[:, batch*batch_size:(batch+1)*batch_size]
    234             rnt = right_negative[:, batch*batch_size:(batch+1)*batch_size]
    235             obj = obj + train(lpt, rpt, lnt, rnt)
    236 
    237         if (epoch+1)%100==0:
    238             print "Epoch", 1+epoch, "/", n_epoch,
    239             print "\tObj: ", obj/n_batches
    240  
    241     print '... Searching threshold'
    242     valid_left = scipy.sparse.hstack([left, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
    243     valid_right = scipy.sparse.hstack([right, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
    244     valid_y = numpy.concatenate([numpy.ones(shape=(n_embedding,), dtype=theano.config.floatX), numpy.zeros(shape=(n_embedding,), dtype=theano.config.floatX)])
    245 
    246     scores = score(valid_left, valid_right)
    247     min_threshold, max_threshold = min(scores), max(scores)
    248     threshold = min_threshold
    249     threshold_error = 2
    250     for candidate in numpy.linspace(min_threshold, max_threshold, threshold_precision):
    251         error = test(valid_left, valid_right, valid_y, candidate.astype(theano.config.floatX))
    252         if error < threshold_error:
    253             threshold_error = error
    254             threshold = candidate.astype(theano.config.floatX)
    255     print 'Threshold :', threshold
    256 
    257     print '... Testing model'
    258     test_left = scipy.sparse.hstack([left, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
    259     test_right = scipy.sparse.hstack([right, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
    260     test_y = numpy.concatenate([numpy.ones(shape=(n_embedding,), dtype=theano.config.floatX), numpy.zeros(shape=(n_embedding,), dtype=theano.config.floatX)])
    261     error = test(test_left, test_right, test_y, threshold)
    262     print 'Error :', error
    263 
# Run the NTN demonstration only when this file is executed as a script.
if __name__ == '__main__':
    test_ntn()