commit 6ccff23ecf2f6f601ea69d3fef7eec1821e74a07
parent 1bf3985b3db97221b126f3e9a070232ef3a138f3
Author: Étienne Simon <esimon@esimon.eu>
Date:   Mon,  7 Apr 2014 15:39:36 +0200
NTN model with SGD on dummy data
Diffstat:
| A | model.py |  |  | 263 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
1 file changed, 263 insertions(+), 0 deletions(-)
diff --git a/model.py b/model.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python2
+import numpy
+import scipy
+import theano
+import theano.tensor as T
+import theano.sparse as S
+
+class Relation(object):
+    """ Parameters and scoring function of a single relation.
+
+    Each instance owns four shared variables:
+    W -- "Slices of Tensor Layer", shape (n_in, n_in, n_hid)
+    V -- "Standard Layer", shape (n_hid, 2*n_in)
+    b -- "Bias", a column of shape (n_hid, 1) broadcastable along its second axis
+    u -- "Linear Layer", shape (n_hid,)
+    """
+
+    def __init__(self, rng, act, n_in, n_hid, tag):
+        """ Draw the initial value of every parameter.
+
+        Keyword arguments:
+        rng -- random source, used through its uniform(low, high, size) method
+        act -- activation function applied to the hidden layer
+        n_in -- dimension of the embeddings
+        n_hid -- size of the hidden layer
+        tag -- prefix used to name the shared variables
+        """
+        floatX = theano.config.floatX
+        # The initial ranges are four times wider for the sigmoid activation.
+        widen = 4 if act == T.nnet.sigmoid else 1
+        self.act = act
+
+        def uniform_shared(suffix, shape, fan):
+            """ Shared variable drawn uniformly in +/- widen*sqrt(6/fan). """
+            limit = widen * numpy.sqrt(6. / fan)
+            draw = rng.uniform(low=-limit, high=limit, size=shape)
+            return theano.shared(name=tag + suffix, value=numpy.asarray(draw, dtype=floatX))
+
+        # The draws are made in the order W, V, u; b starts at zero.
+        self.W = uniform_shared(".W", (n_in, n_in, n_hid), n_in**2 + n_hid)
+        self.V = uniform_shared(".V", (n_hid, n_in*2), n_in*2 + n_hid)
+        self.u = uniform_shared(".u", (n_hid,), n_hid + 1)
+        bias = numpy.zeros(shape=(n_hid, 1), dtype=floatX)
+        self.b = theano.shared(name=tag + ".b", value=bias, broadcastable=[False, True])
+        self.params = [self.W, self.V, self.u, self.b]
+
+    def score(self, inputl, inputr):
+        """ Build the symbolic score of the given left and right embeddings. """
+        dim, count = inputr.shape[0], inputr.shape[1]
+        # Right embeddings as a (count, dim, 1) tensor so they broadcast over the slices.
+        right = inputr.transpose().reshape((count, dim, 1))
+        # Contract the first axis of the left embeddings with the first axis of W.
+        left_slices = T.tensordot(inputl, self.W, axes=([0], [0]))
+        bilinear = (right * left_slices).sum(1).transpose()
+        stacked = T.concatenate([inputl, inputr])
+        linear = T.dot(self.V, stacked)
+        hidden = self.act(bilinear + linear + self.b)
+        return T.dot(self.u, hidden)
+
+    def regularizer(self):
+        """ Return the sum of the squared entries of u, V, W and b. """
+        squares = [T.sum(param**2) for param in (self.u, self.V, self.W, self.b)]
+        return sum(squares)
+
+    def contrast(self, posl, posr, negl, negr):
+        """ Return the mean over the pairs of max(0, 1 - score(valid) + score(corrupted)). """
+        margin = 1 - self.score(posl, posr) + self.score(negl, negr)
+        violated = margin > 0
+        return T.mean(violated * margin)
+
+    def updates(self, cost, learning_rate):
+        """ List one gradient descent step per parameter for the given cost. """
+        steps = []
+        for param in self.params:
+            gradient = T.grad(cost=cost, wrt=param)
+            steps.append((param, param - learning_rate * gradient))
+        return steps
+
+class Embeddings(object):
+    """ Matrix holding one embedding per column.
+
+    This class has one parameter:
+    E -- a shared (dimension, number) matrix "Embeddings"
+    """
+    def __init__(self, rng, number, dimension, tag):
+        """ Draw the initial embeddings.
+
+        Keyword arguments:
+        rng -- random source, used through its uniform(low, high, size) method
+        number -- number of embeddings
+        dimension -- dimension of the embeddings
+        tag -- name given to the shared variable
+        """
+        limit = numpy.sqrt(6. / dimension)
+        raw = rng.uniform(low=-limit, high=limit, size=(dimension, number))
+        # Rescale every column to unit Euclidean norm.
+        column_norms = numpy.sqrt(numpy.sum(raw **2, axis=0))
+        unit = raw / column_norms
+        self.E = theano.shared(name=tag, value=numpy.asarray(unit, dtype=theano.config.floatX))
+
+        self.number, self.dimension = number, dimension
+
+    def embed(self, entity):
+        """ Return the product of the embeddings matrix with a sparse entity matrix. """
+        embedded = S.dot(self.E, entity)
+        return embedded
+
+    def regularizer(self):
+        """ Return the sum of the squared entries of E. """
+        squared = self.E**2
+        return T.sum(squared)
+
+    def updates(self, cost, learning_rate):
+        """ List the gradient descent step of E for the given cost. """
+        gradient = T.grad(cost=cost, wrt=self.E)
+        stepped = self.E - learning_rate * gradient
+        return [(self.E, stepped)]
+
+class NTN(object):
+    """ Neural Tensor Network class.
+
+    This model has two parameters:
+    E -- the embeddings, a single Embeddings instance used by every relation
+    R -- the relations, a list of n_relation Relation instances
+    """
+
+    def __init__(self, rng, n_embedding, d_embedding, n_relation, act, n_hid, tag):
+        """ Initialise the parameters.
+
+        Keyword arguments:
+        rng -- numpy.random module for number generation
+        n_embedding -- number of embeddings
+        d_embedding -- dimension of the embeddings
+        n_relation -- number of relations
+        act -- activation function
+        n_hid -- size of the hidden layer ("number of slices")
+        tag -- name of the model for parameter declaration
+        """
+        self.n_embedding = n_embedding
+        self.d_embedding = d_embedding
+        self.n_relation = n_relation
+
+        self.E = Embeddings(rng, n_embedding, d_embedding, tag+".E")
+        self.R = [ Relation(rng, act, d_embedding, n_hid, tag+".R"+str(r)) for r in xrange(n_relation) ]
+
+    def _embedded_inputs(self, count):
+        """ Declare `count` symbolic sparse inputs and embed each of them.
+
+        Returns a pair (inputs, embedded): the list of csc matrix variables and
+        the list of their embeddings, in the same order.
+        """
+        inputs = [ S.csc_matrix() for _ in xrange(count) ]
+        embedded = [ self.E.embed(var) for var in inputs ]
+        return inputs, embedded
+
+    def updates(self, relation, cost, learning_rate):
+        """ Compute the updates to perform w.r.t. a given cost.
+
+        The result lists the updates of the parameters of the given relation
+        followed by the update of the embeddings.
+        """
+        return self.R[relation].updates(cost, learning_rate) + self.E.updates(cost, learning_rate)
+
+    def train(self, relation, regularization, learning_rate):
+        """ Construct the training function for a given relation
+
+        Keyword arguments:
+        relation -- The index of the relation for which the model will be trained.
+        regularization -- The regularization weight hyperparameter.
+        learning_rate -- The learning rate hyperparameter.
+
+        Returned Theano function:
+        (left_positive, right_positive, left_negative, right_negative) -> objective
+        The four arguments must have the same shape: (self.n_embedding, N) for any N.
+        The objective is the contrast of the relation plus the weighted squared
+        L2-norm of the embeddings and of the relation's parameters.
+        Each call also applies one gradient step to these parameters.
+        """
+        R = self.R[relation]
+        inputs, X = self._embedded_inputs(4)
+
+        objective = R.contrast(*X) + regularization * (self.E.regularizer() + R.regularizer())
+        updates = self.updates(relation, objective, learning_rate)
+        return theano.function(inputs=inputs, outputs=objective, updates=updates)
+
+    def score(self, relation):
+        """ Construct the scoring function for a given relation
+
+        Keyword arguments:
+        relation -- The index of the relation used to score the pairs.
+
+        Returned Theano function:
+        (left, right) -> score
+        The two arguments must have the same shape: (self.n_embedding, N) for any N.
+        """
+        inputs, X = self._embedded_inputs(2)
+        g = self.R[relation].score(*X)
+
+        return theano.function(inputs=inputs, outputs=g)
+
+    def test(self, relation):
+        """ Construct the testing function for a given relation
+
+        Keyword arguments:
+        relation -- The index of the relation for which the model will be tested.
+
+        Returned Theano function:
+        (left, right, Y, threshold) -> error
+        The first two arguments must have the same shape: (self.n_embedding, N) for any N.
+        The third argument is the expected result, declared as a vector; the caller
+        in this file passes N entries equal to 1 where the relation holds and 0 elsewhere.
+        The fourth argument is the threshold at which a relation is considered to hold.
+        The error is the mean, over the pairs, of the mismatches between
+        (score >= threshold) and Y.
+        """
+        entities, X = self._embedded_inputs(2)
+        Y = T.vector()
+        threshold = T.scalar()
+        R = self.R[relation]
+
+        error = T.mean(T.neq(R.score(*X) >= threshold, Y))
+        return theano.function(inputs=entities+[Y, threshold], outputs=error)
+
+def test_ntn():
+    """ Train a one-relation NTN on random dummy data and print its test error.
+
+    The positive pairs are the matching columns of two random one-hot matrices.
+    During training every positive pair is used twice: once against a pair whose
+    left side is redrawn at random, once against a pair whose right side is.
+    After training, the threshold giving the lowest error on a validation set is
+    searched on a regular grid between the lowest and highest validation scores,
+    then the error at that threshold is printed for a separately drawn test set.
+    """
+    # `import scipy` is not guaranteed to load the sparse subpackage: import it explicitly.
+    import scipy.sparse
+
+    n_embedding = 1000
+    d_embedding = 100
+    n_hid = 3
+    learning_rate = 0.1 # FIXME
+    regularization = 0.0001
+    n_epoch = 500
+    n_batches = 10
+    threshold_precision = 10000
+    rng = numpy.random
+    floatX = theano.config.floatX
+    # An epoch holds 2*n_embedding pairs; floor division keeps the slice bounds integral.
+    batch_size = (2 * n_embedding) // n_batches
+
+    print '... Constructing dataset'
+    def rand_embedding(number):
+        """ Return a (n_embedding, number) csc matrix whose columns are distinct random one-hot vectors. """
+        coo_row = rng.permutation(n_embedding)[0:number]
+        coo_col = numpy.arange(number)
+        coo_data = numpy.ones(number, dtype=floatX)
+        randommat = scipy.sparse.coo_matrix((coo_data, (coo_row, coo_col)), shape=(n_embedding, number))
+        return scipy.sparse.csc_matrix(randommat)
+
+    def side_by_side(first, second):
+        """ Concatenate two sparse matrices column-wise into one csc matrix. """
+        return scipy.sparse.hstack([first, second], dtype=floatX, format='csc')
+
+    left = rand_embedding(n_embedding)
+    right = rand_embedding(n_embedding)
+
+    def labelled_pairs():
+        """ Return (left, right, y): the positive pairs labelled 1 followed by random pairs labelled 0. """
+        pairs_left = side_by_side(left, rand_embedding(n_embedding))
+        pairs_right = side_by_side(right, rand_embedding(n_embedding))
+        ones = numpy.ones(shape=(n_embedding,), dtype=floatX)
+        zeros = numpy.zeros(shape=(n_embedding,), dtype=floatX)
+        return pairs_left, pairs_right, numpy.concatenate([ones, zeros])
+
+    print '... Building model'
+    model = NTN(rng, n_embedding, d_embedding, 1, T.tanh, n_hid, "NTN")
+    train = model.train(0, regularization, learning_rate)
+    test = model.test(0)
+    score = model.score(0)
+
+    print '... Training model'
+    for epoch in xrange(n_epoch):
+        order = rng.permutation(n_embedding)
+        shuffled_left, shuffled_right = left[:, order], right[:, order]
+        left_positive = side_by_side(shuffled_left, shuffled_left)
+        right_positive = side_by_side(shuffled_right, shuffled_right)
+        # First half: corrupted left side; second half: corrupted right side.
+        left_negative = side_by_side(rand_embedding(n_embedding), shuffled_left)
+        right_negative = side_by_side(shuffled_right, rand_embedding(n_embedding))
+
+        obj=0.
+        for batch in xrange(n_batches):
+            window = slice(batch*batch_size, (batch+1)*batch_size)
+            lpt = left_positive[:, window]
+            rpt = right_positive[:, window]
+            lnt = left_negative[:, window]
+            rnt = right_negative[:, window]
+            obj = obj + train(lpt, rpt, lnt, rnt)
+
+        if (epoch+1)%100==0:
+            print "Epoch", 1+epoch, "/", n_epoch,
+            print "\tObj: ", obj/n_batches
+
+    print '... Searching threshold'
+    valid_left, valid_right, valid_y = labelled_pairs()
+
+    scores = score(valid_left, valid_right)
+    min_threshold, max_threshold = scores.min(), scores.max()
+    threshold = min_threshold
+    # The error is a mean of 0/1 mismatches, so 2 is above any value it can take.
+    threshold_error = 2
+    for candidate in numpy.linspace(min_threshold, max_threshold, threshold_precision):
+        candidate = candidate.astype(floatX)
+        error = test(valid_left, valid_right, valid_y, candidate)
+        if error < threshold_error:
+            threshold_error = error
+            threshold = candidate
+    print 'Threshold :', threshold
+
+    print '... Testing model'
+    test_left, test_right, test_y = labelled_pairs()
+    error = test(test_left, test_right, test_y, threshold)
+    print 'Error :', error
+
+# Run the dummy-data experiment when this file is executed as a script.
+if __name__ == '__main__':
+    test_ntn()