commit 6ccff23ecf2f6f601ea69d3fef7eec1821e74a07
parent 1bf3985b3db97221b126f3e9a070232ef3a138f3
Author: Étienne Simon <esimon@esimon.eu>
Date: Mon, 7 Apr 2014 15:39:36 +0200
NTN model with SGD on dummy data
Diffstat:
A model.py | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 263 insertions(+), 0 deletions(-)
diff --git a/model.py b/model.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python2
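+# Neural Tensor Network (NTN, Socher et al. 2013) trained with minibatch
+# SGD on synthetic one-hot data.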
+import numpy
+import scipy.sparse
+import theano
+import theano.tensor as T
+import theano.sparse as S
+
+class Relation(object):
+ """ Relation class.
+
+ This class has four parameters:
+ W -- a (1,2)-tensor "Slices of Tensor Layer"
+ V -- a (1,1)-tensor "Standard Layer"
+ b -- a (1,0)-tensor "Bias"
+ u -- a (0,1)-tensor "Linear Layer"
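+
+    The score of an entity pair (e_l, e_r) follows the Neural Tensor
+    Network of Socher et al. (2013):
+        g(e_l, e_r) = u' . act(e_l' W e_r + V [e_l; e_r] + b)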
+ """
+
+ def __init__(self, rng, act, n_in, n_hid, tag):
+ """ Initialise the parameters.
+
+        Arguments:
+        rng -- numpy.random module for random number generation
+ act -- activation function
+ n_in -- dimension of the embeddings
+ n_hid -- size of the hidden layer
+ tag -- name of the relation for parameter declaration
+ """
+
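+        # Uniform initialisation bounds in the style of Glorot & Bengio
+        # (2010), sqrt(6 / (fan_in + fan_out)) for each parameter shape;
+        # they are scaled by 4 below when the activation is the sigmoid.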
+ wbound = numpy.sqrt(6./(n_in**2 + n_hid))
+ vbound = numpy.sqrt(6./(n_in*2 + n_hid))
+ ubound = numpy.sqrt(6./(n_hid + 1))
+
+ self.act = act
+        if act == T.nnet.sigmoid:
+ wbound, vbound, ubound = (4*i for i in (wbound, vbound, ubound))
+
+        def ip(name, size, bound):
+            value = numpy.asarray(rng.uniform(low=-bound, high=bound, size=size),
+                                  dtype=theano.config.floatX)
+            return theano.shared(name=name, value=value)
+
+ self.W = ip(tag+".W", (n_in, n_in, n_hid), wbound)
+ self.V = ip(tag+".V", (n_hid, n_in*2), vbound)
+ self.u = ip(tag+".u", (n_hid,), ubound)
+        self.b = theano.shared(name=tag+".b",
+                               value=numpy.zeros(shape=(n_hid, 1), dtype=theano.config.floatX),
+                               broadcastable=[False, True])
+ self.params = [ self.W, self.V, self.u, self.b ]
+
+ def score(self, inputl, inputr):
+ """ Compute the score on given embeddings. """
+        bilinear = (inputr.transpose().reshape((inputr.shape[1], inputr.shape[0], 1))
+                    * T.tensordot(inputl, self.W, axes=([0], [0]))).sum(1).transpose()
+ linear = T.dot(self.V, T.concatenate([inputl, inputr]))
+ return T.dot(self.u, self.act(bilinear + linear + self.b))
+
+ def regularizer(self):
+ """ Compute the squared L2-norm of the relation's parameters. """
+ return sum(T.sum(x**2) for x in [self.u, self.V, self.W, self.b])
+
+ def contrast(self, posl, posr, negl, negr):
+ """ Compute the contrast on a given set of valid and corrupted embeddings. """
+ dist = 1 - self.score(posl, posr) + self.score(negl, negr)
+ return T.mean((dist>0)*dist)
+
+ def updates(self, cost, learning_rate):
+ """ Compute the updates to perform w.r.t. a given cost."""
+ return [ (param, param - learning_rate * T.grad(cost=cost, wrt=param)) for param in self.params ]
+
+class Embeddings(object):
+ """ Embeddings matrix class.
+
+ This class has one parameter:
+ E -- a set of (1,0)-tensor "Embeddings"
+ """
+ def __init__(self, rng, number, dimension, tag):
+ """ Initialise the parameter.
+
+        Arguments:
+        rng -- numpy.random module for random number generation
+ number -- number of embeddings
+ dimension -- dimension of the embeddings
+ tag -- name of the embeddings for parameter declaration
+ """
+
+ self.number = number
+ self.dimension = dimension
+
+ Ebound = numpy.sqrt(6. / dimension)
+ E_values = rng.uniform(low=-Ebound, high=Ebound, size=(dimension, number))
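+        # Normalise each embedding (column of E) to unit L2 norm.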
+        E_values = E_values / numpy.sqrt(numpy.sum(E_values**2, axis=0))
+ self.E = theano.shared(name=tag, value=numpy.asarray(E_values, dtype=theano.config.floatX))
+
+ def embed(self, entity):
+ """ Embed an entity. """
+ return S.dot(self.E, entity)
+
+ def regularizer(self):
+ """ Compute the squared L2-norm of the embeddings parameter. """
+ return T.sum(self.E**2)
+
+ def updates(self, cost, learning_rate):
+ """ Compute the updates to perform w.r.t. a given cost."""
+ return [(self.E, self.E - learning_rate * T.grad(cost=cost, wrt=self.E))]
+
+class NTN(object):
+ """ Neural Tensor Network class.
+
+ This model has two parameters:
+ E -- the embeddings
+ R -- the relations
+ """
+
+ def __init__(self, rng, n_embedding, d_embedding, n_relation, act, n_hid, tag):
+ """ Initialise the parameters.
+
+        Arguments:
+        rng -- numpy.random module for random number generation
+ n_embedding -- number of embeddings
+ d_embedding -- dimension of the embeddings
+ n_relation -- number of relations
+ act -- activation function
+ n_hid -- size of the hidden layer ("number of slices")
+ tag -- name of the model for parameter declaration
+ """
+ self.n_embedding = n_embedding
+ self.d_embedding = d_embedding
+ self.n_relation = n_relation
+
+ self.E = Embeddings(rng, n_embedding, d_embedding, tag+".E")
+ self.R = [ Relation(rng, act, d_embedding, n_hid, tag+".R"+str(r)) for r in xrange(n_relation) ]
+
+ def updates(self, relation, cost, learning_rate):
+ """ Compute the updates to perform w.r.t. a given cost."""
+ return self.R[relation].updates(cost, learning_rate) + self.E.updates(cost, learning_rate)
+
+ def train(self, relation, regularization, learning_rate):
+ """ Construct the training function for a given relation
+
+        Arguments:
+ relation -- The relation for which the model will be trained.
+ regularization -- The regularization weight hyperparameter.
+ learning_rate -- The learning rate hyperparameter.
+
+ Returned Theano function:
+ (left_positive, right_positive, left_negative, right_negative) -> objective
+ The four arguments must have the same shape: (self.n_embedding, N) for any N.
+ """
+ R = self.R[relation]
+
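+        # Four symbolic sparse inputs: the positive (left, right) pairs
+        # and their corrupted counterparts, as one-hot columns.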
+ inputs = tuple(S.csc_matrix() for _ in xrange(4))
+        X = [self.E.embed(var) for var in inputs]
+
+ objective = R.contrast(*X) + regularization * (self.E.regularizer() + R.regularizer())
+ updates = self.updates(relation, objective, learning_rate)
+ return theano.function(inputs=list(inputs), outputs=objective, updates=updates)
+
+ def score(self, relation):
+ """ Construct the scoring function for a given relation
+
+        Arguments:
+        relation -- The relation for which scores will be computed.
+
+ Returned Theano function:
+        (left, right) -> score
+ The two arguments must have the same shape: (self.n_embedding, N) for any N.
+ """
+ inputs = tuple(S.csc_matrix() for _ in xrange(2))
+        X = [self.E.embed(var) for var in inputs]
+ g = self.R[relation].score(*X)
+
+ return theano.function(inputs=list(inputs), outputs=g)
+
+ def test(self, relation):
+ """ Construct the testing function for a given relation
+
+        Arguments:
+ relation -- The relation for which the model will be tested.
+
+ Returned Theano function:
+        (left, right, Y, threshold) -> error
+ The first two arguments must have the same shape: (self.n_embedding, N) for any N.
+        The third argument is the expected result; its shape must be (N,) for the same N as left and right.
+ The fourth argument is the threshold at which a relation is considered to hold.
+ """
+ entities = (S.csc_matrix(), S.csc_matrix())
+        X = [self.E.embed(var) for var in entities]
+ Y = T.vector()
+ threshold = T.scalar()
+        R = self.R[relation]
+
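+        # Classification error: fraction of examples where the decision
+        # (score >= threshold) disagrees with the 0/1 label Y.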
+ error = T.mean(T.neq(R.score(*X) >= threshold, Y))
+ return theano.function(inputs=list(entities)+[Y, threshold], outputs=error)
+
+def test_ntn():
+ n_embedding = 1000
+ d_embedding = 100
+ n_hid = 3
+ learning_rate = 0.1 # FIXME
+ regularization = 0.0001
+ n_epoch = 500
+ n_batches = 10
+ threshold_precision = 10000
+    rng = numpy.random
+    batch_size = (2 * n_embedding) // n_batches  # each epoch uses 2*n_embedding pairs
+
+ print '... Constructing dataset'
+ def rand_embedding(number):
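+        # Build a sparse (n_embedding, number) matrix whose columns are
+        # one-hot indicators of distinct random entities.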
+ coo_row = rng.permutation(n_embedding)[0:number]
+ coo_col = range(number)
+ coo_data = numpy.ones(number, dtype=theano.config.floatX)
+ randommat = scipy.sparse.coo_matrix((coo_data, (coo_row, coo_col)), shape=(n_embedding, number))
+ return scipy.sparse.csc_matrix(randommat)
+
+ left = rand_embedding(n_embedding)
+ right = rand_embedding(n_embedding)
+
+ print '... Building model'
+ model = NTN(rng, n_embedding, d_embedding, 1, T.tanh, n_hid, "NTN")
+ train = model.train(0, regularization, learning_rate)
+ test = model.test(0)
+ score = model.score(0)
+
+ print '... Training model'
+ for epoch in xrange(n_epoch):
+ order = rng.permutation(n_embedding)
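+        # Duplicate the positive pairs; corrupt the left entity in the
+        # first half and the right entity in the second half.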
+ left_positive = scipy.sparse.hstack([left[:, order], left[:, order]], dtype=theano.config.floatX, format='csc')
+ right_positive = scipy.sparse.hstack([right[:, order], right[:, order]], dtype=theano.config.floatX, format='csc')
+ left_negative = scipy.sparse.hstack([rand_embedding(n_embedding), left[:, order]], dtype=theano.config.floatX, format='csc')
+ right_negative = scipy.sparse.hstack([right[:, order], rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
+
+        obj = 0.
+ for batch in xrange(n_batches):
+ lpt = left_positive[:, batch*batch_size:(batch+1)*batch_size]
+ rpt = right_positive[:, batch*batch_size:(batch+1)*batch_size]
+ lnt = left_negative[:, batch*batch_size:(batch+1)*batch_size]
+ rnt = right_negative[:, batch*batch_size:(batch+1)*batch_size]
+ obj = obj + train(lpt, rpt, lnt, rnt)
+
+        if (epoch + 1) % 100 == 0:
+ print "Epoch", 1+epoch, "/", n_epoch,
+ print "\tObj: ", obj/n_batches
+
+ print '... Searching threshold'
+ valid_left = scipy.sparse.hstack([left, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
+ valid_right = scipy.sparse.hstack([right, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
+ valid_y = numpy.concatenate([numpy.ones(shape=(n_embedding,), dtype=theano.config.floatX), numpy.zeros(shape=(n_embedding,), dtype=theano.config.floatX)])
+
+ scores = score(valid_left, valid_right)
+ min_threshold, max_threshold = min(scores), max(scores)
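+    # Scan evenly spaced candidate thresholds and keep the one with the
+    # lowest classification error on the validation scores.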
+ threshold = min_threshold
+    threshold_error = 2  # sentinel above the maximum possible error (1.0)
+ for candidate in numpy.linspace(min_threshold, max_threshold, threshold_precision):
+ error = test(valid_left, valid_right, valid_y, candidate.astype(theano.config.floatX))
+ if error < threshold_error:
+ threshold_error = error
+ threshold = candidate.astype(theano.config.floatX)
+    print 'Threshold:', threshold
+
+ print '... Testing model'
+ test_left = scipy.sparse.hstack([left, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
+ test_right = scipy.sparse.hstack([right, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc')
+ test_y = numpy.concatenate([numpy.ones(shape=(n_embedding,), dtype=theano.config.floatX), numpy.zeros(shape=(n_embedding,), dtype=theano.config.floatX)])
+ error = test(test_left, test_right, test_y, threshold)
+    print 'Error:', error
+
+if __name__ == '__main__':
+ test_ntn()