tntn

Theano reimplementation of Neural Tensor Networks (NTN)
git clone https://esimon.eu/repos/tntn.git
Log | Files | Refs | README

commit 6ccff23ecf2f6f601ea69d3fef7eec1821e74a07
parent 1bf3985b3db97221b126f3e9a070232ef3a138f3
Author: Étienne Simon <esimon@esimon.eu>
Date:   Mon,  7 Apr 2014 15:39:36 +0200

NTN model with SGD on dummy data

Diffstat:
A model.py | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 263 insertions(+), 0 deletions(-)

diff --git a/model.py b/model.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python2 +import numpy +import scipy +import theano +import theano.tensor as T +import theano.sparse as S + +class Relation(object): + """ Relation class. + + This class has four parameters: + W -- a (1,2)-tensor "Slices of Tensor Layer" + V -- a (1,1)-tensor "Standard Layer" + b -- a (1,0)-tensor "Bias" + u -- a (0,1)-tensor "Linear Layer" + """ + + def __init__(self, rng, act, n_in, n_hid, tag): + """ Initialise the parameters. + + Keyword arguments: + rng -- numpy.random module for number generation + act -- activation function + n_in -- dimension of the embeddings + n_hid -- size of the hidden layer + tag -- name of the relation for parameter declaration + """ + + wbound = numpy.sqrt(6./(n_in**2 + n_hid)) + vbound = numpy.sqrt(6./(n_in*2 + n_hid)) + ubound = numpy.sqrt(6./(n_hid + 1)) + + self.act = act + if act==T.nnet.sigmoid: + wbound, vbound, ubound = (4*i for i in (wbound, vbound, ubound)) + + def ip(name, size, bound): + return theano.shared(name=name, value=numpy.asarray(rng.uniform(low=-bound, high=bound, size=size), dtype=theano.config.floatX)) + + self.W = ip(tag+".W", (n_in, n_in, n_hid), wbound) + self.V = ip(tag+".V", (n_hid, n_in*2), vbound) + self.u = ip(tag+".u", (n_hid,), ubound) + self.b = theano.shared(name=tag+".b", value=numpy.zeros(shape=(n_hid,1), dtype=theano.config.floatX), broadcastable=[False, True]) + self.params = [ self.W, self.V, self.u, self.b ] + + def score(self, inputl, inputr): + """ Compute the score on given embeddings. """ + bilinear = ((inputr.transpose().reshape((inputr.shape[1], inputr.shape[0], 1))) * T.tensordot(inputl, self.W, axes=([0], [0]))).sum(1).transpose() + linear = T.dot(self.V, T.concatenate([inputl, inputr])) + return T.dot(self.u, self.act(bilinear + linear + self.b)) + + def regularizer(self): + """ Compute the squared L2-norm of the relation's parameters. 
""" + return sum(T.sum(x**2) for x in [self.u, self.V, self.W, self.b]) + + def contrast(self, posl, posr, negl, negr): + """ Compute the contrast on a given set of valid and corrupted embeddings. """ + dist = 1 - self.score(posl, posr) + self.score(negl, negr) + return T.mean((dist>0)*dist) + + def updates(self, cost, learning_rate): + """ Compute the updates to perform w.r.t. a given cost.""" + return [ (param, param - learning_rate * T.grad(cost=cost, wrt=param)) for param in self.params ] + +class Embeddings(object): + """ Embeddings matrix class. + + This class has one parameter: + E -- a set of (1,0)-tensor "Embeddings" + """ + def __init__(self, rng, number, dimension, tag): + """ Initialise the parameter. + + Keyword arguments: + rng -- numpy.random module for number generation + number -- number of embeddings + dimension -- dimension of the embeddings + tag -- name of the embeddings for parameter declaration + """ + + self.number = number + self.dimension = dimension + + Ebound = numpy.sqrt(6. / dimension) + E_values = rng.uniform(low=-Ebound, high=Ebound, size=(dimension, number)) + E_values = E_values / numpy.sqrt(numpy.sum(E_values **2, axis=0)) + self.E = theano.shared(name=tag, value=numpy.asarray(E_values, dtype=theano.config.floatX)) + + def embed(self, entity): + """ Embed an entity. """ + return S.dot(self.E, entity) + + def regularizer(self): + """ Compute the squared L2-norm of the embeddings parameter. """ + return T.sum(self.E**2) + + def updates(self, cost, learning_rate): + """ Compute the updates to perform w.r.t. a given cost.""" + return [(self.E, self.E - learning_rate * T.grad(cost=cost, wrt=self.E))] + +class NTN(object): + """ Neural Tensor Network class. + + This model has two parameters: + E -- the embeddings + R -- the relations + """ + + def __init__(self, rng, n_embedding, d_embedding, n_relation, act, n_hid, tag): + """ Initialise the parameters. 
+ + Keyword arguments: + rng -- numpy.random module for number generation + n_embedding -- number of embeddings + d_embedding -- dimension of the embeddings + n_relation -- number of relations + act -- activation function + n_hid -- size of the hidden layer ("number of slices") + tag -- name of the model for parameter declaration + """ + self.n_embedding = n_embedding + self.d_embedding = d_embedding + self.n_relation = n_relation + + self.E = Embeddings(rng, n_embedding, d_embedding, tag+".E") + self.R = [ Relation(rng, act, d_embedding, n_hid, tag+".R"+str(r)) for r in xrange(n_relation) ] + + def updates(self, relation, cost, learning_rate): + """ Compute the updates to perform w.r.t. a given cost.""" + return self.R[relation].updates(cost, learning_rate) + self.E.updates(cost, learning_rate) + + def train(self, relation, regularization, learning_rate): + """ Construct the training function for a given relation + + Keyword arguments: + relation -- The relation for which the model will be trained. + regularization -- The regularization weight hyperparameter. + learning_rate -- The learning rate hyperparameter. + + Returned Theano function: + (left_positive, right_positive, left_negative, right_negative) -> objective + The four arguments must have the same shape: (self.n_embedding, N) for any N. + """ + R = self.R[relation] + + inputs = tuple(S.csc_matrix() for _ in xrange(4)) + X = map((lambda var: self.E.embed(var)), inputs) + + objective = R.contrast(*X) + regularization * (self.E.regularizer() + R.regularizer()) + updates = self.updates(relation, objective, learning_rate) + return theano.function(inputs=list(inputs), outputs=objective, updates=updates) + + def score(self, relation): + """ Construct the scoring function for a given relation + + Keyword arguments: + relation -- The relation for which the model will be trained. + + Returned Theano function: + (left, right) -> objective + The two arguments must have the same shape: (self.n_embedding, N) for any N. 
+ """ + inputs = tuple(S.csc_matrix() for _ in xrange(2)) + X = map((lambda var: self.E.embed(var)), inputs) + g = self.R[relation].score(*X) + + return theano.function(inputs=list(inputs), outputs=g) + + def test(self, relation): + """ Construct the testing function for a given relation + + Keyword arguments: + relation -- The relation for which the model will be tested. + + Returned Theano function: + (left, right, Y, threshold) -> score + The first two arguments must have the same shape: (self.n_embedding, N) for any N. + The third argument is the expected result, its shape must be (1, N) for the same N as left and right. + The fourth argument is the threshold at which a relation is considered to hold. + """ + entities = (S.csc_matrix(), S.csc_matrix()) + X = map((lambda var: self.E.embed(var)), entities) + Y = T.vector() + threshold = T.scalar() + R=self.R[relation] + + error = T.mean(T.neq(R.score(*X) >= threshold, Y)) + return theano.function(inputs=list(entities)+[Y, threshold], outputs=error) + +def test_ntn(): + n_embedding = 1000 + d_embedding = 100 + n_hid = 3 + learning_rate = 0.1 # FIXME + regularization = 0.0001 + n_epoch = 500 + n_batches = 10 + threshold_precision = 10000 + rng=numpy.random + batch_size = (2 * n_embedding) / n_batches + + print '... Constructing dataset' + def rand_embedding(number): + coo_row = rng.permutation(n_embedding)[0:number] + coo_col = range(number) + coo_data = numpy.ones(number, dtype=theano.config.floatX) + randommat = scipy.sparse.coo_matrix((coo_data, (coo_row, coo_col)), shape=(n_embedding, number)) + return scipy.sparse.csc_matrix(randommat) + + left = rand_embedding(n_embedding) + right = rand_embedding(n_embedding) + + print '... Building model' + model = NTN(rng, n_embedding, d_embedding, 1, T.tanh, n_hid, "NTN") + train = model.train(0, regularization, learning_rate) + test = model.test(0) + score = model.score(0) + + print '... 
Training model' + for epoch in xrange(n_epoch): + order = rng.permutation(n_embedding) + left_positive = scipy.sparse.hstack([left[:, order], left[:, order]], dtype=theano.config.floatX, format='csc') + right_positive = scipy.sparse.hstack([right[:, order], right[:, order]], dtype=theano.config.floatX, format='csc') + left_negative = scipy.sparse.hstack([rand_embedding(n_embedding), left[:, order]], dtype=theano.config.floatX, format='csc') + right_negative = scipy.sparse.hstack([right[:, order], rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc') + + obj=0. + for batch in xrange(n_batches): + lpt = left_positive[:, batch*batch_size:(batch+1)*batch_size] + rpt = right_positive[:, batch*batch_size:(batch+1)*batch_size] + lnt = left_negative[:, batch*batch_size:(batch+1)*batch_size] + rnt = right_negative[:, batch*batch_size:(batch+1)*batch_size] + obj = obj + train(lpt, rpt, lnt, rnt) + + if (epoch+1)%100==0: + print "Epoch", 1+epoch, "/", n_epoch, + print "\tObj: ", obj/n_batches + + print '... Searching threshold' + valid_left = scipy.sparse.hstack([left, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc') + valid_right = scipy.sparse.hstack([right, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc') + valid_y = numpy.concatenate([numpy.ones(shape=(n_embedding,), dtype=theano.config.floatX), numpy.zeros(shape=(n_embedding,), dtype=theano.config.floatX)]) + + scores = score(valid_left, valid_right) + min_threshold, max_threshold = min(scores), max(scores) + threshold = min_threshold + threshold_error = 2 + for candidate in numpy.linspace(min_threshold, max_threshold, threshold_precision): + error = test(valid_left, valid_right, valid_y, candidate.astype(theano.config.floatX)) + if error < threshold_error: + threshold_error = error + threshold = candidate.astype(theano.config.floatX) + print 'Threshold :', threshold + + print '... 
Testing model' + test_left = scipy.sparse.hstack([left, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc') + test_right = scipy.sparse.hstack([right, rand_embedding(n_embedding)], dtype=theano.config.floatX, format='csc') + test_y = numpy.concatenate([numpy.ones(shape=(n_embedding,), dtype=theano.config.floatX), numpy.zeros(shape=(n_embedding,), dtype=theano.config.floatX)]) + error = test(test_left, test_right, test_y, threshold) + print 'Error :', error + +if __name__ == '__main__': + test_ntn()