model.py (11079B)
#!/usr/bin/env python2
"""Neural Tensor Network (NTN) model built on Theano.

The module defines the per-relation parameters (``Relation``), the shared
embedding matrix (``Embeddings``), the model tying them together (``NTN``)
and a small self-contained smoke test (``test_ntn``) on random data.
"""

import numpy
import scipy
import scipy.sparse  # `import scipy` alone does not load the submodule
import theano
import theano.tensor as T
import theano.sparse as S


class Relation(object):
    """ Relation class.

    This class has four parameters:
    W -- a (1,2)-tensor "Slices of Tensor Layer", shape (n_in, n_in, n_hid)
    V -- a (1,1)-tensor "Standard Layer", shape (n_hid, 2*n_in)
    b -- a (1,0)-tensor "Bias", shape (n_hid,)
    u -- a (0,1)-tensor "Linear Layer", shape (n_hid,)
    """

    def __init__(self, rng, act, n_in, n_hid, tag):
        """ Initialise the parameters.

        W, V and u are drawn uniformly in [-bound, bound]; b starts at zero.

        Keyword arguments:
        rng -- numpy.random module for number generation
        act -- activation function
        n_in -- dimension of the embeddings
        n_hid -- size of the hidden layer
        tag -- name of the relation for parameter declaration
        """

        wbound = numpy.sqrt(6. / (n_in ** 2 + n_hid))
        vbound = numpy.sqrt(6. / (n_in * 2 + n_hid))
        ubound = numpy.sqrt(6. / (n_hid + 1))

        self.act = act
        if act == T.nnet.sigmoid:
            # The sigmoid activation gets bounds four times wider.
            wbound, vbound, ubound = (4 * i for i in (wbound, vbound, ubound))

        def ip(name, size, bound):
            """ Create a shared variable filled uniformly in [-bound, bound]. """
            values = rng.uniform(low=-bound, high=bound, size=size)
            return theano.shared(
                name=name,
                value=numpy.asarray(values, dtype=theano.config.floatX))

        self.W = ip(tag + ".W", (n_in, n_in, n_hid), wbound)
        self.V = ip(tag + ".V", (n_hid, n_in * 2), vbound)
        self.u = ip(tag + ".u", (n_hid,), ubound)
        self.b = theano.shared(
            name=tag + ".b",
            value=numpy.zeros(shape=(n_hid,), dtype=theano.config.floatX))
        self.params = [self.W, self.V, self.u, self.b]

    def score(self, inputl, inputr):
        """ Compute the score on given embeddings.

        Keyword arguments:
        inputl -- left embeddings, one per column, shape (n_in, N)
        inputr -- right embeddings, one per column, shape (n_in, N)

        Returns a symbolic vector holding one score per column.
        """
        # Contract the left embeddings with W: (N, n_in, n_hid).
        left_w = T.tensordot(inputl, self.W, axes=([0], [0]))
        # Right embeddings as (N, n_in, 1) so they broadcast over the slices.
        right = inputr.transpose().reshape(
            (inputr.shape[1], inputr.shape[0], 1))
        # Sum over the embedding axis, then put the slices first: (n_hid, N).
        bilinear = (right * left_w).sum(1).transpose()
        linear = T.dot(self.V, T.concatenate([inputl, inputr]))
        bias = self.b.dimshuffle(0, 'x')
        return T.dot(self.u, self.act(bilinear + linear + bias))

    def regularizer(self):
        """ Compute the squared L2-norm of the relation's parameters. """
        return sum(T.sum(x ** 2) for x in [self.u, self.V, self.W, self.b])

    def contrast(self, posl, posr, negl, negr):
        """ Compute the contrast on a given set of valid and corrupted embeddings.

        The result is the mean over columns of
        max(0, 1 - score(posl, posr) + score(negl, negr)).
        """
        dist = 1 - self.score(posl, posr) + self.score(negl, negr)
        return T.mean((dist > 0) * dist)

    def updates(self, cost, learning_rate):
        """ Compute the updates to perform w.r.t. a given cost.

        Returns a list of (parameter, new value) pairs, one gradient step
        of size learning_rate per parameter.
        """
        return [(param, param - learning_rate * T.grad(cost=cost, wrt=param))
                for param in self.params]


class Embeddings(object):
    """ Embeddings matrix class.

    This class has one parameter:
    E -- a set of (1,0)-tensor "Embeddings", stored as the columns of a
         (dimension, number) matrix
    """

    def __init__(self, rng, number, dimension, tag):
        """ Initialise the parameter.

        Keyword arguments:
        rng -- numpy.random module for number generation
        number -- number of embeddings
        dimension -- dimension of the embeddings
        tag -- name of the embeddings for parameter declaration
        """

        self.number = number
        self.dimension = dimension

        Ebound = numpy.sqrt(6. / dimension)
        E_values = rng.uniform(low=-Ebound, high=Ebound,
                               size=(dimension, number))
        # Rescale every column (one embedding) to unit L2-norm.
        E_values = E_values / numpy.sqrt(numpy.sum(E_values ** 2, axis=0))
        self.E = theano.shared(
            name=tag,
            value=numpy.asarray(E_values, dtype=theano.config.floatX))

    def embed(self, entity):
        """ Embed an entity.

        entity is a sparse matrix multiplied on the right of E, so each of
        its columns selects (or mixes) columns of E.
        """
        return S.dot(self.E, entity)

    def regularizer(self):
        """ Compute the squared L2-norm of the embeddings parameter. """
        return T.sum(self.E ** 2)

    def updates(self, cost, learning_rate):
        """ Compute the updates to perform w.r.t. a given cost."""
        return [(self.E,
                 self.E - learning_rate * T.grad(cost=cost, wrt=self.E))]


class NTN(object):
    """ Neural Tensor Network class.

    This model has two parameters:
    E -- the embeddings
    R -- the relations
    """

    def __init__(self, rng, n_embedding, d_embedding, n_relation, act, n_hid,
                 tag):
        """ Initialise the parameters.

        Keyword arguments:
        rng -- numpy.random module for number generation
        n_embedding -- number of embeddings
        d_embedding -- dimension of the embeddings
        n_relation -- number of relations
        act -- activation function
        n_hid -- size of the hidden layer ("number of slices")
        tag -- name of the model for parameter declaration
        """
        self.n_embedding = n_embedding
        self.d_embedding = d_embedding
        self.n_relation = n_relation

        self.E = Embeddings(rng, n_embedding, d_embedding, tag + ".E")
        self.R = [Relation(rng, act, d_embedding, n_hid, tag + ".R" + str(r))
                  for r in range(n_relation)]

    def updates(self, relation, cost, learning_rate):
        """ Compute the updates to perform w.r.t. a given cost.

        Only the parameters of the given relation and the embeddings are
        updated.
        """
        return (self.R[relation].updates(cost, learning_rate)
                + self.E.updates(cost, learning_rate))

    def train(self, relation, regularization, learning_rate):
        """ Construct the training function for a given relation

        Keyword arguments:
        relation -- The relation for which the model will be trained.
        regularization -- The regularization weight hyperparameter.
        learning_rate -- The learning rate hyperparameter.

        Returned Theano function:
        (left_positive, right_positive, left_negative, right_negative) -> objective
        The four arguments must have the same shape: (self.n_embedding, N) for any N.
        Each call also applies one gradient step to the parameters.
        """
        R = self.R[relation]

        inputs = [S.csc_matrix() for _ in range(4)]
        X = [self.E.embed(var) for var in inputs]

        objective = R.contrast(*X) + regularization * (
            self.E.regularizer() + R.regularizer())
        updates = self.updates(relation, objective, learning_rate)
        return theano.function(inputs=inputs, outputs=objective,
                               updates=updates)

    def score(self, relation):
        """ Construct the scoring function for a given relation

        Keyword arguments:
        relation -- The relation used to score the pairs.

        Returned Theano function:
        (left, right) -> scores
        The two arguments must have the same shape: (self.n_embedding, N) for any N.
        The result holds one score per column.
        """
        inputs = [S.csc_matrix() for _ in range(2)]
        X = [self.E.embed(var) for var in inputs]
        g = self.R[relation].score(*X)

        return theano.function(inputs=inputs, outputs=g)

    def test(self, relation):
        """ Construct the testing function for a given relation

        Keyword arguments:
        relation -- The relation for which the model will be tested.

        Returned Theano function:
        (left, right, Y, threshold) -> error
        The first two arguments must have the same shape: (self.n_embedding, N) for any N.
        The third argument is the expected result, a vector of length N for the same N as left and right.
        The fourth argument is the threshold at which a relation is considered to hold.
        The result is the fraction of columns where (score >= threshold) differs from Y.
        """
        entities = [S.csc_matrix(), S.csc_matrix()]
        X = [self.E.embed(var) for var in entities]
        Y = T.vector()
        threshold = T.scalar()
        R = self.R[relation]

        error = T.mean(T.neq(R.score(*X) >= threshold, Y))
        return theano.function(inputs=entities + [Y, threshold],
                               outputs=error)


def test_ntn():
    """ Train a one-relation NTN on random data, pick a threshold and report the error.

    The positive pairs are two fixed random one-hot matrices; negatives are
    obtained by replacing one side with fresh random one-hot columns.
    """
    n_embedding = 1000
    d_embedding = 100
    n_hid = 3
    learning_rate = 0.1 # FIXME
    regularization = 0.0001
    n_epoch = 500
    n_batches = 10
    threshold_precision = 10000
    rng = numpy.random
    floatX = theano.config.floatX
    # Floor division keeps the batch size an integer on Python 2 and 3 alike.
    batch_size = (2 * n_embedding) // n_batches

    print('... Constructing dataset')

    def rand_embedding(number):
        """ Return a sparse (n_embedding, number) matrix with a single one per column. """
        coo_row = rng.permutation(n_embedding)[0:number]
        coo_col = numpy.arange(number)
        coo_data = numpy.ones(number, dtype=floatX)
        randommat = scipy.sparse.coo_matrix(
            (coo_data, (coo_row, coo_col)), shape=(n_embedding, number))
        return scipy.sparse.csc_matrix(randommat)

    def stack(blocks):
        """ Concatenate sparse blocks column-wise into one floatX CSC matrix. """
        return scipy.sparse.hstack(blocks, dtype=floatX, format='csc')

    def labelled_set():
        """ Build (left, right, y): the positive pairs followed by random pairs. """
        # The left side is drawn before the right side, as in a plain
        # sequence of statements, so the random stream is consumed in order.
        set_left = stack([left, rand_embedding(n_embedding)])
        set_right = stack([right, rand_embedding(n_embedding)])
        set_y = numpy.concatenate([
            numpy.ones(shape=(n_embedding,), dtype=floatX),
            numpy.zeros(shape=(n_embedding,), dtype=floatX)])
        return set_left, set_right, set_y

    left = rand_embedding(n_embedding)
    right = rand_embedding(n_embedding)

    print('... Building model')
    model = NTN(rng, n_embedding, d_embedding, 1, T.tanh, n_hid, "NTN")
    train_fn = model.train(0, regularization, learning_rate)
    test_fn = model.test(0)
    score_fn = model.score(0)

    print('... Training model')
    for epoch in range(n_epoch):
        order = rng.permutation(n_embedding)
        left_positive = stack([left[:, order], left[:, order]])
        right_positive = stack([right[:, order], right[:, order]])
        # First half corrupts the left entity, second half the right one.
        left_negative = stack([rand_embedding(n_embedding), left[:, order]])
        right_negative = stack([right[:, order], rand_embedding(n_embedding)])

        obj = 0.
        for batch in range(n_batches):
            window = slice(batch * batch_size, (batch + 1) * batch_size)
            obj = obj + train_fn(left_positive[:, window],
                                 right_positive[:, window],
                                 left_negative[:, window],
                                 right_negative[:, window])

        if (epoch + 1) % 100 == 0:
            # Single formatted string: same text on Python 2 and Python 3.
            print("Epoch %s / %s \tObj:  %s"
                  % (1 + epoch, n_epoch, obj / n_batches))

    print('... Searching threshold')
    valid_left, valid_right, valid_y = labelled_set()

    scores = score_fn(valid_left, valid_right)
    min_threshold, max_threshold = scores.min(), scores.max()
    threshold = min_threshold
    threshold_error = 2  # above any achievable error rate
    for candidate in numpy.linspace(min_threshold, max_threshold,
                                    threshold_precision):
        candidate = candidate.astype(floatX)
        error = test_fn(valid_left, valid_right, valid_y, candidate)
        # Strict comparison: the lowest threshold wins among ties.
        if error < threshold_error:
            threshold_error = error
            threshold = candidate
    print('Threshold : %s' % (threshold,))

    print('... Testing model')
    test_left, test_right, test_y = labelled_set()
    error = test_fn(test_left, test_right, test_y, threshold)
    print('Error : %s' % (error,))


if __name__ == '__main__':
    test_ntn()