commit 70514cf66048db50eec73546cb9b1beffe12ee35
parent 24057ca725bd1467abdac8850aa28bd1ec0b355d
Author: Étienne Simon <esimon@esimon.eu>
Date: Fri, 18 Apr 2014 16:29:03 +0200
Genericise relations
Diffstat:
5 files changed, 55 insertions(+), 41 deletions(-)
diff --git a/main.py b/main.py
@@ -22,7 +22,6 @@ if __name__ == '__main__':
for k, v in config.iteritems():
if isinstance(v, basestring) and v.startswith('python:'):
config[k] = eval(v[7:])
- datalog_filepath = config['datalog filepath']
data = Dataset(data)
if model_path is None:
diff --git a/model.py b/model.py
@@ -98,7 +98,7 @@ class Model(object):
self.train_function = theano.function(inputs=list(inputs), outputs=[criterion], updates=self.updates(criterion))
self.normalise_function = theano.function(inputs=[], outputs=[], updates=self.embeddings.normalise_updates())
- relation = T.addbroadcast(relation, 0)
+ relation = map(lambda r: T.addbroadcast(r, 0), relation)
left_broadcasted = T.addbroadcast(left_positive, 0)
right_broadcasted = T.addbroadcast(right_positive, 0)
left_score = self.config['similarity'](self.relations.apply(left_broadcasted, relation), right_positive)
@@ -164,7 +164,7 @@ class Model(object):
log('Validation epoch {:<5}'.format(epoch))
(valid_mean, valid_top10) = self.error('valid')
log(' valid mean: {0:<15} valid top10: {1:<15}'.format(valid_mean, valid_top10))
- datalog(epoch, valid_mean, valid_top10)
+ datalog(self.config['datalog filepath'], epoch, valid_mean, valid_top10)
if not hasattr(self, 'best_mean') or valid_mean < self.best_mean:
self.best_mean = valid_mean
log('(best so far')
diff --git a/relations/base.py b/relations/base.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python2
+
+import numpy
+import theano
+import theano.tensor as T
+import theano.sparse as S
+
+class Base_relation(object):
+    """ Base relation class.
+
+    Holds one Theano shared variable per entry of the `parameters` dictionary.
+    Each variable is reachable as an attribute named after its key and, in
+    sorted key order, through the `parameters` list.
+    """
+
+    def __init__(self, rng, number, parameters, tag):
+        """ Initialise the parameters.
+
+        Keyword arguments:
+        rng -- module for random number generation
+        number -- number of relations
+        parameters -- dictionary of the form {name: shape} describing the relations' parameters (each shape is a tuple)
+        tag -- name of the relations for parameter declaration
+        """
+
+        self.number = number
+        self.parameters = []
+
+        # Iterate in sorted name order: plain dictionary order is arbitrary,
+        # yet lookup() hands the parameters back as a positional list that
+        # callers index, so the order has to be well defined.
+        for name, shape in sorted(parameters.iteritems()):
+            bound = numpy.sqrt(6. / sum(shape))
+            values = rng.uniform(low=-bound, high=bound, size=(number,)+shape)
+            # Rescale by the Euclidean norm taken along axis 1.
+            values = values / numpy.sqrt(numpy.sum(values **2, axis=1))[:, numpy.newaxis]
+            var = theano.shared(name=tag+'.'+name, value=numpy.asarray(values, dtype=theano.config.floatX))
+            # Expose the variable both by name and through the ordered list.
+            setattr(self, name, var)
+            self.parameters.append(var)
+
+    def lookup(self, relations):
+        """ Embed given relations.
+
+        Returns a list with one entry per parameter, in self.parameters order,
+        each being S.dot(relations, parameter).
+        """
+        return [ S.dot(relations, parameter) for parameter in self.parameters ]
+
+    def updates(self, cost, learning_rate):
+        """ Compute the updates to perform a SGD step w.r.t. a given cost.
+
+        Keyword arguments:
+        cost -- The cost to optimise.
+        learning_rate -- The learning rate used for gradient descent.
+
+        Returns a list of (parameter, new value) pairs, one per parameter.
+        """
+        return [ (parameter, parameter - learning_rate * T.grad(cost=cost, wrt=parameter)) for parameter in self.parameters ]
diff --git a/relations/translations.py b/relations/translations.py
@@ -5,45 +5,19 @@ import theano
import theano.tensor as T
import theano.sparse as S
-class Translations(object):
+from base import *
+
+class Translations(Base_relation):
""" Translations class.
This class has one parameter:
R -- the translations
"""
def __init__(self, rng, number, dimension, tag):
- """ Initialise the parameter.
-
- Keyword arguments:
- rng -- module for random number generation
- number -- number of relation
- dimension -- dimension of the embeddings
- tag -- name of the relations for parameter declaration
- """
-
- self.number = number
- self.dimension = dimension
-
- R_bound = numpy.sqrt(6. / dimension)
- R_values = rng.uniform(low=-R_bound, high=R_bound, size=(number, dimension))
- R_values = R_values / numpy.sqrt(numpy.sum(R_values **2, axis=1))[:, numpy.newaxis]
- self.R = theano.shared(name=tag, value=numpy.asarray(R_values, dtype=theano.config.floatX))
-
- self.parameters = [self.R]
-
- def lookup(self, relations):
- """ Embed given relations. """
- return S.dot(relations, self.R)
+ """ Initialise the parameter.
+
+ Keyword arguments:
+ rng -- module for random number generation
+ number -- number of relations
+ dimension -- dimension of the embeddings
+ tag -- name of the relations for parameter declaration
+ """
+ # Declare the single parameter R with shape (dimension,); the base class constructor creates it.
+ parameters = { 'R': (dimension,) }
+ super(Translations, self).__init__(rng, number, parameters, tag)
def apply(self, inputs, relations):
""" Apply the given relations to a given input. """
- return relations + inputs
-
- def updates(self, cost, learning_rate):
- """ Compute the updates to perform a SGD step w.r.t. a given cost.
-
- Keyword arguments:
- cost -- The cost to optimise.
- learning_rate -- The learning rate used for gradient descent.
- """
- return [(self.R, self.R - learning_rate * T.grad(cost=cost, wrt=self.R))]
+ # relations is indexed positionally; element 0 is added to the inputs (presumably the embedded R from lookup() -- confirm against callers).
+ return relations[0] + inputs
diff --git a/utils/log.py b/utils/log.py
@@ -5,8 +5,6 @@ def log(message):
print(message, end='')
sys.stdout.flush()
-datalog_filepath=None
-
-def datalog(*data):
- with open(filepath, 'a') as file:
- file.write('\t'.join(data)+'\n')
+def datalog(datalog_filepath, *data):
+    """ Append the given values to the file at datalog_filepath as one tab-separated line. """
+    with open(datalog_filepath, 'a') as output:
+        fields = [str(value) for value in data]
+        output.write('\t'.join(fields) + '\n')