transform

old TransE-like models
git clone https://esimon.eu/repos/transform.git
Log | Files | Refs | README

commit 2a950f40e9a3981adb72595e3807c00e8211ff55
parent 72e1c4ed6aceb97bf016c84df82cbfadd9d4612d
Author: Étienne Simon <esimon@esimon.eu>
Date:   Mon, 26 May 2014 16:48:05 +0200

Change "embeddings" to "entities" where necessary.

Diffstat:
M dataset.py                  | 16 ++++++++--------
M model.py                    |  2 +-
M utils/build Bordes FB15k.py |  2 +-
M utils/build dummy dataset.py | 20 ++++++++++----------
4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/dataset.py b/dataset.py
@@ -8,16 +8,16 @@ import theano
 class Dataset(object):
     def __init__(self, prefix):
         log('# Loading dataset "{0}"\n'.format(prefix))
-        with open(prefix+'/embeddings', 'r') as file:
-            self.embeddings = file.readlines()
+        with open(prefix+'/entities', 'r') as file:
+            self.entities = file.readlines()
         with open(prefix+'/relations', 'r') as file:
             self.relations = file.readlines()
-        self.number_embeddings = len(self.embeddings)
+        self.number_entities = len(self.entities)
         self.number_relations = len(self.relations)
         self.load_file(prefix, 'train')
         self.load_file(prefix, 'valid')
         self.load_file(prefix, 'test')
-        self.universe = scipy.sparse.eye(len(self.embeddings), format='csr', dtype=theano.config.floatX)
+        self.universe = scipy.sparse.eye(self.number_entities, format='csr', dtype=theano.config.floatX)

     def load_file(self, prefix, name):
         with open('{0}/{1}'.format(prefix, name), 'r') as file:
@@ -25,17 +25,17 @@ class Dataset(object):
         [left, relation, right] = map(list, zip(*content))
         N = len(relation)
         setattr(self, name+'_size', N)
-        setattr(self, name+'_right', scipy.sparse.csr_matrix(([1]*N, right, range(N+1)), shape=(N, self.number_embeddings), dtype=theano.config.floatX))
+        setattr(self, name+'_right', scipy.sparse.csr_matrix(([1]*N, right, range(N+1)), shape=(N, self.number_entities), dtype=theano.config.floatX))
         setattr(self, name+'_relation', scipy.sparse.csr_matrix(([1]*N, relation, range(N+1)), shape=(N, self.number_relations), dtype=theano.config.floatX))
-        setattr(self, name+'_left', scipy.sparse.csr_matrix(([1]*N, left, range(N+1)), shape=(N, self.number_embeddings), dtype=theano.config.floatX))
+        setattr(self, name+'_left', scipy.sparse.csr_matrix(([1]*N, left, range(N+1)), shape=(N, self.number_entities), dtype=theano.config.floatX))

     def training_minibatch(self, rng, batch_size):
         # Sampling corrupted entities
         def sample_matrix():
             row = range(self.train_size+1)
-            col = rng.randint(0, self.number_embeddings, size=self.train_size)
+            col = rng.randint(0, self.number_entities, size=self.train_size)
             data = numpy.ones(self.train_size)
-            random_embeddings = scipy.sparse.csr_matrix((data, col, row), shape=(self.train_size, self.number_embeddings), dtype=theano.config.floatX)
+            random_embeddings = scipy.sparse.csr_matrix((data, col, row), shape=(self.train_size, self.number_entities), dtype=theano.config.floatX)
             return random_embeddings
         corrupted_left = sample_matrix()
         corrupted_right = sample_matrix()
diff --git a/model.py b/model.py
@@ -44,7 +44,7 @@ class Model(object):
         if filepath is None:
             Relations = config['relations']
             self.epoch = 0
-            self.embeddings = Embeddings(config['rng'], dataset.number_embeddings, config['dimension'], self.tag+'.embeddings')
+            self.embeddings = Embeddings(config['rng'], dataset.number_entities, config['dimension'], self.tag+'.embeddings')
             self.relations = Relations(config['rng'], dataset.number_relations, config['dimension'], self.tag+'.relations')
         else:
             log('## Loading model from "{0}"\n'.format(filepath))
diff --git a/utils/build Bordes FB15k.py b/utils/build Bordes FB15k.py
@@ -59,7 +59,7 @@ def compile_dataset(path):
     log('Writting entities...')
     e2i, i2e, r2i, i2r = {}, {}, {}, {}
-    with open(path+'/embeddings', 'w') as file:
+    with open(path+'/entities', 'w') as file:
         i=0
         for entity in entities:
             e2i[entity]=i
diff --git a/utils/build dummy dataset.py b/utils/build dummy dataset.py
@@ -6,11 +6,11 @@ import os
 import shutil
 import random

-def construct_dummy_dataset(kind, prefix, n_embeddings, n_relations):
+def construct_dummy_dataset(kind, prefix, n_entities, n_relations):
     os.mkdir(prefix)
-    with open(prefix+'/embeddings', 'w') as file:
-        for i in xrange(n_embeddings):
+    with open(prefix+'/entities', 'w') as file:
+        for i in xrange(n_entities):
             file.write('E{0}\n'.format(i))

     with open(prefix+'/relations', 'w') as file:
@@ -19,14 +19,14 @@ def construct_dummy_dataset(kind, prefix, n_embeddings, n_relations):
     with open(prefix+'/train', 'w') as file:
         for r in xrange(n_relations):
-            right = range(n_embeddings/2)
+            right = range(n_entities/2)
             random.shuffle(right)
             if kind=='id':
-                for e in xrange(n_embeddings):
+                for e in xrange(n_entities):
                     file.write('{0}\t{1}\t{2}\n'.format(e, r, e))
             elif kind=='halfperm':
-                for e in xrange(n_embeddings/2):
-                    file.write('{0}\t{1}\t{2}\n'.format(e, r, right[e]+n_embeddings/2))
+                for e in xrange(n_entities/2):
+                    file.write('{0}\t{1}\t{2}\n'.format(e, r, right[e]+n_entities/2))
             else:
                 raise error('Unknown kind')
@@ -35,12 +35,12 @@ def construct_dummy_dataset(kind, prefix, n_embeddings, n_relations):
 if __name__ == '__main__':
     if len(sys.argv)<5:
-        print('Usage: {0} {{id, halfperm}} dataset_name n_embeddings n_relations'.format(sys.argv[0]), file=sys.stderr)
+        print('Usage: {0} {{id, halfperm}} dataset_name n_entities n_relations'.format(sys.argv[0]), file=sys.stderr)
         sys.exit(1)
     kind = sys.argv[1]
     prefix = sys.argv[2]
-    n_embeddings = int(sys.argv[3])
+    n_entities = int(sys.argv[3])
     n_relations = int(sys.argv[4])
-    construct_dummy_dataset(kind, prefix, n_embeddings, n_relations)
+    construct_dummy_dataset(kind, prefix, n_entities, n_relations)