commit 2a950f40e9a3981adb72595e3807c00e8211ff55
parent 72e1c4ed6aceb97bf016c84df82cbfadd9d4612d
Author: Étienne Simon <esimon@esimon.eu>
Date: Mon, 26 May 2014 16:48:05 +0200
Change "embeddings" with "entities" where necessary.
Diffstat:
4 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/dataset.py b/dataset.py
@@ -8,16 +8,16 @@ import theano
class Dataset(object):
def __init__(self, prefix):
log('# Loading dataset "{0}"\n'.format(prefix))
- with open(prefix+'/embeddings', 'r') as file:
- self.embeddings = file.readlines()
+ with open(prefix+'/entities', 'r') as file:
+ self.entities = file.readlines()
with open(prefix+'/relations', 'r') as file:
self.relations = file.readlines()
- self.number_embeddings = len(self.embeddings)
+ self.number_entities = len(self.entities)
self.number_relations = len(self.relations)
self.load_file(prefix, 'train')
self.load_file(prefix, 'valid')
self.load_file(prefix, 'test')
- self.universe = scipy.sparse.eye(len(self.embeddings), format='csr', dtype=theano.config.floatX)
+ self.universe = scipy.sparse.eye(self.number_entities, format='csr', dtype=theano.config.floatX)
def load_file(self, prefix, name):
with open('{0}/{1}'.format(prefix, name), 'r') as file:
@@ -25,17 +25,17 @@ class Dataset(object):
[left, relation, right] = map(list, zip(*content))
N = len(relation)
setattr(self, name+'_size', N)
- setattr(self, name+'_right', scipy.sparse.csr_matrix(([1]*N, right, range(N+1)), shape=(N, self.number_embeddings), dtype=theano.config.floatX))
+ setattr(self, name+'_right', scipy.sparse.csr_matrix(([1]*N, right, range(N+1)), shape=(N, self.number_entities), dtype=theano.config.floatX))
setattr(self, name+'_relation', scipy.sparse.csr_matrix(([1]*N, relation, range(N+1)), shape=(N, self.number_relations), dtype=theano.config.floatX))
- setattr(self, name+'_left', scipy.sparse.csr_matrix(([1]*N, left, range(N+1)), shape=(N, self.number_embeddings), dtype=theano.config.floatX))
+ setattr(self, name+'_left', scipy.sparse.csr_matrix(([1]*N, left, range(N+1)), shape=(N, self.number_entities), dtype=theano.config.floatX))
def training_minibatch(self, rng, batch_size):
# Sampling corrupted entities
def sample_matrix():
row = range(self.train_size+1)
- col = rng.randint(0, self.number_embeddings, size=self.train_size)
+ col = rng.randint(0, self.number_entities, size=self.train_size)
data = numpy.ones(self.train_size)
- random_embeddings = scipy.sparse.csr_matrix((data, col, row), shape=(self.train_size, self.number_embeddings), dtype=theano.config.floatX)
+ random_embeddings = scipy.sparse.csr_matrix((data, col, row), shape=(self.train_size, self.number_entities), dtype=theano.config.floatX)
return random_embeddings
corrupted_left = sample_matrix()
corrupted_right = sample_matrix()
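
Note (illustration, not part of the diff): the csr_matrix((data, indices, indptr), shape) form used throughout this hunk builds one one-hot row per triple. A minimal sketch with made-up toy sizes:

    import scipy.sparse

    N, number_entities = 3, 5            # toy sizes, purely illustrative
    right = [2, 0, 4]                    # entity index of each triple's right-hand side
    one_hot = scipy.sparse.csr_matrix(
        ([1] * N, right, range(N + 1)),  # data, column indices, row pointers: one entry per row
        shape=(N, number_entities))
    print(one_hot.toarray())
    # [[0 0 1 0 0]
    #  [1 0 0 0 0]
    #  [0 0 0 0 1]]

Multiplying such a matrix by the entity embedding matrix presumably selects the embedding of each triple's entity, which is why the matrix width must equal the number of entities.
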
diff --git a/model.py b/model.py
@@ -44,7 +44,7 @@ class Model(object):
if filepath is None:
Relations = config['relations']
self.epoch = 0
- self.embeddings = Embeddings(config['rng'], dataset.number_embeddings, config['dimension'], self.tag+'.embeddings')
+ self.embeddings = Embeddings(config['rng'], dataset.number_entities, config['dimension'], self.tag+'.embeddings')
self.relations = Relations(config['rng'], dataset.number_relations, config['dimension'], self.tag+'.relations')
else:
log('## Loading model from "{0}"\n'.format(filepath))
diff --git a/utils/build Bordes FB15k.py b/utils/build Bordes FB15k.py
@@ -59,7 +59,7 @@ def compile_dataset(path):
log('Writing entities...')
e2i, i2e, r2i, i2r = {}, {}, {}, {}
- with open(path+'/embeddings', 'w') as file:
+ with open(path+'/entities', 'w') as file:
i=0
for entity in entities:
e2i[entity]=i
diff --git a/utils/build dummy dataset.py b/utils/build dummy dataset.py
@@ -6,11 +6,11 @@ import os
import shutil
import random
-def construct_dummy_dataset(kind, prefix, n_embeddings, n_relations):
+def construct_dummy_dataset(kind, prefix, n_entities, n_relations):
os.mkdir(prefix)
- with open(prefix+'/embeddings', 'w') as file:
- for i in xrange(n_embeddings):
+ with open(prefix+'/entities', 'w') as file:
+ for i in xrange(n_entities):
file.write('E{0}\n'.format(i))
with open(prefix+'/relations', 'w') as file:
@@ -19,14 +19,14 @@ def construct_dummy_dataset(kind, prefix, n_embeddings, n_relations):
with open(prefix+'/train', 'w') as file:
for r in xrange(n_relations):
- right = range(n_embeddings/2)
+ right = range(n_entities/2)
random.shuffle(right)
if kind=='id':
- for e in xrange(n_embeddings):
+ for e in xrange(n_entities):
file.write('{0}\t{1}\t{2}\n'.format(e, r, e))
elif kind=='halfperm':
- for e in xrange(n_embeddings/2):
- file.write('{0}\t{1}\t{2}\n'.format(e, r, right[e]+n_embeddings/2))
+ for e in xrange(n_entities/2):
+ file.write('{0}\t{1}\t{2}\n'.format(e, r, right[e]+n_entities/2))
else:
raise error('Unknown kind')
@@ -35,12 +35,12 @@ def construct_dummy_dataset(kind, prefix, n_embeddings, n_relations):
if __name__ == '__main__':
if len(sys.argv)<5:
- print('Usage: {0} {{id, halfperm}} dataset_name n_embeddings n_relations'.format(sys.argv[0]), file=sys.stderr)
+ print('Usage: {0} {{id, halfperm}} dataset_name n_entities n_relations'.format(sys.argv[0]), file=sys.stderr)
sys.exit(1)
kind = sys.argv[1]
prefix = sys.argv[2]
- n_embeddings = int(sys.argv[3])
+ n_entities = int(sys.argv[3])
n_relations = int(sys.argv[4])
- construct_dummy_dataset(kind, prefix, n_embeddings, n_relations)
+ construct_dummy_dataset(kind, prefix, n_entities, n_relations)
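
Note (illustration, not part of the diff): with the renamed arguments the dummy-dataset script would be invoked roughly as

    python 'utils/build dummy dataset.py' halfperm toy_dataset 10 2

where toy_dataset is a hypothetical output directory name. For kind 'halfperm' and n_entities = 4, each relation r would get the training pairs (0, right[0] + 2) and (1, right[1] + 2), with right a shuffled permutation of [0, 1], i.e. the first half of the entities mapped one-to-one onto the second half.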