taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git

commit 3f3ab2bfe3ebfa266d433012be1c89c722d63352
parent 32b078f28add3d22529e55aeac6674d924e9b510
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Thu,  2 Jul 2015 11:15:37 -0400

Unify parameters for joint_simple_tgtcls_111_cswdtx_bigger{,_dropout}

Diffstat:
M config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py         |  9 ++++-----
M config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py |  7 +++----
M config/memory_network_1.py                                  |  1 +
M model/memory_network.py                                     | 12 ++++++++----
M train.py                                                    |  5 ++++-
5 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py
@@ -29,14 +29,14 @@ dim_embeddings = [
 
 # Common network part
 dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings)
-dim_hidden = [1000]
+dim_hidden = [5000]
 
 # Destination prediction part
-dim_hidden_dest = [400]
+dim_hidden_dest = [1000]
 dim_output_dest = dest_tgtcls.shape[0]
 
 # Time prediction part
-dim_hidden_time = [400]
+dim_hidden_time = [500]
 dim_output_time = len(time_tgtcls)
 
 # Cost ratio between distance cost and time cost
@@ -46,8 +46,7 @@ embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-learning_rate = 0.000001
-momentum = 0.99
+# use adadelta, so no learning_rate or momentum
 
 batch_size = 200
 valid_set = 'cuts/test_times_0'
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -46,11 +46,10 @@ embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-# apply_dropout = True
-# dropout_p = 0.5
+apply_dropout = True
+dropout_p = 0.5
 
-learning_rate = 0.001
-momentum = 0.9
+# use adadelta, so no learning_rate or momentum
 
 batch_size = 200
 valid_set = 'cuts/test_times_0'
diff --git a/config/memory_network_1.py b/config/memory_network_1.py
@@ -31,6 +31,7 @@ candidate_encoder.dim_hidden = [100, 100, 100]
 candidate_encoder.weights_init = IsotropicGaussian(0.01)
 candidate_encoder.biases_init = Constant(0.001)
 
+normalize_representation = True
 
 embed_weights_init = IsotropicGaussian(0.001)
 
diff --git a/model/memory_network.py b/model/memory_network.py
@@ -21,11 +21,11 @@ class Model(Initializable):
         self.context_embedder = ContextEmbedder(config)
 
-        self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden],
-                                  dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden,
+        self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + [config.representation_activation()],
+                                  dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden + [config.representation_size],
                                   name='prefix_encoder')
 
-        self.candidate_encoder = MLP(activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden],
-                                     dims=[config.candidate_encoder.dim_input] + config.candidate_encoder.dim_hidden,
+        self.candidate_encoder = MLP(activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden] + [config.representation_activation()],
+                                     dims=[config.candidate_encoder.dim_input] + config.candidate_encoder.dim_hidden + [config.representation_size],
                                      name='candidate_encoder')
 
         self.softmax = Softmax()
@@ -46,11 +46,15 @@ class Model(Initializable):
         prefix_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.prefix_extremities.items())
         prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1)
         prefix_representation = self.prefix_encoder.apply(prefix_inputs)
+        if self.config.normalize_representation:
+            prefix_representation = prefix_representation / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
 
         candidate_embeddings = tuple(self.context_embedder.apply(**{k: kwargs['candidate_%s'%k] for k in self.context_embedder.inputs }))
         candidate_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.candidate_extremities.items())
         candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1)
         candidate_representation = self.candidate_encoder.apply(candidate_inputs)
+        if self.config.normalize_representation:
+            candidate_representation = candidate_representation / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))
 
         similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
         similarity = self.softmax.apply(similarity_score)
diff --git a/train.py b/train.py
@@ -70,6 +70,7 @@ class SaveLoadParams(SimpleExtension):
         with open(self.path, 'w') as f:
             logger.info('Saving parameters to %s...'%self.path)
             cPickle.dump(self.model.get_param_values(), f, protocol=cPickle.HIGHEST_PROTOCOL)
+            logger.info('Done saving.')
 
     def do_load(self):
         try:
@@ -153,8 +154,10 @@ if __name__ == "__main__":
         Printing(every_n_batches=1000),
 
         SaveLoadParams(dump_path, cg,
-                       before_training=config.load_model, # before training -> load params
+                       before_training=True, # before training -> load params
                        every_n_batches=1000, # every N batches -> save params
+                       after_epoch=True, # after epoch -> save params
+                       after_training=True, # after training -> save params
                       ),
     ]
 
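A note on the "use adadelta, so no learning_rate or momentum" comment in the configs above: AdaDelta (Zeiler, 2012) derives per-parameter step sizes from running averages of squared gradients and squared updates, so the hand-tuned learning_rate/momentum pair becomes unnecessary. A minimal NumPy sketch of the update rule; the decay and eps values here are illustrative defaults, not taken from this repo:

    import numpy as np

    def adadelta_step(param, grad, acc_grad2, acc_delta2, decay=0.95, eps=1e-6):
        # running average of squared gradients, E[g^2]
        acc_grad2 = decay * acc_grad2 + (1 - decay) * grad ** 2
        # step size is a ratio of RMS accumulators; no global learning rate
        delta = -np.sqrt(acc_delta2 + eps) / np.sqrt(acc_grad2 + eps) * grad
        # running average of squared updates, E[dx^2]
        acc_delta2 = decay * acc_delta2 + (1 - decay) * delta ** 2
        return param + delta, acc_grad2, acc_delta2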
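The new normalize_representation flag makes both encoders emit unit-length vectors, so the tensor.dot that produces similarity_score computes a cosine similarity. A plain NumPy stand-in for the Theano expressions in model/memory_network.py; the batch sizes and dimensions below are made up for illustration:

    import numpy as np

    def l2_normalize(x):
        # same row-wise normalization as the diff's tensor.sqrt expression
        return x / np.sqrt((x ** 2).sum(axis=1, keepdims=True))

    prefix = l2_normalize(np.random.randn(4, 10))      # 4 trip prefixes, dim 10
    candidates = l2_normalize(np.random.randn(8, 10))  # 8 candidate trips
    similarity_score = prefix.dot(candidates.T)        # cosine values in [-1, 1]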
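The train.py change leans on Blocks trigger kwargs: SimpleExtension registers keyword arguments such as before_training=True or after_epoch=True as callback conditions and dispatches them through do(). A hedged sketch of that mechanism using a hypothetical extension, not the repo's actual SaveLoadParams:

    from blocks.extensions import SimpleExtension

    class CheckpointSketch(SimpleExtension):  # hypothetical name
        def __init__(self, path, **kwargs):
            # trigger kwargs (before_training=True, every_n_batches=N, ...)
            # are consumed and registered by SimpleExtension itself
            super(CheckpointSketch, self).__init__(**kwargs)
            self.path = path

        def do(self, which_callback, *args):
            # Blocks passes in the name of the condition that fired
            if which_callback == 'before_training':
                print('would load parameters from %s' % self.path)
            else:
                print('would save parameters to %s' % self.path)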