mtb.py (1378B)
from gbure.model.matching_the_blanks import Model
from gbure.model.fewshot import Model as EvalModel
from torch.optim import Adam as Optimizer
from torch.optim.lr_scheduler import LinearLR as Scheduler


dataset_name = "T-REx"
unsupervised = "mtb"

eval_dataset_name = "FewRel"
valid_name = "7def1330ba9527d6"
shot = 1
way = 5

# From Section 4.1
linguistic_similarity = "dot"

# Observed to be better
latent_metric_scale = "standard"
latent_dot_mean = 1067.65
latent_dot_std = 111.17

# From Section 4.3
blank_probability = 0.7

# From Section 5
transformer_model = "bert-base-cased"
sample_per_epoch = 100000
learning_rate = 3e-5
accumulated_batch_size = 2048

# Stated to be 10 in Section 5, but 5 was found to be better on the T-REx dataset.
max_epoch = 5

# From BERT
mlm_probability = 0.15
mlm_masked_probability = 0.8
mlm_random_probability = 0.1

# Guessed
# post_transformer_layer might need to be changed depending on the subsequent task;
# "layer_norm" gives results within expectations for non-finetuned few-shot.
max_sentence_length = 100  # Maybe this should be 40 (from footnote 2, guessed from ACL slides)
language_model_weight = 1
edge_sampling = "uniform-inverse degree"
clip_gradient = 1

strong_negative_probability = 0.5
weak_negative_probability = 0.0

# Implementation details
seed = 0
amp = True
initial_grad_scale = 1
batch_size = 8
eval_batch_size = 2
workers = 8
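The blank_probability value corresponds to the blanking step of Matching the Blanks (Section 4.3): during pre-training, each entity mention is independently replaced by a single [BLANK] token with probability 0.7, so the encoder cannot rely purely on entity surface forms. A minimal sketch of that step on pre-tokenized input; blank_entities and its arguments are illustrative names, not the repo's API:

import random

def blank_entities(tokens, entity_spans, blank_probability=0.7, blank_token="[BLANK]"):
    """Replace each entity mention by [BLANK] with probability blank_probability.

    entity_spans: non-overlapping (start, end) token index pairs.
    """
    out, cursor = [], 0
    for start, end in sorted(entity_spans):
        out.extend(tokens[cursor:start])
        if random.random() < blank_probability:
            out.append(blank_token)  # mention blanked out
        else:
            out.extend(tokens[start:end])  # mention kept verbatim
        cursor = end
    out.extend(tokens[cursor:])
    return out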
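accumulated_batch_size = 2048 together with batch_size = 8 means gradients are accumulated over 2048 / 8 = 256 mini-batches before each optimizer step. A minimal sketch of that loop, with the clip_gradient = 1 norm clipping applied before each step; model, loader, and optimizer are illustrative stand-ins, and AMP and the LinearLR scheduler are omitted for brevity:

import torch

def train_epoch(model, loader, optimizer,
                batch_size=8, accumulated_batch_size=2048, clip_gradient=1):
    accumulation_steps = accumulated_batch_size // batch_size  # 2048 / 8 = 256
    optimizer.zero_grad()
    for step, batch in enumerate(loader, start=1):
        loss = model(batch)  # assumed to return a scalar loss
        (loss / accumulation_steps).backward()  # average over the effective batch
        if step % accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_gradient)
            optimizer.step()
            optimizer.zero_grad()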
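The three mlm_* values encode the standard BERT corruption scheme: 15% of token positions are selected as prediction targets; of those, 80% are replaced by [MASK], 10% by a random token, and the remaining 10% are left unchanged. A sketch of that scheme on a tensor of token ids, comparable to Hugging Face's DataCollatorForLanguageModeling (special tokens are not excluded here, for brevity):

import torch

def corrupt_for_mlm(input_ids, mask_token_id, vocab_size,
                    mlm_probability=0.15,
                    mlm_masked_probability=0.8,
                    mlm_random_probability=0.1):
    labels = input_ids.clone()
    corrupted = input_ids.clone()

    # Select 15% of positions as prediction targets.
    targets = torch.rand(input_ids.shape) < mlm_probability
    labels[~targets] = -100  # ignored by the cross-entropy loss

    # 80% of targets become [MASK].
    masked = targets & (torch.rand(input_ids.shape) < mlm_masked_probability)
    corrupted[masked] = mask_token_id

    # 10% of targets become a random token, i.e. 0.1 / (1 - 0.8) = 50% of the
    # targets that were not masked; the remaining 10% stay unchanged.
    random_share = mlm_random_probability / (1 - mlm_masked_probability)
    randomized = targets & ~masked & (torch.rand(input_ids.shape) < random_share)
    corrupted[randomized] = torch.randint(vocab_size, input_ids.shape)[randomized]

    return corrupted, labels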