gbure

Graph-based approaches to unsupervised relation extraction evaluated as a few-shot problem
git clone https://esimon.eu/repos/gbure.git

mtb.py (1378B)


from gbure.model.matching_the_blanks import Model
from gbure.model.fewshot import Model as EvalModel
from torch.optim import Adam as Optimizer
from torch.optim.lr_scheduler import LinearLR as Scheduler


dataset_name = "T-REx"
unsupervised = "mtb"

eval_dataset_name = "FewRel"
valid_name = "7def1330ba9527d6"
shot = 1
way = 5
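# 5-way 1-shot evaluation: each episode presents 5 candidate relations with a
# single labelled example each, the standard FewRel setting.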

# From Section 4.1
linguistic_similarity = "dot"
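# "dot" presumably scores a query-candidate pair by the dot product of the two
# sentence encodings, the similarity used in Matching the Blanks
# (Soares et al., 2019).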

# Observed to be better
latent_metric_scale = "standard"
latent_dot_mean = 1067.65
latent_dot_std = 111.17
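# These statistics presumably standardize the latent dot-product scores:
# standardized = (score - latent_dot_mean) / latent_dot_std.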

# From Section 4.3
blank_probability = 0.7
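# As in Matching the Blanks (Soares et al., 2019), each entity mention is
# replaced by a [BLANK] token with this probability (their alpha = 0.7), so the
# encoder cannot rely solely on the entities' surface forms.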

# From Section 5
transformer_model = "bert-base-cased"
sample_per_epoch = 100000
learning_rate = 3e-5
accumulated_batch_size = 2048
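# With batch_size = 8 (below), reaching an effective batch of 2048 presumably
# takes 2048 / 8 = 256 gradient-accumulation steps per optimizer update
# (assuming single-process training).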

# Stated to be 10 in Section 5, but 5 was found to work better on the T-REx dataset.
max_epoch = 5

# From BERT
mlm_probability = 0.15
mlm_masked_probability = 0.8
mlm_random_probability = 0.1
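# Standard BERT masking scheme: 15% of tokens are selected; of those, 80%
# become [MASK], 10% become a random token, and the remaining 10% are left
# unchanged.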

# Guessed
# post_transformer_layer might need to be changed depending on the downstream task;
# "layer_norm" gives results within expectations for non-fine-tuned few-shot.
max_sentence_length = 100  # Maybe should be 40 (from footnote 2, guessed from ACL slides)
language_model_weight = 1
edge_sampling = "uniform-inverse degree"
clip_gradient = 1
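# language_model_weight presumably balances the MLM loss against the matching
# loss in the combined objective. "uniform-inverse degree" edge sampling
# presumably draws pairs with probability inversely proportional to entity
# degree, so frequent entities do not dominate training.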

strong_negative_probability = 0.5
weak_negative_probability = 0.0
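# In Matching the Blanks terms, strong negatives share one entity with the
# positive pair while weak negatives share none; here half the negatives are
# strong and weak negatives are disabled (interpretation assumed from
# Soares et al., 2019).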

# Implementation details
seed = 0
amp = True
initial_grad_scale = 1
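# amp toggles PyTorch automatic mixed precision; initial_grad_scale is
# presumably passed to the loss scaler (cf. init_scale of
# torch.cuda.amp.GradScaler).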
batch_size = 8
eval_batch_size = 2
workers = 8
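
The settings above are plain module-level Python names (Model, EvalModel,
Optimizer, Scheduler, plus the hyperparameters). A minimal sketch of loading
such a config file as a module, assuming gbure reads the module attributes
directly (the path and loader name below are illustrative, not gbure's actual
API):

    import importlib.util

    def load_config(path):
        """Load a gbure-style config: a Python file whose globals are the settings."""
        spec = importlib.util.spec_from_file_location("config", path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    config = load_config("gbure/config/mtb.py")  # illustrative path
    optimizer_class = config.Optimizer           # torch.optim.Adam
    print(config.learning_rate, config.way, config.shot)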