main_27.py 1015 B

1234567891011121314151617181920212223242526
  1. def _make_sentence(self, tokens_left, tokens_right, seq_length=128):
  2. len_left = len(tokens_left)
  3. len_right = len(tokens_right)
  4. cut_len = len_left + len_right - (seq_length - 1)
  5. if cut_len > 0:
  6. cut_left = len_left - seq_length // 2
  7. cut_right = len_right - (seq_length - 1) // 2
  8. if cut_left < 0:
  9. cut_left, cut_right = 0, cut_left + cut_right
  10. elif cut_right < 0:
  11. cut_left, cut_right = cut_left + cut_right, 0
  12. else:
  13. cut_left, cut_right = 0, 0
  14. tokens_left = tokens_left[cut_left:]
  15. # tokens_right = tokens_right[:-cut_right]
  16. tokens_right = tokens_right[:len(tokens_right) - cut_right]
  17. tokens = tokens_left + [self.bert_tokenizer.mask_token] + tokens_right
  18. attention_mask = [1] * len(tokens_left) + [1] + [1] * len(tokens_right)
  19. if len(tokens) < seq_length:
  20. num_padding = seq_length - len(tokens)
  21. tokens += [self.bert_tokenizer.pad_token] * num_padding
  22. attention_mask += [0] * num_paddi