def _make_sentence(self, tokens_left, tokens_right, seq_length=128): len_left = len(tokens_left) len_right = len(tokens_right) cut_len = len_left + len_right - (seq_length - 1) if cut_len > 0: cut_left = len_left - seq_length // 2 cut_right = len_right - (seq_length - 1) // 2 if cut_left < 0: cut_left, cut_right = 0, cut_left + cut_right elif cut_right < 0: cut_left, cut_right = cut_left + cut_right, 0 else: cut_left, cut_right = 0, 0 tokens_left = tokens_left[cut_left:] # tokens_right = tokens_right[:-cut_right] tokens_right = tokens_right[:len(tokens_right) - cut_right] tokens = tokens_left + [self.bert_tokenizer.mask_token] + tokens_right attention_mask = [1] * len(tokens_left) + [1] + [1] * len(tokens_right) if len(tokens) < seq_length: num_padding = seq_length - len(tokens) tokens += [self.bert_tokenizer.pad_token] * num_padding attention_mask += [0] * num_paddi