12345678910111213141516171819202122232425 |
def build_pseudonymisation_map_flair(sentences, pseudos, acceptance_score, tags="all"):
    """
    Collect the replacements to apply to a text from flair-tagged sentences.

    Every token of every accepted 'ner' span is associated with a pseudo.
    Tokens are compared on their lower-cased text, so the same word always
    receives the same pseudo regardless of casing.

    :param sentences: iterable of pairs; item [0] must expose
        ``get_spans('ner')`` and item [1] is indexed with ``token.idx - 1``
        to fetch the original token
    :param pseudos: pseudos to hand out; they are consumed from the front
        with ``pop(0)``, so the caller's object is modified (for a list, an
        IndexError is raised once it is exhausted)
    :param acceptance_score: a span is kept only when its score is strictly
        greater than this value
    :param tags: either "all" or a container tested with ``span.tag in tags``
        (with a plain string this is a substring test)
    :return: dict: keys are the original tokens, values are pseudos
    """
    pseudo_by_word = {}
    span_to_pseudo = {}

    for pair in sentences:
        for span in pair[0].get_spans('ner'):
            # NOTE(review): the tag is compared to '0' (digit zero); possibly
            # the 'O' (outside) tag was intended -- confirm before changing.
            accepted = (
                span.score > acceptance_score
                and span.tag != '0'
                and (span.tag in tags or tags == "all")
            )
            if not accepted:
                continue

            # add the score as a parameter
            # TODO redo the handling of the B and I tags
            for tok in span.tokens:
                word = tok.text.lower()
                if word in pseudo_by_word:
                    pseudo = pseudo_by_word[word]
                else:
                    # First time this word is seen: take the next free pseudo.
                    pseudo = pseudos.pop(0)
                    pseudo_by_word[word] = pseudo
                # flair token indices are used 1-based here, hence the "- 1".
                span_to_pseudo[pair[1][tok.idx - 1]] = pseudo

    return span_to_pseudo
|