# process_file_3.py (1.0 KB)

  1. def build_pseudonymisation_map_flair(sentences, pseudos, acceptance_score, tags="all"):
  2. """
  3. Gets all replacements to be made in pseudonimized text using flair tagged sentences
  4. :param sentences: list of tuples (flair tagged sentences, original)
  5. :param pseudos: list of pseudos to be used
  6. :param acceptance_score: minimum confidence score to accept NER tag
  7. :return: dict: keys are spans in sentence, values are pseudos
  8. """
  9. replacements = {}
  10. mapping = {}
  11. for sentence in sentences:
  12. for entity in sentence[0].get_spans('ner'):
  13. if entity.score > acceptance_score and entity.tag != '0' and (entity.tag in tags or tags == "all"):
  14. # ajouter le score en param
  15. # TODO refaire la gestion des B et I tags
  16. for token in entity.tokens:
  17. if token.text.lower() not in mapping:
  18. mapping[token.text.lower()] = pseudos.pop(0)
  19. replacements[sentence[1][token.idx - 1]] = mapping[token.text.lower()]
  20. return replacements