main_20.py 874 B

1234567891011121314151617181920
  1. def pseudonymize_6(text: str, tagger: SequenceTagger) -> Tuple[str, str]:
  2. """
  3. Perform the pseudonymization action and return both the tagged version (see function "tag_entities") and the pseudonymized version
  4. Args:
  5. text (str): the input text to pseudonymize
  6. tagger (SequenceTagger): the flair model for NER
  7. Returns:
  8. Tuple[str, str]: the original text with tags, and the pseudonymized text
  9. """
  10. with sw.timer("root"):
  11. text_sentences = [Sentence(t.strip()) for t in text.split("\n") if t.strip()]
  12. with sw.timer("model_annotation"):
  13. # inplace function
  14. tagger.predict(
  15. sentences=text_sentences,
  16. mini_batch_size=32,
  17. embedding_storage_mode="none",
  18. verbose=True,
  19. )
  20. return tag_entities(sentences=text_sentences)