main_33.py 895 B

123456789101112131415161718192021222324
  def pseudonymize(
      self,
      original_text: str,
      presidio_response: List[RecognizerResult],
      count: int,
  ) -> List[str]:
      """
      Produce ``count`` fake variants of a text by re-filling its detected entities.

      The text is anonymized with an ``AnonymizerEngine`` created without any
      custom operators, the entity placeholders in the anonymized text are
      rewritten as ``{{ENTITY_TYPE}}`` template fields, and the template is
      rendered ``count`` times through ``self.parse`` (defined elsewhere).

      :param original_text: str containing the original text
      :param presidio_response: list of results from Presidio, to be used to know where entities are
      :param count: number of perturbations to return
      :return: List[str] with fake perturbations of original text
      """
      import re  # local import so the method does not depend on file-level imports

      # Sort by start offset so entities are handed to the anonymizer in text order.
      presidio_response = sorted(presidio_response, key=lambda resp: resp.start)
      anonymizer_engine = AnonymizerEngine()
      anonymized_result = anonymizer_engine.anonymize(
          text=original_text, analyzer_results=presidio_response
      )
      templated_text = anonymized_result.text

      # NOTE(review): relies on the anonymizer's default output format
      # "<ENTITY_TYPE>" -- confirm if custom operators are ever configured.
      # Only placeholders for entity types that were actually detected are
      # converted; a blanket "<" / ">" replacement would also mangle angle
      # brackets that are part of the original text (e.g. "a < b").
      entity_types = sorted({resp.entity_type for resp in presidio_response})
      if entity_types:
          placeholder = re.compile(
              "<(" + "|".join(re.escape(name) for name in entity_types) + ")>"
          )
          templated_text = placeholder.sub(
              lambda match: "{{" + match.group(1) + "}}", templated_text
          )

      # Each parse call renders the same template once.
      return [self.parse(templated_text, add_spans=False) for _ in range(count)]