1234567891011121314151617181920 |
def pseudonymize_6(text: str, tagger: SequenceTagger) -> Tuple[str, str]:
    """
    Pseudonymize a text and return both its tagged and pseudonymized forms.

    The text is split on newlines; every line is stripped and blank lines are
    dropped. Each remaining line becomes one `Sentence`, the tagger annotates
    those sentences, and the result is built by `tag_entities`.

    Args:
        text (str): the input text to pseudonymize
        tagger (SequenceTagger): the flair model for NER

    Returns:
        Tuple[str, str]: the original text with tags, and the pseudonymized
        text (as produced by `tag_entities`)
    """
    with sw.timer("root"):
        # Strip each line once, then keep only the non-empty ones.
        stripped_lines = (raw_line.strip() for raw_line in text.split("\n"))
        sentences = [Sentence(line) for line in stripped_lines if line]

        # Only the model call is measured under "model_annotation".
        with sw.timer("model_annotation"):
            # predict() works in place: it annotates `sentences` directly,
            # so its return value is not used.
            tagger.predict(
                sentences=sentences,
                mini_batch_size=32,
                embedding_storage_mode="none",
                verbose=True,
            )

        return tag_entities(sentences=sentences)
|