__main___1.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435
  1. def main():
  2. # The following is a simulated dataset that have address, sensitive numerics, emails and entity names.
  3. Texts = [
  4. "I had an ok experience and I live close by 2000 Vernier rd grosse pointe woods MI 48236. I had a good time at 2999 vernier",
  5. "I used to know someone who lived at, 2025 magna rd grosse pointe MI 48237 they loved it and told us many cool stories about the lake",
  6. "I liked their services 22000 moross rd, detroit MI 48236",
  7. "lots of diverse life experiences at 6233 orlina st, apt 1001, detroit MI 48217",
  8. "2013 1st ambonstreet", "245e ousterkade 9",
  9. "oh yeah, I had a great time at 20225 Liverni a really really good time",
  10. '1231451469', '42.2', '123 145 1469', '123.145.1469', '(123) 145.1469', '(123) 145 1469',
  11. '(123) 145–1469', '123–145–1469', '+1(123) 145–1469 ', '1234567890999111', '123HELLO56',
  12. '-123', '04/04/1998', 'it’s015–96–0342 you know my number call me', '+123–145–1469',
  13. '48236–123', 'I live close to (42.293564, -83.638916)', '123-4-5648', '1-234-5-6789',
  14. "I used these two email mouafek.ayadi@esprit.tn, moufak.ayadi@oddo-bhf.com",
  15. "this is another email afek.aadi@esit.com",
  16. "they work at Microsoft, and my name is Sami",
  17. "Google CEO is Sunder Pichai",
  18. 'http://url.com', 'http://www.url.com/',
  19. 'https://url.com/bla3/blah3/', 'www.google.com'
  20. ]
  21. return redact_all.redact(pd.Series(Texts))
  22. if __name__ == "__main__":
  23. # main()
  24. # print(main())
  25. print('Redaction Started...\n')
  26. sleep(1)
  27. data = main()
  28. data.to_excel(Path, sheet_name='Redacted_Data', index=False)
  29. print('\nRedaction Finished and Data exported successfully!')
  30. data.to_excel(Path, sheet_name='emails', index=False)