redact_datetime.py 672 B

1234567891011121314151617181920
  1. # https://regexr.com/3iok2
  2. import re
  3. import pandas as pd
  4. def datestimes(df):
  5. df = df.apply(lambda text: re.sub(r'\d{4}-\d{2}-\d{2}', r'xxx', text)) # 2018-03-15
  6. df = df.apply(lambda text: re.sub(r'[\d ]\d:\d\d \w\w', r'xxx', text)) # 05:30 PM
  7. df = df.apply(lambda text: re.sub(r'\d\d:[0-5]\d:[0-5]\d', r'xxx', text)) # 06:08:18
  8. return df
  9. ############################## Test
  10. ###################################
  11. # text = "I eat potato at 05:30 PM and i'm happy, then i eat again at 10:12 AM, " \
  12. # "2018-03-14 06:08:18, he went on 2018-03-15 06:08:18, lets play, 2018-03-15 slkfldfjezli"
  13. # print(datestimes(pd.Series(text)).values)