123456789101112131415161718 |
def urls(df):
    """Replace URL-like substrings in every string of a Series with 'xxx'.

    Two substitutions are applied to each element, in order:

    1. Any line that starts with ``http://`` or ``https://`` is replaced
       as a whole (including the line break characters that follow it).
    2. Any remaining ``word.something`` token, optionally preceded by an
       ``http://`` or ``https://`` scheme, is replaced. A trailing ``.``
       is left in place so sentence punctuation survives.

    Note that step 2 is deliberately loose: any token containing a dot
    between word characters (for example ``3.14``) is masked as well.

    Args:
        df: a pandas Series whose elements are strings. Non-string
            elements make ``re`` raise ``TypeError``.

    Returns:
        A new pandas Series with the URL-like text replaced by ``'xxx'``.
    """
    placeholder = 'xxx'
    # Compile once per call instead of resolving the pattern per element.
    line_url = re.compile(r'^https?:\/\/.*[\r\n]*', flags=re.MULTILINE)
    # The scheme is optional and may be http or https; previously only
    # http:// was consumed here, so a mid-line https URL kept a dangling
    # "https://" in front of the placeholder.
    bare_url = re.compile(r'(?:https?://)?\w+\.\S*[^.\s]', flags=re.MULTILINE)

    def _mask(text):
        return bare_url.sub(placeholder, line_url.sub(placeholder, text))

    return pd.Series(df.apply(_mask))
- ############################################ Test
- #################################################
- # #
- # Texts = [
- # 'http://url.com','http://www.url.com/',
- # 'https://url.com/bla3/blah3/', 'www.google.com'
- # ]
- # print(urls(pd.Series(Texts)))
|