# adjix.py
import re

from terroroftinytown.client.errors import UnexpectedNoResult
from terroroftinytown.services.base import BaseService, html_unescape
from terroroftinytown.services.status import URLStatus
  5. class AdjixService(BaseService):
  6. def process_redirect(self, response):
  7. if '<title>Spammer</title>' in response.text or \
  8. '<title>Phisher</title>' in response.text or \
  9. 'It has automatically been terminated.' in response.text or \
  10. 'This link was created by a spammer' in response.text or \
  11. 'This link was created by an unknown spammer' in response.text or \
  12. 'This link was abused by' in response.text or \
  13. '<title>Abuse</title>' in response.text or \
  14. '<title>Link Removed</title>' in response.text or \
  15. '<title>Phishing Link</title>' in response.text or \
  16. '<title>TOS</title>' in response.text:
  17. return (URLStatus.unavailable, None, None)
  18. if not response.text.strip():
  19. return (URLStatus.not_found, None, None)
  20. groups = re.findall((
  21. r'CONTENT="\d+;URL=(.*)(?:\r\n|">)|'
  22. '<frame src="(.*)(?:\r\n|">)|'
  23. 'rel="canonical" href="(.*)"/>'
  24. ),
  25. response.text
  26. )
  27. for group in groups:
  28. text = group[0] or group[1] or group[2]
  29. link = html_unescape(text)
  30. if 'ad.adjix.com' in link:
  31. continue
  32. return (URLStatus.ok, link, response.encoding)
  33. for group in groups:
  34. text = group[0] or group[1] or group[2]
  35. link = html_unescape(text)
  36. return (URLStatus.ok, link, response.encoding)
  37. raise UnexpectedNoResult(
  38. "Didn't get anything for {0}".format(self.current_shortcode))