snipurl.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import re
  2. from terroroftinytown.client.errors import UnhandledStatusCode, \
  3. UnexpectedNoResult
  4. from terroroftinytown.services.base import BaseService, html_unescape
  5. from terroroftinytown.services.status import URLStatus
  6. class SnipurlService(BaseService):
  7. def process_redirect(self, response):
  8. try:
  9. url_status, link, encoding = BaseService.process_redirect(
  10. self, response)
  11. except UnexpectedNoResult:
  12. if 'Location' not in response.headers:
  13. return URLStatus.not_found, None, None
  14. else:
  15. raise
  16. if link == "/site/getprivate?snip=" + self.current_shortcode:
  17. return URLStatus.unavailable, None, None
  18. else:
  19. return url_status, link, encoding
  20. def process_unknown_code(self, response):
  21. first_status_code = response.status_code
  22. if first_status_code not in (200, 500):
  23. return BaseService.process_unknown_code(self, response)
  24. url = self.params['url_template'].format(
  25. shortcode=self.current_shortcode)
  26. response = self.fetch_url(url, 'get')
  27. second_status_code = response.status_code
  28. if second_status_code not in (200, 500):
  29. raise UnhandledStatusCode(
  30. "HTTP status changed from %s to %i on second request for %s"
  31. % (first_status_code, second_status_code,
  32. self.current_shortcode)
  33. )
  34. pattern = "<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class=\"quote\"><span class=\"quotet\"></span><br/>(.*?)</div> <br />"
  35. match = re.search(pattern, response.text)
  36. if not match:
  37. text = response.text.replace("<br />\n", "")
  38. match = re.search(pattern, text)
  39. if not match:
  40. pattern = "<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class=\"quote\"><span class=\"quotet\"></span><br/>(.*?)</div> <br />"
  41. match = re.search(pattern, response.text, re.DOTALL)
  42. if not match:
  43. raise UnexpectedNoResult(
  44. "Could not find target URL on preview page for {0}"
  45. .format(self.current_shortcode))
  46. url = html_unescape(match.group(1))
  47. url = url.replace('\n', '').replace('\r', '')
  48. return URLStatus.ok, url, response.encoding