LiuFan
/
PrivacyScanData


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
							import re

from terroroftinytown.client.errors import UnhandledStatusCode, \
    UnexpectedNoResult
from terroroftinytown.services.base import BaseService, html_unescape
from terroroftinytown.services.status import URLStatus


class SnipurlService(BaseService):
    """Snipurl-specific response handling layered on ``BaseService``.

    Overrides redirect processing to recognise missing and private
    shortcodes, and overrides unknown-status processing to scrape the
    target URL out of the HTML page served for the shortcode.
    """

    def process_redirect(self, response):
        """Interpret a redirect response for the current shortcode.

        Delegates to ``BaseService.process_redirect`` and then adjusts the
        outcome:

        * if the base class raises ``UnexpectedNoResult`` and the response
          carries no ``Location`` header, the shortcode is reported as
          ``URLStatus.not_found``; with a ``Location`` header present the
          exception is re-raised unchanged;
        * if the resolved link is the site's ``getprivate`` path for the
          current shortcode, it is reported as ``URLStatus.unavailable``.

        Returns:
            A ``(status, link, encoding)`` tuple; ``link`` and ``encoding``
            are ``None`` for the not-found and unavailable cases.
        """
        try:
            status, target, charset = BaseService.process_redirect(
                self, response)
        except UnexpectedNoResult:
            # A Location header means the failure was not simply a missing
            # redirect, so let the original error propagate.
            if 'Location' in response.headers:
                raise
            return URLStatus.not_found, None, None

        if target == "/site/getprivate?snip=" + self.current_shortcode:
            return URLStatus.unavailable, None, None

        return status, target, charset

    def process_unknown_code(self, response):
        """Handle a response whose status code is not a redirect.

        Status codes other than 200 and 500 are passed straight to
        ``BaseService.process_unknown_code``. For 200/500 the shortcode URL
        (built from ``self.params['url_template']``) is fetched a second
        time with ``self.fetch_url`` and the target URL is extracted from
        the returned page text.

        Returns:
            ``(URLStatus.ok, url, response.encoding)`` using the second
            response, or whatever the base class returns for other codes.

        Raises:
            UnhandledStatusCode: the second request answered with a status
                other than 200 or 500.
            UnexpectedNoResult: no target URL could be located in the page.
        """
        page_codes = (200, 500)
        initial_code = response.status_code

        if initial_code not in page_codes:
            return BaseService.process_unknown_code(self, response)

        shortcode_url = self.params['url_template'].format(
            shortcode=self.current_shortcode)
        response = self.fetch_url(shortcode_url, 'get')
        retry_code = response.status_code

        if retry_code not in page_codes:
            raise UnhandledStatusCode(
                "HTTP status changed from %s to %i on second request for %s"
                % (initial_code, retry_code, self.current_shortcode)
            )

        # Markup that surrounds the long URL on the page; the single
        # capture group is the (still HTML-escaped) target.
        target_regex = (
            '<p>You clicked on a snipped URL, which will take you to the '
            'following looong URL: </p> '
            '<div class="quote"><span class="quotet"></span><br/>'
            '(.*?)</div> <br />'
        )

        # Three attempts, each tried only if the previous one failed:
        #   1. the page text as served;
        #   2. the page text with every "<br />\n" sequence removed;
        #   3. the page text as served, letting "." span line breaks.
        found = (
            re.search(target_regex, response.text)
            or re.search(target_regex,
                         response.text.replace("<br />\n", ""))
            or re.search(target_regex, response.text, re.DOTALL)
        )

        if not found:
            raise UnexpectedNoResult(
                "Could not find target URL on preview page for {0}"
                .format(self.current_shortcode))

        target = html_unescape(found.group(1))
        # The captured URL may be wrapped across lines; strip the breaks.
        for line_break in ('\n', '\r'):
            target = target.replace(line_break, '')

        return URLStatus.ok, target, response.encoding