erome.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. #!/usr/bin/env python3
  2. import logging
  3. import re
  4. from typing import Callable, Optional
  5. import bs4
  6. from praw.models import Submission
  7. from bdfr.exceptions import SiteDownloaderError
  8. from bdfr.resource import Resource
  9. from bdfr.site_authenticator import SiteAuthenticator
  10. from bdfr.site_downloaders.base_downloader import BaseDownloader
  11. logger = logging.getLogger(__name__)
  12. class Erome(BaseDownloader):
  13. def __init__(self, post: Submission):
  14. super().__init__(post)
  15. def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
  16. links = self._get_links(self.post.url)
  17. if not links:
  18. raise SiteDownloaderError('Erome parser could not find any links')
  19. out = []
  20. for link in links:
  21. if not re.match(r'https?://.*', link):
  22. link = 'https://' + link
  23. out.append(Resource(self.post, link, self.erome_download(link)))
  24. return out
  25. @staticmethod
  26. def _get_links(url: str) -> set[str]:
  27. page = Erome.retrieve_url(url)
  28. soup = bs4.BeautifulSoup(page.text, 'html.parser')
  29. front_images = soup.find_all('img', attrs={'class': 'lasyload'})
  30. out = [im.get('data-src') for im in front_images]
  31. videos = soup.find_all('source')
  32. out.extend([vid.get('src') for vid in videos])
  33. return set(out)
  34. @staticmethod
  35. def erome_download(url: str) -> Callable:
  36. download_parameters = {
  37. 'headers': {
  38. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
  39. ' Chrome/88.0.4324.104 Safari/537.36',
  40. 'Referer': 'https://www.erome.com/',
  41. },
  42. }
  43. return lambda global_params: Resource.http_download(url, global_params | download_parameters)