download_factory.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. #!/usr/bin/env python3
  2. # coding=utf-8
  3. import re
  4. import urllib.parse
  5. from typing import Type
  6. from bdfr.exceptions import NotADownloadableLinkError
  7. from bdfr.site_downloaders.base_downloader import BaseDownloader
  8. from bdfr.site_downloaders.direct import Direct
  9. from bdfr.site_downloaders.erome import Erome
  10. from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
  11. from bdfr.site_downloaders.gallery import Gallery
  12. from bdfr.site_downloaders.gfycat import Gfycat
  13. from bdfr.site_downloaders.imgur import Imgur
  14. from bdfr.site_downloaders.pornhub import PornHub
  15. from bdfr.site_downloaders.redgifs import Redgifs
  16. from bdfr.site_downloaders.self_post import SelfPost
  17. from bdfr.site_downloaders.vidble import Vidble
  18. from bdfr.site_downloaders.vreddit import VReddit
  19. from bdfr.site_downloaders.youtube import Youtube
  20. class DownloadFactory:
  21. @staticmethod
  22. def pull_lever(url: str) -> Type[BaseDownloader]:
  23. sanitised_url = DownloadFactory.sanitise_url(url)
  24. if re.match(r'(i\.|m\.)?imgur', sanitised_url):
  25. return Imgur
  26. elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url):
  27. return Redgifs
  28. elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \
  29. not DownloadFactory.is_web_resource(sanitised_url):
  30. return Direct
  31. elif re.match(r'erome\.com.*', sanitised_url):
  32. return Erome
  33. elif re.match(r'reddit\.com/gallery/.*', sanitised_url):
  34. return Gallery
  35. elif re.match(r'patreon\.com.*', sanitised_url):
  36. return Gallery
  37. elif re.match(r'gfycat\.', sanitised_url):
  38. return Gfycat
  39. elif re.match(r'reddit\.com/r/', sanitised_url):
  40. return SelfPost
  41. elif re.match(r'(m\.)?youtu\.?be', sanitised_url):
  42. return Youtube
  43. elif re.match(r'i\.redd\.it.*', sanitised_url):
  44. return Direct
  45. elif re.match(r'v\.redd\.it.*', sanitised_url):
  46. return VReddit
  47. elif re.match(r'pornhub\.com.*', sanitised_url):
  48. return PornHub
  49. elif re.match(r'vidble\.com', sanitised_url):
  50. return Vidble
  51. elif YtdlpFallback.can_handle_link(sanitised_url):
  52. return YtdlpFallback
  53. else:
  54. raise NotADownloadableLinkError(f'No downloader module exists for url {url}')
  55. @staticmethod
  56. def sanitise_url(url: str) -> str:
  57. beginning_regex = re.compile(r'\s*(www\.?)?')
  58. split_url = urllib.parse.urlsplit(url)
  59. split_url = split_url.netloc + split_url.path
  60. split_url = re.sub(beginning_regex, '', split_url)
  61. return split_url
  62. @staticmethod
  63. def is_web_resource(url: str) -> bool:
  64. web_extensions = (
  65. 'asp',
  66. 'aspx',
  67. 'cfm',
  68. 'cfml',
  69. 'css',
  70. 'htm',
  71. 'html',
  72. 'js',
  73. 'php',
  74. 'php3',
  75. 'xhtml',
  76. )
  77. if re.match(rf'(?i).*/.*\.({"|".join(web_extensions)})$', url):
  78. return True
  79. else:
  80. return False