__main__.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. #!/usr/bin/env python3
  2. import logging
  3. import sys
  4. import click
  5. from bdfr.archiver import Archiver
  6. from bdfr.cloner import RedditCloner
  7. from bdfr.configuration import Configuration
  8. from bdfr.downloader import RedditDownloader
# Root logger (getLogger() is called without a name). setup_logging() attaches
# its console handler to this object, and the command functions log through it.
logger = logging.getLogger()
  10. _common_options = [
  11. click.argument('directory', type=str),
  12. click.option('--authenticate', is_flag=True, default=None),
  13. click.option('--config', type=str, default=None),
  14. click.option('--opts', type=str, default=None),
  15. click.option('--disable-module', multiple=True, default=None, type=str),
  16. click.option('--exclude-id', default=None, multiple=True),
  17. click.option('--exclude-id-file', default=None, multiple=True),
  18. click.option('--file-scheme', default=None, type=str),
  19. click.option('--folder-scheme', default=None, type=str),
  20. click.option('--ignore-user', type=str, multiple=True, default=None),
  21. click.option('--include-id-file', multiple=True, default=None),
  22. click.option('--log', type=str, default=None),
  23. click.option('--saved', is_flag=True, default=None),
  24. click.option('--search', default=None, type=str),
  25. click.option('--submitted', is_flag=True, default=None),
  26. click.option('--subscribed', is_flag=True, default=None),
  27. click.option('--time-format', type=str, default=None),
  28. click.option('--upvoted', is_flag=True, default=None),
  29. click.option('-L', '--limit', default=None, type=int),
  30. click.option('-l', '--link', multiple=True, default=None, type=str),
  31. click.option('-m', '--multireddit', multiple=True, default=None, type=str),
  32. click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')),
  33. default=None),
  34. click.option('-s', '--subreddit', multiple=True, default=None, type=str),
  35. click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None),
  36. click.option('-u', '--user', type=str, multiple=True, default=None),
  37. click.option('-v', '--verbose', default=None, count=True),
  38. ]
  39. _downloader_options = [
  40. click.option('--make-hard-links', is_flag=True, default=None),
  41. click.option('--max-wait-time', type=int, default=None),
  42. click.option('--no-dupes', is_flag=True, default=None),
  43. click.option('--search-existing', is_flag=True, default=None),
  44. click.option('--skip', default=None, multiple=True),
  45. click.option('--skip-domain', default=None, multiple=True),
  46. click.option('--skip-subreddit', default=None, multiple=True),
  47. click.option('--min-score', type=int, default=None),
  48. click.option('--max-score', type=int, default=None),
  49. click.option('--min-score-ratio', type=float, default=None),
  50. click.option('--max-score-ratio', type=float, default=None),
  51. ]
  52. _archiver_options = [
  53. click.option('--all-comments', is_flag=True, default=None),
  54. click.option('--comment-context', is_flag=True, default=None),
  55. click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None),
  56. ]
  57. def _add_options(opts: list):
  58. def wrap(func):
  59. for opt in opts:
  60. func = opt(func)
  61. return func
  62. return wrap
  63. @click.group()
  64. def cli():
  65. pass
  66. @cli.command('download')
  67. @_add_options(_common_options)
  68. @_add_options(_downloader_options)
  69. @click.pass_context
  70. def cli_download(context: click.Context, **_):
  71. config = Configuration()
  72. config.process_click_arguments(context)
  73. setup_logging(config.verbose)
  74. try:
  75. reddit_downloader = RedditDownloader(config)
  76. reddit_downloader.download()
  77. except Exception:
  78. logger.exception('Downloader exited unexpectedly')
  79. raise
  80. else:
  81. logger.info('Program complete')
  82. @cli.command('archive')
  83. @_add_options(_common_options)
  84. @_add_options(_archiver_options)
  85. @click.pass_context
  86. def cli_archive(context: click.Context, **_):
  87. config = Configuration()
  88. config.process_click_arguments(context)
  89. setup_logging(config.verbose)
  90. try:
  91. reddit_archiver = Archiver(config)
  92. reddit_archiver.download()
  93. except Exception:
  94. logger.exception('Archiver exited unexpectedly')
  95. raise
  96. else:
  97. logger.info('Program complete')
  98. @cli.command('clone')
  99. @_add_options(_common_options)
  100. @_add_options(_archiver_options)
  101. @_add_options(_downloader_options)
  102. @click.pass_context
  103. def cli_clone(context: click.Context, **_):
  104. config = Configuration()
  105. config.process_click_arguments(context)
  106. setup_logging(config.verbose)
  107. try:
  108. reddit_scraper = RedditCloner(config)
  109. reddit_scraper.download()
  110. except Exception:
  111. logger.exception('Scraper exited unexpectedly')
  112. raise
  113. else:
  114. logger.info('Program complete')
  115. def setup_logging(verbosity: int):
  116. class StreamExceptionFilter(logging.Filter):
  117. def filter(self, record: logging.LogRecord) -> bool:
  118. result = not (record.levelno == logging.ERROR and record.exc_info)
  119. return result
  120. logger.setLevel(1)
  121. stream = logging.StreamHandler(sys.stdout)
  122. stream.addFilter(StreamExceptionFilter())
  123. formatter = logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
  124. stream.setFormatter(formatter)
  125. logger.addHandler(stream)
  126. if verbosity <= 0:
  127. stream.setLevel(logging.INFO)
  128. elif verbosity == 1:
  129. stream.setLevel(logging.DEBUG)
  130. else:
  131. stream.setLevel(9)
  132. logging.getLogger('praw').setLevel(logging.CRITICAL)
  133. logging.getLogger('prawcore').setLevel(logging.CRITICAL)
  134. logging.getLogger('urllib3').setLevel(logging.CRITICAL)
# Invoke the click command group only when this file is run as a script,
# not when it is imported.
if __name__ == '__main__':
    cli()