1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- #!/usr/bin/env python3
- # coding=utf-8
- import json
- import logging
- import re
- from typing import Iterator
- import dict2xml
- import praw.models
- import yaml
- from bdfr.archive_entry.base_archive_entry import BaseArchiveEntry
- from bdfr.archive_entry.comment_archive_entry import CommentArchiveEntry
- from bdfr.archive_entry.submission_archive_entry import SubmissionArchiveEntry
- from bdfr.configuration import Configuration
- from bdfr.connector import RedditConnector
- from bdfr.exceptions import ArchiverError
- from bdfr.resource import Resource
- logger = logging.getLogger(__name__)
class Archiver(RedditConnector):
    """Write Reddit submissions and comments to disk as JSON, XML or YAML files.

    NOTE(review): ``write_entry``, ``determine_sort_function`` and the attributes
    read through ``self`` (``args``, ``reddit_lists``, ``reddit_instance``,
    ``file_name_formatter``, ``download_directory``) are not defined in the
    visible part of this class; presumably they come from ``RedditConnector``
    or from code outside this view -- confirm.
    """

    def __init__(self, args: Configuration):
        """Hand the configuration straight to the parent constructor."""
        super().__init__(args)

    def download(self):
        """Archive every item yielded by ``self.reddit_lists``.

        Items whose author name is listed in ``self.args.ignore_user`` are
        skipped; an item without an author is skipped when the literal
        'DELETED' is listed there.
        """
        for generator in self.reddit_lists:
            for submission in generator:
                author = submission.author
                if author:
                    skip = author.name in self.args.ignore_user
                else:
                    # Only a missing (None) author counts as the 'DELETED' pseudo-user.
                    skip = author is None and 'DELETED' in self.args.ignore_user
                if skip:
                    author_label = author.name if author else "DELETED"
                    logger.debug(
                        f'Submission {submission.id} in {submission.subreddit.display_name} skipped'
                        f' due to {author_label} being an ignored user')
                    continue
                logger.debug(f'Attempting to archive submission {submission.id}')
                self.write_entry(submission)

    def get_submissions_from_link(self):
        """Turn each value in ``self.args.link`` into a lazy Reddit object.

        A 6-character value is looked up as a submission ID, a value made of
        exactly seven word characters as a comment ID, and anything else is
        passed on as a submission URL.

        Returns a list holding one inner list with all resolved objects.
        """
        resolved = []
        for link in self.args.link:
            if len(link) == 6:
                item = self.reddit_instance.submission(id=link)
            elif re.match(r'^\w{7}$', link):
                item = self.reddit_instance.comment(id=link)
            else:
                item = self.reddit_instance.submission(url=link)
            resolved.append(item)
        return [resolved]

    def get_user_data(self):
        """Return the parent's user data, extended with user comments on request.

        When both ``self.args.user`` and ``self.args.all_comments`` are truthy,
        one sorted comment listing per configured user is appended to the
        parent's results.
        """
        results = super().get_user_data()
        if not (self.args.user and self.args.all_comments):
            return results
        sort = self.determine_sort_function()
        for user in self.args.user:
            logger.debug(f'Retrieving comments of user {user}')
            user_comments = self.reddit_instance.redditor(user).comments
            results.append(sort(user_comments, limit=self.args.limit))
        return results

    def _serialise_entry(self, entry: BaseArchiveEntry, extension: str, serialise):
        """Compile ``entry``, convert it with ``serialise`` and write the text to disk.

        ``extension`` (leading dot included) is handed to ``Resource``.
        """
        # The Resource is only used for path formatting and logging here; the
        # empty string and the no-op callable look like placeholders --
        # TODO confirm against the Resource constructor.
        resource = Resource(entry.source, '', lambda: None, extension)
        content = serialise(entry.compile())
        self._write_content_to_disk(resource, content)

    def _write_entry_json(self, entry: BaseArchiveEntry):
        """Write the compiled entry to disk as JSON."""
        self._serialise_entry(entry, '.json', json.dumps)

    def _write_entry_xml(self, entry: BaseArchiveEntry):
        """Write the compiled entry to disk as XML wrapped in a ``root`` element."""
        self._serialise_entry(entry, '.xml', lambda data: dict2xml.dict2xml(data, wrap='root'))

    def _write_entry_yaml(self, entry: BaseArchiveEntry):
        """Write the compiled entry to disk as YAML."""
        self._serialise_entry(entry, '.yaml', yaml.dump)

    def _write_content_to_disk(self, resource: Resource, content: str):
        """Write ``content`` as UTF-8 text to the path formatted for ``resource``.

        Missing parent directories are created first.
        """
        file_path = self.file_name_formatter.format_path(resource, self.download_directory)
        file_path.parent.mkdir(exist_ok=True, parents=True)
        with open(file_path, 'w', encoding="utf-8") as file:
            entry_id = resource.source_submission.id
            # Drop the leading dot of the extension to get the format name.
            format_name = resource.extension[1:].upper()
            logger.debug(
                f'Writing entry {entry_id} to file in {format_name}'
                f' format at {file_path}')
            file.write(content)
|