123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397 |
- import os
- import markdown
- from . import common
- from . import exceptions
- from . import tsdb
# Opening boilerplate for every generated page: the <head> (title, charset,
# viewport, inline stylesheet) followed by the opening <body> tag.
#
# This template is run through str.format(title=...), so the literal CSS
# braces are doubled ({{ and }}) to come out as single braces.
#
# The doctype is included so browsers render the page in standards mode
# instead of quirks mode.
HTML_HEADER = '''
<!DOCTYPE html>
<html>
<head>
<title>{title}</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<style>
.submission, .comment
{{
padding-left: 20px;
padding-right: 4px;
}}
.comment
{{
margin-top: 4px;
margin-bottom: 4px;
border: 1px solid black;
}}
.submission
{{
border: 2px solid blue;
}}
.hidden
{{
display: none;
}}
</style>
</head>
<body>
'''.strip()
# Closing boilerplate for every generated page, including the script that
# powers the [-] / [+] collapse links on comments.
#
# Unlike the other templates, this string is appended to the page as-is and
# is never passed through str.format, so the JavaScript braces are single.
#
# The <script> element lives inside <body>; placing it between </body> and
# </html> is not valid document structure.
HTML_FOOTER = '''
<script>
function toggle_collapse(comment_div)
{
var button = comment_div.getElementsByClassName("toggle_hide_button")[0];
var collapsible = comment_div.getElementsByClassName("collapsible")[0];
if (collapsible.classList.contains("hidden"))
{
collapsible.classList.remove("hidden");
button.innerText = "[-]";
}
else
{
collapsible.classList.add("hidden");
button.innerText = "[+]";
}
}
</script>
</body>
</html>
'''.strip()
# Template for a single comment and, nested inside it, its replies.
#
# It is formatted in two passes:
# 1. html_format_comment fills the single-brace fields (id, usernamelink,
#    score, permalink, human, body). During that pass `{{children}}`
#    collapses to `{children}`.
# 2. html_from_tree then fills `{children}` with the rendered replies.
#
# The toggle link passes the enclosing .comment div to toggle_collapse
# (defined in HTML_FOOTER), which hides or shows the .collapsible div.
HTML_COMMENT = '''
<div class="comment" id="{id}">
<p class="userinfo">
<a
class="toggle_hide_button"
href="javascript:void(0)"
onclick="toggle_collapse(this.parentElement.parentElement)">[-]
</a>
{usernamelink}
|
<span class="score">{score} points</span>
|
<a class="timestamp" href="{permalink}">{human}</a>
</p>
<div class="collapsible">
{body}
{{children}}
</div>
</div>
'''.strip()
# Template for the submission box at the top of a page.
#
# It is formatted in two passes:
# 1. html_format_submission fills the single-brace fields (id, usernamelink,
#    score, permalink, human, title, url_or_text). During that pass
#    `{{children}}` collapses to `{children}`.
# 2. html_from_tree then fills `{children}` with the rendered top-level
#    comments, which are placed after the submission div rather than in it.
HTML_SUBMISSION = '''
<div class="submission" id="{id}">
<p class="userinfo">
{usernamelink}
|
<span class="score">{score} points</span>
|
<a class="timestamp" href="{permalink}">{human}</a>
</p>
<strong>{title}</strong>
<p>{url_or_text}</p>
</div>
{{children}}
'''.strip()
class TreeNode:
    '''
    A node in a tree whose nodes are addressed by string identifiers.

    Each node carries an arbitrary `data` payload, a reference to its
    `parent` (None for a root) and a `children` dict mapping each child's
    identifier to the child node.
    '''
    def __init__(self, identifier, data, parent=None):
        '''
        identifier: a str naming this node. It may not contain a backslash
            because abspath uses backslash as the separator.
        data: any payload to store on the node.
        parent: stored as-is on the node. Note that this does not register
            the new node in the parent's children; use add_child for that.
        '''
        assert isinstance(identifier, str)
        assert '\\' not in identifier
        self.identifier = identifier
        self.data = data
        self.parent = parent
        self.children = {}

    def __getitem__(self, key):
        '''
        Return the direct child whose identifier is `key`.
        Raises KeyError if there is no such child.
        '''
        return self.children[key]

    def __repr__(self):
        return 'TreeNode %s' % self.abspath()

    def abspath(self):
        '''
        Return the identifiers from the root down to this node, joined
        by backslashes.
        '''
        names = []
        node = self
        while node is not None:
            names.append(node.identifier)
            node = node.parent
        names.reverse()
        return '\\'.join(names)

    def add_child(self, other_node, overwrite_parent=False):
        '''
        Attach `other_node` as a direct child of this node and return it.

        Raises Exception if the identifier contains a colon or is already
        taken by another child of this node.
        Raises ValueError if `other_node` already has a parent and
        `overwrite_parent` is False.
        '''
        self.check_child_availability(other_node.identifier)
        if other_node.parent is not None:
            if not overwrite_parent:
                raise ValueError('That node already has a parent. Try `overwrite_parent=True`')
            # Unregister from the previous parent, otherwise the node would
            # be listed under two parents at once.
            other_node.detach()
        other_node.parent = self
        self.children[other_node.identifier] = other_node
        return other_node

    def check_child_availability(self, identifier):
        '''
        Raise Exception if `identifier` cannot be used for a new child of
        this node: either it contains a colon, or a child by that name
        already exists.
        '''
        if ':' in identifier:
            raise Exception('Only roots may have a colon')
        if identifier in self.children:
            raise Exception('Node %s already has child %s' % (self.identifier, identifier))

    def detach(self):
        '''
        Remove this node from its parent, making it a root.
        Does nothing if the node has no parent.
        '''
        parent = self.parent
        if parent is None:
            return
        # Only remove the entry if it really is this node. A node created
        # with `parent=` but never added must not evict a sibling that
        # happens to share its identifier.
        if parent.children.get(self.identifier) is self:
            del parent.children[self.identifier]
        self.parent = None

    def listnodes(self, customsort=None):
        '''
        Return the direct children as a sorted list.

        customsort: if None, children are sorted by lowercased identifier.
            Otherwise it is used as the sort key and is called with
            (identifier, node) tuples.
        '''
        items = list(self.children.items())
        if customsort is None:
            items.sort(key=lambda x: x[0].lower())
        else:
            items.sort(key=customsort)
        return [item[1] for item in items]

    def merge_other(self, othertree, otherroot=None):
        '''
        Graft the root of `othertree` onto this node as a child.

        otherroot: the new identifier for the other tree's root. Required
            when its current identifier contains a colon, and ignored
            otherwise.

        Raises Exception if a new name is required but not given, or if
        the resulting name is not available on this node.
        '''
        if ':' in othertree.identifier:
            if otherroot is None:
                raise Exception('Must specify a new name for the other tree\'s root')
            newroot = otherroot
        else:
            newroot = othertree.identifier

        # Validate before touching othertree so that a failed merge does not
        # leave it renamed and pointing at a parent that doesn't list it.
        self.check_child_availability(newroot)
        othertree.identifier = newroot
        othertree.parent = self
        self.children[newroot] = othertree

    def printtree(self, customsort=None):
        '''
        Print the abspath of this node and every descendant, one per line.
        '''
        for node in self.walk(customsort):
            print(node.abspath())

    def walk(self, customsort=None):
        '''
        Yield this node, then every descendant depth-first, visiting
        siblings in the order given by listnodes(customsort).
        '''
        yield self
        for child in self.listnodes(customsort=customsort):
            yield from child.walk(customsort=customsort)
def html_format_comment(comment):
    '''
    Given a comment, return the HTML_COMMENT template with everything
    filled in except the children.

    The returned text still contains a `{children}` placeholder, and the
    braces in the comment body are doubled so that the body survives the
    later .format call which fills that placeholder.
    '''
    fields = {
        'id': comment.idstr,
        'body': sanitize_braces(render_markdown(comment.body)),
        'usernamelink': html_helper_userlink(comment),
        'score': comment.score,
        'human': common.human(comment.created),
        'permalink': html_helper_permalink(comment),
    }
    return HTML_COMMENT.format(**fields)
def html_format_submission(submission):
    '''
    Given a submission, return the HTML_SUBMISSION template with everything
    filled in except the children.

    The returned text still contains a `{children}` placeholder, and the
    braces in the title are doubled so that the title survives the later
    .format call which fills that placeholder.
    '''
    fields = {
        'id': submission.idstr,
        'title': sanitize_braces(submission.title),
        'usernamelink': html_helper_userlink(submission),
        'score': submission.score,
        'human': common.human(submission.created),
        'permalink': html_helper_permalink(submission),
        'url_or_text': html_helper_urlortext(submission),
    }
    return HTML_SUBMISSION.format(**fields)
def html_from_database(database, specific_submission=None):
    '''
    Given a timesearch database, generate a complete html page for each
    of the submissions it contains (or for one particular submission
    fullname).

    Yields (submission identifier, html text) tuples. Comments are ordered
    by descending score. The database's offline_reading_dir is created, if
    needed, before each page is yielded.

    Because this is a generator, the ImportError for a missing markdown
    module is raised when iteration starts, not when this is called.
    '''
    if markdown is None:
        raise ImportError('Page cannot be rendered without the markdown module')

    def highest_score_first(node):
        return node.data.score * -1

    for submission_tree in trees_from_database(database, specific_submission):
        page = html_from_tree(submission_tree, sort=highest_score_first)
        database.offline_reading_dir.makedirs(exist_ok=True)
        header = HTML_HEADER.format(title=submission_tree.data.title)
        html = ''.join(['', header, page, HTML_FOOTER])
        yield (submission_tree.identifier, html)
def html_from_tree(tree, sort=None):
    '''
    Given a tree *whose root is the submission*, return
    HTML-formatted text representing each submission's comment page.

    This recurses into the children, so it is also called with comment
    nodes as `tree`.

    sort: an optional key function, called with child nodes, that decides
        the order of the children at every level. If None, the children
        keep the order given by TreeNode.listnodes.

    Raises ValueError if the node's data is neither a submission nor a
    comment.
    '''
    object_type = tree.data.object_type
    if object_type == 'submission':
        page = html_format_submission(tree.data)
    elif object_type == 'comment':
        page = html_format_comment(tree.data)
    else:
        # Without this branch `page` would be unbound and the failure would
        # surface further down as a confusing UnboundLocalError.
        raise ValueError('Cannot render object of type %r' % object_type)

    children = tree.listnodes()
    if sort is not None:
        children.sort(key=sort)
    # Joining an empty sequence gives '', so childless nodes need no
    # special case.
    children = '\n\n'.join(html_from_tree(child, sort) for child in children)

    try:
        page = page.format(children=children)
    except (IndexError, KeyError, ValueError):
        # A stray brace in the page makes str.format fail with one of these
        # depending on what the brace looks like. Show the offending text
        # before re-raising.
        print(page)
        raise
    return page
def html_helper_permalink(item):
    '''
    Given a submission or a comment, return the URL for its permalink.

    The first three characters of each fullname (the `t3_` / `t1_` style
    prefix) are dropped to get the id used in the URL. Items of any other
    object_type get just the subreddit's /comments/ URL.
    '''
    base = 'https://old.reddit.com/r/%s/comments/' % item.subreddit
    kind = item.object_type
    if kind == 'submission':
        return base + item.idstr[3:]
    if kind == 'comment':
        return base + '%s/_/%s' % (item.submission[3:], item.idstr[3:])
    return base
def html_helper_urlortext(submission):
    '''
    Given a submission, return either an <a> tag for its url, or its
    markdown-rendered selftext, or an empty string if it has neither.

    The url takes priority over the selftext. Braces in the result are
    doubled so that it can be placed into a template which will go through
    another .format call.
    '''
    if submission.url:
        markup = '<a href="{url}">{url}</a>'.format(url=submission.url)
        return sanitize_braces(markup)

    if submission.selftext:
        return sanitize_braces(render_markdown(submission.selftext))

    return sanitize_braces('')
def html_helper_userlink(item):
    '''
    Given a submission or comment, return an <a> tag for its author, or [deleted].

    The check for a deleted author is case-insensitive.
    '''
    author = item.author
    if author.lower() == '[deleted]':
        return '[deleted]'
    return '<a href="https://old.reddit.com/u/{name}">{name}</a>'.format(name=author)
def render_markdown(text):
    '''
    Given markdown text, return it rendered as html5, with any angle
    brackets in the input shown literally instead of acting as html tags.
    '''
    # html.escape is deliberately not used here: it also escapes the
    # ampersand, which turns html entities that are already in the text,
    # like &nbsp;, into &amp;nbsp; which doesn't work.
    # So only the angle brackets are escaped.
    escaped = text.replace('<', '&lt;').replace('>', '&gt;')
    return markdown.markdown(escaped, output_format='html5')
def sanitize_braces(text):
    '''
    Return the text with every curly brace doubled, so that a later
    str.format call turns them back into single literal braces instead of
    treating them as replacement fields.
    '''
    doubled = {
        ord('{'): '{{',
        ord('}'): '}}',
    }
    return text.translate(doubled)
def trees_from_database(database, specific_submission=None):
    '''
    Given a timesearch database, take all of the submission
    ids, take all of the comments for each submission id, and run them
    through `tree_from_submission`.

    If `specific_submission` is given, only that submission is used
    instead of every submission in the database.

    Yield each submission's tree as it is generated.

    Raises Exception after iteration if no submission was found, which
    includes the case where `specific_submission` is not in the database.
    '''
    # Two cursors, because cur1 may still be producing submission ids while
    # cur2 runs the per-submission queries.
    cur1 = database.sql.cursor()
    cur2 = database.sql.cursor()

    if specific_submission is None:
        cur1.execute('SELECT idstr FROM submissions ORDER BY created ASC')
        submission_ids = common.fetchgenerator(cur1)
        # sql always returns rows as tuples, even when selecting one column.
        submission_ids = (x[0] for x in submission_ids)
    else:
        specific_submission = common.t3_prefix(specific_submission)
        submission_ids = [specific_submission]

    found_some_posts = False
    for submission_id in submission_ids:
        cur2.execute('SELECT * FROM submissions WHERE idstr == ?', [submission_id])
        submission = cur2.fetchone()
        if submission is None:
            # The id is not in this database, so there is nothing to build
            # a tree from. It does not count as a found post.
            continue
        found_some_posts = True

        cur2.execute('SELECT * FROM comments WHERE submission == ?', [submission_id])
        fetched_comments = cur2.fetchall()

        yield tree_from_submission(submission, fetched_comments)

    if not found_some_posts:
        raise Exception('Found no submissions!')
def tree_from_submission(submission_dbrow, comments_dbrows):
    '''
    Given the sqlite data for a submission and all of its comments,
    return a tree with the submission id as the root.

    Comments are processed oldest first. A comment whose parent starts with
    `t3_` is attached directly to the root. Any other comment is attached
    to its parent's node, and if that parent has not been seen, a
    placeholder node with no data stands in for it. A placeholder only
    becomes connected to the tree if a comment with that id is processed
    later.
    '''
    submission = tsdb.DBEntry(submission_dbrow)
    comments = sorted(
        (tsdb.DBEntry(row) for row in comments_dbrows),
        key=lambda entry: entry.created,
    )

    print('Building tree for %s (%d comments)' % (submission.idstr, len(comments)))
    # Thanks Martin Schmidt for the algorithm
    # http://stackoverflow.com/a/29942118/5430534
    tree = TreeNode(identifier=submission.idstr, data=submission)
    nodes_by_id = {}

    for comment in comments:
        # Every comment gets exactly one node. It may exist already as a
        # placeholder made for an earlier reply, in which case it only
        # needs its data.
        node = nodes_by_id.get(comment.idstr)
        if node is None:
            node = TreeNode(comment.idstr, comment)
            nodes_by_id[comment.idstr] = node
        else:
            node.data = comment

        # Top-level comments hang off the submission itself.
        if comment.parent.startswith('t3_'):
            tree.add_child(node)
            continue

        # Replies hang off their parent comment, known or not.
        parent_node = nodes_by_id.get(comment.parent)
        if parent_node is None:
            parent_node = TreeNode(comment.parent, data=None)
            nodes_by_id[comment.parent] = parent_node
        parent_node.add_child(node)
        node.parent = parent_node

    return tree
def offline_reading(subreddit=None, username=None, specific_submission=None):
    '''
    Render the submissions of a timesearch database to html files inside
    the database's offline_reading_dir, one `<submission id>.html` file per
    submission, printing the path of each file as it is written.

    subreddit, username: which database to read. Unless
        `specific_submission` is given, exactly one of these is required.
    specific_submission: render only this submission. If neither
        `subreddit` nor `username` is given, the submission's own database
        is used.

    Raises exceptions.NotExclusive if `specific_submission` is not given
    and `subreddit` / `username` are not exclusive.
    '''
    if not specific_submission and not common.is_xor(subreddit, username):
        raise exceptions.NotExclusive(['subreddit', 'username'])

    if specific_submission and not username and not subreddit:
        database = tsdb.TSDB.for_submission(specific_submission, do_create=False)
    elif subreddit:
        database = tsdb.TSDB.for_subreddit(subreddit, do_create=False)
    else:
        database = tsdb.TSDB.for_user(username, do_create=False)

    htmls = html_from_database(database, specific_submission=specific_submission)

    for (submission_id, html) in htmls:
        html_basename = '%s.html' % submission_id
        html_filepath = database.offline_reading_dir.with_child(html_basename)
        html_handle = html_filepath.open('w', encoding='utf-8')
        # Close the handle even if the write fails, so it doesn't leak.
        try:
            html_handle.write(html)
        finally:
            html_handle.close()
        print('Wrote', html_filepath.relative_path)
def offline_reading_argparse(args):
    '''
    Command line adapter: pull the subreddit, username and
    specific_submission attributes off of the parsed arguments object and
    pass them to offline_reading, returning its result.
    '''
    options = {
        'subreddit': args.subreddit,
        'username': args.username,
        'specific_submission': args.specific_submission,
    }
    return offline_reading(**options)
|