#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Reddit Post Archiver
By Samuel Johnson Stoever

Fetch one reddit submission through PRAW and write it, together with its
comment tree, to an HTML file named ``<postID>.html`` under
``outputFilePath``.

Usage: pass the post ID as the only command-line argument. With no
argument, or with more than one, the default ``postID`` below is used.

Requires Python 3 (text is handled as ``str`` and the output file is
opened with an explicit UTF-8 encoding).

NOTE(review): the copy of this script that was reviewed had the HTML tags
stripped out of its string literals. The markup emitted below (element
names, ids and classes) is a reconstruction -- confirm it against the
selectors used by the stylesheet referenced in ``pathToCSS``.
"""

import datetime
import re
import sys
import time

import praw
import snudown
from requests.exceptions import HTTPError

# --- Customization Configuration ------------------------------------------
# Default postID:
postID = '15zmjl'
# Path to which to output the file
outputFilePath = './'
# The Path to the stylesheet, relative to where the html file will be stored
pathToCSS = 'css/style.css'
# ---------------------------------------------------------------------------

monthsList = ['January', 'February', 'March', 'April', 'May', 'June',
              'July', 'August', 'September', 'October', 'November',
              'December']

# Base used to build profile and subreddit links from their names.
_REDDIT_URL = 'https://www.reddit.com'


def _attr(value):
    """Return *value* as text that is safe inside a double-quoted attribute.

    Only the double quote is replaced: fixMarkdown below assumes reddit
    has already entity-escaped ``>`` (and presumably ``<`` / ``&``), so a
    full escape here would escape those a second time.
    """
    return fixUnicode(value).replace('"', '&quot;')


def _formatTimestamp(epochSeconds):
    """Return *epochSeconds* formatted like ``'09:05 UTC on March 3, 2013'``.

    Hour and minute are zero-padded; without padding 12:05 would be
    rendered as "12:5".
    """
    moment = time.gmtime(epochSeconds)
    return '%02d:%02d UTC on %s %d, %d' % (
        moment.tm_hour,
        moment.tm_min,
        monthsList[moment.tm_mon - 1],
        moment.tm_mday,
        moment.tm_year,
    )


def _authorName(redditThing):
    """Return the author's name of a post/comment, or None if unavailable.

    The lookup raises AttributeError when the item has no usable author;
    callers render such items as "[Deleted]".
    """
    try:
        return fixUnicode(redditThing.author.name)
    except AttributeError:
        return None


def writeHeader(posttitle):
    """Write the document head (charset, stylesheet link, title) and open
    the body on the global ``htmlFile``.

    posttitle -- text placed inside the ``<title>`` element.
    """
    htmlFile.write('<!DOCTYPE html>\n<html>\n<head>\n')
    htmlFile.write('\t<meta charset="utf-8"/>\n')
    htmlFile.write('\t<link type="text/css" rel="stylesheet" href="'
                   + _attr(pathToCSS) + '"/>\n')
    htmlFile.write('\t<title>' + posttitle + '</title>\n')
    htmlFile.write('</head>\n<body>\n')


def parsePost(postObject):
    """Write the whole page for *postObject* to the global ``htmlFile``.

    Emits the header, the post's title line and metadata, the post body
    (rendered markdown for self posts, the target URL for link posts),
    every top-level comment via parseComment, and the closing tags.

    postObject -- a PRAW submission object.
    """
    postTitle = fixUnicode(postObject.title)
    writeHeader(postTitle)
    # Ask PRAW to expand "load more comments" placeholders before the
    # comment tree is walked (see the PRAW documentation for its limits).
    postObject.replace_more_comments()

    authorName = _authorName(postObject)
    postAuthorExists = authorName is not None
    postAuthorName = authorName if postAuthorExists else ''
    isSelfPost = postObject.is_self
    subredditName = fixUnicode(postObject.subreddit.display_name)

    # --- title line ---
    htmlFile.write('<div class="title">\n')
    if isSelfPost:
        # The post is a self post
        htmlFile.write(postTitle)
        htmlFile.write('\n<br/><strong>')
    else:
        # The post is a link post: the title links to the target URL
        htmlFile.write('<a id="postlink" href="'
                       + _attr(postObject.url) + '">')
        htmlFile.write(postTitle)
        htmlFile.write('</a>\n<br/><strong>')

    # --- author, date, score, subreddit, permalink ---
    if postAuthorExists:
        htmlFile.write('Posted by <a id="userlink" href="'
                       + _attr(_REDDIT_URL + '/user/' + postAuthorName)
                       + '">')
        htmlFile.write(postAuthorName)
        htmlFile.write('</a>. </strong><em>')
    else:
        htmlFile.write('Posted by [Deleted]. </strong><em>')
    htmlFile.write('Posted at ' + _formatTimestamp(postObject.created_utc))
    htmlFile.write('</em><em>. ' + str(postObject.ups - postObject.downs))
    # Both post kinds get the score label and the subreddit name; only the
    # surrounding wording differs ("(self.<name>)" vs "(<name> Subreddit)").
    if isSelfPost:
        htmlFile.write(' Points. </em><em>(self.')
    else:
        htmlFile.write(' Points. </em><em>(')
    htmlFile.write('<a id="selfLink" href="'
                   + _attr(_REDDIT_URL + '/r/' + subredditName) + '">'
                   + subredditName)
    if isSelfPost:
        htmlFile.write('</a>)</em><em>')
    else:
        htmlFile.write('</a> Subreddit)</em><em>')
    # NOTE(review): assumes postObject.permalink is an absolute URL, as in
    # the PRAW versions that provide get_submission() -- confirm.
    htmlFile.write(' (<a id="postpermalink" href="'
                   + _attr(postObject.permalink)
                   + '">Permalink</a>)</em>\n')

    # --- post body ---
    if isSelfPost:
        htmlFile.write('<div class="post">\n')
        htmlFile.write(
            fixUnicode(snudown.markdown(fixMarkdown(postObject.selftext))))
        htmlFile.write('</div>\n')
    else:
        htmlFile.write('<div class="post">\n<p>\n')
        htmlFile.write(fixUnicode(postObject.url))
        htmlFile.write('</p>\n</div>\n')
    htmlFile.write('</div>\n')

    # --- comments ---
    for comment in postObject._comments:
        parseComment(comment, postAuthorName, postAuthorExists)

    htmlFile.write('<hr id="footerhr">\n')
    htmlFile.write('\n\n</body>\n</html>\n')


def parseComment(redditComment, postAuthorName, postAuthorExists,
                 isRoot=True):
    """Write *redditComment* and, recursively, all of its replies.

    Each comment becomes one ``<div>`` that also encloses its replies, so
    the nesting of the divs mirrors the nesting of the thread.

    redditComment    -- a PRAW comment object.
    postAuthorName   -- name of the submission's author ('' if unknown).
    postAuthorExists -- true when the submission's author is known; only
                        then is a comment by the same name marked as OP.
    isRoot           -- True for top-level comments, False for replies.
    """
    commentAuthorName = _authorName(redditComment)

    if isRoot:
        htmlFile.write('<div class="comment">\n')
    else:
        htmlFile.write('<div class="comment reply">\n')

    htmlFile.write('<div class="commentinfo">\n')
    if commentAuthorName is not None:
        profileLink = ('<a href="'
                       + _attr(_REDDIT_URL + '/user/' + commentAuthorName)
                       + '"')
        if postAuthorExists and postAuthorName == commentAuthorName:
            # Comment written by the submission's author
            htmlFile.write(profileLink + ' class="postOP-comment">'
                           + commentAuthorName + '</a> <em>')
        else:
            htmlFile.write(profileLink + '>' + commentAuthorName
                           + '</a> <em>')
    else:
        htmlFile.write('<strong>[Deleted]</strong> <em>')
    htmlFile.write(str(redditComment.ups - redditComment.downs))
    htmlFile.write(' Points </em><em>')
    htmlFile.write('Posted at '
                   + _formatTimestamp(redditComment.created_utc))
    htmlFile.write('</em></div>\n')

    htmlFile.write(
        fixUnicode(snudown.markdown(fixMarkdown(redditComment.body))))
    for reply in redditComment._replies:
        parseComment(reply, postAuthorName, postAuthorExists, False)
    htmlFile.write('</div>\n')


def fixMarkdown(markdown):
    """Return *markdown* as text with ``&gt;`` turned back into ``>``.

    Markdown block quotes start with a literal ``>``, so an escaped
    ``&gt;`` would not be recognised as a quote marker.

    NOTE(review): the reviewed copy replaced '>' with '>' (a no-op), which
    looks like an already-decoded '&gt;'; confirm against the reddit API's
    response encoding.
    """
    return fixUnicode(markdown).replace('&gt;', '>')


def fixUnicode(text):
    """Return *text* as ``str``, decoding UTF-8 when it is ``bytes``.

    ``str(text.encode('utf8'))`` would produce the ``b'...'`` repr on
    Python 3, so text is passed through unchanged instead.
    """
    if isinstance(text, bytes):
        return text.decode('utf8')
    return text

# End Function Definitions


if __name__ == '__main__':
    if len(sys.argv) == 1:
        print('No post ID was provided. Using default postID.')
    elif len(sys.argv) > 2:
        print('Too Many Arguments. Using default postID.')
    else:
        postID = sys.argv[1]
    outputFilePath = outputFilePath + postID + '.html'

    r = praw.Reddit(user_agent='RedditPostArchiver Bot, version 0.93')
    # Disclaimer, storing plain text passwords is bad.
    # Uncomment the following line to log in (e.g., when the script
    # reports "Unable to Archive Post"):
    # r.login('username', 'password')
    try:
        thePost = r.get_submission(submission_id=postID)
        # The functions above write to the module-level name htmlFile;
        # the with-block guarantees the file is closed even on errors.
        with open(outputFilePath, 'w', encoding='utf-8') as htmlFile:
            parsePost(thePost)
    except HTTPError:
        print('Unable to Archive Post: Invalid PostID or Log In Required '
              '(see the commented-out r.login line near the end of the '
              'script)')