index.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. import datetime
  2. import os
  3. from . import common
  4. from . import exceptions
  5. from . import tsdb
# Template for one submission line in the plain-text index.
# The newlines around the template are only there for source readability and
# are stripped by the .replace call. Placeholders are filled with str.format
# by index_worker, which supplies: author, authorlink, flaircss, flairtext,
# id, numcomments, score, link, subreddit, timestamp, title, url.
LINE_FORMAT_TXT = '''
{timestamp}: [{title}]({link}) - /u/{author} (+{score})
'''.replace('\n', '')

# Template for one submission line in the HTML index. Same placeholders and
# the same newline stripping as LINE_FORMAT_TXT.
LINE_FORMAT_HTML = '''
<div>{timestamp}: <a href="{link}">[{flairtext}] {title}</a> - <a href="{authorlink}">{author}</a> (+{score})</div>
'''.replace('\n', '')

TIMESTAMP_FORMAT = '%Y %b %d'
# The time format, passed to strftime for each submission's creation time.
# "%Y %b %d" = "2016 Aug 10" (%b is the abbreviated month name; use %B for
# the full name, e.g. "2016 August 10").
# See http://strftime.org/

# Written once at the top of every HTML index file, before the submission lines.
HTML_HEADER = '''
<html>
<head>
<meta charset="UTF-8">
<style>
*
{
font-family: Consolas;
}
</style>
</head>
<body>
'''

# Written once at the end of every HTML index file, closing the tags opened
# by HTML_HEADER.
HTML_FOOTER = '''
</body>
</html>
'''
  33. def index(
  34. subreddit=None,
  35. username=None,
  36. do_all=False,
  37. do_date=False,
  38. do_title=False,
  39. do_score=False,
  40. do_author=False,
  41. do_subreddit=False,
  42. do_flair=False,
  43. html=False,
  44. offline=False,
  45. score_threshold=0,
  46. ):
  47. if not common.is_xor(subreddit, username):
  48. raise exceptions.NotExclusive(['subreddit', 'username'])
  49. if subreddit:
  50. database = tsdb.TSDB.for_subreddit(subreddit, do_create=False)
  51. else:
  52. database = tsdb.TSDB.for_user(username, do_create=False)
  53. kwargs = {'html': html, 'offline': offline, 'score_threshold': score_threshold}
  54. wrote = None
  55. if do_all or do_date:
  56. print('Writing time file')
  57. wrote = index_worker(database, suffix='_date', orderby='created ASC', **kwargs)
  58. if do_all or do_title:
  59. print('Writing title file')
  60. wrote = index_worker(database, suffix='_title', orderby='title ASC', **kwargs)
  61. if do_all or do_score:
  62. print('Writing score file')
  63. wrote = index_worker(database, suffix='_score', orderby='score DESC', **kwargs)
  64. if not username and (do_all or do_author):
  65. print('Writing author file')
  66. wrote = index_worker(database, suffix='_author', orderby='author ASC', **kwargs)
  67. if username and (do_all or do_subreddit):
  68. print('Writing subreddit file')
  69. wrote = index_worker(database, suffix='_subreddit', orderby='subreddit ASC', **kwargs)
  70. if do_all or do_flair:
  71. print('Writing flair file')
  72. # Items with flair come before items without. Each group is sorted by time separately.
  73. orderby = 'flair_text IS NULL ASC, created ASC'
  74. wrote = index_worker(database, suffix='_flair', orderby=orderby, **kwargs)
  75. if not wrote:
  76. raise Exception('No sorts selected! Read the docstring')
  77. print('Done.')
  78. def index_worker(
  79. database,
  80. suffix,
  81. orderby,
  82. score_threshold=0,
  83. html=False,
  84. offline=False,
  85. ):
  86. cur = database.sql.cursor()
  87. statement = 'SELECT * FROM submissions WHERE score >= {threshold} ORDER BY {order}'
  88. statement = statement.format(threshold=score_threshold, order=orderby)
  89. cur.execute(statement)
  90. database.index_dir.makedirs(exist_ok=True)
  91. extension = '.html' if html else '.txt'
  92. mash_basename = database.filepath.replace_extension('').basename
  93. mash_basename += suffix + extension
  94. mash_filepath = database.index_dir.with_child(mash_basename)
  95. mash_handle = mash_filepath.open('w', encoding='UTF-8')
  96. if html:
  97. mash_handle.write(HTML_HEADER)
  98. line_format = LINE_FORMAT_HTML
  99. else:
  100. line_format = LINE_FORMAT_TXT
  101. do_timestamp = '{timestamp}' in line_format
  102. for submission in common.fetchgenerator(cur):
  103. submission = tsdb.DBEntry(submission)
  104. if do_timestamp:
  105. timestamp = int(submission.created)
  106. timestamp = datetime.datetime.utcfromtimestamp(timestamp)
  107. timestamp = timestamp.strftime(TIMESTAMP_FORMAT)
  108. else:
  109. timestamp = ''
  110. if offline:
  111. link = f'../offline_reading/{submission.idstr}.html'
  112. else:
  113. link = f'https://redd.it/{submission.idstr[3:]}'
  114. author = submission.author
  115. if author.lower() == '[deleted]':
  116. author_link = '#'
  117. else:
  118. author_link = 'https://reddit.com/u/%s' % author
  119. line = line_format.format(
  120. author=author,
  121. authorlink=author_link,
  122. flaircss=submission.flair_css_class or '',
  123. flairtext=submission.flair_text or '',
  124. id=submission.idstr,
  125. numcomments=submission.num_comments,
  126. score=submission.score,
  127. link=link,
  128. subreddit=submission.subreddit,
  129. timestamp=timestamp,
  130. title=submission.title.replace('\n', ' '),
  131. url=submission.url or link,
  132. )
  133. line += '\n'
  134. mash_handle.write(line)
  135. if html:
  136. mash_handle.write(HTML_FOOTER)
  137. mash_handle.close()
  138. print('Wrote', mash_filepath.relative_path)
  139. return mash_filepath
  140. def index_argparse(args):
  141. return index(
  142. subreddit=args.subreddit,
  143. username=args.username,
  144. do_all=args.do_all,
  145. do_date=args.do_date,
  146. do_title=args.do_title,
  147. do_score=args.do_score,
  148. do_author=args.do_author,
  149. do_subreddit=args.do_subreddit,
  150. do_flair=args.do_flair,
  151. html=args.html,
  152. offline=args.offline,
  153. score_threshold=common.int_none(args.score_threshold),
  154. )