archive.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. #!/usr/bin/env python3
"""
This is the main program for the Zulip archive system.  For help:

    python archive.py -h

Note that this actual file mostly does the following:

    - parse command line arguments
    - check some settings from settings.py
    - complain if you haven't made certain directories

The actual work is done in two main libraries:

    - lib/website.py
    - lib/populate.py
"""
# The workflow (timing for the leanprover Zulip chat, on my slow laptop):
# - populate_all() builds a json file in `settings.json_directory` for each topic,
#   containing message data and an index json file mapping streams to their topics.
#   This uses the Zulip API and takes ~10 minutes to crawl the whole chat.
# - populate_incremental() assumes there is already a json cache and collects only new messages.
# - build_website() builds the website
# - See hosting.md for suggestions on hosting.
#
  21. import sys
  22. if sys.version_info < (3, 6):
  23. version_error = " Python version must be 3.6 or higher\n\
  24. Your current version of python is {}.{}\n\
  25. Please try again with python3.".format(
  26. sys.version_info.major, sys.version_info.minor
  27. )
  28. raise Exception(version_error)
  29. import argparse
  30. import configparser
  31. import os
  32. import zulip
  33. from lib.common import stream_validator, exit_immediately
  34. # Most of the heavy lifting is done by the following modules:
  35. from lib.populate import populate_all, populate_incremental
  36. from lib.website import build_website
  37. from lib.sitemap import build_sitemap
  38. try:
  39. import settings
  40. except ModuleNotFoundError:
  41. # TODO: Add better instructions.
  42. exit_immediately(
  43. """
  44. We can't find settings.py.
  45. Please copy default_settings.py to settings.py
  46. and then edit the settings.py file to fit your use case.
  47. For testing, you can often leave the default settings,
  48. but you will still want to review them first.
  49. """
  50. )
# Message templates shown to the user when a required directory does not
# exist.  Each one contains a single `{}` placeholder that the directory
# helpers below fill in with the path (in POSIX form) via str.format(), so
# the suggested `mkdir` command can be copied as-is.

# Used by get_json_directory() when it is called with for_writing true.
NO_JSON_DIR_ERROR_WRITE = """
We cannot find a place to write JSON files.
Please run the below command:
mkdir {}"""
# Used by get_json_directory() when it is called with for_writing false; it
# additionally points the user at `-t` to fetch the JSON.
NO_JSON_DIR_ERROR_READ = """
We cannot find a place to read JSON files.
Please run the below command:
mkdir {}
And then fetch the JSON:
python archive.py -t"""
# Used by get_html_directory().
NO_HTML_DIR_ERROR = """
We cannot find a place to write HTML files.
Please run the below command:
mkdir {}"""
  65. def get_json_directory(for_writing):
  66. json_dir = settings.json_directory
  67. if not json_dir.exists():
  68. # I use posix paths here, since even on Windows folks will
  69. # probably be using some kinda Unix-y shell to run mkdir.
  70. if for_writing:
  71. error_msg = NO_JSON_DIR_ERROR_WRITE.format(json_dir.as_posix())
  72. else:
  73. error_msg = NO_JSON_DIR_ERROR_READ.format(json_dir.as_posix())
  74. exit_immediately(error_msg)
  75. if not json_dir.is_dir():
  76. exit_immediately(str(json_dir) + " needs to be a directory")
  77. return settings.json_directory
  78. def get_html_directory():
  79. html_dir = settings.html_directory
  80. if not html_dir.exists():
  81. error_msg = NO_HTML_DIR_ERROR.format(html_dir.as_posix())
  82. exit_immediately(error_msg)
  83. if not html_dir.is_dir():
  84. exit_immediately(str(html_dir) + " needs to be a directory")
  85. return settings.html_directory
  86. def get_client_info():
  87. config_file = "./zuliprc"
  88. client = zulip.Client(config_file=config_file)
  89. # It would be convenient if the Zulip client object
  90. # had a `site` field, but instead I just re-read the file
  91. # directly to get it.
  92. config = configparser.RawConfigParser()
  93. config.read(config_file)
  94. zulip_url = config.get("api", "site")
  95. return client, zulip_url
  96. def run():
  97. parser = argparse.ArgumentParser(
  98. description="Build an html archive of the Zulip chat."
  99. )
  100. parser.add_argument(
  101. "-b", action="store_true", default=False, help="Build .md files"
  102. )
  103. parser.add_argument(
  104. "--no-sitemap",
  105. action="store_true",
  106. default=False,
  107. help="Don't build sitemap files",
  108. )
  109. parser.add_argument(
  110. "-t", action="store_true", default=False, help="Make a clean json archive"
  111. )
  112. parser.add_argument(
  113. "-i",
  114. action="store_true",
  115. default=False,
  116. help="Incrementally update the json archive",
  117. )
  118. results = parser.parse_args()
  119. if results.t and results.i:
  120. print("Cannot perform both a total and incremental update. Use -t or -i.")
  121. exit(1)
  122. if not (results.t or results.i or results.b):
  123. print("\nERROR!\n\nYou have not specified any work to do.\n")
  124. parser.print_help()
  125. exit(1)
  126. json_root = get_json_directory(for_writing=results.t)
  127. # The directory where this archive.py is located
  128. repo_root = os.path.dirname(os.path.realpath(__file__))
  129. if results.b:
  130. md_root = get_html_directory()
  131. if results.t or results.i:
  132. is_valid_stream_name = stream_validator(settings)
  133. client, zulip_url = get_client_info()
  134. if results.t:
  135. populate_all(
  136. client,
  137. json_root,
  138. is_valid_stream_name,
  139. )
  140. elif results.i:
  141. populate_incremental(
  142. client,
  143. json_root,
  144. is_valid_stream_name,
  145. )
  146. if results.b:
  147. build_website(
  148. json_root,
  149. md_root,
  150. settings.site_url,
  151. settings.html_root,
  152. settings.title,
  153. zulip_url,
  154. settings.zulip_icon_url,
  155. repo_root,
  156. settings.page_head_html,
  157. settings.page_footer_html,
  158. )
  159. if not results.no_sitemap:
  160. build_sitemap(settings.site_url, md_root.as_posix(), md_root.as_posix())
# Start the command-line workflow only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    run()