"""
This module emits the content for your archive.

It emits HTML, and YAML, mostly by calling
into other modules.

As of April 2021, the generated html pages can be hosted simply with
`python -m http.server`.

This module is probably the most likely module to be forked if
you have unique requirements for how your archive should look.

If you are interested in porting this system away from Python to your
language of choice, this is probably the best place to start.
"""
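
# A rough sketch of the data flow this module assumes, inferred from the code
# below rather than documented anywhere here: `json_root` holds JSON presumably
# written by the rest of this project (a stream index plus per-topic message
# files), and every generated page lands under `md_root`, which is the
# directory you would serve with `python -m http.server`.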

from pathlib import Path
from distutils.dir_util import copy_tree
import html
from shutil import copyfile

from .url import (
    archive_stream_url,
    sanitize,
    sanitize_stream,
)

from .files import (
    open_main_page,
    open_stream_topics_page,
    open_topic_messages_page,
    read_zulip_messages_for_topic,
    read_zulip_stream_info,
)

from .html import (
    format_message_html,
    last_updated_footer_html,
    topic_page_links_html,
    stream_list_page_html,
    topic_list_page_html,
)


def to_topic_page_head_html(title):
    return f'<html>\n<head><meta charset="utf-8"><title>{title}</title></head>\n'


def build_website(
    json_root,
    md_root,
    site_url,
    html_root,
    title,
    zulip_url,
    zulip_icon_url,
    repo_root,
    page_head_html,
    page_footer_html,
):
    stream_info = read_zulip_stream_info(json_root)

    streams = stream_info["streams"]
    date_footer_html = last_updated_footer_html(stream_info)

    write_main_page(
        md_root,
        site_url,
        html_root,
        title,
        streams,
        date_footer_html,
        page_head_html,
        page_footer_html,
    )
    write_css(md_root)

    for stream_name in streams:
        print("building: ", stream_name)
        stream_data = streams[stream_name]
        topic_data = stream_data["topic_data"]

        write_stream_topics(
            md_root,
            site_url,
            html_root,
            title,
            stream_name,
            stream_data,
            date_footer_html,
            page_head_html,
            page_footer_html,
        )

        for topic_name in topic_data:
            write_topic_messages(
                json_root,
                md_root,
                site_url,
                html_root,
                title,
                zulip_url,
                zulip_icon_url,
                stream_name,
                streams[stream_name],
                topic_name,
                date_footer_html,
                page_head_html,
                page_footer_html,
            )

    # Copy the entire content of <repo_root>/assets into md_root.
    # We use copy_tree from distutils instead of shutil.copytree so that it
    # doesn't raise an error when assets/ already exists inside md_root.
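    # (On Python 3.8+, shutil.copytree(src, dst, dirs_exist_ok=True) gives the
    # same tolerant behavior; distutils itself was removed in Python 3.12.)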
    copy_tree(str(Path(repo_root) / "assets"), str(Path(md_root) / "assets"))

    # Copy .nojekyll into md_root as well.
    copyfile(str(Path(repo_root) / ".nojekyll"), str(Path(md_root) / ".nojekyll"))
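

# A minimal sketch of how build_website might be invoked; every value below is
# a placeholder assumption, not something defined or required by this module:
#
#     build_website(
#         json_root=Path("zulip_json"),
#         md_root=Path("archive"),
#         site_url="https://example.github.io/my-archive",
#         html_root="archive",
#         title="Example Zulip Archive",
#         zulip_url="https://example.zulipchat.com",
#         zulip_icon_url="https://example.zulipchat.com/static/zulip-icon.png",
#         repo_root=".",
#         page_head_html="<html>\n<head><title>Example Zulip Archive</title></head>\n",
#         page_footer_html="\n</html>",
#     )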


# writes the index page listing all streams.
# `streams`: a dict mapping stream names to stream json objects as described in the header.
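#
# For illustration only, this module indexes each stream object roughly like
# this (only the "id" and "topic_data" keys are actually read in this file;
# the example values are made up):
#
#     streams = {
#         "social": {
#             "id": 7,
#             "topic_data": {"lunch": {...}, "happy hour": {...}},
#         },
#     }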
def write_main_page(
    md_root,
    site_url,
    html_root,
    title,
    streams,
    date_footer_html,
    page_head_html,
    page_footer_html,
):
    """
    The main page in our website lists streams:

        Streams:

            general (70 topics)
            announce (42 topics)
    """
    outfile = open_main_page(md_root)

    content_html = stream_list_page_html(streams)

    outfile.write(page_head_html)
    outfile.write(content_html)
    outfile.write(date_footer_html)
    outfile.write(page_footer_html)

    outfile.close()


def write_stream_topics(
    md_root,
    site_url,
    html_root,
    title,
    stream_name,
    stream,
    date_footer_html,
    page_head_html,
    page_footer_html,
):
    """
    A stream page lists all topics for the stream:

        Stream: social

        Topics:

            lunch (4 messages)
            happy hour (1 message)
    """
    sanitized_stream_name = sanitize_stream(stream_name, stream["id"])

    outfile = open_stream_topics_page(md_root, sanitized_stream_name)

    stream_url = archive_stream_url(site_url, html_root, sanitized_stream_name)
    topic_data = stream["topic_data"]

    content_html = topic_list_page_html(stream_name, stream_url, topic_data)

    outfile.write(page_head_html)
    outfile.write(content_html)
    outfile.write(date_footer_html)
    outfile.write(page_footer_html)

    outfile.close()


def write_topic_messages(
    json_root,
    md_root,
    site_url,
    html_root,
    title,
    zulip_url,
    zulip_icon_url,
    stream_name,
    stream,
    topic_name,
    date_footer_html,
    page_head_html,
    page_footer_html,
):
    """
    Writes the topic page, which lists all messages
    for one particular topic within a stream:

        Stream: social
        Topic: lunch

        Alice:
            I want pizza!

        Bob:
            No, let's get tacos!
    """
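    # The messages read below are assumed to be dicts roughly in the shape of
    # the Zulip export/API (sender, rendered HTML content, timestamp, ...);
    # the exact fields required are defined by format_message_html, not here.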
    stream_id = stream["id"]

    sanitized_stream_name = sanitize_stream(stream_name, stream_id)
    sanitized_topic_name = sanitize(topic_name)

    messages = read_zulip_messages_for_topic(
        json_root, sanitized_stream_name, sanitized_topic_name
    )

    outfile = open_topic_messages_page(
        md_root,
        sanitized_stream_name,
        sanitized_topic_name,
    )

    topic_links = topic_page_links_html(
        site_url,
        html_root,
        zulip_url,
        sanitized_stream_name,
        sanitized_topic_name,
        stream_name,
        topic_name,
    )

    # We use a topic-specific title instead of `page_head_html` to improve
    # search engine indexing.
    outfile.write(
        to_topic_page_head_html(
            html.escape(topic_name) + " · " + html.escape(stream_name) + " · " + title
        )
    )
    outfile.write(topic_links)
    outfile.write(
        f'\n<head><link href="{html.escape(site_url)}/style.css" rel="stylesheet"></head>\n'
    )
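    # (The stylesheet <link> above is written inside a second <head> element,
    # after the topic links; browsers still apply it, although a second <head>
    # is not strictly valid HTML.)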

    for msg in messages:
        msg_html = format_message_html(
            site_url,
            html_root,
            zulip_url,
            zulip_icon_url,
            stream_name,
            stream_id,
            topic_name,
            msg,
        )
        outfile.write(msg_html)
        outfile.write("\n\n")

    outfile.write(date_footer_html)
    outfile.write(page_footer_html)

    outfile.close()
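

# Note: the relative "style.css" path below resolves against the current
# working directory, so this assumes the build is run from the repository
# root, where style.css is expected to live.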
def write_css(md_root):
    copyfile("style.css", md_root / "style.css")