sitemap.py 631 B

12345678910111213141516
  1. from glob import iglob
  2. from typing import Iterator
  3. from xml_sitemap_writer import XMLSitemap
  4. def build_sitemap(base_url: str, archive_dir_path: str, sitemap_write_dir_path: str):
  5. def iterate_html_files() -> Iterator[str]:
  6. # Iterator yields relative path like
  7. # archive/stream/10-errors/topic/laptop.html
  8. # TODO: Investigate when running in windows
  9. # TODO: Must ensure that the relative URLs are valid
  10. return iglob("**/*.html", root_dir=archive_dir_path, recursive=True)
  11. with XMLSitemap(sitemap_write_dir_path, base_url) as sitemap:
  12. sitemap.add_urls(iterate_html_files())