# pre2table.py — fetch a server auto-index page and rewrite its <pre>
# directory listing as a three-column HTML table saved to a local report file.
import sys
from typing import List, Optional

from bs4 import BeautifulSoup, Tag
from requests import Response, get
  8. def main():
  9. if len(sys.argv) != 2:
  10. raise ValueError("Pass URL as script parameter")
  11. url: str = sys.argv[1]
  12. if not url.startswith("http"):
  13. raise ValueError("Pass valid URL as script parameter")
  14. if not url.endswith("/"):
  15. url = f"{url}/"
  16. report_filename: str = f'{url.rstrip("/").split(sep="/")[-1]}.html'
  17. response: Response = get(url=url)
  18. if response.status_code == 200:
  19. soup: BeautifulSoup = BeautifulSoup(markup=response.text, features="html.parser")
  20. pre_tag: Tag = soup.find(name="pre")
  21. all_a_tags: List[Tag] = soup.find_all(name="a")
  22. all_hrefs: List[str] = [a_tag.attrs.get("href") for a_tag in all_a_tags]
  23. dates_and_sizes: List[Tag] = [a_tag.next_sibling for a_tag in all_a_tags]
  24. new_dates_and_sizes: List[dict] = [{"date": None, "time": None, "size": None}]
  25. for ds in dates_and_sizes:
  26. ds = str(ds).strip()
  27. if ds:
  28. date = ds.split()[0]
  29. time = ds.split()[1]
  30. size = ds.split()[-1]
  31. new_dates_and_sizes.append(
  32. {"date": date, "time": time, "size": size}
  33. )
  34. all_full_hrefs = [f"{url}{href}" for href in all_hrefs]
  35. table: Tag = soup.new_tag(name="table", style="font-family: monospace")
  36. table_header: Tag = soup.new_tag(name="thead")
  37. table_header_row: Tag = soup.new_tag(name="tr")
  38. td: Tag = soup.new_tag(name="td")
  39. td.string = "Filename"
  40. table_header_row.append(tag=td)
  41. td: Tag = soup.new_tag(name="td")
  42. td.string = "Date/Time"
  43. table_header_row.append(tag=td)
  44. td: Tag = soup.new_tag(name="td")
  45. td.string = "Size"
  46. table_header_row.append(tag=td)
  47. table_header.append(tag=table_header_row)
  48. table_body: Tag = soup.new_tag(name="tbody")
  49. for href, full_href, date_size in zip(all_hrefs, all_full_hrefs, new_dates_and_sizes):
  50. table_row: Tag = soup.new_tag(name="tr")
  51. td: Tag = soup.new_tag(name="td")
  52. a: Tag = soup.new_tag(name="a", href=full_href)
  53. a.string = href
  54. td.append(tag=a)
  55. table_row.append(tag=td)
  56. date_time: str = f'{date_size.get("date")} {date_size.get("time")}'
  57. if "None" in date_time:
  58. date_time = ""
  59. td: Tag = soup.new_tag(name="td")
  60. td.string = date_time
  61. table_row.append(tag=td)
  62. size: str = f'{date_size.get("size")}'
  63. if "None" in size:
  64. size = ""
  65. td: Tag = soup.new_tag(name="td")
  66. td.string = size
  67. table_row.append(tag=td)
  68. table_body.append(table_row)
  69. table.append(tag=table_header)
  70. table.append(tag=table_body)
  71. pre_tag.insert_after(table)
  72. pre_tag.decompose()
  73. with open(file=report_filename, mode="w", encoding="utf-8") as f:
  74. s = str(soup)
  75. f.write(s)
  76. #
  77. if __name__ == '__main__':
  78. main()
  79. #