main_14.py 705 B

1234567891011121314151617
  def _parse_url_parts(self, tld_extractor: TLDExtract, url_str: str) -> dict:
      """Split ``url_str`` into its components.

      The host-related entries come from ``tld_extractor``; every other entry
      is obtained by handing a regex and ``url_str`` to ``self._find_first``.

      Args:
          tld_extractor: Callable applied to ``url_str``. Its result must
              expose ``subdomain``, ``domain`` and ``suffix`` attributes.
          url_str: The URL to take apart.

      Returns:
          A dict with the keys ``scheme``, ``auth``, ``domain``,
          ``subdomain``, ``suffix``, ``path``, ``query`` and ``fragment``.
          The regex-derived values are whatever ``self._find_first`` returns
          (presumably the first capture group, or a "no match" value --
          NOTE(review): confirm against that helper).
      """
      import re  # local import: only needed for re.escape below

      extracted = tld_extractor(url_str)

      # Build the host pattern from the named attributes instead of iterating
      # the result object: empty components would otherwise leave stray dots
      # in the pattern, and an unescaped "." matches any character.
      host_labels = (extracted.subdomain, extracted.domain, extracted.suffix)
      host_pattern = r"\.".join(
          re.escape(label) for label in host_labels if label
      )

      return {
          "scheme": self._find_first(r"^([a-z0-9]+)\:\/\/", url_str),
          "auth": self._find_first(r"(?:.*\/\/|^)(.*:.*)@.*", url_str),
          "domain": extracted.domain,
          "subdomain": extracted.subdomain,
          "suffix": extracted.suffix,
          # Optional scheme, the host, an optional port, then everything up
          # to the first "?" or "#" is the path.
          "path": self._find_first(
              rf"(?:^[a-z0-9]+\:\/\/)?{host_pattern}(?:\:\d+)?([^#?]*).*",
              url_str,
          ),
          # Every value, including the first one, may be longer than one
          # character, so each value class carries a "+" quantifier.
          "query": self._find_first(
              r".*(\?\w+=[a-zA-Z0-9]+(?:&\w+=[a-zA-Z0-9]+)*).*", url_str
          ),
          "fragment": self._find_first(r".*#(.*)", url_str),
      }