1234567891011121314151617 |
def _parse_url_parts(self, tld_extractor: TLDExtract, url_str: str) -> dict:
    """Split ``url_str`` into named components.

    The host-related pieces come from ``tld_extractor``; every other piece is
    pulled out of ``url_str`` with a regular expression via
    ``self._find_first``.

    Args:
        tld_extractor: Callable applied to ``url_str``. Its result must expose
            ``subdomain``, ``domain`` and ``suffix`` attributes.
        url_str: The URL text to take apart.

    Returns:
        A dict with the keys ``scheme``, ``auth``, ``domain``, ``subdomain``,
        ``suffix``, ``path``, ``query`` and ``fragment``. ``domain``,
        ``subdomain`` and ``suffix`` are copied from the extractor result; the
        remaining values are whatever ``self._find_first`` returns for the
        corresponding pattern (including its "no match" value).
    """
    # Function-scope import so the method does not rely on a module-level
    # ``import re`` that is not visible from this block.
    import re

    url = tld_extractor(url_str)

    parts = {
        "scheme": self._find_first(r"^([a-z0-9]+)\:\/\/", url_str),
        "auth": self._find_first(r"(?:.*\/\/|^)(.*:.*)@.*", url_str),
        "domain": url.domain,
        "subdomain": url.subdomain,
        "suffix": url.suffix,
    }

    # Rebuild the host from the named attributes rather than by iterating the
    # extractor result: iteration breaks as soon as the result carries a
    # non-string field or is not iterable at all. Empty labels are dropped so
    # a URL without a subdomain (or an IP without a suffix) does not produce
    # a stray leading/trailing dot in the pattern.
    host = ".".join(
        label for label in (url.subdomain, url.domain, url.suffix) if label
    )
    # Escape the host so its dots are matched literally instead of acting as
    # single-character wildcards.
    host_pattern = re.escape(host)

    # Path: everything after the (optional) scheme, host and port, up to the
    # first '#' or '?'.
    parts["path"] = self._find_first(
        rf"(?:^[a-z0-9]+\:\/\/)?{host_pattern}(?:\:\d+)?([^#?]*).*", url_str
    )
    # Query: '?key=value' followed by any number of '&key=value' pairs. The
    # first value needs '+' as well, otherwise only its first character is
    # captured.
    parts["query"] = self._find_first(
        r".*(\?\w+=[a-zA-Z0-9]+(?:&\w+=[a-zA-Z0-9]+)*).*", url_str
    )
    # Fragment: the text after the last '#' (the leading '.*' is greedy).
    parts["fragment"] = self._find_first(r".*#(.*)", url_str)
    return parts
|