def _name_from_page_bytes(page_content: bytes) -> str:
    """Derive a file-name stem from the leading bytes of a downloaded page.

    The first 100 bytes are decoded as ISO-8859-1, ``%`` is replaced by ``_``,
    and everything that is not alphanumeric or ``_`` is dropped.  Characters
    10-29 of the result form the name; the first ten are skipped because they
    usually just state the file type.

    If that leaves nothing usable, the name falls back to ``"unk_"`` followed
    by the decimal values of bytes 10-29 of the page (backslash bytes
    excluded), so the caller never receives an empty name.
    """
    # ISO-8859-1 maps every possible byte value to a character, so this decode
    # cannot raise UnicodeDecodeError and needs no try/except around it.
    page_unicode = page_content[:100].decode("ISO-8859-1").replace("%", "_")
    safe_chars = "".join(
        char for char in page_unicode if char.isalnum() or char == "_"
    )
    name = safe_chars[10:30]
    if name:
        return name

    # Too few usable characters: build a numeric name from the raw bytes.
    # Iterating over bytes yields ints, so compare against the integer value
    # of the backslash rather than against a bytes object.
    backslash = ord("\\")
    return "unk_" + "".join(
        str(byte) for byte in page_content[10:30] if byte != backslash
    )


def download_unknowns(url: str) -> None:
    """Download a page of unrecognised type and save its raw bytes to disk.

    The content is fetched with ``get_none_soup(url)`` (annotated as returning
    ``bytes``), a file name is derived from the first bytes of the content,
    and the whole content is written in binary mode under ``save_file_dir``
    (a module-level name defined elsewhere in this file).

    The extension is ``.pdf`` when ``%PDF`` appears in the first 100 bytes and
    ``.unk.txt`` otherwise.  The chosen file name is printed before writing.

    Args:
        url: Address of the page to download.
    """
    page_content: bytes = get_none_soup(url)

    unknown_file_name = _name_from_page_bytes(page_content)
    print(unknown_file_name)

    # Check the beginning of the page bytes for a known file-type marker.
    extension = ".pdf" if b"%PDF" in page_content[:100] else ".unk.txt"

    with open(save_file_dir + "/" + unknown_file_name + extension, "wb") as file:
        file.write(page_content)