LiuFan
/
PrivacyScanData


			
				
					
						
						
							1234567891011121314151617181920212223242526272829
							def download_unknowns(url: str) -> None:
							    """Download a resource of unknown type and save it under a derived name.

							    The raw bytes come from ``get_none_soup(url)``. The file name is taken
							    from the first 100 bytes: they are decoded to text, ``%`` is replaced
							    by ``_``, everything except alphanumerics and ``_`` is dropped, and
							    characters 10-29 of what remains are used (the leading characters are
							    skipped because they usually just state the file type). If that leaves
							    nothing, the name is ``unk_`` followed by the decimal values of bytes
							    10-29 of the content.

							    The extension is ``.pdf`` when ``%PDF`` occurs in the first 100 bytes,
							    otherwise ``.unk.txt``. The content is written unchanged into
							    ``save_file_dir``, and the chosen name is printed.

							    Args:
							        url: Address handed to ``get_none_soup`` to fetch the content.
							    """
							    # Function-scope import: the file's top-level imports are not visible here.
							    import os

							    page_content: bytes = get_none_soup(url)
							    page_head: bytes = page_content[:100]

							    # Strict UTF-8 has to be tried first: ISO-8859-1 maps every possible
							    # byte, so it never raises and would make any later fallback dead code.
							    # A multi-byte character cut off at byte 100 also lands in the fallback.
							    try:
							        head_text = page_head.decode("utf-8")
							    except UnicodeDecodeError:
							        head_text = page_head.decode("ISO-8859-1")

							    head_text = head_text.replace("%", "_")
							    unknown_file_name = "".join(
							        char for char in head_text if char.isalnum() or char == "_"
							    )[10:30]

							    if not unknown_file_name:
							        # Too few usable characters: build the name from byte values so the
							        # file never ends up named by its extension alone. Iterating bytes
							        # yields ints, so each one becomes a run of filename-safe digits.
							        unknown_file_name = "unk_" + "".join(
							            str(byte) for byte in page_content[10:30]
							        )
							    print(unknown_file_name)

							    # Check the beginning of the content for a known file-type marker.
							    extension = ".pdf" if b"%PDF" in page_head else ".unk.txt"

							    target_path = os.path.join(save_file_dir, unknown_file_name + extension)
							    with open(target_path, "wb") as file:
							        file.write(page_content)