123456789101112131415161718192021222324 |
def download_images(start_url: str, filetypes: List[str]) -> None:
    """Download the images referenced by ``<img>`` tags on a page.

    The page at ``start_url`` is fetched through ``get_soup``; if that
    returns ``None`` nothing is downloaded.  Every ``<img>`` whose ``src``
    contains at least one entry of ``filetypes`` as a substring is requested,
    and on an HTTP 200 response its bytes are written once per matching
    file type to::

        <save_image_dir>/<name><index><filetype>

    where ``<name>`` is the last path segment of ``src`` with every ``.``
    replaced by ``_`` and ``<index>`` is the position of the tag among the
    page's ``<img>`` elements.  ``save_image_dir`` is read from module scope.

    Args:
        start_url: URL of the page to scan for images.
        filetypes: Substrings to look for in each ``src`` (they are also
            appended verbatim to the saved file name, so entries such as
            ``'.png'`` are presumably expected -- confirm with callers).

    Returns:
        None.  Files are written as a side effect.
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import urljoin

    base_url = get_base_url(start_url)
    soup = get_soup(start_url)
    if soup is None:
        return

    for index, image in enumerate(soup.select('img')):
        src = image.get('src')
        if not src:
            # <img> without a usable src: str(None) would otherwise be
            # treated as the literal URL/file name 'None'.
            continue
        src_raw = str(src)

        # Decide up front which suffixes apply so the image is fetched at
        # most once, no matter how many file types match.
        matching_types = [ftype for ftype in filetypes if ftype in src_raw]
        if not matching_types:
            continue

        if src_raw.startswith('http'):
            image_url = src_raw
        elif src_raw.startswith('/') and not src_raw.startswith('//'):
            # Root-relative path: anchor it on the site's base URL.
            image_url = base_url + src_raw
        else:
            # Document-relative ('img/a.png') or protocol-relative
            # ('//cdn/a.png') reference: resolve against the page URL.
            image_url = urljoin(start_url, src_raw)

        # The whole body is read below, so streaming buys nothing and would
        # leave the connection unreleased on non-200 responses.
        image_response = requests.get(image_url)
        if image_response.status_code != 200:
            continue

        image_name = re.sub(r'.*/', '', src_raw).replace('.', '_')
        payload = image_response.content
        for image_type in matching_types:
            target = f"{save_image_dir}/{image_name}{index}{image_type}"
            # Context manager guarantees the handle is closed even if the
            # write fails.
            with open(target, 'wb') as fp:
                fp.write(payload)
|