123456789101112131415161718192021222324 |
def download_images(start_url: str, filetypes: List[str]) -> None:
    """Download the images referenced by ``<img>`` tags on ``start_url``.

    The page is loaded through ``get_soup``; if that returns ``None`` nothing
    is done. For each ``img`` element whose ``src`` contains at least one of
    ``filetypes``, the image is fetched once and, on an HTTP 200 response,
    written into ``save_image_dir`` once per matching file type.

    The file name is the last path segment of ``src`` with dots replaced by
    underscores, followed by the element's index on the page and the matched
    file type (e.g. ``logo_png3.png``).

    Args:
        start_url: URL of the page to scan for images.
        filetypes: Substrings to look for in each ``src`` (presumably
            extensions such as ``".png"``); the matched value is also
            appended to the saved file name.
    """
    # Local import: the file's top-level import block is not visible here.
    from urllib.parse import urljoin, urlparse

    base_url = get_base_url(start_url)

    soup = get_soup(start_url)
    if soup is None:
        return

    for index, image in enumerate(soup.select('img')):
        src = image.get('src')
        if not src:
            # No usable src; str(None) would otherwise become the text 'None'.
            continue
        src_raw = str(src)

        matching_types = [t for t in filetypes if t in src_raw]
        if not matching_types:
            continue

        if src_raw.startswith('http'):
            image_url = src_raw
        elif src_raw.startswith('/') and not src_raw.startswith('//'):
            image_url = base_url + src_raw
        else:
            # Document-relative ("img/a.png") and protocol-relative
            # ("//cdn/a.png") references must be resolved against the page.
            image_url = urljoin(start_url, src_raw)

        # Skip data:, javascript: and other schemes requests cannot fetch.
        if urlparse(image_url).scheme not in ('http', 'https'):
            continue

        # Fetch once per image; the context manager releases the streamed
        # connection even when the status is not 200.
        with requests.get(image_url, stream=True) as image_response:
            if image_response.status_code != 200:
                continue
            content = image_response.content

        image_name = re.sub(r'.*/', '', src_raw).replace('.', '_')
        for image_type in matching_types:
            target = f'{save_image_dir}/{image_name}{index}{image_type}'
            with open(target, 'wb') as fp:
                fp.write(content)
|