utils_37.py 1.2 KB

123456789101112131415161718192021222324
  1. def download_images(start_url: str, filetypes: List[str]) -> None:
  2. """.."""
  3. base_url = get_base_url(start_url)
  4. # print(start_url)
  5. soup = get_soup(start_url) # ;print(soup)
  6. if soup is not None:
  7. for index, image in enumerate(soup.select('img')): # print(image)
  8. # image_raw = str(image)
  9. src_raw = str(image.get('src')) # print(image.attrs['src'])
  10. if src_raw.startswith('http'):
  11. image_url = src_raw
  12. elif src_raw.startswith('/'):
  13. image_url = base_url + src_raw
  14. else:
  15. image_url = src_raw
  16. # print(image_url)
  17. for image_type in filter(lambda x: x in src_raw, filetypes): # print(image)
  18. image_response = requests.get(image_url, stream=True)
  19. if image_response.status_code == 200:
  20. image_name = re.sub(r'.*/', '', src_raw).replace(R'.', '_')
  21. # print(image_name, index)
  22. fp: BinaryIO = open(save_image_dir + '/' + image_name + str(index) + image_type, 'wb')
  23. fp.write(image_response.content)
  24. fp.close()