123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- import requests
- from bs4 import BeautifulSoup as bs
- import os
- from azure.storage.blob import BlobServiceClient, BlobClient
- from azure.storage.blob import ContentSettings, ContainerClient
# Connection string passed to BlobServiceClient.from_connection_string.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secret store instead of committing them.
MY_CONNECTION_STRING = "DefaultEndpointsProtocol************************"

# Name of the blob container that receives the uploaded images.
MY_IMAGE_CONTAINER = "picture"

# Directory whose .jpg files are uploaded. Written as a raw string so the
# backslash is a literal path separator rather than an (invalid) "\P" escape;
# the runtime value is unchanged.
LOCAL_IMAGE_PATH = r"..\Picture"

# Base address of the listing pages; the page number is appended to it.
URL = 'WebSiteURL'
class AzureBlobStorage:
    """Download images from a paginated website and upload them to Azure.

    ``Scrapp`` saves every ``<img>`` found on the listing pages into a local
    download directory; ``upload_all_images_in_folder`` then pushes the
    ``.jpg`` files found in ``LOCAL_IMAGE_PATH`` to the ``MY_IMAGE_CONTAINER``
    blob container.
    """

    # Download directory, relative to the working directory at the time
    # Scrapp is called. Raw string: same value as the former '.\Picture'
    # literal, without relying on an invalid escape sequence.
    DOWNLOAD_DIR = r'.\Picture'

    # Prefix put in front of each <img src="..."> value to build its URL.
    IMAGE_BASE_URL = 'WebSiteURL'

    # Seconds to wait for the server before giving up on a single request,
    # so one stalled connection cannot hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the blob service client from ``MY_CONNECTION_STRING``."""
        self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)

    @staticmethod
    def _local_file_name(src):
        """Turn an ``<img src>`` value into a flat local ``.jpg`` file name.

        Spaces become dashes, slashes are dropped, double quotes become single
        quotes, and the result always ends in exactly one ``.jpg`` suffix.
        """
        stem = src.replace(' ', '-').replace('/', '').replace('"', "'").replace('.jpg', '')
        return stem + '.jpg'

    def _fetch(self, url):
        """GET *url*; return the response, or ``None`` if the request failed.

        Failures (network errors, timeouts, non-2xx statuses) are reported on
        stdout and skipped so that one bad page or image does not abort the run
        or get written to disk as if it were an image.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"skipping {url}: {exc}")
            return None
        if not response.ok:
            print(f"skipping {url}: HTTP {response.status_code}")
            return None
        return response

    def Scrapp(self, page_begin=1, page_end=230):
        """Download every image on listing pages *page_begin*..*page_end*.

        Args:
            page_begin: First page number to fetch (appended to ``URL``).
            page_end: Last page number to fetch, inclusive.

        Side effects:
            Creates the download directory if needed and makes it the current
            working directory; files are written there.
        """
        if not os.path.exists(self.DOWNLOAD_DIR):
            os.mkdir(self.DOWNLOAD_DIR)
        # The chdir is kept on purpose: LOCAL_IMAGE_PATH, as defined in this
        # file, is relative ("..\Picture") and points back at this directory
        # only once the process is inside it.
        os.chdir(self.DOWNLOAD_DIR)

        for page in range(page_begin, page_end + 1):
            page_response = self._fetch(URL + str(page))
            if page_response is None:
                continue

            soup = bs(page_response.text, 'html.parser')
            for image in soup.find_all('img'):
                src = image.get('src')
                if not src:
                    # <img> tags without a src have nothing to download.
                    continue

                link = self.IMAGE_BASE_URL + src
                print(link)

                # Download before opening the file so a failed request does
                # not leave an empty .jpg behind.
                image_response = self._fetch(link)
                if image_response is None:
                    continue

                with open(self._local_file_name(src), 'wb') as f:
                    f.write(image_response.content)

                print('Writing: ', src)

    def upload_all_images_in_folder(self):
        """Upload every ``.jpg`` file found directly in ``LOCAL_IMAGE_PATH``.

        Only regular files whose name ends in ``.jpg`` are uploaded;
        sub-directories are ignored.
        """
        all_file_names = [
            f for f in os.listdir(LOCAL_IMAGE_PATH)
            if f.endswith(".jpg") and os.path.isfile(os.path.join(LOCAL_IMAGE_PATH, f))
        ]

        for file_name in all_file_names:
            self.upload_image(file_name)

    def upload_image(self, file_name):
        """Upload one file from ``LOCAL_IMAGE_PATH`` as a JPEG blob.

        Args:
            file_name: Name of the file inside ``LOCAL_IMAGE_PATH``; it is also
                used as the blob name. An existing blob of that name is
                overwritten.
        """
        blob_client = self.blob_service_client.get_blob_client(
            container=MY_IMAGE_CONTAINER,
            blob=file_name,
        )
        upload_file_path = os.path.join(LOCAL_IMAGE_PATH, file_name)

        # Set the content type explicitly so the blob is stored as image/jpeg.
        image_content_setting = ContentSettings(content_type='image/jpeg')
        print(f"uploading file - {file_name}")
        with open(upload_file_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True, content_settings=image_content_setting)
if __name__ == '__main__':
    # Script entry point: download the images first, then upload what was
    # saved. The order matters because Scrapp changes the working directory
    # before the upload step resolves its relative image path.
    storage = AzureBlobStorage()
    storage.Scrapp()
    storage.upload_all_images_in_folder()
|