123456789101112131415161718192021222324252627282930313233343536373839 |
def get_recipes():
    """Scrape the salad listing page and collect the raw content of each recipe.

    Downloads the listing page, selects every ``.fixed-recipe-card__h3 a``
    anchor, and passes each anchor's ``href`` to ``fetch_raw``, sleeping two
    seconds before every recipe request.

    The request uses the module-level ``headers`` mapping, which must be
    defined before this function is called.

    Returns:
        list: The values returned by ``fetch_raw``, in page order. The list
        is empty when the listing page does not answer with status 200, and
        holds whatever was collected so far when an ``Exception`` is raised
        part-way through (the error is printed, not re-raised).
    """
    recipes = []
    url = 'https://www.allrecipes.com/recipes/96/salad/'
    print('Accessing list')
    try:
        # Without a timeout, requests waits forever on an unresponsive server.
        r = requests.get(url, headers=headers, timeout=30)
        if r.status_code == 200:
            soup = BeautifulSoup(r.text, 'lxml')
            for link in soup.select('.fixed-recipe-card__h3 a'):
                # Pause between requests so the site is not hit in a burst.
                sleep(2)
                recipes.append(fetch_raw(link['href']))
    except Exception as ex:
        # Best-effort scrape: report the problem and keep the partial result.
        print('Exception in get_recipes')
        print(str(ex))
    # Return after the try statement rather than from a `finally` clause: a
    # `return` in `finally` would also discard KeyboardInterrupt/SystemExit.
    return recipes
if __name__ == '__main__':
    # Script entry point: scrape the recipes, then publish each one to the
    # 'raw_recipes' Kafka topic under the key 'raw'.

    # `headers` is deliberately a module-level name: get_recipes() reads it
    # as a global when it requests the listing page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Pragma': 'no-cache',
    }

    all_recipes = get_recipes()
    if all_recipes:
        kafka_producer = connect_kafka_producer()
        try:
            for recipe in all_recipes:
                publish_message(kafka_producer, 'raw_recipes', 'raw', recipe.strip())
        finally:
            # Close the producer even if publishing fails part-way through.
            # The None check is kept from the original code.
            if kafka_producer is not None:
                kafka_producer.close()
|