# producer-raw-recipies_4.py (1.2 KB)

  1. def get_recipes():
  2. recipies = []
  3. salad_url = 'https://www.allrecipes.com/recipes/96/salad/'
  4. url = 'https://www.allrecipes.com/recipes/96/salad/'
  5. print('Accessing list')
  6. try:
  7. r = requests.get(url, headers=headers)
  8. if r.status_code == 200:
  9. html = r.text
  10. soup = BeautifulSoup(html, 'lxml')
  11. links = soup.select('.fixed-recipe-card__h3 a')
  12. idx = 0
  13. for link in links:
  14. sleep(2)
  15. recipe = fetch_raw(link['href'])
  16. recipies.append(recipe)
  17. idx += 1
  18. except Exception as ex:
  19. print('Exception in get_recipes')
  20. print(str(ex))
  21. finally:
  22. return recipies
  23. if __name__ == '__main__':
  24. headers = {
  25. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
  26. 'Pragma': 'no-cache'
  27. }
  28. all_recipes = get_recipes()
  29. if len(all_recipes) > 0:
  30. kafka_producer = connect_kafka_producer()
  31. for recipe in all_recipes:
  32. publish_message(kafka_producer, 'raw_recipes', 'raw', recipe.strip())
  33. if kafka_producer is not None:
  34. kafka_producer.close()