# utils_24.py
  1. def parsing_sravni_ru(soup):
  2. names = soup.find_all('span', class_='_106rrj0') # scraping names
  3. # scraping age childrens
  4. age_divs = soup.find_all('div', {'style': 'grid-area:firstCell-1', 'class': '_pjql8'})
  5. ages = []
  6. for i in age_divs:
  7. age_span = i.find('span')
  8. ages.append(age_span)
  9. # scraping course duration
  10. duration_divs = soup.find_all('div', {'style': 'grid-area:secondCell-1', 'class': '_pjql8'})
  11. durations = []
  12. for i in duration_divs:
  13. duration_span = i.find('span')
  14. durations.append(duration_span)
  15. # scraping price
  16. prices = soup.find_all('span', class_='_e9qrci _k8dl2y')
  17. items = []
  18. for (n, l, i, p) in zip(names, ages, durations, prices):
  19. name = n.text.strip()
  20. age = l.text.strip()
  21. duration = i.text.strip()
  22. price = p.text.strip().replace('\xa0', '')
  23. items.append(
  24. {
  25. 'name': name,
  26. 'age': age,
  27. 'duration': duration,
  28. 'price': price,
  29. }
  30. )
  31. # save json file
  32. with open("./data/items.json", "w", encoding="utf-8") as f:
  33. json.dump(items, f, indent=4, ensure_ascii=False)
  34. with open("./data/items.csv", 'a', encoding="utf-8") as file:
  35. for i in items:
  36. writer = csv.writer(file)
  37. writer.writerow(
  38. (
  39. i['name'],
  40. i['age'],
  41. i['duration'],
  42. i['price']
  43. )
  44. )