# archive_group_2.py (1.4 KB)
  1. def archive_message(groupName, msgNumber, depth=0):
  2. global failed
  3. failed = False
  4. s = requests.Session()
  5. resp = s.get('https://groups.yahoo.com/api/v1/groups/' + groupName + '/messages/' + str(msgNumber) + '/raw', cookies={'T': cookie_T, 'Y': cookie_Y})
  6. if resp.status_code != 200:
  7. #some other problem, perhaps being refused access by Yahoo?
  8. #retry for a max of 3 times anyway
  9. if depth < 3:
  10. print ("Cannot get message " + str(msgNumber) + ", attempt " + str(depth+1) + " of 3 due to HTTP status code " + str(resp.status_code))
  11. time.sleep(0.1)
  12. archive_message(groupName,msgNumber,depth+1)
  13. else:
  14. if resp.status_code == 500:
  15. #we are most likely being blocked by Yahoo
  16. log("Archive halted - it appears Yahoo has blocked you.", groupName)
  17. log("Check if you can access the group's homepage from your browser. If you can't, you have been blocked.", groupName)
  18. log("Don't worry, in a few hours (normally less than 3) you'll be unblocked and you can run this script again - it'll continue where you left off." ,groupName)
  19. sys.exit()
  20. log("Failed to retrive message " + str(msgNumber) + " due to HTTP status code " + str(resp.status_code), groupName )
  21. failed = True
  22. if failed == True:
  23. return False
  24. msgJson = resp.text
  25. writeFile = open((groupName + "/" + str(msgNumber) + ".json"), "wb")
  26. writeFile.write(msgJson.encode('utf-8'))
  27. writeFile.close()
  28. return True