# init_archive.py
  1. import urllib
  2. import urllib2
  3. import json
  4. import ConfigParser
  5. import sys
  6. from sync_fb import Archiver
  7. #To be done-- To use Requests instead of urllib2
  8. def main():
  9. url_fb = []
  10. try:
  11. if (len(sys.argv)>1):
  12. init_config = ConfigParser.ConfigParser()
  13. init_config.read(sys.argv[1])
  14. fb_url = init_config.get('group','url')
  15. else:
  16. print ('Correct usage: Init_archive <path to default properties file>')
  17. return
  18. jsdata = urllib2.urlopen(fb_url).read()
  19. jsondata = json.loads(jsdata)
  20. while(fb_url is not None):
  21. jsdata = urllib2.urlopen(fb_url).read()
  22. jsondata = json.loads(jsdata)
  23. #paging attribute used to go to next page
  24. fb_url = jsondata.get('paging').get('next')
  25. url_fb.append(fb_url)
  26. except Exception as err:
  27. print (str(len(url_fb)) + " :Length of json pages of group! Sit back and relax its gonna take some time an hour or so " )
  28. # Archive post in reverse order so the new link post to db and kipppt first
  29. fb_url = url_fb.pop()
  30. jsdata = urllib2.urlopen(fb_url).read()
  31. jsondata = json.loads(jsdata)
  32. while(fb_url is not None):
  33. Archiver(sys.argv[1]).process_data(fb_url)
  34. jsdata = urllib2.urlopen(fb_url).read()
  35. jsondata = json.loads(jsdata)
  36. fb_url = url_fb.pop()
# Run main() only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()