# fetch.py (2.1 KB)

  1. import warnings
  2. warnings.simplefilter('ignore', DeprecationWarning)
  3. import httplib2, urllib, time
  4. try:
  5. import json
  6. except ImportError:
  7. import simplejson as json
  8. from config import USERNAME, PASSWORD
  9. USER_TIMELINE = "http://twitter.com/statuses/user_timeline.json"
  10. FILE = "my_tweets.json"
  11. h = httplib2.Http()
  12. h.add_credentials(USERNAME, PASSWORD, 'twitter.com')
  13. def load_all():
  14. try:
  15. return json.load(open(FILE))
  16. except IOError:
  17. return []
  18. def fetch_and_save_new_tweets():
  19. tweets = load_all()
  20. old_tweet_ids = set(t['id'] for t in tweets)
  21. if tweets:
  22. since_id = max(t['id'] for t in tweets)
  23. else:
  24. since_id = None
  25. new_tweets = fetch_all(since_id)
  26. num_new_saved = 0
  27. for tweet in new_tweets:
  28. if tweet['id'] not in old_tweet_ids:
  29. tweets.append(tweet)
  30. num_new_saved += 1
  31. tweets.sort(key = lambda t: t['id'], reverse=True)
  32. # Delete the 'user' key
  33. for t in tweets:
  34. if 'user' in t:
  35. del t['user']
  36. # Save back to disk
  37. json.dump(tweets, open(FILE, 'w'), indent = 2)
  38. print ("Saved %s new tweets") % num_new_saved
  39. def fetch_all(since_id = None):
  40. all_tweets = []
  41. seen_ids = set()
  42. page = 0
  43. args = {'count': 200}
  44. if since_id is not None:
  45. args['since_id'] = since_id
  46. all_tweets_len = len(all_tweets)
  47. while True:
  48. args['page'] = page
  49. headers, body = h.request(
  50. USER_TIMELINE + '?' + urllib.urlencode(args), method='GET'
  51. )
  52. page += 1
  53. tweets = json.loads(body)
  54. if 'error' in tweets:
  55. raise (ValueError, tweets)
  56. if not tweets:
  57. break
  58. for tweet in tweets:
  59. if tweet['id'] not in seen_ids:
  60. seen_ids.add(tweet['id'])
  61. all_tweets.append(tweet)
  62. #print "Fetched another %s" % (len(all_tweets) - all_tweets_len)
  63. all_tweets_len = len(all_tweets)
  64. time.sleep(2)
  65. all_tweets.sort(key = lambda t: t['id'], reverse=True)
  66. return all_tweets
  67. if __name__ == '__main__':
  68. fetch_and_save_new_tweets()