undrain_recovery.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. '''Put the working set from the export script back into the database.'''
  2. import gzip
  3. import logging
  4. import pickle
  5. from terroroftinytown.tracker.bootstrap import Bootstrap
  6. from terroroftinytown.tracker.model import new_session, Result
  7. from sqlalchemy.sql.expression import insert
  8. logger = logging.getLogger(__name__)
  9. class UndrainBootstrap(Bootstrap):
  10. def start(self, args=None):
  11. super().start(args=args)
  12. logging.basicConfig(level=logging.INFO)
  13. self.recover()
  14. def setup_args(self):
  15. super().setup_args()
  16. self.arg_parser.add_argument('working_set_file')
  17. def recover(self):
  18. logger.info('Recovering from %s', self.args.working_set_file)
  19. with gzip.open(self.args.working_set_file, 'rb') as file, \
  20. new_session() as session:
  21. query = insert(Result)
  22. values = []
  23. line_num = 0
  24. while True:
  25. doc = pickle.load(file)
  26. if doc == 'eof':
  27. break
  28. values.append({
  29. 'project_id': doc['project_id'],
  30. 'shortcode': doc['shortcode'],
  31. 'url': doc['url'],
  32. 'encoding': doc['encoding'],
  33. 'datetime': doc['datetime'],
  34. })
  35. if line_num % 10000 == 0:
  36. logger.info('Recover progress: %d', line_num)
  37. session.execute(query, values)
  38. session.commit()
  39. values = []
  40. line_num += 1
  41. logger.info('Finishing up...')
  42. session.execute(query, values)
  43. session.commit()
  44. logger.info('Done!')
  45. if __name__ == '__main__':
  46. UndrainBootstrap().start()