commands.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. from __future__ import print_function
  2. import logging
  3. import sys
  4. from ckan.lib.cli import CkanCommand
  5. from ckanext.archiver import utils
  # HTTP header mapping that declares a JSON content type.
  # NOTE(review): nothing in the visible part of this module references this
  # constant -- confirm whether other modules import it before removing it.
  6. REQUESTS_HEADER = {'content-type': 'application/json'}
  class Archiver(CkanCommand):
      '''
      Download and save copies of all package resources.

      The result of each download attempt is saved to the CKAN task_status table,
      so the information can be used later for QA analysis.

      Usage:

          paster archiver init
             - Creates the database table archiver needs to run

          paster archiver update [{package-name/id}|{group-name/id}]
             - Archive all resources or just those belonging to a specific
               package or group, if specified

          paster archiver update-test [{package-name/id}|{group-name/id}]
             - Does an archive in the current process i.e. avoiding Celery queue
               so that you can test on the command-line more easily.

          paster archiver clean-status
             - Cleans the TaskStatus records that contain the status of each
               archived resource, whether it was successful or not, with errors.
               It does not change the cache_url etc. in the Resource

          paster archiver clean-cached-resources
             - Removes all cache_urls and other references to resource files on
               disk.

          paster archiver view [{dataset name/id}]
             - Views archival info, in general and if you specify one, about
               a particular dataset\'s resources.

          paster archiver report [outputfile]
             - Generates a report on orphans, either resources where the path
               does not exist, or files on disk that don't have a corresponding
               resource. The outputfile parameter is the name of the CSV output
               from running the report

          paster archiver delete-orphans [outputfile]
             - Deletes orphans that are files on disk with no corresponding
               resource. This uses the report command and will write out a
               report to [outputfile]

          paster archiver migrate-archive-dirs
             - Migrate the layout of the archived resource directories.
               Previous versions of ckanext-archiver stored resources on disk
               at: {resource-id}/filename.csv and this version puts them at:
               {2-chars-of-resource-id}/{resource-id}/filename.csv
               Running this moves them to the new locations and updates the
               cache_url on each resource to reflect the new location.

          paster archiver migrate
             - Updates the database schema to include new fields.

          paster archiver size-report
             - Reports on the sizes of files archived.

          paster archiver delete-files-larger-than-max
             - For when you reduce the ckanext-archiver.max_content_length and
               want to delete archived files that are now above the threshold,
               and stop referring to these files in the Archival table of the db.
      '''
      # TODO
      # paster archiver clean-files
      #    - Remove all archived resources

      # The docstring opens with a newline, so it must be stripped before the
      # first line is taken; otherwise the summary is always the empty string.
      summary = __doc__.strip().split('\n')[0]
      usage = __doc__
      min_args = 0
      max_args = 2

      def __init__(self, name):
          """Set up the command and register the ``-q/--queue`` option.

          :param name: command name, passed straight to the parent constructor.
          """
          super(Archiver, self).__init__(name)
          self.parser.add_option('-q', '--queue',
                                 action='store',
                                 dest='queue',
                                 help='Send to a particular queue')

      def command(self):
          """
          Parse command line arguments and call appropriate method.

          With no arguments, or when the first argument is a help flag, the
          usage text is printed and the process exits with status 1.
          Otherwise the config is loaded and the sub-command named by the first
          argument is run. A missing output-file argument or an unrecognised
          sub-command is logged as an error and the method simply returns.
          """
          if not self.args or self.args[0] in ('--help', '-h', 'help'):
              print(self.usage)
              sys.exit(1)

          cmd = self.args[0]
          self._load_config()

          # Initialise logger after the config is loaded, so it is not disabled.
          self.log = logging.getLogger(__name__)

          if cmd == 'update':
              self.update()
          elif cmd == 'update-test':
              self.update_test()
          elif cmd == 'clean-status':
              self.clean_status()
          elif cmd == 'clean-cached-resources':
              self.clean_cached_resources()
          elif cmd == 'view':
              # The dataset name/id is optional.
              if len(self.args) == 2:
                  utils.view(self.args[1])
              else:
                  utils.view()
          elif cmd in ('report', 'delete-orphans'):
              # Both sub-commands run the same report; only 'delete-orphans'
              # asks for deletion as well.
              if len(self.args) != 2:
                  self.log.error('Command requires a parameter, the name of the output')
                  return
              self.report(self.args[1], delete=(cmd == 'delete-orphans'))
          elif cmd == 'init':
              utils.init()
              self.log.info('Archiver tables are initialized')
          elif cmd == 'migrate-archive-dirs':
              self.migrate_archive_dirs()
          elif cmd == 'migrate':
              self.migrate()
          elif cmd == 'size-report':
              self.size_report()
          elif cmd == 'delete-files-larger-than-max':
              self.delete_files_larger_than_max_content_length()
          else:
              self.log.error('Command %s not recognized', cmd)

      def update(self):
          """Run ``utils.update`` with the remaining arguments and the queue option."""
          utils.update(self.args[1:], self.options.queue)
          self.log.info('Completed queueing')

      def update_test(self):
          """Run ``utils.update_test`` with the remaining arguments and the queue option."""
          utils.update_test(self.args[1:], self.options.queue)
          self.log.info('Completed test update')

      def clean_status(self):
          """Delegate to ``utils.clean_status``."""
          utils.clean_status()

      def clean_cached_resources(self):
          """Delegate to ``utils.clean_cached_resources``."""
          utils.clean_cached_resources()

      def report(self, output_file, delete=False):
          """Delegate to ``utils.report``.

          :param output_file: name of the report output file, as given on the
              command line.
          :param delete: forwarded unchanged; ``command`` passes True for the
              'delete-orphans' sub-command and False for 'report'.
          """
          utils.report(output_file, delete)

      def migrate(self):
          """Delegate to ``utils.migrate``."""
          utils.migrate()

      def migrate_archive_dirs(self):
          """Delegate to ``utils.migrate_archive_dirs``."""
          utils.migrate_archive_dirs()

      def size_report(self):
          """Delegate to ``utils.size_report``."""
          utils.size_report()

      def delete_files_larger_than_max_content_length(self):
          """Delegate to ``utils.delete_files_larger_than_max_content_length``."""
          utils.delete_files_larger_than_max_content_length()