# running_stats.py
'''Tool for a script to keep track of changes performed on a large number
of objects.

StatsCount - when you are counting incidences of a small set of outcomes
StatsList - when you also want to remember an ID associated with each incidence

Examples:

from running_stats import StatsCount
package_stats = StatsCount()
for package in packages:
    if package.enabled:
        package.delete()
        package_stats.increment('deleted')
    else:
        package_stats.increment('not deleted')
print(package_stats.report())
> deleted: 30
> not deleted: 70

from running_stats import StatsList
package_stats = StatsList()
for package in packages:
    if package.enabled:
        package.delete()
        package_stats.add('deleted', package.name)
    else:
        package_stats.add('not deleted', package.name)
print(package_stats.report())
> deleted: 30 pollution-uk, flood-regions, river-quality, ...
> not deleted: 70 spending-bristol, ...
'''
from __future__ import print_function

import copy
import datetime
  32. class StatsCount(dict):
  33. # {category:count}
  34. _init_value = 0
  35. report_value_limit = 150
  36. def __init__(self, *args, **kwargs):
  37. self._start_time = datetime.datetime.now()
  38. super(StatsCount, self).__init__(*args, **kwargs)
  39. def _init_category(self, category):
  40. if category not in self:
  41. self[category] = copy.deepcopy(self._init_value)
  42. def increment(self, category):
  43. self._init_category(category)
  44. self[category] += 1
  45. def report_value(self, category):
  46. '''Returns the value for a category and value to sort categories by.'''
  47. value = repr(self[category])
  48. if len(value) > self.report_value_limit:
  49. value = value[:self.report_value_limit] + '...'
  50. return (value, self[category])
  51. def report(self, indent=1, order_by_title=False, show_time_taken=True):
  52. lines = []
  53. indent_str = '\t' * indent
  54. report_dict = dict()
  55. for category in list(self.keys()):
  56. report_dict[category] = self.report_value(category)
  57. if order_by_title:
  58. items = sorted(report_dict.items())
  59. else:
  60. items = sorted(iter(report_dict.items()),
  61. key=lambda x: -x[1][1])
  62. for category, value_tuple in items:
  63. value = value_tuple[0]
  64. lines.append(indent_str + '%s: %s' % (category, value))
  65. if not self:
  66. lines = [indent_str + 'None']
  67. if show_time_taken:
  68. time_taken = datetime.datetime.now() - self._start_time
  69. lines.append(indent_str + 'Time taken (h:m:s): %s' % time_taken)
  70. return '\n'.join(lines)
  71. class StatsList(StatsCount):
  72. # {category:[values]}
  73. _init_value = []
  74. def add(self, category, value):
  75. self._init_category(category)
  76. self[category].append(value)
  77. return '%s: %s' % (category, value) # so you can log it too
  78. def report_value(self, category):
  79. value = self[category]
  80. number_of_values = len(value)
  81. value_str = '%i %r' % (number_of_values, value)
  82. if len(value_str) > self.report_value_limit:
  83. value_str = value_str[:self.report_value_limit] + '...'
  84. return (value_str, number_of_values)
  85. if __name__ == '__main__':
  86. package_stats = StatsList()
  87. package_stats.add('Success', 'good1')
  88. package_stats.add('Success', 'good2')
  89. package_stats.add('Success', 'good3')
  90. package_stats.add('Success', 'good4')
  91. package_stats.add('Failure', 'bad1')
  92. print(package_stats.report())
  93. print(StatsList().report())