#!/usr/bin/env python
"""
Summarizes results of benchmarking.

Usage
=====

Run this script with::

    ./summarize performance.csv

And that should output::

    +------------------------+----------+----------------------+
    | Metric over 1 run(s)   | Mean     | Standard Deviation   |
    +========================+==========+======================+
    | Total Time (seconds)   | 1.200    | 0.0                  |
    +------------------------+----------+----------------------+
    | Maximum Memory         | 42.3 MiB | 0 Bytes              |
    +------------------------+----------+----------------------+
    | Maximum CPU (percent)  | 88.1     | 0.0                  |
    +------------------------+----------+----------------------+
    | Average Memory         | 33.9 MiB | 0 Bytes              |
    +------------------------+----------+----------------------+
    | Average CPU (percent)  | 30.5     | 0.0                  |
    +------------------------+----------+----------------------+

The script can also be run with multiple files::

    ./summarize performance.csv performance-2.csv

And it will produce similar output::

    +------------------------+----------+----------------------+
    | Metric over 2 run(s)   | Mean     | Standard Deviation   |
    +========================+==========+======================+
    | Total Time (seconds)   | 1.155    | 0.0449999570847      |
    +------------------------+----------+----------------------+
    | Maximum Memory         | 42.5 MiB | 110.0 KiB            |
    +------------------------+----------+----------------------+
    | Maximum CPU (percent)  | 94.5     | 6.45                 |
    +------------------------+----------+----------------------+
    | Average Memory         | 35.6 MiB | 1.7 MiB              |
    +------------------------+----------+----------------------+
    | Average CPU (percent)  | 27.5     | 3.03068181818        |
    +------------------------+----------+----------------------+

You can also specify the ``--output-format json`` option to print the
summary as JSON instead of a pretty printed table::

    {
        "total_time": 72.76999998092651,
        "std_dev_average_memory": 0.0,
        "std_dev_total_time": 0.0,
        "average_memory": 56884518.57534247,
        "std_dev_average_cpu": 0.0,
        "std_dev_max_memory": 0.0,
        "average_cpu": 61.19315068493151,
        "max_memory": 58331136.0
    }
"""
import argparse
import csv
import json
from math import sqrt

from tabulate import tabulate


def human_readable_size(value):
    """Converts an integer number of bytes to a human readable string."""
    humanize_suffixes = ('KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB')
    base = 1024
    bytes_int = float(value)

    if bytes_int == 1:
        return '1 Byte'
    elif bytes_int < base:
        return '%d Bytes' % bytes_int

    for i, suffix in enumerate(humanize_suffixes):
        unit = base ** (i + 2)
        if round((bytes_int / unit) * base) < base:
            return f'{(base * bytes_int / unit):.1f} {suffix}'
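

# Illustrative values only, derived from the conversion logic above:
#   human_readable_size(1)          -> '1 Byte'
#   human_readable_size(512)        -> '512 Bytes'
#   human_readable_size(58331136.0) -> '55.6 MiB'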


class Summarizer:
    DATA_INDEX_IN_ROW = {'time': 0, 'memory': 1, 'cpu': 2}
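    # Each CSV row produced by the benchmark script is assumed to hold, in
    # order, a timestamp in seconds, a memory reading in bytes, and a CPU
    # utilization percentage, e.g. a hypothetical row of
    # "1578441600.5,37748736,31.0".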

    def __init__(self):
        self.total_files = 0
        self._num_rows = 0
        self._start_time = None
        self._end_time = None
        self._totals = {
            'time': [],
            'average_memory': [],
            'average_cpu': [],
            'max_memory': [],
            'max_cpu': [],
        }
        self._averages = {
            'memory': 0.0,
            'cpu': 0.0,
        }
        self._maximums = {'memory': 0.0, 'cpu': 0.0}

    @property
    def total_time(self):
        return self._average_across_all_files('time')

    @property
    def max_cpu(self):
        return self._average_across_all_files('max_cpu')

    @property
    def max_memory(self):
        return self._average_across_all_files('max_memory')

    @property
    def average_cpu(self):
        return self._average_across_all_files('average_cpu')

    @property
    def average_memory(self):
        return self._average_across_all_files('average_memory')

    @property
    def std_dev_total_time(self):
        return self._standard_deviation_across_all_files('time')

    @property
    def std_dev_max_cpu(self):
        return self._standard_deviation_across_all_files('max_cpu')

    @property
    def std_dev_max_memory(self):
        return self._standard_deviation_across_all_files('max_memory')

    @property
    def std_dev_average_cpu(self):
        return self._standard_deviation_across_all_files('average_cpu')

    @property
    def std_dev_average_memory(self):
        return self._standard_deviation_across_all_files('average_memory')

    def _average_across_all_files(self, name):
        return sum(self._totals[name]) / len(self._totals[name])

    def _standard_deviation_across_all_files(self, name):
        mean = self._average_across_all_files(name)
        differences = [total - mean for total in self._totals[name]]
        sq_differences = [difference**2 for difference in differences]
        return sqrt(sum(sq_differences) / len(self._totals[name]))
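
    # Note: _standard_deviation_across_all_files computes the population
    # standard deviation (dividing by N rather than N - 1). For example, two
    # per-file totals of 1.20 and 1.11 give a mean of 1.155 and a standard
    # deviation of 0.045, consistent with the two-file table in the module
    # docstring.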

    def summarize_as_table(self):
        """Formats the processed data as a pretty printed table.

        :return: str of formatted table
        """
        h = human_readable_size
        table = [
            [
                'Total Time (seconds)',
                '%.3f' % self.total_time,
                self.std_dev_total_time,
            ],
            ['Maximum Memory', h(self.max_memory), h(self.std_dev_max_memory)],
            [
                'Maximum CPU (percent)',
                '%.1f' % self.max_cpu,
                self.std_dev_max_cpu,
            ],
            [
                'Average Memory',
                h(self.average_memory),
                h(self.std_dev_average_memory),
            ],
            [
                'Average CPU (percent)',
                '%.1f' % self.average_cpu,
                self.std_dev_average_cpu,
            ],
        ]
        return tabulate(
            table,
            headers=[
                'Metric over %s run(s)' % (self.total_files),
                'Mean',
                'Standard Deviation',
            ],
            tablefmt="grid",
        )
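
    # In the table built above, the memory columns are humanized via
    # human_readable_size, while the time and CPU standard deviations are
    # passed through as raw floats, which is why the docstring example shows
    # values like 3.03068181818.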

    def summarize_as_json(self):
        """Returns a JSON summary of the processed data.

        :return: str of formatted JSON
        """
        return json.dumps(
            {
                'total_time': self.total_time,
                'std_dev_total_time': self.std_dev_total_time,
                'max_memory': self.max_memory,
                'std_dev_max_memory': self.std_dev_max_memory,
                'average_memory': self.average_memory,
                'std_dev_average_memory': self.std_dev_average_memory,
                'average_cpu': self.average_cpu,
                'std_dev_average_cpu': self.std_dev_average_cpu,
            },
            indent=2,
        )
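
    # Note: unlike summarize_as_table, the JSON summary above does not include
    # max_cpu or std_dev_max_cpu.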

    def process(self, args):
        """Processes the data from the CSV file(s)."""
        for benchmark_file in args.benchmark_files:
            self.process_individual_file(benchmark_file)
            self.total_files += 1

    def process_individual_file(self, benchmark_file):
        with open(benchmark_file) as f:
            reader = csv.reader(f)
            # Process each row from the CSV file
            row = None
            for row in reader:
                self._validate_row(row, benchmark_file)
                self.process_data_row(row)
            self._validate_row(row, benchmark_file)
            self._end_time = self._get_time(row)
            self._finalize_processed_data_for_file()
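
    # The second _validate_row call above re-checks the last row, which is
    # still None for an empty file, so an empty CSV raises a RuntimeError
    # instead of failing inside _get_time. The per-file total time is then the
    # last row's timestamp minus the first row's (captured in
    # process_data_row).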

    def _validate_row(self, row, filename):
        if not row:
            raise RuntimeError(
                'Row: %s could not be processed. The CSV file (%s) may be '
                'empty.' % (row, filename)
            )

    def process_data_row(self, row):
        # If the row is the first row, collect the start time.
        if self._num_rows == 0:
            self._start_time = self._get_time(row)
        self._num_rows += 1
        self.process_data_point(row, 'memory')
        self.process_data_point(row, 'cpu')

    def process_data_point(self, row, name):
        # Determine where in the CSV row the requested data is located.
        index = self.DATA_INDEX_IN_ROW[name]
        # Get the data point.
        data_point = float(row[index])
        self._add_to_average(name, data_point)
        self._account_for_maximum(name, data_point)

    def _finalize_processed_data_for_file(self):
        # Add this file's numbers to the totals, which keep track of data
        # across all files provided.
        self._totals['time'].append(self._end_time - self._start_time)
        self._totals['max_cpu'].append(self._maximums['cpu'])
        self._totals['max_memory'].append(self._maximums['memory'])
        self._totals['average_cpu'].append(
            self._averages['cpu'] / self._num_rows
        )
        self._totals['average_memory'].append(
            self._averages['memory'] / self._num_rows
        )
        # Reset the per-file state so the next file starts fresh.
        self._num_rows = 0
        self._maximums = self._maximums.fromkeys(self._maximums, 0.0)
        self._averages = self._averages.fromkeys(self._averages, 0.0)

    def _get_time(self, row):
        return float(row[self.DATA_INDEX_IN_ROW['time']])

    def _add_to_average(self, name, data_point):
        self._averages[name] += data_point

    def _account_for_maximum(self, name, data_point):
        if data_point > self._maximums[name]:
            self._maximums[name] = data_point


def main():
    parser = argparse.ArgumentParser(usage=__doc__)
    parser.add_argument(
        'benchmark_files',
        nargs='+',
        help=(
            'The CSV output file from the benchmark script. If you provide '
            'more than one of these files, it will give you the average '
            'across all of the files for each metric.'
        ),
    )
    parser.add_argument(
        '-f',
        '--output-format',
        default='table',
        choices=['table', 'json'],
        help=(
            'Specify what output format to use for displaying results. '
            'By default, a pretty printed table is used, but you can also '
            'specify "json" to display pretty printed JSON.'
        ),
    )
    args = parser.parse_args()
    summarizer = Summarizer()
    summarizer.process(args)
    if args.output_format == 'table':
        result = summarizer.summarize_as_table()
    else:
        result = summarizer.summarize_as_json()
    print(result)


if __name__ == '__main__':
    main()