benchmark 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #!/usr/bin/env python
  2. """
  3. Use for benchmarking performance of other scripts. Provides data about
  4. time, memory use, cpu usage, network in, network out about the script ran in
  5. the form of a csv.
  6. Usage
  7. =====
  8. NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running.
  9. To use the script, run::
  10. ./benchmark "./my-script-to-run"
  11. If no ``--output-file`` was provided, the data will be saved to
  12. ``performance.csv``
  13. """
  14. import argparse
  15. import os
  16. import subprocess
  17. import sys
  18. import time
  19. import psutil
  20. # Determine the interface to track network IO depending on the platform.
  21. if sys.platform.startswith('linux'):
  22. INTERFACE = 'eth0'
  23. elif sys.platform == 'darwin':
  24. INTERFACE = 'en0'
  25. else:
  26. # TODO: Add support for windows. This would require figuring out what
  27. # interface to use on windows.
  28. raise RuntimeError('Script cannot be run on %s' % sys.platform)
  29. def benchmark(args):
  30. parent_pid = os.getpid()
  31. child_p = run_script(args)
  32. try:
  33. # Benchmark the process where the script is being ran.
  34. return run_benchmark(child_p.pid, args.output_file, args.data_interval)
  35. except KeyboardInterrupt:
  36. # If there is an interrupt, then try to clean everything up.
  37. proc = psutil.Process(parent_pid)
  38. procs = proc.children(recursive=True)
  39. for child in procs:
  40. child.terminate()
  41. gone, alive = psutil.wait_procs(procs, timeout=1)
  42. for child in alive:
  43. child.kill()
  44. return 1
  45. def run_script(args):
  46. return subprocess.Popen(args.script, shell=True)
  47. def run_benchmark(pid, output_file, data_interval):
  48. p = psutil.Process(pid)
  49. previous_net = psutil.net_io_counters(pernic=True)[INTERFACE]
  50. previous_time = time.time()
  51. with open(output_file, 'w') as f:
  52. while p.is_running():
  53. if p.status() == psutil.STATUS_ZOMBIE:
  54. p.kill()
  55. break
  56. time.sleep(data_interval)
  57. process_to_measure = _get_underlying_python_process(p)
  58. try:
  59. # Collect the memory and cpu usage.
  60. memory_used = process_to_measure.memory_info().rss
  61. cpu_percent = process_to_measure.cpu_percent()
  62. current_net = psutil.net_io_counters(pernic=True)[INTERFACE]
  63. except psutil.AccessDenied:
  64. # Trying to get process information from a closed process will
  65. # result in AccessDenied.
  66. break
  67. # Collect data on the in/out network io.
  68. sent_delta = current_net.bytes_sent - previous_net.bytes_sent
  69. recv_delta = current_net.bytes_recv - previous_net.bytes_recv
  70. # Determine the lapsed time to determine the network io rate.
  71. current_time = time.time()
  72. previous_net = current_net
  73. dt = current_time - previous_time
  74. previous_time = current_time
  75. sent_rate = sent_delta / dt
  76. recv_rate = recv_delta / dt
  77. # Save all of the data into a CSV file.
  78. f.write(
  79. f"{current_time},{memory_used},{cpu_percent},"
  80. f"{sent_rate},{recv_rate}\n"
  81. )
  82. f.flush()
  83. return 0
  84. def _get_underlying_python_process(process):
  85. # For some scripts such as the streaming CLI commands, the process is
  86. # nested under a shell script that does not account for the python process.
  87. # We want to always be measuring the python process.
  88. children = process.children(recursive=True)
  89. for child_process in children:
  90. if 'python' in child_process.name().lower():
  91. return child_process
  92. return process
  93. def main():
  94. parser = argparse.ArgumentParser(usage=__doc__)
  95. parser.add_argument('script', help='The script to run for benchmarking')
  96. parser.add_argument(
  97. '--data-interval',
  98. default=1,
  99. type=float,
  100. help='The interval in seconds to poll for data points',
  101. )
  102. parser.add_argument(
  103. '--output-file',
  104. default='performance.csv',
  105. help='The file to output the data collected to',
  106. )
  107. args = parser.parse_args()
  108. return benchmark(args)
  109. if __name__ == '__main__':
  110. sys.exit(main())