# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
This Python script sets up an environment on AzureML and submits a
script to it to run pytest. It is usually intended to be used as
part of a DevOps pipeline which runs testing on a GitHub repo, but
can also be used from the command line.

Many parameters are set to default values and some are expected to be passed
in from either the DevOps pipeline or the command line.
If calling from the command line, there are some parameters you must pass in
for your job to run.

Args:
    Required:
    --clustername (str): the Azure cluster for this run. It can already exist
                         or it will be created.
    --subid (str): the Azure subscription id

    Optional but suggested; this info will be stored on Azure as
    text information as part of the experiment:
    --pr (str): the GitHub PR number
    --reponame (str): the GitHub repository name
    --branch (str): the branch being run
    It is also possible to put any text string in these.

Example:
    Usually, this script is run by a DevOps pipeline. It can also be
    run from the command line.
    >>> python tests/ci/submit_azureml_pytest.py --clustername 'cluster-d3-v2'
                                                 --subid '12345678-9012-3456-abcd-123456789012'
                                                 --pr '666'
                                                 --reponame 'Recommenders'
                                                 --branch 'staging'
"""
import argparse
import logging

from azureml.core.authentication import AzureCliAuthentication
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.script_run_config import ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.workspace import WorkspaceException
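
# NOTE: the module-level `logger` used by the helper functions below is
# created in the `__main__` block, so these helpers are meant to be driven
# by this script's entry point rather than imported and called on their own.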


def setup_workspace(workspace_name, subscription_id, resource_group, cli_auth,
                    location):
    """
    This sets up an Azure Workspace.
    An existing Azure Workspace is used or a new one is created if needed for
    the pytest run.

    Args:
        workspace_name (str): Centralized location on Azure to work
                              with all the artifacts used by the AzureML
                              service
        subscription_id (str): the Azure subscription id
        resource_group (str): Azure Resource Groups are logical collections of
                              assets associated with a project. Resource
                              groups make it easy to track or delete all
                              resources associated with a project by tracking
                              or deleting the Resource group.
        cli_auth (AzureCliAuthentication): Azure authentication object
        location (str): Azure datacenter location for the workspace,
                        e.g. EastUS

    Returns:
        ws: workspace reference
    """
    logger.debug('setup: workspace_name is {}'.format(workspace_name))
    logger.debug('setup: resource_group is {}'.format(resource_group))
    logger.debug('setup: subid is {}'.format(subscription_id))
    logger.debug('setup: location is {}'.format(location))

    try:
        # use existing workspace if there is one
        ws = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group,
            auth=cli_auth
        )
    except WorkspaceException:
        # this call might take a minute or two.
        logger.debug('Creating new workspace')
        ws = Workspace.create(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group,
            # create_resource_group=True,
            location=location,
            auth=cli_auth
        )
    return ws
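
# Usage (a minimal sketch built from this script's own defaults; the
# subscription id is a placeholder):
#   cli_auth = AzureCliAuthentication()
#   ws = setup_workspace(workspace_name='RecoWS',
#                        subscription_id='<subscription-id>',
#                        resource_group='recommender',
#                        cli_auth=cli_auth,
#                        location='EastUS')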


def setup_persistent_compute_target(workspace, cluster_name, vm_size,
                                    max_nodes):
    """
    Set up a persistent compute target on AzureML.
    A persistent compute target runs noticeably faster than a
    regular compute target for subsequent runs. The benefit
    is that AzureML manages turning the compute on/off as needed for
    each job so the user does not need to do this.

    Args:
        workspace (Workspace): AzureML workspace holding all the
                               artifacts used by the AzureML service
        cluster_name (str): the Azure cluster for this run. It can
                            already exist or it will be created.
        vm_size (str): Azure VM size, like STANDARD_D3_V2
        max_nodes (int): Number of VMs, max_nodes=4 will
                         autoscale up to 4 VMs

    Returns:
        cpu_cluster : cluster reference
    """
    # setting vm size and num nodes creates a persistent AzureML
    # compute resource
    logger.debug("setup: cluster_name {}".format(cluster_name))
    # https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets

    try:
        cpu_cluster = ComputeTarget(workspace=workspace, name=cluster_name)
        logger.debug('setup: Found existing cluster, use it.')
    except ComputeTargetException:
        logger.debug('setup: create cluster')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            max_nodes=max_nodes)
        cpu_cluster = ComputeTarget.create(workspace,
                                           cluster_name,
                                           compute_config)
    cpu_cluster.wait_for_completion(show_output=True)
    return cpu_cluster
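
# Usage (a minimal sketch using this script's default cluster settings):
#   cpu_cluster = setup_persistent_compute_target(workspace=ws,
#                                                 cluster_name='amlcompute',
#                                                 vm_size='STANDARD_D3_V2',
#                                                 max_nodes=4)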


def create_run_config(cpu_cluster, docker_proc_type, conda_env_file):
    """
    AzureML requires the run environment to be set up prior to submission.
    This configures a Docker-based persistent compute. Even though
    it is called persistent compute, AzureML handles startup/shutdown
    of the compute environment.

    Args:
        cpu_cluster (ComputeTarget or str): the compute target (or its name)
                                            for the test run.
                                            In the case of unit tests, any of
                                            the following:
                                            - Reco_cpu_test
                                            - Reco_gpu_test
        docker_proc_type (str): processor type, cpu or gpu
        conda_env_file (str): filename which contains info to
                              set up the conda env

    Return:
        run_amlcompute : AzureML run config
    """
    # runconfig with max_run_duration_seconds did not work, check why:
    # run_amlcompute = RunConfiguration(max_run_duration_seconds=60*30)
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = cpu_cluster
    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = docker_proc_type

    # Use the conda environment file to build the conda environment inside
    # the Docker image used for execution.
    # user_managed_dependencies=False means AzureML builds the environment
    # from the conda file; True would mean the user manages the environment
    # manually.
    run_amlcompute.environment.python.user_managed_dependencies = False
    run_amlcompute.environment.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path=conda_env_file)
    return run_amlcompute
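
# In this script, `docker_proc_type` is one of the AzureML-provided base
# images (DEFAULT_CPU_IMAGE or DEFAULT_GPU_IMAGE), chosen in the `__main__`
# block from the --dockerproc argument.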


def create_experiment(workspace, experiment_name):
    """
    AzureML requires an experiment as a container of trials.
    This will either create a new experiment or use an
    existing one.

    Args:
        workspace (Workspace): AzureML workspace
        experiment_name (str): AzureML experiment name

    Return:
        exp - AzureML experiment
    """
    logger.debug('create: experiment_name {}'.format(experiment_name))
    exp = Experiment(workspace=workspace, name=experiment_name)
    return exp


def submit_experiment_to_azureml(test, test_folder, test_markers, junitxml,
                                 run_config, experiment):
    """
    Submitting the experiment to AzureML actually runs the script.

    Args:
        test (str): pytest script, folder/test
                    such as ./tests/ci/run_pytest.py
        test_folder (str): folder where tests to run are stored,
                           like ./tests/unit
        test_markers (str): test markers used by pytest,
                            "not notebooks and not spark and not gpu"
        junitxml (str): file of output summary of tests run
                        note "--junitxml" is required as part
                        of the string
                        Example: "--junitxml reports/test-unit.xml"
        run_config: environment configuration
        experiment: instance of an Experiment, a collection of
                    trials where each trial is a run.

    Return:
        run : AzureML run or trial
    """
    logger.debug('submit: testfolder {}'.format(test_folder))
    logger.debug('junitxml: {}'.format(junitxml))
    project_folder = "."

    script_run_config = ScriptRunConfig(source_directory=project_folder,
                                        script=test,
                                        run_config=run_config,
                                        arguments=["--testfolder",
                                                   test_folder,
                                                   "--testmarkers",
                                                   test_markers,
                                                   "--xmlname",
                                                   junitxml]
                                        )
    run = experiment.submit(script_run_config)
    # waits for the run (and its post processing) to complete
    run.wait_for_completion(show_output=True, wait_post_processing=True)

    # test logs can also be found on Azure: in the Azure portal, open the
    # workspace, look for the experiment name and then the individual run
    logger.debug('files {}'.format(run.get_file_names()))
    return run
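
# Note: the submitted script (./tests/ci/run_pytest.py by default) is assumed
# to accept the --testfolder, --testmarkers and --xmlname arguments passed
# above; that contract is not enforced here.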


def create_arg_parser():
    """
    Many of the argument defaults are used as arg_parser makes it easy to
    use defaults. The user has many options they can select.
    """
    parser = argparse.ArgumentParser(description='Process some inputs')
    # script to run pytest
    parser.add_argument("--test",
                        action="store",
                        default="./tests/ci/run_pytest.py",
                        help="location of script to run pytest")
    # test folder
    parser.add_argument("--testfolder",
                        action="store",
                        default="./tests/unit",
                        help="folder where tests are stored")
    # pytest test markers
    parser.add_argument("--testmarkers",
                        action="store",
                        default="not notebooks and not spark and not gpu",
                        help="pytest markers indicate tests to run")
    # test summary file
    parser.add_argument("--junitxml",
                        action="store",
                        default="reports/test-unit.xml",
                        help="file for returned test results")
    # max num nodes in Azure cluster
    parser.add_argument("--maxnodes",
                        action="store",
                        default=4,
                        help="specify the maximum number of nodes for the run")
    # Azure resource group
    parser.add_argument("--rg",
                        action="store",
                        default="recommender",
                        help="Azure Resource Group")
    # AzureML workspace name
    parser.add_argument("--wsname",
                        action="store",
                        default="RecoWS",
                        help="AzureML workspace name")
    # AzureML cluster name
    parser.add_argument("--clustername",
                        action="store",
                        default="amlcompute",
                        help="Set name of Azure cluster")
    # Azure VM size
    parser.add_argument("--vmsize",
                        action="store",
                        default="STANDARD_D3_V2",
                        help="Set the size of the VM, e.g. STANDARD_D3_V2")
    # cpu or gpu
    parser.add_argument("--dockerproc",
                        action="store",
                        default="cpu",
                        help="Base image used in docker container")
    # Azure subscription id, when used in a pipeline, it is stored in keyvault
    parser.add_argument("--subid",
                        action="store",
                        default="123456",
                        help="Azure Subscription ID")
    # ./reco.yaml is created in the Azure DevOps pipeline.
    # Not recommended to change this.
    parser.add_argument("--condafile",
                        action="store",
                        default="./reco.yaml",
                        help="file with environment variables")
    # AzureML experiment name
    parser.add_argument("--expname",
                        action="store",
                        default="persistentAML",
                        help="experiment name on Azure")
    # Azure datacenter location
    parser.add_argument("--location",
                        default="EastUS",
                        help="Azure location")
    # github repo, stored in AzureML experiment for info purposes
    parser.add_argument("--reponame",
                        action="store",
                        default="MyGithubRepo",
                        help="GitHub repo being tested")
    # github branch, stored in AzureML experiment for info purposes
    parser.add_argument("--branch",
                        action="store",
                        default="MyGithubBranch",
                        help="Identify the branch the test is run on")
    # github pull request, stored in AzureML experiment for info purposes
    parser.add_argument("--pr",
                        action="store",
                        default="PRTestRun",
                        help="If a pr triggered the test, list it here")

    args = parser.parse_args()
    return args


if __name__ == "__main__":
    logger = logging.getLogger('submit_azureml_pytest.py')
    # logger.setLevel(logging.DEBUG)
    # logging.basicConfig(level=logging.DEBUG)
    args = create_arg_parser()

    if args.dockerproc == "cpu":
        from azureml.core.runconfig import DEFAULT_CPU_IMAGE
        docker_proc_type = DEFAULT_CPU_IMAGE
    else:
        from azureml.core.runconfig import DEFAULT_GPU_IMAGE
        docker_proc_type = DEFAULT_GPU_IMAGE

    cli_auth = AzureCliAuthentication()

    workspace = setup_workspace(workspace_name=args.wsname,
                                subscription_id=args.subid,
                                resource_group=args.rg,
                                cli_auth=cli_auth,
                                location=args.location)

    cpu_cluster = setup_persistent_compute_target(
        workspace=workspace,
        cluster_name=args.clustername,
        vm_size=args.vmsize,
        max_nodes=args.maxnodes)

    run_config = create_run_config(cpu_cluster=cpu_cluster,
                                   docker_proc_type=docker_proc_type,
                                   conda_env_file=args.condafile)

    logger.info('exp: In Azure, look for experiment named {}'.format(
        args.expname))

    # create new or use existing experiment
    experiment = Experiment(workspace=workspace, name=args.expname)
    run = submit_experiment_to_azureml(test=args.test,
                                       test_folder=args.testfolder,
                                       test_markers=args.testmarkers,
                                       junitxml=args.junitxml,
                                       run_config=run_config,
                                       experiment=experiment)

    # add helpful information to experiment on Azure
    run.tag('RepoName', args.reponame)
    run.tag('Branch', args.branch)
    run.tag('PR', args.pr)

    # download files from AzureML
    run.download_files(prefix='reports', output_paths='./reports')
    run.complete()