validate_azure_dladmin_identity.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. #!/usr/bin/env python3
  2. ###
  3. # CLOUDERA CDP Control (cdpctl)
  4. #
  5. # (C) Cloudera, Inc. 2021-2021
  6. # All rights reserved.
  7. #
  8. # Applicable Open Source License: GNU AFFERO GENERAL PUBLIC LICENSE
  9. #
  10. # NOTE: Cloudera open source products are modular software products
  11. # made up of hundreds of individual components, each of which was
  12. # individually copyrighted. Each Cloudera open source product is a
  13. # collective work under U.S. Copyright Law. Your license to use the
  14. # collective work is as provided in your written agreement with
  15. # Cloudera. Used apart from the collective work, this file is
  16. # licensed for your use pursuant to the open source license
  17. # identified above.
  18. #
  19. # This code is provided to you pursuant a written agreement with
  20. # (i) Cloudera, Inc. or (ii) a third-party authorized to distribute
  21. # this code. If you do not have a written agreement with Cloudera nor
  22. # with an authorized and properly licensed third party, you do not
  23. # have any rights to access nor to use this code.
  24. #
  25. # Absent a written agreement with Cloudera, Inc. (“Cloudera”) to the
  26. # contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY
  27. # KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED
  28. # WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO
  29. # IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND
  30. # FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU,
  31. # AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS
  32. # ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE
  33. # OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY
  34. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR
  35. # CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES
  36. # RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF
  37. # BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
  38. # DATA.
  39. #
  40. # Source File Name: validate_azure_dladmin_identity.py
  41. ###
  42. """Validation of Azure Datalake Admin Identity."""
  43. from typing import Any, Dict, List
  44. import pytest
  45. from azure.mgmt.authorization import AuthorizationManagementClient
  46. from azure.mgmt.resource import ResourceManagementClient
  47. from cdpctl.validation import fail, get_config_value
  48. from cdpctl.validation.azure_utils import (
  49. check_for_actions,
  50. get_client,
  51. get_role_assignments,
  52. get_storage_container_scope,
  53. parse_adls_path,
  54. )
  55. from cdpctl.validation.infra.issues import (
  56. AZURE_IDENTITY_MISSING_ACTIONS_FOR_LOCATION,
  57. AZURE_IDENTITY_MISSING_DATA_ACTIONS_FOR_LOCATION,
  58. )
  59. @pytest.fixture(autouse=True, name="resource_client")
  60. def resource_client_fixture(config: Dict[str, Any]) -> ResourceManagementClient:
  61. """Return an Azure Resource Client."""
  62. return get_client("resource", config)
  63. @pytest.fixture(autouse=True, name="auth_client")
  64. def auth_client_fixture(config: Dict[str, Any]) -> AuthorizationManagementClient:
  65. """Return an Azure Auth Client."""
  66. return get_client("auth", config)
  67. @pytest.mark.azure
  68. @pytest.mark.infra
  69. def azure_dladmin_actions_for_logs_storage_validation(
  70. config: Dict[str, Any],
  71. auth_client: AuthorizationManagementClient,
  72. resource_client: ResourceManagementClient,
  73. azure_data_required_actions,
  74. ) -> None: # pragma: no cover
  75. """Datalake Admin Identity has required Actions on logs storage location.""" # noqa: D401,E501
  76. _azure_dladmin_logs_storage_actions_check(
  77. config=config,
  78. auth_client=auth_client,
  79. resource_client=resource_client,
  80. azure_data_required_actions=azure_data_required_actions,
  81. )
  82. def _azure_dladmin_logs_storage_actions_check(
  83. config: Dict[str, Any],
  84. auth_client: AuthorizationManagementClient,
  85. resource_client: ResourceManagementClient,
  86. azure_data_required_actions: List[str],
  87. ) -> None: # pragma: no cover
  88. # noqa: D401,E501
  89. sub_id: str = get_config_value(config=config, key="infra:azure:subscription_id")
  90. rg_name: str = get_config_value(config=config, key="infra:azure:metagroup:name")
  91. storage_name: str = get_config_value(config=config, key="env:azure:storage:name")
  92. log_path: str = get_config_value(config=config, key="env:azure:storage:path:logs")
  93. datalake_admin: str = get_config_value(
  94. config=config, key="env:azure:role:name:datalake_admin"
  95. )
  96. parsed_logger_path = parse_adls_path(log_path)
  97. container_name = parsed_logger_path[1]
  98. role_assignments = get_role_assignments(
  99. auth_client=auth_client,
  100. resource_client=resource_client,
  101. identity_name=datalake_admin,
  102. subscription_id=sub_id,
  103. resource_group=rg_name,
  104. )
  105. proper_scope = get_storage_container_scope(
  106. sub_id, rg_name, storage_name, container_name
  107. )
  108. missing_actions, _ = check_for_actions(
  109. auth_client=auth_client,
  110. role_assigments=role_assignments,
  111. proper_scope=proper_scope,
  112. required_actions=azure_data_required_actions,
  113. required_data_actions=[],
  114. )
  115. if missing_actions:
  116. fail(
  117. AZURE_IDENTITY_MISSING_ACTIONS_FOR_LOCATION,
  118. subjects=[
  119. datalake_admin,
  120. f"storageAccounts/{storage_name}/blobServices/default/containers/{container_name}", # noqa: E501
  121. ],
  122. resources=missing_actions,
  123. )
  124. @pytest.mark.azure
  125. @pytest.mark.infra
  126. def azure_dladmin_data_actions_for_logs_storage_validation(
  127. config: Dict[str, Any],
  128. auth_client: AuthorizationManagementClient,
  129. resource_client: ResourceManagementClient,
  130. azure_data_required_data_actions,
  131. ) -> None: # pragma: no cover
  132. """Datalake Admin Identity has required Data Actions on logs storage location.""" # noqa: D401,E501
  133. _azure_dladmin_logs_storage_data_actions_check(
  134. config=config,
  135. auth_client=auth_client,
  136. resource_client=resource_client,
  137. azure_data_required_data_actions=azure_data_required_data_actions,
  138. )
  139. def _azure_dladmin_logs_storage_data_actions_check(
  140. config: Dict[str, Any],
  141. auth_client: AuthorizationManagementClient,
  142. resource_client: ResourceManagementClient,
  143. azure_data_required_data_actions: List[str],
  144. ) -> None: # pragma: no cover
  145. # noqa: D401,E501
  146. sub_id: str = get_config_value(config=config, key="infra:azure:subscription_id")
  147. rg_name: str = get_config_value(config=config, key="infra:azure:metagroup:name")
  148. storage_name: str = get_config_value(config=config, key="env:azure:storage:name")
  149. log_path: str = get_config_value(config=config, key="env:azure:storage:path:logs")
  150. datalake_admin: str = get_config_value(
  151. config=config, key="env:azure:role:name:datalake_admin"
  152. )
  153. parsed_logger_path = parse_adls_path(log_path)
  154. container_name = parsed_logger_path[1]
  155. role_assignments = get_role_assignments(
  156. auth_client=auth_client,
  157. resource_client=resource_client,
  158. identity_name=datalake_admin,
  159. subscription_id=sub_id,
  160. resource_group=rg_name,
  161. )
  162. proper_scope = get_storage_container_scope(
  163. sub_id, rg_name, storage_name, container_name
  164. )
  165. _, missing_data_actions = check_for_actions(
  166. auth_client=auth_client,
  167. role_assigments=role_assignments,
  168. proper_scope=proper_scope,
  169. required_actions=[],
  170. required_data_actions=azure_data_required_data_actions,
  171. )
  172. if missing_data_actions:
  173. fail(
  174. AZURE_IDENTITY_MISSING_DATA_ACTIONS_FOR_LOCATION,
  175. subjects=[
  176. datalake_admin,
  177. f"storageAccounts/{storage_name}/blobServices/default/containers/{container_name}", # noqa: E501
  178. ],
  179. resources=missing_data_actions,
  180. )
  181. @pytest.mark.azure
  182. @pytest.mark.infra
  183. def azure_dladmin_actions_for_data_storage_validation(
  184. config: Dict[str, Any],
  185. auth_client: AuthorizationManagementClient,
  186. resource_client: ResourceManagementClient,
  187. azure_data_required_actions,
  188. ) -> None: # pragma: no cover
  189. """Datalake Admin Identity has required Actions on data storage location.""" # noqa: D401,E501
  190. _azure_dladmin_data_storage_actions_check(
  191. config=config,
  192. auth_client=auth_client,
  193. resource_client=resource_client,
  194. azure_data_required_actions=azure_data_required_actions,
  195. )
  196. def _azure_dladmin_data_storage_actions_check(
  197. config: Dict[str, Any],
  198. auth_client: AuthorizationManagementClient,
  199. resource_client: ResourceManagementClient,
  200. azure_data_required_actions: List[str],
  201. ) -> None: # pragma: no cover
  202. # noqa: D401,E501
  203. sub_id: str = get_config_value(config=config, key="infra:azure:subscription_id")
  204. rg_name: str = get_config_value(config=config, key="infra:azure:metagroup:name")
  205. storage_name: str = get_config_value(config=config, key="env:azure:storage:name")
  206. data_path: str = get_config_value(config=config, key="env:azure:storage:path:data")
  207. datalake_admin: str = get_config_value(
  208. config=config, key="env:azure:role:name:datalake_admin"
  209. )
  210. parsed_data_path = parse_adls_path(data_path)
  211. container_name = parsed_data_path[1]
  212. role_assignments = get_role_assignments(
  213. auth_client=auth_client,
  214. resource_client=resource_client,
  215. identity_name=datalake_admin,
  216. subscription_id=sub_id,
  217. resource_group=rg_name,
  218. )
  219. proper_scope = get_storage_container_scope(
  220. sub_id, rg_name, storage_name, container_name
  221. )
  222. missing_actions, _ = check_for_actions(
  223. auth_client=auth_client,
  224. role_assigments=role_assignments,
  225. proper_scope=proper_scope,
  226. required_actions=azure_data_required_actions,
  227. required_data_actions=[],
  228. )
  229. if missing_actions:
  230. fail(
  231. AZURE_IDENTITY_MISSING_ACTIONS_FOR_LOCATION,
  232. subjects=[
  233. datalake_admin,
  234. f"storageAccounts/{storage_name}/blobServices/default/containers/{container_name}", # noqa: E501
  235. ],
  236. resources=missing_actions,
  237. )
  238. @pytest.mark.azure
  239. @pytest.mark.infra
  240. def azure_dladmin_data_actions_for_data_storage_validation(
  241. config: Dict[str, Any],
  242. auth_client: AuthorizationManagementClient,
  243. resource_client: ResourceManagementClient,
  244. azure_data_required_data_actions,
  245. ) -> None: # pragma: no cover
  246. """Datalake Admin Identity has required Data Actions on data storage location.""" # noqa: D401,E501
  247. _azure_dladmin_data_storage_data_actions_check(
  248. config=config,
  249. auth_client=auth_client,
  250. resource_client=resource_client,
  251. azure_data_required_data_actions=azure_data_required_data_actions,
  252. )
  253. def _azure_dladmin_data_storage_data_actions_check(
  254. config: Dict[str, Any],
  255. auth_client: AuthorizationManagementClient,
  256. resource_client: ResourceManagementClient,
  257. azure_data_required_data_actions: List[str],
  258. ) -> None: # pragma: no cover
  259. # noqa: D401,E501
  260. sub_id: str = get_config_value(config=config, key="infra:azure:subscription_id")
  261. rg_name: str = get_config_value(config=config, key="infra:azure:metagroup:name")
  262. storage_name: str = get_config_value(config=config, key="env:azure:storage:name")
  263. data_path: str = get_config_value(config=config, key="env:azure:storage:path:data")
  264. datalake_admin: str = get_config_value(
  265. config=config, key="env:azure:role:name:datalake_admin"
  266. )
  267. parsed_data_path = parse_adls_path(data_path)
  268. container_name = parsed_data_path[1]
  269. role_assignments = get_role_assignments(
  270. auth_client=auth_client,
  271. resource_client=resource_client,
  272. identity_name=datalake_admin,
  273. subscription_id=sub_id,
  274. resource_group=rg_name,
  275. )
  276. proper_scope = get_storage_container_scope(
  277. sub_id, rg_name, storage_name, container_name
  278. )
  279. _, missing_data_actions = check_for_actions(
  280. auth_client=auth_client,
  281. role_assigments=role_assignments,
  282. proper_scope=proper_scope,
  283. required_actions=[],
  284. required_data_actions=azure_data_required_data_actions,
  285. )
  286. if missing_data_actions:
  287. fail(
  288. AZURE_IDENTITY_MISSING_DATA_ACTIONS_FOR_LOCATION,
  289. subjects=[
  290. datalake_admin,
  291. f"storageAccounts/{storage_name}/blobServices/default/containers/{container_name}", # noqa: E501
  292. ],
  293. resources=missing_data_actions,
  294. )
  295. @pytest.mark.azure
  296. @pytest.mark.infra
  297. def azure_dladmin_actions_for_backup_storage_validation(
  298. config: Dict[str, Any],
  299. auth_client: AuthorizationManagementClient,
  300. resource_client: ResourceManagementClient,
  301. azure_data_required_actions,
  302. ) -> None: # pragma: no cover
  303. """Datalake Admin Identity has required Actions on backup storage location.""" # noqa: D401,E501
  304. _azure_dladmin_backup_storage_actions_check(
  305. config=config,
  306. auth_client=auth_client,
  307. resource_client=resource_client,
  308. azure_data_required_actions=azure_data_required_actions,
  309. )
  310. def _azure_dladmin_backup_storage_actions_check(
  311. config: Dict[str, Any],
  312. auth_client: AuthorizationManagementClient,
  313. resource_client: ResourceManagementClient,
  314. azure_data_required_actions: List[str],
  315. ) -> None: # pragma: no cover
  316. # noqa: D401,E501
  317. sub_id: str = get_config_value(config=config, key="infra:azure:subscription_id")
  318. rg_name: str = get_config_value(config=config, key="infra:azure:metagroup:name")
  319. storage_name: str = get_config_value(config=config, key="env:azure:storage:name")
  320. backup_path: str = get_config_value(
  321. config=config, key="env:azure:storage:path:backup"
  322. )
  323. datalake_admin: str = get_config_value(
  324. config=config, key="env:azure:role:name:datalake_admin"
  325. )
  326. parsed_logger_path = parse_adls_path(backup_path)
  327. container_name = parsed_logger_path[1]
  328. role_assignments = get_role_assignments(
  329. auth_client=auth_client,
  330. resource_client=resource_client,
  331. identity_name=datalake_admin,
  332. subscription_id=sub_id,
  333. resource_group=rg_name,
  334. )
  335. proper_scope = get_storage_container_scope(
  336. sub_id, rg_name, storage_name, container_name
  337. )
  338. missing_actions, _ = check_for_actions(
  339. auth_client=auth_client,
  340. role_assigments=role_assignments,
  341. proper_scope=proper_scope,
  342. required_actions=azure_data_required_actions,
  343. required_data_actions=[],
  344. )
  345. if missing_actions:
  346. fail(
  347. AZURE_IDENTITY_MISSING_ACTIONS_FOR_LOCATION,
  348. subjects=[
  349. datalake_admin,
  350. f"storageAccounts/{storage_name}/blobServices/default/containers/{container_name}", # noqa: E501
  351. ],
  352. resources=missing_actions,
  353. )
  354. @pytest.mark.azure
  355. @pytest.mark.infra
  356. def azure_dladmin_data_actions_for_backup_storage_validation(
  357. config: Dict[str, Any],
  358. auth_client: AuthorizationManagementClient,
  359. resource_client: ResourceManagementClient,
  360. azure_data_required_data_actions,
  361. ) -> None: # pragma: no cover
  362. """Datalake Admin Identity has required Data Actions on backup storage location.""" # noqa: D401,E501
  363. _azure_dladmin_backup_storage_data_actions_check(
  364. config=config,
  365. auth_client=auth_client,
  366. resource_client=resource_client,
  367. azure_data_required_data_actions=azure_data_required_data_actions,
  368. )
  369. def _azure_dladmin_backup_storage_data_actions_check(
  370. config: Dict[str, Any],
  371. auth_client: AuthorizationManagementClient,
  372. resource_client: ResourceManagementClient,
  373. azure_data_required_data_actions: List[str],
  374. ) -> None: # pragma: no cover
  375. # noqa: D401,E501
  376. sub_id: str = get_config_value(config=config, key="infra:azure:subscription_id")
  377. rg_name: str = get_config_value(config=config, key="infra:azure:metagroup:name")
  378. storage_name: str = get_config_value(config=config, key="env:azure:storage:name")
  379. backup_path: str = get_config_value(
  380. config=config, key="env:azure:storage:path:backup"
  381. )
  382. datalake_admin: str = get_config_value(
  383. config=config, key="env:azure:role:name:datalake_admin"
  384. )
  385. parsed_logger_path = parse_adls_path(backup_path)
  386. container_name = parsed_logger_path[1]
  387. role_assignments = get_role_assignments(
  388. auth_client=auth_client,
  389. resource_client=resource_client,
  390. identity_name=datalake_admin,
  391. subscription_id=sub_id,
  392. resource_group=rg_name,
  393. )
  394. proper_scope = get_storage_container_scope(
  395. sub_id, rg_name, storage_name, container_name
  396. )
  397. _, missing_data_actions = check_for_actions(
  398. auth_client=auth_client,
  399. role_assigments=role_assignments,
  400. proper_scope=proper_scope,
  401. required_actions=[],
  402. required_data_actions=azure_data_required_data_actions,
  403. )
  404. if missing_data_actions:
  405. fail(
  406. AZURE_IDENTITY_MISSING_DATA_ACTIONS_FOR_LOCATION,
  407. subjects=[
  408. datalake_admin,
  409. f"storageAccounts/{storage_name}/blobServices/default/containers/{container_name}", # noqa: E501
  410. ],
  411. resources=missing_data_actions,
  412. )