test_execute_query.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. import os
  2. import re
  3. from types import SimpleNamespace
  4. import pytest
  5. from mock import patch
  6. from backend.lambdas.tasks.execute_query import handler, make_query, escape_item
  7. pytestmark = [pytest.mark.unit, pytest.mark.task]
  8. def escape_resp(resp):
  9. return re.sub("[\x00-\x20]+", " ", resp.strip())
  10. @patch("backend.lambdas.tasks.execute_query.client")
  11. @patch("backend.lambdas.tasks.execute_query.make_query")
  12. def test_it_executes_queries(query_mock, client_mock):
  13. client_mock.start_query_execution.return_value = {"QueryExecutionId": "123"}
  14. query_mock.return_value = "test"
  15. resp = handler(
  16. {"QueryData": {}, "Bucket": "mybucket", "Prefix": "my_prefix"},
  17. SimpleNamespace(),
  18. )
  19. assert "123" == resp
  20. client_mock.start_query_execution.assert_called_with(
  21. QueryString="test",
  22. ResultConfiguration={"OutputLocation": "s3://mybucket/my_prefix/"},
  23. WorkGroup="primary",
  24. )
  25. @patch("backend.lambdas.tasks.execute_query.client")
  26. @patch("backend.lambdas.tasks.execute_query.make_query")
  27. def test_it_permits_custom_workgroups(query_mock, client_mock):
  28. client_mock.start_query_execution.return_value = {"QueryExecutionId": "123"}
  29. query_mock.return_value = "test"
  30. with patch.dict(os.environ, {"WorkGroup": "custom"}):
  31. resp = handler(
  32. {"QueryData": {}, "Bucket": "mybucket", "Prefix": "my_prefix"},
  33. SimpleNamespace(),
  34. )
  35. assert "123" == resp
  36. client_mock.start_query_execution.assert_called_with(
  37. QueryString="test",
  38. ResultConfiguration={"OutputLocation": "s3://mybucket/my_prefix/"},
  39. WorkGroup="custom",
  40. )
  41. def test_it_generates_query_with_partition():
  42. resp = make_query(
  43. {
  44. "Database": "amazonreviews",
  45. "Table": "amazon_reviews_parquet",
  46. "Columns": [
  47. {
  48. "Column": "customer_id",
  49. "Type": "Simple",
  50. }
  51. ],
  52. "PartitionKeys": [{"Key": "product_category", "Value": "Books"}],
  53. "DataMapperId": "dm_1234",
  54. "JobId": "job_1234567890",
  55. }
  56. )
  57. assert escape_resp(resp) == escape_resp(
  58. """
  59. SELECT DISTINCT "$path" FROM (
  60. SELECT t."$path"
  61. FROM "amazonreviews"."amazon_reviews_parquet" t,
  62. "s3f2_manifests_database"."s3f2_manifests_table" m
  63. WHERE
  64. m."jobid"='job_1234567890' AND
  65. m."datamapperid"='dm_1234' AND
  66. cast(t."customer_id" as varchar)=m."queryablematchid" AND m."queryablecolumns"='customer_id'
  67. AND "product_category" = 'Books'
  68. )
  69. """
  70. )
  71. def test_it_generates_query_with_int_partition():
  72. resp = make_query(
  73. {
  74. "Database": "amazonreviews",
  75. "Table": "amazon_reviews_parquet",
  76. "Columns": [
  77. {
  78. "Column": "customer_id",
  79. "Type": "Simple",
  80. }
  81. ],
  82. "PartitionKeys": [{"Key": "year", "Value": 2010}],
  83. "DataMapperId": "dm_1234",
  84. "JobId": "job_1234567890",
  85. }
  86. )
  87. assert escape_resp(resp) == escape_resp(
  88. """
  89. SELECT DISTINCT "$path" FROM (
  90. SELECT t."$path"
  91. FROM "amazonreviews"."amazon_reviews_parquet" t,
  92. "s3f2_manifests_database"."s3f2_manifests_table" m
  93. WHERE
  94. m."jobid"='job_1234567890' AND
  95. m."datamapperid"='dm_1234' AND
  96. cast(t."customer_id" as varchar)=m."queryablematchid" AND m."queryablecolumns"='customer_id'
  97. AND "year" = 2010
  98. )
  99. """
  100. )
  101. def test_it_generates_query_with_multiple_partitions():
  102. resp = make_query(
  103. {
  104. "Database": "amazonreviews",
  105. "Table": "amazon_reviews_parquet",
  106. "Columns": [
  107. {
  108. "Column": "customer_id",
  109. "Type": "Simple",
  110. }
  111. ],
  112. "PartitionKeys": [
  113. {"Key": "product_category", "Value": "Books"},
  114. {"Key": "published", "Value": "2019"},
  115. ],
  116. "DataMapperId": "dm_1234",
  117. "JobId": "job_1234567890",
  118. }
  119. )
  120. assert escape_resp(resp) == escape_resp(
  121. """
  122. SELECT DISTINCT "$path" FROM (
  123. SELECT t."$path"
  124. FROM "amazonreviews"."amazon_reviews_parquet" t,
  125. "s3f2_manifests_database"."s3f2_manifests_table" m
  126. WHERE
  127. m."jobid"='job_1234567890' AND
  128. m."datamapperid"='dm_1234' AND
  129. cast(t."customer_id" as varchar)=m."queryablematchid" AND m."queryablecolumns"='customer_id'
  130. AND "product_category" = 'Books' AND "published" = '2019'
  131. )
  132. """
  133. )
  134. def test_it_generates_query_without_partition():
  135. resp = make_query(
  136. {
  137. "Database": "amazonreviews",
  138. "Table": "amazon_reviews_parquet",
  139. "Columns": [
  140. {
  141. "Column": "customer_id",
  142. "Type": "Simple",
  143. }
  144. ],
  145. "DataMapperId": "dm_1234",
  146. "JobId": "job_1234567890",
  147. }
  148. )
  149. assert escape_resp(resp) == escape_resp(
  150. """
  151. SELECT DISTINCT "$path" FROM (
  152. SELECT t."$path"
  153. FROM "amazonreviews"."amazon_reviews_parquet" t,
  154. "s3f2_manifests_database"."s3f2_manifests_table" m
  155. WHERE
  156. m."jobid"='job_1234567890' AND
  157. m."datamapperid"='dm_1234' AND
  158. cast(t."customer_id" as varchar)=m."queryablematchid" AND m."queryablecolumns"='customer_id'
  159. )
  160. """
  161. )
  162. def test_it_generates_query_with_multiple_columns():
  163. resp = make_query(
  164. {
  165. "Database": "amazonreviews",
  166. "Table": "amazon_reviews_parquet",
  167. "Columns": [
  168. {"Column": "a", "Type": "Simple"},
  169. {"Column": "b", "Type": "Simple"},
  170. ],
  171. "DataMapperId": "dm_1234",
  172. "JobId": "job_1234567890",
  173. }
  174. )
  175. assert escape_resp(resp) == escape_resp(
  176. """
  177. SELECT DISTINCT "$path" FROM (
  178. SELECT t."$path"
  179. FROM "amazonreviews"."amazon_reviews_parquet" t,
  180. "s3f2_manifests_database"."s3f2_manifests_table" m
  181. WHERE
  182. m."jobid"='job_1234567890' AND
  183. m."datamapperid"='dm_1234' AND
  184. cast(t."a" as varchar)=m."queryablematchid" AND m."queryablecolumns"='a'
  185. UNION ALL
  186. SELECT t."$path"
  187. FROM "amazonreviews"."amazon_reviews_parquet" t,
  188. "s3f2_manifests_database"."s3f2_manifests_table" m
  189. WHERE
  190. m."jobid"='job_1234567890' AND
  191. m."datamapperid"='dm_1234' AND
  192. cast(t."b" as varchar)=m."queryablematchid" AND m."queryablecolumns"='b'
  193. )
  194. """
  195. )
  196. def test_it_generates_query_with_columns_of_complex_type():
  197. resp = make_query(
  198. {
  199. "Database": "amazonreviews",
  200. "Table": "amazon_reviews_parquet",
  201. "Columns": [{"Column": "a.b.c", "Type": "Simple"}],
  202. "DataMapperId": "dm_1234",
  203. "JobId": "job_1234567890",
  204. }
  205. )
  206. assert escape_resp(resp) == escape_resp(
  207. """
  208. SELECT DISTINCT "$path" FROM (
  209. SELECT t."$path"
  210. FROM "amazonreviews"."amazon_reviews_parquet" t,
  211. "s3f2_manifests_database"."s3f2_manifests_table" m
  212. WHERE
  213. m."jobid"='job_1234567890' AND
  214. m."datamapperid"='dm_1234' AND
  215. cast(t."a"."b"."c" as varchar)=m."queryablematchid" AND m."queryablecolumns"='a.b.c'
  216. )
  217. """
  218. )
  219. def test_it_generates_query_with_composite_matches():
  220. resp = make_query(
  221. {
  222. "Database": "amazonreviews",
  223. "Table": "amazon_reviews_parquet",
  224. "Columns": [
  225. {
  226. "Columns": ["user.first_name", "user.last_name"],
  227. "Type": "Composite",
  228. },
  229. {
  230. "Columns": ["user.age", "user.last_name"],
  231. "Type": "Composite",
  232. },
  233. {
  234. "Columns": ["user.userid"],
  235. "Type": "Composite",
  236. },
  237. ],
  238. "DataMapperId": "dm_1234",
  239. "JobId": "job_1234567890",
  240. }
  241. )
  242. assert escape_resp(resp) == escape_resp(
  243. """
  244. SELECT DISTINCT "$path" FROM (
  245. SELECT t."$path"
  246. FROM "amazonreviews"."amazon_reviews_parquet" t,
  247. "s3f2_manifests_database"."s3f2_manifests_table" m
  248. WHERE
  249. m."jobid"='job_1234567890' AND
  250. m."datamapperid"='dm_1234' AND
  251. concat(t."user"."first_name", '_S3F2COMP_', t."user"."last_name")=m."queryablematchid" AND
  252. m."queryablecolumns"='user.first_name_S3F2COMP_user.last_name'
  253. UNION ALL
  254. SELECT t."$path"
  255. FROM "amazonreviews"."amazon_reviews_parquet" t,
  256. "s3f2_manifests_database"."s3f2_manifests_table" m
  257. WHERE
  258. m."jobid"='job_1234567890' AND
  259. m."datamapperid"='dm_1234' AND
  260. concat(t."user"."age", '_S3F2COMP_', t."user"."last_name")=m."queryablematchid" AND
  261. m."queryablecolumns"='user.age_S3F2COMP_user.last_name'
  262. UNION ALL
  263. SELECT t."$path"
  264. FROM "amazonreviews"."amazon_reviews_parquet" t,
  265. "s3f2_manifests_database"."s3f2_manifests_table" m
  266. WHERE
  267. m."jobid"='job_1234567890' AND
  268. m."datamapperid"='dm_1234' AND
  269. cast(t."user"."userid" as varchar)=m."queryablematchid" AND m."queryablecolumns"='user.userid'
  270. )
  271. """
  272. )
  273. def test_it_generates_query_with_simple_and_composite_matches():
  274. resp = make_query(
  275. {
  276. "Database": "amazonreviews",
  277. "Table": "amazon_reviews_parquet",
  278. "Columns": [
  279. {
  280. "Column": "a.b.c",
  281. "Type": "Simple",
  282. },
  283. {
  284. "Columns": ["user.first_name", "user.last_name"],
  285. "Type": "Composite",
  286. },
  287. ],
  288. "DataMapperId": "dm_1234",
  289. "JobId": "job_1234567890",
  290. },
  291. )
  292. assert escape_resp(resp) == escape_resp(
  293. """
  294. SELECT DISTINCT "$path" FROM (
  295. SELECT t."$path"
  296. FROM "amazonreviews"."amazon_reviews_parquet" t,
  297. "s3f2_manifests_database"."s3f2_manifests_table" m
  298. WHERE
  299. m."jobid"='job_1234567890' AND
  300. m."datamapperid"='dm_1234' AND
  301. cast(t."a"."b"."c" as varchar)=m."queryablematchid" AND m."queryablecolumns"='a.b.c'
  302. UNION ALL
  303. SELECT t."$path"
  304. FROM "amazonreviews"."amazon_reviews_parquet" t,
  305. "s3f2_manifests_database"."s3f2_manifests_table" m
  306. WHERE
  307. m."jobid"='job_1234567890' AND
  308. m."datamapperid"='dm_1234' AND
  309. concat(t."user"."first_name", '_S3F2COMP_', t."user"."last_name")=m."queryablematchid" AND
  310. m."queryablecolumns"='user.first_name_S3F2COMP_user.last_name'
  311. )
  312. """
  313. )
  314. def test_it_escapes_strings():
  315. assert "''' OR 1=1'" == escape_item("' OR 1=1")
  316. def test_it_escapes_ints():
  317. assert 2 == escape_item(2)
  318. def test_it_escapes_floats():
  319. assert float(2) == escape_item(float(2))
  320. def test_it_escapes_none():
  321. assert "NULL" == escape_item(None)
  322. def test_it_raises_for_unsupported_type():
  323. with pytest.raises(ValueError):
  324. escape_item(["val"])