world_bank.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing,
  12. # software distributed under the License is distributed on an
  13. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. # KIND, either express or implied. See the License for the
  15. # specific language governing permissions and limitations
  16. # under the License.
  17. """Loads datasets, dashboards and slices in a new superset instance"""
  18. import json
  19. import os
  20. import textwrap
  21. import pandas as pd
  22. from sqlalchemy import DateTime, String
  23. from sqlalchemy.sql import column
  24. from superset import db
  25. from superset.connectors.sqla.models import SqlMetric
  26. from superset.models.dashboard import Dashboard
  27. from superset.models.slice import Slice
  28. from superset.utils import core as utils
  29. from .helpers import (
  30. config,
  31. EXAMPLES_FOLDER,
  32. get_example_data,
  33. get_slice_json,
  34. merge_slice,
  35. misc_dash_slices,
  36. TBL,
  37. update_slice_ids,
  38. )
  39. def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
  40. only_metadata: bool = False, force: bool = False
  41. ) -> None:
  42. """Loads the world bank health dataset, slices and a dashboard"""
  43. tbl_name = "wb_health_population"
  44. database = utils.get_example_database()
  45. table_exists = database.has_table_by_name(tbl_name)
  46. if not only_metadata and (not table_exists or force):
  47. data = get_example_data("countries.json.gz")
  48. pdf = pd.read_json(data)
  49. pdf.columns = [col.replace(".", "_") for col in pdf.columns]
  50. pdf.year = pd.to_datetime(pdf.year)
  51. pdf.to_sql(
  52. tbl_name,
  53. database.get_sqla_engine(),
  54. if_exists="replace",
  55. chunksize=50,
  56. dtype={
  57. "year": DateTime(),
  58. "country_code": String(3),
  59. "country_name": String(255),
  60. "region": String(255),
  61. },
  62. index=False,
  63. )
  64. print("Creating table [wb_health_population] reference")
  65. tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
  66. if not tbl:
  67. tbl = TBL(table_name=tbl_name)
  68. tbl.description = utils.readfile(os.path.join(EXAMPLES_FOLDER, "countries.md"))
  69. tbl.main_dttm_col = "year"
  70. tbl.database = database
  71. tbl.filter_select_enabled = True
  72. metrics = [
  73. "sum__SP_POP_TOTL",
  74. "sum__SH_DYN_AIDS",
  75. "sum__SH_DYN_AIDS",
  76. "sum__SP_RUR_TOTL_ZS",
  77. "sum__SP_DYN_LE00_IN",
  78. "sum__SP_RUR_TOTL",
  79. ]
  80. for metric in metrics:
  81. if not any(col.metric_name == metric for col in tbl.metrics):
  82. aggr_func = metric[:3]
  83. col = str(column(metric[5:]).compile(db.engine))
  84. tbl.metrics.append(
  85. SqlMetric(metric_name=metric, expression=f"{aggr_func}({col})")
  86. )
  87. db.session.merge(tbl)
  88. db.session.commit()
  89. tbl.fetch_metadata()
  90. metric = "sum__SP_POP_TOTL"
  91. metrics = ["sum__SP_POP_TOTL"]
  92. secondary_metric = {
  93. "aggregate": "SUM",
  94. "column": {
  95. "column_name": "SP_RUR_TOTL",
  96. "optionName": "_col_SP_RUR_TOTL",
  97. "type": "DOUBLE",
  98. },
  99. "expressionType": "SIMPLE",
  100. "hasCustomLabel": True,
  101. "label": "Rural Population",
  102. }
  103. defaults = {
  104. "compare_lag": "10",
  105. "compare_suffix": "o10Y",
  106. "limit": "25",
  107. "granularity_sqla": "year",
  108. "groupby": [],
  109. "row_limit": config["ROW_LIMIT"],
  110. "since": "2014-01-01",
  111. "until": "2014-01-02",
  112. "time_range": "2014-01-01 : 2014-01-02",
  113. "markup_type": "markdown",
  114. "country_fieldtype": "cca3",
  115. "entity": "country_code",
  116. "show_bubbles": True,
  117. }
  118. print("Creating slices")
  119. slices = [
  120. Slice(
  121. slice_name="Region Filter",
  122. viz_type="filter_box",
  123. datasource_type="table",
  124. datasource_id=tbl.id,
  125. params=get_slice_json(
  126. defaults,
  127. viz_type="filter_box",
  128. date_filter=False,
  129. filter_configs=[
  130. {
  131. "asc": False,
  132. "clearable": True,
  133. "column": "region",
  134. "key": "2s98dfu",
  135. "metric": "sum__SP_POP_TOTL",
  136. "multiple": True,
  137. },
  138. {
  139. "asc": False,
  140. "clearable": True,
  141. "key": "li3j2lk",
  142. "column": "country_name",
  143. "metric": "sum__SP_POP_TOTL",
  144. "multiple": True,
  145. },
  146. ],
  147. ),
  148. ),
  149. Slice(
  150. slice_name="World's Population",
  151. viz_type="big_number",
  152. datasource_type="table",
  153. datasource_id=tbl.id,
  154. params=get_slice_json(
  155. defaults,
  156. since="2000",
  157. viz_type="big_number",
  158. compare_lag="10",
  159. metric="sum__SP_POP_TOTL",
  160. compare_suffix="over 10Y",
  161. ),
  162. ),
  163. Slice(
  164. slice_name="Most Populated Countries",
  165. viz_type="table",
  166. datasource_type="table",
  167. datasource_id=tbl.id,
  168. params=get_slice_json(
  169. defaults,
  170. viz_type="table",
  171. metrics=["sum__SP_POP_TOTL"],
  172. groupby=["country_name"],
  173. ),
  174. ),
  175. Slice(
  176. slice_name="Growth Rate",
  177. viz_type="line",
  178. datasource_type="table",
  179. datasource_id=tbl.id,
  180. params=get_slice_json(
  181. defaults,
  182. viz_type="line",
  183. since="1960-01-01",
  184. metrics=["sum__SP_POP_TOTL"],
  185. num_period_compare="10",
  186. groupby=["country_name"],
  187. ),
  188. ),
  189. Slice(
  190. slice_name="% Rural",
  191. viz_type="world_map",
  192. datasource_type="table",
  193. datasource_id=tbl.id,
  194. params=get_slice_json(
  195. defaults,
  196. viz_type="world_map",
  197. metric="sum__SP_RUR_TOTL_ZS",
  198. num_period_compare="10",
  199. secondary_metric=secondary_metric,
  200. ),
  201. ),
  202. Slice(
  203. slice_name="Life Expectancy VS Rural %",
  204. viz_type="bubble",
  205. datasource_type="table",
  206. datasource_id=tbl.id,
  207. params=get_slice_json(
  208. defaults,
  209. viz_type="bubble",
  210. since="2011-01-01",
  211. until="2011-01-02",
  212. series="region",
  213. limit=0,
  214. entity="country_name",
  215. x="sum__SP_RUR_TOTL_ZS",
  216. y="sum__SP_DYN_LE00_IN",
  217. size="sum__SP_POP_TOTL",
  218. max_bubble_size="50",
  219. adhoc_filters=[
  220. {
  221. "clause": "WHERE",
  222. "expressionType": "SIMPLE",
  223. "filterOptionName": "2745eae5",
  224. "comparator": [
  225. "TCA",
  226. "MNP",
  227. "DMA",
  228. "MHL",
  229. "MCO",
  230. "SXM",
  231. "CYM",
  232. "TUV",
  233. "IMY",
  234. "KNA",
  235. "ASM",
  236. "ADO",
  237. "AMA",
  238. "PLW",
  239. ],
  240. "operator": "NOT IN",
  241. "subject": "country_code",
  242. }
  243. ],
  244. ),
  245. ),
  246. Slice(
  247. slice_name="Rural Breakdown",
  248. viz_type="sunburst",
  249. datasource_type="table",
  250. datasource_id=tbl.id,
  251. params=get_slice_json(
  252. defaults,
  253. viz_type="sunburst",
  254. groupby=["region", "country_name"],
  255. since="2011-01-01",
  256. until="2011-01-01",
  257. metric=metric,
  258. secondary_metric=secondary_metric,
  259. ),
  260. ),
  261. Slice(
  262. slice_name="World's Pop Growth",
  263. viz_type="area",
  264. datasource_type="table",
  265. datasource_id=tbl.id,
  266. params=get_slice_json(
  267. defaults,
  268. since="1960-01-01",
  269. until="now",
  270. viz_type="area",
  271. groupby=["region"],
  272. metrics=metrics,
  273. ),
  274. ),
  275. Slice(
  276. slice_name="Box plot",
  277. viz_type="box_plot",
  278. datasource_type="table",
  279. datasource_id=tbl.id,
  280. params=get_slice_json(
  281. defaults,
  282. since="1960-01-01",
  283. until="now",
  284. whisker_options="Min/max (no outliers)",
  285. x_ticks_layout="staggered",
  286. viz_type="box_plot",
  287. groupby=["region"],
  288. metrics=metrics,
  289. ),
  290. ),
  291. Slice(
  292. slice_name="Treemap",
  293. viz_type="treemap",
  294. datasource_type="table",
  295. datasource_id=tbl.id,
  296. params=get_slice_json(
  297. defaults,
  298. since="1960-01-01",
  299. until="now",
  300. viz_type="treemap",
  301. metrics=["sum__SP_POP_TOTL"],
  302. groupby=["region", "country_code"],
  303. ),
  304. ),
  305. Slice(
  306. slice_name="Parallel Coordinates",
  307. viz_type="para",
  308. datasource_type="table",
  309. datasource_id=tbl.id,
  310. params=get_slice_json(
  311. defaults,
  312. since="2011-01-01",
  313. until="2011-01-01",
  314. viz_type="para",
  315. limit=100,
  316. metrics=["sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS"],
  317. secondary_metric="sum__SP_POP_TOTL",
  318. series="country_name",
  319. ),
  320. ),
  321. ]
  322. misc_dash_slices.add(slices[-1].slice_name)
  323. for slc in slices:
  324. merge_slice(slc)
  325. print("Creating a World's Health Bank dashboard")
  326. dash_name = "World Bank's Data"
  327. slug = "world_health"
  328. dash = db.session.query(Dashboard).filter_by(slug=slug).first()
  329. if not dash:
  330. dash = Dashboard()
  331. dash.published = True
  332. js = textwrap.dedent(
  333. """\
  334. {
  335. "CHART-36bfc934": {
  336. "children": [],
  337. "id": "CHART-36bfc934",
  338. "meta": {
  339. "chartId": 40,
  340. "height": 25,
  341. "sliceName": "Region Filter",
  342. "width": 2
  343. },
  344. "type": "CHART"
  345. },
  346. "CHART-37982887": {
  347. "children": [],
  348. "id": "CHART-37982887",
  349. "meta": {
  350. "chartId": 41,
  351. "height": 25,
  352. "sliceName": "World's Population",
  353. "width": 2
  354. },
  355. "type": "CHART"
  356. },
  357. "CHART-17e0f8d8": {
  358. "children": [],
  359. "id": "CHART-17e0f8d8",
  360. "meta": {
  361. "chartId": 42,
  362. "height": 92,
  363. "sliceName": "Most Populated Countries",
  364. "width": 3
  365. },
  366. "type": "CHART"
  367. },
  368. "CHART-2ee52f30": {
  369. "children": [],
  370. "id": "CHART-2ee52f30",
  371. "meta": {
  372. "chartId": 43,
  373. "height": 38,
  374. "sliceName": "Growth Rate",
  375. "width": 6
  376. },
  377. "type": "CHART"
  378. },
  379. "CHART-2d5b6871": {
  380. "children": [],
  381. "id": "CHART-2d5b6871",
  382. "meta": {
  383. "chartId": 44,
  384. "height": 52,
  385. "sliceName": "% Rural",
  386. "width": 7
  387. },
  388. "type": "CHART"
  389. },
  390. "CHART-0fd0d252": {
  391. "children": [],
  392. "id": "CHART-0fd0d252",
  393. "meta": {
  394. "chartId": 45,
  395. "height": 50,
  396. "sliceName": "Life Expectancy VS Rural %",
  397. "width": 8
  398. },
  399. "type": "CHART"
  400. },
  401. "CHART-97f4cb48": {
  402. "children": [],
  403. "id": "CHART-97f4cb48",
  404. "meta": {
  405. "chartId": 46,
  406. "height": 38,
  407. "sliceName": "Rural Breakdown",
  408. "width": 3
  409. },
  410. "type": "CHART"
  411. },
  412. "CHART-b5e05d6f": {
  413. "children": [],
  414. "id": "CHART-b5e05d6f",
  415. "meta": {
  416. "chartId": 47,
  417. "height": 50,
  418. "sliceName": "World's Pop Growth",
  419. "width": 4
  420. },
  421. "type": "CHART"
  422. },
  423. "CHART-e76e9f5f": {
  424. "children": [],
  425. "id": "CHART-e76e9f5f",
  426. "meta": {
  427. "chartId": 48,
  428. "height": 50,
  429. "sliceName": "Box plot",
  430. "width": 4
  431. },
  432. "type": "CHART"
  433. },
  434. "CHART-a4808bba": {
  435. "children": [],
  436. "id": "CHART-a4808bba",
  437. "meta": {
  438. "chartId": 49,
  439. "height": 50,
  440. "sliceName": "Treemap",
  441. "width": 8
  442. },
  443. "type": "CHART"
  444. },
  445. "COLUMN-071bbbad": {
  446. "children": [
  447. "ROW-1e064e3c",
  448. "ROW-afdefba9"
  449. ],
  450. "id": "COLUMN-071bbbad",
  451. "meta": {
  452. "background": "BACKGROUND_TRANSPARENT",
  453. "width": 9
  454. },
  455. "type": "COLUMN"
  456. },
  457. "COLUMN-fe3914b8": {
  458. "children": [
  459. "CHART-36bfc934",
  460. "CHART-37982887"
  461. ],
  462. "id": "COLUMN-fe3914b8",
  463. "meta": {
  464. "background": "BACKGROUND_TRANSPARENT",
  465. "width": 2
  466. },
  467. "type": "COLUMN"
  468. },
  469. "GRID_ID": {
  470. "children": [
  471. "ROW-46632bc2",
  472. "ROW-3fa26c5d",
  473. "ROW-812b3f13"
  474. ],
  475. "id": "GRID_ID",
  476. "type": "GRID"
  477. },
  478. "HEADER_ID": {
  479. "id": "HEADER_ID",
  480. "meta": {
  481. "text": "World's Bank Data"
  482. },
  483. "type": "HEADER"
  484. },
  485. "ROOT_ID": {
  486. "children": [
  487. "GRID_ID"
  488. ],
  489. "id": "ROOT_ID",
  490. "type": "ROOT"
  491. },
  492. "ROW-1e064e3c": {
  493. "children": [
  494. "COLUMN-fe3914b8",
  495. "CHART-2d5b6871"
  496. ],
  497. "id": "ROW-1e064e3c",
  498. "meta": {
  499. "background": "BACKGROUND_TRANSPARENT"
  500. },
  501. "type": "ROW"
  502. },
  503. "ROW-3fa26c5d": {
  504. "children": [
  505. "CHART-b5e05d6f",
  506. "CHART-0fd0d252"
  507. ],
  508. "id": "ROW-3fa26c5d",
  509. "meta": {
  510. "background": "BACKGROUND_TRANSPARENT"
  511. },
  512. "type": "ROW"
  513. },
  514. "ROW-46632bc2": {
  515. "children": [
  516. "COLUMN-071bbbad",
  517. "CHART-17e0f8d8"
  518. ],
  519. "id": "ROW-46632bc2",
  520. "meta": {
  521. "background": "BACKGROUND_TRANSPARENT"
  522. },
  523. "type": "ROW"
  524. },
  525. "ROW-812b3f13": {
  526. "children": [
  527. "CHART-a4808bba",
  528. "CHART-e76e9f5f"
  529. ],
  530. "id": "ROW-812b3f13",
  531. "meta": {
  532. "background": "BACKGROUND_TRANSPARENT"
  533. },
  534. "type": "ROW"
  535. },
  536. "ROW-afdefba9": {
  537. "children": [
  538. "CHART-2ee52f30",
  539. "CHART-97f4cb48"
  540. ],
  541. "id": "ROW-afdefba9",
  542. "meta": {
  543. "background": "BACKGROUND_TRANSPARENT"
  544. },
  545. "type": "ROW"
  546. },
  547. "DASHBOARD_VERSION_KEY": "v2"
  548. }
  549. """
  550. )
  551. pos = json.loads(js)
  552. update_slice_ids(pos, slices)
  553. dash.dashboard_title = dash_name
  554. dash.position_json = json.dumps(pos, indent=4)
  555. dash.slug = slug
  556. dash.slices = slices[:-1]
  557. db.session.merge(dash)
  558. db.session.commit()