main.py

# Qlik Sense Data Retention Controller
# Created by: Jesper Bagge, 2021
# This application is licensed under MIT.

import argparse
import csv
import datetime
import json
import os
import sys
from datetime import datetime as dt
from datetime import timedelta

from websocket import create_connection

base_path = os.path.dirname(os.path.abspath(__file__))
cert_path = os.path.join(base_path, 'certs')

ssl = {
    'certfile': os.path.join(cert_path, 'client.pem'),
    'keyfile': os.path.join(cert_path, 'client_key.pem'),
    'ca_certs': os.path.join(cert_path, 'root.pem')
}
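
# Client certificates exported from the Qlik Sense installation are expected in ./certs/
# (see the paths above); the connection below authenticates as the internal sa_engine user.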


def _connect(hostname):
    """Helper function to manage websocket connection"""
    url = f"wss://{hostname}:4747/app/"
    ws = create_connection(
        url=url,
        header={'X-Qlik-User': 'UserDirectory=internal; UserId=sa_engine'},
        sslopt={**ssl}
    )
    # consume connection info
    _ = ws.recv()
    return ws


def _communicate(ws, message: dict) -> dict:
    """Sends a JSON-formatted Python dict on the supplied websocket connection"""
    ws.send(json.dumps(message))
    return json.loads(ws.recv())
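
# Note: _communicate() assumes the next frame received is the reply to the request just
# sent; it does not handle unsolicited engine notifications arriving in between.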


def get_app_list(host: str) -> list:
    """Retrieves the full document list from the engine's global context."""
    websocket = _connect(host)
    msg = {
        "method": "GetDocList",
        "handle": -1,  # global context
        "params": []
    }
    response = _communicate(websocket, msg)
    websocket.close()
    # return the application list for further filtering
    return response['result']['qDocList']
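
# Each entry in qDocList carries qDocName, qDocId, qFileSize, qMeta and, once an app has
# been reloaded, qLastReloadTime; stale_apps() filters on exactly those fields.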


def stale_apps(app_list: list, days_stale=180, min_mb=1, include_published=False) -> list:
    """Filters a document list down to apps whose last reload is older than the threshold."""
    # set reload time threshold
    threshold = dt.utcnow() - timedelta(days=days_stale)
    apps = []
    for doc in app_list:
        # keep unpublished apps; published apps only pass when include_published is True
        if doc['qMeta']['published'] in (False, include_published):
            # parse reload time; apps that have never been reloaded fall back to the epoch
            qLastReloadTime = doc.get('qLastReloadTime', None)
            if qLastReloadTime:
                reload_time = dt.strptime(qLastReloadTime, '%Y-%m-%dT%H:%M:%S.%fZ')
            else:
                reload_time = dt.fromtimestamp(0)
            # match threshold
            if reload_time < threshold:
                # parse size
                size = round(doc['qFileSize'] / 1024 / 1024, 2)
                # skip files below the minimum size; those are probably already empty
                if size > min_mb:
                    # add stale doc to list
                    apps.append({
                        'name': doc['qDocName'],
                        'id': doc['qDocId'],
                        'size_mb': size,
                        'last_reload': reload_time
                    })
    return apps


def drop_data_from_app(host: str, doc_id: str):
    """Creates a websocket, opens an app without data, saves the app back to disk and closes the websocket."""
    websocket = _connect(host)
    # open document without data
    msg = {
        'handle': -1,
        'method': 'OpenDoc',
        'params': {
            'qDocName': doc_id,
            'qNoData': True
        }
    }
    response = _communicate(websocket, msg)
    # save document; an empty qFileName saves the app back to its existing file
    if response['result']['qReturn']['qType'] == 'Doc':
        msg = {
            'handle': response['result']['qReturn']['qHandle'],
            'method': 'DoSave',
            'params': {
                'qFileName': ''
            }
        }
        # consume status from save
        response = _communicate(websocket, msg)
    # close the socket
    websocket.close()
    return response


def write_stale_apps_to_csv(apps: list) -> None:
    """Writes the list of stale apps to a timestamped CSV file next to this script."""
    ts = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    filename = f"stale_apps_{ts}.csv"
    data = [
        {
            'name': a['name'],
            'id': a['id'],
            'size_mb': a['size_mb'],
            'last_reload': a['last_reload'].strftime('%Y-%m-%d %H:%M:%S')  # datetime objects can't be dumped as-is
        } for a in apps
    ]
    with open(os.path.join(base_path, filename), 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, ['name', 'id', 'size_mb', 'last_reload'], quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        writer.writerows(data)
    return None


if __name__ == '__main__':
    # create parser
    parser = argparse.ArgumentParser(
        prog='Qlik Sense Data Retention Controller',
        usage='%(prog)s [options]',
        description='Drops stale data from unpublished Qlik Sense apps.',
        epilog='Happy dumping!'
    )
    # define args
    parser.add_argument('-host',
                        action='store',
                        required=True,
                        type=str,
                        help='URL to the Qlik Sense server.')
    parser.add_argument('-d',
                        '--days',
                        action='store',
                        type=int,
                        default=180,
                        help='Days since last reload before an app is considered stale. Default is 180.')
    parser.add_argument('-mb',
                        '--min',
                        action='store',
                        type=float,
                        default=1.0,
                        help='Minimum file size (MB) to consider. Default is 1.0.')
    parser.add_argument('-ip',
                        '--include-published',
                        action='store_true',
                        help='Include published applications. Default is False.')
    parser.add_argument('-r',
                        '--report',
                        action='store_true',
                        help='Writes the list of found apps to a CSV file on disk. Default is False.')
    parser.add_argument('-t',
                        '--truncate',
                        action='store_true',
                        help='Truncates data from found apps. Default is False.')
    args = parser.parse_args()
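
    # Example invocation (hypothetical host name):
    #   python main.py -host qlik.example.com --days 365 --min 5 --report --truncate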
    servername = args.host

    # call the server for the complete app list
    applications = get_app_list(servername)

    # parse the list and filter for stale applications
    print(f"Searching for apps that have not been reloaded for more than {args.days} days.")
    stale_applications = stale_apps(applications,
                                    days_stale=args.days,
                                    min_mb=args.min,
                                    include_published=args.include_published)
    tot_size = round(sum(i['size_mb'] for i in stale_applications), 1)
    print(f"Found {len(stale_applications)} applications with a total of {tot_size} MB of data.")
    print("Execute with --report to write the list to CSV, and with --truncate to clear the data from those apps.")

    # optionally dump a report of the applications to disk
    if args.report:
        print("Writing apps to CSV.")
        write_stale_apps_to_csv(stale_applications)

    # if the truncate argument isn't supplied, the script exits gracefully here
    if not args.truncate:
        sys.exit(0)
    else:
        # walk through the list of stale apps, open them without data and save them
        doc_ids = [doc['id'] for doc in stale_applications]
        for i, _id in enumerate(doc_ids, start=1):
            print(f"Truncating data from app: {_id}. {i}/{len(doc_ids)}.")
            _ = drop_data_from_app(servername, _id)
        print("Done!")