刘凡 3 years ago
commit
ba16f626df
100 changed files with 19401 additions and 0 deletions
  1. BIN  .DS_Store
  2. +3 -0  .gitignore
  3. +3 -0  .idea/.gitignore
  4. +10 -0  .idea/GnnForPrivacyScan.iml
  5. +15 -0  .idea/inspectionProfiles/Project_Default.xml
  6. +6 -0  .idea/inspectionProfiles/profiles_settings.xml
  7. +7 -0  .idea/misc.xml
  8. +8 -0  .idea/modules.xml
  9. +30 -0  .idea/vcs.xml
  10. BIN  data/.DS_Store
  11. +65 -0  data/edge.txt
  12. +108 -0  data/node.txt
  13. BIN  data/purposeCombined/.DS_Store
  14. +3 -0  data/purposeCombined/Azure/.vscode/settings.json
  15. +82 -0  data/purposeCombined/Azure/AddUp/Azure-blob-storage.py
  16. +51 -0  data/purposeCombined/Azure/AddUp/blob-upload-1.py
  17. +221 -0  data/purposeCombined/Azure/AddUp/blob-upload-2.py
  18. +107 -0  data/purposeCombined/Azure/AddUp/blob-upload.py
  19. +231 -0  data/purposeCombined/Azure/AddUp/circuitbreaker.py
  20. +138 -0  data/purposeCombined/Azure/AddUp/datafactory.py
  21. +202 -0  data/purposeCombined/Azure/AddUp/file_advanced_samples.py
  22. +190 -0  data/purposeCombined/Azure/AddUp/file_basic_samples.py
  23. +415 -0  data/purposeCombined/Azure/AddUp/python-quick-start.py
  24. +218 -0  data/purposeCombined/Azure/AddUp/table_advanced_samples.py
  25. +96 -0  data/purposeCombined/Azure/AddUp/table_basic_samples.py
  26. +1 -0  data/purposeCombined/Azure/AzureStorage
  27. +125 -0  data/purposeCombined/Azure/DLfile.py
  28. +1 -0  data/purposeCombined/Azure/azure-multiapi-storage-python
  29. +64 -0  data/purposeCombined/Azure/blob-adapter.py
  30. +98 -0  data/purposeCombined/Azure/blob-permission.py
  31. +101 -0  data/purposeCombined/Azure/blob-upload-1.py
  32. +81 -0  data/purposeCombined/Azure/blob-upload-2.py
  33. +57 -0  data/purposeCombined/Azure/blob-upload-3.py
  34. +67 -0  data/purposeCombined/Azure/blob-upload-4.py
  35. +107 -0  data/purposeCombined/Azure/blob-upload.py
  36. +221 -0  data/purposeCombined/Azure/django-blob.py
  37. +1 -0  data/purposeCombined/Azure/python-text-classification
  38. +555 -0  data/purposeCombined/Azure/storage-blob.py
  39. +130 -0  data/purposeCombined/Azure/table-service.py
  40. +218 -0  data/purposeCombined/Azure/table-storage.py
  41. BIN  data/purposeCombined/BI/.DS_Store
  42. +47 -0  data/purposeCombined/BI/BIL.py
  43. +1 -0  data/purposeCombined/BI/BusinessIntelligence-Kaggle
  44. +606 -0  data/purposeCombined/BI/ID3_classification.py
  45. +336 -0  data/purposeCombined/BI/Practica2.py
  46. +132 -0  data/purposeCombined/BI/apriori.py
  47. +440 -0  data/purposeCombined/BI/bi_main.py
  48. +727 -0  data/purposeCombined/BI/cube-backup.py
  49. +727 -0  data/purposeCombined/BI/cube.py
  50. +197 -0  data/purposeCombined/BI/etl_testing.py
  51. +33 -0  data/purposeCombined/BI/examples/__init__.py
  52. +63 -0  data/purposeCombined/BI/examples/bart_lines.py
  53. +763 -0  data/purposeCombined/BI/examples/birth_names.py
  54. +373 -0  data/purposeCombined/BI/examples/countries.md
  55. +2505 -0  data/purposeCombined/BI/examples/countries.py
  56. +114 -0  data/purposeCombined/BI/examples/country_map.py
  57. +100 -0  data/purposeCombined/BI/examples/css_templates.py
  58. +529 -0  data/purposeCombined/BI/examples/deck.py
  59. +141 -0  data/purposeCombined/BI/examples/energy.py
  60. +68 -0  data/purposeCombined/BI/examples/flights.py
  61. +78 -0  data/purposeCombined/BI/examples/helpers-backup.py
  62. +78 -0  data/purposeCombined/BI/examples/helpers.py
  63. +116 -0  data/purposeCombined/BI/examples/long_lat.py
  64. +224 -0  data/purposeCombined/BI/examples/misc_dashboard-backup.py
  65. +224 -0  data/purposeCombined/BI/examples/misc_dashboard.py
  66. +58 -0  data/purposeCombined/BI/examples/multi_line.py
  67. +117 -0  data/purposeCombined/BI/examples/multiformat_time_series.py
  68. +60 -0  data/purposeCombined/BI/examples/paris.py
  69. +81 -0  data/purposeCombined/BI/examples/random_time_series.py
  70. +62 -0  data/purposeCombined/BI/examples/sf_population_polygons.py
  71. +342 -0  data/purposeCombined/BI/examples/tabbed_dashboard-backup.py
  72. +342 -0  data/purposeCombined/BI/examples/tabbed_dashboard.py
  73. +163 -0  data/purposeCombined/BI/examples/unicode_test_data.py
  74. +574 -0  data/purposeCombined/BI/examples/world_bank.py
  75. +580 -0  data/purposeCombined/BI/income_disparity_final_version_2.py
  76. +338 -0  data/purposeCombined/BI/macro_analysis-backup.py
  77. +338 -0  data/purposeCombined/BI/macro_analysis.py
  78. +662 -0  data/purposeCombined/BI/practica3.py
  79. +98 -0  data/purposeCombined/Directory/IOTA2Directory.py
  80. +31 -0  data/purposeCombined/Directory/advance_touch.py
  81. +213 -0  data/purposeCombined/Directory/augmentation_main.py
  82. +92 -0  data/purposeCombined/Directory/conftest.py
  83. +394 -0  data/purposeCombined/Directory/data_preprocessing_utils.py
  84. +122 -0  data/purposeCombined/Directory/diml_to_interiornet.py
  85. +177 -0  data/purposeCombined/Directory/ego_to_json.py
  86. +107 -0  data/purposeCombined/Directory/esquema.py
  87. +41 -0  data/purposeCombined/Directory/file_handler.py
  88. +130 -0  data/purposeCombined/Directory/generate_directories.py
  89. +167 -0  data/purposeCombined/Directory/logging.py
  90. +27 -0  data/purposeCombined/Directory/make_folder.py
  91. +90 -0  data/purposeCombined/Directory/mkdir.py
  92. +135 -0  data/purposeCombined/Directory/mkdirPypi.py
  93. +12 -0  data/purposeCombined/Directory/mkdir_p.py
  94. +80 -0  data/purposeCombined/Directory/project_creator.py
  95. +206 -0  data/purposeCombined/Directory/setup.py
  96. +49 -0  data/purposeCombined/Directory/split_data_in_k_folds.py
  97. +80 -0  data/purposeCombined/Directory/stc_vid2frames.py
  98. +197 -0  data/purposeCombined/Directory/test_archive.py
  99. +306 -0  data/purposeCombined/Directory/test_tool.py
  100. +272 -0  data/purposeCombined/Directory/tutorial.py

+ 3 - 0
.gitignore

@@ -0,0 +1,3 @@
+.DS_Store
+*.pyc
+__pycache__

+ 3 - 0
.idea/.gitignore

@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml

+ 10 - 0
.idea/GnnForPrivacyScan.iml

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 15 - 0
.idea/inspectionProfiles/Project_Default.xml

@@ -0,0 +1,15 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="2">
+            <item index="0" class="java.lang.String" itemvalue="requests" />
+            <item index="1" class="java.lang.String" itemvalue="Flask" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 7 - 0
.idea/misc.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (base)" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/GnnForPrivacyScan.iml" filepath="$PROJECT_DIR$/.idea/GnnForPrivacyScan.iml" />
+    </modules>
+  </component>
+</project>

+ 30 - 0
.idea/vcs.xml

@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Azure/AzureStorage" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Azure/azure-multiapi-storage-python" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Azure/python-text-classification" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/BI/BusinessIntelligence-Kaggle" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Hash/Encryption_And_Hashing" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/ML-In-Action" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/Machine-Learining-Security" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/Machine-Learning" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/Machine_Learning_and_Having_It_Deep_and_Structured" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/NATS/NatsExample" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/NATS/asyncio-nats-examples" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Pseudonym/Data-Masking" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/S3/NatsExample" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/S3/odoo-s3-storage" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/S3/s3-concat" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/archive/auto-archiver" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/Calories-Alert-Kafka" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/MessageCorps" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/ai-project-fraud-detection" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/kafka-fraud-detector" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/kafkaesk" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/scrapy-kafka" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/tail2kafka" vcs="Git" />
+    <mapping directory="$PROJECT_DIR$/data/purposeCombined/visualize/Visualization-of-popular-algorithms-in-Python" vcs="Git" />
+  </component>
+</project>


+ 65 - 0
data/edge.txt

@@ -0,0 +1,65 @@
+name 1
+asname 2
+target 3
+annotation 4
+value 5
+simple 6
+arg 7
+type_comment 8
+posonlyargs 9
+args 10
+vararg 11
+kwonlyargs 12
+kw_defaults 13
+kwarg 14
+defaults 15
+test 16
+msg 17
+targets 18
+iter 19
+body 20
+orelse 21
+decorator_list 22
+returns 23
+items 24
+attr 25
+ctx 26
+op 27
+left 28
+right 29
+values 30
+func 31
+keywords 32
+bases 33
+ops 34
+comparators 35
+ifs 36
+is_async 37
+kind 38
+keys 39
+key 40
+generators 41
+type 42
+conversion 43
+format_spec 44
+argtypes 45
+elt 46
+names 47
+module 48
+level 49
+elts 50
+type_ignores 51
+id 52
+exc 53
+cause 54
+lower 55
+upper 56
+step 57
+slice 58
+handlers 59
+finalbody 60
+lineno 61
+tag 62
+operand 63
+context_expr 64
+optional_vars 65

+ 108 - 0
data/node.txt

@@ -0,0 +1,108 @@
+AST 1
+operator 2
+Add 3
+alias 4
+boolop 5
+And 6
+stmt 7
+AnnAssign 8
+arg 9
+arguments 10
+Assert 11
+Assign 12
+AsyncFor 13
+AsyncFunctionDef 14
+AsyncWith 15
+expr 16
+Attribute 17
+AugAssign 18
+Await 19
+BinOp 20
+BitAnd 21
+BitOr 22
+BitXor 23
+BoolOp 24
+Break 25
+Call 26
+ClassDef 27
+cmpop 28
+Compare 29
+comprehension 30
+Constant 31
+Continue 32
+expr_context 33
+Del 34
+Delete 35
+Dict 36
+DictComp 37
+Div 38
+Eq 39
+excepthandler 40
+ExceptHandler 41
+Expr 42
+mod 43
+Expression 44
+FloorDiv 45
+For 46
+FormattedValue 47
+FunctionDef 48
+FunctionType 49
+GeneratorExp 50
+Global 51
+Gt 52
+GtE 53
+If 54
+IfExp 55
+Import 56
+ImportFrom 57
+In 58
+Interactive 59
+unaryop 60
+Invert 61
+Is 62
+IsNot 63
+JoinedStr 64
+keyword 65
+Lambda 66
+List 67
+ListComp 68
+Load 69
+LShift 70
+Lt 71
+LtE 72
+MatMult 73
+Mod 74
+Module 75
+Mult 76
+Name 77
+NamedExpr 78
+Nonlocal 79
+Not 80
+NotEq 81
+NotIn 82
+Or 83
+Pass 84
+Pow 85
+Raise 86
+Return 87
+RShift 88
+Set 89
+SetComp 90
+Slice 91
+Starred 92
+Store 93
+Sub 94
+Subscript 95
+Try 96
+Tuple 97
+type_ignore 98
+TypeIgnore 99
+UAdd 100
+UnaryOp 101
+USub 102
+While 103
+With 104
+withitem 105
+Yield 106
+YieldFrom 107
+__loader__ 108

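Note: data/edge.txt and data/node.txt map Python ast field names and node types to integer IDs, which suggests each source file under data/purposeCombined/ is meant to be encoded as a graph over its AST. The code that consumes these vocabularies is not part of this commit, so the following is only a minimal sketch under that assumption; the function names and the fallback ID 0 are illustrative, not taken from the repository.

import ast

def load_vocab(path):
    # Each line in node.txt / edge.txt is "<name> <integer id>".
    vocab = {}
    with open(path) as handle:
        for line in handle:
            name, idx = line.split()
            vocab[name] = int(idx)
    return vocab

def ast_to_graph(source, node_vocab, edge_vocab):
    # Hypothetical sketch: return node type IDs and (parent, child, edge type ID) triples.
    tree = ast.parse(source)
    nodes, edges, index = [], [], {}

    def visit(node):
        index[node] = len(nodes)
        nodes.append(node_vocab.get(type(node).__name__, 0))  # 0 = unknown type (assumption)
        for field, value in ast.iter_fields(node):
            children = value if isinstance(value, list) else [value]
            for child in children:
                if isinstance(child, ast.AST):
                    visit(child)
                    edges.append((index[node], index[child], edge_vocab.get(field, 0)))

    visit(tree)
    return nodes, edges

if __name__ == "__main__":
    node_vocab = load_vocab("data/node.txt")
    edge_vocab = load_vocab("data/edge.txt")
    print(ast_to_graph("x = 1 + 2", node_vocab, edge_vocab))

For "x = 1 + 2" this produces node IDs for Module, Assign, Name, Store, BinOp, Constant, Add and Constant, with edges labelled body, targets, ctx, value, left, op and right, all of which appear in the two vocabularies above.
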
BIN
data/purposeCombined/.DS_Store


+ 3 - 0
data/purposeCombined/Azure/.vscode/settings.json

@@ -0,0 +1,3 @@
+{
+    "python.formatting.provider": "black"
+}

+ 82 - 0
data/purposeCombined/Azure/AddUp/Azure-blob-storage.py

@@ -0,0 +1,82 @@
+from azure.storage.blob import BlobClient, BlobServiceClient
+import os
+import requests
+
+def list_files() -> list:
+    file_list = []
+    
+    for root, dirs, files in os.walk("data"):
+        for name in files:
+            file_list.append({"file_name": name, "local_path": os.path.join(root,name)})
+
+    return file_list
+
+def get_filename_from_url(url: str) -> str:
+    file_name=url.split('/')[-1]
+    return file_name
+
+def get_random_images() -> list:
+    # helper function uses loremflickr.com to get a random list of images 
+    images = []
+
+    for i in range(10):
+        resp = requests.get(url=f"https://loremflickr.com/json/320/240?random={i}")
+        resp_json = resp.json()
+        images.append(resp_json["file"])
+
+    return images
+
+def create_blob_from_url(storage_connection_string,container_name):
+    try:
+        # urls to fetch into blob storage
+        url_list = get_random_images()
+
+        # Instantiate a new BlobServiceClient and a new ContainerClient
+        blob_service_client = BlobServiceClient.from_connection_string(storage_connection_string)
+        container_client = blob_service_client.get_container_client(container_name)
+
+        for u in url_list:
+            # Download file from url then upload blob file
+            r = requests.get(u, stream = True)
+            if r.status_code == 200:
+                r.raw.decode_content = True
+                blob_client = container_client.get_blob_client(get_filename_from_url(u))
+                blob_client.upload_blob(r.raw,overwrite=True)
+        return True
+        
+    except Exception as e:
+        print(e)
+        return False 
+
+def create_blob_from_path(storage_connection_string,container_name):
+    try:
+        # Instantiate a new BlobServiceClient and a new ContainerClient
+        blob_service_client = BlobServiceClient.from_connection_string(storage_connection_string)
+        container_client = blob_service_client.get_container_client(container_name)
+
+        for f in list_files():
+            with open(f["local_path"], "rb") as data:
+                blob_client = container_client.get_blob_client(f["file_name"])
+                blob_client.upload_blob(data,overwrite=True)
+        return True
+
+    except Exception as e:
+        print(e)
+        return False
+
+if __name__ == '__main__':
+
+    # get storage account settings
+    storage_connection_string = os.environ.get("STORAGE_CONNECTION_STRING")
+    container_name = os.environ.get("STORAGE_CONTAINER")
+
+    # # if you want to copy from a public url
+    result = create_blob_from_url(storage_connection_string,container_name)
+    
+    # OR if you want to upload from your local drive
+    #create_blob_from_path(storage_connection_string,container_name)
+
+    if(result):
+        print("Done!")
+    else:
+        print("An error occured!")

+ 51 - 0
data/purposeCombined/Azure/AddUp/blob-upload-1.py

@@ -0,0 +1,51 @@
+import os
+from flask import Flask, request, redirect, url_for
+from werkzeug import secure_filename
+from azure.storage.blob import BlockBlobService
+import string, random, requests
+
+app = Flask(__name__, instance_relative_config=True)
+
+app.config.from_pyfile('config.py')
+account = app.config['ACCOUNT']   # Azure account name
+key = app.config['STORAGE_KEY']      # Azure Storage account access key  
+container = app.config['CONTAINER'] # Container name
+
+blob_service = BlockBlobService(account_name=account, account_key=key)
+
+@app.route('/', methods=['GET', 'POST'])
+def upload_file():
+    if request.method == 'POST':
+        file = request.files['file']
+        filename = secure_filename(file.filename)
+        fileextension = filename.rsplit('.',1)[1]
+        Randomfilename = id_generator()
+        filename = Randomfilename + '.' + fileextension
+        try:
+            blob_service.create_blob_from_stream(container, filename, file)
+        except Exception as e:
+            print('Exception=' + str(e))
+            pass
+        ref =  'http://'+ account + '.blob.core.windows.net/' + container + '/' + filename
+        return '''
+	    <!doctype html>
+	    <title>File Link</title>
+	    <h1>Uploaded File Link</h1>
+	    <p>''' + ref + '''</p>
+	    <img src="'''+ ref +'''">
+	    '''
+    return '''
+    <!doctype html>
+    <title>Upload new File</title>
+    <h1>Upload new File</h1>
+    <form action="" method=post enctype=multipart/form-data>
+      <p><input type=file name=file>
+         <input type=submit value=Upload>
+    </form>
+    '''
+
+def id_generator(size=32, chars=string.ascii_uppercase + string.digits):
+    return ''.join(random.choice(chars) for _ in range(size))
+
+if __name__ == '__main__':
+    app.run(debug=True)

+ 221 - 0
data/purposeCombined/Azure/AddUp/blob-upload-2.py

@@ -0,0 +1,221 @@
+import mimetypes
+import datetime
+
+from azure.common import AzureMissingResourceHttpError
+from azure.storage.blob import BlobService
+
+from django.core.files.storage import Storage
+from django.conf import settings
+
+try:
+    from django.utils.deconstruct import deconstructible
+except ImportError:
+    # Support for django 1.7 and below
+    def deconstructible(func):
+        return func
+
+
+@deconstructible
+class AzureStorage(Storage):
+    """
+    Custom file storage system for Azure
+    """
+
+    container = settings.AZURE_STORAGE.get('CONTAINER')
+    account_name = settings.AZURE_STORAGE.get('ACCOUNT_NAME')
+    account_key = settings.AZURE_STORAGE.get('ACCOUNT_KEY')
+    cdn_host = settings.AZURE_STORAGE.get('CDN_HOST')
+    use_ssl = settings.AZURE_STORAGE.get('USE_SSL')
+
+    def __init__(self, account_name=None, account_key=None, container=None,
+         use_ssl=None, cdn_host=None):
+
+        if account_name is not None:
+            self.account_name = account_name
+
+        if account_key is not None:
+            self.account_key = account_key
+
+        if container is not None:
+            self.container = container
+
+        if use_ssl is not None:
+            self.use_ssl = use_ssl
+
+        if cdn_host is not None:
+            self.cdn_host = cdn_host
+
+    def __getstate__(self):
+        return dict(
+            account_name=self.account_name,
+            account_key=self.account_key,
+            container=self.container,
+            cdn_host=self.cdn_host,
+            use_ssl=self.use_ssl
+        )
+
+    def _get_service(self):
+        if not hasattr(self, '_blob_service'):
+            self._blob_service = BlobService(
+                account_name=self.account_name,
+                account_key=self.account_key,
+                protocol='https' if self.use_ssl else 'http'
+            )
+
+        return self._blob_service
+
+    def _get_properties(self, name):
+        return self._get_service().get_blob_properties(
+            container_name=self.container,
+            blob_name=name
+        )
+
+    def _open(self, name, mode='rb'):
+        """
+        Return the AzureStorageFile.
+        """
+
+        from django.core.files.base import ContentFile
+
+        contents = self._get_service().get_blob_to_bytes(
+            container_name=self.container,
+            blob_name=name
+        )
+
+        return ContentFile(contents)
+
+    def _save(self, name, content):
+        """
+        Use the Azure Storage service to write ``content`` to a remote file
+        (called ``name``).
+        """
+        
+
+        content.open()
+
+        content_type = None
+
+        if hasattr(content.file, 'content_type'):
+            content_type = content.file.content_type
+        else:
+            content_type = mimetypes.guess_type(name)[0]
+
+        cache_control = self.get_cache_control(
+            self.container,
+            name,
+            content_type
+        )
+
+        self._get_service().put_block_blob_from_file(
+            container_name=self.container,
+            blob_name=name,
+            stream=content,
+            x_ms_blob_content_type=content_type,
+            cache_control=cache_control,
+            x_ms_blob_cache_control=cache_control
+        )
+
+        content.close()
+
+        return name
+
+    def listdir(self, path):
+        """
+        Lists the contents of the specified path, returning a 2-tuple of lists;
+        the first item being directories, the second item being files.
+        """
+
+        files = []
+
+        if path and not path.endswith('/'):
+            path = '%s/' % path
+
+        path_len = len(path)
+
+        if not path:
+            path = None
+
+        blob_list = self._get_service().list_blobs(self.container, prefix=path)
+
+        for name in blob_list:
+            files.append(name[path_len:])
+
+        return ([], files)
+
+    def exists(self, name):
+        """
+        Returns True if a file referenced by the given name already exists in
+        the storage system, or False if the name is available for a new file.
+        """
+        try:
+            self._get_properties(name)
+
+            return True
+        except AzureMissingResourceHttpError:
+            return False
+
+    def delete(self, name):
+        """
+        Deletes the file referenced by name.
+        """
+
+        try:
+            self._get_service().delete_blob(self.container, name)
+        except AzureMissingResourceHttpError:
+            pass
+
+    def get_cache_control(self, container, name, content_type):
+        """
+        Get the Cache-Control value for a blob, used when saving the blob on
+        Azure.  Returns `None` by default to remain compatible with the
+        default setting for the SDK.
+        """
+
+        return None
+
+    def size(self, name):
+        """
+        Returns the total size, in bytes, of the file referenced by name.
+        """
+
+        try:
+            properties = self._get_properties(name)
+
+            return int(properties['content-length'])
+        except AzureMissingResourceHttpError:
+            pass
+
+    def url(self, name):
+        """
+        Returns the URL where the contents of the file referenced by name can
+        be accessed.
+        """
+
+        blob_url_args = {
+            'container_name': self.container,
+            'blob_name': name,
+        }
+
+        if self.cdn_host:
+            # The account name should be built into the cdn hostname
+            blob_url_args['account_name'] = ''
+            blob_url_args['host_base'] = self.cdn_host
+
+        return self._get_service().make_blob_url(
+            **blob_url_args
+        )
+
+    def modified_time(self, name):
+        """
+        Returns a datetime object containing the last modified time.
+        """
+
+        try:
+            properties = self._get_properties(name)
+
+            return datetime.datetime.strptime(
+                properties['last-modified'],
+                '%a, %d %b %Y %H:%M:%S %Z'
+            )
+        except AzureMissingResourceHttpError:
+            pass

+ 107 - 0
data/purposeCombined/Azure/AddUp/blob-upload.py

@@ -0,0 +1,107 @@
+# ----------------------------------------------------------------------------------
+# MIT License
+#
+# Copyright(c) Microsoft Corporation. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# ----------------------------------------------------------------------------------
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+
+import os
+import uuid
+import sys
+from azure.storage.blob import BlockBlobService, PublicAccess
+
+# ---------------------------------------------------------------------------------------------------------
+# Method that creates a test file in the 'Sample' folder.
+# This sample application creates a test file, uploads the test file to the Blob storage,
+# lists the blobs in the container, and downloads the file with a new name.
+# ---------------------------------------------------------------------------------------------------------
+# Documentation References:
+# Associated Article - https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python
+# What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/
+# Getting Started with Blobs-https://docs.microsoft.com/en-us/azure/storage/blobs/storage-python-how-to-use-blob-storage
+# Blob Service Concepts - http://msdn.microsoft.com/en-us/library/dd179376.aspx
+# Blob Service REST API - http://msdn.microsoft.com/en-us/library/dd135733.aspx
+# ----------------------------------------------------------------------------------------------------------
+
+
+def run_sample():
+    try:
+        # Create the BlockBlobService that is used to call the Blob service for the storage account
+        blob_service_client = BlockBlobService(
+            account_name='accountname', account_key='accountkey')
+
+        # Create a container called 'quickstartblobs'.
+        container_name = 'quickstartblobs'
+        blob_service_client.create_container(container_name)
+
+        # Set the permission so the blobs are public.
+        blob_service_client.set_container_acl(
+            container_name, public_access=PublicAccess.Container)
+
+        # Create the Sample folder if it does not exist, and create a file in it to test the upload and download.
+        local_path = os.path.expanduser("~/Sample")
+        if not os.path.exists(local_path):
+            os.makedirs(os.path.expanduser("~/Sample"))
+        local_file_name = "QuickStart_" + str(uuid.uuid4()) + ".txt"
+        full_path_to_file = os.path.join(local_path, local_file_name)
+
+        # Write text to the file.
+        file = open(full_path_to_file,  'w')
+        file.write("Hello, World!")
+        file.close()
+
+        print("Temp file = " + full_path_to_file)
+        print("\nUploading to Blob storage as blob" + local_file_name)
+
+        # Upload the created file, use local_file_name for the blob name
+        blob_service_client.create_blob_from_path(
+            container_name, local_file_name, full_path_to_file)
+
+        # List the blobs in the container
+        print("\nList blobs in the container")
+        generator = blob_service_client.list_blobs(container_name)
+        for blob in generator:
+            print("\t Blob name: " + blob.name)
+
+        # Download the blob(s).
+        # Add '_DOWNLOADED' as prefix to '.txt' so you can see both files in Documents.
+        full_path_to_file2 = os.path.join(local_path, str.replace(
+            local_file_name ,'.txt', '_DOWNLOADED.txt'))
+        print("\nDownloading blob to " + full_path_to_file2)
+        blob_service_client.get_blob_to_path(
+            container_name, local_file_name, full_path_to_file2)
+
+        sys.stdout.write("Sample finished running. When you hit <any key>, the sample will be deleted and the sample "
+                         "application will exit.")
+        sys.stdout.flush()
+        input()
+
+        # Clean up resources. This includes the container and the temp files
+        blob_service_client.delete_container(container_name)
+        os.remove(full_path_to_file)
+        os.remove(full_path_to_file2)
+    except Exception as e:
+        print(e)
+
+
+# Main method.
+if __name__ == '__main__':
+    run_sample()

+ 231 - 0
data/purposeCombined/Azure/AddUp/circuitbreaker.py

@@ -0,0 +1,231 @@
+# ----------------------------------------------------------------------------------
+# MIT License
+#
+# Copyright(c) Microsoft Corporation. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# ----------------------------------------------------------------------------------
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# ----------------------------------------------------------------------------------
+
+import os
+import uuid
+import time
+import sys
+from azure.storage.blob import BlockBlobService
+from azure.storage.common.models import LocationMode
+from azure.storage.common.retry import LinearRetry
+
+
+# ----------------------------------------------------------------------------------
+# Azure Storage Circuit Breaker Demo
+# INSTRUCTIONS
+# Please see the README.md file for an overview explaining this application and how to run it.
+# ----------------------------------------------------------------------------------
+# Documentation References:
+# Associated Article - https://docs.microsoft.com/en-us/azure/storage/blobs/storage-create-geo-redundant-storage-python
+# Designing HA Apps with RA-GRS storage -https://docs.microsoft.com/azure/storage/storage-designing-ha-apps-with-ra-grs/
+# Getting Started with Blobs-https://docs.microsoft.com/en-us/azure/storage/blobs/storage-python-how-to-use-blob-storage
+# Azure Storage Replication - https://docs.microsoft.com/azure/storage/storage-redundancy
+# ----------------------------------------------------------------------------------
+
+account_name = os.environ.get('accountname')
+account_key = os.environ.get('accountkey')
+
+# Track how many times retry events occur.
+retry_count = 0  # Number of retries that have occurred
+retry_threshold = 5  # Threshold number of retries before switching to secondary
+secondary_read_count = 0  # Number of reads from secondary that have occurred
+secondary_threshold = 20  # Threshold number of reads from secondary before switching back to primary
+
+# This is the CloudBlobClient object used to access the blob service
+blob_client = None
+
+# This is the container used to store and access the blob to be used for testing
+container_name = None
+
+'''
+Main method. Sets up the objects needed, then performs a loop that repeatedly runs a blob
+ operation, responding to the Retry and Response Received events.
+'''
+
+
+def run_circuit_breaker():
+    # Name of image to use for testing.
+    image_to_upload = "HelloWorld.png"
+
+    global blob_client
+    global container_name
+    try:
+
+        # Create a reference to the blob client and container using the storage account name and key
+        blob_client = BlockBlobService(account_name, account_key)
+
+        # Make the container unique by using a UUID in the name.
+        container_name = "democontainer" + str(uuid.uuid4())
+        blob_client.create_container(container_name)
+
+    except Exception as ex:
+        print("Please make sure you have put the correct storage account name and key.")
+        print(ex)
+
+    # Define a reference to the actual blob and upload the block_blob to the newly created container
+    full_path_to_file = os.path.join(os.path.dirname(__file__), image_to_upload)
+    blob_client.create_blob_from_path(container_name, image_to_upload, full_path_to_file)
+
+    # Set the location mode to secondary, so you can check just the secondary data center.
+    blob_client.location_mode = LocationMode.SECONDARY
+    blob_client.retry = LinearRetry(backoff=0).retry
+
+    # Before proceeding, wait until the blob has been replicated to the secondary data center.
+    # Loop and check for the presence of the blob once in a second until it hits 60 seconds
+    # or until it finds it
+    counter = 0
+    while counter < 60:
+        counter += 1
+        sys.stdout.write("\nAttempt {0} to see if the blob has replicated to the secondary storage yet.".format(counter))
+        sys.stdout.flush()
+        if blob_client.exists(container_name, image_to_upload):
+            break
+
+        # Wait a second, then loop around and try again
+        # When it's finished replicating to the secondary, continue.
+        time.sleep(1)
+
+    # Set the starting LocationMode to Primary, then Secondary.
+    # Here we use the linear retry by default, but allow it to retry to secondary if
+    # the initial request to primary fails.
+    # Note that the default is Primary. You must have RA-GRS enabled to use this
+    blob_client.location_mode = LocationMode.PRIMARY
+    blob_client.retry = LinearRetry(max_attempts=retry_threshold, backoff=1).retry
+
+    ''' 
+        ************INSTRUCTIONS**************
+        To perform the test, first replace the 'accountname' and 'accountkey' with your storage account name and key.
+        Every time it calls get_blob_to_path it will hit the response_callback function.
+
+        Next, run this app. While this loop is running, pause the program by pressing any key, and
+        put the intercept code in Fiddler (that will intercept and return a 503).
+
+        For instructions on modifying Fiddler, look at the Fiddler_script.text file in this project.
+        There are also full instructions in the ReadMe_Instructions.txt file included in this project.
+
+        After adding the custom script to Fiddler, calls to primary storage will fail with a retryable
+        error which will trigger the Retrying event (above).
+        Then it will switch over and read the secondary. It will do that 20 times, then try to
+        switch back to the primary.
+        After seeing that happen, pause this again and remove the intercepting Fiddler code
+        Then you'll see it return to the primary and finish.
+        '''
+
+    print("\n\nThe application will pause at 200 unit interval")
+
+    for i in range(0, 1000):
+        if blob_client.location_mode == LocationMode.SECONDARY:
+            sys.stdout.write("S{0} ".format(str(i)))
+        else:
+            sys.stdout.write("P{0} ".format(str(i)))
+        sys.stdout.flush()
+
+        try:
+
+            # This function is called immediately after retry evaluation is performed.
+            # It is used to trigger the change from primary to secondary and back
+            blob_client.retry_callback = retry_callback
+
+            # Download the file
+            blob_client.get_blob_to_path(container_name, image_to_upload,
+                                                str.replace(full_path_to_file, ".png", "Copy.png"))
+
+            # Set the application to pause at 200 unit intervals to implement simulated failures
+            if i == 200 or i == 400 or i == 600 or i == 800:
+                sys.stdout.write("\nPress the Enter key to resume")
+                sys.stdout.flush()
+                if sys.version_info[0] < 3:
+                    raw_input()
+                else:
+                    input()
+        except Exception as ex:
+            print(ex)
+        finally:
+            # Force an exists call to succeed by resetting the status
+            blob_client.response_callback = response_callback
+
+    # Clean up resources
+    blob_client.delete_container(container_name)
+
+
+'''
+RequestCompleted Event handler
+If it's not pointing at the secondary, let it go through. It was either successful,
+or it failed with a non-retryable event.
+If it's pointing at the secondary, increment the read count.
+If the number of reads has hit the threshold of how many reads you want to do against the secondary,
+before you switch back to primary, switch back and reset the secondary_read_count.
+'''
+
+
+def response_callback(response):
+    global secondary_read_count
+    if blob_client.location_mode == LocationMode.SECONDARY:
+
+        # You're reading the secondary. Let it read the secondary [secondaryThreshold] times,
+        # then switch back to the primary and see if it is available now.
+        secondary_read_count += 1
+        if secondary_read_count >= secondary_threshold:
+            blob_client.location_mode = LocationMode.PRIMARY
+            secondary_read_count = 0
+
+
+'''
+Retry Event handler
+If it has retried more times than allowed, and it's not already pointed to the secondary,
+flip it to the secondary and reset the retry count.
+If it has retried more times than allowed, and it's already pointed to the secondary throw an exception.
+'''
+
+
+def retry_callback(retry_context):
+    global retry_count
+    retry_count = retry_context.count
+    sys.stdout.write("\nRetrying event because of failure reading the primary. RetryCount= {0}".format(retry_count))
+    sys.stdout.flush()
+
+    # Check if we have more than n-retries in which case switch to secondary
+    if retry_count >= retry_threshold:
+
+        # Check to see if we can fail over to secondary.
+        if blob_client.location_mode != LocationMode.SECONDARY:
+            blob_client.location_mode = LocationMode.SECONDARY
+            retry_count = 0
+        else:
+            raise Exception("Both primary and secondary are unreachable. "
+                            "Check your application's network connection.")
+
+
+if __name__ == '__main__':
+    print("Azure storage Circuit Breaker Sample \n")
+    try:
+        run_circuit_breaker()
+    except Exception as e:
+        print("Error thrown = {0}".format(e))
+    sys.stdout.write("\nPress any key to exit.")
+    sys.stdout.flush()
+    if sys.version_info[0]<3:
+        raw_input()
+    else:
+        input()

+ 138 - 0
data/purposeCombined/Azure/AddUp/datafactory.py

@@ -0,0 +1,138 @@
+from azure.common.credentials import ServicePrincipalCredentials
+from azure.mgmt.resource import ResourceManagementClient
+from azure.mgmt.datafactory import DataFactoryManagementClient
+from azure.mgmt.datafactory.models import *
+from datetime import datetime, timedelta
+import time
+
+def print_item(group):
+    """Print an Azure object instance."""
+    print("\tName: {}".format(group.name))
+    print("\tId: {}".format(group.id))
+    if hasattr(group, 'location'):
+        print("\tLocation: {}".format(group.location))
+    if hasattr(group, 'tags'):
+        print("\tTags: {}".format(group.tags))
+    if hasattr(group, 'properties'):
+        print_properties(group.properties)
+    print("\n")        
+
+def print_properties(props):
+    """Print a ResourceGroup properties instance."""
+    if props and hasattr(props, 'provisioning_state') and props.provisioning_state:
+        print("\tProperties:")
+        print("\t\tProvisioning State: {}".format(props.provisioning_state))
+    print("\n")
+
+def print_activity_run_details(activity_run):
+    """Print activity run details."""
+    print("\n\tActivity run details\n")
+    print("\tActivity run status: {}".format(activity_run.status))    
+    if activity_run.status == 'Succeeded':
+        print("\tNumber of bytes read: {}".format(activity_run.output['dataRead']))       
+        print("\tNumber of bytes written: {}".format(activity_run.output['dataWritten']))           
+        print("\tCopy duration: {}".format(activity_run.output['copyDuration']))           
+    else:
+        print("\tErrors: {}".format(activity_run.error['message']))
+
+def main():
+
+    # Azure subscription ID
+    subscription_id = '<Azure subscription ID>'
+
+    # This program creates this resource group. If it's an existing resource group, comment out the code that creates the resource group
+    rg_name = '<Azure resource group name>'
+
+    # The data factory name. It must be globally unique.
+    df_name = '<Data factory name>'        
+
+    # Specify your Active Directory client ID, client secret, and tenant ID
+    credentials = ServicePrincipalCredentials(client_id='<AAD application ID>', secret='<AAD app authentication key>', tenant='<AAD tenant ID>')
+    resource_client = ResourceManagementClient(credentials, subscription_id)
+    adf_client = DataFactoryManagementClient(credentials, subscription_id)
+
+    rg_params = {'location':'eastus'}
+    df_params = {'location':'eastus'}
+
+    # create the resource group
+    # comment out if the resource group already exists
+    resource_client.resource_groups.create_or_update(rg_name, rg_params)
+
+    # Create a data factory
+    df_resource = Factory(location='eastus')
+    df = adf_client.factories.create_or_update(rg_name, df_name, df_resource)
+    print_item(df)
+    while df.provisioning_state != 'Succeeded':
+        df = adf_client.factories.get(rg_name, df_name)
+        time.sleep(1)
+
+    # Create an Azure Storage linked service
+    ls_name = 'storageLinkedService'
+
+    # Specify the name and key of your Azure Storage account
+    storage_string = SecureString('DefaultEndpointsProtocol=https;AccountName=<Azure storage account>;AccountKey=<Azure storage authentication key>')
+
+    ls_azure_storage = AzureStorageLinkedService(connection_string=storage_string)
+    ls = adf_client.linked_services.create_or_update(rg_name, df_name, ls_name, ls_azure_storage)
+    print_item(ls)
+
+    # Create an Azure blob dataset (input)
+    ds_name = 'ds_in'
+    ds_ls = LinkedServiceReference(ls_name)
+    blob_path= 'adftutorial/inputpy'
+    blob_filename = 'input.txt'
+    ds_azure_blob= AzureBlobDataset(ds_ls, folder_path=blob_path, file_name = blob_filename)
+    ds = adf_client.datasets.create_or_update(rg_name, df_name, ds_name, ds_azure_blob)
+    print_item(ds)
+
+    # Create an Azure blob dataset (output)
+    dsOut_name = 'ds_out'
+    output_blobpath = 'adftutorial/outputpy'
+    dsOut_azure_blob = AzureBlobDataset(ds_ls, folder_path=output_blobpath)
+    dsOut = adf_client.datasets.create_or_update(rg_name, df_name, dsOut_name, dsOut_azure_blob)
+    print_item(dsOut)
+
+    # Create a copy activity
+    act_name =  'copyBlobtoBlob'
+    blob_source = BlobSource()
+    blob_sink = BlobSink()
+    dsin_ref = DatasetReference(ds_name)
+    dsOut_ref = DatasetReference(dsOut_name)
+    copy_activity = CopyActivity(act_name,inputs=[dsin_ref], outputs=[dsOut_ref], source=blob_source, sink=blob_sink)
+
+    # Create a pipeline with the copy activity
+    p_name =  'copyPipeline'
+    params_for_pipeline = {}
+    p_obj = PipelineResource(activities=[copy_activity], parameters=params_for_pipeline)
+    p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
+    print_item(p)
+
+    # Create a pipeline run
+    run_response = adf_client.pipelines.create_run(rg_name, df_name, p_name,
+        {
+        }
+    )
+
+    # Monitor the pipeline run
+    time.sleep(30)
+    pipeline_run = adf_client.pipeline_runs.get(rg_name, df_name, run_response.run_id)
+    print("\n\tPipeline run status: {}".format(pipeline_run.status))
+    activity_runs_paged = list(adf_client.activity_runs.list_by_pipeline_run(rg_name, df_name, pipeline_run.run_id, datetime.now() - timedelta(1),  datetime.now() + timedelta(1)))
+    print_activity_run_details(activity_runs_paged[0])
+
+    # Create a trigger
+    tr_name = 'mytrigger'
+    scheduler_recurrence = ScheduleTriggerRecurrence(frequency='Minute', interval='15',start_time=datetime.now(), end_time=datetime.now() + timedelta(1), time_zone='UTC') 
+    pipeline_parameters = {'inputPath':'adftutorial/inputpy', 'outputPath':'adftutorial/outputpy'}
+    pipelines_to_run = []
+    pipeline_reference = PipelineReference('copyPipeline')
+    pipelines_to_run.append(TriggerPipelineReference(pipeline_reference, pipeline_parameters))
+    tr_properties = ScheduleTrigger(description='My scheduler trigger', pipelines = pipelines_to_run, recurrence=scheduler_recurrence)    
+    adf_client.triggers.create_or_update(rg_name, df_name, tr_name, tr_properties)
+
+    # start the trigger
+    adf_client.triggers.start(rg_name, df_name, tr_name)
+    
+
+# Start the main method
+main()

+ 202 - 0
data/purposeCombined/Azure/AddUp/file_advanced_samples.py

@@ -0,0 +1,202 @@
+#-------------------------------------------------------------------------
+# Microsoft Developer & Platform Evangelism
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, 
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+# OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+#----------------------------------------------------------------------------------
+# The example companies, organizations, products, domain names,
+# e-mail addresses, logos, people, places, and events depicted
+# herein are fictitious. No association with any real company,
+# organization, product, domain name, email address, logo, person,
+# places, or events is intended or should be inferred.
+#--------------------------------------------------------------------------
+
+import os
+from random_data import RandomData
+
+from azure.storage.fileshare import ShareServiceClient
+from azure.storage.fileshare import CorsRule, RetentionPolicy, Metrics
+
+#
+# Azure File Service Sample - Demonstrate how to perform common tasks using the Microsoft Azure File Service.  
+#  
+# Documentation References:  
+#  - What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/  
+#  - Getting Started with Files - https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-file-storage/  
+#  - File Service Concepts - http://msdn.microsoft.com/en-us/library/dn166972.aspx  
+#  - File Service REST API - http://msdn.microsoft.com/en-us/library/dn167006.aspx  
+#  - Storage Emulator - http://azure.microsoft.com/en-us/documentation/articles/storage-use-emulator/
+#  
+class FileAdvancedSamples():
+
+    def __init__(self):
+        self.random_data = RandomData()
+    
+    # Runs all samples for Azure Storage File service.
+    def run_all_samples(self, connection_string):
+        print('Azure Storage File Advanced samples - Starting.')
+        
+        try:
+            # Create an instance of ShareServiceClient
+            service = ShareServiceClient.from_connection_string(conn_str=connection_string)
+
+            # List shares
+            print('\n\n* List shares *\n')
+            self.list_shares(service)
+
+            # Set Cors
+            print('\n\n* Set cors rules *\n')
+            self.set_cors_rules(service)
+
+            # Set Service Properties
+            print('\n\n* Set service properties *\n')
+            self.set_service_properties(service)
+
+            # Share, directory and file properties and metadata
+            print('\n\n* Metadata and properties *\n')
+            self.metadata_and_properties(service)
+
+        except Exception as e:
+            print('Error occurred in the sample.', e) 
+
+        finally:
+            print('\nAzure Storage File Advanced samples - Completed.\n')
+    
+    # List file shares
+    def list_shares(self, service):
+        share_prefix = 'sharesample' + self.random_data.get_random_name(6)
+
+        try:        
+            print('1. Create multiple shares with prefix: ', share_prefix)
+            for i in range(5):
+                service.create_share(share_name=share_prefix + str(i))
+            
+            print('2. List shares')
+            shares = service.list_shares()
+            for share in shares:
+                print('  Share name:' + share.name)
+
+        except Exception as e:
+            print(e) 
+
+        finally:
+            print('3. Delete shares with prefix:' + share_prefix) 
+            for i in range(5):
+                service.delete_share(share_prefix + str(i))
+    
+
+    # Set CORS
+    def set_cors_rules(self, service):
+        print('1. Get Cors Rules')
+        original_cors_rules = service.get_service_properties()['cors']
+
+        print('2. Overwrite Cors Rules')
+        cors_rule = CorsRule(
+            allowed_origins=['*'], 
+            allowed_methods=['POST', 'GET'],
+            allowed_headers=['*'],
+            exposed_headers=['*'],
+            max_age_in_seconds=3600)
+
+        try:
+            service.set_service_properties(cors=[cors_rule])
+        except Exception as e:
+            print(e)
+        finally:
+            #reverting cors rules back to the original ones
+            print('3. Revert Cors Rules back the original ones')
+            service.set_service_properties(cors=original_cors_rules)
+        
+        print("CORS sample completed")
+    
+
+    # Manage properties of the File service, including logging and metrics settings, and the default service version.
+    def set_service_properties(self, service):
+
+        print('1. Get File service properties')
+        props = service.get_service_properties()
+
+        retention = RetentionPolicy(enabled=True, days=5)
+        hour_metrics = Metrics(enabled=True, include_apis=True, retention_policy=retention)
+        minute_metrics = Metrics(enabled=False)
+
+        try:
+            print('2. Overwrite File service properties')
+            service.set_service_properties(hour_metrics=hour_metrics, minute_metrics=minute_metrics)
+
+        finally:
+            print('3. Revert File service properties back to the original ones')
+            service.set_service_properties(hour_metrics=props['hour_metrics'], minute_metrics=props['minute_metrics'])
+
+        print('4. Set File service properties completed')
+    
+
+    # Manage metadata and properties of the share
+    def metadata_and_properties(self, service):
+        share_name = 'sharename' + self.random_data.get_random_name(6)
+
+        try:
+            # All directories and share must be created in a parent share.
+            # Max capacity: 5TB per share
+            print('1. Create sample share with name ' + share_name)
+            quota = 1 # in GB
+            metadata = { "foo": "bar", "baz": "foo" }
+            share_client = service.create_share(share_name=share_name)
+            print('Sample share "'+ share_name +'" created.')
+
+            print('2. Get share properties.')
+            properties = share_client.get_share_properties()
+
+            print('3. Get share metadata.')
+            get_metadata = properties['metadata']
+            for k, v in get_metadata.items():
+                print("\t" + k + ": " + v)
+
+            dir_name = 'dirname' + self.random_data.get_random_name(6)
+
+            print('4. Create sample directory with name ' + dir_name)
+            metadata = { "abc": "def", "jkl": "mno" }
+            directory_client = share_client.create_directory(dir_name, metadata=metadata)
+            print('Sample directory "'+ dir_name +'" created.')
+
+            print('5. Get directory properties.')
+            properties = directory_client.get_directory_properties()
+            
+            print('6. Get directory metadata.')
+            get_metadata = properties['metadata']
+            for k, v in get_metadata.items():
+                print("\t" + k + ": " + v)
+
+            file_name = 'sample.txt'
+            # Uploading text to share_name/dir_name/sample.txt in Azure Files account.
+            # Max capacity: 1TB per file
+            print('7. Upload sample file from text to directory.')
+            metadata = { "prop1": "val1", "prop2": "val2" }
+            file_client = directory_client.get_file_client(file_name)
+            file_client.upload_file('Hello World! - from text sample', metadata=metadata)
+            print('Sample file "' + file_name + '" created and uploaded to: ' + share_name + '/' + dir_name)        
+
+            print('8. Get file properties.')
+            properties = file_client.get_file_properties()
+
+            print('9. Get file metadata.')
+            get_metadata = properties['metadata']
+            for k, v in get_metadata.items():
+                print("\t" + k + ": " + v)
+
+            # This is for demo purposes, all files will be deleted when share is deleted
+            print('10. Delete file.')
+            file_client.delete_file()
+
+            # This is for demo purposes, all directories will be deleted when share is deleted
+            print('11. Delete directory.')
+            directory_client.delete_directory()
+
+        finally:
+            print('12. Delete share.')
+            share_client.delete_share(share_name)
+
+        print("Metadata and properties sample completed")

+ 190 - 0
data/purposeCombined/Azure/AddUp/file_basic_samples.py

@@ -0,0 +1,190 @@
+#-------------------------------------------------------------------------
+# Microsoft Developer & Platform Evangelism
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, 
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+# OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+#----------------------------------------------------------------------------------
+# The example companies, organizations, products, domain names,
+# e-mail addresses, logos, people, places, and events depicted
+# herein are fictitious. No association with any real company,
+# organization, product, domain name, email address, logo, person,
+# places, or events is intended or should be inferred.
+#--------------------------------------------------------------------------
+
+from random_data import RandomData
+import tempfile
+import os
+
+from azure.storage.fileshare import ShareServiceClient
+
+
+class FileBasicSamples():
+
+    def __init__(self):
+        self.random_data = RandomData()
+
+    # Runs all samples for Azure Storage File service.
+    def run_all_samples(self, connection_string):
+        print('Azure Storage File Basic samples - Starting.')
+        
+        #declare variables
+        filename = 'filesample' + self.random_data.get_random_name(6)
+        sharename = 'sharesample' + self.random_data.get_random_name(6)
+        
+        try:
+            # Create an instance of ShareServiceClient
+            service = ShareServiceClient.from_connection_string(conn_str=connection_string)
+
+            print('\n\n* Basic file operations *\n')
+            self.basic_file_operations(sharename, filename, service)
+
+        except Exception as e:
+            print('error: ' + str(e))
+
+        finally:
+            # Delete all Azure Files created in this sample (skip if the client was never created)
+            if service is not None:
+                self.file_delete_samples(sharename, filename, service)
+
+        print('\nAzure Storage File Basic samples - Completed.\n')
+    
+    def basic_file_operations(self, sharename, filename, service):
+        # Creating an SMB file share in your Azure Files account.
+        print('\nAttempting to create a sample file from text for upload demonstration.')   
+        # All directories and files must be created in a parent share.
+        # Max capacity: 5TB per share
+
+        print('Creating sample share.')
+        share_client = service.create_share(share_name=sharename)
+        print('Sample share "'+ sharename +'" created.')
+
+
+        # Creating an optional file directory in your Azure Files account.
+        print('Creating a sample directory.')    
+        # Get the directory client
+        directory_client = share_client.create_directory("mydirectory")
+        print('Sample directory "mydirectory" created.')
+
+
+        # Uploading text to sharename/mydirectory/my_text_file in Azure Files account.
+        # Max capacity: 1TB per file
+        print('Uploading a sample file from text.')   
+        # create_file_client
+        file_client = directory_client.get_file_client(filename)
+        # Upload a file
+        file_client.upload_file('Hello World! - from text sample')
+        print('Sample file "' + filename + '" created and uploaded to: ' + sharename + '/mydirectory')
+  
+
+        # Demonstrate how to copy a file
+        print('\nCopying file ' + filename)
+        # Create another file client which will copy the file from url
+        destination_file_client = share_client.get_file_client('file1copy')
+
+        # Copy the sample source file from the url to the destination file
+        copy_resp = destination_file_client.start_copy_from_url(source_url=file_client.url)
+        if copy_resp['copy_status'] ==  'pending':
+            # Demonstrate how to abort a copy operation (just for demo, probably will never get here)
+            print('Abort copy operation')
+            destination_file_client.abort_copy()
+        else:
+            print('Copy was a ' + copy_resp['copy_status'])
+        
+
+        # Demonstrate how to create a share and upload a file from a local temporary file path
+        print('\nAttempting to upload a sample file from path for upload demonstration.')  
+        # Creating a temporary file to upload to Azure Files
+        print('Creating a temporary file from text.') 
+        with tempfile.NamedTemporaryFile(delete=False) as my_temp_file:
+            my_temp_file.file.write(b"Hello world!")
+        print('Sample temporary file created.') 
+
+        # Uploading my_temp_file to sharename folder in Azure Files
+        # Max capacity: 1TB per file
+        print('Uploading a sample file from local path.')
+        # Create file_client
+        file_client = share_client.get_file_client(filename)
+
+        # Upload a file
+        with open(my_temp_file.name, "rb") as source_file:
+            file_client.upload_file(source_file)
+
+        print('Sample file "' + filename + '" uploaded from path to share: ' + sharename)
+
+        # Close the temp file
+        my_temp_file.close()
+
+        # Get the list of valid ranges and write to the specified range
+        print('\nGet list of valid ranges of the file.') 
+        file_ranges = file_client.get_ranges()
+
+        data = b'abcdefghijkl'
+        print('Put a range of data to the file.')
+        
+        file_client.upload_range(data=data, offset=file_ranges[0]['start'], length=len(data))
+
+
+        # Demonstrate how to download a file from Azure Files
+        # The following example download the file that was previously uploaded to Azure Files
+        print('\nAttempting to download a sample file from Azure files for demonstration.')
+
+        destination_file = os.path.join(tempfile.gettempdir(), 'mypathfile.txt')
+
+        with open(destination_file, "wb") as file_handle:
+            data = file_client.download_file()
+            data.readinto(file_handle)
+
+        print('Sample file downloaded to: ' + destination_file)
+
+
+        # Demonstrate how to list files and directories contained under an Azure File share
+        print('\nAttempting to list files and directories under share "' + sharename + '":')
+
+        # Create a generator to list directories and files under share
+        # This is not a recursive listing operation
+        generator = share_client.list_directories_and_files()
+
+        # Prints the directories and files under the share
+        for file_or_dir in generator:
+            print(file_or_dir['name'])
+        
+        # remove temp file
+        os.remove(my_temp_file.name)
+
+        print('Files and directories under share "' + sharename + '" listed.')
+        print('\nCompleted successfully - Azure basic Files operations.')
+
+
+    # Demonstrate how to delete azure files created for this demonstration
+    # Warning: Deleting a share or directory will also delete all files and directories that are contained in it.
+    def file_delete_samples(self, sharename, filename, service):
+        print('\nDeleting all samples created for this demonstration.')
+
+        try:
+            # Deleting file: 'sharename/mydirectory/filename'
+            # This is for demo purposes only, it's unnecessary, as we're deleting the share later
+            print('Deleting a sample file.')
+
+            share_client = service.get_share_client(sharename)
+            directory_client = share_client.get_directory_client('mydirectory')
+            
+            directory_client.delete_file(file_name=filename)
+            print('Sample file "' + filename + '" deleted from: ' + sharename + '/mydirectory' )
+
+            # Deleting directory: 'sharename/mydirectory'
+            print('Deleting sample directory and all files and directories under it.')
+            share_client.delete_directory('mydirectory')
+            print('Sample directory "/mydirectory" deleted from: ' + sharename)
+
+            # Deleting share: 'sharename'
+            print('Deleting sample share ' + sharename + ' and all files and directories under it.')
+            share_client.delete_share(sharename)
+            print('Sample share "' + sharename + '" deleted.')
+
+            print('\nCompleted successfully - Azure Files samples deleted.')
+
+        except Exception as e:
+            print('********ErrorDelete***********')
+            print(e)

+ 415 - 0
data/purposeCombined/Azure/AddUp/python-quick-start.py

@@ -0,0 +1,415 @@
+# python quickstart client Code Sample
+#
+# Copyright (c) Microsoft Corporation
+#
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+"""
+Create a pool of nodes to output text files from azure blob storage.
+"""
+
+import datetime
+import io
+import os
+import sys
+import time
+
+from azure.storage.blob import (
+    BlobServiceClient,
+    BlobSasPermissions,
+    generate_blob_sas
+)
+from azure.batch import BatchServiceClient
+from azure.batch.batch_auth import SharedKeyCredentials
+import azure.batch.models as batchmodels
+from azure.core.exceptions import ResourceExistsError
+
+import config
+
+DEFAULT_ENCODING = "utf-8"
+
+
+# Update the Batch and Storage account credential strings in config.py with values
+# unique to your accounts. These are used when constructing connection strings
+# for the Batch and Storage client objects.
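+#
+# A minimal config.py sketch, assuming the attribute names read below; the values are
+# placeholders, not real credentials:
+#
+#     STORAGE_ACCOUNT_NAME = "mystorageaccount"
+#     STORAGE_ACCOUNT_KEY = "<storage-account-key>"
+#     STORAGE_ACCOUNT_DOMAIN = "blob.core.windows.net"
+#     BATCH_ACCOUNT_NAME = "mybatchaccount"
+#     BATCH_ACCOUNT_KEY = "<batch-account-key>"
+#     BATCH_ACCOUNT_URL = "https://mybatchaccount.<region>.batch.azure.com"
+#     POOL_ID = "PythonQuickstartPool"
+#     POOL_NODE_COUNT = 2
+#     POOL_VM_SIZE = "STANDARD_DS1_V2"
+#     JOB_ID = "PythonQuickstartJob"
+#     STANDARD_OUT_FILE_NAME = "stdout.txt"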
+
+def query_yes_no(question: str, default: str = "yes") -> str:
+    """
+    Prompts the user for yes/no input, displaying the specified question text.
+
+    :param str question: The text of the prompt for input.
+    :param str default: The default if the user hits <ENTER>. Acceptable values
+    are 'yes', 'no', and None.
+    :return: 'yes' or 'no'
+    """
+    valid = {'y': 'yes', 'n': 'no'}
+    if default is None:
+        prompt = ' [y/n] '
+    elif default == 'yes':
+        prompt = ' [Y/n] '
+    elif default == 'no':
+        prompt = ' [y/N] '
+    else:
+        raise ValueError(f"Invalid default answer: '{default}'")
+
+    choice = default
+
+    while 1:
+        user_input = input(question + prompt).lower()
+        if not user_input:
+            break
+        try:
+            choice = valid[user_input[0]]
+            break
+        except (KeyError, IndexError):
+            print("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
+
+    return choice
+
+
+def print_batch_exception(batch_exception: batchmodels.BatchErrorException):
+    """
+    Prints the contents of the specified Batch exception.
+
+    :param batch_exception:
+    """
+    print('-------------------------------------------')
+    print('Exception encountered:')
+    if batch_exception.error and \
+            batch_exception.error.message and \
+            batch_exception.error.message.value:
+        print(batch_exception.error.message.value)
+        if batch_exception.error.values:
+            print()
+            for mesg in batch_exception.error.values:
+                print(f'{mesg.key}:\t{mesg.value}')
+    print('-------------------------------------------')
+
+
+def upload_file_to_container(blob_storage_service_client: BlobServiceClient,
+                             container_name: str, file_path: str) -> batchmodels.ResourceFile:
+    """
+    Uploads a local file to an Azure Blob storage container.
+
+    :param blob_storage_service_client: A blob service client.
+    :param str container_name: The name of the Azure Blob storage container.
+    :param str file_path: The local path to the file.
+    :return: A ResourceFile initialized with a SAS URL appropriate for Batch
+    tasks.
+    """
+    blob_name = os.path.basename(file_path)
+    blob_client = blob_storage_service_client.get_blob_client(container_name, blob_name)
+
+    print(f'Uploading file {file_path} to container [{container_name}]...')
+
+    with open(file_path, "rb") as data:
+        blob_client.upload_blob(data, overwrite=True)
+
+    sas_token = generate_blob_sas(
+        config.STORAGE_ACCOUNT_NAME,
+        container_name,
+        blob_name,
+        account_key=config.STORAGE_ACCOUNT_KEY,
+        permission=BlobSasPermissions(read=True),
+        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=2)
+    )
+
+    sas_url = generate_sas_url(
+        config.STORAGE_ACCOUNT_NAME,
+        config.STORAGE_ACCOUNT_DOMAIN,
+        container_name,
+        blob_name,
+        sas_token
+    )
+
+    return batchmodels.ResourceFile(
+        http_url=sas_url,
+        file_path=blob_name
+    )
+
+
+def generate_sas_url(
+    account_name: str,
+    account_domain: str,
+    container_name: str,
+    blob_name: str,
+    sas_token: str
+) -> str:
+    """
+    Generates and returns a sas url for accessing blob storage
+    """
+    return f"https://{account_name}.{account_domain}/{container_name}/{blob_name}?{sas_token}"
+
+
+def create_pool(batch_service_client: BatchServiceClient, pool_id: str):
+    """
+    Creates a pool of compute nodes with the specified OS settings.
+
+    :param batch_service_client: A Batch service client.
+    :param str pool_id: An ID for the new pool.
+    """
+    print(f'Creating pool [{pool_id}]...')
+
+    # Create a new pool of Linux compute nodes using an Azure Virtual Machines
+    # Marketplace image. For more information about creating pools of Linux
+    # nodes, see:
+    # https://azure.microsoft.com/documentation/articles/batch-linux-nodes/
+    new_pool = batchmodels.PoolAddParameter(
+        id=pool_id,
+        virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
+            image_reference=batchmodels.ImageReference(
+                publisher="canonical",
+                offer="0001-com-ubuntu-server-focal",
+                sku="20_04-lts",
+                version="latest"
+            ),
+            node_agent_sku_id="batch.node.ubuntu 20.04"),
+        vm_size=config.POOL_VM_SIZE,
+        target_dedicated_nodes=config.POOL_NODE_COUNT
+    )
+    batch_service_client.pool.add(new_pool)
+
+
+def create_job(batch_service_client: BatchServiceClient, job_id: str, pool_id: str):
+    """
+    Creates a job with the specified ID, associated with the specified pool.
+
+    :param batch_service_client: A Batch service client.
+    :param str job_id: The ID for the job.
+    :param str pool_id: The ID for the pool.
+    """
+    print(f'Creating job [{job_id}]...')
+
+    job = batchmodels.JobAddParameter(
+        id=job_id,
+        pool_info=batchmodels.PoolInformation(pool_id=pool_id))
+
+    batch_service_client.job.add(job)
+
+
+def add_tasks(batch_service_client: BatchServiceClient, job_id: str, resource_input_files: list):
+    """
+    Adds a task for each input file in the collection to the specified job.
+
+    :param batch_service_client: A Batch service client.
+    :param str job_id: The ID of the job to which to add the tasks.
+    :param list resource_input_files: A collection of input files. One task will be
+     created for each input file.
+    """
+
+    print(f'Adding {len(resource_input_files)} tasks to job [{job_id}]...')
+
+    tasks = []
+
+    for idx, input_file in enumerate(resource_input_files):
+
+        command = f"/bin/bash -c \"cat {input_file.file_path}\""
+        tasks.append(batchmodels.TaskAddParameter(
+            id=f'Task{idx}',
+            command_line=command,
+            resource_files=[input_file]
+        )
+        )
+
+    batch_service_client.task.add_collection(job_id, tasks)
+
+
+def wait_for_tasks_to_complete(batch_service_client: BatchServiceClient, job_id: str,
+                               timeout: datetime.timedelta):
+    """
+    Returns when all tasks in the specified job reach the Completed state.
+
+    :param batch_service_client: A Batch service client.
+    :param job_id: The id of the job whose tasks should be monitored.
+    :param timeout: The duration to wait for task completion. If all
+    tasks in the specified job do not reach Completed state within this time
+    period, an exception will be raised.
+    """
+    timeout_expiration = datetime.datetime.now() + timeout
+
+    print(f"Monitoring all tasks for 'Completed' state, timeout in {timeout}...", end='')
+
+    while datetime.datetime.now() < timeout_expiration:
+        print('.', end='')
+        sys.stdout.flush()
+        tasks = batch_service_client.task.list(job_id)
+
+        incomplete_tasks = [task for task in tasks if
+                            task.state != batchmodels.TaskState.completed]
+        if not incomplete_tasks:
+            print()
+            return True
+
+        time.sleep(1)
+
+    print()
+    raise RuntimeError("ERROR: Tasks did not reach 'Completed' state within "
+                       "timeout period of " + str(timeout))
+
+
+def print_task_output(batch_service_client: BatchServiceClient, job_id: str,
+                      text_encoding: str=None):
+    """
+    Prints the stdout.txt file for each task in the job.
+
+    :param batch_service_client: The Batch service client to use.
+    :param str job_id: The id of the job with task output files to print.
+    :param str text_encoding: The encoding to use for the task output. Defaults to utf-8.
+    """
+
+    print('Printing task output...')
+
+    tasks = batch_service_client.task.list(job_id)
+
+    for task in tasks:
+
+        node_id = batch_service_client.task.get(
+            job_id, task.id).node_info.node_id
+        print(f"Task: {task.id}")
+        print(f"Node: {node_id}")
+
+        stream = batch_service_client.file.get_from_task(
+            job_id, task.id, config.STANDARD_OUT_FILE_NAME)
+
+        file_text = _read_stream_as_string(
+            stream,
+            text_encoding)
+
+        if text_encoding is None:
+            text_encoding = DEFAULT_ENCODING
+
+        sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = text_encoding)
+        sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = text_encoding)
+
+        print("Standard output:")
+        print(file_text)
+
+
+def _read_stream_as_string(stream, encoding) -> str:
+    """
+    Read stream as string
+
+    :param stream: input stream generator
+    :param str encoding: The encoding of the file. The default is utf-8.
+    :return: The file content.
+    """
+    output = io.BytesIO()
+    try:
+        for data in stream:
+            output.write(data)
+        if encoding is None:
+            encoding = DEFAULT_ENCODING
+        return output.getvalue().decode(encoding)
+    finally:
+        output.close()
+
+
+if __name__ == '__main__':
+
+    start_time = datetime.datetime.now().replace(microsecond=0)
+    print(f'Sample start: {start_time}')
+    print()
+
+    # Create the blob client, for use in obtaining references to
+    # blob storage containers and uploading files to containers.
+    blob_service_client = BlobServiceClient(
+        account_url=f"https://{config.STORAGE_ACCOUNT_NAME}.{config.STORAGE_ACCOUNT_DOMAIN}/",
+        credential=config.STORAGE_ACCOUNT_KEY
+    )
+
+    # Use the blob client to create the containers in Azure Storage if they
+    # don't yet exist.
+    input_container_name = 'input'      # pylint: disable=invalid-name
+    try:
+        blob_service_client.create_container(input_container_name)
+    except ResourceExistsError:
+        pass
+
+    # The collection of data files that are to be processed by the tasks.
+    input_file_paths = [os.path.join(sys.path[0], 'taskdata0.txt'),
+                        os.path.join(sys.path[0], 'taskdata1.txt'),
+                        os.path.join(sys.path[0], 'taskdata2.txt')]
+
+    # Upload the data files.
+    input_files = [
+        upload_file_to_container(blob_service_client, input_container_name, file_path)
+        for file_path in input_file_paths]
+
+    # Create a Batch service client. We'll now be interacting with the Batch
+    # service in addition to Storage
+    credentials = SharedKeyCredentials(config.BATCH_ACCOUNT_NAME,
+        config.BATCH_ACCOUNT_KEY)
+
+    batch_client = BatchServiceClient(
+        credentials,
+        batch_url=config.BATCH_ACCOUNT_URL)
+
+    try:
+        # Create the pool that will contain the compute nodes that will execute the
+        # tasks.
+        create_pool(batch_client, config.POOL_ID)
+
+        # Create the job that will run the tasks.
+        create_job(batch_client, config.JOB_ID, config.POOL_ID)
+
+        # Add the tasks to the job.
+        add_tasks(batch_client, config.JOB_ID, input_files)
+
+        # Pause execution until tasks reach Completed state.
+        wait_for_tasks_to_complete(batch_client,
+                                   config.JOB_ID,
+                                   datetime.timedelta(minutes=30))
+
+        print("  Success! All tasks reached the 'Completed' state within the "
+              "specified timeout period.")
+
+        # Print the stdout.txt file for each task to the console
+        print_task_output(batch_client, config.JOB_ID)
+
+        # Print out some timing info
+        end_time = datetime.datetime.now().replace(microsecond=0)
+        print()
+        print(f'Sample end: {end_time}')
+        elapsed_time = end_time - start_time
+        print(f'Elapsed time: {elapsed_time}')
+        print()
+        input('Press ENTER to exit...')
+
+    except batchmodels.BatchErrorException as err:
+        print_batch_exception(err)
+        raise
+
+    finally:
+        # Clean up storage resources
+        print(f'Deleting container [{input_container_name}]...')
+        blob_service_client.delete_container(input_container_name)
+
+        # Clean up Batch resources (if the user so chooses).
+        if query_yes_no('Delete job?') == 'yes':
+            batch_client.job.delete(config.JOB_ID)
+
+        if query_yes_no('Delete pool?') == 'yes':
+            batch_client.pool.delete(config.POOL_ID)
+ 

+ 218 - 0
data/purposeCombined/Azure/AddUp/table_advanced_samples.py

@@ -0,0 +1,218 @@
+#-------------------------------------------------------------------------
+# Microsoft Developer & Platform Evangelism
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, 
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+# OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+#----------------------------------------------------------------------------------
+# The example companies, organizations, products, domain names,
+# e-mail addresses, logos, people, places, and events depicted
+# herein are fictitious. No association with any real company,
+# organization, product, domain name, email address, logo, person,
+# places, or events is intended or should be inferred.
+#--------------------------------------------------------------------------
+import config
+import datetime
+import time
+from random_data import RandomData
+from tablestorageaccount import TableStorageAccount
+from azure.storage import CloudStorageAccount, AccessPolicy
+from azure.storage.table import TableService, Entity, TablePermissions
+from azure.storage.models import CorsRule, Logging, Metrics, RetentionPolicy, ResourceTypes, AccountPermissions
+
+#
+# Azure Table Service Sample - Demonstrate how to perform common tasks using the Microsoft Azure Table Service
+# including creating a table, CRUD operations and different querying techniques.
+#
+# Documentation References:
+#  - What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/
+#  - Getting Started with Tables - https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-table-storage/
+#  - Table Service Concepts - http://msdn.microsoft.com/en-us/library/dd179463.aspx
+#  - Table Service REST API - http://msdn.microsoft.com/en-us/library/dd179423.aspx
+#  - Table Service Python API - http://azure.github.io/azure-storage-python/ref/azure.storage.table.html
+#  - Storage Emulator - http://azure.microsoft.com/en-us/documentation/articles/storage-use-emulator/
+#
+class TableAdvancedSamples():
+
+    def __init__(self):
+        self.random_data = RandomData()
+
+    # Runs all samples for Azure Storage Table service.
+    def run_all_samples(self, account):
+        table_service = account.create_table_service()
+        print('Azure Storage Advanced Table samples - Starting.')
+        
+        print('\n\n* List tables *\n')
+        self.list_tables(table_service)
+        
+        if not account.is_azure_cosmosdb_table():
+            print('\n\n* Set service properties *\n')
+            self.set_service_properties(table_service)
+
+            print('\n\n* Set Cors rules *\n')
+            self.set_cors_rules(table_service)
+
+            print('\n\n* ACL operations *\n')
+            self.table_acl_operations(table_service)
+        
+        if (config.IS_EMULATED):
+            print('\n\n* Shared Access Signature is not supported in emulator *\n')
+        else:
+            print('\n\n* SAS operations *\n')
+            self.table_operations_with_sas(account)
+
+        print('\nAzure Storage Advanced Table samples - Completed.\n')
+
+    # Manage tables including creating, listing and deleting
+    def list_tables(self, table_service):
+        table_prefix = 'table' + self.random_data.get_random_name(6)
+
+        try:        
+            # Create tables
+            for i in range(5):
+                table_name = table_prefix + str(i)
+                print('1. Create a table with name - ' + table_name)
+                table_service.create_table(table_name)
+            
+            # List all the tables 
+            print('2. List tables')
+            tables = table_service.list_tables()
+            for table in tables:
+                print('\tTable Name: ' + table.name)
+
+        finally:
+            # Delete the tables
+            print("3. Delete Tables")
+            for i in range(5):
+                table_name = table_prefix + str(i)
+                if(table_service.exists(table_name)):
+                    table_service.delete_table(table_name)
+            
+        print("List tables sample completed")
+    
+    # Manage properties of the Table service, including logging and metrics settings, and the default service version.
+    def set_service_properties(self, table_service):
+        print('1. Get Table service properties')
+        props = table_service.get_table_service_properties()
+
+        retention = RetentionPolicy(enabled=True, days=5)
+        logging = Logging(delete=True, read=False, write=True, retention_policy=retention)
+        hour_metrics = Metrics(enabled=True, include_apis=True, retention_policy=retention)
+        minute_metrics = Metrics(enabled=False)
+
+        try:
+            print('2. Overwrite Table service properties')
+            table_service.set_table_service_properties(logging=logging, hour_metrics=hour_metrics, minute_metrics=minute_metrics)
+
+        finally:
+            print('3. Revert Table service properties back to the original ones')
+            table_service.set_table_service_properties(logging=props.logging, hour_metrics=props.hour_metrics, minute_metrics=props.minute_metrics)
+
+        print('4. Set Table service properties completed')
+    
+    # Manage CORS rules on the table service
+    def set_cors_rules(self, table_service):
+        cors_rule = CorsRule(
+            allowed_origins=['*'], 
+            allowed_methods=['POST', 'GET'],
+            allowed_headers=['*'],
+            exposed_headers=['*'],
+            max_age_in_seconds=3600)
+        
+        print('1. Get Cors Rules')
+        original_cors_rules = table_service.get_table_service_properties().cors
+
+        try:        
+            print('2. Overwrite Cors Rules')
+            table_service.set_table_service_properties(cors=[cors_rule])
+
+        finally:
+            #reverting cors rules back to the original ones
+            print('3. Revert Cors Rules back to the original ones')
+            table_service.set_table_service_properties(cors=original_cors_rules)
+        
+        print("CORS sample completed")
+
+    # Manage table access policy
+    def table_acl_operations(self, table_service):
+        table_name = 'acltable' + self.random_data.get_random_name(6)
+
+        try:        
+            print('1. Create a table with name - ' + table_name)
+            table_service.create_table(table_name)
+                
+            print('2. Set access policy for table')
+            access_policy = AccessPolicy(permission=TablePermissions.QUERY,
+                                        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+            identifiers = {'id': access_policy}
+            table_service.set_table_acl(table_name, identifiers)
+
+            print('3. Wait 30 seconds for acl to propagate')
+            time.sleep(30)
+
+            print('4. Get access policy from table')
+            acl = table_service.get_table_acl(table_name)
+
+            print('5. Clear access policy in table')
+            table_service.set_table_acl(table_name)
+
+        finally:
+            print('6. Delete table')
+            if(table_service.exists(table_name)):
+                table_service.delete_table(table_name)
+            
+        print("Table ACL operations sample completed")
+    
+    # Manage shared access signature on a table
+    def table_operations_with_sas(self, account):
+        table_name = 'sastable' + self.random_data.get_random_name(6)
+        
+        try:
+            # Create a Table Service object
+            table_service = account.create_table_service()
+            
+            print('1. Create table with name - ' + table_name)
+            table_service.create_table(table_name)
+            
+            # Create a Shared Access Signature for the table
+            print('2. Get sas for table')
+            
+            table_sas = table_service.generate_table_shared_access_signature(
+                table_name, 
+                TablePermissions.QUERY + TablePermissions.ADD + TablePermissions.UPDATE + TablePermissions.DELETE, 
+                datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+
+            shared_account = TableStorageAccount(account_name=account.account_name, sas_token=table_sas, endpoint_suffix=account.endpoint_suffix)
+            shared_table_service = shared_account.create_table_service()
+
+            # Create a sample entity to insert into the table
+            customer = {'PartitionKey': 'Harp', 'RowKey': '1', 'email' : 'harp@contoso.com', 'phone' : '555-555-5555'}
+
+            # Insert the entity into the table
+            print('3. Insert new entity into table with sas - ' + table_name)
+            shared_table_service.insert_entity(table_name, customer)
+            
+            # Demonstrate how to query the entity
+            print('4. Read the inserted entity with sas.')
+            entity = shared_table_service.get_entity(table_name, 'Harp', '1')
+            
+            print(entity['email'])
+            print(entity['phone'])
+
+            # Demonstrate how to update the entity by changing the phone number
+            print('5. Update an existing entity by changing the phone number with sas')
+            customer = {'PartitionKey': 'Harp', 'RowKey': '1', 'email' : 'harp@contoso.com', 'phone' : '425-123-1234'}
+            shared_table_service.update_entity(table_name, customer)
+
+            # Demonstrate how to delete an entity
+            print('6. Delete the entity with sas')
+            shared_table_service.delete_entity(table_name, 'Harp', '1')
+
+        finally:
+            print('7. Delete table')
+            if(table_service.exists(table_name)):
+                table_service.delete_table(table_name)
+            
+        print("Table operations with sas completed")

+ 96 - 0
data/purposeCombined/Azure/AddUp/table_basic_samples.py

@@ -0,0 +1,96 @@
+#-------------------------------------------------------------------------
+# Microsoft Developer & Platform Evangelism
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, 
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+# OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+#----------------------------------------------------------------------------------
+# The example companies, organizations, products, domain names,
+# e-mail addresses, logos, people, places, and events depicted
+# herein are fictitious. No association with any real company,
+# organization, product, domain name, email address, logo, person,
+# places, or events is intended or should be inferred.
+#--------------------------------------------------------------------------
+import config
+from random_data import RandomData
+from azure.storage import CloudStorageAccount
+from azure.storage.table import TableService, Entity
+
+#
+# Azure Table Service Sample - Demonstrate how to perform common tasks using the Microsoft Azure Table Service
+# including creating a table, CRUD operations and different querying techniques.
+#
+# Documentation References:
+#  - What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/
+#  - Getting Started with Tables - https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-table-storage/
+#  - Table Service Concepts - http://msdn.microsoft.com/en-us/library/dd179463.aspx
+#  - Table Service REST API - http://msdn.microsoft.com/en-us/library/dd179423.aspx
+#  - Table Service Python API - http://azure.github.io/azure-storage-ruby/
+#  - Storage Emulator - http://azure.microsoft.com/en-us/documentation/articles/storage-use-emulator/
+#
+class TableBasicSamples():
+
+    def __init__(self):
+        self.random_data = RandomData()
+
+    # Runs all samples for Azure Storage Table service.
+    def run_all_samples(self, account):
+        print('Azure Storage Basic Table samples - Starting.')
+        table_name = 'tablebasics' + self.random_data.get_random_name(6)
+        table_service = None
+        try:
+            table_service = account.create_table_service()
+
+            # Create a new table
+            print('Create a table with name - ' + table_name)
+
+            try:
+                table_service.create_table(table_name)
+            except Exception as err:
+                print('Error creating table, ' + table_name + '. Check if it already exists')
+ 
+            # Create a sample entity to insert into the table
+            customer = {'PartitionKey': 'Harp', 'RowKey': '1', 'email' : 'harp@contoso.com', 'phone' : '555-555-5555'}
+
+            # Insert the entity into the table
+            print('Inserting a new entity into table - ' + table_name)
+            table_service.insert_entity(table_name, customer)
+            print('Successfully inserted the new entity')
+
+            # Demonstrate how to query the entity
+            print('Read the inserted entity.')
+            entity = table_service.get_entity(table_name, 'Harp', '1')
+            print(entity['email'])
+            print(entity['phone'])
+
+            # Demonstrate how to update the entity by changing the phone number
+            print('Update an existing entity by changing the phone number')
+            customer = {'PartitionKey': 'Harp', 'RowKey': '1', 'email' : 'harp@contoso.com', 'phone' : '425-123-1234'}
+            table_service.update_entity(table_name, customer)
+
+            # Demonstrate how to query the updated entity, filter the results with a filter query and select only the value in the phone column
+            print('Read the updated entity with a filter query')
+            entities = table_service.query_entities(table_name, filter="PartitionKey eq 'Harp'", select='phone')
+            for entity in entities:
+                print(entity['phone'])
+
+            # Demonstrate how to delete an entity
+            print('Delete the entity')
+            table_service.delete_entity(table_name, 'Harp', '1')
+            print('Successfully deleted the entity')
+
+        except Exception as e:
+            if (config.IS_EMULATED):
+                print('Error occurred in the sample. If you are using the emulator, please make sure the emulator is running.', e)
+            else: 
+                print('Error occurred in the sample. Please make sure the account name and key are correct.', e)
+        finally:
+            # Demonstrate deleting the table, if you don't want to have the table deleted comment the below block of code
+            print('Deleting the table.')
+            if(table_service.exists(table_name)):
+                table_service.delete_table(table_name)
+            print('Successfully deleted the table')
+
+        print('\nAzure Storage Basic Table samples - Completed.\n')

+ 1 - 0
data/purposeCombined/Azure/AzureStorage

@@ -0,0 +1 @@
+Subproject commit ac4dbd83e307a5b8d3fd3b77103ec837b821c564

+ 125 - 0
data/purposeCombined/Azure/DLfile.py

@@ -0,0 +1,125 @@
+from azure.datalake.store import core, lib
+import config
+
+import sys, io
+import schedule, threading, time
+
+from datetime import datetime
+
+from os import listdir
+from os.path import isfile, join
+
+
+import glob
+
+
+def run_once_threaded(job_func):
+    job_thread = threading.Thread(target=job_func)
+    job_thread.start()
+    return schedule.CancelJob
+
+def run_threaded(job_func):
+    job_thread = threading.Thread(target=job_func)
+    job_thread.start()
+    
+
+local_upload_folder_path = "LOCAL_FOLDER_PATH"
+adls_upload_folder_path = "ADLS_FOLDER_PATH"
+
+
+orginal_stdout = sys.stdout
+
+buf = io.StringIO()
+sys.stdout = buf
+adlCreds = -1
+
+uploaded_files = False
+
+def postToTeams():
+ output = buf.getvalue()
+ if output == "":
+  return
+ orginal_stdout.write(output)
+
+  
+ now = datetime.now()
+ current_time = now.strftime("%H:%M:%S")
+ 
+ config.sendToTeams("{}<br>{}".format(current_time, output))
+ 
+ buf.truncate(0)
+ buf.seek(0)
+ 
+def authenticate():
+ global adlCreds
+ adlCreds = lib.auth(config.azure_tenant_id)
+
+
+def authenticated():
+ if adlCreds ==  -1:
+  return
+  
+#  print("Authentication success!")
+  
+ run_once_threaded(upload_files)
+ 
+ return schedule.CancelJob
+
+ 
+def upload_files():
+ adl = core.AzureDLFileSystem(adlCreds, store_name=config.store_name)
+ uploadedFolders = adl.ls(adls_upload_folder_path)
+ 
+ uploadedFolders = set([folder.replace(adls_upload_folder_path[1:], "")+"/" for folder in uploadedFolders])
+ 
+ local_folders = glob.glob(local_upload_folder_path+"*") # * means all if need specific format then *.csv
+ local_folders = set([d.replace(local_upload_folder_path, "")+"/" for d in local_folders])
+
+ to_upload_folders = local_folders.difference(uploadedFolders)
+
+ folder_names = sorted([d.replace(local_upload_folder_path, "") for d in to_upload_folders])
+
+ files = []
+ for folder in folder_names:
+  path = local_upload_folder_path+folder
+  for f in listdir(path):
+   if isfile(join(path, f)):
+    files.append(folder+f)
+
+
+ print("Uploading the following folders:<br>{}<br>Total number of files to upload:<br>{}".format(", ". join(folder_names), len(files)))
+ 
+
+ for f in files:
+  adl.put(local_upload_folder_path+f, adls_upload_folder_path+f)
+    
+
+ print("Upload finished.")
+ time.sleep(2)
+ global uploaded_files
+ uploaded_files = True
+
+
+def exit_program():
+ if uploaded_files == True:
+  exit()
+
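+# Scheduler wiring: postToTeams flushes any captured stdout to Teams every two seconds,
+# authenticate runs once in a background thread to obtain the ADLS credentials,
+# authenticated waits until those credentials exist and then starts the upload once,
+# and exit_program stops the script after the upload has completed.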
+schedule.every(2).seconds.do(run_threaded, postToTeams)
+schedule.every().seconds.do(run_once_threaded, authenticate)
+schedule.every().seconds.do(authenticated)
+schedule.every().seconds.do(exit_program)
+
+
+while 1:
+    schedule.run_pending()
+    time.sleep(1) 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 

+ 1 - 0
data/purposeCombined/Azure/azure-multiapi-storage-python

@@ -0,0 +1 @@
+Subproject commit dc0e7dc1066ca4cd2d6006a5bccd7ec37521ec1c

+ 64 - 0
data/purposeCombined/Azure/blob-adapter.py

@@ -0,0 +1,64 @@
+import configparser
+from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
+from azure.core.exceptions import HttpResponseError, ResourceExistsError
+from flask import jsonify
+
+class AzureBlobAdapter:
+    FILE_PREFIX = 'IN_CARE'
+    blob_service_client: BlobServiceClient
+    blob_client: BlobClient
+    container_client: ContainerClient
+    configs = configparser.ConfigParser()
+    configs.read('azure_blob.cfg')
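+    # azure_blob.cfg is assumed to contain an [azure_blob_config] section with the keys
+    # read via get_config below; a minimal sketch with placeholder values:
+    #
+    #     [azure_blob_config]
+    #     connection_string = DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net
+    #     container_name = mycontainer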
+
+    # init method or constructor
+
+    def __init__(self):
+        connection_string = self.get_config('connection_string')
+        print("Azure Blob Storage v" + __version__ +
+              " - Blob Python libs")
+        self.blob_service_client = BlobServiceClient.from_connection_string(
+            connection_string)
+
+    def upload(self, file_dict):
+        upload_response = {}
+        for key in file_dict:
+            print("File Dict Key: [{}] value is: {}".format(key, file_dict[key]))
+            print("\nUploading to Azure Storage as blob:\n\t" + key)
+
+            self.blob_client = self.blob_service_client.get_blob_client(container=self.get_config('container_name'), blob=key)
+            with open(file_dict[key], "rb") as data:
+                try:
+                    self.blob_client.upload_blob(data)
+                    print('File: Uploaded Successfully: {}'.format(key))
+                    upload_response[key] = 'Successfully Uploaded'
+                except ResourceExistsError:
+                    print('File: NOT Uploaded Successfully: {}'.format(key))
+                    upload_response[key] = 'This Resource already exists'
+                    upload_response['Partial'] = True
+                    print('This Resource already exists')
+                    # return 'This Resource already exists'
+        print("Before Returning Response:")
+        print(jsonify(upload_response))
+        print("---------------")
+        return upload_response
+
+    def get_blob_client(self, blob_name):
+        self.blob_client = self.blob_service_client.get_blob_client(
+            container=self.get_config('container_name'), blob=blob_name)
+        return self.blob_client
+
+    def list_blobs(self):
+        print("\nList blobs in the container")
+        self.container_client = self.blob_service_client.get_container_client(
+            container=self.get_config('container_name'))
+        blob_list = self.container_client.list_blobs()
+        blobs = []
+        for blob in blob_list:
+            # print("\t Blob name: " + blob.name)
+            blobs.append(blob.name)
+        return blobs
+
+    def get_config(self, app_property):
+        config_value = self.configs['azure_blob_config'][app_property]
+        return config_value

+ 98 - 0
data/purposeCombined/Azure/blob-permission.py

@@ -0,0 +1,98 @@
+from datetime import datetime, timedelta
+
+from azure.storage.blob import BlobSasPermissions, generate_blob_sas
+
+from azurebatchload.checks import Checks
+
+
+class Base(Checks):
+    def __init__(
+        self,
+        destination,
+        folder,
+        extension=None,
+        modified_since=None,
+        method="batch",
+        list_files=None,
+        expiry_download_links=7,
+    ):
+        super().__init__(directory=folder)
+
+        self.destination = destination
+        self.folder = folder
+        self.extension = extension
+        self.modified_since = modified_since
+        if not self._check_azure_cli_installed():
+            self.method = "single"
+        else:
+            self.method = method
+        self.list_files = list_files
+        credentials = self._check_connection_credentials()
+        self.connection_string = credentials[0]
+        self.account_name = credentials[1]
+        self.account_key = credentials[2]
+        self.expiry_download_links = expiry_download_links
+
+    def checks(self):
+        allowed_methods = ("batch", "single")
+        if self.method not in allowed_methods:
+            raise ValueError(f"Method {self.method} is not a valid method. Choose from {' or '.join(allowed_methods)}.")
+
+        if self.list_files and self.method == "batch":
+            raise ValueError("list_files is only allowed with method='single'.")
+
+        if self.list_files and not isinstance(self.list_files, list):
+            raise ValueError(f"Argument list_files was set, but is not of type list, but type {type(self.list_files)}")
+
+    def create_blob_link(self, blob_folder, blob_name) -> str:
+        if blob_folder:
+            full_path_blob = f"{blob_folder}/{blob_name}"
+        else:
+            full_path_blob = blob_name
+        url = f"https://{self.account_name}.blob.core.windows.net/{self.destination}/{full_path_blob}"
+        sas_token = generate_blob_sas(
+            account_name=self.account_name,
+            account_key=self.account_key,
+            container_name=self.destination,
+            blob_name=full_path_blob,
+            permission=BlobSasPermissions(read=True, delete_previous_version=False),
+            expiry=datetime.utcnow() + timedelta(days=self.expiry_download_links),
+        )
+
+        url_with_sas = f"{url}?{sas_token}"
+        return url_with_sas
+
+    @staticmethod
+    def create_not_case_sensitive_extension(extension):
+        """
+        Create a case-insensitive fnmatch pattern, for example:
+        .pdf -> .[Pp][Dd][Ff]
+        .csv -> .[Cc][Ss][Vv]
+        """
+        new_extension = ""
+        for letter in extension:
+            if not letter.isalpha():
+                new_extension += letter
+            else:
+                new_extension += f"[{letter.upper()}{letter}]"
+
+        if not new_extension.startswith("*"):
+            new_extension = "*" + new_extension
+
+        return new_extension
+
+    def define_pattern(self):
+        # Only normalize the extension when one was provided (it may be None)
+        if self.extension:
+            self.extension = self.create_not_case_sensitive_extension(self.extension)
+        if self.folder and not self.extension:
+            if self.folder.endswith("/"):
+                pattern = self.folder + "*"
+            else:
+                pattern = self.folder + "/*"
+        elif self.folder and self.extension:
+            pattern = self.folder.rstrip("/") + "/" + "*" + self.extension
+        elif not self.folder and self.extension:
+            pattern = "*" + self.extension
+        else:
+            pattern = None
+
+        return pattern

+ 101 - 0
data/purposeCombined/Azure/blob-upload-1.py

@@ -0,0 +1,101 @@
+import logging
+import os
+
+from azure.storage.blob import BlobServiceClient
+
+from azurebatchload.core import Base
+
+
+class Upload(Base):
+    def __init__(
+        self,
+        destination,
+        source,
+        folder=None,
+        extension=None,
+        method="batch",
+        modified_since=None,
+        overwrite=False,
+        list_files=None,
+        create_download_links=False,
+        expiry_download_links=7,
+    ):
+        super(Upload, self).__init__(
+            destination=destination,
+            folder=source,
+            extension=extension,
+            modified_since=modified_since,
+            method=method,
+            list_files=list_files,
+            expiry_download_links=expiry_download_links,
+        )
+        self.blob_folder = folder
+        self.overwrite = overwrite
+        self.create_download_links = create_download_links
+
+    def upload_batch(self):
+        cmd = f"az storage fs directory upload " f"-f {self.destination} " f"-s {self.folder} -r"
+
+        non_default = {"-d": self.blob_folder, "--connection-string": self.connection_string}
+
+        for flag, value in non_default.items():
+            if value:
+                cmd = f"{cmd} {flag} '{value}'"
+
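+        # A fully assembled command looks roughly like the following (placeholder
+        # container, folder, and connection string):
+        #   az storage fs directory upload -f mycontainer -s ./local/folder -r -d uploads --connection-string '<connection-string>'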
+        os.system(cmd)
+
+    def upload_single(self):
+        blob_service_client = BlobServiceClient.from_connection_string(self.connection_string)
+        download_links = {}
+
+        for root, dirs, files in os.walk(self.folder):
+            for file in files:
+
+                full_path = os.path.join(root, file)
+
+                # ignore hidden files
+                if file.startswith("."):
+                    continue
+
+                # if list_files is given, only upload matched files
+                if self.list_files and file not in self.list_files:
+                    continue
+
+                # if extension is given only upload if extension is matched
+                if self.extension and os.path.isfile(full_path) and not file.lower().endswith(self.extension.lower()):
+                    continue
+
+                blob_folder = root.replace(self.folder, "").lstrip("/")
+
+                if self.blob_folder:
+                    # we only want to append blob_folder if it actually is a path or folder
+                    # blob_folder can be empty string ""
+                    if blob_folder:
+                        blob_folder = os.path.join(self.blob_folder, blob_folder)
+                    else:
+                        blob_folder = self.blob_folder
+
+                # if no folder is given, just upload to the container root path
+                if not blob_folder:
+                    container = self.destination
+                else:
+                    container = os.path.join(self.destination, blob_folder)
+                container_client = blob_service_client.get_container_client(container=container)
+
+                with open(full_path, "rb") as data:
+                    logging.debug(f"Uploading blob {full_path}")
+                    container_client.upload_blob(data=data, name=file, overwrite=self.overwrite)
+
+                if self.create_download_links:
+                    download_links[file] = self.create_blob_link(blob_folder=blob_folder, blob_name=file)
+
+        return download_links
+
+    def upload(self):
+        self.checks()
+
+        logging.info(f"Uploading to container {self.destination} with method = '{self.method}'.")
+        if self.method == "batch":
+            return self.upload_batch()
+        else:
+            return self.upload_single()
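+
+    # Minimal usage sketch (hypothetical container and paths):
+    #   Upload(destination="mycontainer", source="./data", folder="uploads", method="single").upload()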

+ 81 - 0
data/purposeCombined/Azure/blob-upload-2.py

@@ -0,0 +1,81 @@
+import requests
+from bs4 import BeautifulSoup as bs
+import os
+from azure.storage.blob import BlobServiceClient, BlobClient
+from azure.storage.blob import ContentSettings, ContainerClient
+
+#Your Connection String
+MY_CONNECTION_STRING = "DefaultEndpointsProtocol************************"
+#Your Container Name
+MY_IMAGE_CONTAINER = "picture"
+#Your local path
+LOCAL_IMAGE_PATH = "..\Picture"
+#change the url to the one you want to scrape
+URL = 'WebSiteURL'
+
+class AzureBlobStorage:
+    def Scrapp(self):
+        #create folder with the picture if it doesn't exist
+        if not os.path.exists('.\Picture'):
+            os.mkdir('.\Picture')
+        os.chdir('.\Picture')
+        #Change the number to begin where you want to start
+        page_begin = 1
+        #Change the number to the number of pages you want to scrape
+        page_end = 230 + 1
+
+        #If you want to scrape only one page, change the page_end to page_begin or delete the loop
+        for page in range(page_begin, page_end):
+            req = requests.get(URL + str(page))
+            soup = bs(req.text, 'html.parser')
+            images = soup.find_all('img')
+            for image in images:
+                name = image['src']
+                alpha = image['src']
+                link = 'WebSiteURL' + alpha
+                print(link)
+                #replace the name of the photo it's better :))
+                with open(name.replace(' ', '-').replace('/', '').replace('"', "'").replace('.jpg','') + '.jpg','wb') as f:
+                    im = requests.get(link)
+                    f.write(im.content)
+                    #check the name on the terminal
+                    print('Writing: ', name)
+
+    def __init__(self):
+        # Initialize the connection to Azure storage account
+        self.blob_service_client = BlobServiceClient.from_connection_string(MY_CONNECTION_STRING)
+
+    def upload_all_images_in_folder(self):
+        # Get all files with jpg extension and exclude directories
+        all_file_names = [f for f in os.listdir(LOCAL_IMAGE_PATH)
+                          if os.path.isfile(os.path.join(LOCAL_IMAGE_PATH, f)) and ".jpg" in f]
+        # Upload each file
+        for file_name in all_file_names:
+            self.upload_image(file_name)
+
+    def upload_image(self, file_name):
+        # Create blob with same name as local file name
+        blob_client = self.blob_service_client.get_blob_client(container=MY_IMAGE_CONTAINER,
+                                                               blob=file_name)
+        # Get full path to the file
+        upload_file_path = os.path.join(LOCAL_IMAGE_PATH, file_name)
+        # Create blob on storage
+        # Overwrite if it already exists!
+        image_content_setting = ContentSettings(content_type='image/jpeg')
+        print(f"uploading file - {file_name}")
+        with open(upload_file_path, "rb") as data:
+            blob_client.upload_blob(data, overwrite=True, content_settings=image_content_setting)
+
+if __name__=='__main__':
+        
+    # Initialize class and upload files
+    azure_blob_file_uploader = AzureBlobStorage()
+    azure_blob_file_uploader.Scrapp()
+    azure_blob_file_uploader.upload_all_images_in_folder()

+ 57 - 0
data/purposeCombined/Azure/blob-upload-3.py

@@ -0,0 +1,57 @@
+from flask import Flask
+from flask import jsonify
+from flask import request
+from werkzeug.utils import secure_filename
+from azure.storage.blob import BlockBlobService
+import os
+
+
+app = Flask(__name__, static_folder='static', static_url_path='')
+
+app.config['ALLOWED_EXTENSIONS'] = set(['txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'])
+app.config['MAX_CONTENT_LENGTH'] = 1 * 1024 * 1024    # 1 Mb limit
+app.config['AZURE_STORAGE_ACCOUNT'] = "flasktest"
+app.config['AZURE_STORAGE_CONTAINER'] = "doc"
+app.config['AZURE_STORAGE_KEY'] = os.environ['AZURE_STORAGE_KEY']
+try:
+    os.environ['FLASK_DEBUG']
+    app.debug = True
+except KeyError:
+    app.debug = False
+
+
+def allowed_file(filename):
+    return '.' in filename and \
+           filename.rsplit('.', 1)[1] in app.config['ALLOWED_EXTENSIONS']
+
+@app.route('/')
+def root():
+    return app.send_static_file('index.html')
+
+
+# basedir = os.path.abspath(os.path.dirname(__file__))
+
+@app.route('/uploadajax', methods=['POST'])
+def upldfile():
+    if request.method == 'POST':
+        file = request.files['file']
+        if file and allowed_file(file.filename):
+            filename = secure_filename(file.filename)
+            app.logger.info('FileName: ' + filename)
+            
+            block_blob_service = BlockBlobService(account_name=app.config['AZURE_STORAGE_ACCOUNT'], account_key=app.config['AZURE_STORAGE_KEY'])
+            block_blob_service.create_blob_from_bytes(
+                'doc',
+                filename,
+                file.read())
+            
+#             updir = os.path.join(basedir, 'upload/')
+#             file.save(os.path.join(updir, filename))
+#             file_size = os.path.getsize(os.path.join(updir, filename))
+            return jsonify(name=filename, url='https://'+app.config['AZURE_STORAGE_ACCOUNT']+'.blob.core.windows.net/' \
+                           +app.config['AZURE_STORAGE_CONTAINER']+'/'+filename)
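+
+# Example request against the endpoint above (hypothetical host, port, and file name):
+#   curl -F "file=@document.pdf" http://localhost:5000/uploadajax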
+
+
+
+if __name__ == '__main__':
+    app.run()

+ 67 - 0
data/purposeCombined/Azure/blob-upload-4.py

@@ -0,0 +1,67 @@
+import os, uuid
+from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
+import argparse
+from argparse import ArgumentParser
+
+parser = ArgumentParser()
+parser.add_argument('--connect_str', default='', type=str)
+parser.add_argument('--container_name', default='', type=str)
+parser.add_argument('--source', default='', type=str)
+parser.add_argument('--target', default='', type=str)
+parser.add_argument('--is_directory', default=False, action='store_true')
+parser.add_argument('--download', default=False, action='store_true')
+parser.add_argument('--upload', default=False, action='store_true')
+arg = parser.parse_args()
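+
+# Example invocations (hypothetical values; pass your own connection string and container):
+#   python blob-upload-4.py --connect_str "<connection-string>" --container_name mycontainer --upload --source ./local/file.txt --target remote/file.txt
+#   python blob-upload-4.py --connect_str "<connection-string>" --container_name mycontainer --download --is_directory --source remote/dir --target ./local/dir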
+
+connect_str = arg.connect_str #Enter your connection string here! Refer to https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python?tabs=environment-variable-windows for more info
+container_name = arg.container_name #Enter your continaer name from azure blob storage here!
+blob_service_client = BlobServiceClient.from_connection_string(connect_str) # Create the BlobServiceClient object which will be used to create a container client
+
+def upload_file_to_blob(upload_file_path, target): #file path - >file path
+    blob_client = blob_service_client.get_blob_client(container=container_name, blob=target)
+    print("\nUploading to Azure Storage as blob:\n\t" + upload_file_path)
+    with open(upload_file_path, "rb") as data:
+        blob_client.upload_blob(data)
+
+def upload_directory_to_blob(upload_file_path, target): #directory name -> directory name
+    print("\nUploading directory to Azure Storage as blob:\n\t" + upload_file_path)
+    files = os.listdir(upload_file_path)
+    for entry in files:  # note: only top-level files are uploaded, nested sub-directories are not walked
+        file_name = upload_file_path + '/' + entry
+        target_ = target + '/' + entry
+        blob_client = blob_service_client.get_blob_client(container=container_name, blob=target_)
+        with open(file_name, "rb") as data:
+            blob_client.upload_blob(data)
+
+def download_file_from_blob(source, download_file_path):
+    blob_client = blob_service_client.get_blob_client(container=container_name, blob=source)
+    print("\nDownloading blob from container to:\n\t" + download_file_path)
+
+    with open(download_file_path, "wb") as download_file:
+        download_file.write(blob_client.download_blob().readall())
+
+def download_directory_from_blob(source, download_directory_path):
+    container_client = ContainerClient.from_connection_string(conn_str=connect_str, container_name=container_name)
+    print(f"\nDownloading all blobs from the following directory {source} in container {container_name}")
+    blob_list = container_client.list_blobs()
+    for blob in blob_list:
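+        # Substring match: any blob whose name contains `source` is downloaded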
+        if source in blob.name:
+            blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob.name)
+            local_path = os.path.join(download_directory_path, blob.name)
+            os.makedirs(os.path.dirname(local_path) or '.', exist_ok=True)
+            with open(local_path, "wb") as download_file:
+                download_file.write(blob_client.download_blob().readall())
+
+
+if not arg.download and not arg.upload:
+    raise Exception('Specify exactly one of --upload or --download.')
+
+if arg.download: #downloading from source to target
+    if not arg.is_directory:
+        download_file_from_blob(arg.source, arg.target)
+    else:
+        download_directory_from_blob(arg.source, arg.target)
+else: #Uploading source to target
+    if not arg.is_directory:
+        upload_file_to_blob(arg.source, arg.target)
+    else:
+        upload_directory_to_blob(arg.source, arg.target)

+ 107 - 0
data/purposeCombined/Azure/blob-upload.py

@@ -0,0 +1,107 @@
+# ----------------------------------------------------------------------------------
+# MIT License
+#
+# Copyright(c) Microsoft Corporation. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# ----------------------------------------------------------------------------------
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+
+import os
+import uuid
+import sys
+from azure.storage.blob import BlockBlobService, PublicAccess
+
+# ---------------------------------------------------------------------------------------------------------
+# Method that creates a test file in the 'Sample' folder.
+# This sample application creates a test file, uploads the test file to the Blob storage,
+# lists the blobs in the container, and downloads the file with a new name.
+# ---------------------------------------------------------------------------------------------------------
+# Documentation References:
+# Associated Article - https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python
+# What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/
+# Getting Started with Blobs-https://docs.microsoft.com/en-us/azure/storage/blobs/storage-python-how-to-use-blob-storage
+# Blob Service Concepts - http://msdn.microsoft.com/en-us/library/dd179376.aspx
+# Blob Service REST API - http://msdn.microsoft.com/en-us/library/dd135733.aspx
+# ----------------------------------------------------------------------------------------------------------
+
+
+def run_sample():
+    try:
+        # Create the BlockBlobService that is used to call the Blob service for the storage account
+        blob_service_client = BlockBlobService(
+            account_name='accountname', account_key='accountkey')
+
+        # Create a container called 'quickstartblobs'.
+        container_name = 'quickstartblobs'
+        blob_service_client.create_container(container_name)
+
+        # Set the permission so the blobs are public.
+        blob_service_client.set_container_acl(
+            container_name, public_access=PublicAccess.Container)
+
+        # Create the Sample folder if it does not exist, and create a file in it to test the upload and download.
+        local_path = os.path.expanduser("~/Sample")
+        if not os.path.exists(local_path):
+            os.makedirs(local_path)
+        local_file_name = "QuickStart_" + str(uuid.uuid4()) + ".txt"
+        full_path_to_file = os.path.join(local_path, local_file_name)
+
+        # Write text to the file.
+        with open(full_path_to_file, 'w') as file:
+            file.write("Hello, World!")
+
+        print("Temp file = " + full_path_to_file)
+        print("\nUploading to Blob storage as blob: " + local_file_name)
+
+        # Upload the created file, use local_file_name for the blob name
+        blob_service_client.create_blob_from_path(
+            container_name, local_file_name, full_path_to_file)
+
+        # List the blobs in the container
+        print("\nList blobs in the container")
+        generator = blob_service_client.list_blobs(container_name)
+        for blob in generator:
+            print("\t Blob name: " + blob.name)
+
+        # Download the blob(s).
+        # Add a '_DOWNLOADED' suffix to the file name so you can see both files in the Sample folder.
+        full_path_to_file2 = os.path.join(local_path, str.replace(
+            local_file_name, '.txt', '_DOWNLOADED.txt'))
+        print("\nDownloading blob to " + full_path_to_file2)
+        blob_service_client.get_blob_to_path(
+            container_name, local_file_name, full_path_to_file2)
+
+        sys.stdout.write("Sample finished running. When you press Enter, the sample will be deleted and the sample "
+                         "application will exit.")
+        sys.stdout.flush()
+        input()
+
+        # Clean up resources. This includes the container and the temp files
+        blob_service_client.delete_container(container_name)
+        os.remove(full_path_to_file)
+        os.remove(full_path_to_file2)
+    except Exception as e:
+        print(e)
+
+
+# Main method.
+if __name__ == '__main__':
+    run_sample()

+ 221 - 0
data/purposeCombined/Azure/django-blob.py

@@ -0,0 +1,221 @@
+import mimetypes
+import datetime
+
+from azure.common import AzureMissingResourceHttpError
+from azure.storage.blob import BlobService
+
+from django.core.files.storage import Storage
+from django.conf import settings
+
+try:
+    from django.utils.deconstruct import deconstructible
+except ImportError:
+    # Fallback for Django versions below 1.7, which do not provide deconstructible
+    def deconstructible(func):
+        return func
+
+
+@deconstructible
+class AzureStorage(Storage):
+    """
+    Custom file storage system for Azure
+    """
+
+    container = settings.AZURE_STORAGE.get('CONTAINER')
+    account_name = settings.AZURE_STORAGE.get('ACCOUNT_NAME')
+    account_key = settings.AZURE_STORAGE.get('ACCOUNT_KEY')
+    cdn_host = settings.AZURE_STORAGE.get('CDN_HOST')
+    use_ssl = settings.AZURE_STORAGE.get('USE_SSL')
+
+    def __init__(self, account_name=None, account_key=None, container=None,
+         use_ssl=None, cdn_host=None):
+
+        if account_name is not None:
+            self.account_name = account_name
+
+        if account_key is not None:
+            self.account_key = account_key
+
+        if container is not None:
+            self.container = container
+
+        if use_ssl is not None:
+            self.use_ssl = use_ssl
+
+        if cdn_host is not None:
+            self.cdn_host = cdn_host
+
+    def __getstate__(self):
+        return dict(
+            account_name=self.account_name,
+            account_key=self.account_key,
+            container=self.container,
+            cdn_host=self.cdn_host,
+            use_ssl=self.use_ssl
+        )
+
+    def _get_service(self):
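+        # Lazily create the BlobService on first use and cache it on the instance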
+        if not hasattr(self, '_blob_service'):
+            self._blob_service = BlobService(
+                account_name=self.account_name,
+                account_key=self.account_key,
+                protocol='https' if self.use_ssl else 'http'
+            )
+
+        return self._blob_service
+
+    def _get_properties(self, name):
+        return self._get_service().get_blob_properties(
+            container_name=self.container,
+            blob_name=name
+        )
+
+    def _open(self, name, mode='rb'):
+        """
+        Return the AzureStorageFile.
+        """
+
+        from django.core.files.base import ContentFile
+
+        contents = self._get_service().get_blob_to_bytes(
+            container_name=self.container,
+            blob_name=name
+        )
+
+        return ContentFile(contents)
+
+    def _save(self, name, content):
+        """
+        Use the Azure Storage service to write ``content`` to a remote file
+        (called ``name``).
+        """
+        
+
+        content.open()
+
+        content_type = None
+
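+        # Prefer the content type reported by the uploaded file; otherwise guess it from the file name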
+        if hasattr(content.file, 'content_type'):
+            content_type = content.file.content_type
+        else:
+            content_type = mimetypes.guess_type(name)[0]
+
+        cache_control = self.get_cache_control(
+            self.container,
+            name,
+            content_type
+        )
+
+        self._get_service().put_block_blob_from_file(
+            container_name=self.container,
+            blob_name=name,
+            stream=content,
+            x_ms_blob_content_type=content_type,
+            cache_control=cache_control,
+            x_ms_blob_cache_control=cache_control
+        )
+
+        content.close()
+
+        return name
+
+    def listdir(self, path):
+        """
+        Lists the contents of the specified path, returning a 2-tuple of lists;
+        the first item being directories, the second item being files.
+        """
+
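+        # Blob storage has no real directory hierarchy, so everything under the prefix is returned in the files list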
+        files = []
+
+        if path and not path.endswith('/'):
+            path = '%s/' % path
+
+        path_len = len(path)
+
+        if not path:
+            path = None
+
+        blob_list = self._get_service().list_blobs(self.container, prefix=path)
+
+        for blob in blob_list:
+            files.append(blob.name[path_len:])
+
+        return ([], files)
+
+    def exists(self, name):
+        """
+        Returns True if a file referenced by the given name already exists in
+        the storage system, or False if the name is available for a new file.
+        """
+        try:
+            self._get_properties(name)
+
+            return True
+        except AzureMissingResourceHttpError:
+            return False
+
+    def delete(self, name):
+        """
+        Deletes the file referenced by name.
+        """
+
+        try:
+            self._get_service().delete_blob(self.container, name)
+        except AzureMissingResourceHttpError:
+            pass
+
+    def get_cache_control(self, container, name, content_type):
+        """
+        Get the Cache-Control value for a blob, used when saving the blob on
+        Azure.  Returns `None` by default to remain compatible with the
+        default setting for the SDK.
+        """
+
+        return None
+
+    def size(self, name):
+        """
+        Returns the total size, in bytes, of the file referenced by name.
+        """
+
+        try:
+            properties = self._get_properties(name)
+
+            return int(properties['content-length'])
+        except AzureMissingResourceHttpError:
+            pass
+
+    def url(self, name):
+        """
+        Returns the URL where the contents of the file referenced by name can
+        be accessed.
+        """
+
+        blob_url_args = {
+            'container_name': self.container,
+            'blob_name': name,
+        }
+
+        if self.cdn_host:
+            # The account name should be built into the cdn hostname
+            blob_url_args['account_name'] = ''
+            blob_url_args['host_base'] = self.cdn_host
+
+        return self._get_service().make_blob_url(
+            **blob_url_args
+        )
+
+    def modified_time(self, name):
+        """
+        Returns a datetime object containing the last modified time.
+        """
+
+        try:
+            properties = self._get_properties(name)
+
+            return datetime.datetime.strptime(
+                properties['last-modified'],
+                '%a, %d %b %Y %H:%M:%S %Z'
+            )
+        except AzureMissingResourceHttpError:
+            pass

+ 1 - 0
data/purposeCombined/Azure/python-text-classification

@@ -0,0 +1 @@
+Subproject commit 8078e57805781f1453f1dd7ea84f8b93aa70cafa

+ 555 - 0
data/purposeCombined/Azure/storage-blob.py

@@ -0,0 +1,555 @@
+#----------------------------------------------------------------------------------
+# Microsoft Developer & Platform Evangelism
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, 
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+# OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+#----------------------------------------------------------------------------------
+# The example companies, organizations, products, domain names,
+# e-mail addresses, logos, people, places, and events depicted
+# herein are fictitious.  No association with any real company,
+# organization, product, domain name, email address, logo, person,
+# places, or events is intended or should be inferred.
+#----------------------------------------------------------------------------------
+
+import os
+import config
+from random_data import RandomData
+import base64
+import datetime
+import time
+from azure.storage import CloudStorageAccount, AccessPolicy
+from azure.storage.blob import BlockBlobService, PageBlobService, AppendBlobService
+from azure.storage.models import CorsRule, Logging, Metrics, RetentionPolicy, ResourceTypes, AccountPermissions
+from azure.storage.blob.models import BlobBlock, ContainerPermissions, ContentSettings
+#
+# Azure Storage Blob Sample - Demonstrate how to use the Blob Storage service. 
+# Blob storage stores unstructured data such as text, binary data, documents or media files. 
+# Blobs can be accessed from anywhere in the world via HTTP or HTTPS. 
+#
+ 
+# Documentation References: 
+#  - What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/ 
+#  - Getting Started with Blobs - https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/
+#  - Blob Service Concepts - http://msdn.microsoft.com/en-us/library/dd179376.aspx 
+#  - Blob Service REST API - http://msdn.microsoft.com/en-us/library/dd135733.aspx 
+#  - Blob Service Python API - http://azure.github.io/azure-storage-python/ref/azure.storage.blob.html
+#  - Storage Emulator - http://azure.microsoft.com/en-us/documentation/articles/storage-use-emulator/ 
+#
+class BlobAdvancedSamples():
+
+    def __init__(self):
+        self.random_data = RandomData()
+
+    # Runs all samples for Azure Storage Blob service.
+    # Input Arguments:
+    # account - CloudStorageAccount to use for running the samples
+    def run_all_samples(self, account):
+        print('\n\nAzure Storage Blob advanced sample - Starting.')
+        
+        try:
+            print('\n\n* Container operations *\n')
+            self.list_containers(account)
+
+            print('\n\n* Set CORS *\n')
+            self.set_cors_rules(account)
+
+            print('\n\n* Container lease *\n')
+            self.lease_container(account)
+
+            print('\n\n* Copy blob *\n')
+            self.copy_blob(account)
+            
+            print('\n\n* Page blob operations *\n')
+            self.page_blob_operations(account)
+            
+            print('\n\n* Block blob operations *\n')
+            self.block_blob_operations(account)
+
+            print('\n\n* Properties and Metadata operations *\n')
+            self.properties_and_metadata_operations(account)
+            
+            print('\n\n* Container ACL operations *\n')
+            self.container_acl_operations(account)
+
+            print('\n\n* Blob lease *\n')
+            self.lease_blob(account)  
+            
+            if (config.IS_EMULATED):
+                print('\nShared Access Signature is not supported in emulator');
+            else:
+                print('\n\n* Container with SAS operations *\n')
+                self.container_operations_with_sas(account)      
+  
+                print('\n\n* SAS with access policy *\n')
+                self.sas_with_container_access_policy(account)
+
+                print('\n\n* Set blob service logging and metrics properties *\n')
+                self.set_service_properties(account)
+
+        except Exception as e:
+            if (config.IS_EMULATED):
+                print('Error occurred in the sample. If you are using the emulator, please make sure the emulator is running.', e)
+            else: 
+                print('Error occurred in the sample. Please make sure the account name and key are correct.', e)
+
+        finally:
+            print('\nAzure Storage Blob advanced sample - Completed.\n')
+
+
+    # Copy a source blob to a destination blob
+    def copy_blob(self, account):
+
+        file_upload = "HelloWorld.png"
+        container_name = 'blockblobcontainer' + self.random_data.get_random_name(6)
+
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+
+        try:
+            # Create a new container
+            print('1. Create a container with name - ' + container_name)
+            blockblob_service.create_container(container_name)
+                    
+            # Upload file as a block blob
+            print('2. Upload BlockBlob')
+            #Get full path on drive to file_to_upload by joining the fully qualified directory name and file name on the local drive
+            full_path_to_file = os.path.join(os.path.dirname(__file__), file_upload)
+            blockblob_service.create_blob_from_path(container_name, file_upload, full_path_to_file)
+
+            target_blob = "target.png"
+            blob_source_url = blockblob_service.make_blob_url(container_name, file_upload)
+
+            print('3. Copy blob')
+            blockblob_service.copy_blob(container_name, target_blob, blob_source_url)
+
+            print('4. Get target blob')
+            target_blob_properties = blockblob_service.get_blob_properties(container_name, target_blob)
+
+            print('5. Get copy properties')
+            copy_properties = target_blob_properties.properties.copy
+            
+            print('Copy properties status: ' + copy_properties.status)
+
+            if(copy_properties.status == "pending"):
+                print('6. Abort copy')
+                blockblob_service.abort_copy_blob(container_name, target_blob, copy_properties.id)
+        finally:
+            # Delete the container
+            print("7. Delete Container")
+            if blockblob_service.exists(container_name):
+                blockblob_service.delete_container(container_name)
+
+    def sas_with_container_access_policy(self, account):
+        container_name = 'demosasblobcontainer' + self.random_data.get_random_name(6)
+        
+        blockblob_service = account.create_block_blob_service()
+        
+        try:
+            print('1. Create a container with name - ' + container_name)
+            blockblob_service.create_container(container_name)
+            
+            print('2. Create blob "blob1" with text')
+            blockblob_service.create_blob_from_text(container_name, 'blob1', b'hello world')
+
+            print('3. Set access policy for container')
+            # Set access policy on container
+            access_policy = AccessPolicy(permission=ContainerPermissions.READ,
+                                        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+            identifiers = {'id': access_policy}
+            acl = blockblob_service.set_container_acl(container_name, identifiers)
+
+            # Wait 30 seconds for acl to propagate
+            print('Wait 30 seconds for acl to propagate')
+            time.sleep(30)
+
+            print('4. Get sas for access policy in container')
+            # Indicates to use the access policy set on the container
+            sas = blockblob_service.generate_container_shared_access_signature(
+                container_name,
+                id='id'
+            )
+
+            print('5. Create blob service with sas')
+            # Create a service and use the SAS
+            shared_blockblob_service = BlockBlobService(
+                account_name=account.account_name,
+                sas_token=sas,
+            )
+
+            print('6. Read blob content with sas')
+            blob = shared_blockblob_service.get_blob_to_text(container_name, 'blob1')
+            content = blob.content # hello world
+        finally:
+            print('7. Delete container')
+            blockblob_service.delete_container(container_name)
+        
+        print("SAS with access policy sample completed")
+        
+    def container_operations_with_sas(self, account):
+        container_name = 'demosasblobcontainer' + self.random_data.get_random_name(6)
+        
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+        
+        # Create a Shared Access Signature for the account
+        print('1.Get account sas')
+        
+        account_sas = blockblob_service.generate_account_shared_access_signature(
+            ResourceTypes.CONTAINER + ResourceTypes.OBJECT, 
+            AccountPermissions.READ + AccountPermissions.WRITE + AccountPermissions.DELETE + AccountPermissions.LIST + AccountPermissions.CREATE, 
+            datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+
+        shared_account = CloudStorageAccount(account_name=account.account_name, sas_token=account_sas)
+        shared_account_block_service = shared_account.create_block_blob_service()
+
+        try:
+            print('2. Create container with account sas. Container name - ' + container_name)
+            shared_account_block_service.create_container(container_name)
+            
+            # For the purposes of the demo, get a Container SAS
+            # In a real-world application, the above Account SAS can be used
+            print('3. Get container sas')
+            container_sas = blockblob_service.generate_container_shared_access_signature(
+                container_name, 
+                ContainerPermissions.READ + ContainerPermissions.WRITE + ContainerPermissions.DELETE + ContainerPermissions.LIST, 
+                datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+            
+            shared_container_account = CloudStorageAccount(account_name=account.account_name, sas_token=container_sas)
+            shared_container_block_service = shared_container_account.create_block_blob_service()
+            
+            print('4. Create blob with container sas')
+            shared_container_block_service.create_blob_from_text(container_name, 'myblob', 'blob data')
+            
+            print('5. List blobs with container sas')
+            blobs = shared_container_block_service.list_blobs(container_name)
+            for blob in blobs:
+                print('blob ' + blob.name)
+            
+            print('6. Delete blob with container sas')
+            shared_container_block_service.delete_blob(container_name, 'myblob')
+        finally:            
+            print('7. Delete container')
+            blockblob_service.delete_container(container_name)
+            
+        print("Containers Sas sample completed")
+        
+    def list_containers(self, account):
+        
+        container_prefix = 'blockblobcontainers' + self.random_data.get_random_name(6)
+        
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+
+        try:
+            # Create containers
+            for i in range(5):
+                container_name = container_prefix + str(i)
+                print('1. Create a container with name - ' + container_name)
+                blockblob_service.create_container(container_name)
+            
+            # List all the blobs in the container 
+            print('2. List containers with prefix ' + container_prefix)
+            containers = blockblob_service.list_containers(container_prefix)
+            for container in containers:
+                print('\tContainer Name: ' + container.name)
+        finally:
+            # Delete the containers
+            print("3. Delete Containers")
+            for i in range(5):
+                container_name = container_prefix + str(i)
+                if blockblob_service.exists(container_name):
+                    blockblob_service.delete_container(container_name)
+            
+        print("Containers sample completed")
+
+    def container_acl_operations(self, account):
+        
+        container_name = 'aclblockblobcontainer' + self.random_data.get_random_name(6)
+        
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+
+        try:
+            print('1. Create a container with name - ' + container_name)
+            blockblob_service.create_container(container_name)
+                
+            print('2. Set access policy for container')
+            access_policy = AccessPolicy(permission=ContainerPermissions.READ,
+                                        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+            identifiers = {'id': access_policy}
+            blockblob_service.set_container_acl(container_name, identifiers)
+
+            print('3. Get access policy from container')
+            acl = blockblob_service.get_container_acl(container_name)
+
+            print('4. Clear access policy in container')
+            # Clear
+            blockblob_service.set_container_acl(container_name)
+
+        finally:            
+            print('5. Delete container')
+            blockblob_service.delete_container(container_name)
+            
+        print("Container ACL operations sample completed")
+        
+    def properties_and_metadata_operations(self, account):
+        file_blob_name = "HelloWorld.png"
+        text_blob_name = "Text"
+         
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+
+        container_name = 'blockblobbasicscontainer' + self.random_data.get_random_name(6)
+
+        try:
+            # Create a new container
+            print('1. Create a container with name and custom metadata - ' + container_name)
+            blockblob_service.create_container(container_name, {'sample':'azure-storage'})
+                    
+            # Upload file as a block blob
+            print('2. Uploading BlockBlob from file with properties and custom metadata')
+            #Get full path on drive to file_to_upload by joining the fully qualified directory name and file name on the local drive
+            full_path_to_file = os.path.join(os.path.dirname(__file__), file_blob_name)
+            
+            blockblob_service.create_blob_from_path(container_name, file_blob_name, full_path_to_file, 
+                content_settings=ContentSettings(content_type='image/png'),
+                metadata={'category':'azure-samples'})
+            
+            blockblob_service.create_blob_from_text(container_name, text_blob_name, 'Data',
+                content_settings=ContentSettings(content_encoding ='UTF-8', content_language='en'),
+                metadata={'origin':'usa', 'title': 'azure-samples'})
+            
+            # Get all the container properties 
+            print('3. Get Container metadata')
+
+            container = blockblob_service.get_container_properties(container_name)
+            
+            print('    Metadata:')
+
+            for key in container.metadata:
+                print('        ' + key + ':' + container.metadata[key])
+            
+            # Get all the blob properties 
+            print('4. Get Blob properties')
+            blob = blockblob_service.get_blob_properties(container_name, file_blob_name)
+            
+            print('    Metadata:')
+            for key in blob.metadata:
+                print('        ' + key + ':' + blob.metadata[key])
+            
+            print('    Properties:')
+            print('        Content-Type:' + blob.properties.content_settings.content_type)
+        finally:            
+            # Delete the container
+            print("5. Delete Container")
+            if blockblob_service.exists(container_name):
+                blockblob_service.delete_container(container_name)
+        
+    # Set CORS
+    def set_cors_rules(self, account):
+
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+        
+        cors_rule = CorsRule(
+            allowed_origins=['*'], 
+            allowed_methods=['POST', 'GET'],
+            allowed_headers=['*'],
+            exposed_headers=['*'],
+            max_age_in_seconds=3600)
+        
+        print('1. Get Cors Rules')
+        original_cors_rules = blockblob_service.get_blob_service_properties().cors
+        
+        try:
+            print('2. Overwrite Cors Rules')
+            blockblob_service.set_blob_service_properties(cors=[cors_rule])
+        finally:        
+            print('3. Revert Cors Rules back to the original ones')
+            #reverting cors rules back to the original ones
+            blockblob_service.set_blob_service_properties(cors=original_cors_rules)
+        
+        print("CORS sample completed")
+
+    # Lease Container
+    def lease_container(self, account):
+        # Create a Block Blob Service object
+        blockblob_service = account.create_block_blob_service()
+        
+        try:
+            container_name = 'blockblobcontainer' + self.random_data.get_random_name(6)
+            print('1. Create a container with name - ' + container_name)
+            blockblob_service.create_container(container_name)
+
+            print('2. Acquire lease on container')
+            lease_id = blockblob_service.acquire_container_lease(container_name, lease_duration=15)
+
+            print("3. Try to delete container without lease")
+            try:
+                blockblob_service.delete_container(container_name)
+            except:
+                print('Got expected exception. Cannot delete container, lease not specified')
+        finally:
+            print("4. Delete container with lease")
+            blockblob_service.delete_container(container_name, lease_id=lease_id)
+
+        print("Lease container sample completed")
+
+    # Lease Blob
+    def lease_blob(self, account):
+        blob_name = "exclusive"
+        
+        # Create a block blob service object
+        blockblob_service = account.create_block_blob_service()
+        container_name = 'blobcontainer' + self.random_data.get_random_name(6)
+
+        try:
+            # Create a new container
+            print('1. Create a container with name - ' + container_name)
+            blockblob_service.create_container(container_name)
+                    
+            # Create a block blob
+            print('2. Create Block Blob')
+            blob = self.random_data.get_random_bytes(255)
+            blockblob_service.create_blob_from_bytes(container_name, blob_name, blob)
+            
+            print('3. Acquire lease on blob')
+            lease_id = blockblob_service.acquire_blob_lease(container_name, blob_name, lease_duration=15)
+            
+            # Write to a block blob
+            print('4. Try to write to Block Blob without lease')
+            block_id = self.random_data.get_random_name(32)
+            block = self.random_data.get_random_bytes(255)
+            try:
+                blockblob_service.put_block(container_name, blob_name, block, block_id)
+            except:
+                print('Got expected exception. Cannot write blob, lease not specified')
+
+            print('5. Write to Block Blob with lease')
+            blockblob_service.put_block(container_name, blob_name, block, block_id, lease_id=lease_id)
+
+            print("6. Try to delete blob without lease")
+            try:
+                blockblob_service.delete_blob(container_name, blob_name)
+            except:
+                print('Got expected exception. Cannot delete blob, lease not specified')
+
+            print("7. Delete blob with lease")
+            blockblob_service.delete_blob(container_name, blob_name, lease_id=lease_id)
+        finally:
+            print("8. Delete container")
+            if blockblob_service.exists(container_name):
+                blockblob_service.delete_container(container_name)
+
+        print("Lease blob sample completed")
+        
+    #Page Blob Operations
+    def page_blob_operations(self, account):
+        file_to_upload = "HelloWorld.png"
+        page_size = 1024;
+        
+        # Create a page blob service object
+        pageblob_service = account.create_page_blob_service()
+        container_name = 'pageblobcontainer' + self.random_data.get_random_name(6)
+
+        try:
+            # Create a new container
+            print('1. Create a container with name - ' + container_name)
+            pageblob_service.create_container(container_name)
+            
+            # Create a new page blob to upload the file
+            print('2. Create a page blob')
+            pageblob_service.create_blob(container_name, file_to_upload, page_size * 1024)
+            
+            # Read the file
+            print('3. Upload pages to page blob')
+            index = 0
+            with open(file_to_upload, "rb") as file:
+                file_bytes = file.read(page_size)
+                while len(file_bytes) > 0:
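+                    # The last chunk is zero-padded to a full page_size so the update range stays aligned to the 512-byte pages required by page blobs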
+                    if len(file_bytes) < page_size:
+                        file_bytes = bytes(file_bytes + bytearray(page_size - len(file_bytes)))
+                        
+                    pageblob_service.update_page(container_name, file_to_upload, file_bytes, index * page_size, index * page_size + page_size - 1)
+                    
+                    file_bytes = file.read(page_size)
+                    
+                    index = index + 1
+            
+            pages = pageblob_service.get_page_ranges(container_name, file_to_upload)
+            
+            print('4. Enumerate pages in page blob')
+            for page in pages:
+                print('Page ' + str(page.start) + ' - ' + str(page.end))
+        finally:
+            print('5. Delete container')
+            if pageblob_service.exists(container_name):
+                pageblob_service.delete_container(container_name)
+
+    #Block Blob Operations
+    def block_blob_operations(self, account):
+        file_to_upload = "HelloWorld.png"
+        block_size = 1024
+        
+        # Create a block blob service object
+        blockblob_service = account.create_block_blob_service()
+        container_name = 'blockblobcontainer' + self.random_data.get_random_name(6)
+
+        try:
+            # Create a new container
+            print('1. Create a container with name - ' + container_name)
+            blockblob_service.create_container(container_name)
+            
+            blocks = []
+            
+            # Read the file
+            print('2. Upload file to block blob')
+            with open(file_to_upload, "rb") as file:
+                file_bytes = file.read(block_size)
+                while len(file_bytes) > 0:
+                    block_id = self.random_data.get_random_name(32) 
+                    blockblob_service.put_block(container_name, file_to_upload, file_bytes, block_id)                    
+                    
+                    blocks.append(BlobBlock(id=block_id))
+                    
+                    file_bytes = file.read(block_size)
+            
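+            # Commit the staged blocks in order; they are not part of the readable blob until the block list is committed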
+            blockblob_service.put_block_list(container_name, file_to_upload, blocks)
+            
+            print('3. Get the block list')
+            blockslist = blockblob_service.get_block_list(container_name, file_to_upload, None, 'all')
+            blocks = blockslist.committed_blocks
+
+            print('4. Enumerate blocks in block blob')
+            for block in blocks:
+                print('Block ' + block.id)
+        finally:
+            print('5. Delete container')
+            if blockblob_service.exists(container_name):
+                blockblob_service.delete_container(container_name)
+
+    # Manage properties of the Blob service, including logging and metrics settings, and the default service version.
+    def set_service_properties(self, account):
+
+        # Create a block blob service object
+        blockblob_service = account.create_block_blob_service()
+
+        print('1. Get Blob service properties')
+        props = blockblob_service.get_blob_service_properties();
+
+        retention = RetentionPolicy(enabled=True, days=5)
+        logging = Logging(delete=True, read=False, write=True, retention_policy=retention)
+        hour_metrics = Metrics(enabled=True, include_apis=True, retention_policy=retention)
+        minute_metrics = Metrics(enabled=False)
+
+        try:
+            print('2. Overwrite Blob service properties')
+            blockblob_service.set_blob_service_properties(logging=logging, hour_metrics=hour_metrics, minute_metrics=minute_metrics, target_version='2015-04-05')
+        finally:
+            print('3. Revert Blob service properties back to the original ones')
+            blockblob_service.set_blob_service_properties(logging=props.logging, hour_metrics=props.hour_metrics, minute_metrics=props.minute_metrics, target_version='2015-04-05')
+
+        print('4. Set Blob service properties completed')

+ 130 - 0
data/purposeCombined/Azure/table-service.py

@@ -0,0 +1,130 @@
+import requests
+import config
+from azure import storage
+from PackageInformationWorker.PyPIPackageInformation import PyPIPackageInformation
+import json
+import azure.storage.queue as queue
+import traceback
+import urllib.parse
+import logging
+
+logger = logging.getLogger()
+account_name = config.STORAGE_ACCOUNT_NAME
+account_key = config.STORAGE_ACCOUNT_KEY
+STATIC_ROW_KEY = 'ROWKEY'
+table_service = storage.CloudStorageAccount(account_name, account_key).create_table_service()
+table_service.create_table(config.PACKAGE_VERSION_DATA_TABLENAME)
+table_service.create_table(config.PACKAGE_SUMMARY_TABLENAME)
+
+def main():
+    # package, version = ('azure', '1.0.0')
+    # get a package to look at
+    # check that package and version.
+    # version data just gets filled in
+    # summary trickier.
+    # summary -> name,
+    #               first_published (might be different than python2_start if
+    #               not using trove classifier)
+    #               python2_start (change if we find earlier),
+    #               python2_end (change if we find earlier, remove if package
+    #               after this come in and has python2),
+    #               python3_start (change if we find earlier)
+    try:
+        qs = queue.QueueService(config.STORAGE_ACCOUNT_NAME, config.STORAGE_ACCOUNT_KEY)
+        messages_in_batch = 5
+
+        while True:
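+            # Pull a small batch and hide it for messages_in_batch*60 seconds so other workers do not process the same messages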
+            messages = qs.get_messages(config.PACKAGE_QUEUE_NAME,numofmessages=messages_in_batch, visibilitytimeout=messages_in_batch*60)
+            for message in messages:
+                entity = json.loads(message.message_text)
+                _process_one_package(entity["package"], entity["version"])
+                # once completed delete the message
+                qs.delete_message(config.PACKAGE_QUEUE_NAME, message.message_id, message.pop_receipt)
+    except Exception as e:
+        # swallow exception here. we will just reprocess and delete the message.
+        # known failures:
+        # - connection aborted by get_messages sometimes.  this happens with a connectionreseterror (10054)
+        # - Random json errors. Could add retry.  
+        logger.error(traceback.format_exc())
+          
+def _process_one_package(package_name, version):
+    logger.info("Worker: Package:{} Version:{}".format(package_name, version))
+    if not package_name or not version:
+        logger.warning("package_name or version was empty. Moving on as the queue had bad data")
+        return
+
+    # .6684 seconds to run.  74577 total packages
+    package_info = PyPIPackageInformation.get_package_specific_version_info(package_name, version)
+    if not package_info:
+        logger.error("Worker: Package:{} Version:{} failed to get package info".format(package_name, version))
+        return
+
+    supports_python_2 = len([x for x in package_info['classifiers'] if x.startswith('Programming Language :: Python :: 2')]) > 0
+    supports_python_3 = len([x for x in package_info['classifiers'] if x.startswith('Programming Language :: Python :: 3')]) > 0
+    uploaded = package_info['uploaded']
+
+    try:
+        summary_entity = table_service.get_entity(config.PACKAGE_SUMMARY_TABLENAME, package_name, STATIC_ROW_KEY)
+    except:
+        # we don't have a summary for this entry.
+        summary_entity = { 
+            'PartitionKey':package_name, 'RowKey':STATIC_ROW_KEY, 'First_Published':None, 
+            'Python2_Start':None, 'Python2_End':None, 'Python3_Start':None
+            }
+        table_service.insert_or_replace_entity(config.PACKAGE_SUMMARY_TABLENAME, package_name, STATIC_ROW_KEY, summary_entity)
+        summary_entity = table_service.get_entity(config.PACKAGE_SUMMARY_TABLENAME, package_name, STATIC_ROW_KEY)
+
+    # set fields using upload. Upload is none if the version has never been uploaded
+    # Basically just filter out packages that never have content from our records.
+    if uploaded is not None:
+        if not hasattr(summary_entity, 'First_Published') or summary_entity.First_Published is None or summary_entity.First_Published > uploaded:
+            # if the published date is empty or later than the current release we
+            # are viewing update
+            summary_entity.First_Published = uploaded
+
+        if supports_python_2 and \
+            (not hasattr(summary_entity, 'Python2_Start') or summary_entity.Python2_Start is None or summary_entity.Python2_Start > uploaded):
+            # if the published date is empty or later than the date and it supports
+            # python 2
+            summary_entity.Python2_Start = uploaded
+    
+        if supports_python_2 and hasattr(summary_entity, 'Python2_End') and summary_entity.Python2_End is not None and summary_entity.Python2_End < uploaded:
+            # this release supports python2 but was uploaded after the date we thought
+            # python2 support ended, so support must not really have ended
+            summary_entity.Python2_End = None    
+        elif hasattr(summary_entity, 'Python2_Start') and hasattr(summary_entity, 'Python2_End') and \
+            summary_entity.Python2_Start is not None and summary_entity.Python2_End is not None and \
+            (summary_entity.Python2_End > uploaded and summary_entity.Python2_Start < uploaded):
+            # if we don't support python2, and we have started supporting python2
+            # at some point
+            # and if the date we are saying we ended is after the start
+            summary_entity.Python2_End = uploaded
+
+        if supports_python_3 and \
+            (not hasattr(summary_entity, 'Python3_Start') or summary_entity.Python3_Start is None or summary_entity.Python3_Start > uploaded):
+            # if the published date is empty or later than the current release we
+            # are viewing update
+            summary_entity.Python3_Start = uploaded
+
+    version_entity = _insert_entity_to_package_version_table(package_name, version, supports_python_2, supports_python_3, package_info['downloads'], uploaded)
+    summary_entity = table_service.insert_or_replace_entity(config.PACKAGE_SUMMARY_TABLENAME, package_name, STATIC_ROW_KEY, summary_entity)
+
+def _insert_entity_to_package_version_table(package, version, python2, python3, downloads, upload_time):
+    # TODO: issue with python azure storage.  Version can't have '~' in it. https://github.com/Azure/azure-storage-python/issues/76
+    package_sanitized = urllib.parse.quote_plus(package)
+    version_sanitized = urllib.parse.quote_plus(version)
+
+    try:
+        result =  table_service.insert_or_replace_entity(config.PACKAGE_VERSION_DATA_TABLENAME, package_sanitized, version_sanitized,
+                                    {'PartitionKey' : package_sanitized,
+                                     'RowKey': version_sanitized, 
+                                     'Python2': python2, 
+                                     'Python3': python3,
+                                     'Downloads': downloads,
+                                     'UploadTime': upload_time})
+
+        return result
+    except Exception as e:
+        logger.error("Failed to insert Package:{} Version:{} Python2:{} Python3:{} Downloads:{} UploadTime:{} Exception:{}".format(
+            package, version, python2, python3, downloads, upload_time, traceback.format_exc()))
+        raise e

+ 218 - 0
data/purposeCombined/Azure/table-storage.py

@@ -0,0 +1,218 @@
+#-------------------------------------------------------------------------
+# Microsoft Developer & Platform Evangelism
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, 
+# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
+# OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+#----------------------------------------------------------------------------------
+# The example companies, organizations, products, domain names,
+# e-mail addresses, logos, people, places, and events depicted
+# herein are fictitious. No association with any real company,
+# organization, product, domain name, email address, logo, person,
+# places, or events is intended or should be inferred.
+#--------------------------------------------------------------------------
+import config
+import datetime
+import time
+from random_data import RandomData
+from tablestorageaccount import TableStorageAccount
+from azure.storage import CloudStorageAccount, AccessPolicy
+from azure.storage.table import TableService, Entity, TablePermissions
+from azure.storage.models import CorsRule, Logging, Metrics, RetentionPolicy, ResourceTypes, AccountPermissions
+
+#
+# Azure Table Service Sample - Demonstrate how to perform common tasks using the Microsoft Azure Table Service
+# including creating a table, CRUD operations and different querying techniques.
+#
+# Documentation References:
+#  - What is a Storage Account - http://azure.microsoft.com/en-us/documentation/articles/storage-whatis-account/
+#  - Getting Started with Tables - https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-table-storage/
+#  - Table Service Concepts - http://msdn.microsoft.com/en-us/library/dd179463.aspx
+#  - Table Service REST API - http://msdn.microsoft.com/en-us/library/dd179423.aspx
+#  - Table Service Python API - http://azure.github.io/azure-storage-python/ref/azure.storage.table.html
+#  - Storage Emulator - http://azure.microsoft.com/en-us/documentation/articles/storage-use-emulator/
+#
+class TableAdvancedSamples():
+
+    def __init__(self):
+        self.random_data = RandomData()
+
+    # Runs all samples for Azure Storage Table service.
+    def run_all_samples(self, account):
+        table_service = account.create_table_service()
+        print('Azure Storage Advanced Table samples - Starting.')
+        
+        print('\n\n* List tables *\n')
+        self.list_tables(table_service)
+        
+        if not account.is_azure_cosmosdb_table():
+            print('\n\n* Set service properties *\n')
+            self.set_service_properties(table_service)
+
+            print('\n\n* Set Cors rules *\n')
+            self.set_cors_rules(table_service)
+
+            print('\n\n* ACL operations *\n')
+            self.table_acl_operations(table_service)
+        
+        if (config.IS_EMULATED):
+            print('\n\n* Shared Access Signature is not supported in emulator *\n')
+        else:
+            print('\n\n* SAS operations *\n')
+            self.table_operations_with_sas(account)
+
+        print('\nAzure Storage Advanced Table samples - Completed.\n')
+
+    # Manage tables including creating, listing and deleting
+    def list_tables(self, table_service):
+        table_prefix = 'table' + self.random_data.get_random_name(6)
+
+        try:        
+            # Create tables
+            for i in range(5):
+                table_name = table_prefix + str(i)
+                print('1. Create a table with name - ' + table_name)
+                table_service.create_table(table_name)
+            
+            # List all the tables 
+            print('2. List tables')
+            tables = table_service.list_tables()
+            for table in tables:
+                print('\tTable Name: ' + table.name)
+
+        finally:
+            # Delete the tables
+            print("3. Delete Tables")
+            for i in range(5):
+                table_name = table_prefix + str(i)
+                if(table_service.exists(table_name)):
+                    table_service.delete_table(table_name)
+            
+        print("List tables sample completed")
+    
+    # Manage properties of the Table service, including logging and metrics settings, and the default service version.
+    def set_service_properties(self, table_service):
+        print('1. Get Table service properties')
+        props = table_service.get_table_service_properties()
+
+        retention = RetentionPolicy(enabled=True, days=5)
+        logging = Logging(delete=True, read=False, write=True, retention_policy=retention)
+        hour_metrics = Metrics(enabled=True, include_apis=True, retention_policy=retention)
+        minute_metrics = Metrics(enabled=False)
+
+        try:
+            print('2. Overwrite Table service properties')
+            table_service.set_table_service_properties(logging=logging, hour_metrics=hour_metrics, minute_metrics=minute_metrics)
+
+        finally:
+            print('3. Revert Table service properties back to the original ones')
+            table_service.set_table_service_properties(logging=props.logging, hour_metrics=props.hour_metrics, minute_metrics=props.minute_metrics)
+
+        print('4. Set Table service properties completed')
+    
+    # Manage CORS rules on the table service
+    def set_cors_rules(self, table_service):
+        cors_rule = CorsRule(
+            allowed_origins=['*'], 
+            allowed_methods=['POST', 'GET'],
+            allowed_headers=['*'],
+            exposed_headers=['*'],
+            max_age_in_seconds=3600)
+        
+        print('1. Get Cors Rules')
+        original_cors_rules = table_service.get_table_service_properties().cors
+
+        try:        
+            print('2. Overwrite Cors Rules')
+            table_service.set_table_service_properties(cors=[cors_rule])
+
+        finally:
+            #reverting cors rules back to the original ones
+            print('3. Revert Cors Rules back to the original ones')
+            table_service.set_table_service_properties(cors=original_cors_rules)
+        
+        print("CORS sample completed")
+
+    # Manage table access policy
+    def table_acl_operations(self, table_service):
+        table_name = 'acltable' + self.random_data.get_random_name(6)
+
+        try:        
+            print('1. Create a table with name - ' + table_name)
+            table_service.create_table(table_name)
+                
+            print('2. Set access policy for table')
+            access_policy = AccessPolicy(permission=TablePermissions.QUERY,
+                                        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=1))
+            identifiers = {'id': access_policy}
+            table_service.set_table_acl(table_name, identifiers)
+
+            print('3. Wait 30 seconds for acl to propagate')
+            time.sleep(30)
+
+            print('4. Get access policy from table')
+            acl = table_service.get_table_acl(table_name)
+
+            print('5. Clear access policy in table')
+            table_service.set_table_acl(table_name)
+
+        finally:
+            print('6. Delete table')
+            if(table_service.exists(table_name)):
+                table_service.delete_table(table_name)
+            
+        print("Table ACL operations sample completed")
+    
+    # Manage shared access signature on a table
+    def table_operations_with_sas(self, account):
+        table_name = 'sastable' + self.random_data.get_random_name(6)
+        
+        try:
+            # Create a Table Service object
+            table_service = account.create_table_service()
+            
+            print('1. Create table with name - ' + table_name)
+            table_service.create_table(table_name)
+            
+            # Create a Shared Access Signature for the table
+            print('2. Get sas for table')
+            
+            table_sas = table_service.generate_table_shared_access_signature(
+                table_name, 
+                TablePermissions.QUERY + TablePermissions.ADD + TablePermissions.UPDATE + TablePermissions.DELETE, 
+                datetime.datetime.utcnow() + datetime.timedelta(hours=1))
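+            # This SAS is scoped to the single table with query/add/update/delete rights and expires after one hour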
+
+            shared_account = TableStorageAccount(account_name=account.account_name, sas_token=table_sas, endpoint_suffix=account.endpoint_suffix)
+            shared_table_service = shared_account.create_table_service()
+
+            # Create a sample entity to insert into the table
+            customer = {'PartitionKey': 'Harp', 'RowKey': '1', 'email' : 'harp@contoso.com', 'phone' : '555-555-5555'}
+
+            # Insert the entity into the table
+            print('3. Insert new entity into table with sas - ' + table_name)
+            shared_table_service.insert_entity(table_name, customer)
+            
+            # Demonstrate how to query the entity
+            print('4. Read the inserted entity with sas.')
+            entity = shared_table_service.get_entity(table_name, 'Harp', '1')
+            
+            print(entity['email'])
+            print(entity['phone'])
+
+            # Demonstrate how to update the entity by changing the phone number
+            print('5. Update an existing entity by changing the phone number with sas')
+            customer = {'PartitionKey': 'Harp', 'RowKey': '1', 'email' : 'harp@contoso.com', 'phone' : '425-123-1234'}
+            shared_table_service.update_entity(table_name, customer)
+
+            # Demonstrate how to delete an entity
+            print('6. Delete the entity with sas')
+            shared_table_service.delete_entity(table_name, 'Harp', '1')
+
+        finally:
+            print('7. Delete table')
+            if(table_service.exists(table_name)):
+                table_service.delete_table(table_name)
+            
+        print("Table operations with sas completed")

BIN
data/purposeCombined/BI/.DS_Store


+ 47 - 0
data/purposeCombined/BI/BIL.py

@@ -0,0 +1,47 @@
+import numpy as np
+import pandas as pd 
+import matplotlib.pyplot as plt
+import seaborn as sns
+import warnings
+warnings.filterwarnings('ignore')
+
+data=pd.read_csv('D:/Ajay/input/Suicide.csv')
+
+data=data.drop(['HDI for year','country-year'],axis=1)                  #dropping these two columns
+
+#-----Table------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+group_data=data.groupby(['age','sex'])['suicides_no'].sum().unstack()  #group the data and pivot it into a table using 'unstack()'
+group_data=group_data.reset_index().melt(id_vars='age')                #reshape so each row is an (age, sex) pair
+group_data_female=group_data.iloc[:6,:]                                #keep the first 6 rows (the female age groups) using 'iloc'
+print("\n--Table of Suicides according to Female Age Groups--\n")
+from IPython.display import display
+display(group_data_female)                                             #display the table
+print("\n")
+
+#-----Country vs. suicide_no-------------------------------------------------------------------------------------------------------------------------------------------
+
+suicidesNo=[]
+for country in data.country.unique():                                   
+    suicidesNo.append(sum(data[data['country']==country].suicides_no))  #getting total no of suicides of all countries
+
+suicidesNo=pd.DataFrame(suicidesNo,columns=['suicides_no'])
+country=pd.DataFrame(data.country.unique(),columns=['country'])
+data_suicide_countr=pd.concat([suicidesNo,country],axis=1)              #defining the data to plot
+
+data_suicide_countr=data_suicide_countr.sort_values(by='suicides_no',ascending=False)#sort in descending order (highest number of suicides first)
+
+sns.barplot(y=data_suicide_countr.country[:20],x=data_suicide_countr.suicides_no[:20])  #show bars for only the 20 countries with the highest number of suicides
+plt.title("20 Countries with Highest Suicide Number from 1985 to 2016")
+plt.show()
+
+#-----Population vs. Age_group-----------------------------------------------------------------------------------------------------------------------------------------
+
+index_suicide=[]
+for age in data['age'].unique():
+    index_suicide.append(sum(data[data['age']==age].suicides_no)/len(data[data['age']==age].suicides_no))  #getting suicide rate of each age group
+    
+plt.bar(['5-14 years', '15-24 years', '25-34 years', '35-54 years', '55-74 years', '75+ years'],index_suicide,align='center',alpha=0.5) #defining xticks
+plt.xticks(rotation=45)                                                 #rotate the xticks by 45 degrees
+plt.title("Suicide rates of Different Age Groups")
+plt.show()
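+
+#-----Note--------------------------------------------------------------------------------------------------------------------------------------------------------------
+#The per-age loop above computes the mean 'suicides_no' per record for each age group.
+#A minimal equivalent one-liner (added sketch; 'index_suicide_alt' is an illustrative name,
+#not part of the original analysis), assuming the same 'data' DataFrame:
+index_suicide_alt = data.groupby('age')['suicides_no'].mean()           #same values, indexed by age label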

+ 1 - 0
data/purposeCombined/BI/BusinessIntelligence-Kaggle

@@ -0,0 +1 @@
+Subproject commit 06143b2ae0538affe8029950bf36597d253bcffd

+ 606 - 0
data/purposeCombined/BI/ID3_classification.py

@@ -0,0 +1,606 @@
+# TODO mention in the report that with every level of the tree the data gets smaller and smaller.
+# NEXT STEPS
+# TODO: Create an infographic and host it on a web page.
+# TODO: Gather live data from news articles (Can try using NLTK & urllib).
+# TODO: Use Natural Language Processing to automate some of the data cleaning/integration.
+
+###################################################################################################################
+# Online Retail Analysis - ID3 CLASSIFICATION                                                                     #
+#    NOTE! Concepts will be explained with examples from the Street data set, which can be found below.           #
+#    The reason for this is because that data set is very small and easy to follow.                               #
+#                                                                                                                 #
+# 1) RESOURCES                                                                                                    #
+#    ID3 TUTORIALS:                                                                                               #
+#      1) https://sefiks.com/2017/11/20/a-step-by-step-id3-decision-tree-example/                                 #
+#      2) https://medium.com/coinmonks/what-is-entropy-and-why-information-gain-is-matter-4e85d46d2f01            #
+#                                                                                                                 #
+#    DECISION TREE TUTORIAL: https://www.lucidchart.com/pages/decision-tree                                       #
+#    ENTROPY (MORE DETAILS): https://en.wikipedia.org/wiki/Entropy_(information_theory)                           #
+#                                                                                                                 #
+# 2) DATA SETS                                                                                                    #
+#    TEST DATA SET: This data set can be found by navigating to the PLAY TENNIS DATA SET region in this file.     #
+#    It is a part of the ID3 file because I believe it would be useful to have an example of how the ID3 code     #
+#    works with a data set and also provides an opportunity to better understand what the code is doing.          #
+#    To have a look at ID3 applied to a small data set just call the test_run_algorithm() function at the         #
+#    end of the file.                                                                                             #
+#                                                                                                                 #
+# 3) ALGORITHM OVERVIEW                                                                                           #
+#    Used to generate a decision tree from a given data set. It works by evaluating each attribute                #
+#    in the data set to place the nodes in an order that will return an accurate result.                          #
+#                                                                                                                 #
+# 4) USES                                                                                                         #
+#    A) Classify labeled data generally to do with NLP, approving loans and credit cards, etc.                    #
+#    B) Another non-standard use of this algorithm is to use it to fill a missing value in the data set           #
+#    during the pre-processing stage.                                                                             #
+#                                                                                                                 #
+###################################################################################################################
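+
+# Added reference (not part of the original header): the two quantities the algorithm ranks
+# attributes by, matching calc_entropy / calc_info_gain below.
+#     Entropy of a labelled set S:          H(S) = -sum_i p_i * log2(p_i)
+#     Information gain of an attribute A:   Gain(S, A) = H(S) - sum_v (|S_v| / |S|) * H(S_v)
+# where S_v is the subset of S in which attribute A takes the value v; the attribute with the
+# highest gain becomes the next node in the tree.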
+
+import math
+import copy
+
+# region PERFORMANCE IMPROVEMENTS (for Python 3.8)
+""" 
+Applied: (TO DOCUMENT)
+
+TODO: 
+   1) Remove every dict.keys() call and use the dict itself; membership tests and iteration on the
+      dict directly are the idiomatic form (and in Python 2, dict.keys() built a full list in memory).
+      https://stackoverflow.com/questions/4730993/python-key-in-dict-keys-performance-for-large-dictionaries
+"""
+# endregion
+
+# region PLAY TENNIS DATA SET
+DATASET_BY_ATTRIB_DICT = {"outlook": ["Sunny", "Sunny", "Overcast", "Rain", "Rain", "Rain", "Overcast",
+                                      "Sunny", "Sunny", "Rain", "Sunny", "Overcast", "Overcast", "Rain"],
+                          "temperature": ["Hot", "Hot", "Hot", "Mild", "Cool", "Cool", "Cool",
+                                          "Mild", "Cool", "Mild", "Mild", "Mild", "Hot", "Mild"],
+                          "humidity": ["High", "High", "High", "High", "Normal", "Normal", "Normal",
+                                       "High", "Normal", "Normal", "Normal", "High", "Normal", "High"],
+                          "wind": ["Weak", "Strong", "Weak", "Weak", "Weak", "Strong", "Strong",
+                                   "Weak", "Weak", "Weak", "Strong", "Strong", "Weak", "Strong"]}
+
+
+# Answer as to whether or not it is a good time to play tennis.
+TARGET_ATTRIB_LIST = ["No", "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No"]
+
+# CONSTANT VARIABLES  # TODO: Optimise these variables by making them immutable (specifying they are const with Python)
+TARGET_ATTRIB_NAME = "play tennis"
+TRAIN_DATA_SIZE = len(TARGET_ATTRIB_LIST)
+# endregion
+
+
+# Represents a tree node and links to derived nodes.
+class Node:
+
+    def __init__(self, node_name, derived_nodes=[]):
+        self.node_name = node_name
+        # Copy the incoming list so nodes never share Python's mutable default argument;
+        # an explicit None marks an endpoint/leaf node (see build_node / classify).
+        self.derived_nodes = list(derived_nodes) if derived_nodes is not None else None
+
+
+class ID3DecisionTree:
+    def __init__(self):
+        self.root_node = None
+
+        # Keeps track of all the nodes at the end of the branches that are available to link to.
+        # In this way, no code needs to be run to find the next available space for a new node.
+        # The node at index 0 is always the one to add to first, once the new node is linked to it, it gets popped off
+        # and the new node gets appended to the end of this list.
+        self.active_branch_nodes = []
+
+        # TODO: Merge this list with the active_branch_nodes to be in dictionary format like so
+        # {attrib1: [outcome1, outcome2], attrib2: [outcome1, outcome2, outcome3]}
+        self.linked_attributes = []
+
+        # IMPORTANT NOTE:
+        # Key to understanding how the DecisionTree class works is understanding the dataset_occurrence_dict
+        # structure, as that is what is used for most calculations. This structure contains only the data from the
+        # dataset required to construct the tree. Any repetition of attribute data has been removed to reduce load.
+        # The 'dataset_occurrence_dict' structure is an unordered dictionary, where the structure itself gives more
+        # information about the dataset. For example, every attribute of the data set is a key, which contains
+        # a dictionary of its outcomes/possible values, and for each outcome, there is a dictionary showing the
+        # distribution of the outcomes for the selected target attribute.
+        # Example of dictionary structure below.
+        """ Example structure: (where 'AN'-attribute name; 'ON'-outcome name; 'TON'-target outcome name) 
+            dataset_occurrence_dict = {"AN 1": {"ON 1": {"TON 1": 1, "TON 2": 2},
+                                                "ON 2": {"TON 1": 0, "TON 2": 1},
+                                                "ON 3": {"TON 1": 0, "TON 2": 1}
+                                                },
+                                       "AN 2": {"ON 1": {"TON 1": 4, "TON 2": 0},
+                                                "ON 2": {"TON 1": 1, "TON 2": 0}
+                                                }
+                                       }
+                                                
+            The example above can be read, for attribute 1 - AN1, there are 3 outcomes - ON1, ON2, ON3. 
+            The target has 2 possible outcomes TON1 and TON2. Those values are being tracked/accounted for, 
+            for each possible outcome of each attribute. For AN1, ON1 there is 1 occurrence of TON1 and 2 occurrences of 
+            TON2. For AN1, ON2 there are 0 occurrences of TON1, and 1 occurrence of TON2 therefore the answer for this 
+            branch is TON2. Same for AN1, ON3 - answer TON2. If all the occurrences of TON1 and TON2 for attrib 1 (AN1)
+            are summed, we get the number of entries in the given data set. 
+        """
+        self.dataset_occurrence_dict = {}
+
+    # region BUILD TREE UTILITIES
+    """ Construct dataset distribution/occurrence dictionary - "dataset_occurrence_dict".
+    PARAMETERS
+      :param (dict) dataset_by_attrib_dict
+      :param (list) target_list """
+    def generate_occurrences(self, dataset_by_attrib_dict, target_list):
+        # TODO: assert that all attribute lists have the same length
+
+        # Update the dictionary with each attribute
+        for attrib_name in dataset_by_attrib_dict.keys():
+            # STEP 1: ADD the current attribute to the 'dataset_occurrence_dict' structure
+            self.dataset_occurrence_dict.update({attrib_name: {}})
+
+            # STEP 2: Fetch a list containing only the unique data from attribute_list and target_list.
+            attribute_list = dataset_by_attrib_dict[attrib_name]
+            unique_attrib_outcomes = list(set(attribute_list))
+            unique_answers = list(set(target_list))
+
+            # For each unique outcome of the current attribute
+            for attrib_outcome in unique_attrib_outcomes:
+                #   2.1) Update dictionary to store the next attribute outcome
+                self.dataset_occurrence_dict[attrib_name].update({attrib_outcome: {}})
+                # print(self.dataset_occurrence_dict)
+
+                #   2.2) For the current attribute, look at each of its outcomes and add them onto the dictionary
+                for outcome in unique_answers:
+                    self.dataset_occurrence_dict[attrib_name][attrib_outcome].update({outcome: 0})
+                    # print(self.dataset_occurrence_dict)
+
+            # STEP 3: Goes through the dataset and counts the target outcome occurrences for each attribute occurrence
+            for itter in range(len(attribute_list)):
+                #   3.1) Fetch the current attribute outcome and the current target outcome from the dataset.
+                curr_attrib_occ = attribute_list[itter]
+                curr_target_occ = target_list[itter]
+
+                #   3.2) Update the count for the current target outcome in the current attribute outcome by 1
+                self.dataset_occurrence_dict[attrib_name][curr_attrib_occ][curr_target_occ] += 1
+
+    """ After a node is added to the tree the "dataset_occurrence_dict" dictionary should be updated.
+       PARAMETERS
+         :param (list) attrib_list - the raw attrib data from the dataset.
+         :param (list) target_list - the raw target data from the dataset. """
+    def get_next_branch_occurrences(self, dataset_by_attrib_dict, target_list):
+        # This is the outcome to update the dataset_occurrence_dict by
+
+        # A completely separate dictionary from the original, this dictionary will only hold a subdictionary
+        # of the original
+        subdict = copy.deepcopy(dataset_by_attrib_dict)
+        subtar = copy.deepcopy(target_list)
+
+        indices_to_remove = []
+        attrib_to_remove = None
+
+        # Looking through every possible attribute in the dictionary
+        for attrib_key in subdict:
+            attrib_found = False
+            # Count through each list of outcomes for the given attribute.
+            for count in range(len(subdict[attrib_key])):
+                # If the active outcome name is equal to the current outcome value in the list
+                if dataset_by_attrib_dict[attrib_key][count] == self.active_branch_nodes[0].node_name:
+                    attrib_found = True
+                    # According to the algorithm, the attribute containing the currently active outcome
+                    # should be removed
+                    if attrib_key in subdict:
+                        attrib_to_remove = attrib_key
+                else:
+                    indices_to_remove.append(count)
+                    # print(subdict[attrib_key][count])
+                    # subdict[attrib_key].pop(count)
+                    # TODO: assert that there is only one 0 in the list otherwise it is trying to remove the wrong values
+
+            if attrib_found:
+                break
+
+        # Processing the subdict data
+        #print("Subdict: ", subdict)
+        del subdict[attrib_to_remove]
+
+        for attrib in subdict:
+            #print("Discarding data in ", attrib)
+            complete_list = subdict[attrib]
+
+            sublist = [value for index, value in enumerate(complete_list) if index not in indices_to_remove]
+            subdict[attrib] = sublist
+
+        #print("After processing the data: ", subdict)
+
+        # Processing the subtar data
+        #print("Discarding data in target list")
+        #print("Target data before processing: ", subtar)
+        # print(indices_to_remove)
+        subtar = [value for index, value in enumerate(subtar) if index not in indices_to_remove]
+        #print("Target data after processing: ", subtar)
+
+        # TODO: Call this function recursively on each branch, pass in the shrinked dictionary
+        # TODO: test the base case thoroughly
+        # TODO: Build a new dataset_by_attrib_dict for the current outcome
+        # TODO: REMOVE outlook from the dataset dict when all its outcomes have children nodes assigned
+        # (How to know if an attribute is complete???)
+
+        return subdict, subtar
+
+    """ Checks if a branch is complete, i.e. the target outcome was found. 
+    PARAMETERS
+      :param  (dict) target_val_dist_for_attrib 
+      :returns (list) comp_branches - contains all the target outcomes reached for the given attribute."""
+    def track_target_outcomes(self, target_val_dist_for_attrib):
+        comp_branches = []
+
+        # Looks through each attribute outcome
+        for attrib_outcome_key in target_val_dist_for_attrib.keys():
+
+            # Tracks how many non-zero occurrences of a target outcome there are for this attribute outcome.
+            non_zero_outcome_count = 0
+
+            # This variable is set to the target outcome if the branch outcome is (100%) certain.
+            branch_answer = None
+
+            # Checks what the distribution of target outcomes is for the current attribute outcome.
+            # Ex: question - how do people drive based on the terrain, if the terrain is flat do they drive slow
+            # or fast, and what is it if the terrain is steep.
+            # Target outcomes - fast and slow; attrib outcomes - flat and steep.
+            # Distribution dictionary looks like this ->{'fast': {'slow': 0, 'fast': 1}, 'steep':{'slow': 2, 'fast': 1}}
+            for target_outcome_key in target_val_dist_for_attrib[attrib_outcome_key].keys():
+
+                # Fetch the number of occurrences for each target outcome for the current attribute
+                """"Another Example: if the target is can_buy_computer(possible values/outcomes: Yes or No) and the current 
+                attribute is age (possible values/outcomes:  <=30, 31..40 and >40) this will return how many of the entries 
+                where age is <=30 are no, then how many of the entries where age is <=30 are yes, then how many 
+                of the entries where age is 31..40 are yes and so on, until all cases are looked at. """
+                outcome_occurrences = target_val_dist_for_attrib[attrib_outcome_key][target_outcome_key]
+
+                # Check if the answer is certain and end the branch, i.e. count how many branches have
+                # certain target outcome
+                if outcome_occurrences > 0:
+                    non_zero_outcome_count += 1
+
+                    if non_zero_outcome_count == 1:
+                        branch_answer = target_outcome_key
+
+            if non_zero_outcome_count == 0:
+                print("INVALID RESULT!")
+            elif non_zero_outcome_count == 1:
+                print("THE ANSWER FOR <<", attrib_outcome_key, ">> is <<", branch_answer, ">>")
+                comp_branches.append({attrib_outcome_key: branch_answer})
+            elif non_zero_outcome_count > 1:
+                print("THE BRANCH <<", attrib_outcome_key, ">> IS STILL ACTIVE!")
+
+        return comp_branches
+
+    # Counts the occurrences of each value for a given attribute.
+    def count_value_occ(self, unique_values, attrib_data):
+        attrib_val_occ = {}
+
+        # Construct dictionary
+        for value in unique_values:
+            attrib_val_occ.update({value: 0})
+
+        # Initialise Dictionary
+        for u_value in unique_values:
+            attrib_val_occ[u_value] = attrib_data.count(u_value)
+
+        return attrib_val_occ
+
+    def calc_entropy(self, attrib_uv_count, overall):
+        entropy = 0
+        # print("UV: ", attrib_uv_count)
+
+        for key in attrib_uv_count.keys():
+
+            # if there is some occurrence of the value calculate entropy,
+            # otherwise ignore it (when there is 0 occurrences of the value)
+            if attrib_uv_count[key] != 0:
+                fraction = attrib_uv_count[key] / overall
+                target_attrib_calc = fraction * math.log2(fraction)
+
+                entropy += target_attrib_calc
+
+        return abs(entropy)
+
+    def calc_attrib_entropy(self, attrib_occurrences):
+        entropy_list = {}
+
+        for attrib_val_key in attrib_occurrences.keys():
+            attrib_val = attrib_occurrences[attrib_val_key]
+            overall = 0
+            for target_values in attrib_val.values():
+                overall += target_values
+
+            print("CALC TARGET ENTROPY FOR EACH ATTRIB OUTCOME: ", attrib_val)
+            attrib_entropy = self.calc_entropy(attrib_val, overall)
+            entropy_list.update({attrib_val_key: attrib_entropy})
+
+        print("Entropy list: ", entropy_list)
+
+        return entropy_list
+
+    # WEIGHTED AVERAGE ENTROPY for the children
+    def calc_entropy_weigh_avg(self, target_val_dist_attrib, overall, attrib_entropy):
+        weighted_entropy_avg = 0
+        for key in target_val_dist_attrib.keys():
+            curr_value = 0
+
+            for value in target_val_dist_attrib[key].values():
+                curr_value += value
+            weighted_entropy_avg += curr_value / overall * attrib_entropy[key]
+            # overall += curr_value
+
+        return weighted_entropy_avg
+
+    def calc_info_gain(self, target_entropy, target_dist_for_attrib):
+
+        # CALCULATE ENTROPY OF Attribute
+        attrib_entropy = self.calc_attrib_entropy(target_dist_for_attrib)
+        # print("Attrib Entropy: ", attrib_entropy)
+
+        # Weight each outcome by its share of the data reaching this branch (sum of the
+        # occurrence counts) rather than the full training set, so sub-branch weights sum to 1.
+        branch_size = sum(sum(outcome.values()) for outcome in target_dist_for_attrib.values())
+        weighted_avg_e = self.calc_entropy_weigh_avg(target_dist_for_attrib, branch_size, attrib_entropy)
+        # print("Attrib Weighted AVG: ", weighted_avg_e)
+
+        attrib_info_gain = target_entropy - weighted_avg_e
+
+        return attrib_info_gain
+
+    # IMPORTANT NOTE: An attribute node should always be made together with its outcomes, never an outcome alone
+    # as it is not how this function was setup.
+    # :param (str) name - should always be the name of an attribute.
+    def build_node(self, name, completed_branches):
+        attrib_node = Node(name)
+        derived_nodes = []
+
+        completed_outcomes = []
+        for branch in completed_branches:
+            completed_outcomes.append(list(branch.keys())[0])
+
+        # if all outcome branches for this attribute are completed, then the attribute is complete and its outcomes
+        # should be popped off the active_branch_nodes list
+        # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> CHECK COMPLETE ATTRIB: ", completed_branches)
+
+        # print(self.dataset_occurrence_dict[name].keys())
+        for outcome_name in self.dataset_occurrence_dict[name]:
+            new_outcome_node = Node(outcome_name)
+            # print("STATUS: NEW OUTCOME NODE CREATED")
+
+            # Check if the branch for the current outcome is complete (Target answer is 100% certain).
+            for branch in completed_branches:
+                if outcome_name in branch:
+                    # print("FOUND OUTCOME <<", outcome_name, ">> in ", branch)
+
+                    if len(new_outcome_node.derived_nodes) == 0:
+                        # Formally end the node
+                        endpoint_node = Node(branch[outcome_name], None)
+                        new_outcome_node.derived_nodes.append(endpoint_node)
+                        # print("STATUS: NEW OUTCOME ENDPOINT NODE CREATED & LINKED")
+
+            # The temp_outcome node is created so that the outcome node stored in the tree and the outcome node stored
+            # in the active_branch_nodes list are the same. This is important because I never append directly onto the
+            # tree but to a reference of the active branch of the tree. This allows to append to any depth of the tree
+            # without needing to do any traversal to find the next available node.
+            temp_outcome = copy.deepcopy(new_outcome_node)
+            derived_nodes.append(temp_outcome)
+
+            # If the branch is still active/available to add to
+            if outcome_name not in completed_outcomes:
+                # Add the new node to the active branch list
+                self.active_branch_nodes.append(temp_outcome)
+            """print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Completed Nodes:", acc_completed)
+        acc_completed[name]["completed"] = True
+        all_outcomes_list = list(self.dataset_occurrence_dict[name].keys())
+
+        for outcome in all_outcomes_list:
+                if outcome in acc_completed[name]["outcomes"]:
+                    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ", outcome, " TRUE")
+                else:
+                    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>> ", outcome, " FALSE")
+                    acc_completed[name]["completed"] = False
+
+            print(all_outcomes_list)"""
+
+            new_outcome_node.derived_nodes.clear()
+
+        # print("STATUS: NEW NODE CREATED")
+        attrib_node.derived_nodes = derived_nodes
+        return attrib_node
+
+    # IMPORTANT NOTE: active_branch_nodes is only updated when the build_node function is called, therefore
+    # the link will not be appropriate unless the node was created through the build_node function.
+    def link_node(self, new_node):
+        """
+        print("  <<< CHECKING IF THE TREE SEGMENT IS BUILT RIGHT! >>>    ")
+        # TEMP
+        print("ATTRIBUTE/PARENT NODE: ", new_node.node_name)
+        print("DERIVED NODES LIST: ", new_node.derived_nodes)
+
+        print("FOR EACH NODE IN DERIVED NODES.")
+        for node in new_node.derived_nodes:
+            print("\t OUTCOME NODE FOR ATTRIB: ", node.node_name)
+            for other in node.derived_nodes:
+                print("\t\t TARGET OUTCOME REACHED: ", other.node_name)"""
+        if self.root_node is None:
+            self.root_node = new_node
+
+        else:
+            # Add the new node to the tree
+            # I hard coded 0 as the active node index because index 0 is always the next available node to link to.
+            self.active_branch_nodes[0].derived_nodes.append(new_node)
+
+            # Update the available nodes!
+            # The node at index 0 is already taken so that node should be popped off
+            self.active_branch_nodes.pop(0)
+
+    # Builds a part of the tree (attribute node with setup derived nodes/outcome nodes) and links it to the tree.
+    def build_tree_chunk(self, dataset_by_attrib_dict, target_attrib_list):
+        self.generate_occurrences(dataset_by_attrib_dict, target_attrib_list)
+        # print("Main DICTIONARY", self.dataset_occurrence_dict)
+
+        # TARGET ATTRIBUTE CALCULATIONS - Required for the calculation of info_gain for the rest of the attributes.
+        target_uv_data = list(set(target_attrib_list))  # TODO: POSSIBLE EFFICIENCY DECREASE
+        target_uv_count = self.count_value_occ(target_uv_data, target_attrib_list)
+        # print("Target Unique Value Count: ", target_uv_count)
+
+        # Use the size of the data reaching this branch (equals TRAIN_DATA_SIZE only at the root)
+        target_entropy = self.calc_entropy(target_uv_count, len(target_attrib_list))
+        # print("TARGET ENTROPY: ", target_entropy)
+
+        # Build each node (calculate its entropy and info_gain, and assign each attribute's outcomes as children),
+        # store the node in the node list and sort the nodes by info_gain to build the tree with them.
+        next_node_data = {"name": None, "info gain": 0, "completed": None}
+
+        for attrib_name in self.dataset_occurrence_dict.keys():
+            print("\n", "-" * 50)
+
+            # ATTRIB CALCULATIONS
+            print("attrib_name: ", attrib_name)
+
+            # Contains a data structure representing the target attribute's value distribution
+            # with regard to another attribute
+            target_dist_for_attrib = self.dataset_occurrence_dict[attrib_name]
+            # print("Target occurrences: ", target_dist_for_attrib)
+
+            # Check if any of the branches is completed
+            completed_branches = self.track_target_outcomes(target_dist_for_attrib)
+            print("COMPLETED BRANCHES: ", completed_branches)
+
+            attrib_info_gain = self.calc_info_gain(target_entropy, target_dist_for_attrib)
+            # print("The INFO GAIN for <<", attrib_name, ">> is ", attrib_info_gain)
+
+            if next_node_data["info gain"] < attrib_info_gain:
+                next_node_data["name"] = attrib_name
+                next_node_data["info gain"] = attrib_info_gain
+                next_node_data["completed"] = completed_branches
+
+        print("------> The next new node is: ", next_node_data["name"], "\n\n")
+        new_node = self.build_node(next_node_data["name"], next_node_data["completed"])
+        self.link_node(new_node)
+
+    # endregion
+
+    def build_tree(self, dataset_by_attrib_dict, target_attrib_list):
+
+        self.build_tree_chunk(dataset_by_attrib_dict, target_attrib_list)
+        print("\n\n")
+
+        while len(self.active_branch_nodes) != 0:
+            print(">>>>>>>>>>>>>>>>>>> Current active node: ", self.active_branch_nodes[0].node_name)
+            # self.linked_attrib_names
+            sub_attrib_dict, sub_tar_list = self.get_next_branch_occurrences(dataset_by_attrib_dict, target_attrib_list)
+            self.build_tree_chunk(sub_attrib_dict, sub_tar_list)
+            print("\n\n>>>>>>>>>>>>>>>>>>> List of active nodes: ", self.active_branch_nodes)
+
+        print("\n\n", "<"*5, "THE TREE IS COMPLETE!", ">"*5, "\n\n")
+
+    def visualise_tree(self):
+        # Recursively print the tree; branch_track grows by one "\t" per level so the
+        # output reads as a hierarchy. Endpoint nodes (derived_nodes is None/empty) end a branch.
+        def print_branch(node, branch_track=""):
+            print(branch_track + str(node.node_name))
+            if node.derived_nodes:
+                for derived in node.derived_nodes:
+                    print_branch(derived, branch_track + "\t")
+
+        if self.root_node is not None:
+            print_branch(self.root_node)
+
+    # This function runs classification on one entry and returns the answer.
+    # Should only be called after the tree model was built.
+    def classify(self, entry_index, dataset_by_attrib_dict):
+        answer = None
+
+        # TODO: assert that root node is not none
+        current_node = self.root_node
+
+        while current_node.derived_nodes is not None:
+            print("\n  <<< TRAVERSING TREE >>>  ")
+            print("Current Attrib: ", current_node.node_name)
+
+            # Ask the tree which attribute/column to look for first
+            column_name = current_node.node_name
+
+            # Fetch the value for the given entry (entry_index) from the column identified by the tree.
+            current_outcome_name = dataset_by_attrib_dict[column_name][entry_index]
+            print("\tCurrent outcome name: ", current_outcome_name)
+
+            # Get that node from the derived nodes list
+            for outcome_node in current_node.derived_nodes:
+                if outcome_node.node_name == current_outcome_name:
+                    # print("\n  <<< TRAVERSING TREE >>>  ")
+                    # print("FOUND VALUE FOR ENTRY <<", entry_index, ">>  ->  <<", outcome_node.node_name, ">>")
+                    current_node = outcome_node.derived_nodes[0]
+                    # print("Current Attrib: ", current_node.node_name)
+                    answer = current_node.node_name
+
+        print("    <<< FOUND VALUE >>>  ")
+        print("    The answer is: ", answer)
+
+        return answer
+
+
+def test_run_algorithm():
+    print(" "*10, " << ID3 CLASSIFICATION ALGORITHM >> ", " "*10)
+
+    tree = ID3DecisionTree()
+    tree.build_tree(DATASET_BY_ATTRIB_DICT, TARGET_ATTRIB_LIST)
+
+    # APPLY CLASSIFICATION
+    # The index of the entry in the dataset.
+    entry_index = 0
+    tree.classify(entry_index, DATASET_BY_ATTRIB_DICT)
+
+
+test_run_algorithm()
+
+"""
+# Remove the completed branches
+for branch in completed_branches:
+    for key in branch.keys():
+        target_val_dist_for_grade.pop(key)
+
+print("After removing completed branches: ", target_val_dist_for_grade)
+"""
+
+# region Build Decision Tree
+
+# endregion
+
+""" 
+What is "Training Data"? 
+    Building the tree is done with training data, which already has the answer to whatever question is being asked. 
+    The example given with the data on the slides that asks if someone can buy a laptop is training data
+    because it already contains the answer.
+"""
+"""
+Apply the information gain function to each attribute, e.g. calculate_gain(attr_out).
+Should that be applied to the target as well? No.
+Example:
+    - G(train_data, O) = 0.246
+    - G(train_data, H) = 0.151
+    - G(train_data, W) = 0.048
+
+Once the root node is known, look at how many unique values there are.
+If there are 4 possible values and they are not numbers,
+for example "Sunny", "Rainy", etc., there should be 4 nodes.
+"""
+
+# region Apply Classification
+"""
+What is "Test Data"?
+    Test data is a new entry that we want to classify. 
+    For example: a bank may use an already trained ID3 model to check whether you should get a credit card or not.
+    It will have different attributes like - number of times you have gone bankrupt; what is your current net worth; 
+    are you a student; what is your credit score; etc.
+    The target attribute will then be EligibleForCreditCard(True or False).
+"""
+
+# Use the built decision tree to look through a row of data from the data set. This is done using test data.
+# (How to evaluate if the classification has an error?)
+""" 
+Steps: 
+    1. Find which attribute to look through first (to start with, ask the tree which attribute is the root node)
+        1.1 (When building the tree, make sure the attributes have exactly the same name as the Node data)
+        1.2 Search through all possible attributes
+        1.3 Check if the attribute name == the node name
+        
+    2. Find the attribute value for the current row
+        2.1 Ask the data set which value is given for this attribute
+        2.2 Find which of the child nodes in the tree is equivalent to the given value
+        
+    Repeat these steps recursively until an answer is found. 
+"""
+# endregion
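+
+# Added sketch (an assumption, not part of the original file): a recursive version of the
+# traversal described in the steps above, using the same Node structure from this file
+# (an endpoint/leaf node stores the answer and has derived_nodes set to None).
+def classify_recursive(node, entry_index, dataset_by_attrib_dict):
+    # Base case: a leaf/endpoint node carries the answer itself.
+    if not node.derived_nodes:
+        return node.node_name
+    # Step 1: the current node names the attribute/column to look at for this entry.
+    outcome_value = dataset_by_attrib_dict[node.node_name][entry_index]
+    # Step 2: follow the outcome branch whose name matches the entry's value.
+    for outcome_node in node.derived_nodes:
+        if outcome_node.node_name == outcome_value:
+            return classify_recursive(outcome_node.derived_nodes[0], entry_index, dataset_by_attrib_dict)
+    return None  # no matching branch for an unseen attribute value
+
+# Example call (assuming a tree built as in test_run_algorithm):
+# classify_recursive(tree.root_node, 0, DATASET_BY_ATTRIB_DICT)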

+ 336 - 0
data/purposeCombined/BI/Practica2.py

@@ -0,0 +1,336 @@
+# -*- coding: utf-8 -*-
+
+"""
+Author:
+    Francisco Solano López Rodríguez
+Date:
+    November 2018
+Content:
+    Practical 2: Clustering
+    Business Intelligence (Inteligencia de Negocio)
+    Degree in Computer Engineering
+    Universidad de Granada
+"""
+
+import time
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+
+from sklearn.cluster import KMeans, AgglomerativeClustering, estimate_bandwidth
+from sklearn.cluster import Birch, SpectralClustering, MeanShift, DBSCAN, MiniBatchKMeans
+from sklearn import metrics
+from sklearn import preprocessing
+from math import floor
+import seaborn as sns
+from scipy.cluster.hierarchy import dendrogram,ward
+
+seed = 12345
+
+################### FUNCTIONS ###########################
+
+def getPrediction(algorithm, X):
+    t = time.time()
+    cluster_predict = algorithm.fit_predict(X) 
+    tiempo = time.time() - t
+
+    return cluster_predict, tiempo
+
+# Function to get the mean of each cluster
+def getMeans(dataFrame):
+    return dataFrame.groupby("cluster").mean()
+
+# Function to get the standard deviation of each cluster
+def getStd(dataFrame):
+    return dataFrame.groupby("cluster").std()
+
+# Function to plot the scatter matrix
+def DrawScatterMatrix(data, name=None, display=True, save=False):
+    sns.set()
+    variables = list(data)
+    variables.remove('cluster')
+    sns_plot = sns.pairplot(data, vars=variables, hue="cluster", palette='Paired', plot_kws={"s": 25},
+                            diag_kind="hist") 
+    sns_plot.fig.subplots_adjust(wspace=.03, hspace=.03)
+
+    if name != None:        
+        plt.title("scatter_"+name)
+
+    # Show the image on screen
+    if display:
+        plt.show()
+
+    # Save the image to disk
+    if save:        
+        if name == None:
+            name = "_unknown_"
+        image_name = "scatter/scatter_" + name + ".png"
+        plt.savefig(image_name)
+        plt.clf()
+        print("Image saved: ", image_name)
+
+# Function to plot the heatmap
+def DrawHeatmap(data, name = None, display=True, save = False):
+    data_normal = data.apply(norm_to_zero_one)
+    meanDF = getMeans(dataFrame = data_normal)
+    hm = sns.heatmap(data=meanDF, linewidths=.1, cmap="Blues", annot=True, xticklabels='auto')
+    plt.xticks(rotation=0)
+
+    # Only set the title when a name was given (avoids concatenating None)
+    if name != None:
+        plt.title("heatmap_"+name)
+
+    # Show the image on screen
+    if display:
+        plt.show()
+
+    # Save the image to disk
+    if save:
+        if name == None:
+            name = "_unknown_"
+        image_name = "heatmap/heatmap_" + name + ".png"
+        plt.savefig(image_name)
+        plt.clf()
+        print("Image saved: ", image_name)
+
+# Function to plot the dendrogram
+def DrawDendrogram(data, name = None, display=True, save = False):
+    data_normal = preprocessing.normalize(data,norm='l2')
+    # Use the locally normalised data (the original referenced the global X_normal)
+    linkage_array = ward(data_normal)
+
+    dendrogram(linkage_array,leaf_rotation=90., leaf_font_size=5.)
+
+    if name != None:
+        plt.title("dendrogram_" + name)
+
+    # Show the image on screen
+    if display:
+        plt.show()
+
+    # Save the image to disk
+    if save:
+        if name == None:
+            name = "_unknown_"
+        image_name = "dendrogram/dendrogram_" + name + ".png"
+        plt.savefig(image_name)
+        plt.clf()
+        print("Image saved: ", image_name)
+
+def dataFrameResultados(algoritmos, num_cluster, metrics_CH, metrics_SC, tiempos):
+    df_algo = pd.DataFrame(algoritmos, columns=['Algoritmo'])
+    df_nc = pd.DataFrame(num_cluster, columns=['Num. Clusters'])
+    df_CH = pd.DataFrame(metrics_CH, columns=['CH'])
+    df_SC = pd.DataFrame(metrics_SC, columns=['SH'])
+    df_t = pd.DataFrame(tiempos, columns=['Tiempo'])
+
+    resultados = pd.concat([df_algo, df_nc, df_CH, df_SC, df_t], axis=1)
+
+    return resultados
+
+def norm_to_zero_one(df):
+    return (df - df.min()) * 1.0 / (df.max() - df.min())
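+
+# Added example: norm_to_zero_one is a column-wise min-max scaling, e.g. a column with the
+# values [20, 35, 50] is mapped to [0.0, 0.5, 1.0] (each value is shifted by the column minimum
+# and divided by the column range).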
+
+
+def executeClustering(algorithms, X, caso):
+
+    f = open("caso_" + str(caso) + ".txt", 'w')
+
+    X_normal = X.apply(norm_to_zero_one)
+
+    names = []
+    num_cluster = []
+    metrics_CH = []
+    metrics_SC = []
+    tiempos = []
+
+    print("\nCaso de estudio ", caso, ", tamaño: ", len(X))
+    f.write("\nCaso de estudio " + str(caso) + ", tamaño: " + str(len(X)))
+
+    for algorithm, name_algorithm in algorithms:
+
+        print("\n----------------------------------------\n")
+        print("Ejecutando algoritmo: ", name_algorithm, "\n")
+        f.write("\n--------------------------------------\n")
+        f.write("Ejecutando algoritmo: " + name_algorithm + "\n")        
+        # Ejecución algoritmo clustering
+        cluster_predict, tiempo = getPrediction(algorithm, X_normal)
+
+        # Convert the predictions to a DataFrame
+        clusters = pd.DataFrame(cluster_predict,index=X.index,columns=['cluster'])
+
+        print("Tamaño de cada cluster:")
+        f.write("\nTamaño de cada cluster:\n")
+        size=clusters['cluster'].value_counts()
+
+        for num,i in size.iteritems():
+           print('%s: %5d (%5.2f%%)' % (num,i,100*i/len(clusters)))
+           f.write('%s: %5d (%5.2f%%)\n' % (num,i,100*i/len(clusters)))
+        print()
+
+        # Get the metric results
+        metric_CH = metrics.calinski_harabaz_score(X_normal, cluster_predict)
+        metric_SC = metrics.silhouette_score(X_normal, cluster_predict, metric='euclidean', 
+                                         sample_size=floor(0.2*len(X)), random_state=seed)
+
+        # Store the algorithm name, the number of clusters,
+        # the times and the metrics for later comparison
+        names.append(name_algorithm)   
+        num_cluster.append(len(set(cluster_predict)))
+        metrics_CH.append(metric_CH)
+        metrics_SC.append(metric_SC)
+        tiempos.append(tiempo)
+
+        # Add the cluster assignment as a column of X
+        X_cluster = pd.concat([X, clusters], axis=1)
+        X_normal_cluster = pd.concat([X_normal, clusters], axis=1)
+
+        name = "caso_" + str(caso) + "_" + name_algorithm  
+
+        # Plot the scatter matrix
+        DrawScatterMatrix(data = X_cluster, name = name, display = False, save = True)
+
+        # Plot the heatmap
+        DrawHeatmap(data = X_cluster, name = name, display = False, save = True)
+
+        # DataFrame with the mean of each feature in each cluster
+        meanDF = getMeans(dataFrame = X_cluster)
+        print()
+        print(meanDF)
+        f.write(meanDF.to_string())
+
+        # If the algorithm is AgglomerativeClustering, plot the dendrogram
+        if name_algorithm == 'AC':
+            DrawDendrogram(data = X_cluster, name = name, display = False, save = True)
+
+
+    resultados = dataFrameResultados(names, num_cluster, metrics_CH, metrics_SC, tiempos)
+
+    print("\n**************************************\n")
+    print(resultados.to_string())
+    print("\n**************************************\n")
+
+    f.write("\n**************************************\n")
+    f.write(resultados.to_string())
+    f.write("\n**************************************\n")
+
+    f.close()
+
+
+#########################################################
+
+# Read the data
+
+print("Reading the data set...")
+censo = pd.read_csv('censo_granada.csv')
+censo = censo.replace(np.NaN,0) 
+print("Read complete.")
+
+
+###### CASE STUDIES ######
+
+#-------- CASE 1 --------
+
+casado = 2   # married
+hombre = 1   # male
+mujer = 6    # female
+
+subset = censo.loc[(censo['EDAD']>=20) & (censo['EDAD']<=50) & (censo['SEXO']==mujer)]
+usadas = ['EDAD', 'NPFAM', 'HM5', 'H0515']
+X = subset[usadas]
+X_normal = preprocessing.normalize(X, norm='l2')
+
+#-------- CASE 2 --------
+
+subset_2 = censo.loc[(censo['EDAD']>=20) & (censo['EDAD']<=50) & (censo['SEXO']==hombre)]
+usadas_2 = ['EDAD', 'NPFAM', 'HM5', 'H0515']
+X_2 = subset_2[usadas_2]
+X_normal_2 = X_2.apply(norm_to_zero_one)
+
+#-------- CASE 3 --------
+
+subset_3 = censo.loc[(censo['EDAD']>=20) & (censo['EDAD']<=50) & (censo['SEXO']==mujer)]
+usadas_3 = ['EDAD', 'NPFAM', 'NHIJOS', 'ESREAL']
+X_3 = subset_3[usadas_3]
+X_normal_3 = X_3.apply(norm_to_zero_one)
+
+###############################
+
+# Get the correlation between the variables
+'''
+correlation = X.corr()
+sns.heatmap(correlation, square = True)
+plt.show()
+'''
+
+#################### Algorithms #####################
+
+random_seed = 123
+
+k_means = KMeans(init='k-means++', n_clusters=5, n_init=5, random_state=random_seed)
+
+agglo=AgglomerativeClustering(n_clusters=5,linkage="ward")
+
+meanshift = MeanShift(bin_seeding=True)
+
+miniBatchKMeans = MiniBatchKMeans(init='k-means++',n_clusters=4, n_init=5, max_no_improvement=10, verbose=0, random_state=random_seed)
+
+dbscan = DBSCAN(eps=0.2)
+
+dbscan2 = DBSCAN(eps=0.1)
+
+algorithms = [(k_means, "KMeans"),
+              (agglo, "AC"),
+              (meanshift, "MeanShift"), 
+              (miniBatchKMeans, "MiniBatchKM"),
+              (dbscan, "DBSCAN")]
+
+algorithms2 = [(k_means, "KMeans"),
+              (agglo, "AC"),
+              (meanshift, "MeanShift"), 
+              (miniBatchKMeans, "MiniBatchKM"),
+              (dbscan2, "DBSCAN2")]
+
+
+# KMeans with different numbers of clusters
+
+algorithm_kmeans = []
+
+for i in range(5,9):
+    kmeans_i = KMeans(init='k-means++', n_clusters=i, n_init=5)
+    algorithm_kmeans.append((kmeans_i, "KMeans_" + str(i)))
+
+# AgglomerativeClustering with different numbers of clusters
+
+algorithm_AC = []
+
+for i in range(5,9):
+    agglo_i = AgglomerativeClustering(n_clusters=i,linkage="ward")
+    algorithm_AC.append((agglo_i, "AC_" + str(i)))
+
+# MiniBatchKMeans with different numbers of clusters
+
+algorithm_miniBatch = []
+
+for i in range(5,9):
+    miniBatch_i = MiniBatchKMeans(init='k-means++',n_clusters=i, n_init=5, max_no_improvement=10, verbose=0, random_state=random_seed)
+    algorithm_miniBatch.append((miniBatch_i, "MiniBatchKM_" + str(i)))
+
+#-----------------------------------------------------#
+
+# RUN CASE 1
+executeClustering(algorithms, X, 1)
+executeClustering(algorithm_kmeans, X, 1.1)
+executeClustering(algorithm_AC, X, 1.2)
+
+# RUN CASE 2
+executeClustering(algorithms, X_2, 2)
+executeClustering(algorithm_kmeans, X_2, 2.1)
+executeClustering(algorithm_miniBatch, X_2, 2.2)
+
+# RUN CASE 3
+executeClustering(algorithms2, X_3, 3)
+executeClustering(algorithm_kmeans, X_3, 3.1)
+executeClustering(algorithm_miniBatch, X_3, 3.2)
+

+ 132 - 0
data/purposeCombined/BI/apriori.py

@@ -0,0 +1,132 @@
+# author: Justin Cui
+# date: 2019/10/23
+# email: 321923502@qq.com
+
+
+from numpy import *
+
+
+def load_data():
+    dataSet = [['bread', 'milk', 'vegetable', 'fruit', 'eggs'],
+               ['noodle', 'beef', 'pork', 'water', 'socks', 'gloves', 'shoes', 'rice'],
+               ['socks', 'gloves'],
+               ['bread', 'milk', 'shoes', 'socks', 'eggs'],
+               ['socks', 'shoes', 'sweater', 'cap', 'milk', 'vegetable', 'gloves'],
+               ['eggs', 'bread', 'milk', 'fish', 'crab', 'shrimp', 'rice']]
+    return dataSet
+
+
+# Scan the full data set and generate candidate set C1
+def create_c1(data):
+    c1 = []
+    for transaction in data:
+        for item in transaction:
+            if [item] not in c1:
+                c1.append([item])
+    c1.sort()
+    return list(map(frozenset, c1))
+
+
+# Generate the frequent set L(i) from the corresponding C(i)
+def c2l(data, ck, min_support):
+    dict_sup = {}
+    for i in data:
+        for j in ck:
+            if j.issubset(i):
+                if j not in dict_sup:
+                    dict_sup[j] = 1
+                else:
+                    dict_sup[j] += 1
+    support_data = {}
+    result_list = []
+    for i in dict_sup:
+        temp_sup = dict_sup[i] / len(data)
+        if temp_sup >= min_support:
+            result_list.append(i)
+            support_data[i] = temp_sup
+    return result_list, support_data
+
+
+# Generate the candidate set C(k) from L(k-1)
+def get_next_c(Lk, k):
+    result_list = []
+    len_lk = len(Lk)
+    for i in range(len_lk):
+        for j in range(i + 1, len_lk):
+            l1 = list(Lk[i])[:k]
+            l2 = list(Lk[j])[:k]
+            if l1 == l2:
+                a = Lk[i] | Lk[j]
+                a1 = list(a)
+                b = []
+                for q in range(len(a1)):
+                    t = [a1[q]]
+                    tt = frozenset(set(a1) - set(t))
+                    b.append(tt)
+                t = 0
+                for w in b:
+                    if w in Lk:
+                        t += 1
+                if t == len(b):
+                    result_list.append(b[0] | b[1])
+    return result_list
+
+
+# Get all the frequent item sets L
+def get_all_l(data_set, min_support):
+    c1 = create_c1(data_set)
+    data = list(map(set, data_set))
+    L1, support_data = c2l(data, c1, min_support)
+    L = [L1]
+    k = 2
+    while (len(L[k - 2]) > 0):
+        Ck = get_next_c(L[k - 2], k - 2)
+        Lk, sup = c2l(data, Ck, min_support)
+        support_data.update(sup)
+        L.append(Lk)
+        k += 1
+    del L[-1]
+    return L, support_data
+
+
+# Get all subsets of the L sets
+def get_subset(from_list, result_list):
+    for i in range(len(from_list)):
+        t = [from_list[i]]
+        tt = frozenset(set(from_list) - set(t))
+        if tt not in result_list:
+            result_list.append(tt)
+            tt = list(tt)
+            if len(tt) > 1:
+                get_subset(tt, result_list)
+
+
+# Compute the confidence (and lift) of each candidate rule
+def calc_conf(freqSet, H, supportData, min_conf):
+    for conseq in H:
+        conf = supportData[freqSet] / supportData[freqSet - conseq]
+        lift = supportData[freqSet] / (supportData[conseq] * supportData[freqSet - conseq])
+        if conf >= min_conf and lift > 1:
+            print(set(freqSet - conseq), '-->', set(conseq), 'support', round(supportData[freqSet - conseq], 2), 'confidence:',
+                  conf)
+
+
+# Generate the association rules
+def gen_rule(L, support_data, min_conf=0.7):
+    for i in range(len(L)):
+        print("\n", i + 1, "-频繁项集为:")
+        for freqSet in L[i]:
+            print(set(freqSet), end="  ")
+    print("\n")
+    for i in range(1, len(L)):
+        for freqSet in L[i]:
+            H1 = list(freqSet)
+            all_subset = []
+            get_subset(H1, all_subset)
+            calc_conf(freqSet, all_subset, support_data, min_conf)
+
+
+if __name__ == '__main__':
+    dataSet = load_data()
+    L, supportData = get_all_l(dataSet, 0.5)
+    gen_rule(L, supportData, 0.6)
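+
+# Added worked example, using the six baskets returned by load_data():
+#   support({milk}) = 4/6, support({bread}) = 3/6, support({milk, bread}) = 3/6
+#   confidence({milk} -> {bread}) = support({milk, bread}) / support({milk}) = (3/6) / (4/6) = 0.75
+#   lift({milk} -> {bread})       = support({milk, bread}) / (support({milk}) * support({bread}))
+#                                 = (3/6) / ((4/6) * (3/6)) = 1.5   (> 1, so calc_conf would keep the rule)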

+ 440 - 0
data/purposeCombined/BI/bi_main.py

@@ -0,0 +1,440 @@
+"""
+This OOP script is for the BI Challenge
+"""
+from warnings import simplefilter
+simplefilter(action='ignore', category=FutureWarning)
+# %matplotlib inline
+from google.colab import files
+import pandas as pd
+import numpy as np
+# %reload_ext sql
+import sqlite3
+import seaborn as sns
+import matplotlib.pyplot as plt
+from plotly.offline import iplot
+import plotly.express as px
+from IPython.display import display  # display() is used below; imported explicitly so the script also runs outside a notebook
+
+pd.options.display.float_format = '{:.2f}'.format # suppress scientific notation
+# Declare your Github Repository address
+A_url='https://raw.githubusercontent.com/haensel-ams/recruitment_challenge/master/BI_201805/table_A_conversions.csv'
+B_url='https://raw.githubusercontent.com/haensel-ams/recruitment_challenge/master/BI_201805/table_B_attribution.csv'
+
+# The Extract class extracts data from your Github repository address
+class Extract():
+
+  def __init__(self,A_url,B_url):
+    print('\033[1m'+'Please, wait! I am extracting data from your Github Repository'+'\033[0m'+'\n...')
+    self.A_url=A_url
+    self.table_A_conversions=self.load_data(self.A_url)
+    self.B_url=B_url
+    self.table_B_attribution=self.load_data(self.B_url)
+    print('Data was successfully extracted!')
+  
+  def load_data(self,url):
+    self.data=pd.read_csv(url)
+    #display(self.data.head(3))
+    return self.data
+
+# The Transform class combines the two extracted datasets and does the data cleansing.
+# It also reports general information about the KPIs.
+class Transform():
+
+  def __init__(self,extract):
+    print('\033[1m'+'I am transforming the extracted data'+'\033[0m'+'\n...')
+    self.table_A_conversions=extract.table_A_conversions
+    self.table_B_attribution=extract.table_B_attribution
+    self.joined_tabs = self.combine_tab(self.table_A_conversions, self.table_B_attribution,'Conv_ID')
+    self.time_tab=self.cleaning_data(self.joined_tabs)
+    # self.infor_Data=self.get_infor(self.time_tab)
+    self.get_missing=self.check_missing(self.time_tab)
+    self.cleaned_tab=self.time_tab.dropna()
+    display(self.cleaned_tab.head(5))
+    self.infor_Data=self.get_infor(self.cleaned_tab)
+    self.more_infor=self.deep_infor(self.cleaned_tab)
+  
+  def deep_infor(self,data):
+    print('Total annual revenue: %d'%data['Revenue'].sum())
+    
+
+  def combine_tab(self,tab_1,tab_2,common_col):
+    print('I am combining the two datasets into one and converting the time format\n...')
+    self.data=pd.merge(tab_1, tab_2, on=common_col, how='outer')
+    # display(self.data.head(5))
+    return self.data
+
+  def cleaning_data(self,data):
+    data['Conv_Date']= pd.to_datetime(data['Conv_Date']) 
+    self.data=data
+    print('Data was completely transformed!')
+    return self.data
+
+  def get_infor(self,data):
+    print('\033[1m'+'General information:'+'\033[0m')
+    self.information=data.info()
+    print('\033[1m'+'Descriptive Statistics:'+'\033[0m')
+    # print(data.describe())
+    return self.information
+
+  def check_missing(self,data):
+    print('\033[1m'+ 'The number of missing values:'+'\033[0m')
+    self.miss_data=data.isnull().sum()
+    self.miss_rate=100*data.isnull().sum()/len(data)
+    self.mis_infor=pd.concat([self.miss_data, self.miss_rate], axis=1).reset_index()
+    self.mis_infor=self.mis_infor.rename(columns={0: 'Amounts', 1: 'Percentage'})
+    # print(self.mis_infor)
+    return self.miss_data
+
+# The Load class loads the transformed data into the database
+class  Load():
+
+  def __init__(self,transform):
+    print('\033[1m'+'I am loading the transformed data to my database'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab
+    self.connect=self.connect_database()
+    self.insert=self.insert_data(self.data)
+    
+  def connect_database(self):
+    print('I am trying to connect to my SQL database\n....')
+    self.connect= "%sql sqlite:///phuong_database.db"
+    print(self.connect,'connection is success!',sep='\n')
+    return self.connect
+
+  def insert_data(self,data):
+    print('I am loading the transformed data to my SQL Database\n....')
+    self.check ="%sql DROP TABLE IF EXISTS data"
+    self.insert="%sql PERSIST data"
+    self.list_table="%sql SELECT name FROM sqlite_master WHERE type='table'"
+    print(self.list_table)
+    self.data="%sql SELECT * FROM data LIMIT 3"
+    print(self.data)
+    print('Data was completely inserted into my SQL Database!')
+    return self.insert 
+
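+# Added sketch (the wrapper function is an assumption, not part of the original script):
+# how the Extract / Transform / Load classes above chain together.
+def run_etl_pipeline():
+    extracted = Extract(A_url, B_url)      # pull both CSVs from the Github repository
+    transformed = Transform(extracted)     # merge, clean and profile the joined table
+    Load(transformed)                      # push the cleaned table into the SQL database
+    return transformed                     # the cleaned data then feeds the EDA_* classes below
+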
+# The EDA_Overview_KPI class generates a preliminary overview of the daily KPIs
+class EDA_Overview_KPI():
+
+  def __init__(self,transform):
+    print('\033[1m'+'I am doing the Explanatory Data Analysis (EDA) process for Revenue KPIs'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Conv_Date','Revenue','User_ID']]
+    self.by_kpi=self.group_data(self.data,'Conv_Date','Revenue','User_ID')
+    # display(self.by_kpi.head(3))
+    self.kpi_fig=self.plot_kpi(self.by_kpi)
+    self.sum_stat=self.get_infor(self.by_kpi,'Conv_Date','Revenue','User_ID')
+      
+    
+  def group_data(self,data,target,exp_1,exp_2):
+    self.num_target=len(data[target].unique())
+    print('The number of '+target+': %d'%self.num_target)
+    self.data=data.groupby([target]).agg({exp_1:'sum',exp_2:'count'})
+    return self.data
+
+  def plot_kpi(self,data):
+    self.name_column=self.data.columns
+    plt.figure(figsize=(15, 9))
+    for i,col in enumerate(self.name_column):
+        plt.subplot(2,1,i+1)
+        plt.plot(self.data[col],label=col)
+        plt.title('The changes in the daily '+col +' over the time period',fontweight='bold',fontsize='12')
+        plt.legend()
+        plt.autoscale(enable=True, axis='both',tight=True)
+    plt.savefig('Overview_KPI.png')
+    files.download('Overview_KPI.png')
+    return self.name_column
+
+  def get_infor(self,data,target,exp_1,exp_2):
+    self.infor=display(self.data.head(8).T)
+    print('\033[1m'+'Descriptive Statistics of the Daily KPIs by '+ target +'\033[0m', self.data.describe(),sep='\n')
+    print('Date with the highest revenue:', self.data[exp_1].idxmax(axis = 0) )
+    print('Date with the lowest revenue:', self.data[exp_1].idxmin(axis = 0) )
+    print('Date with the highest number of users:', self.data[exp_2].idxmax(axis = 0) )
+    print('Date with the lowest number of users:', self.data[exp_2].idxmin(axis = 0) )
+    return self.infor
+
+# The EDA_KPI_Return class generates a preliminary overview of returning customers
+class EDA_KPI_Return():
+
+  def __init__(self,transform):
+    print('\033[1m'+'I am doing the Explanatory Data Analysis (EDA) process for User KPIs'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Conv_Date','User_ID']]
+    self.infor_user=self.get_infor(self.data,'User_ID')
+    self.by_user=self.group_data(self.data,'User_ID','Conv_Date')
+    display(self.by_user.head(8).T)
+    self.user_plot=self.plot_user(self.by_user,'Conv_Date')
+
+  def get_infor(self,data,exp):
+    self.num_user=data[exp].unique()
+    print('The number of users: %d'%len(self.num_user))
+    return self.num_user
+
+  def group_data(self,data,target,exp):
+    self.num_target=len(data[target].unique())
+    print('The number of '+target+': %d'%self.num_target)
+    self.data=data.groupby([target]).agg({exp:'count'})
+    # display(self.data.head(8).T)
+    print('\033[1m'+'Descriptive Statistics of the Daily KPIs by '+ target +'\033[0m', self.data.describe(),sep='\n')
+    return self.data
+
+  def plot_user(self,data,exp):
+    self.data=data.rename(columns={exp: 'The number of returns'})
+    self.ax=self.data.plot.hist(figsize=(15, 9),bins=1500,xlim=(1,20),color='#86bf91'
+                                ,title='The frequency of returning customers',grid=True)
+    self.ax.set_xlabel('The number of days')
+    plt.savefig('Customer_return.png')
+    files.download('Customer_return.png') 
+    return self.ax
+
+# The EDA_Static_Ren class explores the total revenue per channel over the whole period
+class EDA_Static_Ren():
+  
+  def __init__(self,transform):
+    print('\033[1m'+'I am doing the EDA on Revenue by Channel'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Channel','Revenue']]
+    display(self.data.head(3))
+    # self.infor_conver=self.get_infor(self.data)
+    self.by_ChanelRen=self.group_data(self.data,'Channel')
+    self.pie_ChanelRen=self.plot_pie(self.by_ChanelRen,'Revenue')
+
+  def plot_pie(self,data,target):
+    self.data=data
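+    # The five named channels ('A','G','H','I','B') are treated as the top
+    # contributors and everything else is lumped into 'The Rest' so the pie
+    # chart stays readable; the channel labels are assumed from the dataset.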
+    self.data['Total Conver'] = self.data.sum(axis=1)
+    self.data['Total Top Five'] = self.data[['A','G','H','I','B']].sum(axis=1)
+    self.data['The Rest'] = self.data['Total Conver']-self.data['Total Top Five']
+    self.ax=self.data[['A','G','H','I','B','The Rest']].T.plot.pie(y=target,figsize=(12, 7),autopct='%1.1f%%',)
+    plt.savefig('channel_Static_Ren.jpg')
+    files.download('channel_Static_Ren.jpg') 
+    return self.data
+    
+
+  def get_infor(self,data):
+    self.conver_uni=self.data.User_ID.unique()
+    print('The number of conversions: %d'%len(self.conver_uni))
+    return self.conver_uni
+
+  def group_data(self,data,target):
+    print('I am grouping data by '+ target + '\n...')
+    self.data=data.groupby([target]).agg({'Revenue':'sum'})
+    self.data=self.data.T
+    display(self.data)
+    print('I am done! ')
+    return self.data    
+
+# The EDA_Static_User class generates the total number of visits per channel over the whole period
+class EDA_Static_User():
+  
+  def __init__(self,transform):
+    print('\033[1m'+'I am doing the EDA on Users by Channel'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Channel','User_ID']] #'Conv_Date',
+    display(self.data.head(3))
+    # self.infor_conver=self.get_infor(self.data)
+    self.by_ChanelConv=self.group_data(self.data,'Channel')
+    self.pie_channelConv=self.plot_pie(self.by_ChanelConv,'User_ID')
+
+  def plot_pie(self,data,target):
+    self.data=data
+    self.data['Total Conver'] = self.data.sum(axis=1)
+    self.data['Total Top Five'] = self.data[['A','G','H','I','B']].sum(axis=1)
+    self.data['The Rest'] = self.data['Total Conver']-self.data['Total Top Five']
+    self.ax=self.data[['A','G','H','I','B','The Rest']].T.plot.pie(y=target,figsize=(12, 7),autopct='%1.1f%%',)
+    plt.savefig('channel_Static_User.jpg')
+    files.download('channel_Static_User.jpg') 
+    return self.data
+    
+
+  def get_infor(self,data):
+    self.conver_uni=self.data.User_ID.unique()
+    print('The number of conversions: %d'%len(self.conver_uni))
+    return self.conver_uni
+
+  def group_data(self,data,target):
+    print('I am grouping data by '+ target + '\n...')
+    self.data=data.groupby([target]).agg({'User_ID':'count'})
+    self.data=self.data.T
+    display(self.data)
+    print('I am done! ')
+    return self.data    
+
+# The EDA_Static_Conversion class generates the total number of conversions per channel over the whole period
+class EDA_Static_Conversion():
+  
+  def __init__(self,transform):
+    print('\033[1m'+'I am doing the EDA on Conversion'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Channel','Conv_ID','IHC_Conv']] #'Conv_Date',
+    display(self.data.head(3))
+    self.infor_conver=self.get_infor(self.data)
+    self.by_ChanelConv=self.group_data(self.data,'Channel','Conv_ID')
+    self.pie_channelConv=self.plot_pie(self.by_ChanelConv,'Conv_ID')
+
+
+  def get_infor(self,data):
+    self.conver_uni=self.data.Conv_ID.unique()
+    print('The number of conversions: %d'%len(self.conver_uni))
+    return self.conver_uni
+
+  def group_data(self,data,target,exp):
+    print('I am grouping data by '+ target + '\n...')
+    if data[exp].dtype=='object':
+      self.data=data.groupby([target]).agg({exp:'count'})
+    else:
+      self.data=data.groupby([target]).agg({exp:'sum'})
+    self.data=self.data.T
+    display(self.data)
+    print('I am done! ')
+    return self.data    
+
+  def plot_pie(self,data,target):
+    self.data=data
+    self.data['Total Conver'] = self.data.sum(axis=1)
+    self.data['Total Top Five'] = self.data[['A','G','H','I','B']].sum(axis=1)
+    self.data['The Rest'] = self.data['Total Conver']-self.data['Total Top Five']
+    self.ax=self.data[['A','G','H','I','B','The Rest']].T.plot.pie(y=target,figsize=(12, 7),autopct='%1.1f%%',)
+    plt.savefig('channel_Conver.png')
+    files.download('channel_Conver.png') 
+    return self.data
+
+# The EDA_Channel_Revenue class is to analyze the impacts of the online marketing channels on 
+# the daily Revenue
+class EDA_Channel_Revenue():
+
+  def __init__(self,transform):
+    print('\033[1m'+'I am analyzing the influences of the online marketing channels on the daily revenue'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Conv_Date','Channel','Revenue']]
+    self.by_DateChannel=self.group_data(self.data,'Conv_Date','Channel')
+    self.unstaked_data=self.unstack_data(self.by_DateChannel,'Revenue','bar')
+    self.plotted_data=self.plot_data(self.unstaked_data)   
+    self.exported_data=self.export_data(self.unstaked_data,'channel_revenue')
+
+  def group_data(self,data,target_1,target_2):
+    print('I am grouping data by '+ target_1 +' and '+ target_2 + '\n...')
+    self.data=data.groupby([target_1,target_2])#.agg({exp:'count'})
+    print('I am done! ')
+    return self.data    
+
+  def unstack_data(self,data,exp,kind):
+    print('I am unstacking data \n...')
+    data=data.sum()[exp].unstack(level=-1)
+    self.data=data
+    display(self.data.head(3))
+    print('Data were unstacked completely\n...')
+    return self.data
+
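+  # plot_data expects the unstacked frame (Conv_Date index, one column of
+  # summed daily Revenue per Channel) and draws the top five channels plus
+  # 'The Rest' as a stacked area chart over the observed date range.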
+  def plot_data(self,data):
+    self.data=data
+    print('I am visualizing the contribution of Top 5 Channels to the Daily Revenue\n...')
+    self.data['The Total'] = self.data.sum(axis=1)
+    self.data['The Rest']= self.data['The Total']-self.data[['A','G','H','I','B']].sum(axis=1)
+    self.xlim=('2017-03-01','2018-03-24')
+    self.ax =self.data[['A','G','H','I','B','The Rest']].plot.area(xlim=self.xlim, figsize=(12,8))
+    self.ax.set_xlabel('Date')
+    self.ax.set_ylabel('Revenue')
+    print(self.data['The Rest'].describe())
+    plt.savefig('channel_ren.png')
+    files.download('channel_ren.png') 
+    return self.data
+    
+  def export_data(self,data,title):
+    print('I am exporting data to the excel and csv files\n...')
+    data.to_excel(title+'.xlsx')
+    self.excel=files.download(title+'.xlsx')
+    data.to_csv(title+'.csv')
+    self.csv=files.download(title+'.csv')
+    return self.excel
+
+# The EDA_Channel_User class is to analyze the impacts of the online marketing channels on 
+# the daily number of users
+class EDA_Channel_User():
+
+  def __init__(self,transform):
+    print('\033[1m'+'I am analyzing the influences of the online marketing channels on the daily number of users'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Conv_Date','Channel','User_ID']]
+    self.by_DateUser=self.group_data(self.data,'Conv_Date','Channel','User_ID')
+    self.unstaked_data=self.unstack_data(self.by_DateUser,'User_ID','bar')
+    #display(self.unstaked_data.head(3))
+    self.plotted_data=self.plot_data(self.unstaked_data)   
+    # self.exported_data=self.export_data(self.unstaked_data,'channel_num_user')
+
+  def group_data(self,data,target_1,target_2,exp):
+    print('I am grouping data by '+ target_1 +' and '+ target_2 + '\n...')
+    self.data=data.groupby([target_1,target_2])#.agg({exp:'count'})
+    print('I am done! ')
+    return self.data    
+
+  def unstack_data(self,data,exp,kind):
+    print('I am unstacking data \n...')
+    data=data.count()[exp].unstack(level=-1)
+    self.data=data
+    print('Data were unstacked completely\n...')
+    return self.data
+
+  def plot_data(self,data):
+    self.data=data
+    print('I am visualizing the contribution of Top 5 Channels to the Daily Number of Users\n...')
+    self.data['The Total'] = self.data.sum(axis=1)
+    self.data['The Rest'] = self.data['The Total'] - self.data[['A','G','H','I','B']].sum(axis=1)
+    self.xlim=('2017-03-01','2018-03-24')
+    self.ax =self.data[['A','G','H','I','B','The Rest']].plot.area(xlim=self.xlim, figsize=(12,8))
+    self.ax.set_xlabel('Date')
+    self.ax.set_ylabel('The number of Users')
+    plt.savefig('channel_user.png')
+    files.download('channel_user.png') 
+    return self.data
+    
+  def export_data(self,data,title):
+    print('I am exporting data to the excel and csv files\n...')
+    data.to_excel(title+'.xlsx')
+    self.excel=files.download(title+'.xlsx')
+    data.to_csv(title+'.csv')
+    self.csv=files.download(title+'.csv')
+    return self.excel
+
+# The EDA_channel_IHC class plots the changes in the daily IHC of the channels
+class EDA_channel_IHC():
+
+  def __init__(self,transform):
+    print('\033[1m'+'I am doing the EDA on the daily IHC of the channels'+'\033[0m'+'\n...')
+    self.data=transform.cleaned_tab[['Conv_Date','Channel','IHC_Conv']] #'Conv_Date',
+    self.by_TimeChannel=self.group_data(self.data,'Conv_Date','Channel','IHC_Conv')
+    self.unstacked_data=self.unstack_data(self.by_TimeChannel,'IHC_Conv')
+    self.change_plot=self.plot_data(self.unstacked_data)
+
+  def plot_data(self,data):
+    self.data=data
+    # self.data['The Rest'] = self.data.sum(axis=1)
+    self.xlim=('2017-03-01','2018-03-24')
+    self.ylim=('0','550')
+    self.ax =self.data[['A','G','H','I','B']].plot.line(xlim=self.xlim,figsize=(12,8))
+    self.ax.set_xlabel('Date')
+    self.ax.set_ylabel('IHC_Conv')
+    plt.savefig('channel_IHC.png')
+    files.download('channel_IHC.png') 
+    return self.data
+
+  def group_data(self,data,target_1,target_2,exp):
+    print('I am grouping data by '+ target_1 +' and '+ target_2 + '\n...')
+    self.data=data.groupby([target_1,target_2])#.agg({exp:'sum'})
+    print('I am done! ')
+    return self.data    
+
+  def unstack_data(self,data,exp):
+    print('I am unstacking data \n...')
+    data=data.sum()[exp].unstack(level=-1)
+    self.data=data
+    print('Data were unstacked completely\n...')
+    return self.data
+
+
+def main():
+  # Run the whole pipeline: Extract -> Transform -> Load, then the EDA steps.
+  # (The original declared this as a class whose body ran at import time, so
+  # the main() call below did nothing; a plain function defers the work to
+  # the __main__ guard.)
+  extract=Extract(A_url,B_url)
+  transform=Transform(extract)
+  load=Load(transform)
+  eda_overview_kpi=EDA_Overview_KPI(transform)
+  eda_static_ren=EDA_Static_Ren(transform)
+  eda_kpi_return=EDA_KPI_Return(transform)
+  eda_static_user=EDA_Static_User(transform)
+  eda_static_conversion=EDA_Static_Conversion(transform)
+  eda_channel_revenue=EDA_Channel_Revenue(transform)
+  eda_channel_user=EDA_Channel_User(transform)
+  eda_channel_ihc=EDA_channel_IHC(transform)
+
+
+if __name__=='__main__':
+  main()
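+
+# Note: the display() and files.download() calls above assume an IPython /
+# Google Colab runtime (google.colab.files), and pandas, matplotlib and the
+# A_url/B_url source locations are expected to be defined earlier in this script.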

+ 727 - 0
data/purposeCombined/BI/cube-backup.py

@@ -0,0 +1,727 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=line-too-long,unused-argument,ungrouped-imports
+"""A collection of ORM sqlalchemy models for Superset"""
+import json
+import logging
+import textwrap
+from contextlib import closing
+from copy import deepcopy
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type
+
+import numpy
+import pandas as pd
+import sqlalchemy as sqla
+import sqlparse
+from flask import g, request
+from flask_appbuilder import Model
+from sqlalchemy import (
+    Boolean,
+    Column,
+    create_engine,
+    DateTime,
+    ForeignKey,
+    Integer,
+    MetaData,
+    String,
+    Table,
+    Text,
+)
+from sqlalchemy.engine import Dialect, Engine, url
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.engine.url import make_url, URL
+from sqlalchemy.ext.hybrid import hybrid_property
+from sqlalchemy.orm import relationship
+from sqlalchemy.pool import NullPool
+from sqlalchemy.schema import UniqueConstraint
+from sqlalchemy.sql import expression, Select
+from sqlalchemy_utils import EncryptedType
+
+from superset import app, db_engine_specs, is_feature_enabled, security_manager
+from superset.db_engine_specs.base import TimeGrain
+from superset.models.dashboard import Dashboard
+from superset.models.helpers import AuditMixinNullable, ImportMixin
+from superset.models.tags import DashboardUpdater, FavStarUpdater
+from superset.utils import cache as cache_util, core as utils
+
+config = app.config
+custom_password_store = config["SQLALCHEMY_CUSTOM_PASSWORD_STORE"]
+stats_logger = config["STATS_LOGGER"]
+log_query = config["QUERY_LOGGER"]
+metadata = Model.metadata  # pylint: disable=no-member
+logger = logging.getLogger(__name__)
+
+PASSWORD_MASK = "X" * 10
+DB_CONNECTION_MUTATOR = config["DB_CONNECTION_MUTATOR"]
+
+
+class Url(Model, AuditMixinNullable):
+    """Used for the short url feature"""
+
+    __tablename__ = "url"
+    id = Column(Integer, primary_key=True)
+    url = Column(Text)
+
+
+class KeyValue(Model):  # pylint: disable=too-few-public-methods
+
+    """Used for any type of key-value store"""
+
+    __tablename__ = "keyvalue"
+    id = Column(Integer, primary_key=True)
+    value = Column(Text, nullable=False)
+
+
+class CssTemplate(Model, AuditMixinNullable):
+
+    """CSS templates for dashboards"""
+
+    __tablename__ = "css_templates"
+    id = Column(Integer, primary_key=True)
+    template_name = Column(String(250))
+    css = Column(Text, default="")
+
+
+class Database(
+    Model, AuditMixinNullable, ImportMixin
+):  # pylint: disable=too-many-public-methods
+
+    """An ORM object that stores Database related information"""
+
+    __tablename__ = "dbs"
+    type = "table"
+    __table_args__ = (UniqueConstraint("database_name"),)
+
+    id = Column(Integer, primary_key=True)
+    verbose_name = Column(String(250), unique=True)
+    # short unique name, used in permissions
+    database_name = Column(String(250), unique=True, nullable=False)
+    sqlalchemy_uri = Column(String(1024), nullable=False)
+    password = Column(EncryptedType(String(1024), config["SECRET_KEY"]))
+    cache_timeout = Column(Integer)
+    select_as_create_table_as = Column(Boolean, default=False)
+    expose_in_sqllab = Column(Boolean, default=True)
+    allow_run_async = Column(Boolean, default=False)
+    allow_csv_upload = Column(Boolean, default=False)
+    allow_ctas = Column(Boolean, default=False)
+    allow_cvas = Column(Boolean, default=False)
+    allow_dml = Column(Boolean, default=False)
+    force_ctas_schema = Column(String(250))
+    allow_multi_schema_metadata_fetch = Column(  # pylint: disable=invalid-name
+        Boolean, default=False
+    )
+    extra = Column(
+        Text,
+        default=textwrap.dedent(
+            """\
+    {
+        "metadata_params": {},
+        "engine_params": {},
+        "metadata_cache_timeout": {},
+        "schemas_allowed_for_csv_upload": []
+    }
+    """
+        ),
+    )
+    encrypted_extra = Column(EncryptedType(Text, config["SECRET_KEY"]), nullable=True)
+    impersonate_user = Column(Boolean, default=False)
+    server_cert = Column(EncryptedType(Text, config["SECRET_KEY"]), nullable=True)
+    export_fields = [
+        "database_name",
+        "sqlalchemy_uri",
+        "cache_timeout",
+        "expose_in_sqllab",
+        "allow_run_async",
+        "allow_ctas",
+        "allow_cvas",
+        "allow_csv_upload",
+        "extra",
+    ]
+    export_children = ["tables"]
+
+    def __repr__(self) -> str:
+        return self.name
+
+    @property
+    def name(self) -> str:
+        return self.verbose_name if self.verbose_name else self.database_name
+
+    @property
+    def allows_subquery(self) -> bool:
+        return self.db_engine_spec.allows_subqueries
+
+    @property
+    def function_names(self) -> List[str]:
+        try:
+            return self.db_engine_spec.get_function_names(self)
+        except Exception as ex:  # pylint: disable=broad-except
+            # function_names property is used in bulk APIs and should not hard crash
+            # more info in: https://github.com/apache/incubator-superset/issues/9678
+            logger.error(
+                "Failed to fetch database function names with error: %s", str(ex)
+            )
+        return []
+
+    @property
+    def allows_cost_estimate(self) -> bool:
+        extra = self.get_extra()
+
+        database_version = extra.get("version")
+        cost_estimate_enabled: bool = extra.get("cost_estimate_enabled")  # type: ignore
+
+        return (
+            self.db_engine_spec.get_allow_cost_estimate(database_version)
+            and cost_estimate_enabled
+        )
+
+    @property
+    def allows_virtual_table_explore(self) -> bool:
+        extra = self.get_extra()
+
+        return bool(extra.get("allows_virtual_table_explore", True))
+
+    @property
+    def explore_database_id(self) -> int:
+        return self.get_extra().get("explore_database_id", self.id)
+
+    @property
+    def data(self) -> Dict[str, Any]:
+        return {
+            "id": self.id,
+            "name": self.database_name,
+            "backend": self.backend,
+            "allow_multi_schema_metadata_fetch": self.allow_multi_schema_metadata_fetch,
+            "allows_subquery": self.allows_subquery,
+            "allows_cost_estimate": self.allows_cost_estimate,
+            "allows_virtual_table_explore": self.allows_virtual_table_explore,
+            "explore_database_id": self.explore_database_id,
+        }
+
+    @property
+    def unique_name(self) -> str:
+        return self.database_name
+
+    @property
+    def url_object(self) -> URL:
+        return make_url(self.sqlalchemy_uri_decrypted)
+
+    @property
+    def backend(self) -> str:
+        sqlalchemy_url = make_url(self.sqlalchemy_uri_decrypted)
+        return sqlalchemy_url.get_backend_name()  # pylint: disable=no-member
+
+    @property
+    def metadata_cache_timeout(self) -> Dict[str, Any]:
+        return self.get_extra().get("metadata_cache_timeout", {})
+
+    @property
+    def schema_cache_enabled(self) -> bool:
+        return "schema_cache_timeout" in self.metadata_cache_timeout
+
+    @property
+    def schema_cache_timeout(self) -> Optional[int]:
+        return self.metadata_cache_timeout.get("schema_cache_timeout")
+
+    @property
+    def table_cache_enabled(self) -> bool:
+        return "table_cache_timeout" in self.metadata_cache_timeout
+
+    @property
+    def table_cache_timeout(self) -> Optional[int]:
+        return self.metadata_cache_timeout.get("table_cache_timeout")
+
+    @property
+    def default_schemas(self) -> List[str]:
+        return self.get_extra().get("default_schemas", [])
+
+    @property
+    def connect_args(self) -> Dict[str, Any]:
+        return self.get_extra().get("engine_params", {}).get("connect_args", {})
+
+    @classmethod
+    def get_password_masked_url_from_uri(  # pylint: disable=invalid-name
+        cls, uri: str
+    ) -> URL:
+        sqlalchemy_url = make_url(uri)
+        return cls.get_password_masked_url(sqlalchemy_url)
+
+    @classmethod
+    def get_password_masked_url(
+        cls, url: URL  # pylint: disable=redefined-outer-name
+    ) -> URL:
+        url_copy = deepcopy(url)
+        if url_copy.password is not None:
+            url_copy.password = PASSWORD_MASK
+        return url_copy
+
+    def set_sqlalchemy_uri(self, uri: str) -> None:
+        conn = sqla.engine.url.make_url(uri.strip())
+        if conn.password != PASSWORD_MASK and not custom_password_store:
+            # do not over-write the password with the password mask
+            self.password = conn.password
+        conn.password = PASSWORD_MASK if conn.password else None
+        self.sqlalchemy_uri = str(conn)  # hides the password
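+        # e.g. "postgresql://scott:tiger@host/db" is persisted as
+        # "postgresql://scott:XXXXXXXXXX@host/db"; the real password lives in
+        # the encrypted `password` column instead of the URI.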
+
+    def get_effective_user(
+        self,
+        url: URL,  # pylint: disable=redefined-outer-name
+        user_name: Optional[str] = None,
+    ) -> Optional[str]:
+        """
+        Get the effective user, especially during impersonation.
+        :param url: SQL Alchemy URL object
+        :param user_name: Default username
+        :return: The effective username
+        """
+        effective_username = None
+        if self.impersonate_user:
+            effective_username = url.username
+            if user_name:
+                effective_username = user_name
+            elif (
+                hasattr(g, "user")
+                and hasattr(g.user, "username")
+                and g.user.username is not None
+            ):
+                effective_username = g.user.username
+        return effective_username
+
+    @utils.memoized(watch=("impersonate_user", "sqlalchemy_uri_decrypted", "extra"))
+    def get_sqla_engine(
+        self,
+        schema: Optional[str] = None,
+        nullpool: bool = True,
+        user_name: Optional[str] = None,
+        source: Optional[utils.QuerySource] = None,
+    ) -> Engine:
+        extra = self.get_extra()
+        sqlalchemy_url = make_url(self.sqlalchemy_uri_decrypted)
+        self.db_engine_spec.adjust_database_uri(sqlalchemy_url, schema)
+        effective_username = self.get_effective_user(sqlalchemy_url, user_name)
+        # If using MySQL or Presto for example, will set url.username
+        # If using Hive, will not do anything yet since that relies on a
+        # configuration parameter instead.
+        self.db_engine_spec.modify_url_for_impersonation(
+            sqlalchemy_url, self.impersonate_user, effective_username
+        )
+
+        masked_url = self.get_password_masked_url(sqlalchemy_url)
+        logger.debug("Database.get_sqla_engine(). Masked URL: %s", str(masked_url))
+
+        params = extra.get("engine_params", {})
+        if nullpool:
+            params["poolclass"] = NullPool
+
+        connect_args = params.get("connect_args", {})
+        configuration = connect_args.get("configuration", {})
+
+        # If using Hive, this will set hive.server2.proxy.user=$effective_username
+        configuration.update(
+            self.db_engine_spec.get_configuration_for_impersonation(
+                str(sqlalchemy_url), self.impersonate_user, effective_username
+            )
+        )
+        if configuration:
+            connect_args["configuration"] = configuration
+        if connect_args:
+            params["connect_args"] = connect_args
+
+        params.update(self.get_encrypted_extra())
+
+        if DB_CONNECTION_MUTATOR:
+            if not source and request and request.referrer:
+                if "/superset/dashboard/" in request.referrer:
+                    source = utils.QuerySource.DASHBOARD
+                elif "/superset/explore/" in request.referrer:
+                    source = utils.QuerySource.CHART
+                elif "/superset/sqllab/" in request.referrer:
+                    source = utils.QuerySource.SQL_LAB
+
+            sqlalchemy_url, params = DB_CONNECTION_MUTATOR(
+                sqlalchemy_url, params, effective_username, security_manager, source
+            )
+
+        return create_engine(sqlalchemy_url, **params)
+
+    def get_reserved_words(self) -> Set[str]:
+        return self.get_dialect().preparer.reserved_words
+
+    def get_quoter(self) -> Callable[[str, Any], str]:
+        return self.get_dialect().identifier_preparer.quote
+
+    def get_df(  # pylint: disable=too-many-locals
+        self,
+        sql: str,
+        schema: Optional[str] = None,
+        mutator: Optional[Callable[[pd.DataFrame], None]] = None,
+    ) -> pd.DataFrame:
+        sqls = [str(s).strip(" ;") for s in sqlparse.parse(sql)]
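+        # `sql` may contain several ;-separated statements: every statement
+        # except the last is executed and its cursor drained, and only the
+        # final statement's result set is returned as a DataFrame.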
+
+        engine = self.get_sqla_engine(schema=schema)
+        username = utils.get_username()
+
+        def needs_conversion(df_series: pd.Series) -> bool:
+            return not df_series.empty and isinstance(df_series[0], (list, dict))
+
+        def _log_query(sql: str) -> None:
+            if log_query:
+                log_query(engine.url, sql, schema, username, __name__, security_manager)
+
+        with closing(engine.raw_connection()) as conn:
+            with closing(conn.cursor()) as cursor:
+                for sql_ in sqls[:-1]:
+                    _log_query(sql_)
+                    self.db_engine_spec.execute(cursor, sql_)
+                    cursor.fetchall()
+
+                _log_query(sqls[-1])
+                self.db_engine_spec.execute(cursor, sqls[-1])
+
+                if cursor.description is not None:
+                    columns = [col_desc[0] for col_desc in cursor.description]
+                else:
+                    columns = []
+
+                df = pd.DataFrame.from_records(
+                    data=list(cursor.fetchall()), columns=columns, coerce_float=True
+                )
+
+                if mutator:
+                    mutator(df)
+
+                for k, v in df.dtypes.items():
+                    if v.type == numpy.object_ and needs_conversion(df[k]):
+                        df[k] = df[k].apply(utils.json_dumps_w_dates)
+                return df
+
+    def compile_sqla_query(self, qry: Select, schema: Optional[str] = None) -> str:
+        engine = self.get_sqla_engine(schema=schema)
+
+        sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
+
+        if (
+            engine.dialect.identifier_preparer._double_percents  # pylint: disable=protected-access
+        ):
+            sql = sql.replace("%%", "%")
+
+        return sql
+
+    def select_star(  # pylint: disable=too-many-arguments
+        self,
+        table_name: str,
+        schema: Optional[str] = None,
+        limit: int = 100,
+        show_cols: bool = False,
+        indent: bool = True,
+        latest_partition: bool = False,
+        cols: Optional[List[Dict[str, Any]]] = None,
+    ) -> str:
+        """Generates a ``select *`` statement in the proper dialect"""
+        eng = self.get_sqla_engine(schema=schema, source=utils.QuerySource.SQL_LAB)
+        return self.db_engine_spec.select_star(
+            self,
+            table_name,
+            schema=schema,
+            engine=eng,
+            limit=limit,
+            show_cols=show_cols,
+            indent=indent,
+            latest_partition=latest_partition,
+            cols=cols,
+        )
+
+    def apply_limit_to_sql(self, sql: str, limit: int = 1000) -> str:
+        return self.db_engine_spec.apply_limit_to_sql(sql, limit, self)
+
+    def safe_sqlalchemy_uri(self) -> str:
+        return self.sqlalchemy_uri
+
+    @property
+    def inspector(self) -> Inspector:
+        engine = self.get_sqla_engine()
+        return sqla.inspect(engine)
+
+    @cache_util.memoized_func(
+        key= "db:{}:schema:None:table_list",
+        attribute_in_key="id",
+    )
+    def get_all_table_names_in_database(
+        self,
+        cache: bool = False,
+        cache_timeout: Optional[bool] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments."""
+        if not self.allow_multi_schema_metadata_fetch:
+            return []
+        return self.db_engine_spec.get_all_datasource_names(self, "table")
+
+    @cache_util.memoized_func(
+        key= "db:{}:schema:None:view_list", attribute_in_key="id"
+    )
+    def get_all_view_names_in_database(
+        self,
+        cache: bool = False,
+        cache_timeout: Optional[bool] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments."""
+        if not self.allow_multi_schema_metadata_fetch:
+            return []
+        return self.db_engine_spec.get_all_datasource_names(self, "view")
+
+    @cache_util.memoized_func(
+        key= f"db:{{}}:schema:{kwargs.get('schema')}:table_list",  # type: ignore
+        attribute_in_key="id",
+    )
+    def get_all_table_names_in_schema(
+        self,
+        schema: str,
+        cache: bool = False,
+        cache_timeout: Optional[int] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments.
+
+        For unused parameters, they are referenced in
+        cache_util.memoized_func decorator.
+
+        :param schema: schema name
+        :param cache: whether cache is enabled for the function
+        :param cache_timeout: timeout in seconds for the cache
+        :param force: whether to force refresh the cache
+        :return: list of tables
+        """
+        try:
+            tables = self.db_engine_spec.get_table_names(
+                database=self, inspector=self.inspector, schema=schema
+            )
+            return [
+                utils.DatasourceName(table=table, schema=schema) for table in tables
+            ]
+        except Exception as ex:  # pylint: disable=broad-except
+            logger.exception(ex)
+
+    @cache_util.memoized_func(
+        key= f"db:{{}}:schema:{kwargs.get('schema')}:view_list",  # type: ignore
+        attribute_in_key="id",
+    )
+    def get_all_view_names_in_schema(
+        self,
+        schema: str,
+        cache: bool = False,
+        cache_timeout: Optional[int] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments.
+
+        For unused parameters, they are referenced in
+        cache_util.memoized_func decorator.
+
+        :param schema: schema name
+        :param cache: whether cache is enabled for the function
+        :param cache_timeout: timeout in seconds for the cache
+        :param force: whether to force refresh the cache
+        :return: list of views
+        """
+        try:
+            views = self.db_engine_spec.get_view_names(
+                database=self, inspector=self.inspector, schema=schema
+            )
+            return [utils.DatasourceName(table=view, schema=schema) for view in views]
+        except Exception as ex:  # pylint: disable=broad-except
+            logger.exception(ex)
+
+    @cache_util.memoized_func(
+        key= "db:{}:schema_list", attribute_in_key="id"
+    )
+    def get_all_schema_names(
+        self,
+        cache: bool = False,
+        cache_timeout: Optional[int] = None,
+        force: bool = False,
+    ) -> List[str]:
+        """Parameters need to be passed as keyword arguments.
+
+        For unused parameters, they are referenced in
+        cache_util.memoized_func decorator.
+
+        :param cache: whether cache is enabled for the function
+        :param cache_timeout: timeout in seconds for the cache
+        :param force: whether to force refresh the cache
+        :return: schema list
+        """
+        return self.db_engine_spec.get_schema_names(self.inspector)
+
+    @property
+    def db_engine_spec(self) -> Type[db_engine_specs.BaseEngineSpec]:
+        return db_engine_specs.engines.get(self.backend, db_engine_specs.BaseEngineSpec)
+
+    @classmethod
+    def get_db_engine_spec_for_backend(
+        cls, backend: str
+    ) -> Type[db_engine_specs.BaseEngineSpec]:
+        return db_engine_specs.engines.get(backend, db_engine_specs.BaseEngineSpec)
+
+    def grains(self) -> Tuple[TimeGrain, ...]:
+        """Defines time granularity database-specific expressions.
+
+        The idea here is to make it easy for users to change the time grain
+        from a datetime (maybe the source grain is arbitrary timestamps, daily
+        or 5 minutes increments) to another, "truncated" datetime. Since
+        each database has slightly different but similar datetime functions,
+        this allows a mapping between database engines and actual functions.
+        """
+        return self.db_engine_spec.get_time_grains()
+
+    def get_extra(self) -> Dict[str, Any]:
+        return self.db_engine_spec.get_extra_params(self)
+
+    def get_encrypted_extra(self) -> Dict[str, Any]:
+        encrypted_extra = {}
+        if self.encrypted_extra:
+            try:
+                encrypted_extra = json.loads(self.encrypted_extra)
+            except json.JSONDecodeError as ex:
+                logger.error(ex)
+                raise ex
+        return encrypted_extra
+
+    def get_table(self, table_name: str, schema: Optional[str] = None) -> Table:
+        extra = self.get_extra()
+        meta = MetaData(**extra.get("metadata_params", {}))
+        return Table(
+            table_name,
+            meta,
+            schema=schema or None,
+            autoload=True,
+            autoload_with=self.get_sqla_engine(),
+        )
+
+    def get_columns(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        return self.db_engine_spec.get_columns(self.inspector, table_name, schema)
+
+    def get_indexes(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        return self.inspector.get_indexes(table_name, schema)
+
+    def get_pk_constraint(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> Dict[str, Any]:
+        return self.inspector.get_pk_constraint(table_name, schema)
+
+    def get_foreign_keys(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        return self.inspector.get_foreign_keys(table_name, schema)
+
+    def get_schema_access_for_csv_upload(  # pylint: disable=invalid-name
+        self,
+    ) -> List[str]:
+        allowed_databases = self.get_extra().get("schemas_allowed_for_csv_upload", [])
+        if hasattr(g, "user"):
+            extra_allowed_databases = config["ALLOWED_USER_CSV_SCHEMA_FUNC"](
+                self, g.user
+            )
+            allowed_databases += extra_allowed_databases
+        return sorted(set(allowed_databases))
+
+    @property
+    def sqlalchemy_uri_decrypted(self) -> str:
+        conn = sqla.engine.url.make_url(self.sqlalchemy_uri)
+        if custom_password_store:
+            conn.password = custom_password_store(conn)
+        else:
+            conn.password = self.password
+        return str(conn)
+
+    @property
+    def sql_url(self) -> str:
+        return f"/superset/sql/{self.id}/"
+
+    @hybrid_property
+    def perm(self) -> str:
+        return f"[{self.database_name}].(id:{self.id})"
+
+    @perm.expression  # type: ignore
+    def perm(cls) -> str:  # pylint: disable=no-self-argument
+        return (
+            "[" + cls.database_name + "].(id:" + expression.cast(cls.id, String) + ")"
+        )
+
+    def get_perm(self) -> str:
+        return self.perm  # type: ignore
+
+    def has_table(self, table: Table) -> bool:
+        engine = self.get_sqla_engine()
+        return engine.has_table(table.table_name, table.schema or None)
+
+    def has_table_by_name(self, table_name: str, schema: Optional[str] = None) -> bool:
+        engine = self.get_sqla_engine()
+        return engine.has_table(table_name, schema)
+
+    @utils.memoized
+    def get_dialect(self) -> Dialect:
+        sqla_url = url.make_url(self.sqlalchemy_uri_decrypted)
+        return sqla_url.get_dialect()()  # pylint: disable=no-member
+
+
+sqla.event.listen(Database, "after_insert", security_manager.set_perm)
+sqla.event.listen(Database, "after_update", security_manager.set_perm)
+
+
+class Log(Model):  # pylint: disable=too-few-public-methods
+
+    """ORM object used to log Superset actions to the database"""
+
+    __tablename__ = "logs"
+
+    id = Column(Integer, primary_key=True)
+    action = Column(String(512))
+    user_id = Column(Integer, ForeignKey("ab_user.id"))
+    dashboard_id = Column(Integer)
+    slice_id = Column(Integer)
+    json = Column(Text)
+    user = relationship(
+        security_manager.user_model, backref="logs", foreign_keys=[user_id]
+    )
+    dttm = Column(DateTime, default=datetime.utcnow)
+    duration_ms = Column(Integer)
+    referrer = Column(String(1024))
+
+
+class FavStar(Model):  # pylint: disable=too-few-public-methods
+    __tablename__ = "favstar"
+
+    id = Column(Integer, primary_key=True)
+    user_id = Column(Integer, ForeignKey("ab_user.id"))
+    class_name = Column(String(50))
+    obj_id = Column(Integer)
+    dttm = Column(DateTime, default=datetime.utcnow)
+
+
+# events for updating tags
+if is_feature_enabled("TAGGING_SYSTEM"):
+    sqla.event.listen(Dashboard, "after_insert", DashboardUpdater.after_insert)
+    sqla.event.listen(Dashboard, "after_update", DashboardUpdater.after_update)
+    sqla.event.listen(Dashboard, "after_delete", DashboardUpdater.after_delete)
+    sqla.event.listen(FavStar, "after_insert", FavStarUpdater.after_insert)
+    sqla.event.listen(FavStar, "after_delete", FavStarUpdater.after_delete)

+ 727 - 0
data/purposeCombined/BI/cube.py

@@ -0,0 +1,727 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=line-too-long,unused-argument,ungrouped-imports
+"""A collection of ORM sqlalchemy models for Superset"""
+import json
+import logging
+import textwrap
+from contextlib import closing
+from copy import deepcopy
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type
+
+import numpy
+import pandas as pd
+import sqlalchemy as sqla
+import sqlparse
+from flask import g, request
+from flask_appbuilder import Model
+from sqlalchemy import (
+    Boolean,
+    Column,
+    create_engine,
+    DateTime,
+    ForeignKey,
+    Integer,
+    MetaData,
+    String,
+    Table,
+    Text,
+)
+from sqlalchemy.engine import Dialect, Engine, url
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.engine.url import make_url, URL
+from sqlalchemy.ext.hybrid import hybrid_property
+from sqlalchemy.orm import relationship
+from sqlalchemy.pool import NullPool
+from sqlalchemy.schema import UniqueConstraint
+from sqlalchemy.sql import expression, Select
+from sqlalchemy_utils import EncryptedType
+
+from superset import app, db_engine_specs, is_feature_enabled, security_manager
+from superset.db_engine_specs.base import TimeGrain
+from superset.models.dashboard import Dashboard
+from superset.models.helpers import AuditMixinNullable, ImportMixin
+from superset.models.tags import DashboardUpdater, FavStarUpdater
+from superset.utils import cache as cache_util, core as utils
+
+config = app.config
+custom_password_store = config["SQLALCHEMY_CUSTOM_PASSWORD_STORE"]
+stats_logger = config["STATS_LOGGER"]
+log_query = config["QUERY_LOGGER"]
+metadata = Model.metadata  # pylint: disable=no-member
+logger = logging.getLogger(__name__)
+
+PASSWORD_MASK = "X" * 10
+DB_CONNECTION_MUTATOR = config["DB_CONNECTION_MUTATOR"]
+
+
+class Url(Model, AuditMixinNullable):
+    """Used for the short url feature"""
+
+    __tablename__ = "url"
+    id = Column(Integer, primary_key=True)
+    url = Column(Text)
+
+
+class KeyValue(Model):  # pylint: disable=too-few-public-methods
+
+    """Used for any type of key-value store"""
+
+    __tablename__ = "keyvalue"
+    id = Column(Integer, primary_key=True)
+    value = Column(Text, nullable=False)
+
+
+class CssTemplate(Model, AuditMixinNullable):
+
+    """CSS templates for dashboards"""
+
+    __tablename__ = "css_templates"
+    id = Column(Integer, primary_key=True)
+    template_name = Column(String(250))
+    css = Column(Text, default="")
+
+
+class Database(
+    Model, AuditMixinNullable, ImportMixin
+):  # pylint: disable=too-many-public-methods
+
+    """An ORM object that stores Database related information"""
+
+    __tablename__ = "dbs"
+    type = "table"
+    __table_args__ = (UniqueConstraint("database_name"),)
+
+    id = Column(Integer, primary_key=True)
+    verbose_name = Column(String(250), unique=True)
+    # short unique name, used in permissions
+    database_name = Column(String(250), unique=True, nullable=False)
+    sqlalchemy_uri = Column(String(1024), nullable=False)
+    password = Column(EncryptedType(String(1024), config["SECRET_KEY"]))
+    cache_timeout = Column(Integer)
+    select_as_create_table_as = Column(Boolean, default=False)
+    expose_in_sqllab = Column(Boolean, default=True)
+    allow_run_async = Column(Boolean, default=False)
+    allow_csv_upload = Column(Boolean, default=False)
+    allow_ctas = Column(Boolean, default=False)
+    allow_cvas = Column(Boolean, default=False)
+    allow_dml = Column(Boolean, default=False)
+    force_ctas_schema = Column(String(250))
+    allow_multi_schema_metadata_fetch = Column(  # pylint: disable=invalid-name
+        Boolean, default=False
+    )
+    extra = Column(
+        Text,
+        default=textwrap.dedent(
+            """\
+    {
+        "metadata_params": {},
+        "engine_params": {},
+        "metadata_cache_timeout": {},
+        "schemas_allowed_for_csv_upload": []
+    }
+    """
+        ),
+    )
+    encrypted_extra = Column(EncryptedType(Text, config["SECRET_KEY"]), nullable=True)
+    impersonate_user = Column(Boolean, default=False)
+    server_cert = Column(EncryptedType(Text, config["SECRET_KEY"]), nullable=True)
+    export_fields = [
+        "database_name",
+        "sqlalchemy_uri",
+        "cache_timeout",
+        "expose_in_sqllab",
+        "allow_run_async",
+        "allow_ctas",
+        "allow_cvas",
+        "allow_csv_upload",
+        "extra",
+    ]
+    export_children = ["tables"]
+
+    def __repr__(self) -> str:
+        return self.name
+
+    @property
+    def name(self) -> str:
+        return self.verbose_name if self.verbose_name else self.database_name
+
+    @property
+    def allows_subquery(self) -> bool:
+        return self.db_engine_spec.allows_subqueries
+
+    @property
+    def function_names(self) -> List[str]:
+        try:
+            return self.db_engine_spec.get_function_names(self)
+        except Exception as ex:  # pylint: disable=broad-except
+            # function_names property is used in bulk APIs and should not hard crash
+            # more info in: https://github.com/apache/incubator-superset/issues/9678
+            logger.error(
+                "Failed to fetch database function names with error: %s", str(ex)
+            )
+        return []
+
+    @property
+    def allows_cost_estimate(self) -> bool:
+        extra = self.get_extra()
+
+        database_version = extra.get("version")
+        cost_estimate_enabled: bool = extra.get("cost_estimate_enabled")  # type: ignore
+
+        return (
+            self.db_engine_spec.get_allow_cost_estimate(database_version)
+            and cost_estimate_enabled
+        )
+
+    @property
+    def allows_virtual_table_explore(self) -> bool:
+        extra = self.get_extra()
+
+        return bool(extra.get("allows_virtual_table_explore", True))
+
+    @property
+    def explore_database_id(self) -> int:
+        return self.get_extra().get("explore_database_id", self.id)
+
+    @property
+    def data(self) -> Dict[str, Any]:
+        return {
+            "id": self.id,
+            "name": self.database_name,
+            "backend": self.backend,
+            "allow_multi_schema_metadata_fetch": self.allow_multi_schema_metadata_fetch,
+            "allows_subquery": self.allows_subquery,
+            "allows_cost_estimate": self.allows_cost_estimate,
+            "allows_virtual_table_explore": self.allows_virtual_table_explore,
+            "explore_database_id": self.explore_database_id,
+        }
+
+    @property
+    def unique_name(self) -> str:
+        return self.database_name
+
+    @property
+    def url_object(self) -> URL:
+        return make_url(self.sqlalchemy_uri_decrypted)
+
+    @property
+    def backend(self) -> str:
+        sqlalchemy_url = make_url(self.sqlalchemy_uri_decrypted)
+        return sqlalchemy_url.get_backend_name()  # pylint: disable=no-member
+
+    @property
+    def metadata_cache_timeout(self) -> Dict[str, Any]:
+        return self.get_extra().get("metadata_cache_timeout", {})
+
+    @property
+    def schema_cache_enabled(self) -> bool:
+        return "schema_cache_timeout" in self.metadata_cache_timeout
+
+    @property
+    def schema_cache_timeout(self) -> Optional[int]:
+        return self.metadata_cache_timeout.get("schema_cache_timeout")
+
+    @property
+    def table_cache_enabled(self) -> bool:
+        return "table_cache_timeout" in self.metadata_cache_timeout
+
+    @property
+    def table_cache_timeout(self) -> Optional[int]:
+        return self.metadata_cache_timeout.get("table_cache_timeout")
+
+    @property
+    def default_schemas(self) -> List[str]:
+        return self.get_extra().get("default_schemas", [])
+
+    @property
+    def connect_args(self) -> Dict[str, Any]:
+        return self.get_extra().get("engine_params", {}).get("connect_args", {})
+
+    @classmethod
+    def get_password_masked_url_from_uri(  # pylint: disable=invalid-name
+        cls, uri: str
+    ) -> URL:
+        sqlalchemy_url = make_url(uri)
+        return cls.get_password_masked_url(sqlalchemy_url)
+
+    @classmethod
+    def get_password_masked_url(
+        cls, url: URL  # pylint: disable=redefined-outer-name
+    ) -> URL:
+        url_copy = deepcopy(url)
+        if url_copy.password is not None:
+            url_copy.password = PASSWORD_MASK
+        return url_copy
+
+    def set_sqlalchemy_uri(self, uri: str) -> None:
+        conn = sqla.engine.url.make_url(uri.strip())
+        if conn.password != PASSWORD_MASK and not custom_password_store:
+            # do not over-write the password with the password mask
+            self.password = conn.password
+        conn.password = PASSWORD_MASK if conn.password else None
+        self.sqlalchemy_uri = str(conn)  # hides the password
+
+    def get_effective_user(
+        self,
+        url: URL,  # pylint: disable=redefined-outer-name
+        user_name: Optional[str] = None,
+    ) -> Optional[str]:
+        """
+        Get the effective user, especially during impersonation.
+        :param url: SQL Alchemy URL object
+        :param user_name: Default username
+        :return: The effective username
+        """
+        effective_username = None
+        if self.impersonate_user:
+            effective_username = url.username
+            if user_name:
+                effective_username = user_name
+            elif (
+                hasattr(g, "user")
+                and hasattr(g.user, "username")
+                and g.user.username is not None
+            ):
+                effective_username = g.user.username
+        return effective_username
+
+    @utils.memoized(watch=("impersonate_user", "sqlalchemy_uri_decrypted", "extra"))
+    def get_sqla_engine(
+        self,
+        schema: Optional[str] = None,
+        nullpool: bool = True,
+        user_name: Optional[str] = None,
+        source: Optional[utils.QuerySource] = None,
+    ) -> Engine:
+        extra = self.get_extra()
+        sqlalchemy_url = make_url(self.sqlalchemy_uri_decrypted)
+        self.db_engine_spec.adjust_database_uri(sqlalchemy_url, schema)
+        effective_username = self.get_effective_user(sqlalchemy_url, user_name)
+        # If using MySQL or Presto for example, will set url.username
+        # If using Hive, will not do anything yet since that relies on a
+        # configuration parameter instead.
+        self.db_engine_spec.modify_url_for_impersonation(
+            sqlalchemy_url, self.impersonate_user, effective_username
+        )
+
+        masked_url = self.get_password_masked_url(sqlalchemy_url)
+        logger.debug("Database.get_sqla_engine(). Masked URL: %s", str(masked_url))
+
+        params = extra.get("engine_params", {})
+        if nullpool:
+            params["poolclass"] = NullPool
+
+        connect_args = params.get("connect_args", {})
+        configuration = connect_args.get("configuration", {})
+
+        # If using Hive, this will set hive.server2.proxy.user=$effective_username
+        configuration.update(
+            self.db_engine_spec.get_configuration_for_impersonation(
+                str(sqlalchemy_url), self.impersonate_user, effective_username
+            )
+        )
+        if configuration:
+            connect_args["configuration"] = configuration
+        if connect_args:
+            params["connect_args"] = connect_args
+
+        params.update(self.get_encrypted_extra())
+
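+        # DB_CONNECTION_MUTATOR is an optional hook from the Superset config
+        # that can rewrite the URL/params per request; the referrer check below
+        # only guesses whether the query comes from a dashboard, chart or SQL Lab.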
+        if DB_CONNECTION_MUTATOR:
+            if not source and request and request.referrer:
+                if "/superset/dashboard/" in request.referrer:
+                    source = utils.QuerySource.DASHBOARD
+                elif "/superset/explore/" in request.referrer:
+                    source = utils.QuerySource.CHART
+                elif "/superset/sqllab/" in request.referrer:
+                    source = utils.QuerySource.SQL_LAB
+
+            sqlalchemy_url, params = DB_CONNECTION_MUTATOR(
+                sqlalchemy_url, params, effective_username, security_manager, source
+            )
+
+        return create_engine(sqlalchemy_url, **params)
+
+    def get_reserved_words(self) -> Set[str]:
+        return self.get_dialect().preparer.reserved_words
+
+    def get_quoter(self) -> Callable[[str, Any], str]:
+        return self.get_dialect().identifier_preparer.quote
+
+    def get_df(  # pylint: disable=too-many-locals
+        self,
+        sql: str,
+        schema: Optional[str] = None,
+        mutator: Optional[Callable[[pd.DataFrame], None]] = None,
+    ) -> pd.DataFrame:
+        sqls = [str(s).strip(" ;") for s in sqlparse.parse(sql)]
+
+        engine = self.get_sqla_engine(schema=schema)
+        username = utils.get_username()
+
+        def needs_conversion(df_series: pd.Series) -> bool:
+            return not df_series.empty and isinstance(df_series[0], (list, dict))
+
+        def _log_query(sql: str) -> None:
+            if log_query:
+                log_query(engine.url, sql, schema, username, __name__, security_manager)
+
+        with closing(engine.raw_connection()) as conn:
+            with closing(conn.cursor()) as cursor:
+                for sql_ in sqls[:-1]:
+                    _log_query(sql_)
+                    self.db_engine_spec.execute(cursor, sql_)
+                    cursor.fetchall()
+
+                _log_query(sqls[-1])
+                self.db_engine_spec.execute(cursor, sqls[-1])
+
+                if cursor.description is not None:
+                    columns = [col_desc[0] for col_desc in cursor.description]
+                else:
+                    columns = []
+
+                df = pd.DataFrame.from_records(
+                    data=list(cursor.fetchall()), columns=columns, coerce_float=True
+                )
+
+                if mutator:
+                    mutator(df)
+
+                for k, v in df.dtypes.items():
+                    if v.type == numpy.object_ and needs_conversion(df[k]):
+                        df[k] = df[k].apply(utils.json_dumps_w_dates)
+                return df
+
+    def compile_sqla_query(self, qry: Select, schema: Optional[str] = None) -> str:
+        engine = self.get_sqla_engine(schema=schema)
+
+        sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
+
+        if (
+            engine.dialect.identifier_preparer._double_percents  # pylint: disable=protected-access
+        ):
+            sql = sql.replace("%%", "%")
+
+        return sql
+
+    def select_star(  # pylint: disable=too-many-arguments
+        self,
+        table_name: str,
+        schema: Optional[str] = None,
+        limit: int = 100,
+        show_cols: bool = False,
+        indent: bool = True,
+        latest_partition: bool = False,
+        cols: Optional[List[Dict[str, Any]]] = None,
+    ) -> str:
+        """Generates a ``select *`` statement in the proper dialect"""
+        eng = self.get_sqla_engine(schema=schema, source=utils.QuerySource.SQL_LAB)
+        return self.db_engine_spec.select_star(
+            self,
+            table_name,
+            schema=schema,
+            engine=eng,
+            limit=limit,
+            show_cols=show_cols,
+            indent=indent,
+            latest_partition=latest_partition,
+            cols=cols,
+        )
+
+    def apply_limit_to_sql(self, sql: str, limit: int = 1000) -> str:
+        return self.db_engine_spec.apply_limit_to_sql(sql, limit, self)
+
+    def safe_sqlalchemy_uri(self) -> str:
+        return self.sqlalchemy_uri
+
+    @property
+    def inspector(self) -> Inspector:
+        engine = self.get_sqla_engine()
+        return sqla.inspect(engine)
+
+    @cache_util.memoized_func(
+        key=lambda *args, **kwargs: "db:{}:schema:None:table_list",
+        attribute_in_key="id",
+    )
+    def get_all_table_names_in_database(
+        self,
+        cache: bool = False,
+        cache_timeout: Optional[bool] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments."""
+        if not self.allow_multi_schema_metadata_fetch:
+            return []
+        return self.db_engine_spec.get_all_datasource_names(self, "table")
+
+    @cache_util.memoized_func(
+        key=lambda *args, **kwargs: "db:{}:schema:None:view_list", attribute_in_key="id"
+    )
+    def get_all_view_names_in_database(
+        self,
+        cache: bool = False,
+        cache_timeout: Optional[bool] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments."""
+        if not self.allow_multi_schema_metadata_fetch:
+            return []
+        return self.db_engine_spec.get_all_datasource_names(self, "view")
+
+    @cache_util.memoized_func(
+        key=lambda *args, **kwargs: f"db:{{}}:schema:{kwargs.get('schema')}:table_list",  # type: ignore
+        attribute_in_key="id",
+    )
+    def get_all_table_names_in_schema(
+        self,
+        schema: str,
+        cache: bool = False,
+        cache_timeout: Optional[int] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments.
+
+        For unused parameters, they are referenced in
+        cache_util.memoized_func decorator.
+
+        :param schema: schema name
+        :param cache: whether cache is enabled for the function
+        :param cache_timeout: timeout in seconds for the cache
+        :param force: whether to force refresh the cache
+        :return: list of tables
+        """
+        try:
+            tables = self.db_engine_spec.get_table_names(
+                database=self, inspector=self.inspector, schema=schema
+            )
+            return [
+                utils.DatasourceName(table=table, schema=schema) for table in tables
+            ]
+        except Exception as ex:  # pylint: disable=broad-except
+            logger.exception(ex)
+            return []
+
+    @cache_util.memoized_func(
+        key=lambda *args, **kwargs: f"db:{{}}:schema:{kwargs.get('schema')}:view_list",  # type: ignore
+        attribute_in_key="id",
+    )
+    def get_all_view_names_in_schema(
+        self,
+        schema: str,
+        cache: bool = False,
+        cache_timeout: Optional[int] = None,
+        force: bool = False,
+    ) -> List[utils.DatasourceName]:
+        """Parameters need to be passed as keyword arguments.
+
+        The parameters that appear unused here are referenced by the
+        cache_util.memoized_func decorator.
+
+        :param schema: schema name
+        :param cache: whether cache is enabled for the function
+        :param cache_timeout: timeout in seconds for the cache
+        :param force: whether to force refresh the cache
+        :return: list of views
+        """
+        try:
+            views = self.db_engine_spec.get_view_names(
+                database=self, inspector=self.inspector, schema=schema
+            )
+            return [utils.DatasourceName(table=view, schema=schema) for view in views]
+        except Exception as ex:  # pylint: disable=broad-except
+            logger.exception(ex)
+            return []
+
+    @cache_util.memoized_func(
+        key=lambda *args, **kwargs: "db:{}:schema_list", attribute_in_key="id"
+    )
+    def get_all_schema_names(
+        self,
+        cache: bool = False,
+        cache_timeout: Optional[int] = None,
+        force: bool = False,
+    ) -> List[str]:
+        """Parameters need to be passed as keyword arguments.
+
+        The parameters that appear unused here are referenced by the
+        cache_util.memoized_func decorator.
+
+        :param cache: whether cache is enabled for the function
+        :param cache_timeout: timeout in seconds for the cache
+        :param force: whether to force refresh the cache
+        :return: schema list
+        """
+        return self.db_engine_spec.get_schema_names(self.inspector)
+
+    @property
+    def db_engine_spec(self) -> Type[db_engine_specs.BaseEngineSpec]:
+        return db_engine_specs.engines.get(self.backend, db_engine_specs.BaseEngineSpec)
+
+    @classmethod
+    def get_db_engine_spec_for_backend(
+        cls, backend: str
+    ) -> Type[db_engine_specs.BaseEngineSpec]:
+        return db_engine_specs.engines.get(backend, db_engine_specs.BaseEngineSpec)
+
+    def grains(self) -> Tuple[TimeGrain, ...]:
+        """Defines time granularity database-specific expressions.
+
+        The idea here is to make it easy for users to change the time grain
+        from a datetime (maybe the source grain is arbitrary timestamps, daily
+        or 5 minutes increments) to another, "truncated" datetime. Since
+        each database has slightly different but similar datetime functions,
+        this allows a mapping between database engines and actual functions.
+        """
+        return self.db_engine_spec.get_time_grains()
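+
+    # Illustrative sketch of the mapping described above (hypothetical
+    # expressions, not taken from Superset's actual engine specs): a daily
+    # ("P1D") grain might translate a raw timestamp column as
+    #   PostgreSQL: DATE_TRUNC('day', dttm_col)
+    #   MySQL:      DATE(dttm_col)
+    # so arbitrary timestamps group cleanly by day in either dialect.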
+
+    def get_extra(self) -> Dict[str, Any]:
+        return self.db_engine_spec.get_extra_params(self)
+
+    def get_encrypted_extra(self) -> Dict[str, Any]:
+        encrypted_extra = {}
+        if self.encrypted_extra:
+            try:
+                encrypted_extra = json.loads(self.encrypted_extra)
+            except json.JSONDecodeError as ex:
+                logger.error(ex)
+                raise ex
+        return encrypted_extra
+
+    def get_table(self, table_name: str, schema: Optional[str] = None) -> Table:
+        extra = self.get_extra()
+        meta = MetaData(**extra.get("metadata_params", {}))
+        return Table(
+            table_name,
+            meta,
+            schema=schema or None,
+            autoload=True,
+            autoload_with=self.get_sqla_engine(),
+        )
+
+    def get_columns(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        return self.db_engine_spec.get_columns(self.inspector, table_name, schema)
+
+    def get_indexes(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        return self.inspector.get_indexes(table_name, schema)
+
+    def get_pk_constraint(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> Dict[str, Any]:
+        return self.inspector.get_pk_constraint(table_name, schema)
+
+    def get_foreign_keys(
+        self, table_name: str, schema: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        return self.inspector.get_foreign_keys(table_name, schema)
+
+    def get_schema_access_for_csv_upload(  # pylint: disable=invalid-name
+        self,
+    ) -> List[str]:
+        allowed_databases = self.get_extra().get("schemas_allowed_for_csv_upload", [])
+        if hasattr(g, "user"):
+            extra_allowed_databases = config["ALLOWED_USER_CSV_SCHEMA_FUNC"](
+                self, g.user
+            )
+            allowed_databases += extra_allowed_databases
+        return sorted(set(allowed_databases))
+
+    @property
+    def sqlalchemy_uri_decrypted(self) -> str:
+        conn = sqla.engine.url.make_url(self.sqlalchemy_uri)
+        if custom_password_store:
+            conn.password = custom_password_store(conn)
+        else:
+            conn.password = self.password
+        return str(conn)
+
+    @property
+    def sql_url(self) -> str:
+        return f"/superset/sql/{self.id}/"
+
+    @hybrid_property
+    def perm(self) -> str:
+        return f"[{self.database_name}].(id:{self.id})"
+
+    @perm.expression  # type: ignore
+    def perm(cls) -> str:  # pylint: disable=no-self-argument
+        return (
+            "[" + cls.database_name + "].(id:" + expression.cast(cls.id, String) + ")"
+        )
+
+    def get_perm(self) -> str:
+        return self.perm  # type: ignore
+
+    def has_table(self, table: Table) -> bool:
+        engine = self.get_sqla_engine()
+        return engine.has_table(table.table_name, table.schema or None)
+
+    def has_table_by_name(self, table_name: str, schema: Optional[str] = None) -> bool:
+        engine = self.get_sqla_engine()
+        return engine.has_table(table_name, schema)
+
+    @utils.memoized
+    def get_dialect(self) -> Dialect:
+        sqla_url = url.make_url(self.sqlalchemy_uri_decrypted)
+        return sqla_url.get_dialect()()  # pylint: disable=no-member
+
+
+sqla.event.listen(Database, "after_insert", security_manager.set_perm)
+sqla.event.listen(Database, "after_update", security_manager.set_perm)
+
+
+class Log(Model):  # pylint: disable=too-few-public-methods
+
+    """ORM object used to log Superset actions to the database"""
+
+    __tablename__ = "logs"
+
+    id = Column(Integer, primary_key=True)
+    action = Column(String(512))
+    user_id = Column(Integer, ForeignKey("ab_user.id"))
+    dashboard_id = Column(Integer)
+    slice_id = Column(Integer)
+    json = Column(Text)
+    user = relationship(
+        security_manager.user_model, backref="logs", foreign_keys=[user_id]
+    )
+    dttm = Column(DateTime, default=datetime.utcnow)
+    duration_ms = Column(Integer)
+    referrer = Column(String(1024))
+
+
+class FavStar(Model):  # pylint: disable=too-few-public-methods
+    __tablename__ = "favstar"
+
+    id = Column(Integer, primary_key=True)
+    user_id = Column(Integer, ForeignKey("ab_user.id"))
+    class_name = Column(String(50))
+    obj_id = Column(Integer)
+    dttm = Column(DateTime, default=datetime.utcnow)
+
+
+# events for updating tags
+if is_feature_enabled("TAGGING_SYSTEM"):
+    sqla.event.listen(Dashboard, "after_insert", DashboardUpdater.after_insert)
+    sqla.event.listen(Dashboard, "after_update", DashboardUpdater.after_update)
+    sqla.event.listen(Dashboard, "after_delete", DashboardUpdater.after_delete)
+    sqla.event.listen(FavStar, "after_insert", FavStarUpdater.after_insert)
+    sqla.event.listen(FavStar, "after_delete", FavStarUpdater.after_delete)

+ 197 - 0
data/purposeCombined/BI/etl_testing.py

@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue May 12 00:00:00 2020
+
+@author: Shaji
+"""
+
+from . import exceptions
+
+from datetime import datetime
+import os
+import pandas as pd
+
+def column_level_check(source_df,target_df,primary_keys):
+    """
+    Usage: [arg1]:[Pandas DataFrame - source], [arg2]:[Pandas DataFrame - target], [arg3]:[Primary keys (separated by comma)]
+    Description: Performs column level testing between two DataFrames by joining using the primary keys.
+    Returns: [Mismatch Count], [Test Log (list)], [Pandas dataframe - mismatch (if any)]
+    """
+    global execution_status
+
+    systime=datetime.now()
+
+    start_time=systime.strftime("%Y-%m-%d %H:%M:%S")
+
+    log_list=[]
+
+    execution_status='RUNNING'
+
+    log_list.append('START TIME: '+start_time)
+
+    key_list=primary_keys.split(',')
+
+    src=source_df
+    tgt=target_df
+
+    log_list.append(str(datetime.now())+': DIFFERENTIATING SOURCE AND TARGET COLUMNS')
+    if execution_status!='FAILED':
+        try:
+            src_k=[]
+            src_columns=[]
+            for i in src.columns:
+                if str.lower(i) in [str.lower(key) for key in key_list]:
+                    src_columns.append(str.lower(i))
+                    src_k.append(str.lower(i))
+                else:
+                    src_columns.append(str(i) + '_src')
+            src.columns = src_columns
+            tgt_k=[]
+            tgt_columns=[]
+            for i in tgt.columns:
+                if str.lower(i) in [str.lower(key) for key in key_list]:
+                    tgt_columns.append(str.lower(i))
+                    tgt_k.append(str.lower(i))
+                else:
+                    tgt_columns.append(str(i) + '_tgt')
+            tgt.columns = tgt_columns
+        except Exception as e:
+            print('Failed while DIFFERENTIATING SOURCE AND TARGET COLUMNS: '+str(e))
+            log_list.append('Failed while DIFFERENTIATING SOURCE AND TARGET COLUMNS: '+str(e))
+            execution_status='FAILED'
+    log_list.append(str(datetime.now())+': CHECKING IF THE GROUP BY MAKES THE RECORD LEVEL SAME AS ACTUAL')
+    if execution_status!='FAILED':
+        try:
+            index_unique_flag=[]
+            if src.groupby(src_k).count().shape[0]==src.shape[0]:
+                index_unique_flag.append(True)
+            else:
+                index_unique_flag.append(False)
+            if tgt.groupby(tgt_k).count().shape[0]==tgt.shape[0]:
+                index_unique_flag.append(True)
+            else:
+                index_unique_flag.append(False)
+        except Exception as e:
+            print('Failed while CHECKING IF THE GROUP BY MAKES THE RECORD LEVEL SAME AS ACTUAL: '+str(e))
+            log_list.append('Failed while CHECKING IF THE GROUP BY MAKES THE RECORD LEVEL SAME AS ACTUAL: '+str(e))
+            execution_status='FAILED'
+    if execution_status!='FAILED':
+        try:
+            if all(index_unique_flag)==True:
+                log_list.append(str(datetime.now())+': JOINING THE TABLES')
+                try:
+                    df=tgt.set_index(tgt_k).join(src.set_index(src_k),how='left')
+                except Exception as e:
+                    print('Failed while JOINING THE TABLES: '+str(e))
+                    log_list.append('Failed while JOINING THE TABLES: '+str(e))
+                    execution_status='FAILED'
+                log_list.append(str(datetime.now())+': FINDING THE TARGET COLUMN AND SOURCE COLUMN TO BE COMPARED')
+                if execution_status!='FAILED':
+                    try:
+                        ma_list=[]
+                        for i in range(len(df.columns)):
+                            if df.columns[i][-3:]=='tgt':
+                                for j in range(len(df.columns)):
+                                    if df.columns[j][-3:]=='src':
+                                        if str.lower(df.columns[i][:-4])==str.lower(df.columns[j][:-4]):
+                                            ma_list.append([j,i])
+                        match_cols=''
+                        for i in range(len(ma_list)):
+                            match_cols+=str(i+1)+': '+df.columns[ma_list[i][1]]+' = '+df.columns[ma_list[i][0]]+' , '
+                        log_list.append('Matching columns '+match_cols)
+                    except Exception as e:
+                        print('Failed while FINDING THE TARGET COLUMN AND SOURCE COLUMN TO BE COMPARED: '+str(e))
+                        log_list.append('Failed while FINDING THE TARGET COLUMN AND SOURCE COLUMN TO BE COMPARED: '+str(e))
+                        execution_status='FAILED'
+                log_list.append(str(datetime.now())+': COMPARISON STARTED')
+                if execution_status!='FAILED':
+                    try:
+                        mis_cols=[]
+                        res=[]
+                        index=[]
+                        for i in range(len(ma_list)):
+                            src_series = df[df.columns[ma_list[i][0]]].apply(lambda x: str(x).strip()).astype(str).fillna(str(0))
+                            tgt_series = df[df.columns[ma_list[i][1]]].apply(lambda x: str(x).strip()).astype(str).fillna(str(0))
+                            matches = src_series == tgt_series
+                            if all(matches):
+                                res.append(True)
+                            else:
+                                res.append(False)
+                                mis_cols.append(df.columns[ma_list[i][0]])
+                                mis_cols.append(df.columns[ma_list[i][1]])
+                                for j in range(len(matches)):
+                                    if not matches.iloc[j]:
+                                        index.append(j)
+                        un_df=df[mis_cols].iloc[list(set(index))]
+                    except Exception as e:
+                        print('Failed while COMPARING: '+str(e))
+                        log_list.append('Failed while COMPARING: '+str(e))
+                        execution_status='FAILED'
+                log_list.append(str(datetime.now())+': TEST RESULT:')
+                if execution_status!='FAILED':
+                    try:
+                        if all(res)==True:
+                            mismatch_count=0
+                            print('COLUMN LEVEL CHECK PASSED')
+                            execution_status='SUCCESS'
+                            log_list.append('COLUMN LEVEL CHECK PASSED')
+                        else:
+                            log_list.append((str(len(set(index)))+' records unmatched'))
+                            log_list.append('Column level check Failed')
+                            mismatch_count=len(set(index))
+                            execution_status='SUCCESS'
+                    except Exception as e:
+                        print('Failed while getting the TEST RESULT: '+str(e))
+                        log_list.append('Failed while getting the TEST RESULT: '+str(e))
+                        execution_status='FAILED'
+            else:
+                log_list.append('The records grouped at the level of key columns are not unique')
+        except Exception as e:
+            log_list.append('Failed while CHECKING IF THE GROUP BY MAKES THE RECORD LEVEL SAME AS ACTUAL: '+str(e))
+            execution_status='FAILED'
+    if execution_status=='FAILED':
+        print('Check Logs for the error message')
+        raise exceptions.ExecutionError
+    return mismatch_count,log_list,un_df
+
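+# A minimal usage sketch for column_level_check (illustrative only: the tiny
+# DataFrames below are hypothetical, and the relative `exceptions` import at
+# the top of this module assumes it is executed as part of its package).
+if __name__ == "__main__":
+    _src = pd.DataFrame({"id": [1, 2, 3], "amount": [10, 20, 30]})
+    _tgt = pd.DataFrame({"id": [1, 2, 3], "amount": [10, 25, 30]})
+    # Joins on "id" and compares every non-key column; one value differs here.
+    _count, _log, _diff = column_level_check(_src, _tgt, "id")
+    print(_count)  # expected: 1 (one record differs on "amount")
+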
+def sort_and_compare(source_df,target_df):
+    """
+    Usage: [arg1]:[Pandas DataFrame - source], [arg2]:[Pandas DataFrame - target]
+    Description: Sort and Compare two datasets.
+    Returns: [Mismatch Count], [Test Log (list)], [Pandas dataframe - mismatch (if any)]
+    """
+    log_list=[]
+    col1=source_df.columns
+    col2=target_df.columns
+    cols=list(set(col1.sort_values()).intersection(set(col2.sort_values())))
+    log_list.append('Common column(s): '+', '.join(cols))
+
+    source_df.sort_values(cols, axis=0, ascending=True, inplace=True)
+    target_df.sort_values(cols, axis=0, ascending=True, inplace=True)
+
+    data1=source_df[cols].reset_index(drop=True)
+    data2=target_df[cols].reset_index(drop=True)
+
+    data1.head()
+    data2.head()
+
+    result=data1==data2
+    bool_list=[]
+    mis_cols=[]
+    mis_index=[]
+    for i in cols:
+        if all(result[i])==True:
+            bool_list.append(True)
+        else:
+            bool_list.append(False)
+            mis_cols.append(i)
+            for j in range(len(result[i])):
+                if result[i][j]==False:
+                    mis_index.append(j)
+    un_df=pd.concat([data1.iloc[list(set(mis_index))],data2.iloc[list(set(mis_index))]],axis=1)
+
+    mismatch_count=0
+    if all(bool_list)==True:
+        log_list.append('Records are matching')
+    else:
+        mismatch_count=len(set(mis_index))
+        log_list.append(str(mismatch_count)+' records unmatched')
+        log_list.append('Column(s): '+', '.join(mis_cols))
+    return mismatch_count,log_list,un_df[mis_cols]
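+
+# A minimal usage sketch for sort_and_compare (illustrative, hypothetical data):
+# the two frames hold the same records in a different row order, so after the
+# sort on their common columns no cell-level mismatch remains.
+if __name__ == "__main__":
+    _a = pd.DataFrame({"id": [2, 1, 3], "val": ["y", "x", "z"]})
+    _b = pd.DataFrame({"id": [1, 2, 3], "val": ["x", "y", "z"]})
+    _count, _log, _diff = sort_and_compare(_a, _b)
+    print(_count)  # expected: 0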

+ 33 - 0
data/purposeCombined/BI/examples/__init__.py

@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from .bart_lines import load_bart_lines
+from .birth_names import load_birth_names
+from .country_map import load_country_map_data
+from .css_templates import load_css_templates
+from .deck import load_deck_dash
+from .energy import load_energy
+from .flights import load_flights
+from .long_lat import load_long_lat_data
+from .misc_dashboard import load_misc_dashboard
+from .multi_line import load_multi_line
+from .multiformat_time_series import load_multiformat_time_series
+from .paris import load_paris_iris_geojson
+from .random_time_series import load_random_time_series_data
+from .sf_population_polygons import load_sf_population_polygons
+from .tabbed_dashboard import load_tabbed_dashboard
+from .unicode_test_data import load_unicode_test_data
+from .world_bank import load_world_bank_health_n_pop

+ 63 - 0
data/purposeCombined/BI/examples/bart_lines.py

@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+
+import pandas as pd
+import polyline
+from sqlalchemy import String, Text
+
+from superset import db
+from superset.utils.core import get_example_database
+
+from .helpers import get_example_data, TBL
+
+
+def load_bart_lines(only_metadata: bool = False, force: bool = False) -> None:
+    tbl_name = "bart_lines"
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        content = get_example_data("bart-lines.json.gz")
+        df = pd.read_json(content, encoding="latin-1")
+        df["path_json"] = df.path.map(json.dumps)
+        df["polyline"] = df.path.map(polyline.encode)
+        del df["path"]
+
+        df.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "color": String(255),
+                "name": String(255),
+                "polyline": Text,
+                "path_json": Text,
+            },
+            index=False,
+        )
+
+    print("Creating table {} reference".format(tbl_name))
+    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not tbl:
+        tbl = TBL(table_name=tbl_name)
+    tbl.description = "BART lines"
+    tbl.database = database
+    db.session.merge(tbl)
+    db.session.commit()
+    tbl.fetch_metadata()

+ 763 - 0
data/purposeCombined/BI/examples/birth_names.py

@@ -0,0 +1,763 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+import textwrap
+from typing import Dict, Union
+
+import pandas as pd
+from sqlalchemy import DateTime, String
+from sqlalchemy.sql import column
+
+from superset import db, security_manager
+from superset.connectors.sqla.models import SqlMetric, TableColumn
+from superset.models.core import Database
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+from superset.utils.core import get_example_database
+
+from .helpers import (
+    config,
+    get_example_data,
+    get_slice_json,
+    merge_slice,
+    misc_dash_slices,
+    TBL,
+    update_slice_ids,
+)
+
+
+def gen_filter(
+    subject: str, comparator: str, operator: str = "=="
+) -> Dict[str, Union[bool, str]]:
+    return {
+        "clause": "WHERE",
+        "comparator": comparator,
+        "expressionType": "SIMPLE",
+        "operator": operator,
+        "subject": subject,
+    }
+
+
+def load_data(tbl_name: str, database: Database) -> None:
+    pdf = pd.read_json(get_example_data("birth_names.json.gz"))
+    pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
+    pdf.to_sql(
+        tbl_name,
+        database.get_sqla_engine(),
+        if_exists="replace",
+        chunksize=500,
+        dtype={
+            "ds": DateTime,
+            "gender": String(16),
+            "state": String(10),
+            "name": String(255),
+        },
+        index=False,
+    )
+    print("Done loading table!")
+    print("-" * 80)
+
+
+def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
+    """Loading birth name dataset from a zip file in the repo"""
+    # pylint: disable=too-many-locals
+    tbl_name = "birth_names"
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        load_data(tbl_name, database)
+
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not obj:
+        print(f"Creating table [{tbl_name}] reference")
+        obj = TBL(table_name=tbl_name)
+        db.session.add(obj)
+    obj.main_dttm_col = "ds"
+    obj.database = database
+    obj.filter_select_enabled = True
+
+    if not any(col.column_name == "num_california" for col in obj.columns):
+        col_state = str(column("state").compile(db.engine))
+        col_num = str(column("num").compile(db.engine))
+        obj.columns.append(
+            TableColumn(
+                column_name="num_california",
+                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
+            )
+        )
+
+    if not any(col.metric_name == "sum__num" for col in obj.metrics):
+        col = str(column("num").compile(db.engine))
+        obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))
+
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
+    metrics = [
+        {
+            "expressionType": "SIMPLE",
+            "column": {"column_name": "num", "type": "BIGINT"},
+            "aggregate": "SUM",
+            "label": "Births",
+            "optionName": "metric_11",
+        }
+    ]
+    metric = "sum__num"
+
+    defaults = {
+        "compare_lag": "10",
+        "compare_suffix": "o10Y",
+        "limit": "25",
+        "granularity_sqla": "ds",
+        "groupby": [],
+        "row_limit": config["ROW_LIMIT"],
+        "since": "100 years ago",
+        "until": "now",
+        "viz_type": "table",
+        "markup_type": "markdown",
+    }
+
+    admin = security_manager.find_user("admin")
+
+    print("Creating some slices")
+    slices = [
+        Slice(
+            slice_name="Participants",
+            viz_type="big_number",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="big_number",
+                granularity_sqla="ds",
+                compare_lag="5",
+                compare_suffix="over 5Y",
+                metric=metric,
+            ),
+        ),
+        Slice(
+            slice_name="Genders",
+            viz_type="pie",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults, viz_type="pie", groupby=["gender"], metric=metric
+            ),
+        ),
+        Slice(
+            slice_name="Trends",
+            viz_type="line",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="line",
+                groupby=["name"],
+                granularity_sqla="ds",
+                rich_tooltip=True,
+                show_legend=True,
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Genders by State",
+            viz_type="dist_bar",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                adhoc_filters=[
+                    {
+                        "clause": "WHERE",
+                        "expressionType": "SIMPLE",
+                        "filterOptionName": "2745eae5",
+                        "comparator": ["other"],
+                        "operator": "NOT IN",
+                        "subject": "state",
+                    }
+                ],
+                viz_type="dist_bar",
+                metrics=[
+                    {
+                        "expressionType": "SIMPLE",
+                        "column": {"column_name": "sum_boys", "type": "BIGINT(20)"},
+                        "aggregate": "SUM",
+                        "label": "Boys",
+                        "optionName": "metric_11",
+                    },
+                    {
+                        "expressionType": "SIMPLE",
+                        "column": {"column_name": "sum_girls", "type": "BIGINT(20)"},
+                        "aggregate": "SUM",
+                        "label": "Girls",
+                        "optionName": "metric_12",
+                    },
+                ],
+                groupby=["state"],
+            ),
+        ),
+        Slice(
+            slice_name="Girls",
+            viz_type="table",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                groupby=["name"],
+                adhoc_filters=[gen_filter("gender", "girl")],
+                row_limit=50,
+                timeseries_limit_metric="sum__num",
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Girl Name Cloud",
+            viz_type="word_cloud",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="word_cloud",
+                size_from="10",
+                series="name",
+                size_to="70",
+                rotation="square",
+                limit="100",
+                adhoc_filters=[gen_filter("gender", "girl")],
+                metric=metric,
+            ),
+        ),
+        Slice(
+            slice_name="Boys",
+            viz_type="table",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                groupby=["name"],
+                adhoc_filters=[gen_filter("gender", "boy")],
+                row_limit=50,
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Boy Name Cloud",
+            viz_type="word_cloud",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="word_cloud",
+                size_from="10",
+                series="name",
+                size_to="70",
+                rotation="square",
+                limit="100",
+                adhoc_filters=[gen_filter("gender", "boy")],
+                metric=metric,
+            ),
+        ),
+        Slice(
+            slice_name="Top 10 Girl Name Share",
+            viz_type="area",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                adhoc_filters=[gen_filter("gender", "girl")],
+                comparison_type="values",
+                groupby=["name"],
+                limit=10,
+                stacked_style="expand",
+                time_grain_sqla="P1D",
+                viz_type="area",
+                x_axis_format="smart_date",
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Top 10 Boy Name Share",
+            viz_type="area",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                adhoc_filters=[gen_filter("gender", "boy")],
+                comparison_type="values",
+                groupby=["name"],
+                limit=10,
+                stacked_style="expand",
+                time_grain_sqla="P1D",
+                viz_type="area",
+                x_axis_format="smart_date",
+                metrics=metrics,
+            ),
+        ),
+    ]
+    misc_slices = [
+        Slice(
+            slice_name="Average and Sum Trends",
+            viz_type="dual_line",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="dual_line",
+                metric={
+                    "expressionType": "SIMPLE",
+                    "column": {"column_name": "num", "type": "BIGINT(20)"},
+                    "aggregate": "AVG",
+                    "label": "AVG(num)",
+                    "optionName": "metric_vgops097wej_g8uff99zhk7",
+                },
+                metric_2="sum__num",
+                granularity_sqla="ds",
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Num Births Trend",
+            viz_type="line",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(defaults, viz_type="line", metrics=metrics),
+        ),
+        Slice(
+            slice_name="Daily Totals",
+            viz_type="table",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            created_by=admin,
+            params=get_slice_json(
+                defaults,
+                groupby=["ds"],
+                since="40 years ago",
+                until="now",
+                viz_type="table",
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Number of California Births",
+            viz_type="big_number_total",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                metric={
+                    "expressionType": "SIMPLE",
+                    "column": {
+                        "column_name": "num_california",
+                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
+                    },
+                    "aggregate": "SUM",
+                    "label": "SUM(num_california)",
+                },
+                viz_type="big_number_total",
+                granularity_sqla="ds",
+            ),
+        ),
+        Slice(
+            slice_name="Top 10 California Names Timeseries",
+            viz_type="line",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                metrics=[
+                    {
+                        "expressionType": "SIMPLE",
+                        "column": {
+                            "column_name": "num_california",
+                            "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
+                        },
+                        "aggregate": "SUM",
+                        "label": "SUM(num_california)",
+                    }
+                ],
+                viz_type="line",
+                granularity_sqla="ds",
+                groupby=["name"],
+                timeseries_limit_metric={
+                    "expressionType": "SIMPLE",
+                    "column": {
+                        "column_name": "num_california",
+                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
+                    },
+                    "aggregate": "SUM",
+                    "label": "SUM(num_california)",
+                },
+                limit="10",
+            ),
+        ),
+        Slice(
+            slice_name="Names Sorted by Num in California",
+            viz_type="table",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                metrics=metrics,
+                groupby=["name"],
+                row_limit=50,
+                timeseries_limit_metric={
+                    "expressionType": "SIMPLE",
+                    "column": {
+                        "column_name": "num_california",
+                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
+                    },
+                    "aggregate": "SUM",
+                    "label": "SUM(num_california)",
+                },
+            ),
+        ),
+        Slice(
+            slice_name="Number of Girls",
+            viz_type="big_number_total",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                metric=metric,
+                viz_type="big_number_total",
+                granularity_sqla="ds",
+                adhoc_filters=[gen_filter("gender", "girl")],
+                subheader="total female participants",
+            ),
+        ),
+        Slice(
+            slice_name="Pivot Table",
+            viz_type="pivot_table",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="pivot_table",
+                groupby=["name"],
+                columns=["state"],
+                metrics=metrics,
+            ),
+        ),
+    ]
+    for slc in slices:
+        merge_slice(slc)
+
+    for slc in misc_slices:
+        merge_slice(slc)
+        misc_dash_slices.add(slc.slice_name)
+
+    print("Creating a dashboard")
+    dash = db.session.query(Dashboard).filter_by(slug="births").first()
+
+    if not dash:
+        dash = Dashboard()
+        db.session.add(dash)
+    dash.published = True
+    dash.json_metadata = textwrap.dedent(
+        """\
+    {
+        "label_colors": {
+            "Girls": "#FF69B4",
+            "Boys": "#ADD8E6",
+            "girl": "#FF69B4",
+            "boy": "#ADD8E6"
+        }
+    }"""
+    )
+    js = textwrap.dedent(
+        # pylint: disable=line-too-long
+        """\
+        {
+          "CHART-6GdlekVise": {
+            "children": [],
+            "id": "CHART-6GdlekVise",
+            "meta": {
+              "chartId": 5547,
+              "height": 50,
+              "sliceName": "Top 10 Girl Name Share",
+              "width": 5
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-eh0w37bWbR"
+            ],
+            "type": "CHART"
+          },
+          "CHART-6n9jxb30JG": {
+            "children": [],
+            "id": "CHART-6n9jxb30JG",
+            "meta": {
+              "chartId": 5540,
+              "height": 36,
+              "sliceName": "Genders by State",
+              "width": 5
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW--EyBZQlDi"
+            ],
+            "type": "CHART"
+          },
+          "CHART-Jj9qh1ol-N": {
+            "children": [],
+            "id": "CHART-Jj9qh1ol-N",
+            "meta": {
+              "chartId": 5545,
+              "height": 50,
+              "sliceName": "Boy Name Cloud",
+              "width": 4
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-kzWtcvo8R1"
+            ],
+            "type": "CHART"
+          },
+          "CHART-ODvantb_bF": {
+            "children": [],
+            "id": "CHART-ODvantb_bF",
+            "meta": {
+              "chartId": 5548,
+              "height": 50,
+              "sliceName": "Top 10 Boy Name Share",
+              "width": 5
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-kzWtcvo8R1"
+            ],
+            "type": "CHART"
+          },
+          "CHART-PAXUUqwmX9": {
+            "children": [],
+            "id": "CHART-PAXUUqwmX9",
+            "meta": {
+              "chartId": 5538,
+              "height": 34,
+              "sliceName": "Genders",
+              "width": 3
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-2n0XgiHDgs"
+            ],
+            "type": "CHART"
+          },
+          "CHART-_T6n_K9iQN": {
+            "children": [],
+            "id": "CHART-_T6n_K9iQN",
+            "meta": {
+              "chartId": 5539,
+              "height": 36,
+              "sliceName": "Trends",
+              "width": 7
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW--EyBZQlDi"
+            ],
+            "type": "CHART"
+          },
+          "CHART-eNY0tcE_ic": {
+            "children": [],
+            "id": "CHART-eNY0tcE_ic",
+            "meta": {
+              "chartId": 5537,
+              "height": 34,
+              "sliceName": "Participants",
+              "width": 3
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-2n0XgiHDgs"
+            ],
+            "type": "CHART"
+          },
+          "CHART-g075mMgyYb": {
+            "children": [],
+            "id": "CHART-g075mMgyYb",
+            "meta": {
+              "chartId": 5541,
+              "height": 50,
+              "sliceName": "Girls",
+              "width": 3
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-eh0w37bWbR"
+            ],
+            "type": "CHART"
+          },
+          "CHART-n-zGGE6S1y": {
+            "children": [],
+            "id": "CHART-n-zGGE6S1y",
+            "meta": {
+              "chartId": 5542,
+              "height": 50,
+              "sliceName": "Girl Name Cloud",
+              "width": 4
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-eh0w37bWbR"
+            ],
+            "type": "CHART"
+          },
+          "CHART-vJIPjmcbD3": {
+            "children": [],
+            "id": "CHART-vJIPjmcbD3",
+            "meta": {
+              "chartId": 5543,
+              "height": 50,
+              "sliceName": "Boys",
+              "width": 3
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-kzWtcvo8R1"
+            ],
+            "type": "CHART"
+          },
+          "DASHBOARD_VERSION_KEY": "v2",
+          "GRID_ID": {
+            "children": [
+              "ROW-2n0XgiHDgs",
+              "ROW--EyBZQlDi",
+              "ROW-eh0w37bWbR",
+              "ROW-kzWtcvo8R1"
+            ],
+            "id": "GRID_ID",
+            "parents": [
+              "ROOT_ID"
+            ],
+            "type": "GRID"
+          },
+          "HEADER_ID": {
+            "id": "HEADER_ID",
+            "meta": {
+              "text": "Births"
+            },
+            "type": "HEADER"
+          },
+          "MARKDOWN-zaflB60tbC": {
+            "children": [],
+            "id": "MARKDOWN-zaflB60tbC",
+            "meta": {
+              "code": "<div style=\\"text-align:center\\">  <h1>Birth Names Dashboard</h1>  <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>",
+              "height": 34,
+              "width": 6
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID",
+              "ROW-2n0XgiHDgs"
+            ],
+            "type": "MARKDOWN"
+          },
+          "ROOT_ID": {
+            "children": [
+              "GRID_ID"
+            ],
+            "id": "ROOT_ID",
+            "type": "ROOT"
+          },
+          "ROW--EyBZQlDi": {
+            "children": [
+              "CHART-_T6n_K9iQN",
+              "CHART-6n9jxb30JG"
+            ],
+            "id": "ROW--EyBZQlDi",
+            "meta": {
+              "background": "BACKGROUND_TRANSPARENT"
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID"
+            ],
+            "type": "ROW"
+          },
+          "ROW-2n0XgiHDgs": {
+            "children": [
+              "CHART-eNY0tcE_ic",
+              "MARKDOWN-zaflB60tbC",
+              "CHART-PAXUUqwmX9"
+            ],
+            "id": "ROW-2n0XgiHDgs",
+            "meta": {
+              "background": "BACKGROUND_TRANSPARENT"
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID"
+            ],
+            "type": "ROW"
+          },
+          "ROW-eh0w37bWbR": {
+            "children": [
+              "CHART-g075mMgyYb",
+              "CHART-n-zGGE6S1y",
+              "CHART-6GdlekVise"
+            ],
+            "id": "ROW-eh0w37bWbR",
+            "meta": {
+              "background": "BACKGROUND_TRANSPARENT"
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID"
+            ],
+            "type": "ROW"
+          },
+          "ROW-kzWtcvo8R1": {
+            "children": [
+              "CHART-vJIPjmcbD3",
+              "CHART-Jj9qh1ol-N",
+              "CHART-ODvantb_bF"
+            ],
+            "id": "ROW-kzWtcvo8R1",
+            "meta": {
+              "background": "BACKGROUND_TRANSPARENT"
+            },
+            "parents": [
+              "ROOT_ID",
+              "GRID_ID"
+            ],
+            "type": "ROW"
+          }
+        }
+        """  # pylint: enable=line-too-long
+    )
+    pos = json.loads(js)
+    # dashboard v2 doesn't allow adding markup slices
+    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
+    update_slice_ids(pos, dash.slices)
+    dash.dashboard_title = "USA Births Names"
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slug = "births"
+    db.session.commit()

+ 373 - 0
data/purposeCombined/BI/examples/countries.md

@@ -0,0 +1,373 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+This data was downloaded from the World Bank's
+[Health Nutrition and Population Statistics data catalog](https://datacatalog.worldbank.org/dataset/health-nutrition-and-population-statistics).
+
+Here's the script that was used to massage the data:
+
+    DIR = ""
+    df_country = pd.read_csv(DIR + '/HNP_Country.csv')
+    df_country.columns = ['country_code'] + list(df_country.columns[1:])
+    df_country = df_country[['country_code', 'Region']]
+    df_country.columns = ['country_code', 'region']
+
+    df = pd.read_csv(DIR + '/HNP_Data.csv')
+    del df['Unnamed: 60']
+    df.columns = ['country_name', 'country_code'] + list(df.columns[2:])
+    ndf = df.merge(df_country, how='inner')
+
+    dims = ('country_name', 'country_code', 'region')
+    vv = [str(i) for i in range(1960, 2015)]
+    mdf = pd.melt(ndf, id_vars=dims + ('Indicator Code',), value_vars=vv)
+    mdf['year'] = mdf.variable + '-01-01'
+    dims = dims + ('year',)
+
+    pdf = mdf.pivot_table(values='value', columns='Indicator Code', index=dims)
+    pdf = pdf.reset_index()
+    pdf.to_csv(DIR + '/countries.csv')
+    pdf.to_json(DIR + '/countries.json', orient='records')
+
+Here's the description of the metrics available:
+
+Series Code | Indicator Name
+--- | ---
+NY.GNP.PCAP.CD | GNI per capita, Atlas method (current US$)
+SE.ADT.1524.LT.FM.ZS | Literacy rate, youth (ages 15-24), gender parity index (GPI)
+SE.ADT.1524.LT.MA.ZS | Literacy rate, youth male (% of males ages 15-24)
+SE.ADT.1524.LT.ZS | Literacy rate, youth total (% of people ages 15-24)
+SE.ADT.LITR.FE.ZS | Literacy rate, adult female (% of females ages 15 and above)
+SE.ADT.LITR.MA.ZS | Literacy rate, adult male (% of males ages 15 and above)
+SE.ADT.LITR.ZS | Literacy rate, adult total (% of people ages 15 and above)
+SE.ENR.ORPH | Ratio of school attendance of orphans to school attendance of non-orphans ages 10-14
+SE.PRM.CMPT.FE.ZS | Primary completion rate, female (% of relevant age group)
+SE.PRM.CMPT.MA.ZS | Primary completion rate, male (% of relevant age group)
+SE.PRM.CMPT.ZS | Primary completion rate, total (% of relevant age group)
+SE.PRM.ENRR | School enrollment, primary (% gross)
+SE.PRM.ENRR.FE | School enrollment, primary, female (% gross)
+SE.PRM.ENRR.MA | School enrollment, primary, male (% gross)
+SE.PRM.NENR | School enrollment, primary (% net)
+SE.PRM.NENR.FE | School enrollment, primary, female (% net)
+SE.PRM.NENR.MA | School enrollment, primary, male (% net)
+SE.SEC.ENRR | School enrollment, secondary (% gross)
+SE.SEC.ENRR.FE | School enrollment, secondary, female (% gross)
+SE.SEC.ENRR.MA | School enrollment, secondary, male (% gross)
+SE.SEC.NENR | School enrollment, secondary (% net)
+SE.SEC.NENR.FE | School enrollment, secondary, female (% net)
+SE.SEC.NENR.MA | School enrollment, secondary, male (% net)
+SE.TER.ENRR | School enrollment, tertiary (% gross)
+SE.TER.ENRR.FE | School enrollment, tertiary, female (% gross)
+SE.XPD.TOTL.GD.ZS | Government expenditure on education, total (% of GDP)
+SH.ANM.CHLD.ZS | Prevalence of anemia among children (% of children under 5)
+SH.ANM.NPRG.ZS | Prevalence of anemia among non-pregnant women (% of women ages 15-49)
+SH.CON.1524.FE.ZS | Condom use, population ages 15-24, female (% of females ages 15-24)
+SH.CON.1524.MA.ZS | Condom use, population ages 15-24, male (% of males ages 15-24)
+SH.CON.AIDS.FE.ZS | Condom use at last high-risk sex, adult female (% ages 15-49)
+SH.CON.AIDS.MA.ZS | Condom use at last high-risk sex, adult male (% ages 15-49)
+SH.DTH.COMM.ZS | Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)
+SH.DTH.IMRT | Number of infant deaths
+SH.DTH.INJR.ZS | Cause of death, by injury (% of total)
+SH.DTH.MORT | Number of under-five deaths
+SH.DTH.NCOM.ZS | Cause of death, by non-communicable diseases (% of total)
+SH.DTH.NMRT | Number of neonatal deaths
+SH.DYN.AIDS | Adults (ages 15+) living with HIV
+SH.DYN.AIDS.DH | AIDS estimated deaths (UNAIDS estimates)
+SH.DYN.AIDS.FE.ZS | Women's share of population ages 15+ living with HIV (%)
+SH.DYN.AIDS.ZS | Prevalence of HIV, total (% of population ages 15-49)
+SH.DYN.MORT | Mortality rate, under-5 (per 1,000 live births)
+SH.DYN.MORT.FE | Mortality rate, under-5, female (per 1,000 live births)
+SH.DYN.MORT.MA | Mortality rate, under-5, male (per 1,000 live births)
+SH.DYN.NMRT | Mortality rate, neonatal (per 1,000 live births)
+SH.FPL.SATI.ZS | Met need for contraception (% of married women ages 15-49)
+SH.H2O.SAFE.RU.ZS | Improved water source, rural (% of rural population with access)
+SH.H2O.SAFE.UR.ZS | Improved water source, urban (% of urban population with access)
+SH.H2O.SAFE.ZS | Improved water source (% of population with access)
+SH.HIV.0014 | Children (0-14) living with HIV
+SH.HIV.1524.FE.ZS | Prevalence of HIV, female (% ages 15-24)
+SH.HIV.1524.KW.FE.ZS | Comprehensive correct knowledge of HIV/AIDS, ages 15-24, female (2 prevent ways and reject 3 misconceptions)
+SH.HIV.1524.KW.MA.ZS | Comprehensive correct knowledge of HIV/AIDS, ages 15-24, male (2 prevent ways and reject 3 misconceptions)
+SH.HIV.1524.MA.ZS | Prevalence of HIV, male (% ages 15-24)
+SH.HIV.ARTC.ZS | Antiretroviral therapy coverage (% of people living with HIV)
+SH.HIV.KNOW.FE.ZS | % of females ages 15-49 having comprehensive correct knowledge about HIV (2 prevent ways and reject 3 misconceptions)
+SH.HIV.KNOW.MA.ZS | % of males ages 15-49 having comprehensive correct knowledge about HIV (2 prevent ways and reject 3 misconceptions)
+SH.HIV.ORPH | Children orphaned by HIV/AIDS
+SH.HIV.TOTL | Adults (ages 15+) and children (0-14 years) living with HIV
+SH.IMM.HEPB | Immunization, HepB3 (% of one-year-old children)
+SH.IMM.HIB3 | Immunization, Hib3 (% of children ages 12-23 months)
+SH.IMM.IBCG | Immunization, BCG (% of one-year-old children)
+SH.IMM.IDPT | Immunization, DPT (% of children ages 12-23 months)
+SH.IMM.MEAS | Immunization, measles (% of children ages 12-23 months)
+SH.IMM.POL3 | Immunization, Pol3 (% of one-year-old children)
+SH.MED.BEDS.ZS | Hospital beds (per 1,000 people)
+SH.MED.CMHW.P3 | Community health workers (per 1,000 people)
+SH.MED.NUMW.P3 | Nurses and midwives (per 1,000 people)
+SH.MED.PHYS.ZS | Physicians (per 1,000 people)
+SH.MLR.NETS.ZS | Use of insecticide-treated bed nets (% of under-5 population)
+SH.MLR.PREG.ZS | Use of any antimalarial drug (% of pregnant women)
+SH.MLR.SPF2.ZS | Use of Intermittent Preventive Treatment of malaria, 2+ doses of SP/Fansidar (% of pregnant women)
+SH.MLR.TRET.ZS | Children with fever receiving antimalarial drugs (% of children under age 5 with fever)
+SH.MMR.DTHS | Number of maternal deaths
+SH.MMR.LEVE | Number of weeks of maternity leave
+SH.MMR.RISK | Lifetime risk of maternal death (1 in: rate varies by country)
+SH.MMR.RISK.ZS | Lifetime risk of maternal death (%)
+SH.MMR.WAGE.ZS | Maternal leave benefits (% of wages paid in covered period)
+SH.PRG.ANEM | Prevalence of anemia among pregnant women (%)
+SH.PRG.ARTC.ZS | Antiretroviral therapy coverage (% of pregnant women living with HIV)
+SH.PRG.SYPH.ZS | Prevalence of syphilis (% of women attending antenatal care)
+SH.PRV.SMOK.FE | Smoking prevalence, females (% of adults)
+SH.PRV.SMOK.MA | Smoking prevalence, males (% of adults)
+SH.STA.ACSN | Improved sanitation facilities (% of population with access)
+SH.STA.ACSN.RU | Improved sanitation facilities, rural (% of rural population with access)
+SH.STA.ACSN.UR | Improved sanitation facilities, urban (% of urban population with access)
+SH.STA.ANV4.ZS | Pregnant women receiving prenatal care of at least four visits (% of pregnant women)
+SH.STA.ANVC.ZS | Pregnant women receiving prenatal care (%)
+SH.STA.ARIC.ZS | ARI treatment (% of children under 5 taken to a health provider)
+SH.STA.BFED.ZS | Exclusive breastfeeding (% of children under 6 months)
+SH.STA.BRTC.ZS | Births attended by skilled health staff (% of total)
+SH.STA.BRTW.ZS | Low-birthweight babies (% of births)
+SH.STA.DIAB.ZS | Diabetes prevalence (% of population ages 20 to 79)
+SH.STA.IYCF.ZS | Infant and young child feeding practices, all 3 IYCF (% children ages 6-23 months)
+SH.STA.MALN.FE.ZS | Prevalence of underweight, weight for age, female (% of children under 5)
+SH.STA.MALN.MA.ZS | Prevalence of underweight, weight for age, male (% of children under 5)
+SH.STA.MALN.ZS | Prevalence of underweight, weight for age (% of children under 5)
+SH.STA.MALR | Malaria cases reported
+SH.STA.MMRT | Maternal mortality ratio (modeled estimate, per 100,000 live births)
+SH.STA.MMRT.NE | Maternal mortality ratio (national estimate, per 100,000 live births)
+SH.STA.ORCF.ZS | Diarrhea treatment (% of children under 5 receiving oral rehydration and continued feeding)
+SH.STA.ORTH | Diarrhea treatment (% of children under 5 who received ORS packet)
+SH.STA.OW15.FE.ZS | Prevalence of overweight, female (% of female adults)
+SH.STA.OW15.MA.ZS | Prevalence of overweight, male (% of male adults)
+SH.STA.OW15.ZS | Prevalence of overweight (% of adults)
+SH.STA.OWGH.FE.ZS | Prevalence of overweight, weight for height, female (% of children under 5)
+SH.STA.OWGH.MA.ZS | Prevalence of overweight, weight for height, male (% of children under 5)
+SH.STA.OWGH.ZS | Prevalence of overweight, weight for height (% of children under 5)
+SH.STA.PNVC.ZS | Postnatal care coverage (% mothers)
+SH.STA.STNT.FE.ZS | Prevalence of stunting, height for age, female (% of children under 5)
+SH.STA.STNT.MA.ZS | Prevalence of stunting, height for age, male (% of children under 5)
+SH.STA.STNT.ZS | Prevalence of stunting, height for age (% of children under 5)
+SH.STA.WAST.FE.ZS | Prevalence of wasting, weight for height, female (% of children under 5)
+SH.STA.WAST.MA.ZS | Prevalence of wasting, weight for height, male (% of children under 5)
+SH.STA.WAST.ZS | Prevalence of wasting, weight for height (% of children under 5)
+SH.SVR.WAST.FE.ZS | Prevalence of severe wasting, weight for height, female (% of children under 5)
+SH.SVR.WAST.MA.ZS | Prevalence of severe wasting, weight for height, male (% of children under 5)
+SH.SVR.WAST.ZS | Prevalence of severe wasting, weight for height (% of children under 5)
+SH.TBS.CURE.ZS | Tuberculosis treatment success rate (% of new cases)
+SH.TBS.DTEC.ZS | Tuberculosis case detection rate (%, all forms)
+SH.TBS.INCD | Incidence of tuberculosis (per 100,000 people)
+SH.TBS.MORT | Tuberculosis death rate (per 100,000 people)
+SH.TBS.PREV | Prevalence of tuberculosis (per 100,000 population)
+SH.VAC.TTNS.ZS | Newborns protected against tetanus (%)
+SH.XPD.EXTR.ZS | External resources for health (% of total expenditure on health)
+SH.XPD.OOPC.TO.ZS | Out-of-pocket health expenditure (% of total expenditure on health)
+SH.XPD.OOPC.ZS | Out-of-pocket health expenditure (% of private expenditure on health)
+SH.XPD.PCAP | Health expenditure per capita (current US$)
+SH.XPD.PCAP.PP.KD | Health expenditure per capita, PPP (constant 2011 international $)
+SH.XPD.PRIV | Health expenditure, private (% of total health expenditure)
+SH.XPD.PRIV.ZS | Health expenditure, private (% of GDP)
+SH.XPD.PUBL | Health expenditure, public (% of total health expenditure)
+SH.XPD.PUBL.GX.ZS | Health expenditure, public (% of government expenditure)
+SH.XPD.PUBL.ZS | Health expenditure, public (% of GDP)
+SH.XPD.TOTL.CD | Health expenditure, total (current US$)
+SH.XPD.TOTL.ZS | Health expenditure, total (% of GDP)
+SI.POV.NAHC | Poverty headcount ratio at national poverty lines (% of population)
+SI.POV.RUHC | Rural poverty headcount ratio at national poverty lines (% of rural population)
+SI.POV.URHC | Urban poverty headcount ratio at national poverty lines (% of urban population)
+SL.EMP.INSV.FE.ZS | Share of women in wage employment in the nonagricultural sector (% of total nonagricultural employment)
+SL.TLF.TOTL.FE.ZS | Labor force, female (% of total labor force)
+SL.TLF.TOTL.IN | Labor force, total
+SL.UEM.TOTL.FE.ZS | Unemployment, female (% of female labor force) (modeled ILO estimate)
+SL.UEM.TOTL.MA.ZS | Unemployment, male (% of male labor force) (modeled ILO estimate)
+SL.UEM.TOTL.ZS | Unemployment, total (% of total labor force) (modeled ILO estimate)
+SM.POP.NETM | Net migration
+SN.ITK.DEFC | Number of people who are undernourished
+SN.ITK.DEFC.ZS | Prevalence of undernourishment (% of population)
+SN.ITK.SALT.ZS | Consumption of iodized salt (% of households)
+SN.ITK.VITA.ZS | Vitamin A supplementation coverage rate (% of children ages 6-59 months)
+SP.ADO.TFRT | Adolescent fertility rate (births per 1,000 women ages 15-19)
+SP.DYN.AMRT.FE | Mortality rate, adult, female (per 1,000 female adults)
+SP.DYN.AMRT.MA | Mortality rate, adult, male (per 1,000 male adults)
+SP.DYN.CBRT.IN | Birth rate, crude (per 1,000 people)
+SP.DYN.CDRT.IN | Death rate, crude (per 1,000 people)
+SP.DYN.CONU.ZS | Contraceptive prevalence (% of women ages 15-49)
+SP.DYN.IMRT.FE.IN | Mortality rate, infant, female (per 1,000 live births)
+SP.DYN.IMRT.IN | Mortality rate, infant (per 1,000 live births)
+SP.DYN.IMRT.MA.IN | Mortality rate, infant, male (per 1,000 live births)
+SP.DYN.LE00.FE.IN | Life expectancy at birth, female (years)
+SP.DYN.LE00.IN | Life expectancy at birth, total (years)
+SP.DYN.LE00.MA.IN | Life expectancy at birth, male (years)
+SP.DYN.SMAM.FE | Mean age at first marriage, female
+SP.DYN.SMAM.MA | Mean age at first marriage, male
+SP.DYN.TFRT.IN | Fertility rate, total (births per woman)
+SP.DYN.TO65.FE.ZS | Survival to age 65, female (% of cohort)
+SP.DYN.TO65.MA.ZS | Survival to age 65, male (% of cohort)
+SP.DYN.WFRT | Wanted fertility rate (births per woman)
+SP.HOU.FEMA.ZS | Female headed households (% of households with a female head)
+SP.MTR.1519.ZS | Teenage mothers (% of women ages 15-19 who have had children or are currently pregnant)
+SP.POP.0004.FE | Population ages 0-4, female
+SP.POP.0004.FE.5Y | Population ages 0-4, female (% of female population)
+SP.POP.0004.MA | Population ages 0-4, male
+SP.POP.0004.MA.5Y | Population ages 0-4, male (% of male population)
+SP.POP.0014.FE.ZS | Population ages 0-14, female (% of total)
+SP.POP.0014.MA.ZS | Population ages 0-14, male (% of total)
+SP.POP.0014.TO | Population ages 0-14, total
+SP.POP.0014.TO.ZS | Population ages 0-14 (% of total)
+SP.POP.0509.FE | Population ages 5-9, female
+SP.POP.0509.FE.5Y | Population ages 5-9, female (% of female population)
+SP.POP.0509.MA | Population ages 5-9, male
+SP.POP.0509.MA.5Y | Population ages 5-9, male (% of male population)
+SP.POP.1014.FE | Population ages 10-14, female
+SP.POP.1014.FE.5Y | Population ages 10-14, female (% of female population)
+SP.POP.1014.MA | Population ages 10-14, male
+SP.POP.1014.MA.5Y | Population ages 10-14, male (% of male population)
+SP.POP.1519.FE | Population ages 15-19, female
+SP.POP.1519.FE.5Y | Population ages 15-19, female (% of female population)
+SP.POP.1519.MA | Population ages 15-19, male
+SP.POP.1519.MA.5Y | Population ages 15-19, male (% of male population)
+SP.POP.1564.FE.ZS | Population ages 15-64, female (% of total)
+SP.POP.1564.MA.ZS | Population ages 15-64, male (% of total)
+SP.POP.1564.TO | Population ages 15-64, total
+SP.POP.1564.TO.ZS | Population ages 15-64 (% of total)
+SP.POP.2024.FE | Population ages 20-24, female
+SP.POP.2024.FE.5Y | Population ages 20-24, female (% of female population)
+SP.POP.2024.MA | Population ages 20-24, male
+SP.POP.2024.MA.5Y | Population ages 20-24, male (% of male population)
+SP.POP.2529.FE | Population ages 25-29, female
+SP.POP.2529.FE.5Y | Population ages 25-29, female (% of female population)
+SP.POP.2529.MA | Population ages 25-29, male
+SP.POP.2529.MA.5Y | Population ages 25-29, male (% of male population)
+SP.POP.3034.FE | Population ages 30-34, female
+SP.POP.3034.FE.5Y | Population ages 30-34, female (% of female population)
+SP.POP.3034.MA | Population ages 30-34, male
+SP.POP.3034.MA.5Y | Population ages 30-34, male (% of male population)
+SP.POP.3539.FE | Population ages 35-39, female
+SP.POP.3539.FE.5Y | Population ages 35-39, female (% of female population)
+SP.POP.3539.MA | Population ages 35-39, male
+SP.POP.3539.MA.5Y | Population ages 35-39, male (% of male population)
+SP.POP.4044.FE | Population ages 40-44, female
+SP.POP.4044.FE.5Y | Population ages 40-44, female (% of female population)
+SP.POP.4044.MA | Population ages 40-44, male
+SP.POP.4044.MA.5Y | Population ages 40-44, male (% of male population)
+SP.POP.4549.FE | Population ages 45-49, female
+SP.POP.4549.FE.5Y | Population ages 45-49, female (% of female population)
+SP.POP.4549.MA | Population ages 45-49, male
+SP.POP.4549.MA.5Y | Population ages 45-49, male (% of male population)
+SP.POP.5054.FE | Population ages 50-54, female
+SP.POP.5054.FE.5Y | Population ages 50-54, female (% of female population)
+SP.POP.5054.MA | Population ages 50-54, male
+SP.POP.5054.MA.5Y | Population ages 50-54, male (% of male population)
+SP.POP.5559.FE | Population ages 55-59, female
+SP.POP.5559.FE.5Y | Population ages 55-59, female (% of female population)
+SP.POP.5559.MA | Population ages 55-59, male
+SP.POP.5559.MA.5Y | Population ages 55-59, male (% of male population)
+SP.POP.6064.FE | Population ages 60-64, female
+SP.POP.6064.FE.5Y | Population ages 60-64, female (% of female population)
+SP.POP.6064.MA | Population ages 60-64, male
+SP.POP.6064.MA.5Y | Population ages 60-64, male (% of male population)
+SP.POP.6569.FE | Population ages 65-69, female
+SP.POP.6569.FE.5Y | Population ages 65-69, female (% of female population)
+SP.POP.6569.MA | Population ages 65-69, male
+SP.POP.6569.MA.5Y | Population ages 65-69, male (% of male population)
+SP.POP.65UP.FE.ZS | Population ages 65 and above, female (% of total)
+SP.POP.65UP.MA.ZS | Population ages 65 and above, male (% of total)
+SP.POP.65UP.TO | Population ages 65 and above, total
+SP.POP.65UP.TO.ZS | Population ages 65 and above (% of total)
+SP.POP.7074.FE | Population ages 70-74, female
+SP.POP.7074.FE.5Y | Population ages 70-74, female (% of female population)
+SP.POP.7074.MA | Population ages 70-74, male
+SP.POP.7074.MA.5Y | Population ages 70-74, male (% of male population)
+SP.POP.7579.FE | Population ages 75-79, female
+SP.POP.7579.FE.5Y | Population ages 75-79, female (% of female population)
+SP.POP.7579.MA | Population ages 75-79, male
+SP.POP.7579.MA.5Y | Population ages 75-79, male (% of male population)
+SP.POP.80UP.FE | Population ages 80 and above, female
+SP.POP.80UP.FE.5Y | Population ages 80 and above, female (% of female population)
+SP.POP.80UP.MA | Population ages 80 and above, male
+SP.POP.80UP.MA.5Y | Population ages 80 and above, male (% of male population)
+SP.POP.AG00.FE.IN | Age population, age 0, female, interpolated
+SP.POP.AG00.MA.IN | Age population, age 0, male, interpolated
+SP.POP.AG01.FE.IN | Age population, age 01, female, interpolated
+SP.POP.AG01.MA.IN | Age population, age 01, male, interpolated
+SP.POP.AG02.FE.IN | Age population, age 02, female, interpolated
+SP.POP.AG02.MA.IN | Age population, age 02, male, interpolated
+SP.POP.AG03.FE.IN | Age population, age 03, female, interpolated
+SP.POP.AG03.MA.IN | Age population, age 03, male, interpolated
+SP.POP.AG04.FE.IN | Age population, age 04, female, interpolated
+SP.POP.AG04.MA.IN | Age population, age 04, male, interpolated
+SP.POP.AG05.FE.IN | Age population, age 05, female, interpolated
+SP.POP.AG05.MA.IN | Age population, age 05, male, interpolated
+SP.POP.AG06.FE.IN | Age population, age 06, female, interpolated
+SP.POP.AG06.MA.IN | Age population, age 06, male, interpolated
+SP.POP.AG07.FE.IN | Age population, age 07, female, interpolated
+SP.POP.AG07.MA.IN | Age population, age 07, male, interpolated
+SP.POP.AG08.FE.IN | Age population, age 08, female, interpolated
+SP.POP.AG08.MA.IN | Age population, age 08, male, interpolated
+SP.POP.AG09.FE.IN | Age population, age 09, female, interpolated
+SP.POP.AG09.MA.IN | Age population, age 09, male, interpolated
+SP.POP.AG10.FE.IN | Age population, age 10, female, interpolated
+SP.POP.AG10.MA.IN | Age population, age 10, male
+SP.POP.AG11.FE.IN | Age population, age 11, female, interpolated
+SP.POP.AG11.MA.IN | Age population, age 11, male
+SP.POP.AG12.FE.IN | Age population, age 12, female, interpolated
+SP.POP.AG12.MA.IN | Age population, age 12, male
+SP.POP.AG13.FE.IN | Age population, age 13, female, interpolated
+SP.POP.AG13.MA.IN | Age population, age 13, male
+SP.POP.AG14.FE.IN | Age population, age 14, female, interpolated
+SP.POP.AG14.MA.IN | Age population, age 14, male
+SP.POP.AG15.FE.IN | Age population, age 15, female, interpolated
+SP.POP.AG15.MA.IN | Age population, age 15, male, interpolated
+SP.POP.AG16.FE.IN | Age population, age 16, female, interpolated
+SP.POP.AG16.MA.IN | Age population, age 16, male, interpolated
+SP.POP.AG17.FE.IN | Age population, age 17, female, interpolated
+SP.POP.AG17.MA.IN | Age population, age 17, male, interpolated
+SP.POP.AG18.FE.IN | Age population, age 18, female, interpolated
+SP.POP.AG18.MA.IN | Age population, age 18, male, interpolated
+SP.POP.AG19.FE.IN | Age population, age 19, female, interpolated
+SP.POP.AG19.MA.IN | Age population, age 19, male, interpolated
+SP.POP.AG20.FE.IN | Age population, age 20, female, interpolated
+SP.POP.AG20.MA.IN | Age population, age 20, male, interpolated
+SP.POP.AG21.FE.IN | Age population, age 21, female, interpolated
+SP.POP.AG21.MA.IN | Age population, age 21, male, interpolated
+SP.POP.AG22.FE.IN | Age population, age 22, female, interpolated
+SP.POP.AG22.MA.IN | Age population, age 22, male, interpolated
+SP.POP.AG23.FE.IN | Age population, age 23, female, interpolated
+SP.POP.AG23.MA.IN | Age population, age 23, male, interpolated
+SP.POP.AG24.FE.IN | Age population, age 24, female, interpolated
+SP.POP.AG24.MA.IN | Age population, age 24, male, interpolated
+SP.POP.AG25.FE.IN | Age population, age 25, female, interpolated
+SP.POP.AG25.MA.IN | Age population, age 25, male, interpolated
+SP.POP.BRTH.MF | Sex ratio at birth (male births per female births)
+SP.POP.DPND | Age dependency ratio (% of working-age population)
+SP.POP.DPND.OL | Age dependency ratio, old (% of working-age population)
+SP.POP.DPND.YG | Age dependency ratio, young (% of working-age population)
+SP.POP.GROW | Population growth (annual %)
+SP.POP.TOTL | Population, total
+SP.POP.TOTL.FE.IN | Population, female
+SP.POP.TOTL.FE.ZS | Population, female (% of total)
+SP.POP.TOTL.MA.IN | Population, male
+SP.POP.TOTL.MA.ZS | Population, male (% of total)
+SP.REG.BRTH.RU.ZS | Completeness of birth registration, rural (%)
+SP.REG.BRTH.UR.ZS | Completeness of birth registration, urban (%)
+SP.REG.BRTH.ZS | Completeness of birth registration (%)
+SP.REG.DTHS.ZS | Completeness of death registration with cause-of-death information (%)
+SP.RUR.TOTL | Rural population
+SP.RUR.TOTL.ZG | Rural population growth (annual %)
+SP.RUR.TOTL.ZS | Rural population (% of total population)
+SP.URB.GROW | Urban population growth (annual %)
+SP.URB.TOTL | Urban population
+SP.URB.TOTL.IN.ZS | Urban population (% of total)
+SP.UWT.TFRT | Unmet need for contraception (% of married women ages 15-49)
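
The block above is a flat listing of World Bank indicator codes, one "CODE | Description" pair per line. A minimal sketch of how such a listing could be loaded into a code-to-description lookup is given below; the file name "indicators.txt" and the helper name "load_indicators" are illustrative assumptions, not names taken from this commit.

    from typing import Dict


    def load_indicators(path: str = "indicators.txt") -> Dict[str, str]:
        # Illustrative sketch: parse "CODE | Description" rows into a dict.
        # The path is a placeholder; point it at wherever the listing is saved.
        indicators: Dict[str, str] = {}
        with open(path, encoding="utf-8") as handle:
            for line in handle:
                line = line.strip()
                if not line or "|" not in line:
                    continue  # skip blank or malformed rows
                code, description = line.split("|", 1)
                indicators[code.strip()] = description.strip()
        return indicators


    # Example lookup, based on the rows above:
    # load_indicators()["SP.POP.TOTL"] -> "Population, total"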

+ 2505 - 0
data/purposeCombined/BI/examples/countries.py

@@ -0,0 +1,2505 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""This module contains data related to countries and is used for geo mapping"""
+from typing import Any, Dict, List, Optional
+
+countries: List[Dict[str, Any]] = [
+    {
+        "name": "Angola",
+        "area": 1246700,
+        "cioc": "ANG",
+        "cca2": "AO",
+        "capital": "Luanda",
+        "lat": -12.5,
+        "lng": 18.5,
+        "cca3": "AGO",
+    },
+    {
+        "name": "Algeria",
+        "area": 2381741,
+        "cioc": "ALG",
+        "cca2": "DZ",
+        "capital": "Algiers",
+        "lat": 28,
+        "lng": 3,
+        "cca3": "DZA",
+    },
+    {
+        "name": "Egypt",
+        "area": 1002450,
+        "cioc": "EGY",
+        "cca2": "EG",
+        "capital": "Cairo",
+        "lat": 27,
+        "lng": 30,
+        "cca3": "EGY",
+    },
+    {
+        "name": "Bangladesh",
+        "area": 147570,
+        "cioc": "BAN",
+        "cca2": "BD",
+        "capital": "Dhaka",
+        "lat": 24,
+        "lng": 90,
+        "cca3": "BGD",
+    },
+    {
+        "name": "Niger",
+        "area": 1267000,
+        "cioc": "NIG",
+        "cca2": "NE",
+        "capital": "Niamey",
+        "lat": 16,
+        "lng": 8,
+        "cca3": "NER",
+    },
+    {
+        "name": "Liechtenstein",
+        "area": 160,
+        "cioc": "LIE",
+        "cca2": "LI",
+        "capital": "Vaduz",
+        "lat": 47.26666666,
+        "lng": 9.53333333,
+        "cca3": "LIE",
+    },
+    {
+        "name": "Namibia",
+        "area": 825615,
+        "cioc": "NAM",
+        "cca2": "NA",
+        "capital": "Windhoek",
+        "lat": -22,
+        "lng": 17,
+        "cca3": "NAM",
+    },
+    {
+        "name": "Bulgaria",
+        "area": 110879,
+        "cioc": "BUL",
+        "cca2": "BG",
+        "capital": "Sofia",
+        "lat": 43,
+        "lng": 25,
+        "cca3": "BGR",
+    },
+    {
+        "name": "Bolivia",
+        "area": 1098581,
+        "cioc": "BOL",
+        "cca2": "BO",
+        "capital": "Sucre",
+        "lat": -17,
+        "lng": -65,
+        "cca3": "BOL",
+    },
+    {
+        "name": "Ghana",
+        "area": 238533,
+        "cioc": "GHA",
+        "cca2": "GH",
+        "capital": "Accra",
+        "lat": 8,
+        "lng": -2,
+        "cca3": "GHA",
+    },
+    {
+        "name": "Cocos (Keeling) Islands",
+        "area": 14,
+        "cioc": "",
+        "cca2": "CC",
+        "capital": "West Island",
+        "lat": -12.5,
+        "lng": 96.83333333,
+        "cca3": "CCK",
+    },
+    {
+        "name": "Pakistan",
+        "area": 881912,
+        "cioc": "PAK",
+        "cca2": "PK",
+        "capital": "Islamabad",
+        "lat": 30,
+        "lng": 70,
+        "cca3": "PAK",
+    },
+    {
+        "name": "Cape Verde",
+        "area": 4033,
+        "cioc": "CPV",
+        "cca2": "CV",
+        "capital": "Praia",
+        "lat": 16,
+        "lng": -24,
+        "cca3": "CPV",
+    },
+    {
+        "name": "Jordan",
+        "area": 89342,
+        "cioc": "JOR",
+        "cca2": "JO",
+        "capital": "Amman",
+        "lat": 31,
+        "lng": 36,
+        "cca3": "JOR",
+    },
+    {
+        "name": "Liberia",
+        "area": 111369,
+        "cioc": "LBR",
+        "cca2": "LR",
+        "capital": "Monrovia",
+        "lat": 6.5,
+        "lng": -9.5,
+        "cca3": "LBR",
+    },
+    {
+        "name": "Libya",
+        "area": 1759540,
+        "cioc": "LBA",
+        "cca2": "LY",
+        "capital": "Tripoli",
+        "lat": 25,
+        "lng": 17,
+        "cca3": "LBY",
+    },
+    {
+        "name": "Malaysia",
+        "area": 330803,
+        "cioc": "MAS",
+        "cca2": "MY",
+        "capital": "Kuala Lumpur",
+        "lat": 2.5,
+        "lng": 112.5,
+        "cca3": "MYS",
+    },
+    {
+        "name": "Dominican Republic",
+        "area": 48671,
+        "cioc": "DOM",
+        "cca2": "DO",
+        "capital": "Santo Domingo",
+        "lat": 19,
+        "lng": -70.66666666,
+        "cca3": "DOM",
+    },
+    {
+        "name": "Puerto Rico",
+        "area": 8870,
+        "cioc": "PUR",
+        "cca2": "PR",
+        "capital": "San Juan",
+        "lat": 18.25,
+        "lng": -66.5,
+        "cca3": "PRI",
+    },
+    {
+        "name": "Mayotte",
+        "area": 374,
+        "cioc": "",
+        "cca2": "YT",
+        "capital": "Mamoudzou",
+        "lat": -12.83333333,
+        "lng": 45.16666666,
+        "cca3": "MYT",
+    },
+    {
+        "name": "North Korea",
+        "area": 120538,
+        "cioc": "PRK",
+        "cca2": "KP",
+        "capital": "Pyongyang",
+        "lat": 40,
+        "lng": 127,
+        "cca3": "PRK",
+    },
+    {
+        "name": "Palestine",
+        "area": 6220,
+        "cioc": "PLE",
+        "cca2": "PS",
+        "capital": "Ramallah",
+        "lat": 31.9,
+        "lng": 35.2,
+        "cca3": "PSE",
+    },
+    {
+        "name": "Tanzania",
+        "area": 945087,
+        "cioc": "TAN",
+        "cca2": "TZ",
+        "capital": "Dodoma",
+        "lat": -6,
+        "lng": 35,
+        "cca3": "TZA",
+    },
+    {
+        "name": "Botswana",
+        "area": 582000,
+        "cioc": "BOT",
+        "cca2": "BW",
+        "capital": "Gaborone",
+        "lat": -22,
+        "lng": 24,
+        "cca3": "BWA",
+    },
+    {
+        "name": "Cambodia",
+        "area": 181035,
+        "cioc": "CAM",
+        "cca2": "KH",
+        "capital": "Phnom Penh",
+        "lat": 13,
+        "lng": 105,
+        "cca3": "KHM",
+    },
+    {
+        "name": "Nicaragua",
+        "area": 130373,
+        "cioc": "NCA",
+        "cca2": "NI",
+        "capital": "Managua",
+        "lat": 13,
+        "lng": -85,
+        "cca3": "NIC",
+    },
+    {
+        "name": "Trinidad and Tobago",
+        "area": 5130,
+        "cioc": "TTO",
+        "cca2": "TT",
+        "capital": "Port of Spain",
+        "lat": 11,
+        "lng": -61,
+        "cca3": "TTO",
+    },
+    {
+        "name": "Ethiopia",
+        "area": 1104300,
+        "cioc": "ETH",
+        "cca2": "ET",
+        "capital": "Addis Ababa",
+        "lat": 8,
+        "lng": 38,
+        "cca3": "ETH",
+    },
+    {
+        "name": "Paraguay",
+        "area": 406752,
+        "cioc": "PAR",
+        "cca2": "PY",
+        "capital": "Asuncion",
+        "lat": -23,
+        "lng": -58,
+        "cca3": "PRY",
+    },
+    {
+        "name": "Hong Kong",
+        "area": 1104,
+        "cioc": "HKG",
+        "cca2": "HK",
+        "capital": "City of Victoria",
+        "lat": 22.267,
+        "lng": 114.188,
+        "cca3": "HKG",
+    },
+    {
+        "name": "Saudi Arabia",
+        "area": 2149690,
+        "cioc": "KSA",
+        "cca2": "SA",
+        "capital": "Riyadh",
+        "lat": 25,
+        "lng": 45,
+        "cca3": "SAU",
+    },
+    {
+        "name": "Lebanon",
+        "area": 10452,
+        "cioc": "LIB",
+        "cca2": "LB",
+        "capital": "Beirut",
+        "lat": 33.83333333,
+        "lng": 35.83333333,
+        "cca3": "LBN",
+    },
+    {
+        "name": "Slovenia",
+        "area": 20273,
+        "cioc": "SLO",
+        "cca2": "SI",
+        "capital": "Ljubljana",
+        "lat": 46.11666666,
+        "lng": 14.81666666,
+        "cca3": "SVN",
+    },
+    {
+        "name": "Burkina Faso",
+        "area": 272967,
+        "cioc": "BUR",
+        "cca2": "BF",
+        "capital": "Ouagadougou",
+        "lat": 13,
+        "lng": -2,
+        "cca3": "BFA",
+    },
+    {
+        "name": "Switzerland",
+        "area": 41284,
+        "cioc": "SUI",
+        "cca2": "CH",
+        "capital": "Bern",
+        "lat": 47,
+        "lng": 8,
+        "cca3": "CHE",
+    },
+    {
+        "name": "Mauritania",
+        "area": 1030700,
+        "cioc": "MTN",
+        "cca2": "MR",
+        "capital": "Nouakchott",
+        "lat": 20,
+        "lng": -12,
+        "cca3": "MRT",
+    },
+    {
+        "name": "Croatia",
+        "area": 56594,
+        "cioc": "CRO",
+        "cca2": "HR",
+        "capital": "Zagreb",
+        "lat": 45.16666666,
+        "lng": 15.5,
+        "cca3": "HRV",
+    },
+    {
+        "name": "Chile",
+        "area": 756102,
+        "cioc": "CHI",
+        "cca2": "CL",
+        "capital": "Santiago",
+        "lat": -30,
+        "lng": -71,
+        "cca3": "CHL",
+    },
+    {
+        "name": "China",
+        "area": 9706961,
+        "cioc": "CHN",
+        "cca2": "CN",
+        "capital": "Beijing",
+        "lat": 35,
+        "lng": 105,
+        "cca3": "CHN",
+    },
+    {
+        "name": "Saint Kitts and Nevis",
+        "area": 261,
+        "cioc": "SKN",
+        "cca2": "KN",
+        "capital": "Basseterre",
+        "lat": 17.33333333,
+        "lng": -62.75,
+        "cca3": "KNA",
+    },
+    {
+        "name": "Sierra Leone",
+        "area": 71740,
+        "cioc": "SLE",
+        "cca2": "SL",
+        "capital": "Freetown",
+        "lat": 8.5,
+        "lng": -11.5,
+        "cca3": "SLE",
+    },
+    {
+        "name": "Jamaica",
+        "area": 10991,
+        "cioc": "JAM",
+        "cca2": "JM",
+        "capital": "Kingston",
+        "lat": 18.25,
+        "lng": -77.5,
+        "cca3": "JAM",
+    },
+    {
+        "name": "San Marino",
+        "area": 61,
+        "cioc": "SMR",
+        "cca2": "SM",
+        "capital": "City of San Marino",
+        "lat": 43.76666666,
+        "lng": 12.41666666,
+        "cca3": "SMR",
+    },
+    {
+        "name": "Gibraltar",
+        "area": 6,
+        "cioc": "",
+        "cca2": "GI",
+        "capital": "Gibraltar",
+        "lat": 36.13333333,
+        "lng": -5.35,
+        "cca3": "GIB",
+    },
+    {
+        "name": "Djibouti",
+        "area": 23200,
+        "cioc": "DJI",
+        "cca2": "DJ",
+        "capital": "Djibouti",
+        "lat": 11.5,
+        "lng": 43,
+        "cca3": "DJI",
+    },
+    {
+        "name": "Guinea",
+        "area": 245857,
+        "cioc": "GUI",
+        "cca2": "GN",
+        "capital": "Conakry",
+        "lat": 11,
+        "lng": -10,
+        "cca3": "GIN",
+    },
+    {
+        "name": "Finland",
+        "area": 338424,
+        "cioc": "FIN",
+        "cca2": "FI",
+        "capital": "Helsinki",
+        "lat": 64,
+        "lng": 26,
+        "cca3": "FIN",
+    },
+    {
+        "name": "Uruguay",
+        "area": 181034,
+        "cioc": "URU",
+        "cca2": "UY",
+        "capital": "Montevideo",
+        "lat": -33,
+        "lng": -56,
+        "cca3": "URY",
+    },
+    {
+        "name": "Thailand",
+        "area": 513120,
+        "cioc": "THA",
+        "cca2": "TH",
+        "capital": "Bangkok",
+        "lat": 15,
+        "lng": 100,
+        "cca3": "THA",
+    },
+    {
+        "name": "Sao Tome and Principe",
+        "area": 964,
+        "cioc": "STP",
+        "cca2": "ST",
+        "capital": "Sao Tome",
+        "lat": 1,
+        "lng": 7,
+        "cca3": "STP",
+    },
+    {
+        "name": "Seychelles",
+        "area": 452,
+        "cioc": "SEY",
+        "cca2": "SC",
+        "capital": "Victoria",
+        "lat": -4.58333333,
+        "lng": 55.66666666,
+        "cca3": "SYC",
+    },
+    {
+        "name": "Nepal",
+        "area": 147181,
+        "cioc": "NEP",
+        "cca2": "NP",
+        "capital": "Kathmandu",
+        "lat": 28,
+        "lng": 84,
+        "cca3": "NPL",
+    },
+    {
+        "name": "Christmas Island",
+        "area": 135,
+        "cioc": "",
+        "cca2": "CX",
+        "capital": "Flying Fish Cove",
+        "lat": -10.5,
+        "lng": 105.66666666,
+        "cca3": "CXR",
+    },
+    {
+        "name": "Laos",
+        "area": 236800,
+        "cioc": "LAO",
+        "cca2": "LA",
+        "capital": "Vientiane",
+        "lat": 18,
+        "lng": 105,
+        "cca3": "LAO",
+    },
+    {
+        "name": "Yemen",
+        "area": 527968,
+        "cioc": "YEM",
+        "cca2": "YE",
+        "capital": "Sana'a",
+        "lat": 15,
+        "lng": 48,
+        "cca3": "YEM",
+    },
+    {
+        "name": "Bouvet Island",
+        "area": 49,
+        "cioc": "",
+        "cca2": "BV",
+        "capital": "",
+        "lat": -54.43333333,
+        "lng": 3.4,
+        "cca3": "BVT",
+    },
+    {
+        "name": "South Africa",
+        "area": 1221037,
+        "cioc": "RSA",
+        "cca2": "ZA",
+        "capital": "Pretoria",
+        "lat": -29,
+        "lng": 24,
+        "cca3": "ZAF",
+    },
+    {
+        "name": "Kiribati",
+        "area": 811,
+        "cioc": "KIR",
+        "cca2": "KI",
+        "capital": "South Tarawa",
+        "lat": 1.41666666,
+        "lng": 173,
+        "cca3": "KIR",
+    },
+    {
+        "name": "Philippines",
+        "area": 342353,
+        "cioc": "PHI",
+        "cca2": "PH",
+        "capital": "Manila",
+        "lat": 13,
+        "lng": 122,
+        "cca3": "PHL",
+    },
+    {
+        "name": "Sint Maarten",
+        "area": 34,
+        "cioc": "",
+        "cca2": "SX",
+        "capital": "Philipsburg",
+        "lat": 18.033333,
+        "lng": -63.05,
+        "cca3": "SXM",
+    },
+    {
+        "name": "Romania",
+        "area": 238391,
+        "cioc": "ROU",
+        "cca2": "RO",
+        "capital": "Bucharest",
+        "lat": 46,
+        "lng": 25,
+        "cca3": "ROU",
+    },
+    {
+        "name": "United States Virgin Islands",
+        "area": 347,
+        "cioc": "ISV",
+        "cca2": "VI",
+        "capital": "Charlotte Amalie",
+        "lat": 18.35,
+        "lng": -64.933333,
+        "cca3": "VIR",
+    },
+    {
+        "name": "Syria",
+        "area": 185180,
+        "cioc": "SYR",
+        "cca2": "SY",
+        "capital": "Damascus",
+        "lat": 35,
+        "lng": 38,
+        "cca3": "SYR",
+    },
+    {
+        "name": "Macau",
+        "area": 30,
+        "cioc": "",
+        "cca2": "MO",
+        "capital": "",
+        "lat": 22.16666666,
+        "lng": 113.55,
+        "cca3": "MAC",
+    },
+    {
+        "name": "Saint Martin",
+        "area": 53,
+        "cioc": "",
+        "cca2": "MF",
+        "capital": "Marigot",
+        "lat": 18.08333333,
+        "lng": -63.95,
+        "cca3": "MAF",
+    },
+    {
+        "name": "Malta",
+        "area": 316,
+        "cioc": "MLT",
+        "cca2": "MT",
+        "capital": "Valletta",
+        "lat": 35.83333333,
+        "lng": 14.58333333,
+        "cca3": "MLT",
+    },
+    {
+        "name": "Kazakhstan",
+        "area": 2724900,
+        "cioc": "KAZ",
+        "cca2": "KZ",
+        "capital": "Astana",
+        "lat": 48,
+        "lng": 68,
+        "cca3": "KAZ",
+    },
+    {
+        "name": "Turks and Caicos Islands",
+        "area": 948,
+        "cioc": "",
+        "cca2": "TC",
+        "capital": "Cockburn Town",
+        "lat": 21.75,
+        "lng": -71.58333333,
+        "cca3": "TCA",
+    },
+    {
+        "name": "French Polynesia",
+        "area": 4167,
+        "cioc": "",
+        "cca2": "PF",
+        "capital": "Papeete",
+        "lat": -15,
+        "lng": -140,
+        "cca3": "PYF",
+    },
+    {
+        "name": "Niue",
+        "area": 260,
+        "cioc": "",
+        "cca2": "NU",
+        "capital": "Alofi",
+        "lat": -19.03333333,
+        "lng": -169.86666666,
+        "cca3": "NIU",
+    },
+    {
+        "name": "Dominica",
+        "area": 751,
+        "cioc": "DMA",
+        "cca2": "DM",
+        "capital": "Roseau",
+        "lat": 15.41666666,
+        "lng": -61.33333333,
+        "cca3": "DMA",
+    },
+    {
+        "name": "Benin",
+        "area": 112622,
+        "cioc": "BEN",
+        "cca2": "BJ",
+        "capital": "Porto-Novo",
+        "lat": 9.5,
+        "lng": 2.25,
+        "cca3": "BEN",
+    },
+    {
+        "name": "French Guiana",
+        "area": 83534,
+        "cioc": "",
+        "cca2": "GF",
+        "capital": "Cayenne",
+        "lat": 4,
+        "lng": -53,
+        "cca3": "GUF",
+    },
+    {
+        "name": "Belgium",
+        "area": 30528,
+        "cioc": "BEL",
+        "cca2": "BE",
+        "capital": "Brussels",
+        "lat": 50.83333333,
+        "lng": 4,
+        "cca3": "BEL",
+    },
+    {
+        "name": "Montserrat",
+        "area": 102,
+        "cioc": "",
+        "cca2": "MS",
+        "capital": "Plymouth",
+        "lat": 16.75,
+        "lng": -62.2,
+        "cca3": "MSR",
+    },
+    {
+        "name": "Togo",
+        "area": 56785,
+        "cioc": "TOG",
+        "cca2": "TG",
+        "capital": "Lome",
+        "lat": 8,
+        "lng": 1.16666666,
+        "cca3": "TGO",
+    },
+    {
+        "name": "Germany",
+        "area": 357114,
+        "cioc": "GER",
+        "cca2": "DE",
+        "capital": "Berlin",
+        "lat": 51,
+        "lng": 9,
+        "cca3": "DEU",
+    },
+    {
+        "name": "Guam",
+        "area": 549,
+        "cioc": "GUM",
+        "cca2": "GU",
+        "capital": "Hagatna",
+        "lat": 13.46666666,
+        "lng": 144.78333333,
+        "cca3": "GUM",
+    },
+    {
+        "name": "Sri Lanka",
+        "area": 65610,
+        "cioc": "SRI",
+        "cca2": "LK",
+        "capital": "Colombo",
+        "lat": 7,
+        "lng": 81,
+        "cca3": "LKA",
+    },
+    {
+        "name": "South Sudan",
+        "area": 619745,
+        "cioc": "",
+        "cca2": "SS",
+        "capital": "Juba",
+        "lat": 7,
+        "lng": 30,
+        "cca3": "SSD",
+    },
+    {
+        "name": "Falkland Islands",
+        "area": 12173,
+        "cioc": "",
+        "cca2": "FK",
+        "capital": "Stanley",
+        "lat": -51.75,
+        "lng": -59,
+        "cca3": "FLK",
+    },
+    {
+        "name": "United Kingdom",
+        "area": 242900,
+        "cioc": "GBR",
+        "cca2": "GB",
+        "capital": "London",
+        "lat": 54,
+        "lng": -2,
+        "cca3": "GBR",
+    },
+    {
+        "name": "Guyana",
+        "area": 214969,
+        "cioc": "GUY",
+        "cca2": "GY",
+        "capital": "Georgetown",
+        "lat": 5,
+        "lng": -59,
+        "cca3": "GUY",
+    },
+    {
+        "name": "Costa Rica",
+        "area": 51100,
+        "cioc": "CRC",
+        "cca2": "CR",
+        "capital": "San Jose",
+        "lat": 10,
+        "lng": -84,
+        "cca3": "CRI",
+    },
+    {
+        "name": "Cameroon",
+        "area": 475442,
+        "cioc": "CMR",
+        "cca2": "CM",
+        "capital": "Yaounde",
+        "lat": 6,
+        "lng": 12,
+        "cca3": "CMR",
+    },
+    {
+        "name": "Morocco",
+        "area": 446550,
+        "cioc": "MAR",
+        "cca2": "MA",
+        "capital": "Rabat",
+        "lat": 32,
+        "lng": -5,
+        "cca3": "MAR",
+    },
+    {
+        "name": "Northern Mariana Islands",
+        "area": 464,
+        "cioc": "",
+        "cca2": "MP",
+        "capital": "Saipan",
+        "lat": 15.2,
+        "lng": 145.75,
+        "cca3": "MNP",
+    },
+    {
+        "name": "Lesotho",
+        "area": 30355,
+        "cioc": "LES",
+        "cca2": "LS",
+        "capital": "Maseru",
+        "lat": -29.5,
+        "lng": 28.5,
+        "cca3": "LSO",
+    },
+    {
+        "name": "Hungary",
+        "area": 93028,
+        "cioc": "HUN",
+        "cca2": "HU",
+        "capital": "Budapest",
+        "lat": 47,
+        "lng": 20,
+        "cca3": "HUN",
+    },
+    {
+        "name": "Turkmenistan",
+        "area": 488100,
+        "cioc": "TKM",
+        "cca2": "TM",
+        "capital": "Ashgabat",
+        "lat": 40,
+        "lng": 60,
+        "cca3": "TKM",
+    },
+    {
+        "name": "Suriname",
+        "area": 163820,
+        "cioc": "SUR",
+        "cca2": "SR",
+        "capital": "Paramaribo",
+        "lat": 4,
+        "lng": -56,
+        "cca3": "SUR",
+    },
+    {
+        "name": "Netherlands",
+        "area": 41850,
+        "cioc": "NED",
+        "cca2": "NL",
+        "capital": "Amsterdam",
+        "lat": 52.5,
+        "lng": 5.75,
+        "cca3": "NLD",
+    },
+    {
+        "name": "Bermuda",
+        "area": 54,
+        "cioc": "BER",
+        "cca2": "BM",
+        "capital": "Hamilton",
+        "lat": 32.33333333,
+        "lng": -64.75,
+        "cca3": "BMU",
+    },
+    {
+        "name": "Heard Island and McDonald Islands",
+        "area": 412,
+        "cioc": "",
+        "cca2": "HM",
+        "capital": "",
+        "lat": -53.1,
+        "lng": 72.51666666,
+        "cca3": "HMD",
+    },
+    {
+        "name": "Chad",
+        "area": 1284000,
+        "cioc": "CHA",
+        "cca2": "TD",
+        "capital": "N'Djamena",
+        "lat": 15,
+        "lng": 19,
+        "cca3": "TCD",
+    },
+    {
+        "name": "Georgia",
+        "area": 69700,
+        "cioc": "GEO",
+        "cca2": "GE",
+        "capital": "Tbilisi",
+        "lat": 42,
+        "lng": 43.5,
+        "cca3": "GEO",
+    },
+    {
+        "name": "Montenegro",
+        "area": 13812,
+        "cioc": "MNE",
+        "cca2": "ME",
+        "capital": "Podgorica",
+        "lat": 42.5,
+        "lng": 19.3,
+        "cca3": "MNE",
+    },
+    {
+        "name": "Mongolia",
+        "area": 1564110,
+        "cioc": "MGL",
+        "cca2": "MN",
+        "capital": "Ulan Bator",
+        "lat": 46,
+        "lng": 105,
+        "cca3": "MNG",
+    },
+    {
+        "name": "Marshall Islands",
+        "area": 181,
+        "cioc": "MHL",
+        "cca2": "MH",
+        "capital": "Majuro",
+        "lat": 9,
+        "lng": 168,
+        "cca3": "MHL",
+    },
+    {
+        "name": "Martinique",
+        "area": 1128,
+        "cioc": "",
+        "cca2": "MQ",
+        "capital": "Fort-de-France",
+        "lat": 14.666667,
+        "lng": -61,
+        "cca3": "MTQ",
+    },
+    {
+        "name": "Belize",
+        "area": 22966,
+        "cioc": "BIZ",
+        "cca2": "BZ",
+        "capital": "Belmopan",
+        "lat": 17.25,
+        "lng": -88.75,
+        "cca3": "BLZ",
+    },
+    {
+        "name": "Norfolk Island",
+        "area": 36,
+        "cioc": "",
+        "cca2": "NF",
+        "capital": "Kingston",
+        "lat": -29.03333333,
+        "lng": 167.95,
+        "cca3": "NFK",
+    },
+    {
+        "name": "Myanmar",
+        "area": 676578,
+        "cioc": "MYA",
+        "cca2": "MM",
+        "capital": "Naypyidaw",
+        "lat": 22,
+        "lng": 98,
+        "cca3": "MMR",
+    },
+    {
+        "name": "Afghanistan",
+        "area": 652230,
+        "cioc": "AFG",
+        "cca2": "AF",
+        "capital": "Kabul",
+        "lat": 33,
+        "lng": 65,
+        "cca3": "AFG",
+    },
+    {
+        "name": "Burundi",
+        "area": 27834,
+        "cioc": "BDI",
+        "cca2": "BI",
+        "capital": "Bujumbura",
+        "lat": -3.5,
+        "lng": 30,
+        "cca3": "BDI",
+    },
+    {
+        "name": "British Virgin Islands",
+        "area": 151,
+        "cioc": "IVB",
+        "cca2": "VG",
+        "capital": "Road Town",
+        "lat": 18.431383,
+        "lng": -64.62305,
+        "cca3": "VGB",
+    },
+    {
+        "name": "Belarus",
+        "area": 207600,
+        "cioc": "BLR",
+        "cca2": "BY",
+        "capital": "Minsk",
+        "lat": 53,
+        "lng": 28,
+        "cca3": "BLR",
+    },
+    {
+        "name": "Saint Barthelemy",
+        "area": 21,
+        "cioc": "",
+        "cca2": "BL",
+        "capital": "Gustavia",
+        "lat": 18.5,
+        "lng": -63.41666666,
+        "cca3": "BLM",
+    },
+    {
+        "name": "Grenada",
+        "area": 344,
+        "cioc": "GRN",
+        "cca2": "GD",
+        "capital": "St. George's",
+        "lat": 12.11666666,
+        "lng": -61.66666666,
+        "cca3": "GRD",
+    },
+    {
+        "name": "Tokelau",
+        "area": 12,
+        "cioc": "",
+        "cca2": "TK",
+        "capital": "Fakaofo",
+        "lat": -9,
+        "lng": -172,
+        "cca3": "TKL",
+    },
+    {
+        "name": "Greece",
+        "area": 131990,
+        "cioc": "GRE",
+        "cca2": "GR",
+        "capital": "Athens",
+        "lat": 39,
+        "lng": 22,
+        "cca3": "GRC",
+    },
+    {
+        "name": "Russia",
+        "area": 17098242,
+        "cioc": "RUS",
+        "cca2": "RU",
+        "capital": "Moscow",
+        "lat": 60,
+        "lng": 100,
+        "cca3": "RUS",
+    },
+    {
+        "name": "Greenland",
+        "area": 2166086,
+        "cioc": "",
+        "cca2": "GL",
+        "capital": "Nuuk",
+        "lat": 72,
+        "lng": -40,
+        "cca3": "GRL",
+    },
+    {
+        "name": "Andorra",
+        "area": 468,
+        "cioc": "AND",
+        "cca2": "AD",
+        "capital": "Andorra la Vella",
+        "lat": 42.5,
+        "lng": 1.5,
+        "cca3": "AND",
+    },
+    {
+        "name": "Mozambique",
+        "area": 801590,
+        "cioc": "MOZ",
+        "cca2": "MZ",
+        "capital": "Maputo",
+        "lat": -18.25,
+        "lng": 35,
+        "cca3": "MOZ",
+    },
+    {
+        "name": "Tajikistan",
+        "area": 143100,
+        "cioc": "TJK",
+        "cca2": "TJ",
+        "capital": "Dushanbe",
+        "lat": 39,
+        "lng": 71,
+        "cca3": "TJK",
+    },
+    {
+        "name": "Haiti",
+        "area": 27750,
+        "cioc": "HAI",
+        "cca2": "HT",
+        "capital": "Port-au-Prince",
+        "lat": 19,
+        "lng": -72.41666666,
+        "cca3": "HTI",
+    },
+    {
+        "name": "Mexico",
+        "area": 1964375,
+        "cioc": "MEX",
+        "cca2": "MX",
+        "capital": "Mexico City",
+        "lat": 23,
+        "lng": -102,
+        "cca3": "MEX",
+    },
+    {
+        "name": "Zimbabwe",
+        "area": 390757,
+        "cioc": "ZIM",
+        "cca2": "ZW",
+        "capital": "Harare",
+        "lat": -20,
+        "lng": 30,
+        "cca3": "ZWE",
+    },
+    {
+        "name": "Saint Lucia",
+        "area": 616,
+        "cioc": "LCA",
+        "cca2": "LC",
+        "capital": "Castries",
+        "lat": 13.88333333,
+        "lng": -60.96666666,
+        "cca3": "LCA",
+    },
+    {
+        "name": "India",
+        "area": 3287590,
+        "cioc": "IND",
+        "cca2": "IN",
+        "capital": "New Delhi",
+        "lat": 20,
+        "lng": 77,
+        "cca3": "IND",
+    },
+    {
+        "name": "Latvia",
+        "area": 64559,
+        "cioc": "LAT",
+        "cca2": "LV",
+        "capital": "Riga",
+        "lat": 57,
+        "lng": 25,
+        "cca3": "LVA",
+    },
+    {
+        "name": "Bhutan",
+        "area": 38394,
+        "cioc": "BHU",
+        "cca2": "BT",
+        "capital": "Thimphu",
+        "lat": 27.5,
+        "lng": 90.5,
+        "cca3": "BTN",
+    },
+    {
+        "name": "Saint Vincent and the Grenadines",
+        "area": 389,
+        "cioc": "VIN",
+        "cca2": "VC",
+        "capital": "Kingstown",
+        "lat": 13.25,
+        "lng": -61.2,
+        "cca3": "VCT",
+    },
+    {
+        "name": "Vietnam",
+        "area": 331212,
+        "cioc": "VIE",
+        "cca2": "VN",
+        "capital": "Hanoi",
+        "lat": 16.16666666,
+        "lng": 107.83333333,
+        "cca3": "VNM",
+    },
+    {
+        "name": "Norway",
+        "area": 323802,
+        "cioc": "NOR",
+        "cca2": "NO",
+        "capital": "Oslo",
+        "lat": 62,
+        "lng": 10,
+        "cca3": "NOR",
+    },
+    {
+        "name": "Czech Republic",
+        "area": 78865,
+        "cioc": "CZE",
+        "cca2": "CZ",
+        "capital": "Prague",
+        "lat": 49.75,
+        "lng": 15.5,
+        "cca3": "CZE",
+    },
+    {
+        "name": "French Southern and Antarctic Lands",
+        "area": 7747,
+        "cioc": "",
+        "cca2": "TF",
+        "capital": "Port-aux-Francais",
+        "lat": -49.25,
+        "lng": 69.167,
+        "cca3": "ATF",
+    },
+    {
+        "name": "Antigua and Barbuda",
+        "area": 442,
+        "cioc": "ANT",
+        "cca2": "AG",
+        "capital": "Saint John's",
+        "lat": 17.05,
+        "lng": -61.8,
+        "cca3": "ATG",
+    },
+    {
+        "name": "Fiji",
+        "area": 18272,
+        "cioc": "FIJ",
+        "cca2": "FJ",
+        "capital": "Suva",
+        "lat": -18,
+        "lng": 175,
+        "cca3": "FJI",
+    },
+    {
+        "name": "British Indian Ocean Territory",
+        "area": 60,
+        "cioc": "",
+        "cca2": "IO",
+        "capital": "Diego Garcia",
+        "lat": -6,
+        "lng": 71.5,
+        "cca3": "IOT",
+    },
+    {
+        "name": "Honduras",
+        "area": 112492,
+        "cioc": "HON",
+        "cca2": "HN",
+        "capital": "Tegucigalpa",
+        "lat": 15,
+        "lng": -86.5,
+        "cca3": "HND",
+    },
+    {
+        "name": "Mauritius",
+        "area": 2040,
+        "cioc": "MRI",
+        "cca2": "MU",
+        "capital": "Port Louis",
+        "lat": -20.28333333,
+        "lng": 57.55,
+        "cca3": "MUS",
+    },
+    {
+        "name": "Antarctica",
+        "area": 14000000,
+        "cioc": "",
+        "cca2": "AQ",
+        "capital": "",
+        "lat": -90,
+        "lng": 0,
+        "cca3": "ATA",
+    },
+    {
+        "name": "Luxembourg",
+        "area": 2586,
+        "cioc": "LUX",
+        "cca2": "LU",
+        "capital": "Luxembourg",
+        "lat": 49.75,
+        "lng": 6.16666666,
+        "cca3": "LUX",
+    },
+    {
+        "name": "Israel",
+        "area": 20770,
+        "cioc": "ISR",
+        "cca2": "IL",
+        "capital": "Jerusalem",
+        "lat": 31.47,
+        "lng": 35.13,
+        "cca3": "ISR",
+    },
+    {
+        "name": "Micronesia",
+        "area": 702,
+        "cioc": "FSM",
+        "cca2": "FM",
+        "capital": "Palikir",
+        "lat": 6.91666666,
+        "lng": 158.25,
+        "cca3": "FSM",
+    },
+    {
+        "name": "Peru",
+        "area": 1285216,
+        "cioc": "PER",
+        "cca2": "PE",
+        "capital": "Lima",
+        "lat": -10,
+        "lng": -76,
+        "cca3": "PER",
+    },
+    {
+        "name": "Reunion",
+        "area": 2511,
+        "cioc": "",
+        "cca2": "RE",
+        "capital": "Saint-Denis",
+        "lat": -21.15,
+        "lng": 55.5,
+        "cca3": "REU",
+    },
+    {
+        "name": "Indonesia",
+        "area": 1904569,
+        "cioc": "INA",
+        "cca2": "ID",
+        "capital": "Jakarta",
+        "lat": -5,
+        "lng": 120,
+        "cca3": "IDN",
+    },
+    {
+        "name": "Vanuatu",
+        "area": 12189,
+        "cioc": "VAN",
+        "cca2": "VU",
+        "capital": "Port Vila",
+        "lat": -16,
+        "lng": 167,
+        "cca3": "VUT",
+    },
+    {
+        "name": "Macedonia",
+        "area": 25713,
+        "cioc": "MKD",
+        "cca2": "MK",
+        "capital": "Skopje",
+        "lat": 41.83333333,
+        "lng": 22,
+        "cca3": "MKD",
+    },
+    {
+        "name": "DR Congo",
+        "area": 2344858,
+        "cioc": "COD",
+        "cca2": "CD",
+        "capital": "Kinshasa",
+        "lat": 0,
+        "lng": 25,
+        "cca3": "COD",
+    },
+    {
+        "name": "Republic of the Congo",
+        "area": 342000,
+        "cioc": "CGO",
+        "cca2": "CG",
+        "capital": "Brazzaville",
+        "lat": -1,
+        "lng": 15,
+        "cca3": "COG",
+    },
+    {
+        "name": "Iceland",
+        "area": 103000,
+        "cioc": "ISL",
+        "cca2": "IS",
+        "capital": "Reykjavik",
+        "lat": 65,
+        "lng": -18,
+        "cca3": "ISL",
+    },
+    {
+        "name": "Guadeloupe",
+        "area": 1628,
+        "cioc": "",
+        "cca2": "GP",
+        "capital": "Basse-Terre",
+        "lat": 16.25,
+        "lng": -61.583333,
+        "cca3": "GLP",
+    },
+    {
+        "name": "Cook Islands",
+        "area": 236,
+        "cioc": "COK",
+        "cca2": "CK",
+        "capital": "Avarua",
+        "lat": -21.23333333,
+        "lng": -159.76666666,
+        "cca3": "COK",
+    },
+    {
+        "name": "Comoros",
+        "area": 1862,
+        "cioc": "COM",
+        "cca2": "KM",
+        "capital": "Moroni",
+        "lat": -12.16666666,
+        "lng": 44.25,
+        "cca3": "COM",
+    },
+    {
+        "name": "Colombia",
+        "area": 1141748,
+        "cioc": "COL",
+        "cca2": "CO",
+        "capital": "Bogota",
+        "lat": 4,
+        "lng": -72,
+        "cca3": "COL",
+    },
+    {
+        "name": "Nigeria",
+        "area": 923768,
+        "cioc": "NGR",
+        "cca2": "NG",
+        "capital": "Abuja",
+        "lat": 10,
+        "lng": 8,
+        "cca3": "NGA",
+    },
+    {
+        "name": "Timor-Leste",
+        "area": 14874,
+        "cioc": "TLS",
+        "cca2": "TL",
+        "capital": "Dili",
+        "lat": -8.83333333,
+        "lng": 125.91666666,
+        "cca3": "TLS",
+    },
+    {
+        "name": "Taiwan",
+        "area": 36193,
+        "cioc": "TPE",
+        "cca2": "TW",
+        "capital": "Taipei",
+        "lat": 23.5,
+        "lng": 121,
+        "cca3": "TWN",
+    },
+    {
+        "name": "Portugal",
+        "area": 92090,
+        "cioc": "POR",
+        "cca2": "PT",
+        "capital": "Lisbon",
+        "lat": 39.5,
+        "lng": -8,
+        "cca3": "PRT",
+    },
+    {
+        "name": "Moldova",
+        "area": 33846,
+        "cioc": "MDA",
+        "cca2": "MD",
+        "capital": "Chisinau",
+        "lat": 47,
+        "lng": 29,
+        "cca3": "MDA",
+    },
+    {
+        "name": "Guernsey",
+        "area": 78,
+        "cioc": "",
+        "cca2": "GG",
+        "capital": "St. Peter Port",
+        "lat": 49.46666666,
+        "lng": -2.58333333,
+        "cca3": "GGY",
+    },
+    {
+        "name": "Madagascar",
+        "area": 587041,
+        "cioc": "MAD",
+        "cca2": "MG",
+        "capital": "Antananarivo",
+        "lat": -20,
+        "lng": 47,
+        "cca3": "MDG",
+    },
+    {
+        "name": "Ecuador",
+        "area": 276841,
+        "cioc": "ECU",
+        "cca2": "EC",
+        "capital": "Quito",
+        "lat": -2,
+        "lng": -77.5,
+        "cca3": "ECU",
+    },
+    {
+        "name": "Senegal",
+        "area": 196722,
+        "cioc": "SEN",
+        "cca2": "SN",
+        "capital": "Dakar",
+        "lat": 14,
+        "lng": -14,
+        "cca3": "SEN",
+    },
+    {
+        "name": "New Zealand",
+        "area": 270467,
+        "cioc": "NZL",
+        "cca2": "NZ",
+        "capital": "Wellington",
+        "lat": -41,
+        "lng": 174,
+        "cca3": "NZL",
+    },
+    {
+        "name": "Maldives",
+        "area": 300,
+        "cioc": "MDV",
+        "cca2": "MV",
+        "capital": "Male",
+        "lat": 3.25,
+        "lng": 73,
+        "cca3": "MDV",
+    },
+    {
+        "name": "American Samoa",
+        "area": 199,
+        "cioc": "ASA",
+        "cca2": "AS",
+        "capital": "Pago Pago",
+        "lat": -14.33333333,
+        "lng": -170,
+        "cca3": "ASM",
+    },
+    {
+        "name": "Saint Pierre and Miquelon",
+        "area": 242,
+        "cioc": "",
+        "cca2": "PM",
+        "capital": "Saint-Pierre",
+        "lat": 46.83333333,
+        "lng": -56.33333333,
+        "cca3": "SPM",
+    },
+    {
+        "name": "Curacao",
+        "area": 444,
+        "cioc": "",
+        "cca2": "CW",
+        "capital": "Willemstad",
+        "lat": 12.116667,
+        "lng": -68.933333,
+        "cca3": "CUW",
+    },
+    {
+        "name": "France",
+        "area": 551695,
+        "cioc": "FRA",
+        "cca2": "FR",
+        "capital": "Paris",
+        "lat": 46,
+        "lng": 2,
+        "cca3": "FRA",
+    },
+    {
+        "name": "Lithuania",
+        "area": 65300,
+        "cioc": "LTU",
+        "cca2": "LT",
+        "capital": "Vilnius",
+        "lat": 56,
+        "lng": 24,
+        "cca3": "LTU",
+    },
+    {
+        "name": "Rwanda",
+        "area": 26338,
+        "cioc": "RWA",
+        "cca2": "RW",
+        "capital": "Kigali",
+        "lat": -2,
+        "lng": 30,
+        "cca3": "RWA",
+    },
+    {
+        "name": "Zambia",
+        "area": 752612,
+        "cioc": "ZAM",
+        "cca2": "ZM",
+        "capital": "Lusaka",
+        "lat": -15,
+        "lng": 30,
+        "cca3": "ZMB",
+    },
+    {
+        "name": "Gambia",
+        "area": 10689,
+        "cioc": "GAM",
+        "cca2": "GM",
+        "capital": "Banjul",
+        "lat": 13.46666666,
+        "lng": -16.56666666,
+        "cca3": "GMB",
+    },
+    {
+        "name": "Wallis and Futuna",
+        "area": 142,
+        "cioc": "",
+        "cca2": "WF",
+        "capital": "Mata-Utu",
+        "lat": -13.3,
+        "lng": -176.2,
+        "cca3": "WLF",
+    },
+    {
+        "name": "Jersey",
+        "area": 116,
+        "cioc": "",
+        "cca2": "JE",
+        "capital": "Saint Helier",
+        "lat": 49.25,
+        "lng": -2.16666666,
+        "cca3": "JEY",
+    },
+    {
+        "name": "Faroe Islands",
+        "area": 1393,
+        "cioc": "",
+        "cca2": "FO",
+        "capital": "Torshavn",
+        "lat": 62,
+        "lng": -7,
+        "cca3": "FRO",
+    },
+    {
+        "name": "Guatemala",
+        "area": 108889,
+        "cioc": "GUA",
+        "cca2": "GT",
+        "capital": "Guatemala City",
+        "lat": 15.5,
+        "lng": -90.25,
+        "cca3": "GTM",
+    },
+    {
+        "name": "Denmark",
+        "area": 43094,
+        "cioc": "DEN",
+        "cca2": "DK",
+        "capital": "Copenhagen",
+        "lat": 56,
+        "lng": 10,
+        "cca3": "DNK",
+    },
+    {
+        "name": "Isle of Man",
+        "area": 572,
+        "cioc": "",
+        "cca2": "IM",
+        "capital": "Douglas",
+        "lat": 54.25,
+        "lng": -4.5,
+        "cca3": "IMN",
+    },
+    {
+        "name": "Australia",
+        "area": 7692024,
+        "cioc": "AUS",
+        "cca2": "AU",
+        "capital": "Canberra",
+        "lat": -27,
+        "lng": 133,
+        "cca3": "AUS",
+    },
+    {
+        "name": "Austria",
+        "area": 83871,
+        "cioc": "AUT",
+        "cca2": "AT",
+        "capital": "Vienna",
+        "lat": 47.33333333,
+        "lng": 13.33333333,
+        "cca3": "AUT",
+    },
+    {
+        "name": "Svalbard and Jan Mayen",
+        "area": -1,
+        "cioc": "",
+        "cca2": "SJ",
+        "capital": "Longyearbyen",
+        "lat": 78,
+        "lng": 20,
+        "cca3": "SJM",
+    },
+    {
+        "name": "Venezuela",
+        "area": 916445,
+        "cioc": "VEN",
+        "cca2": "VE",
+        "capital": "Caracas",
+        "lat": 8,
+        "lng": -66,
+        "cca3": "VEN",
+    },
+    {
+        "name": "Kosovo",
+        "area": 10908,
+        "cioc": "KOS",
+        "cca2": "XK",
+        "capital": "Pristina",
+        "lat": 42.666667,
+        "lng": 21.166667,
+        "cca3": "UNK",
+    },
+    {
+        "name": "Palau",
+        "area": 459,
+        "cioc": "PLW",
+        "cca2": "PW",
+        "capital": "Ngerulmud",
+        "lat": 7.5,
+        "lng": 134.5,
+        "cca3": "PLW",
+    },
+    {
+        "name": "Kenya",
+        "area": 580367,
+        "cioc": "KEN",
+        "cca2": "KE",
+        "capital": "Nairobi",
+        "lat": 1,
+        "lng": 38,
+        "cca3": "KEN",
+    },
+    {
+        "name": "Samoa",
+        "area": 2842,
+        "cioc": "SAM",
+        "cca2": "WS",
+        "capital": "Apia",
+        "lat": -13.58333333,
+        "lng": -172.33333333,
+        "cca3": "WSM",
+    },
+    {
+        "name": "Turkey",
+        "area": 783562,
+        "cioc": "TUR",
+        "cca2": "TR",
+        "capital": "Ankara",
+        "lat": 39,
+        "lng": 35,
+        "cca3": "TUR",
+    },
+    {
+        "name": "Albania",
+        "area": 28748,
+        "cioc": "ALB",
+        "cca2": "AL",
+        "capital": "Tirana",
+        "lat": 41,
+        "lng": 20,
+        "cca3": "ALB",
+    },
+    {
+        "name": "Oman",
+        "area": 309500,
+        "cioc": "OMA",
+        "cca2": "OM",
+        "capital": "Muscat",
+        "lat": 21,
+        "lng": 57,
+        "cca3": "OMN",
+    },
+    {
+        "name": "Tuvalu",
+        "area": 26,
+        "cioc": "TUV",
+        "cca2": "TV",
+        "capital": "Funafuti",
+        "lat": -8,
+        "lng": 178,
+        "cca3": "TUV",
+    },
+    {
+        "name": "Aland Islands",
+        "area": 1580,
+        "cioc": "",
+        "cca2": "AX",
+        "capital": "Mariehamn",
+        "lat": 60.116667,
+        "lng": 19.9,
+        "cca3": "ALA",
+    },
+    {
+        "name": "Brunei",
+        "area": 5765,
+        "cioc": "BRU",
+        "cca2": "BN",
+        "capital": "Bandar Seri Begawan",
+        "lat": 4.5,
+        "lng": 114.66666666,
+        "cca3": "BRN",
+    },
+    {
+        "name": "Tunisia",
+        "area": 163610,
+        "cioc": "TUN",
+        "cca2": "TN",
+        "capital": "Tunis",
+        "lat": 34,
+        "lng": 9,
+        "cca3": "TUN",
+    },
+    {
+        "name": "Pitcairn Islands",
+        "area": 47,
+        "cioc": "",
+        "cca2": "PN",
+        "capital": "Adamstown",
+        "lat": -25.06666666,
+        "lng": -130.1,
+        "cca3": "PCN",
+    },
+    {
+        "name": "Barbados",
+        "area": 430,
+        "cioc": "BAR",
+        "cca2": "BB",
+        "capital": "Bridgetown",
+        "lat": 13.16666666,
+        "lng": -59.53333333,
+        "cca3": "BRB",
+    },
+    {
+        "name": "Brazil",
+        "area": 8515767,
+        "cioc": "BRA",
+        "cca2": "BR",
+        "capital": "Brasilia",
+        "lat": -10,
+        "lng": -55,
+        "cca3": "BRA",
+    },
+    {
+        "name": "Ivory Coast",
+        "area": 322463,
+        "cioc": "CIV",
+        "cca2": "CI",
+        "capital": "Yamoussoukro",
+        "lat": 8,
+        "lng": -5,
+        "cca3": "CIV",
+    },
+    {
+        "name": "Serbia",
+        "area": 88361,
+        "cioc": "SRB",
+        "cca2": "RS",
+        "capital": "Belgrade",
+        "lat": 44,
+        "lng": 21,
+        "cca3": "SRB",
+    },
+    {
+        "name": "Equatorial Guinea",
+        "area": 28051,
+        "cioc": "GEQ",
+        "cca2": "GQ",
+        "capital": "Malabo",
+        "lat": 2,
+        "lng": 10,
+        "cca3": "GNQ",
+    },
+    {
+        "name": "United States",
+        "area": 9372610,
+        "cioc": "USA",
+        "cca2": "US",
+        "capital": "Washington D.C.",
+        "lat": 38,
+        "lng": -97,
+        "cca3": "USA",
+    },
+    {
+        "name": "Qatar",
+        "area": 11586,
+        "cioc": "QAT",
+        "cca2": "QA",
+        "capital": "Doha",
+        "lat": 25.5,
+        "lng": 51.25,
+        "cca3": "QAT",
+    },
+    {
+        "name": "Sweden",
+        "area": 450295,
+        "cioc": "SWE",
+        "cca2": "SE",
+        "capital": "Stockholm",
+        "lat": 62,
+        "lng": 15,
+        "cca3": "SWE",
+    },
+    {
+        "name": "Azerbaijan",
+        "area": 86600,
+        "cioc": "AZE",
+        "cca2": "AZ",
+        "capital": "Baku",
+        "lat": 40.5,
+        "lng": 47.5,
+        "cca3": "AZE",
+    },
+    {
+        "name": "Guinea-Bissau",
+        "area": 36125,
+        "cioc": "GBS",
+        "cca2": "GW",
+        "capital": "Bissau",
+        "lat": 12,
+        "lng": -15,
+        "cca3": "GNB",
+    },
+    {
+        "name": "Swaziland",
+        "area": 17364,
+        "cioc": "SWZ",
+        "cca2": "SZ",
+        "capital": "Lobamba",
+        "lat": -26.5,
+        "lng": 31.5,
+        "cca3": "SWZ",
+    },
+    {
+        "name": "Tonga",
+        "area": 747,
+        "cioc": "TGA",
+        "cca2": "TO",
+        "capital": "Nuku'alofa",
+        "lat": -20,
+        "lng": -175,
+        "cca3": "TON",
+    },
+    {
+        "name": "Canada",
+        "area": 9984670,
+        "cioc": "CAN",
+        "cca2": "CA",
+        "capital": "Ottawa",
+        "lat": 60,
+        "lng": -95,
+        "cca3": "CAN",
+    },
+    {
+        "name": "Ukraine",
+        "area": 603500,
+        "cioc": "UKR",
+        "cca2": "UA",
+        "capital": "Kiev",
+        "lat": 49,
+        "lng": 32,
+        "cca3": "UKR",
+    },
+    {
+        "name": "South Korea",
+        "area": 100210,
+        "cioc": "KOR",
+        "cca2": "KR",
+        "capital": "Seoul",
+        "lat": 37,
+        "lng": 127.5,
+        "cca3": "KOR",
+    },
+    {
+        "name": "Anguilla",
+        "area": 91,
+        "cioc": "",
+        "cca2": "AI",
+        "capital": "The Valley",
+        "lat": 18.25,
+        "lng": -63.16666666,
+        "cca3": "AIA",
+    },
+    {
+        "name": "Central African Republic",
+        "area": 622984,
+        "cioc": "CAF",
+        "cca2": "CF",
+        "capital": "Bangui",
+        "lat": 7,
+        "lng": 21,
+        "cca3": "CAF",
+    },
+    {
+        "name": "Slovakia",
+        "area": 49037,
+        "cioc": "SVK",
+        "cca2": "SK",
+        "capital": "Bratislava",
+        "lat": 48.66666666,
+        "lng": 19.5,
+        "cca3": "SVK",
+    },
+    {
+        "name": "Cyprus",
+        "area": 9251,
+        "cioc": "CYP",
+        "cca2": "CY",
+        "capital": "Nicosia",
+        "lat": 35,
+        "lng": 33,
+        "cca3": "CYP",
+    },
+    {
+        "name": "Bosnia and Herzegovina",
+        "area": 51209,
+        "cioc": "BIH",
+        "cca2": "BA",
+        "capital": "Sarajevo",
+        "lat": 44,
+        "lng": 18,
+        "cca3": "BIH",
+    },
+    {
+        "name": "Singapore",
+        "area": 710,
+        "cioc": "SIN",
+        "cca2": "SG",
+        "capital": "Singapore",
+        "lat": 1.36666666,
+        "lng": 103.8,
+        "cca3": "SGP",
+    },
+    {
+        "name": "South Georgia",
+        "area": 3903,
+        "cioc": "",
+        "cca2": "GS",
+        "capital": "King Edward Point",
+        "lat": -54.5,
+        "lng": -37,
+        "cca3": "SGS",
+    },
+    {
+        "name": "Somalia",
+        "area": 637657,
+        "cioc": "SOM",
+        "cca2": "SO",
+        "capital": "Mogadishu",
+        "lat": 10,
+        "lng": 49,
+        "cca3": "SOM",
+    },
+    {
+        "name": "Uzbekistan",
+        "area": 447400,
+        "cioc": "UZB",
+        "cca2": "UZ",
+        "capital": "Tashkent",
+        "lat": 41,
+        "lng": 64,
+        "cca3": "UZB",
+    },
+    {
+        "name": "Eritrea",
+        "area": 117600,
+        "cioc": "ERI",
+        "cca2": "ER",
+        "capital": "Asmara",
+        "lat": 15,
+        "lng": 39,
+        "cca3": "ERI",
+    },
+    {
+        "name": "Poland",
+        "area": 312679,
+        "cioc": "POL",
+        "cca2": "PL",
+        "capital": "Warsaw",
+        "lat": 52,
+        "lng": 20,
+        "cca3": "POL",
+    },
+    {
+        "name": "Kuwait",
+        "area": 17818,
+        "cioc": "KUW",
+        "cca2": "KW",
+        "capital": "Kuwait City",
+        "lat": 29.5,
+        "lng": 45.75,
+        "cca3": "KWT",
+    },
+    {
+        "name": "Gabon",
+        "area": 267668,
+        "cioc": "GAB",
+        "cca2": "GA",
+        "capital": "Libreville",
+        "lat": -1,
+        "lng": 11.75,
+        "cca3": "GAB",
+    },
+    {
+        "name": "Cayman Islands",
+        "area": 264,
+        "cioc": "CAY",
+        "cca2": "KY",
+        "capital": "George Town",
+        "lat": 19.5,
+        "lng": -80.5,
+        "cca3": "CYM",
+    },
+    {
+        "name": "Vatican City",
+        "area": 0.44,
+        "cioc": "",
+        "cca2": "VA",
+        "capital": "Vatican City",
+        "lat": 41.9,
+        "lng": 12.45,
+        "cca3": "VAT",
+    },
+    {
+        "name": "Estonia",
+        "area": 45227,
+        "cioc": "EST",
+        "cca2": "EE",
+        "capital": "Tallinn",
+        "lat": 59,
+        "lng": 26,
+        "cca3": "EST",
+    },
+    {
+        "name": "Malawi",
+        "area": 118484,
+        "cioc": "MAW",
+        "cca2": "MW",
+        "capital": "Lilongwe",
+        "lat": -13.5,
+        "lng": 34,
+        "cca3": "MWI",
+    },
+    {
+        "name": "Spain",
+        "area": 505992,
+        "cioc": "ESP",
+        "cca2": "ES",
+        "capital": "Madrid",
+        "lat": 40,
+        "lng": -4,
+        "cca3": "ESP",
+    },
+    {
+        "name": "Iraq",
+        "area": 438317,
+        "cioc": "IRQ",
+        "cca2": "IQ",
+        "capital": "Baghdad",
+        "lat": 33,
+        "lng": 44,
+        "cca3": "IRQ",
+    },
+    {
+        "name": "El Salvador",
+        "area": 21041,
+        "cioc": "ESA",
+        "cca2": "SV",
+        "capital": "San Salvador",
+        "lat": 13.83333333,
+        "lng": -88.91666666,
+        "cca3": "SLV",
+    },
+    {
+        "name": "Mali",
+        "area": 1240192,
+        "cioc": "MLI",
+        "cca2": "ML",
+        "capital": "Bamako",
+        "lat": 17,
+        "lng": -4,
+        "cca3": "MLI",
+    },
+    {
+        "name": "Ireland",
+        "area": 70273,
+        "cioc": "IRL",
+        "cca2": "IE",
+        "capital": "Dublin",
+        "lat": 53,
+        "lng": -8,
+        "cca3": "IRL",
+    },
+    {
+        "name": "Iran",
+        "area": 1648195,
+        "cioc": "IRI",
+        "cca2": "IR",
+        "capital": "Tehran",
+        "lat": 32,
+        "lng": 53,
+        "cca3": "IRN",
+    },
+    {
+        "name": "Aruba",
+        "area": 180,
+        "cioc": "ARU",
+        "cca2": "AW",
+        "capital": "Oranjestad",
+        "lat": 12.5,
+        "lng": -69.96666666,
+        "cca3": "ABW",
+    },
+    {
+        "name": "Papua New Guinea",
+        "area": 462840,
+        "cioc": "PNG",
+        "cca2": "PG",
+        "capital": "Port Moresby",
+        "lat": -6,
+        "lng": 147,
+        "cca3": "PNG",
+    },
+    {
+        "name": "Panama",
+        "area": 75417,
+        "cioc": "PAN",
+        "cca2": "PA",
+        "capital": "Panama City",
+        "lat": 9,
+        "lng": -80,
+        "cca3": "PAN",
+    },
+    {
+        "name": "Sudan",
+        "area": 1886068,
+        "cioc": "SUD",
+        "cca2": "SD",
+        "capital": "Khartoum",
+        "lat": 15,
+        "lng": 30,
+        "cca3": "SDN",
+    },
+    {
+        "name": "Solomon Islands",
+        "area": 28896,
+        "cioc": "SOL",
+        "cca2": "SB",
+        "capital": "Honiara",
+        "lat": -8,
+        "lng": 159,
+        "cca3": "SLB",
+    },
+    {
+        "name": "Western Sahara",
+        "area": 266000,
+        "cioc": "",
+        "cca2": "EH",
+        "capital": "El Aaiun",
+        "lat": 24.5,
+        "lng": -13,
+        "cca3": "ESH",
+    },
+    {
+        "name": "Monaco",
+        "area": 2.02,
+        "cioc": "MON",
+        "cca2": "MC",
+        "capital": "Monaco",
+        "lat": 43.73333333,
+        "lng": 7.4,
+        "cca3": "MCO",
+    },
+    {
+        "name": "Italy",
+        "area": 301336,
+        "cioc": "ITA",
+        "cca2": "IT",
+        "capital": "Rome",
+        "lat": 42.83333333,
+        "lng": 12.83333333,
+        "cca3": "ITA",
+    },
+    {
+        "name": "Japan",
+        "area": 377930,
+        "cioc": "JPN",
+        "cca2": "JP",
+        "capital": "Tokyo",
+        "lat": 36,
+        "lng": 138,
+        "cca3": "JPN",
+    },
+    {
+        "name": "Kyrgyzstan",
+        "area": 199951,
+        "cioc": "KGZ",
+        "cca2": "KG",
+        "capital": "Bishkek",
+        "lat": 41,
+        "lng": 75,
+        "cca3": "KGZ",
+    },
+    {
+        "name": "Uganda",
+        "area": 241550,
+        "cioc": "UGA",
+        "cca2": "UG",
+        "capital": "Kampala",
+        "lat": 1,
+        "lng": 32,
+        "cca3": "UGA",
+    },
+    {
+        "name": "New Caledonia",
+        "area": 18575,
+        "cioc": "",
+        "cca2": "NC",
+        "capital": "Noumea",
+        "lat": -21.5,
+        "lng": 165.5,
+        "cca3": "NCL",
+    },
+    {
+        "name": "United Arab Emirates",
+        "area": 83600,
+        "cioc": "UAE",
+        "cca2": "AE",
+        "capital": "Abu Dhabi",
+        "lat": 24,
+        "lng": 54,
+        "cca3": "ARE",
+    },
+    {
+        "name": "Argentina",
+        "area": 2780400,
+        "cioc": "ARG",
+        "cca2": "AR",
+        "capital": "Buenos Aires",
+        "lat": -34,
+        "lng": -64,
+        "cca3": "ARG",
+    },
+    {
+        "name": "Bahamas",
+        "area": 13943,
+        "cioc": "BAH",
+        "cca2": "BS",
+        "capital": "Nassau",
+        "lat": 24.25,
+        "lng": -76,
+        "cca3": "BHS",
+    },
+    {
+        "name": "Bahrain",
+        "area": 765,
+        "cioc": "BRN",
+        "cca2": "BH",
+        "capital": "Manama",
+        "lat": 26,
+        "lng": 50.55,
+        "cca3": "BHR",
+    },
+    {
+        "name": "Armenia",
+        "area": 29743,
+        "cioc": "ARM",
+        "cca2": "AM",
+        "capital": "Yerevan",
+        "lat": 40,
+        "lng": 45,
+        "cca3": "ARM",
+    },
+    {
+        "name": "Nauru",
+        "area": 21,
+        "cioc": "NRU",
+        "cca2": "NR",
+        "capital": "Yaren",
+        "lat": -0.53333333,
+        "lng": 166.91666666,
+        "cca3": "NRU",
+    },
+    {
+        "name": "Cuba",
+        "area": 109884,
+        "cioc": "CUB",
+        "cca2": "CU",
+        "capital": "Havana",
+        "lat": 21.5,
+        "lng": -80,
+        "cca3": "CUB",
+    },
+]
+
+all_lookups: Dict[str, Dict[str, Dict[str, Any]]] = {}
+lookups = ["cioc", "cca2", "cca3", "name"]
+for lookup in lookups:
+    all_lookups[lookup] = {}
+    for country in countries:
+        all_lookups[lookup][country[lookup].lower()] = country
+
+
+def get(field: str, symbol: str) -> Optional[Dict[str, Any]]:
+    """
+    Get country data based on a standard code and a symbol
+    """
+    return all_lookups[field].get(symbol.lower())
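Editor's note: a minimal usage sketch of the lookup built above (illustrative only, not part of the committed countries.py). get() takes one of the four index fields ("cioc", "cca2", "cca3", "name") and a case-insensitive symbol, returning the matching country record or None.

    # hypothetical usage; assumes this module is importable alongside the other examples
    from .countries import get

    assert get("cca2", "us")["name"] == "United States"      # lookups are case-insensitive
    assert get("cioc", "BRA")["capital"] == "Brasilia"
    assert get("name", "atlantis") is None                    # unknown symbols return None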

+ 114 - 0
data/purposeCombined/BI/examples/country_map.py

@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import datetime
+
+import pandas as pd
+from sqlalchemy import BigInteger, Date, String
+from sqlalchemy.sql import column
+
+from superset import db
+from superset.connectors.sqla.models import SqlMetric
+from superset.models.slice import Slice
+from superset.utils import core as utils
+
+from .helpers import (
+    get_example_data,
+    get_slice_json,
+    merge_slice,
+    misc_dash_slices,
+    TBL,
+)
+
+
+def load_country_map_data(only_metadata: bool = False, force: bool = False) -> None:
+    """Loading data for map with country map"""
+    tbl_name = "birth_france_by_region"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        csv_bytes = get_example_data(
+            "birth_france_data_for_country_map.csv", is_gzip=False, make_bytes=True
+        )
+        data = pd.read_csv(csv_bytes, encoding="utf-8")
+        data["dttm"] = datetime.datetime.now().date()
+        data.to_sql(  # pylint: disable=no-member
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "DEPT_ID": String(10),
+                "2003": BigInteger,
+                "2004": BigInteger,
+                "2005": BigInteger,
+                "2006": BigInteger,
+                "2007": BigInteger,
+                "2008": BigInteger,
+                "2009": BigInteger,
+                "2010": BigInteger,
+                "2011": BigInteger,
+                "2012": BigInteger,
+                "2013": BigInteger,
+                "2014": BigInteger,
+                "dttm": Date(),
+            },
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)
+
+    print("Creating table reference")
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not obj:
+        obj = TBL(table_name=tbl_name)
+    obj.main_dttm_col = "dttm"
+    obj.database = database
+    if not any(col.metric_name == "avg__2004" for col in obj.metrics):
+        col = str(column("2004").compile(db.engine))
+        obj.metrics.append(SqlMetric(metric_name="avg__2004", expression=f"AVG({col})"))
+    db.session.merge(obj)
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
+    slice_data = {
+        "granularity_sqla": "",
+        "since": "",
+        "until": "",
+        "viz_type": "country_map",
+        "entity": "DEPT_ID",
+        "metric": {
+            "expressionType": "SIMPLE",
+            "column": {"type": "INT", "column_name": "2004"},
+            "aggregate": "AVG",
+            "label": "Boys",
+            "optionName": "metric_112342",
+        },
+        "row_limit": 500000,
+    }
+
+    print("Creating a slice")
+    slc = Slice(
+        slice_name="Birth in France by department in 2016",
+        viz_type="country_map",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    misc_dash_slices.add(slc.slice_name)
+    merge_slice(slc)

+ 100 - 0
data/purposeCombined/BI/examples/css_templates.py

@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import textwrap
+
+from superset import db
+from superset.models.core import CssTemplate
+
+
+def load_css_templates() -> None:
+    """Loads 2 css templates to demonstrate the feature"""
+    print("Creating default CSS templates")
+
+    obj = db.session.query(CssTemplate).filter_by(template_name="Flat").first()
+    if not obj:
+        obj = CssTemplate(template_name="Flat")
+    css = textwrap.dedent(
+        """\
+    .navbar {
+        transition: opacity 0.5s ease;
+        opacity: 0.05;
+    }
+    .navbar:hover {
+        opacity: 1;
+    }
+    .chart-header .header{
+        font-weight: @font-weight-normal;
+        font-size: 12px;
+    }
+    /*
+    var bnbColors = [
+        //rausch    hackb      kazan      babu      lima        beach     tirol
+        '#ff5a5f', '#7b0051', '#007A87', '#00d1c1', '#8ce071', '#ffb400', '#b4a76c',
+        '#ff8083', '#cc0086', '#00a1b3', '#00ffeb', '#bbedab', '#ffd266', '#cbc29a',
+        '#ff3339', '#ff1ab1', '#005c66', '#00b3a5', '#55d12e', '#b37e00', '#988b4e',
+     ];
+    */
+    """
+    )
+    obj.css = css
+    db.session.merge(obj)
+    db.session.commit()
+
+    obj = db.session.query(CssTemplate).filter_by(template_name="Courier Black").first()
+    if not obj:
+        obj = CssTemplate(template_name="Courier Black")
+    css = textwrap.dedent(
+        """\
+    h2 {
+        color: white;
+        font-size: 52px;
+    }
+    .navbar {
+        box-shadow: none;
+    }
+    .navbar {
+        transition: opacity 0.5s ease;
+        opacity: 0.05;
+    }
+    .navbar:hover {
+        opacity: 1;
+    }
+    .chart-header .header{
+        font-weight: @font-weight-normal;
+        font-size: 12px;
+    }
+    .nvd3 text {
+        font-size: 12px;
+        font-family: inherit;
+    }
+    body{
+        background: #000;
+        font-family: Courier, Monaco, monospace;
+    }
+    /*
+    var bnbColors = [
+        //rausch    hackb      kazan      babu      lima        beach     tirol
+        '#ff5a5f', '#7b0051', '#007A87', '#00d1c1', '#8ce071', '#ffb400', '#b4a76c',
+        '#ff8083', '#cc0086', '#00a1b3', '#00ffeb', '#bbedab', '#ffd266', '#cbc29a',
+        '#ff3339', '#ff1ab1', '#005c66', '#00b3a5', '#55d12e', '#b37e00', '#988b4e',
+     ];
+    */
+    """
+    )
+    obj.css = css
+    db.session.merge(obj)
+    db.session.commit()

+ 529 - 0
data/purposeCombined/BI/examples/deck.py

@@ -0,0 +1,529 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=too-many-statements
+import json
+
+from superset import db
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+
+from .helpers import get_slice_json, merge_slice, TBL, update_slice_ids
+
+COLOR_RED = {"r": 205, "g": 0, "b": 3, "a": 0.82}
+POSITION_JSON = """\
+{
+    "CHART-3afd9d70": {
+        "meta": {
+            "chartId": 66,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-3afd9d70",
+        "children": []
+    },
+    "CHART-2ee7fa5e": {
+        "meta": {
+            "chartId": 67,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-2ee7fa5e",
+        "children": []
+    },
+    "CHART-201f7715": {
+        "meta": {
+            "chartId": 68,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-201f7715",
+        "children": []
+    },
+    "CHART-d02f6c40": {
+        "meta": {
+            "chartId": 69,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-d02f6c40",
+        "children": []
+    },
+    "CHART-2673431d": {
+        "meta": {
+            "chartId": 70,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-2673431d",
+        "children": []
+    },
+    "CHART-85265a60": {
+        "meta": {
+            "chartId": 71,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-85265a60",
+        "children": []
+    },
+    "CHART-2b87513c": {
+        "meta": {
+            "chartId": 72,
+            "width": 6,
+            "height": 50
+        },
+        "type": "CHART",
+        "id": "CHART-2b87513c",
+        "children": []
+    },
+    "GRID_ID": {
+        "type": "GRID",
+        "id": "GRID_ID",
+        "children": [
+            "ROW-a7b16cb5",
+            "ROW-72c218a5",
+            "ROW-957ba55b",
+            "ROW-af041bdd"
+        ]
+    },
+    "HEADER_ID": {
+        "meta": {
+            "text": "deck.gl Demo"
+        },
+        "type": "HEADER",
+        "id": "HEADER_ID"
+    },
+    "ROOT_ID": {
+        "type": "ROOT",
+        "id": "ROOT_ID",
+        "children": [
+            "GRID_ID"
+        ]
+    },
+    "ROW-72c218a5": {
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW",
+        "id": "ROW-72c218a5",
+        "children": [
+            "CHART-d02f6c40",
+            "CHART-201f7715"
+        ]
+    },
+    "ROW-957ba55b": {
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW",
+        "id": "ROW-957ba55b",
+        "children": [
+            "CHART-2673431d",
+            "CHART-85265a60"
+        ]
+    },
+    "ROW-a7b16cb5": {
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW",
+        "id": "ROW-a7b16cb5",
+        "children": [
+            "CHART-3afd9d70",
+            "CHART-2ee7fa5e"
+        ]
+    },
+    "ROW-af041bdd": {
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW",
+        "id": "ROW-af041bdd",
+        "children": [
+            "CHART-2b87513c"
+        ]
+    },
+    "DASHBOARD_VERSION_KEY": "v2"
+}"""
+
+
+def load_deck_dash() -> None:
+    print("Loading deck.gl dashboard")
+    slices = []
+    tbl = db.session.query(TBL).filter_by(table_name="long_lat").first()
+    slice_data = {
+        "spatial": {"type": "latlong", "lonCol": "LON", "latCol": "LAT"},
+        "color_picker": COLOR_RED,
+        "datasource": "5__table",
+        "granularity_sqla": None,
+        "groupby": [],
+        "mapbox_style": "mapbox://styles/mapbox/light-v9",
+        "multiplier": 10,
+        "point_radius_fixed": {"type": "metric", "value": "count"},
+        "point_unit": "square_m",
+        "min_radius": 1,
+        "max_radius": 250,
+        "row_limit": 5000,
+        "time_range": " : ",
+        "size": "count",
+        "time_grain_sqla": None,
+        "viewport": {
+            "bearing": -4.952916738791771,
+            "latitude": 37.78926922909199,
+            "longitude": -122.42613341901688,
+            "pitch": 4.750411100577438,
+            "zoom": 12.729132798697304,
+        },
+        "viz_type": "deck_scatter",
+    }
+
+    print("Creating Scatterplot slice")
+    slc = Slice(
+        slice_name="Scatterplot",
+        viz_type="deck_scatter",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+
+    slice_data = {
+        "point_unit": "square_m",
+        "row_limit": 5000,
+        "spatial": {"type": "latlong", "lonCol": "LON", "latCol": "LAT"},
+        "mapbox_style": "mapbox://styles/mapbox/dark-v9",
+        "granularity_sqla": None,
+        "size": "count",
+        "viz_type": "deck_screengrid",
+        "time_range": "No filter",
+        "point_radius": "Auto",
+        "color_picker": {"a": 1, "r": 14, "b": 0, "g": 255},
+        "grid_size": 20,
+        "viewport": {
+            "zoom": 14.161641703941438,
+            "longitude": -122.41827069521386,
+            "bearing": -4.952916738791771,
+            "latitude": 37.76024135844065,
+            "pitch": 4.750411100577438,
+        },
+        "point_radius_fixed": {"type": "fix", "value": 2000},
+        "datasource": "5__table",
+        "time_grain_sqla": None,
+        "groupby": [],
+    }
+    print("Creating Screen Grid slice")
+    slc = Slice(
+        slice_name="Screen grid",
+        viz_type="deck_screengrid",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+
+    slice_data = {
+        "spatial": {"type": "latlong", "lonCol": "LON", "latCol": "LAT"},
+        "row_limit": 5000,
+        "mapbox_style": "mapbox://styles/mapbox/streets-v9",
+        "granularity_sqla": None,
+        "size": "count",
+        "viz_type": "deck_hex",
+        "time_range": "No filter",
+        "point_radius_unit": "Pixels",
+        "point_radius": "Auto",
+        "color_picker": {"a": 1, "r": 14, "b": 0, "g": 255},
+        "grid_size": 40,
+        "extruded": True,
+        "viewport": {
+            "latitude": 37.789795085160335,
+            "pitch": 54.08961642447763,
+            "zoom": 13.835465702403654,
+            "longitude": -122.40632230075536,
+            "bearing": -2.3984797349335167,
+        },
+        "point_radius_fixed": {"type": "fix", "value": 2000},
+        "datasource": "5__table",
+        "time_grain_sqla": None,
+        "groupby": [],
+    }
+    print("Creating Hex slice")
+    slc = Slice(
+        slice_name="Hexagons",
+        viz_type="deck_hex",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+
+    slice_data = {
+        "spatial": {"type": "latlong", "lonCol": "LON", "latCol": "LAT"},
+        "row_limit": 5000,
+        "mapbox_style": "mapbox://styles/mapbox/satellite-streets-v9",
+        "granularity_sqla": None,
+        "size": "count",
+        "viz_type": "deck_grid",
+        "point_radius_unit": "Pixels",
+        "point_radius": "Auto",
+        "time_range": "No filter",
+        "color_picker": {"a": 1, "r": 14, "b": 0, "g": 255},
+        "grid_size": 120,
+        "extruded": True,
+        "viewport": {
+            "longitude": -122.42066918995666,
+            "bearing": 155.80099696026355,
+            "zoom": 12.699690845482069,
+            "latitude": 37.7942314882596,
+            "pitch": 53.470800300695146,
+        },
+        "point_radius_fixed": {"type": "fix", "value": 2000},
+        "datasource": "5__table",
+        "time_grain_sqla": None,
+        "groupby": [],
+    }
+    print("Creating Grid slice")
+    slc = Slice(
+        slice_name="Grid",
+        viz_type="deck_grid",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+
+    polygon_tbl = (
+        db.session.query(TBL).filter_by(table_name="sf_population_polygons").first()
+    )
+    slice_data = {
+        "datasource": "11__table",
+        "viz_type": "deck_polygon",
+        "slice_id": 41,
+        "granularity_sqla": None,
+        "time_grain_sqla": None,
+        "time_range": " : ",
+        "line_column": "contour",
+        "metric": {
+            "aggregate": "SUM",
+            "column": {
+                "column_name": "population",
+                "description": None,
+                "expression": None,
+                "filterable": True,
+                "groupby": True,
+                "id": 1332,
+                "is_dttm": False,
+                "optionName": "_col_population",
+                "python_date_format": None,
+                "type": "BIGINT",
+                "verbose_name": None,
+            },
+            "expressionType": "SIMPLE",
+            "hasCustomLabel": True,
+            "label": "Population",
+            "optionName": "metric_t2v4qbfiz1_w6qgpx4h2p",
+            "sqlExpression": None,
+        },
+        "line_type": "json",
+        "linear_color_scheme": "oranges",
+        "mapbox_style": "mapbox://styles/mapbox/light-v9",
+        "viewport": {
+            "longitude": -122.43388541747726,
+            "latitude": 37.752020331384834,
+            "zoom": 11.133995608594631,
+            "bearing": 37.89506450385642,
+            "pitch": 60,
+            "width": 667,
+            "height": 906,
+            "altitude": 1.5,
+            "maxZoom": 20,
+            "minZoom": 0,
+            "maxPitch": 60,
+            "minPitch": 0,
+            "maxLatitude": 85.05113,
+            "minLatitude": -85.05113,
+        },
+        "reverse_long_lat": False,
+        "fill_color_picker": {"r": 3, "g": 65, "b": 73, "a": 1},
+        "stroke_color_picker": {"r": 0, "g": 122, "b": 135, "a": 1},
+        "filled": True,
+        "stroked": False,
+        "extruded": True,
+        "multiplier": 0.1,
+        "point_radius_fixed": {
+            "type": "metric",
+            "value": {
+                "aggregate": None,
+                "column": None,
+                "expressionType": "SQL",
+                "hasCustomLabel": None,
+                "label": "Density",
+                "optionName": "metric_c5rvwrzoo86_293h6yrv2ic",
+                "sqlExpression": "SUM(population)/SUM(area)",
+            },
+        },
+        "js_columns": [],
+        "js_data_mutator": "",
+        "js_tooltip": "",
+        "js_onclick_href": "",
+        "legend_format": ".1s",
+        "legend_position": "tr",
+    }
+
+    print("Creating Polygon slice")
+    slc = Slice(
+        slice_name="Polygons",
+        viz_type="deck_polygon",
+        datasource_type="table",
+        datasource_id=polygon_tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+
+    slice_data = {
+        "datasource": "10__table",
+        "viz_type": "deck_arc",
+        "slice_id": 42,
+        "granularity_sqla": None,
+        "time_grain_sqla": None,
+        "time_range": " : ",
+        "start_spatial": {
+            "type": "latlong",
+            "latCol": "LATITUDE",
+            "lonCol": "LONGITUDE",
+        },
+        "end_spatial": {
+            "type": "latlong",
+            "latCol": "LATITUDE_DEST",
+            "lonCol": "LONGITUDE_DEST",
+        },
+        "row_limit": 5000,
+        "mapbox_style": "mapbox://styles/mapbox/light-v9",
+        "viewport": {
+            "altitude": 1.5,
+            "bearing": 8.546256357301871,
+            "height": 642,
+            "latitude": 44.596651438714254,
+            "longitude": -91.84340711201104,
+            "maxLatitude": 85.05113,
+            "maxPitch": 60,
+            "maxZoom": 20,
+            "minLatitude": -85.05113,
+            "minPitch": 0,
+            "minZoom": 0,
+            "pitch": 60,
+            "width": 997,
+            "zoom": 2.929837070560775,
+        },
+        "color_picker": {"r": 0, "g": 122, "b": 135, "a": 1},
+        "stroke_width": 1,
+    }
+
+    print("Creating Arc slice")
+    slc = Slice(
+        slice_name="Arcs",
+        viz_type="deck_arc",
+        datasource_type="table",
+        datasource_id=db.session.query(TBL).filter_by(table_name="flights").first().id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+
+    slice_data = {
+        "datasource": "12__table",
+        "slice_id": 43,
+        "viz_type": "deck_path",
+        "time_grain_sqla": None,
+        "time_range": " : ",
+        "line_column": "path_json",
+        "line_type": "json",
+        "row_limit": 5000,
+        "mapbox_style": "mapbox://styles/mapbox/light-v9",
+        "viewport": {
+            "longitude": -122.18885402582598,
+            "latitude": 37.73671752604488,
+            "zoom": 9.51847667620428,
+            "bearing": 0,
+            "pitch": 0,
+            "width": 669,
+            "height": 1094,
+            "altitude": 1.5,
+            "maxZoom": 20,
+            "minZoom": 0,
+            "maxPitch": 60,
+            "minPitch": 0,
+            "maxLatitude": 85.05113,
+            "minLatitude": -85.05113,
+        },
+        "color_picker": {"r": 0, "g": 122, "b": 135, "a": 1},
+        "line_width": 150,
+        "reverse_long_lat": False,
+        "js_columns": ["color"],
+        "js_data_mutator": "data => data.map(d => ({\n"
+        "    ...d,\n"
+        "    color: colors.hexToRGB(d.extraProps.color)\n"
+        "}));",
+        "js_tooltip": "",
+        "js_onclick_href": "",
+    }
+
+    print("Creating Path slice")
+    slc = Slice(
+        slice_name="Path",
+        viz_type="deck_path",
+        datasource_type="table",
+        datasource_id=db.session.query(TBL)
+        .filter_by(table_name="bart_lines")
+        .first()
+        .id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+    slices.append(slc)
+    slug = "deck"
+
+    print("Creating a dashboard")
+    title = "deck.gl Demo"
+    dash = db.session.query(Dashboard).filter_by(slug=slug).first()
+
+    if not dash:
+        dash = Dashboard()
+    dash.published = True
+    js = POSITION_JSON
+    pos = json.loads(js)
+    update_slice_ids(pos, slices)
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.dashboard_title = title
+    dash.slug = slug
+    dash.slices = slices
+    db.session.merge(dash)
+    db.session.commit()
+
+
+if __name__ == "__main__":
+    load_deck_dash()

+ 141 - 0
data/purposeCombined/BI/examples/energy.py

@@ -0,0 +1,141 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Loads datasets, dashboards and slices in a new superset instance"""
+import textwrap
+
+import pandas as pd
+from sqlalchemy import Float, String
+from sqlalchemy.sql import column
+
+from superset import db
+from superset.connectors.sqla.models import SqlMetric
+from superset.models.slice import Slice
+from superset.utils import core as utils
+
+from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL
+
+
+def load_energy(only_metadata: bool = False, force: bool = False) -> None:
+    """Loads an energy related dataset to use with sankey and graphs"""
+    tbl_name = "energy_usage"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("energy.json.gz")
+        pdf = pd.read_json(data)
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={"source": String(255), "target": String(255), "value": Float()},
+            index=False,
+        )
+
+    print("Creating table [wb_health_population] reference")
+    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not tbl:
+        tbl = TBL(table_name=tbl_name)
+    tbl.description = "Energy consumption"
+    tbl.database = database
+
+    if not any(col.metric_name == "sum__value" for col in tbl.metrics):
+        col = str(column("value").compile(db.engine))
+        tbl.metrics.append(
+            SqlMetric(metric_name="sum__value", expression=f"SUM({col})")
+        )
+
+    db.session.merge(tbl)
+    db.session.commit()
+    tbl.fetch_metadata()
+
+    slc = Slice(
+        slice_name="Energy Sankey",
+        viz_type="sankey",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=textwrap.dedent(
+            """\
+        {
+            "collapsed_fieldsets": "",
+            "groupby": [
+                "source",
+                "target"
+            ],
+            "metric": "sum__value",
+            "row_limit": "5000",
+            "slice_name": "Energy Sankey",
+            "viz_type": "sankey"
+        }
+        """
+        ),
+    )
+    misc_dash_slices.add(slc.slice_name)
+    merge_slice(slc)
+
+    slc = Slice(
+        slice_name="Energy Force Layout",
+        viz_type="directed_force",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=textwrap.dedent(
+            """\
+        {
+            "charge": "-500",
+            "collapsed_fieldsets": "",
+            "groupby": [
+                "source",
+                "target"
+            ],
+            "link_length": "200",
+            "metric": "sum__value",
+            "row_limit": "5000",
+            "slice_name": "Force",
+            "viz_type": "directed_force"
+        }
+        """
+        ),
+    )
+    misc_dash_slices.add(slc.slice_name)
+    merge_slice(slc)
+
+    slc = Slice(
+        slice_name="Heatmap",
+        viz_type="heatmap",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=textwrap.dedent(
+            """\
+        {
+            "all_columns_x": "source",
+            "all_columns_y": "target",
+            "canvas_image_rendering": "pixelated",
+            "collapsed_fieldsets": "",
+            "linear_color_scheme": "blue_white_yellow",
+            "metric": "sum__value",
+            "normalize_across": "heatmap",
+            "slice_name": "Heatmap",
+            "viz_type": "heatmap",
+            "xscale_interval": "1",
+            "yscale_interval": "1"
+        }
+        """
+        ),
+    )
+    misc_dash_slices.add(slc.slice_name)
+    merge_slice(slc)

+ 68 - 0
data/purposeCombined/BI/examples/flights.py

@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pandas as pd
+from sqlalchemy import DateTime
+
+from superset import db
+from superset.utils import core as utils
+
+from .helpers import get_example_data, TBL
+
+
+def load_flights(only_metadata: bool = False, force: bool = False) -> None:
+    """Loading random time series data from a zip file in the repo"""
+    tbl_name = "flights"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("flight_data.csv.gz", make_bytes=True)
+        pdf = pd.read_csv(data, encoding="latin-1")
+
+        # Loading airports info to join and get lat/long
+        airports_bytes = get_example_data("airports.csv.gz", make_bytes=True)
+        airports = pd.read_csv(airports_bytes, encoding="latin-1")
+        airports = airports.set_index("IATA_CODE")
+
+        pdf["ds"] = (
+            pdf.YEAR.map(str) + "-0" + pdf.MONTH.map(str) + "-0" + pdf.DAY.map(str)
+        )
+        pdf.ds = pd.to_datetime(pdf.ds)
+        del pdf["YEAR"]
+        del pdf["MONTH"]
+        del pdf["DAY"]
+
+        pdf = pdf.join(airports, on="ORIGIN_AIRPORT", rsuffix="_ORIG")
+        pdf = pdf.join(airports, on="DESTINATION_AIRPORT", rsuffix="_DEST")
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={"ds": DateTime},
+            index=False,
+        )
+
+    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not tbl:
+        tbl = TBL(table_name=tbl_name)
+    tbl.description = "Random set of flights in the US"
+    tbl.database = database
+    db.session.merge(tbl)
+    db.session.commit()
+    tbl.fetch_metadata()
+    print("Done loading table!")

+ 78 - 0
data/purposeCombined/BI/examples/helpers-backup.py

@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Loads datasets, dashboards and slices in a new superset instance"""
+import json
+import os
+import zlib
+from io import BytesIO
+from typing import Any, Dict, List, Set
+from urllib import request
+
+from superset import app, db
+from superset.connectors.connector_registry import ConnectorRegistry
+from superset.models import core as models
+from superset.models.slice import Slice
+
+BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/"
+
+# Shortcuts
+DB = models.Database
+
+TBL = ConnectorRegistry.sources["table"]
+
+config = app.config
+
+EXAMPLES_FOLDER = os.path.join(config["BASE_DIR"], "examples")
+
+misc_dash_slices: Set[str] = set()  # slices assembled in a 'Misc Chart' dashboard
+
+
+def update_slice_ids(layout_dict: Dict[Any, Any], slices: List[Slice]) -> None:
+    charts = [
+        component
+        for component in layout_dict.values()
+        if isinstance(component, dict) and component["type"] == "CHART"
+    ]
+    sorted_charts = sorted(charts, key=lambda k: k["meta"]["chartId"])
+    for i, chart_component in enumerate(sorted_charts):
+        if i < len(slices):
+            chart_component["meta"]["chartId"] = int(slices[i].id)
+
+
+def merge_slice(slc: Slice) -> None:
+    o = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
+    if o:
+        db.session.delete(o)
+    db.session.add(slc)
+    db.session.commit()
+
+
+def get_slice_json(defaults: Dict[Any, Any], **kwargs: Any) -> str:
+    defaults_copy = defaults.copy()
+    defaults_copy.update(kwargs)
+    return json.dumps(defaults_copy, indent=4, sort_keys=True)
+
+
+def get_example_data(
+    filepath: str, is_gzip: bool = True, make_bytes: bool = False
+) -> BytesIO:
+    content = request.urlopen(f"{BASE_URL}{filepath}?raw=true").read()
+    if is_gzip:
+        content = zlib.decompress(content, zlib.MAX_WBITS | 16)
+    if make_bytes:
+        content = BytesIO(content)
+    return content

+ 78 - 0
data/purposeCombined/BI/examples/helpers.py

@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Loads datasets, dashboards and slices in a new superset instance"""
+import json
+import os
+import zlib
+from io import BytesIO
+from typing import Any, Dict, List, Set
+from urllib import request
+
+from superset import app, db
+from superset.connectors.connector_registry import ConnectorRegistry
+from superset.models import core as models
+from superset.models.slice import Slice
+
+BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/"
+
+# Shortcuts
+DB = models.Database
+
+TBL = ConnectorRegistry.sources["table"]
+
+config = app.config
+
+EXAMPLES_FOLDER = os.path.join(config["BASE_DIR"], "examples")
+
+misc_dash_slices: Set[str] = set()  # slices assembled in a 'Misc Chart' dashboard
+
+
+def update_slice_ids(layout_dict: Dict[Any, Any], slices: List[Slice]) -> None:
+    charts = [
+        component
+        for component in layout_dict.values()
+        if isinstance(component, dict) and component["type"] == "CHART"
+    ]
+    sorted_charts = sorted(charts, key=lambda k: k["meta"]["chartId"])
+    for i, chart_component in enumerate(sorted_charts):
+        if i < len(slices):
+            chart_component["meta"]["chartId"] = int(slices[i].id)
+
+
+def merge_slice(slc: Slice) -> None:
+    o = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
+    if o:
+        db.session.delete(o)
+    db.session.add(slc)
+    db.session.commit()
+
+
+def get_slice_json(defaults: Dict[Any, Any], **kwargs: Any) -> str:
+    defaults_copy = defaults.copy()
+    defaults_copy.update(kwargs)
+    return json.dumps(defaults_copy, indent=4, sort_keys=True)
+
+
+def get_example_data(
+    filepath: str, is_gzip: bool = True, make_bytes: bool = False
+) -> BytesIO:
+    content = request.urlopen(f"{BASE_URL}{filepath}?raw=true").read()
+    if is_gzip:
+        content = zlib.decompress(content, zlib.MAX_WBITS | 16)
+    if make_bytes:
+        content = BytesIO(content)
+    return content
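Editor's note: a hedged sketch of how the helpers above compose in the example loaders (illustrative only; the data filename and slice params are taken from the energy example, and a configured superset app context is assumed).

    # illustrative only -- not part of the committed helpers.py
    from .helpers import get_example_data, get_slice_json, merge_slice, update_slice_ids

    raw = get_example_data("energy.json.gz")                         # gzip-decompressed bytes fetched from BASE_URL
    params = get_slice_json({"viz_type": "sankey"}, row_limit=5000)  # JSON string suitable for Slice.params
    # merge_slice(slc) replaces any existing slice with the same slice_name;
    # update_slice_ids(layout, slices) rewrites each "chartId" in a dashboard
    # position layout to point at the freshly merged Slice rows, in chartId order.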

+ 116 - 0
data/purposeCombined/BI/examples/long_lat.py

@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import datetime
+import random
+
+import geohash
+import pandas as pd
+from sqlalchemy import DateTime, Float, String
+
+from superset import db
+from superset.models.slice import Slice
+from superset.utils import core as utils
+
+from .helpers import (
+    get_example_data,
+    get_slice_json,
+    merge_slice,
+    misc_dash_slices,
+    TBL,
+)
+
+
+def load_long_lat_data(only_metadata: bool = False, force: bool = False) -> None:
+    """Loading lat/long data from a csv file in the repo"""
+    tbl_name = "long_lat"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("san_francisco.csv.gz", make_bytes=True)
+        pdf = pd.read_csv(data, encoding="utf-8")
+        start = datetime.datetime.now().replace(
+            hour=0, minute=0, second=0, microsecond=0
+        )
+        pdf["datetime"] = [
+            start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1))
+            for i in range(len(pdf))
+        ]
+        pdf["occupancy"] = [random.randint(1, 6) for _ in range(len(pdf))]
+        pdf["radius_miles"] = [random.uniform(1, 3) for _ in range(len(pdf))]
+        pdf["geohash"] = pdf[["LAT", "LON"]].apply(lambda x: geohash.encode(*x), axis=1)
+        pdf["delimited"] = pdf["LAT"].map(str).str.cat(pdf["LON"].map(str), sep=",")
+        pdf.to_sql(  # pylint: disable=no-member
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "longitude": Float(),
+                "latitude": Float(),
+                "number": Float(),
+                "street": String(100),
+                "unit": String(10),
+                "city": String(50),
+                "district": String(50),
+                "region": String(50),
+                "postcode": Float(),
+                "id": String(100),
+                "datetime": DateTime(),
+                "occupancy": Float(),
+                "radius_miles": Float(),
+                "geohash": String(12),
+                "delimited": String(60),
+            },
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)
+
+    print("Creating table reference")
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not obj:
+        obj = TBL(table_name=tbl_name)
+    obj.main_dttm_col = "datetime"
+    obj.database = database
+    db.session.merge(obj)
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
+    slice_data = {
+        "granularity_sqla": "day",
+        "since": "2014-01-01",
+        "until": "now",
+        "viz_type": "mapbox",
+        "all_columns_x": "LON",
+        "all_columns_y": "LAT",
+        "mapbox_style": "mapbox://styles/mapbox/light-v9",
+        "all_columns": ["occupancy"],
+        "row_limit": 500000,
+    }
+
+    print("Creating a slice")
+    slc = Slice(
+        slice_name="Mapbox Long/Lat",
+        viz_type="mapbox",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    misc_dash_slices.add(slc.slice_name)
+    merge_slice(slc)

+ 224 - 0
data/purposeCombined/BI/examples/misc_dashboard-backup.py

@@ -0,0 +1,224 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+import textwrap
+
+from superset import db
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+
+from .helpers import misc_dash_slices, update_slice_ids
+
+DASH_SLUG = "misc_charts"
+
+
+def load_misc_dashboard() -> None:
+    """Loading a dashboard featuring misc charts"""
+
+    print("Creating the dashboard")
+    db.session.expunge_all()
+    dash = db.session.query(Dashboard).filter_by(slug=DASH_SLUG).first()
+
+    if not dash:
+        dash = Dashboard()
+    js = textwrap.dedent(
+        """\
+{
+    "CHART-BkeVbh8ANQ": {
+        "children": [],
+        "id": "CHART-BkeVbh8ANQ",
+        "meta": {
+            "chartId": 4004,
+            "height": 34,
+            "sliceName": "Multi Line",
+            "width": 8
+        },
+        "type": "CHART"
+    },
+    "CHART-H1HYNzEANX": {
+        "children": [],
+        "id": "CHART-H1HYNzEANX",
+        "meta": {
+            "chartId": 3940,
+            "height": 50,
+            "sliceName": "Energy Sankey",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-HJOYVMV0E7": {
+        "children": [],
+        "id": "CHART-HJOYVMV0E7",
+        "meta": {
+            "chartId": 3969,
+            "height": 63,
+            "sliceName": "Mapbox Long/Lat",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-S1WYNz4AVX": {
+        "children": [],
+        "id": "CHART-S1WYNz4AVX",
+        "meta": {
+            "chartId": 3989,
+            "height": 25,
+            "sliceName": "Parallel Coordinates",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "CHART-r19KVMNCE7": {
+        "children": [],
+        "id": "CHART-r19KVMNCE7",
+        "meta": {
+            "chartId": 3971,
+            "height": 34,
+            "sliceName": "Calendar Heatmap multiformat 0",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "CHART-rJ4K4GV04Q": {
+        "children": [],
+        "id": "CHART-rJ4K4GV04Q",
+        "meta": {
+            "chartId": 3941,
+            "height": 63,
+            "sliceName": "Energy Force Layout",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-rkgF4G4A4X": {
+        "children": [],
+        "id": "CHART-rkgF4G4A4X",
+        "meta": {
+            "chartId": 3970,
+            "height": 25,
+            "sliceName": "Birth in France by department in 2016",
+            "width": 8
+        },
+        "type": "CHART"
+    },
+    "CHART-rywK4GVR4X": {
+        "children": [],
+        "id": "CHART-rywK4GVR4X",
+        "meta": {
+            "chartId": 3942,
+            "height": 50,
+            "sliceName": "Heatmap",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "COLUMN-ByUFVf40EQ": {
+        "children": [
+            "CHART-rywK4GVR4X",
+            "CHART-HJOYVMV0E7"
+        ],
+        "id": "COLUMN-ByUFVf40EQ",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT",
+            "width": 6
+        },
+        "type": "COLUMN"
+    },
+    "COLUMN-rkmYVGN04Q": {
+        "children": [
+            "CHART-rJ4K4GV04Q",
+            "CHART-H1HYNzEANX"
+        ],
+        "id": "COLUMN-rkmYVGN04Q",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT",
+            "width": 6
+        },
+        "type": "COLUMN"
+    },
+    "GRID_ID": {
+        "children": [
+            "ROW-SytNzNA4X",
+            "ROW-S1MK4M4A4X",
+            "ROW-HkFFEzVRVm"
+        ],
+        "id": "GRID_ID",
+        "type": "GRID"
+    },
+    "HEADER_ID": {
+        "id": "HEADER_ID",
+        "meta": {
+            "text": "Misc Charts"
+        },
+        "type": "HEADER"
+    },
+    "ROOT_ID": {
+        "children": [
+            "GRID_ID"
+        ],
+        "id": "ROOT_ID",
+        "type": "ROOT"
+    },
+    "ROW-HkFFEzVRVm": {
+        "children": [
+            "CHART-r19KVMNCE7",
+            "CHART-BkeVbh8ANQ"
+        ],
+        "id": "ROW-HkFFEzVRVm",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-S1MK4M4A4X": {
+        "children": [
+            "COLUMN-rkmYVGN04Q",
+            "COLUMN-ByUFVf40EQ"
+        ],
+        "id": "ROW-S1MK4M4A4X",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-SytNzNA4X": {
+        "children": [
+            "CHART-rkgF4G4A4X",
+            "CHART-S1WYNz4AVX"
+        ],
+        "id": "ROW-SytNzNA4X",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "DASHBOARD_VERSION_KEY": "v2"
+}
+    """
+    )
+    pos = json.loads(js)
+    slices = (
+        db.session.query(Slice).filter(Slice.slice_name.in_(misc_dash_slices)).all()
+    )
+    slices = sorted(slices, key=lambda x: x.id)
+    update_slice_ids(pos, slices)
+    dash.dashboard_title = "Misc Charts"
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slug = DASH_SLUG
+    dash.slices = slices
+    db.session.merge(dash)
+    db.session.commit()

+ 224 - 0
data/purposeCombined/BI/examples/misc_dashboard.py

@@ -0,0 +1,224 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+import textwrap
+
+from superset import db
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+
+from .helpers import misc_dash_slices, update_slice_ids
+
+DASH_SLUG = "misc_charts"
+
+
+def load_misc_dashboard() -> None:
+    """Loading a dashboard featuring misc charts"""
+
+    print("Creating the dashboard")
+    db.session.expunge_all()
+    dash = db.session.query(Dashboard).filter_by(slug=DASH_SLUG).first()
+
+    if not dash:
+        dash = Dashboard()
+    js = textwrap.dedent(
+        """\
+{
+    "CHART-BkeVbh8ANQ": {
+        "children": [],
+        "id": "CHART-BkeVbh8ANQ",
+        "meta": {
+            "chartId": 4004,
+            "height": 34,
+            "sliceName": "Multi Line",
+            "width": 8
+        },
+        "type": "CHART"
+    },
+    "CHART-H1HYNzEANX": {
+        "children": [],
+        "id": "CHART-H1HYNzEANX",
+        "meta": {
+            "chartId": 3940,
+            "height": 50,
+            "sliceName": "Energy Sankey",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-HJOYVMV0E7": {
+        "children": [],
+        "id": "CHART-HJOYVMV0E7",
+        "meta": {
+            "chartId": 3969,
+            "height": 63,
+            "sliceName": "Mapbox Long/Lat",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-S1WYNz4AVX": {
+        "children": [],
+        "id": "CHART-S1WYNz4AVX",
+        "meta": {
+            "chartId": 3989,
+            "height": 25,
+            "sliceName": "Parallel Coordinates",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "CHART-r19KVMNCE7": {
+        "children": [],
+        "id": "CHART-r19KVMNCE7",
+        "meta": {
+            "chartId": 3971,
+            "height": 34,
+            "sliceName": "Calendar Heatmap multiformat 0",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "CHART-rJ4K4GV04Q": {
+        "children": [],
+        "id": "CHART-rJ4K4GV04Q",
+        "meta": {
+            "chartId": 3941,
+            "height": 63,
+            "sliceName": "Energy Force Layout",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-rkgF4G4A4X": {
+        "children": [],
+        "id": "CHART-rkgF4G4A4X",
+        "meta": {
+            "chartId": 3970,
+            "height": 25,
+            "sliceName": "Birth in France by department in 2016",
+            "width": 8
+        },
+        "type": "CHART"
+    },
+    "CHART-rywK4GVR4X": {
+        "children": [],
+        "id": "CHART-rywK4GVR4X",
+        "meta": {
+            "chartId": 3942,
+            "height": 50,
+            "sliceName": "Heatmap",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "COLUMN-ByUFVf40EQ": {
+        "children": [
+            "CHART-rywK4GVR4X",
+            "CHART-HJOYVMV0E7"
+        ],
+        "id": "COLUMN-ByUFVf40EQ",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT",
+            "width": 6
+        },
+        "type": "COLUMN"
+    },
+    "COLUMN-rkmYVGN04Q": {
+        "children": [
+            "CHART-rJ4K4GV04Q",
+            "CHART-H1HYNzEANX"
+        ],
+        "id": "COLUMN-rkmYVGN04Q",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT",
+            "width": 6
+        },
+        "type": "COLUMN"
+    },
+    "GRID_ID": {
+        "children": [
+            "ROW-SytNzNA4X",
+            "ROW-S1MK4M4A4X",
+            "ROW-HkFFEzVRVm"
+        ],
+        "id": "GRID_ID",
+        "type": "GRID"
+    },
+    "HEADER_ID": {
+        "id": "HEADER_ID",
+        "meta": {
+            "text": "Misc Charts"
+        },
+        "type": "HEADER"
+    },
+    "ROOT_ID": {
+        "children": [
+            "GRID_ID"
+        ],
+        "id": "ROOT_ID",
+        "type": "ROOT"
+    },
+    "ROW-HkFFEzVRVm": {
+        "children": [
+            "CHART-r19KVMNCE7",
+            "CHART-BkeVbh8ANQ"
+        ],
+        "id": "ROW-HkFFEzVRVm",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-S1MK4M4A4X": {
+        "children": [
+            "COLUMN-rkmYVGN04Q",
+            "COLUMN-ByUFVf40EQ"
+        ],
+        "id": "ROW-S1MK4M4A4X",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-SytNzNA4X": {
+        "children": [
+            "CHART-rkgF4G4A4X",
+            "CHART-S1WYNz4AVX"
+        ],
+        "id": "ROW-SytNzNA4X",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "DASHBOARD_VERSION_KEY": "v2"
+}
+    """
+    )
+    pos = json.loads(js)
+    slices = (
+        db.session.query(Slice).filter(Slice.slice_name.in_(misc_dash_slices)).all()
+    )
+    slices = sorted(slices, key=lambda x: x.id)
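+    # update_slice_ids() presumably rewrites the placeholder chartIds baked into the
+    # position JSON above with the ids of the slices just queried from the database.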
+    update_slice_ids(pos, slices)
+    dash.dashboard_title = "Misc Charts"
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slug = DASH_SLUG
+    dash.slices = slices
+    db.session.merge(dash)
+    db.session.commit()

+ 58 - 0
data/purposeCombined/BI/examples/multi_line.py

@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+
+from superset import db
+from superset.models.slice import Slice
+
+from .birth_names import load_birth_names
+from .helpers import merge_slice, misc_dash_slices
+from .world_bank import load_world_bank_health_n_pop
+
+
+def load_multi_line(only_metadata: bool = False) -> None:
+    load_world_bank_health_n_pop(only_metadata)
+    load_birth_names(only_metadata)
+    ids = [
+        row.id
+        for row in db.session.query(Slice).filter(
+            Slice.slice_name.in_(["Growth Rate", "Trends"])
+        )
+    ]
+
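+    # The line_multi viz composes the two existing charts queried above by id, which is
+    # presumably why the datasource fields below are only placeholders (see the inline notes).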
+    slc = Slice(
+        datasource_type="table",  # not true, but needed
+        datasource_id=1,  # cannot be empty
+        slice_name="Multi Line",
+        viz_type="line_multi",
+        params=json.dumps(
+            {
+                "slice_name": "Multi Line",
+                "viz_type": "line_multi",
+                "line_charts": [ids[0]],
+                "line_charts_2": [ids[1]],
+                "since": "1970",
+                "until": "1995",
+                "prefix_metric_with_slice_name": True,
+                "show_legend": False,
+                "x_axis_format": "%Y",
+            }
+        ),
+    )
+
+    misc_dash_slices.add(slc.slice_name)
+    merge_slice(slc)

+ 117 - 0
data/purposeCombined/BI/examples/multiformat_time_series.py

@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from typing import Dict, Optional, Tuple
+
+import pandas as pd
+from sqlalchemy import BigInteger, Date, DateTime, String
+
+from superset import db
+from superset.models.slice import Slice
+from superset.utils.core import get_example_database
+
+from .helpers import (
+    config,
+    get_example_data,
+    get_slice_json,
+    merge_slice,
+    misc_dash_slices,
+    TBL,
+)
+
+
+def load_multiformat_time_series(
+    only_metadata: bool = False, force: bool = False
+) -> None:
+    """Loading time series data from a zip file in the repo"""
+    tbl_name = "multiformat_time_series"
+    database = get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("multiformat_time_series.json.gz")
+        pdf = pd.read_json(data)
+
+        pdf.ds = pd.to_datetime(pdf.ds, unit="s")
+        pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "ds": Date,
+                "ds2": DateTime,
+                "epoch_s": BigInteger,
+                "epoch_ms": BigInteger,
+                "string0": String(100),
+                "string1": String(100),
+                "string2": String(100),
+                "string3": String(100),
+            },
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)
+
+    print(f"Creating table [{tbl_name}] reference")
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not obj:
+        obj = TBL(table_name=tbl_name)
+    obj.main_dttm_col = "ds"
+    obj.database = database
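+    # Each entry maps a column name to (python_date_format, database expression): the
+    # epoch columns use the special "epoch_s"/"epoch_ms" formats and the string columns
+    # a strptime-style pattern; the second element is always None here.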
+    dttm_and_expr_dict: Dict[str, Tuple[Optional[str], None]] = {
+        "ds": (None, None),
+        "ds2": (None, None),
+        "epoch_s": ("epoch_s", None),
+        "epoch_ms": ("epoch_ms", None),
+        "string2": ("%Y%m%d-%H%M%S", None),
+        "string1": ("%Y-%m-%d^%H:%M:%S", None),
+        "string0": ("%Y-%m-%d %H:%M:%S.%f", None),
+        "string3": ("%Y/%m/%d%H:%M:%S.%f", None),
+    }
+    for col in obj.columns:
+        dttm_and_expr = dttm_and_expr_dict[col.column_name]
+        col.python_date_format = dttm_and_expr[0]
+        col.database_expression = dttm_and_expr[1]
+        col.is_dttm = True
+    db.session.merge(obj)
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
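+    # One calendar heatmap per time column, so every format above gets exercised;
+    # only "Calendar Heatmap multiformat 0" is registered for the misc dashboard.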
+    print("Creating Heatmap charts")
+    for i, col in enumerate(tbl.columns):
+        slice_data = {
+            "metrics": ["count"],
+            "granularity_sqla": col.column_name,
+            "row_limit": config["ROW_LIMIT"],
+            "since": "2015",
+            "until": "2016",
+            "viz_type": "cal_heatmap",
+            "domain_granularity": "month",
+            "subdomain_granularity": "day",
+        }
+
+        slc = Slice(
+            slice_name=f"Calendar Heatmap multiformat {i}",
+            viz_type="cal_heatmap",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(slice_data),
+        )
+        merge_slice(slc)
+    misc_dash_slices.add("Calendar Heatmap multiformat 0")

+ 60 - 0
data/purposeCombined/BI/examples/paris.py

@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+
+import pandas as pd
+from sqlalchemy import String, Text
+
+from superset import db
+from superset.utils import core as utils
+
+from .helpers import get_example_data, TBL
+
+
+def load_paris_iris_geojson(only_metadata: bool = False, force: bool = False) -> None:
+    tbl_name = "paris_iris_mapping"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("paris_iris.json.gz")
+        df = pd.read_json(data)
+        df["features"] = df.features.map(json.dumps)
+
+        df.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "color": String(255),
+                "name": String(255),
+                "features": Text,
+                "type": Text,
+            },
+            index=False,
+        )
+
+    print("Creating table {} reference".format(tbl_name))
+    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not tbl:
+        tbl = TBL(table_name=tbl_name)
+    tbl.description = "Map of Paris"
+    tbl.database = database
+    db.session.merge(tbl)
+    db.session.commit()
+    tbl.fetch_metadata()

+ 81 - 0
data/purposeCombined/BI/examples/random_time_series.py

@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pandas as pd
+from sqlalchemy import DateTime
+
+from superset import db
+from superset.models.slice import Slice
+from superset.utils import core as utils
+
+from .helpers import config, get_example_data, get_slice_json, merge_slice, TBL
+
+
+def load_random_time_series_data(
+    only_metadata: bool = False, force: bool = False
+) -> None:
+    """Loading random time series data from a zip file in the repo"""
+    tbl_name = "random_time_series"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("random_time_series.json.gz")
+        pdf = pd.read_json(data)
+        pdf.ds = pd.to_datetime(pdf.ds, unit="s")
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={"ds": DateTime},
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)
+
+    print(f"Creating table [{tbl_name}] reference")
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not obj:
+        obj = TBL(table_name=tbl_name)
+    obj.main_dttm_col = "ds"
+    obj.database = database
+    db.session.merge(obj)
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
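+    # Calendar heatmap over January 2019: month-level domains subdivided into day cells.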
+    slice_data = {
+        "granularity_sqla": "day",
+        "row_limit": config["ROW_LIMIT"],
+        "since": "2019-01-01",
+        "until": "2019-02-01",
+        "metric": "count",
+        "viz_type": "cal_heatmap",
+        "domain_granularity": "month",
+        "subdomain_granularity": "day",
+    }
+
+    print("Creating a slice")
+    slc = Slice(
+        slice_name="Calendar Heatmap",
+        viz_type="cal_heatmap",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)

+ 62 - 0
data/purposeCombined/BI/examples/sf_population_polygons.py

@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+
+import pandas as pd
+from sqlalchemy import BigInteger, Float, Text
+
+from superset import db
+from superset.utils import core as utils
+
+from .helpers import get_example_data, TBL
+
+
+def load_sf_population_polygons(
+    only_metadata: bool = False, force: bool = False
+) -> None:
+    tbl_name = "sf_population_polygons"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("sf_population.json.gz")
+        df = pd.read_json(data)
+        df["contour"] = df.contour.map(json.dumps)
+
+        df.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "zipcode": BigInteger,
+                "population": BigInteger,
+                "contour": Text,
+                "area": Float,
+            },
+            index=False,
+        )
+
+    print("Creating table {} reference".format(tbl_name))
+    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not tbl:
+        tbl = TBL(table_name=tbl_name)
+    tbl.description = "Population density of San Francisco"
+    tbl.database = database
+    db.session.merge(tbl)
+    db.session.commit()
+    tbl.fetch_metadata()

+ 342 - 0
data/purposeCombined/BI/examples/tabbed_dashboard-backup.py

@@ -0,0 +1,342 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Loads datasets, dashboards and slices in a new superset instance"""
+import json
+import textwrap
+
+from superset import db
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+
+from .helpers import update_slice_ids
+
+
+def load_tabbed_dashboard(_: bool = False) -> None:
+    """Creating a tabbed dashboard"""
+
+    print("Creating a dashboard with nested tabs")
+    slug = "tabbed_dash"
+    dash = db.session.query(Dashboard).filter_by(slug=slug).first()
+
+    if not dash:
+        dash = Dashboard()
+
+    # reuse charts in "World's Bank Data" and create
+    # new dashboard with nested tabs
+    tabbed_dash_slices = set()
+    tabbed_dash_slices.add("Region Filter")
+    tabbed_dash_slices.add("Growth Rate")
+    tabbed_dash_slices.add("Treemap")
+    tabbed_dash_slices.add("Box plot")
+
+    js = textwrap.dedent(
+        """\
+    {
+      "CHART-c0EjR-OZ0n": {
+        "children": [],
+        "id": "CHART-c0EjR-OZ0n",
+        "meta": {
+          "chartId": 870,
+          "height": 50,
+          "sliceName": "Box plot",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "ROW-7G2o5uDvfo"
+        ],
+        "type": "CHART"
+      },
+      "CHART-dxV7Il74hH": {
+        "children": [],
+        "id": "CHART-dxV7Il74hH",
+        "meta": {
+          "chartId": 797,
+          "height": 50,
+          "sliceName": "Treemap",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS",
+          "TABS-afnrUvdxYF",
+          "TAB-jNNd4WWar1",
+          "ROW-7ygtDczaQ"
+        ],
+        "type": "CHART"
+      },
+      "CHART-jJ5Yj1Ptaz": {
+        "children": [],
+        "id": "CHART-jJ5Yj1Ptaz",
+        "meta": {
+          "chartId": 789,
+          "height": 50,
+          "sliceName": "World's Population",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-z81Q87PD7",
+          "ROW-G73z9PIHn"
+        ],
+        "type": "CHART"
+      },
+      "CHART-z4gmEuCqQ5": {
+        "children": [],
+        "id": "CHART-z4gmEuCqQ5",
+        "meta": {
+          "chartId": 788,
+          "height": 50,
+          "sliceName": "Region Filter",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-EcNm_wh922",
+          "ROW-LCjsdSetJ"
+        ],
+        "type": "CHART"
+      },
+      "DASHBOARD_VERSION_KEY": "v2",
+      "GRID_ID": {
+        "children": [],
+        "id": "GRID_ID",
+        "type": "GRID"
+      },
+      "HEADER_ID": {
+        "id": "HEADER_ID",
+        "meta": {
+          "text": "Tabbed Dashboard"
+        },
+        "type": "HEADER"
+      },
+      "ROOT_ID": {
+        "children": [
+          "TABS-lV0r00f4H1"
+        ],
+        "id": "ROOT_ID",
+        "type": "ROOT"
+      },
+      "ROW-7G2o5uDvfo": {
+        "children": [
+          "CHART-c0EjR-OZ0n"
+        ],
+        "id": "ROW-7G2o5uDvfo",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS"
+        ],
+        "type": "ROW"
+      },
+      "ROW-7ygtDczaQ": {
+        "children": [
+          "CHART-dxV7Il74hH"
+        ],
+        "id": "ROW-7ygtDczaQ",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS",
+          "TABS-afnrUvdxYF",
+          "TAB-jNNd4WWar1"
+        ],
+        "type": "ROW"
+      },
+      "ROW-G73z9PIHn": {
+        "children": [
+          "CHART-jJ5Yj1Ptaz"
+        ],
+        "id": "ROW-G73z9PIHn",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-z81Q87PD7"
+        ],
+        "type": "ROW"
+      },
+      "ROW-LCjsdSetJ": {
+        "children": [
+          "CHART-z4gmEuCqQ5"
+        ],
+        "id": "ROW-LCjsdSetJ",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-EcNm_wh922"
+        ],
+        "type": "ROW"
+      },
+      "TAB-EcNm_wh922": {
+        "children": [
+          "ROW-LCjsdSetJ"
+        ],
+        "id": "TAB-EcNm_wh922",
+        "meta": {
+          "text": "row tab 1"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj"
+        ],
+        "type": "TAB"
+      },
+      "TAB-NF3dlrWGS": {
+        "children": [
+          "ROW-7G2o5uDvfo",
+          "TABS-CSjo6VfNrj"
+        ],
+        "id": "TAB-NF3dlrWGS",
+        "meta": {
+          "text": "Tab A"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1"
+        ],
+        "type": "TAB"
+      },
+      "TAB-gcQJxApOZS": {
+        "children": [
+          "TABS-afnrUvdxYF"
+        ],
+        "id": "TAB-gcQJxApOZS",
+        "meta": {
+          "text": "Tab B"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1"
+        ],
+        "type": "TAB"
+      },
+      "TAB-jNNd4WWar1": {
+        "children": [
+          "ROW-7ygtDczaQ"
+        ],
+        "id": "TAB-jNNd4WWar1",
+        "meta": {
+          "text": "New Tab"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS",
+          "TABS-afnrUvdxYF"
+        ],
+        "type": "TAB"
+      },
+      "TAB-z81Q87PD7": {
+        "children": [
+          "ROW-G73z9PIHn"
+        ],
+        "id": "TAB-z81Q87PD7",
+        "meta": {
+          "text": "row tab 2"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj"
+        ],
+        "type": "TAB"
+      },
+      "TABS-CSjo6VfNrj": {
+        "children": [
+          "TAB-EcNm_wh922",
+          "TAB-z81Q87PD7"
+        ],
+        "id": "TABS-CSjo6VfNrj",
+        "meta": {},
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS"
+        ],
+        "type": "TABS"
+      },
+      "TABS-afnrUvdxYF": {
+        "children": [
+          "TAB-jNNd4WWar1"
+        ],
+        "id": "TABS-afnrUvdxYF",
+        "meta": {},
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS"
+        ],
+        "type": "TABS"
+      },
+      "TABS-lV0r00f4H1": {
+        "children": [
+          "TAB-NF3dlrWGS",
+          "TAB-gcQJxApOZS"
+        ],
+        "id": "TABS-lV0r00f4H1",
+        "meta": {},
+        "parents": [
+          "ROOT_ID"
+        ],
+        "type": "TABS"
+      }
+    }
+        """
+    )
+    pos = json.loads(js)
+    slices = [
+        db.session.query(Slice).filter_by(slice_name=name).first()
+        for name in tabbed_dash_slices
+    ]
+
+    slices = sorted(slices, key=lambda x: x.id)
+    update_slice_ids(pos, slices)
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slices = slices
+    dash.dashboard_title = "Tabbed Dashboard"
+    dash.slug = slug
+
+    db.session.merge(dash)
+    db.session.commit()

+ 342 - 0
data/purposeCombined/BI/examples/tabbed_dashboard.py

@@ -0,0 +1,342 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Loads datasets, dashboards and slices in a new superset instance"""
+import json
+import textwrap
+
+from superset import db
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+
+from .helpers import update_slice_ids
+
+
+def load_tabbed_dashboard(_: bool = False) -> None:
+    """Creating a tabbed dashboard"""
+
+    print("Creating a dashboard with nested tabs")
+    slug = "tabbed_dash"
+    dash = db.session.query(Dashboard).filter_by(slug=slug).first()
+
+    if not dash:
+        dash = Dashboard()
+
+    # reuse charts in "World's Bank Data" and create
+    # new dashboard with nested tabs
+    tabbed_dash_slices = set()
+    tabbed_dash_slices.add("Region Filter")
+    tabbed_dash_slices.add("Growth Rate")
+    tabbed_dash_slices.add("Treemap")
+    tabbed_dash_slices.add("Box plot")
+
+    js = textwrap.dedent(
+        """\
+    {
+      "CHART-c0EjR-OZ0n": {
+        "children": [],
+        "id": "CHART-c0EjR-OZ0n",
+        "meta": {
+          "chartId": 870,
+          "height": 50,
+          "sliceName": "Box plot",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "ROW-7G2o5uDvfo"
+        ],
+        "type": "CHART"
+      },
+      "CHART-dxV7Il74hH": {
+        "children": [],
+        "id": "CHART-dxV7Il74hH",
+        "meta": {
+          "chartId": 797,
+          "height": 50,
+          "sliceName": "Treemap",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS",
+          "TABS-afnrUvdxYF",
+          "TAB-jNNd4WWar1",
+          "ROW-7ygtDczaQ"
+        ],
+        "type": "CHART"
+      },
+      "CHART-jJ5Yj1Ptaz": {
+        "children": [],
+        "id": "CHART-jJ5Yj1Ptaz",
+        "meta": {
+          "chartId": 789,
+          "height": 50,
+          "sliceName": "World's Population",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-z81Q87PD7",
+          "ROW-G73z9PIHn"
+        ],
+        "type": "CHART"
+      },
+      "CHART-z4gmEuCqQ5": {
+        "children": [],
+        "id": "CHART-z4gmEuCqQ5",
+        "meta": {
+          "chartId": 788,
+          "height": 50,
+          "sliceName": "Region Filter",
+          "width": 4
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-EcNm_wh922",
+          "ROW-LCjsdSetJ"
+        ],
+        "type": "CHART"
+      },
+      "DASHBOARD_VERSION_KEY": "v2",
+      "GRID_ID": {
+        "children": [],
+        "id": "GRID_ID",
+        "type": "GRID"
+      },
+      "HEADER_ID": {
+        "id": "HEADER_ID",
+        "meta": {
+          "text": "Tabbed Dashboard"
+        },
+        "type": "HEADER"
+      },
+      "ROOT_ID": {
+        "children": [
+          "TABS-lV0r00f4H1"
+        ],
+        "id": "ROOT_ID",
+        "type": "ROOT"
+      },
+      "ROW-7G2o5uDvfo": {
+        "children": [
+          "CHART-c0EjR-OZ0n"
+        ],
+        "id": "ROW-7G2o5uDvfo",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS"
+        ],
+        "type": "ROW"
+      },
+      "ROW-7ygtDczaQ": {
+        "children": [
+          "CHART-dxV7Il74hH"
+        ],
+        "id": "ROW-7ygtDczaQ",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS",
+          "TABS-afnrUvdxYF",
+          "TAB-jNNd4WWar1"
+        ],
+        "type": "ROW"
+      },
+      "ROW-G73z9PIHn": {
+        "children": [
+          "CHART-jJ5Yj1Ptaz"
+        ],
+        "id": "ROW-G73z9PIHn",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-z81Q87PD7"
+        ],
+        "type": "ROW"
+      },
+      "ROW-LCjsdSetJ": {
+        "children": [
+          "CHART-z4gmEuCqQ5"
+        ],
+        "id": "ROW-LCjsdSetJ",
+        "meta": {
+          "background": "BACKGROUND_TRANSPARENT"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj",
+          "TAB-EcNm_wh922"
+        ],
+        "type": "ROW"
+      },
+      "TAB-EcNm_wh922": {
+        "children": [
+          "ROW-LCjsdSetJ"
+        ],
+        "id": "TAB-EcNm_wh922",
+        "meta": {
+          "text": "row tab 1"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj"
+        ],
+        "type": "TAB"
+      },
+      "TAB-NF3dlrWGS": {
+        "children": [
+          "ROW-7G2o5uDvfo",
+          "TABS-CSjo6VfNrj"
+        ],
+        "id": "TAB-NF3dlrWGS",
+        "meta": {
+          "text": "Tab A"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1"
+        ],
+        "type": "TAB"
+      },
+      "TAB-gcQJxApOZS": {
+        "children": [
+          "TABS-afnrUvdxYF"
+        ],
+        "id": "TAB-gcQJxApOZS",
+        "meta": {
+          "text": "Tab B"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1"
+        ],
+        "type": "TAB"
+      },
+      "TAB-jNNd4WWar1": {
+        "children": [
+          "ROW-7ygtDczaQ"
+        ],
+        "id": "TAB-jNNd4WWar1",
+        "meta": {
+          "text": "New Tab"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS",
+          "TABS-afnrUvdxYF"
+        ],
+        "type": "TAB"
+      },
+      "TAB-z81Q87PD7": {
+        "children": [
+          "ROW-G73z9PIHn"
+        ],
+        "id": "TAB-z81Q87PD7",
+        "meta": {
+          "text": "row tab 2"
+        },
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS",
+          "TABS-CSjo6VfNrj"
+        ],
+        "type": "TAB"
+      },
+      "TABS-CSjo6VfNrj": {
+        "children": [
+          "TAB-EcNm_wh922",
+          "TAB-z81Q87PD7"
+        ],
+        "id": "TABS-CSjo6VfNrj",
+        "meta": {},
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-NF3dlrWGS"
+        ],
+        "type": "TABS"
+      },
+      "TABS-afnrUvdxYF": {
+        "children": [
+          "TAB-jNNd4WWar1"
+        ],
+        "id": "TABS-afnrUvdxYF",
+        "meta": {},
+        "parents": [
+          "ROOT_ID",
+          "TABS-lV0r00f4H1",
+          "TAB-gcQJxApOZS"
+        ],
+        "type": "TABS"
+      },
+      "TABS-lV0r00f4H1": {
+        "children": [
+          "TAB-NF3dlrWGS",
+          "TAB-gcQJxApOZS"
+        ],
+        "id": "TABS-lV0r00f4H1",
+        "meta": {},
+        "parents": [
+          "ROOT_ID"
+        ],
+        "type": "TABS"
+      }
+    }
+        """
+    )
+    pos = json.loads(js)
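+    # These four slices are created by the World Bank example, which is therefore
+    # expected to have been loaded before this dashboard is built.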
+    slices = [
+        db.session.query(Slice).filter_by(slice_name=name).first()
+        for name in tabbed_dash_slices
+    ]
+
+    slices = sorted(slices, key=lambda x: x.id)
+    update_slice_ids(pos, slices)
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slices = slices
+    dash.dashboard_title = "Tabbed Dashboard"
+    dash.slug = slug
+
+    db.session.merge(dash)
+    db.session.commit()

+ 163 - 0
data/purposeCombined/BI/examples/unicode_test_data.py

@@ -0,0 +1,163 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import datetime
+import json
+import random
+
+import pandas as pd
+from sqlalchemy import Date, Float, String
+
+from superset import db
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+from superset.utils import core as utils
+
+from .helpers import (
+    config,
+    get_example_data,
+    get_slice_json,
+    merge_slice,
+    TBL,
+    update_slice_ids,
+)
+
+
+def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> None:
+    """Loading unicode test dataset from a csv file in the repo"""
+    tbl_name = "unicode_test"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data(
+            "unicode_utf8_unixnl_test.csv", is_gzip=False, make_bytes=True
+        )
+        df = pd.read_csv(data, encoding="utf-8")
+        # generate date/numeric data
+        df["dttm"] = datetime.datetime.now().date()
+        df["value"] = [random.randint(1, 100) for _ in range(len(df))]
+        df.to_sql(  # pylint: disable=no-member
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=500,
+            dtype={
+                "phrase": String(500),
+                "short_phrase": String(10),
+                "with_missing": String(100),
+                "dttm": Date(),
+                "value": Float(),
+            },
+            index=False,
+        )
+        print("Done loading table!")
+        print("-" * 80)
+
+    print("Creating table [unicode_test] reference")
+    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not obj:
+        obj = TBL(table_name=tbl_name)
+    obj.main_dttm_col = "dttm"
+    obj.database = database
+    db.session.merge(obj)
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
+    slice_data = {
+        "granularity_sqla": "dttm",
+        "groupby": [],
+        "metric": {
+            "aggregate": "SUM",
+            "column": {"column_name": "value"},
+            "expressionType": "SIMPLE",
+            "label": "Value",
+        },
+        "row_limit": config["ROW_LIMIT"],
+        "since": "100 years ago",
+        "until": "now",
+        "viz_type": "word_cloud",
+        "size_from": "10",
+        "series": "short_phrase",
+        "size_to": "70",
+        "rotation": "square",
+        "limit": "100",
+    }
+
+    print("Creating a slice")
+    slc = Slice(
+        slice_name="Unicode Cloud",
+        viz_type="word_cloud",
+        datasource_type="table",
+        datasource_id=tbl.id,
+        params=get_slice_json(slice_data),
+    )
+    merge_slice(slc)
+
+    print("Creating a dashboard")
+    dash = db.session.query(Dashboard).filter_by(slug="unicode-test").first()
+
+    if not dash:
+        dash = Dashboard()
+    js = """\
+{
+    "CHART-Hkx6154FEm": {
+        "children": [],
+        "id": "CHART-Hkx6154FEm",
+        "meta": {
+            "chartId": 2225,
+            "height": 30,
+            "sliceName": "slice 1",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "GRID_ID": {
+        "children": [
+            "ROW-SyT19EFEQ"
+        ],
+        "id": "GRID_ID",
+        "type": "GRID"
+    },
+    "ROOT_ID": {
+        "children": [
+            "GRID_ID"
+        ],
+        "id": "ROOT_ID",
+        "type": "ROOT"
+    },
+    "ROW-SyT19EFEQ": {
+        "children": [
+            "CHART-Hkx6154FEm"
+        ],
+        "id": "ROW-SyT19EFEQ",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "DASHBOARD_VERSION_KEY": "v2"
+}
+    """
+    dash.dashboard_title = "Unicode Test"
+    pos = json.loads(js)
+    update_slice_ids(pos, [slc])
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slug = "unicode-test"
+    dash.slices = [slc]
+    db.session.merge(dash)
+    db.session.commit()

+ 574 - 0
data/purposeCombined/BI/examples/world_bank.py

@@ -0,0 +1,574 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Loads datasets, dashboards and slices in a new superset instance"""
+import json
+import os
+import textwrap
+
+import pandas as pd
+from sqlalchemy import DateTime, String
+from sqlalchemy.sql import column
+
+from superset import db
+from superset.connectors.sqla.models import SqlMetric
+from superset.models.dashboard import Dashboard
+from superset.models.slice import Slice
+from superset.utils import core as utils
+
+from .helpers import (
+    config,
+    EXAMPLES_FOLDER,
+    get_example_data,
+    get_slice_json,
+    merge_slice,
+    misc_dash_slices,
+    TBL,
+    update_slice_ids,
+)
+
+
+def load_world_bank_health_n_pop(  # pylint: disable=too-many-locals
+    only_metadata: bool = False, force: bool = False
+) -> None:
+    """Loads the world bank health dataset, slices and a dashboard"""
+    tbl_name = "wb_health_population"
+    database = utils.get_example_database()
+    table_exists = database.has_table_by_name(tbl_name)
+
+    if not only_metadata and (not table_exists or force):
+        data = get_example_data("countries.json.gz")
+        pdf = pd.read_json(data)
+        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
+        pdf.year = pd.to_datetime(pdf.year)
+        pdf.to_sql(
+            tbl_name,
+            database.get_sqla_engine(),
+            if_exists="replace",
+            chunksize=50,
+            dtype={
+                "year": DateTime(),
+                "country_code": String(3),
+                "country_name": String(255),
+                "region": String(255),
+            },
+            index=False,
+        )
+
+    print("Creating table [wb_health_population] reference")
+    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
+    if not tbl:
+        tbl = TBL(table_name=tbl_name)
+    tbl.description = utils.readfile(os.path.join(EXAMPLES_FOLDER, "countries.md"))
+    tbl.main_dttm_col = "year"
+    tbl.database = database
+    tbl.filter_select_enabled = True
+
+    metrics = [
+        "sum__SP_POP_TOTL",
+        "sum__SH_DYN_AIDS",
+        "sum__SH_DYN_AIDS",
+        "sum__SP_RUR_TOTL_ZS",
+        "sum__SP_DYN_LE00_IN",
+        "sum__SP_RUR_TOTL",
+    ]
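+    # Metric names follow "<agg>__<column>": the first three characters give the
+    # aggregate function and everything after "__" names the source column.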
+    for metric in metrics:
+        if not any(col.metric_name == metric for col in tbl.metrics):
+            aggr_func = metric[:3]
+            col = str(column(metric[5:]).compile(db.engine))
+            tbl.metrics.append(
+                SqlMetric(metric_name=metric, expression=f"{aggr_func}({col})")
+            )
+
+    db.session.merge(tbl)
+    db.session.commit()
+    tbl.fetch_metadata()
+
+    metric = "sum__SP_POP_TOTL"
+    metrics = ["sum__SP_POP_TOTL"]
+    secondary_metric = {
+        "aggregate": "SUM",
+        "column": {
+            "column_name": "SP_RUR_TOTL",
+            "optionName": "_col_SP_RUR_TOTL",
+            "type": "DOUBLE",
+        },
+        "expressionType": "SIMPLE",
+        "hasCustomLabel": True,
+        "label": "Rural Population",
+    }
+
+    defaults = {
+        "compare_lag": "10",
+        "compare_suffix": "o10Y",
+        "limit": "25",
+        "granularity_sqla": "year",
+        "groupby": [],
+        "row_limit": config["ROW_LIMIT"],
+        "since": "2014-01-01",
+        "until": "2014-01-02",
+        "time_range": "2014-01-01 : 2014-01-02",
+        "markup_type": "markdown",
+        "country_fieldtype": "cca3",
+        "entity": "country_code",
+        "show_bubbles": True,
+    }
+
+    print("Creating slices")
+    slices = [
+        Slice(
+            slice_name="Region Filter",
+            viz_type="filter_box",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="filter_box",
+                date_filter=False,
+                filter_configs=[
+                    {
+                        "asc": False,
+                        "clearable": True,
+                        "column": "region",
+                        "key": "2s98dfu",
+                        "metric": "sum__SP_POP_TOTL",
+                        "multiple": True,
+                    },
+                    {
+                        "asc": False,
+                        "clearable": True,
+                        "key": "li3j2lk",
+                        "column": "country_name",
+                        "metric": "sum__SP_POP_TOTL",
+                        "multiple": True,
+                    },
+                ],
+            ),
+        ),
+        Slice(
+            slice_name="World's Population",
+            viz_type="big_number",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                since="2000",
+                viz_type="big_number",
+                compare_lag="10",
+                metric="sum__SP_POP_TOTL",
+                compare_suffix="over 10Y",
+            ),
+        ),
+        Slice(
+            slice_name="Most Populated Countries",
+            viz_type="table",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="table",
+                metrics=["sum__SP_POP_TOTL"],
+                groupby=["country_name"],
+            ),
+        ),
+        Slice(
+            slice_name="Growth Rate",
+            viz_type="line",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="line",
+                since="1960-01-01",
+                metrics=["sum__SP_POP_TOTL"],
+                num_period_compare="10",
+                groupby=["country_name"],
+            ),
+        ),
+        Slice(
+            slice_name="% Rural",
+            viz_type="world_map",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="world_map",
+                metric="sum__SP_RUR_TOTL_ZS",
+                num_period_compare="10",
+                secondary_metric=secondary_metric,
+            ),
+        ),
+        Slice(
+            slice_name="Life Expectancy VS Rural %",
+            viz_type="bubble",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="bubble",
+                since="2011-01-01",
+                until="2011-01-02",
+                series="region",
+                limit=0,
+                entity="country_name",
+                x="sum__SP_RUR_TOTL_ZS",
+                y="sum__SP_DYN_LE00_IN",
+                size="sum__SP_POP_TOTL",
+                max_bubble_size="50",
+                adhoc_filters=[
+                    {
+                        "clause": "WHERE",
+                        "expressionType": "SIMPLE",
+                        "filterOptionName": "2745eae5",
+                        "comparator": [
+                            "TCA",
+                            "MNP",
+                            "DMA",
+                            "MHL",
+                            "MCO",
+                            "SXM",
+                            "CYM",
+                            "TUV",
+                            "IMY",
+                            "KNA",
+                            "ASM",
+                            "ADO",
+                            "AMA",
+                            "PLW",
+                        ],
+                        "operator": "NOT IN",
+                        "subject": "country_code",
+                    }
+                ],
+            ),
+        ),
+        Slice(
+            slice_name="Rural Breakdown",
+            viz_type="sunburst",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                viz_type="sunburst",
+                groupby=["region", "country_name"],
+                since="2011-01-01",
+                until="2011-01-01",
+                metric=metric,
+                secondary_metric=secondary_metric,
+            ),
+        ),
+        Slice(
+            slice_name="World's Pop Growth",
+            viz_type="area",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                since="1960-01-01",
+                until="now",
+                viz_type="area",
+                groupby=["region"],
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Box plot",
+            viz_type="box_plot",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                since="1960-01-01",
+                until="now",
+                whisker_options="Min/max (no outliers)",
+                x_ticks_layout="staggered",
+                viz_type="box_plot",
+                groupby=["region"],
+                metrics=metrics,
+            ),
+        ),
+        Slice(
+            slice_name="Treemap",
+            viz_type="treemap",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                since="1960-01-01",
+                until="now",
+                viz_type="treemap",
+                metrics=["sum__SP_POP_TOTL"],
+                groupby=["region", "country_code"],
+            ),
+        ),
+        Slice(
+            slice_name="Parallel Coordinates",
+            viz_type="para",
+            datasource_type="table",
+            datasource_id=tbl.id,
+            params=get_slice_json(
+                defaults,
+                since="2011-01-01",
+                until="2011-01-01",
+                viz_type="para",
+                limit=100,
+                metrics=["sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS"],
+                secondary_metric="sum__SP_POP_TOTL",
+                series="country_name",
+            ),
+        ),
+    ]
+    misc_dash_slices.add(slices[-1].slice_name)
+    for slc in slices:
+        merge_slice(slc)
+
+    print("Creating a World's Health Bank dashboard")
+    dash_name = "World Bank's Data"
+    slug = "world_health"
+    dash = db.session.query(Dashboard).filter_by(slug=slug).first()
+
+    if not dash:
+        dash = Dashboard()
+    dash.published = True
+    js = textwrap.dedent(
+        """\
+{
+    "CHART-36bfc934": {
+        "children": [],
+        "id": "CHART-36bfc934",
+        "meta": {
+            "chartId": 40,
+            "height": 25,
+            "sliceName": "Region Filter",
+            "width": 2
+        },
+        "type": "CHART"
+    },
+    "CHART-37982887": {
+        "children": [],
+        "id": "CHART-37982887",
+        "meta": {
+            "chartId": 41,
+            "height": 25,
+            "sliceName": "World's Population",
+            "width": 2
+        },
+        "type": "CHART"
+    },
+    "CHART-17e0f8d8": {
+        "children": [],
+        "id": "CHART-17e0f8d8",
+        "meta": {
+            "chartId": 42,
+            "height": 92,
+            "sliceName": "Most Populated Countries",
+            "width": 3
+        },
+        "type": "CHART"
+    },
+    "CHART-2ee52f30": {
+        "children": [],
+        "id": "CHART-2ee52f30",
+        "meta": {
+            "chartId": 43,
+            "height": 38,
+            "sliceName": "Growth Rate",
+            "width": 6
+        },
+        "type": "CHART"
+    },
+    "CHART-2d5b6871": {
+        "children": [],
+        "id": "CHART-2d5b6871",
+        "meta": {
+            "chartId": 44,
+            "height": 52,
+            "sliceName": "% Rural",
+            "width": 7
+        },
+        "type": "CHART"
+    },
+    "CHART-0fd0d252": {
+        "children": [],
+        "id": "CHART-0fd0d252",
+        "meta": {
+            "chartId": 45,
+            "height": 50,
+            "sliceName": "Life Expectancy VS Rural %",
+            "width": 8
+        },
+        "type": "CHART"
+    },
+    "CHART-97f4cb48": {
+        "children": [],
+        "id": "CHART-97f4cb48",
+        "meta": {
+            "chartId": 46,
+            "height": 38,
+            "sliceName": "Rural Breakdown",
+            "width": 3
+        },
+        "type": "CHART"
+    },
+    "CHART-b5e05d6f": {
+        "children": [],
+        "id": "CHART-b5e05d6f",
+        "meta": {
+            "chartId": 47,
+            "height": 50,
+            "sliceName": "World's Pop Growth",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "CHART-e76e9f5f": {
+        "children": [],
+        "id": "CHART-e76e9f5f",
+        "meta": {
+            "chartId": 48,
+            "height": 50,
+            "sliceName": "Box plot",
+            "width": 4
+        },
+        "type": "CHART"
+    },
+    "CHART-a4808bba": {
+        "children": [],
+        "id": "CHART-a4808bba",
+        "meta": {
+            "chartId": 49,
+            "height": 50,
+            "sliceName": "Treemap",
+            "width": 8
+        },
+        "type": "CHART"
+    },
+    "COLUMN-071bbbad": {
+        "children": [
+            "ROW-1e064e3c",
+            "ROW-afdefba9"
+        ],
+        "id": "COLUMN-071bbbad",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT",
+            "width": 9
+        },
+        "type": "COLUMN"
+    },
+    "COLUMN-fe3914b8": {
+        "children": [
+            "CHART-36bfc934",
+            "CHART-37982887"
+        ],
+        "id": "COLUMN-fe3914b8",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT",
+            "width": 2
+        },
+        "type": "COLUMN"
+    },
+    "GRID_ID": {
+        "children": [
+            "ROW-46632bc2",
+            "ROW-3fa26c5d",
+            "ROW-812b3f13"
+        ],
+        "id": "GRID_ID",
+        "type": "GRID"
+    },
+    "HEADER_ID": {
+        "id": "HEADER_ID",
+        "meta": {
+            "text": "World's Bank Data"
+        },
+        "type": "HEADER"
+    },
+    "ROOT_ID": {
+        "children": [
+            "GRID_ID"
+        ],
+        "id": "ROOT_ID",
+        "type": "ROOT"
+    },
+    "ROW-1e064e3c": {
+        "children": [
+            "COLUMN-fe3914b8",
+            "CHART-2d5b6871"
+        ],
+        "id": "ROW-1e064e3c",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-3fa26c5d": {
+        "children": [
+            "CHART-b5e05d6f",
+            "CHART-0fd0d252"
+        ],
+        "id": "ROW-3fa26c5d",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-46632bc2": {
+        "children": [
+            "COLUMN-071bbbad",
+            "CHART-17e0f8d8"
+        ],
+        "id": "ROW-46632bc2",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-812b3f13": {
+        "children": [
+            "CHART-a4808bba",
+            "CHART-e76e9f5f"
+        ],
+        "id": "ROW-812b3f13",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "ROW-afdefba9": {
+        "children": [
+            "CHART-2ee52f30",
+            "CHART-97f4cb48"
+        ],
+        "id": "ROW-afdefba9",
+        "meta": {
+            "background": "BACKGROUND_TRANSPARENT"
+        },
+        "type": "ROW"
+    },
+    "DASHBOARD_VERSION_KEY": "v2"
+}
+    """
+    )
+    pos = json.loads(js)
+    update_slice_ids(pos, slices)
+
+    dash.dashboard_title = dash_name
+    dash.position_json = json.dumps(pos, indent=4)
+    dash.slug = slug
+
+    dash.slices = slices[:-1]
+    db.session.merge(dash)
+    db.session.commit()

+ 580 - 0
data/purposeCombined/BI/income_disparity_final_version_2.py

@@ -0,0 +1,580 @@
+# -*- coding: utf-8 -*-
+"""income_disparity.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1upuHuQ3gWDkpbvkvHl2uTQlSv20JZnf2
+"""
+
+
+#!pip install pandas-datareader
+import wbdata
+import datetime
+import numpy as np
+import pandas as pd
+from pandas_datareader import wb
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LinearRegression as lr
+from matplotlib.pyplot import MultipleLocator
+
+
+# =============================================================================
+# # Part 1: API Integration
+# =============================================================================
+
+# =============================================================================
+# # API method 1: using wbdata module
+# =============================================================================
+
+# #searching for countries index using names
+# print(wbdata.search_countries('United Kingdom'))
+
+# list of countries
+countries = ["USA", "BEL", "BRA", "COL", "FRA", "DEU", "GRC", "IDN", "IRL", "MEX", "NLD", "RUS"]
+# date period
+dates = datetime.datetime(2008, 1, 1), datetime.datetime(2018, 1, 1)
+
+# data object
+indicators = {'SI.DST.05TH.20':'Income share held by highest 20%', 'SI.DST.FRST.20': 'Income share held by lowest 20%', \
+             'SL.EMP.TOTL.SP.FE.NE.ZS': 'Employment to population ratio, 15+, female (%) (national estimate)',\
+             'SL.EMP.TOTL.SP.MA.NE.ZS': 'Employment to population ratio, 15+, male (%) (national estimate)'}
+
+# getting data from these countries
+raw_data = wbdata.get_dataframe(indicators, country=countries, data_date=dates, convert_date=True)
+
+raw_unstacked_data = raw_data.unstack(level=0)
+
+# printing our data object
+# print(raw_data)
+# print(raw_unstacked_data)
+
+# =============================================================================
+# # API method 2: using "from pandas_datareader import wb" and converting the result to a DataFrame
+# =============================================================================
+
+# view all data
+pd.set_option('display.max_columns', 15) 
+pd.set_option('display.max_rows', 15) 
+
+df1 = wb.download(indicator = indicators, country = countries,  start = 2008, end = 2018)
+date_period = [i for i in range(2008, 2019)]
+print(df1)
+
+# create a new DataFrame df2 for later use, so that calculations on df2 do not change the original values in df1
+# rename the columns
+df2 = df1.rename(columns = {'SI.DST.05TH.20':'Income share held by highest 20%', 'SI.DST.FRST.20': 'Income share held by lowest 20%', \
+             'SL.EMP.TOTL.SP.FE.NE.ZS': 'Employment to population ratio, 15+, female (%) (national estimate)',\
+             'SL.EMP.TOTL.SP.MA.NE.ZS': 'Employment to population ratio, 15+, male (%) (national estimate)'}, inplace = False)
+
+# overview our data object DataFrame
+# Data manipulation: deal with missing values by replacing them with the column mean, which has limited impact on our data set
+df2.mean()
+df2.fillna(df2.mean(), inplace = True)
+print(df2)
+
+# Overview the edited DataFrame and get basic summary statistics
+print(df2.describe())
+
+
+
+
+# =============================================================================
+# # Part 2: Data structure set up
+# =============================================================================
+
+# =============================================================================
+# # creating our Data Structure type I
+# =============================================================================
+
+# step I: convert DataFrame to a list in correct order from 2008 to 2018
+def country_DataFrame_to_list(country, target_data):
+  df = wb.download(indicator = target_data, country = country,  start = 2008, end = 2018)
+  df.fillna(df.mean(), inplace = True)
+  df_list =df[df.columns[0]].tolist()
+  round_list = [round(i, 2) for i in df_list ]
+  return round_list[::-1]
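+# Example (illustrative; actual values depend on the World Bank API response):
+# country_DataFrame_to_list("USA", 'SI.DST.05TH.20') returns the 2008-2018 values
+# rounded to 2 decimals, oldest first, e.g. [v_2008, v_2009, ..., v_2018].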
+
+# step II: make a list of tuples, which is a convenient way to store our data
+def country_tuples(country_list, time):
+  return list(zip(country_list, time))
+
+# step III: additional helper for calculating the element-wise gap between two lists
+def gap_between(toplist, lowlist):
+  gap_list = []
+  for i in range(len(toplist)):
+    gap_list.append(round((toplist[i]- lowlist[i]), 2))
+  return gap_list
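+# Worked example: gap_between([50.0, 51.2], [5.0, 5.3]) -> [45.0, 45.9],
+# i.e. the element-wise difference of the two lists, rounded to 2 decimals.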
+
+
+
+# step IV: make a dictionary of lists of tuples, which is one of the data structures of this project,
+# referred to as Data Structure type I.
+def object_Dictionary(country_list, object_target, date_period):
+  object_df = {}
+  for country in country_list:
+    object_df[country] = country_tuples(date_period, country_DataFrame_to_list(country, object_target))
+  return object_df
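+# Resulting shape (illustrative): a dict keyed by country code, each value a list of
+# (year, value) tuples, e.g. {"USA": [(2008, <value>), ..., (2018, <value>)], ...}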
+
+# step V: start building the type I structures:
+    
+    
+# This data set is for storing data of Income share held by highest 20%
+Top_20_df = object_Dictionary(countries, 'SI.DST.05TH.20', date_period)
+
+# This data set is for storing data of Income share held by lowest 20%
+Low_20_df = object_Dictionary(countries, 'SI.DST.FRST.20', date_period)
+
+# This data set is for storing data of 'Employment to population ratio, 15+, female (%) (national estimate)'
+female_employ_df = object_Dictionary(countries, 'SL.EMP.TOTL.SP.FE.NE.ZS', date_period)
+
+# This data set is for storing data of 'Employment to population ratio, 15+, male (%) (national estimate)'
+male_employ_df = object_Dictionary(countries, 'SL.EMP.TOTL.SP.MA.NE.ZS', date_period)
+
+
+
+
+# =============================================================================
+# # creating our Data Structure type II: convert our Data Structure type I to type II
+# =============================================================================
+# step 1: write a function that unpacks a dictionary of tuples into a new dictionary of plain lists and calculates the gap
+def no_tuple_dic(object_Dictionary1, object_Dictionary2):
+  new_dict = {}
+  for i in countries:
+    new_list = []
+    for j in range(11):
+      # The gap helper above is not reused here because the new dictionary should not include the year
+      new_list.append(round((object_Dictionary1[i][j][1]- object_Dictionary2[i][j][1]), 2)) 
+    new_dict[i] = new_list  
+
+  return new_dict
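+# Resulting shape (illustrative): {"USA": [gap_2008, ..., gap_2018], "BEL": [...], ...}
+# i.e. the year labels are dropped and only the 11 yearly gap values per country remain.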
+
+# step 2: get the income gap dictionary of lists between income share held by the highest 20% and the lowest 20%
+income_gap_dict = no_tuple_dic(Top_20_df, Low_20_df)
+
+# step 3: create our Data structure type II, DataFrame
+income_gap_dict_df = pd.DataFrame(income_gap_dict, columns = countries)
+
+# step 4: show the basic statistic info of our income gap DataFrame
+print(round(income_gap_dict_df.describe(),2))
+
+# same step as above, to get our Data Structure type II, between male employment population and female employment population
+gender_gap_dict = no_tuple_dic(male_employ_df, female_employ_df)
+
+gender_gap_dict_df = pd.DataFrame(gender_gap_dict, columns = countries)
+print(round(gender_gap_dict_df.describe(),2))
+
+
+
+# Data Structure function application
+
+# This function calculates, for each year, the gap between income share held by the highest 20% and the lowest 20%
+def gap_income_Dataframe(country):
+  gap = {}
+  for i in range(len(Top_20_df[country])):
+    year1, data1 = Top_20_df[country][i]
+    year2, data2 = Low_20_df[country][i]  
+    if year1 == year2:
+      gap[year1] = round(data1-data2, 2)
+  return gap
+
+# This function calculates, for each year, the gap between the male and female employment-to-population ratios
+def gap_gender_Dataframe(country):
+  gap = {}
+  for i in range(len(Top_20_df[country])):
+    year1, data1 = male_employ_df[country][i]
+    year2, data2 = female_employ_df[country][i]  
+    if year1 == year2:
+      gap[year1] = round(data1-data2, 2)
+  return gap
+
+# This function searches the data for a specific country and year
+def searching_data(object_Dictionary, country, year):
+  country_list = []
+  if country in countries:
+    for i in range(11):
+      country_list.append(object_Dictionary[country][i])
+  
+  output = [item for item in country_list if item[0] == year]
+  # returns an empty list if no data is found, or a single-item list of (year, value) if the country and year are valid
+  return output
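+# Usage example (illustrative): searching_data(Top_20_df, "USA", 2010)
+# returns [(2010, <income share value>)], or [] if the country/year is not present.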
+
+
+
+
+
+# =============================================================================
+# # Part 3: Plotting the data set
+# =============================================================================
+
+
+# =============================================================================
+# #plot 1: Income gap from 2008 to 2018
+# =============================================================================
+
+from matplotlib.pyplot import MultipleLocator
+plt.title('Income gap from 2008 to 2018')
+plt.xlabel('Year')
+plt.ylabel('Income gap%')
+all_data_i = []
+
+for c in countries:
+  gap_i = gap_income_Dataframe(c)
+  x_i = gap_i.keys()
+  y_i = gap_i.values()
+  all_data_i.append(gap_i)
+  plt.scatter(x_i,y_i,marker='+',label=c)
+  plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+
+x_major_locator=MultipleLocator(1)  #set the x interval as 1
+y_major_locator=MultipleLocator(2)   #set the y interval as 2
+ax=plt.gca()
+ax.xaxis.set_major_locator(x_major_locator)     #Set the major scale of the x-axis to a multiple of 1
+ax.yaxis.set_major_locator(y_major_locator)     #Set the major scale of the y-axis to a multiple of 2
+plt.xlim(2007,2019)   #Set the x-axis range; 2007-2019 is used so the 2008 and 2018 points are clearly visible
+plt.ylim(25,60)     #Set the y scale range of the y-axis from 25 to 60
+
+N = 10000
+xr_i = list(range(2008,2019))
+yr_i = []
+for i in xr_i:
+  temp = 0
+  for j in all_data_i:
+    temp += j[i]
+  temp /= len(countries)
+  yr_i.append(temp)
+plt.plot(xr_i,yr_i,"r-",label='average')
+plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+plt.savefig('Income gap.pdf')  
+plt.show()
+
+# =============================================================================
+# #plot 2: Gender Employment rate gap from 2008 to 2018
+# =============================================================================
+
+plt.title('Gender Employment rate gap from 2008 to 2018')
+plt.xlabel('Year')
+plt.ylabel('Gender Employment Gap %')
+all_data_j = []
+for c in countries:
+  gap_j = gap_gender_Dataframe(c)
+  x_j = gap_j.keys()
+  y_j = gap_j.values()
+  all_data_j.append(gap_j)
+  plt.scatter(x_j,y_j,marker='+',label=c)
+  plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+
+x_major_locator=MultipleLocator(1)  #set the x interval as 1
+y_major_locator=MultipleLocator(2)   #set the y interval as 2
+ax=plt.gca()
+ax.xaxis.set_major_locator(x_major_locator)     #Set the major scale of the x-axis to a multiple of 1
+ax.yaxis.set_major_locator(y_major_locator)     #Set the major scale of the y-axis to a multiple of 2
+plt.xlim(2007,2019)   #Set the scale range of the x-axis from 2008 to 2018
+plt.ylim(6,38)     #Set the scale range of the y-axis from 6 to 38
+
+N = 10000
+xr_j = list(range(2008,2019))
+yr_j = []
+for i in xr_j:
+  temp = 0
+  for j in all_data_j:
+    temp += j[i]
+  temp /= len(countries)
+  yr_j.append(temp)
+plt.plot(xr_j,yr_j,"r-",label='average')
+plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+plt.show()
+
+# =============================================================================
+# #boxplot 1 income gap
+# =============================================================================
+
+plt.figure(figsize=(9,6),dpi=60)
+
+labels, data = [*zip(*income_gap_dict.items())]  # 'transpose' items to parallel key, value lists
+
+# or, backwards compatible:
+labels, data = income_gap_dict.keys(), income_gap_dict.values()
+plt.title('Income Gap from 2008 to 2018')
+plt.xlabel('Country')
+plt.ylabel('Income Gap %')
+plt.boxplot(data)
+plt.xticks(range(1, len(labels) + 1), labels)
+plt.show()
+
+# =============================================================================
+# #boxplot 2 gender employment gap
+# =============================================================================
+
+plt.figure(figsize=(9,6),dpi=60)
+
+labels, data = [*zip(*gender_gap_dict.items())]  # 'transpose' items to parallel key, value lists
+
+# or, backwards compatible:
+labels, data = gender_gap_dict.keys(), gender_gap_dict.values()
+plt.title('Gender Employment Gap')
+plt.xlabel('Country')
+plt.ylabel('Gender Employment Gap %')
+plt.boxplot(data)
+plt.xticks(range(1, len(labels) + 1), labels)
+plt.show()
+
+# =============================================================================
+# #Part 4: linear regression
+# =============================================================================
+
+import numpy as np
+from sklearn.linear_model import LinearRegression
+
+# Convert the original data frame to list
+def convert_to_target_data_dict(country_list):
+    converted_dict = {}
+
+    for i in range(len(country_list)):
+        country_name = country_list[i]
+        converted_dict[country_name] = {}
+        gap_income_dict = gap_income_Dataframe(country_name)
+        gap_gender_dict = gap_gender_Dataframe(country_name)
+        converted_gap_income_list = []
+        converted_gap_gender_list = []
+
+        for k in gap_income_dict:
+            converted_gap_income_list.append(gap_income_dict[k])
+            converted_gap_gender_list.append(gap_gender_dict[k])
+
+        converted_dict[country_name]["income"] = converted_gap_income_list
+        converted_dict[country_name]["gender"] = converted_gap_gender_list
+
+    return converted_dict
+
+
+# Work out the x-coordinates for linear regression
+def x_coordinate():
+    x_list = []
+    x_coordinate = 2008
+    for i in range(11):
+        x_list.append(x_coordinate)
+        x_coordinate = x_coordinate + 1
+
+    return x_list
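+# Example: x_coordinate() -> [2008, 2009, ..., 2018] (the 11 yearly x-values).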
+
+
+# Work out the linear regression for single country
+def linear_regression(country_name, coordinate_dict, data_type, predict_time):
+    y_list = coordinate_dict[country_name][data_type]
+    x_list = x_coordinate()
+    x = np.array(x_list).reshape((-1, 1))
+    y = np.array(y_list)
+
+    linear_model = LinearRegression().fit(x, y)
+
+    predict_year = np.array([predict_time]).reshape((-1, 1))
+    ten_year_prediction = linear_model.predict(predict_year)
+    
+
+    return ten_year_prediction[0]
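+# Note: scikit-learn's LinearRegression expects a 2-D feature matrix, hence the
+# reshape((-1, 1)) above. Usage example (illustrative):
+# linear_regression("USA", convert_to_target_data_dict(countries), "income", 2030)
+# returns the predicted income gap for the USA in 2030 as a single float.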
+
+
+# Work out the final predicted result for the income and gender gap of 2030
+def total_linear_regression_result(y_coordinate_dict):
+    linear_regression_result_dict = {}
+
+    for k in y_coordinate_dict:
+        linear_regression_result_dict[k] = {}
+        predict_income_gap_2030 = linear_regression(k, y_coordinate_dict, "income", 2030)
+        predict_gender_gap_2030 = linear_regression(k, y_coordinate_dict, "gender", 2030)
+        linear_regression_result_dict[k]["income"] = predict_income_gap_2030
+        linear_regression_result_dict[k]["gender"] = predict_gender_gap_2030
+
+    return linear_regression_result_dict
+
+
+# Calculate the average income & gender gap of 2030
+def calculate_average_gap(result_dict, country_list):
+    average_result_dict = {}
+    sum_income_gap = 0
+    sum_gender_gap = 0
+
+    for k in result_dict:
+        sum_income_gap = sum_income_gap + result_dict[k]["income"]
+        sum_gender_gap = sum_gender_gap + result_dict[k]["gender"]
+
+    average_income_gap = sum_income_gap / len(country_list)
+    average_gender_gap = sum_gender_gap / len(country_list)
+
+    average_result_dict["average_income_gap"] = average_income_gap
+    average_result_dict["average_gender_gap"] = average_gender_gap
+
+    return average_result_dict
+
+
+# Compare the average value with our linear regression results
+# print the lists of countries that are higher than, lower than, or equal to the average prediction
+def compare_with_the_average(average_dict, result_dict):
+    compare_result_dict = {}
+    higher_than_income_average = []
+    lower_than_income_average = []
+    equal_to_income_average = []
+    higher_than_gender_average = []
+    lower_than_gender_average = []
+    equal_to_gender_average = []
+
+    for k in result_dict:
+        if result_dict[k]["income"] > average_dict["average_income_gap"]:
+            higher_than_income_average.append(k)
+        elif result_dict[k]["income"] < average_dict["average_income_gap"]:
+            lower_than_income_average.append(k)
+        elif result_dict[k]["income"] == average_dict["average_income_gap"]:
+            equal_to_income_average.append(k)
+
+        if result_dict[k]["gender"] > average_dict["average_gender_gap"]:
+            higher_than_gender_average.append(k)
+        elif result_dict[k]["gender"] < average_dict["average_gender_gap"]:
+            lower_than_gender_average.append(k)
+        elif result_dict[k]["gender"] == average_dict["average_gender_gap"]:
+            equal_to_gender_average.append(k)
+
+    compare_result_dict["higher_than_income_average"] = higher_than_income_average
+    compare_result_dict["lower_than_income_average"] = lower_than_income_average
+    compare_result_dict["equal_to_income_average"] = equal_to_income_average
+
+    compare_result_dict["higher_than_gender_average"] = higher_than_gender_average
+    compare_result_dict["lower_than_gender_average"] = lower_than_gender_average
+    compare_result_dict["equal_to_gender_average"] = equal_to_gender_average
+
+    return compare_result_dict
+
+
+def main():
+    # Work out the linear regression result for the 'countries' list
+    y_dict = convert_to_target_data_dict(countries)
+    linear_regression_result_dict = total_linear_regression_result(y_dict)
+
+    # Work out the average income & gender gap
+    average_gap_result = calculate_average_gap(linear_regression_result_dict, countries)
+
+    # Compare the average gap with the gap for each country
+    compare_with_average = compare_with_the_average(average_gap_result, linear_regression_result_dict)
+
+    # Print the results
+    print(linear_regression_result_dict)
+    print()
+    print(average_gap_result)
+    print()
+    print(compare_with_average)
+    return linear_regression_result_dict,average_gap_result,compare_with_average
+
+
+if __name__ == "__main__":
+    linear_regression_result_dict,average_gap_result,compare_with_average = main()
+
+
+# overview of our linear regression results
+print()
+print(linear_regression_result_dict)
+
+
+# =============================================================================
+# #Part 5: plot the figures with our predictions for comparison
+# =============================================================================
+
+# Commented out IPython magic to ensure Python compatibility.
+# =============================================================================
+# #plot 1 for income gap with prediction in 2030
+# =============================================================================
+# %matplotlib inline
+from matplotlib.pyplot import MultipleLocator
+plt.figure(figsize=(12,6),dpi=60)
+plt.title('Prediction of Income Gap in 2030')
+plt.xlabel('Year')
+plt.ylabel('Income gap%')
+all_data_i = []
+
+xr_x = list(range(2008,2019))
+xr_x.append(2030)
+# xr_x = list(map(lambda x:str(x),xr_x))
+for c in countries:
+  gap_i = gap_income_Dataframe(c)
+  x_i = list(gap_i.keys())
+  y_i = list(gap_i.values())
+  tmp = linear_regression_result_dict[c]
+  x_i.append(2019)
+  y_i.append(tmp["income"])
+  gap_i[2019] = tmp["income"]
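+  # Note: the 2030 prediction is stored under the placeholder key 2019 so that the
+  # averaging loop below (2008-2018 plus 2019) can include it; the point itself is
+  # plotted at x=2030 via xr_x.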
+  all_data_i.append(gap_i)
+  plt.scatter(xr_x,y_i,marker='+',label=c)
+  plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+
+x_major_locator=MultipleLocator(1)  #set the x interval as 1
+y_major_locator=MultipleLocator(2)   #set the y interval as 2
+ax=plt.gca()
+ax.xaxis.set_major_locator(x_major_locator)     #Set the major scale of the x-axis to a multiple of 1
+ax.yaxis.set_major_locator(y_major_locator)     #Set the major scale of the y-axis to a multiple of 2
+plt.xlim(2007,2031)   #Set the scale range of the x-axis from 2008 to 2030
+plt.ylim(25,60)     #Set the scale range of the y-axis from 25 to 60
+
+
+xr_i = list(range(2008,2019))
+xr_i.append(2019)
+yr_i = []
+for i in xr_i:
+  temp = 0
+  for j in all_data_i:
+    temp += j[i]
+  temp /= len(countries)
+  yr_i.append(temp)
+
+plt.plot(xr_x,yr_i,"r-",label='average')
+plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+plt.savefig('Income gap.pdf')  
+plt.show()
+
+
+
+
+# =============================================================================
+# #plot 2 for gender gap with prediction in 2030
+# =============================================================================
+plt.figure(figsize=(12,6),dpi=60)
+plt.title('Prediction of Gender Employment Gap in 2030')
+plt.xlabel('Year')
+plt.ylabel('Gender Employment Gap %')
+all_data_j = []
+
+xr_x = list(range(2008,2019))
+xr_x.append(2030)
+for c in countries:
+  gap_j = gap_gender_Dataframe(c)
+  x_j = list(gap_j.keys())
+  y_j = list(gap_j.values())
+  tmp = linear_regression_result_dict[c]
+  x_j.append(2019)
+  y_j.append(tmp["gender"])
+  gap_j[2019] = tmp["gender"]
+  all_data_j.append(gap_j)
+  plt.scatter(xr_x,y_j,marker='+',label=c)
+  plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+
+x_major_locator=MultipleLocator(1)  #set the x interval as 1
+y_major_locator=MultipleLocator(2)   #set the y interval as 2
+ax=plt.gca()
+ax.xaxis.set_major_locator(x_major_locator)     #Set the major scale of the x-axis to a multiple of 1
+ax.yaxis.set_major_locator(y_major_locator)     #Set the major scale of the y-axis to a multiple of 2
+plt.xlim(2007,2031)   #Set the scale range of the x-axis from 2008 to 2030
+plt.ylim(2,38)     #Set the scale range of the y-axis from 2 to 38
+
+
+xr_j = list(range(2008,2019))
+xr_j.append(2019)
+yr_j = []
+for i in xr_j:
+  temp = 0
+  for j in all_data_j:
+    temp += j[i]
+  temp /= len(countries)
+  yr_j.append(temp)
+plt.plot(xr_x,yr_j,"r-",label='average')
+plt.legend(loc=2,bbox_to_anchor=(1.05,1.0),borderaxespad = 0.)
+plt.show()
+
+

+ 338 - 0
data/purposeCombined/BI/macro_analysis-backup.py

@@ -0,0 +1,338 @@
+import pandas as pd
+from bokeh.plotting import figure, save, show,output_file, ColumnDataSource
+from bokeh.models import HoverTool
+import matplotlib.pyplot as plt
+
+class DataFrameAnalysis:
+    """Adds macro-analysis capabilities to a dataframe."""
+    def __init__(self, frame):
+        self.df = frame  # dataframe object
+
+    def avg_discount_rate(self):
+        """Calculates average discount rate of all orders."""
+        # You should calculate the average and gross discount rate.
+        self.df['Discount_Amount'] = pd.to_numeric(self.df['Discount_Amount'])
+        self.df['Order_Total_Amount'] = pd.to_numeric(self.df['Order_Total_Amount'])
+        total_sales_amount = self.df['Order_Total_Amount'].sum()
+        total_discount_amount = self.df['Discount_Amount'].sum()
+        total_discount_avg = int((total_discount_amount / (total_discount_amount+total_sales_amount))*100)
+        return print(f'Customer Discount Avg: {total_discount_avg}%')
+
+
+    def customer_role_breakdown(self):
+        """Calculates proportion of retail/wholesale as a function of sales."""
+        retail = 0
+        wholesale = 0
+        sum_count =int(len(self.df.index))
+        sum_sales = self.df['Order_Total_Amount'].sum()
+        retail_customer_count = round((len(self.df.loc[self.df['Customer_Role']=='Customer'].index)/sum_count)*100)
+        wholesale_customer_count = round((len(self.df.loc[self.df['Customer_Role']=='Wholesale Customer'].index)/sum_count)*100)
+        retail_sales = round((self.df['Order_Total_Amount'].loc[self.df['Customer_Role']=='Customer'].sum()/sum_sales)*100)
+        wholesale_sales = round((self.df['Order_Total_Amount'].loc[self.df['Customer_Role']=='Wholesale Customer'].sum()/sum_sales)*100)
+        grid = [[retail_customer_count,wholesale_customer_count],[retail_sales,wholesale_sales]]
+        crb_df = pd.DataFrame(data=grid, columns=['Retail','Wholesale'], index=['Proportional Order Counts', 'Proportional Sales'])
+        plt.style.use('seaborn-deep')
+        fig, ax = plt.subplots(figsize=(10, 10))
+        crb_df.plot.bar(title='Customer Role Breakdown', xlabel='Customer Role', ylabel='Proportion (%)',
+                        cmap='winter', ax=ax)
+        plt.savefig('Customer_Role_Breakdown.png')
+        print(crb_df.head(3))
+
+    def geographical_breakdown(self):
+        """ Displays a scatterplot of Sales/Revenue weights for different States."""
+        self.df = self.df[self.df.Country_Name_Shipping== 'United States (US)']
+        counts = self.df["State_Name_Shipping"].value_counts().to_dict()
+        States = list(counts.keys())
+        Count = list(counts.values())
+        geo = pd.DataFrame({'States': States, 'Counts': Count})
+        geo_dataframe = pd.DataFrame(geo)
+        geo_dataframe.insert(loc=2, column="Sales_Total", value=0)
+        geo_dataframe.insert(loc=3, column="Avg_Purchase_Revenue", value=0)
+        for i, row in self.df.iterrows():
+            state = row.loc['State_Name_Shipping']
+            total = row.loc['Order_Total_Amount']
+            idx = geo_dataframe[geo_dataframe["States"] == state].index.item()
+            av = int(geo_dataframe.at[idx, 'Sales_Total']) / int(geo_dataframe.at[idx, 'Counts'])
+            geo_dataframe.at[idx, 'Sales_Total'] += total
+            geo_dataframe.at[idx, 'Avg_Purchase_Revenue'] = av
+        # data visualization
+        cds = ColumnDataSource(geo_dataframe)
+        cds.data.keys()
+        visual = figure(tools='box_zoom, pan, reset',
+                        width=700, height=700,
+                        title='Geographical Sales Breakdown',
+                        y_axis_label='Order Quantity', x_axis_label='Revenue')
+        visual.circle('Sales_Total', 'Counts', size=7, source=cds, name= 'States')
+        visual.add_tools(HoverTool(tooltips=[("State", "@States"),
+                                             ("Average Purchase Revenue", "@Avg_Purchase_Revenue")
+                                             ]))
+        output_file('geographical_breakdown.html')
+        save(visual)
+        show(visual)
+        return print(geo_dataframe)
+
+
+class ProductAnalysis:
+    """Adds product analysis capabilities to a dataframe."""
+
+    def __init__(self, frame):
+        self.df = frame  # dataframe object
+        self.analysis_frame = self.monthly_product_frame()
+        self.time_span = self.serve_time_span()  # list of tuples: x[0] == year, x[1] == month for x in self.time_span
+
+    def monthly_product_frame(self):
+        """Analyzes the order lines in the CSV_Files folder and
+        Returns a pandas Dataframe with monthly product statistics."""
+        from datetime import datetime
+        import information_repository as ir
+        frame = self.df
+        frame = frame[['Order_Date', 'Product_Name', 'Quantity', 'Item_Cost']]
+        dict_list = []
+        for i, row in frame.iterrows():
+            row_date = row['Order_Date']
+            row_date = datetime.strptime(row_date, "%Y-%m-%d %H:%M")
+            row_date_month = row_date.month
+            row_date_year = row_date.year
+            raw_products = row['Product_Name'].replace('\r', '').split('\n')
+            raw_quantities = row['Quantity'].replace('\r', '').split('\n')
+            raw_cost = row['Item_Cost'].replace('\r', '').split('\n')
+            for key in range(len(raw_products)):
+                product = [i for i in ir.p_list if i in raw_products[key]][0]
+                quantity = int(raw_quantities[key])
+                revenue = float(raw_cost[key])
+                dict_object = [product, quantity, revenue, row_date_month, row_date_year]
+                matched_dictionary = [i for i in dict_list if
+                                      i['name'] == dict_object[0] and i['month'] == dict_object[3]
+                                      and i['year'] == dict_object[4]]
+                if len(matched_dictionary) == 1:
+                    matched_dictionary[0]['count'] += dict_object[1]
+                    matched_dictionary[0]['revenue'] += dict_object[2]
+                else:
+                    dict_list.append({'name': dict_object[0], 'count': dict_object[1],
+                                      'revenue': dict_object[2], 'month': dict_object[3], 'year': dict_object[4]})
+        self.analysis_frame = pd.DataFrame(columns=['year', 'month', 'count', 'revenue', 'change_over_month', 'product'])
+        time_span = []
+        for product in ir.p_list:
+            product_dictionaries = sorted(
+                sorted([i for i in dict_list if i['name'] == product], key=lambda x: x['month']
+                       ), key=lambda x: x['year'])
+            data_list = []
+            year_list = []
+            month_list = []
+            for key in range(len(product_dictionaries)):
+                if key > 0:
+                    try:
+                        change_over_month = (100 - round(
+                            ((product_dictionaries[key]['revenue'] / product_dictionaries[key]['count'])
+                             / (product_dictionaries[key - 1]['revenue'] / product_dictionaries[key - 1][
+                                        'count'])) * 100))
+
+                    except IndexError:
+                        print('change_list calls need to be refined')
+                else:
+                    change_over_month = 0
+
+                row_list = [product_dictionaries[key]['year'], product_dictionaries[key]['month'],
+                            product_dictionaries[key]['count'], product_dictionaries[key]['revenue'], change_over_month,
+                            product_dictionaries[key]['name']]
+                data_list.append(row_list)
+                if product == 'Blue Moon':
+                    month_list.append(product_dictionaries[key]['month'])
+                    year_list.append(product_dictionaries[key]['year'])
+
+            if product == 'Blue Moon':
+                time_span = [*zip(year_list, month_list)]
+            append_frame = pd.DataFrame(data=data_list,
+                                        columns=['year', 'month', 'count', 'revenue', 'change_over_month', 'product'])
+            self.analysis_frame = pd.concat([self.analysis_frame, append_frame], ignore_index=True)
+        self.time_span = time_span
+        return self.analysis_frame
+
+    def highest_positive_product_change_over_month_analysis(self):
+        """Analyzes the monthly_product_frame and returns the 5 products whose sales level increased the most"""
+        year = int(input('Type the year you would like to query in yyyy format:  '))
+        month = int(input('Type the month you would like to query:  '))
+        data_slice = self.analysis_frame.loc[self.analysis_frame['month'] == month].loc[self.analysis_frame['year'] == year].loc[self.analysis_frame['revenue']>500]
+        data_slice.sort_values(by='change_over_month', inplace=True, ascending=False)
+        return print(data_slice.head(5))
+
+    def highest_negative_product_change_over_month_analysis(self):
+        """Analyzes the monthly_product_frame and returns the 5 products whose sales level decreased the most"""
+        year = int(input('Type the year you would like to query in yyyy format:  '))
+        month = int(input('Type the month you would like to query:  '))
+        data_slice = self.analysis_frame.loc[self.analysis_frame['month'] == month].loc[self.analysis_frame['year'] == year].loc[self.analysis_frame['revenue']>500]
+        data_slice.sort_values(by='change_over_month', inplace=True, ascending=True)
+        return data_slice
+
+    def product_line_change_over_month_analysis(self, year, month):
+        """Analyzes the monthly_product_frame by product line and returns a dataframe with
+        product line change over month data."""
+        import information_repository as ir
+        #year = int(input('Type the year you would like to query in yyyy format:  '))
+        #month = int(input('Type the month you would like to query:  '))
+        product_line_list_of_lists = [ir.tea_product_list, ir.capsule_product_list, ir.smokeable_product_list,
+                             ir.skincare_product_list, ir.superfood_product_list, ir.honey_product_list,
+                             ir.tincture_product_list]
+        product_line_strings = ['Tea', 'Capsules', 'Smokeables', 'Skincare', 'Superfood', 'Honey', 'Tinctures']
+        product_line_append_list = []
+        line_index_counter = 0
+        for product_line in product_line_list_of_lists:
+            line_list = []
+            line_list.append(year)
+            line_list.append(month)
+            data_slice = self.analysis_frame.loc[self.analysis_frame['month'] == month].loc[
+                self.analysis_frame['year'] == year].loc[self.analysis_frame['product'].isin(product_line)]
+            if month > 1:
+                last_month_frame = self.analysis_frame.loc[self.analysis_frame['month'] == (month - 1)].loc[
+                    self.analysis_frame['year'] == year].loc[self.analysis_frame['product'].isin(product_line)]
+            else:
+                last_month_frame = self.analysis_frame.loc[self.analysis_frame['month'] == 12].loc[
+                    self.analysis_frame['year'] == (year - 1)].loc[self.analysis_frame['product'].isin(product_line)]
+            last_month_revenue = last_month_frame['revenue'].sum()
+            this_month_revenue = data_slice['revenue'].sum()
+            avg_change_over_month = (this_month_revenue / last_month_revenue) * 100
+            line_list.append(avg_change_over_month)
+            product_line = product_line_strings[line_index_counter]
+            line_index_counter += 1
+            line_list.append(product_line)
+            product_line_append_list.append(line_list)
+        product_line_analysis_frame = pd.DataFrame(data=product_line_append_list,
+                                                   columns=['year', 'month', 'avg_change_over_month',
+                                                            'product_line'])
+        product_line_analysis_frame.to_csv('product_line_csv_2021.csv')
+        return product_line_analysis_frame
+
+    def serve_time_span(self):
+        """Returns a list of tuples of unique (year, month) pairs in chronological order based on the
+         monthly_product_frame."""
+        return sorted(sorted(list(set([*zip(self.analysis_frame['year'],self.analysis_frame['month'])])),
+                            key=lambda x: x[1]), key=lambda x: x[0])
+
+    def product_line_change_over_month_graph(self):
+        """Using the product_line_change_over_month_analysis frame, it outputs a graph of the changes over time for
+        the top product lines."""
+        line_change_frame_data = []
+        for i in self.time_span:
+            month_frame = self.product_line_change_over_month_analysis(i[0], i[1])
+            change_list = month_frame['avg_change_over_month']
+            line_change_frame_data.append(change_list)
+        treated_line_change_frame_data = []
+        for i in range(len(line_change_frame_data)): #index of time period/segment
+            if i ==0:
+                treated_line_change_frame_data.append([self.time_span[i][0], self.time_span[i][1],
+                                                       0,0,0,0,0,0,0]) #insert base amounts for the first month
+            else: #function as intended
+                month_cumulative_change_list = []
+                month_cumulative_change_list.append(self.time_span[i][0])
+                month_cumulative_change_list.append(self.time_span[i][1])# append year and month
+                for x in range(len(line_change_frame_data[0])):
+                    prior_change_list = [i[x] for i in line_change_frame_data]
+                    product_cumulative_change = (100+treated_line_change_frame_data[i-1][x+2]) * ((prior_change_list[i]/100))-100
+                    #i-1 for previous time period and x+2 for offset due to year and month category
+                    month_cumulative_change_list.append(product_cumulative_change)
+                treated_line_change_frame_data.append(month_cumulative_change_list)
+        graph_frame = pd.DataFrame(data=treated_line_change_frame_data, columns=['Year', 'Month', 'Tea', 'Capsules', 'Smokeables','Skincare',
+                                                                           'Superfood', 'Honey', 'Tinctures'])
+        print(graph_frame.head(7))
+        x = [str(i) for i in graph_frame['Month']]
+        y1 = graph_frame['Tea']
+        y2 = graph_frame['Capsules']
+        y3 = graph_frame['Superfood']
+        y4 = graph_frame['Honey']
+        y5 = graph_frame['Smokeables']
+        graph = figure(x_range=x,title='Cumulative Percentage Change of Product Lines',x_axis_label='Month', y_axis_label='Percentage Change')
+        graph.line(x, y1, legend_label ='Tea', color='red', line_width=3)
+        graph.line(x, y2, legend_label ='Capsules', color='blue', line_width=3)
+        graph.line(x, y3, legend_label ='Superfood', color='orange', line_width=3)
+        graph.line(x, y4, legend_label ='Honey', color='yellow', line_width=3)
+        graph.line(x, y5, legend_label ='Smokeables', color='green', line_width=3)
+        output_file('product_line_change_over_month.html')
+        save(graph)
+        return show(graph)
+
+
+class InventoryPredictor:
+    """Inventory volume prediction using a product sales csv as the raw data."""
+    def __init__(self):
+        import information_repository as ir
+        self.unit_counts = self.sales_unit_count_dictionaries()
+        self.ingredients = self.ingredient_dictionary()
+        self.recipes = ir.unit_recipes
+
+        print('initiating')
+        pass
+
+    def sales_unit_count_dictionaries(self):
+        """Creates a set of dictionaries for each product and the cumulative quantity of units across all SKUs."""
+        import information_repository as ir
+        product_sales_frame = pd.read_csv('product_sales.csv')
+        product_sales_frame = product_sales_frame.where(pd.notnull(product_sales_frame), 'None')
+        product_unit_amounts = []
+        for i in ir.p_list:
+            product_dict = dict(name=i, quantity=0)
+            for x, row in product_sales_frame.iterrows():
+                if i in row['Product Name']:
+                    if i in ir.tea_product_list:
+                        if '1' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold']
+                        elif '3' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 3
+                        elif '20' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 20
+                        else:
+                            pass
+                            # print('Something unexpected occured', row['Product Name'], row['Variation Attributes'])
+                    elif i in ir.superfood_product_list:
+                        if '3' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold']
+                        elif '9' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 3
+                        else:
+                            product_dict['quantity'] += 1
+                    elif i in ir.capsule_product_list:
+                        if '1' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold']
+                        if '4' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 4
+                    elif i in ir.smokeable_product_list:
+                        if '7' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 7
+                        elif 'prerolls' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 2
+                        else:
+                            product_dict['quantity'] += row['Quantity Sold'] * 4
+                    elif i in ir.honey_product_list:
+                        if '3' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 3
+                        elif '5' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 5
+                        elif '2' in row['Variation Attributes']:
+                            pass
+                            # print('Reminder that packet honeys and jars need to separate')
+                    else:
+                        product_dict['quantity'] += row['Quantity Sold']
+            product_unit_amounts.append(product_dict)
+        return product_unit_amounts
+
+    def ingredient_dictionary(self):
+        """Creates an ingredient dictionary with all ingredients as keys and the cumulative volume across all
+        products as values."""
+        inventory = pd.read_csv('craftybase-export-material.csv')
+        ingredient_dictionary = {}
+        for i in list(inventory['name']):
+            ingredient_dictionary[i]=0
+        return ingredient_dictionary
+
+    def ingredient_volume_table(self):
+        """Creates a csv with ingredients and the cumulative volume used across a time span."""
+        for x in self.unit_counts:
+            for y in self.recipes:
+                if x['name'] == y['name']:
+                    for k, v in y.items():
+                        if k != 'name':
+                            self.ingredients[k] += v * x['quantity']
+        sorted_ingredient_volumes = sorted(self.ingredients.items(), key=lambda x: x[1], reverse=True)
+        output_frame = pd.DataFrame(data = sorted_ingredient_volumes, columns= ['Ingredient', 'Volume (gram or oz)'])
+        output_frame = output_frame[output_frame['Volume (gram or oz)'] !=0]
+        output_frame.to_csv('ingredient_volume_table.csv')
+

+ 338 - 0
data/purposeCombined/BI/macro_analysis.py

@@ -0,0 +1,338 @@
+import pandas as pd
+from bokeh.plotting import figure, save, show,output_file, ColumnDataSource
+from bokeh.models import HoverTool
+import matplotlib.pyplot as plt
+
+class DataFrameAnalysis:
+    """Adds macro-analysis capabilities to a dataframe."""
+    def __init__(self, frame):
+        self.df = frame  # dataframe object
+
+    def avg_discount_rate(self):
+        """Calculates average discount rate of all orders."""
+        # You should calculate the average and gross discount rate.
+        self.df['Discount_Amount'] = pd.to_numeric(self.df['Discount_Amount'])
+        self.df['Order_Total_Amount'] = pd.to_numeric(self.df['Order_Total_Amount'])
+        total_sales_amount = self.df['Order_Total_Amount'].sum()
+        total_discount_amount = self.df['Discount_Amount'].sum()
+        total_discount_avg = int((total_discount_amount / (total_discount_amount+total_sales_amount))*100)
+        return print(f'Customer Discount Avg: {total_discount_avg}%')
+
+
+    def customer_role_breakdown(self):
+        """Calculates proportion of retail/wholesale as a function of sales."""
+        retail = 0
+        wholesale = 0
+        sum_count =int(len(self.df.index))
+        sum_sales = self.df['Order_Total_Amount'].sum()
+        retail_customer_count = round((len(self.df.loc[self.df['Customer_Role']=='Customer'].index)/sum_count)*100)
+        wholesale_customer_count = round((len(self.df.loc[self.df['Customer_Role']=='Wholesale Customer'].index)/sum_count)*100)
+        retail_sales = round((self.df['Order_Total_Amount'].loc[self.df['Customer_Role']=='Customer'].sum()/sum_sales)*100)
+        wholesale_sales = round((self.df['Order_Total_Amount'].loc[self.df['Customer_Role']=='Wholesale Customer'].sum()/sum_sales)*100)
+        grid = [[retail_customer_count,wholesale_customer_count],[retail_sales,wholesale_sales]]
+        crb_df = pd.DataFrame(data=grid, columns=['Retail','Wholesale'], index=['Proportional Order Counts', 'Proportional Sales'])
+        plt.style.use('seaborn-deep')
+        fig, ax = plt.subplots(figsize=(10, 10))
+        crb_df.plot.bar(title='Customer Role Breakdown', xlabel='Customer Role', ylabel='Proportion (%)',
+                        cmap='winter', ax=ax)
+        plt.savefig('Customer_Role_Breakdown.png')
+        print(crb_df.head(3))
+
+    def geographical_breakdown(self):
+        """ Displays a scatterplot of Sales/Revenue weights for different States."""
+        self.df = self.df[self.df.Country_Name_Shipping== 'United States (US)']
+        counts = self.df["State_Name_Shipping"].value_counts().to_dict()
+        States = list(counts.keys())
+        Count = list(counts.values())
+        geo = pd.DataFrame({'States': States, 'Counts': Count})
+        geo_dataframe = pd.DataFrame(geo)
+        geo_dataframe.insert(loc=2, column="Sales_Total", value=0)
+        geo_dataframe.insert(loc=3, column="Avg_Purchase_Revenue", value=0)
+        for i, row in self.df.iterrows():
+            state = row.loc['State_Name_Shipping']
+            total = row.loc['Order_Total_Amount']
+            idx = geo_dataframe[geo_dataframe["States"] == state].index.item()
+            av = int(geo_dataframe.at[idx, 'Sales_Total']) / int(geo_dataframe.at[idx, 'Counts'])
+            geo_dataframe.at[idx, 'Sales_Total'] += total
+            geo_dataframe.at[idx, 'Avg_Purchase_Revenue'] = av
+        # data visualization
+        cds = ColumnDataSource(geo_dataframe)
+        cds.data.keys()
+        visual = figure(tools='box_zoom, pan, reset',
+                        width=700, height=700,
+                        title='Geographical Sales Breakdown',
+                        y_axis_label='Order Quantity', x_axis_label='Revenue')
+        visual.circle('Sales_Total', 'Counts', size=7, source=cds, name= 'States')
+        visual.add_tools(HoverTool(tooltips=[("State", "@States"),
+                                             ("Average Purchase Revenue", "@Avg_Purchase_Revenue")
+                                             ]))
+        output_file('geographical_breakdown.html')
+        save(visual)
+        show(visual)
+        return print(geo_dataframe)
+
+
+class ProductAnalysis:
+    """Adds product analysis capabilities to a dataframe."""
+
+    def __init__(self, frame):
+        self.df = frame  # dataframe object
+        self.analysis_frame = self.monthly_product_frame()
+        self.time_span = self.serve_time_span()  # list of tuples: x[0] == year, x[1] == month for x in self.time_span
+
+    def monthly_product_frame(self):
+        """Analyzes the order lines in the CSV_Files folder and
+        Returns a pandas Dataframe with monthly product statistics."""
+        from datetime import datetime
+        import information_repository as ir
+        frame = self.df
+        frame = frame[['Order_Date', 'Product_Name', 'Quantity', 'Item_Cost']]
+        dict_list = []
+        for i, row in frame.iterrows():
+            row_date = row['Order_Date']
+            row_date = datetime.strptime(row_date, "%Y-%m-%d %H:%M")
+            row_date_month = row_date.month
+            row_date_year = row_date.year
+            raw_products = row['Product_Name'].replace('\r', '').split('\n')
+            raw_quantities = row['Quantity'].replace('\r', '').split('\n')
+            raw_cost = row['Item_Cost'].replace('\r', '').split('\n')
+            for key in range(len(raw_products)):
+                product = [i for i in ir.p_list if i in raw_products[key]][0]
+                quantity = int(raw_quantities[key])
+                revenue = float(raw_cost[key])
+                dict_object = [product, quantity, revenue, row_date_month, row_date_year]
+                matched_dictionary = [i for i in dict_list if
+                                      i['name'] == dict_object[0] and i['month'] == dict_object[3]
+                                      and i['year'] == dict_object[4]]
+                if len(matched_dictionary) == 1:
+                    matched_dictionary[0]['count'] += dict_object[1]
+                    matched_dictionary[0]['revenue'] += dict_object[2]
+                else:
+                    dict_list.append({'name': dict_object[0], 'count': dict_object[1],
+                                      'revenue': dict_object[2], 'month': dict_object[3], 'year': dict_object[4]})
+        self.analysis_frame = pd.DataFrame(columns=['year', 'month', 'count', 'revenue', 'change_over_month', 'product'])
+        time_span = []
+        for product in ir.p_list:
+            product_dictionaries = sorted(
+                sorted([i for i in dict_list if i['name'] == product], key=lambda x: x['month']
+                       ), key=lambda x: x['year'])
+            data_list = []
+            year_list = []
+            month_list = []
+            for key in range(len(product_dictionaries)):
+                if key > 0:
+                    try:
+                        change_over_month = (100 - round(
+                            ((product_dictionaries[key]['revenue'] / product_dictionaries[key]['count'])
+                             / (product_dictionaries[key - 1]['revenue'] / product_dictionaries[key - 1][
+                                        'count'])) * 100))
+
+                    except IndexError:
+                        print('change_list calls need to be refined')
+                else:
+                    change_over_month = 0
+
+                row_list = [product_dictionaries[key]['year'], product_dictionaries[key]['month'],
+                            product_dictionaries[key]['count'], product_dictionaries[key]['revenue'], change_over_month,
+                            product_dictionaries[key]['name']]
+                data_list.append(row_list)
+                if product == 'Blue Moon':
+                    month_list.append(product_dictionaries[key]['month'])
+                    year_list.append(product_dictionaries[key]['year'])
+
+            if product == 'Blue Moon':
+                time_span = [*zip(year_list, month_list)]
+            append_frame = pd.DataFrame(data=data_list,
+                                        columns=['year', 'month', 'count', 'revenue', 'change_over_month', 'product'])
+            self.analysis_frame = pd.concat([self.analysis_frame, append_frame], ignore_index=True)
+        self.time_span = time_span
+        return self.analysis_frame
+
+    def highest_positive_product_change_over_month_analysis(self):
+        """Analyzes the monthly_product_frame and returns the 5 products whose sales level increased the most"""
+        year = int(input('Type the year you would like to query in yyyy format:  '))
+        month = int(input('Type the month you would like to query:  '))
+        data_slice = self.analysis_frame.loc[self.analysis_frame['month'] == month].loc[self.analysis_frame['year'] == year].loc[self.analysis_frame['revenue']>500]
+        data_slice.sort_values(by='change_over_month', inplace=True, ascending=False)
+        return print(data_slice.head(5))
+
+    def highest_negative_product_change_over_month_analysis(self):
+        """Analyzes the monthly_product_frame and returns the 5 products whose sales level decreased the most"""
+        year = int(input('Type the year you would like to query in yyyy format:  '))
+        month = int(input('Type the month you would like to query:  '))
+        data_slice = self.analysis_frame.loc[self.analysis_frame['month'] == month].loc[self.analysis_frame['year'] == year].loc[self.analysis_frame['revenue']>500]
+        data_slice.sort_values(by='change_over_month', inplace=True, ascending=True)
+        return data_slice
+
+    def product_line_change_over_month_analysis(self, year, month):
+        """Analyzes the monthly_product_frame by product line and returns a dataframe with
+        product line change over month data."""
+        import information_repository as ir
+        #year = int(input('Type the year you would like to query in yyyy format:  '))
+        #month = int(input('Type the month you would like to query:  '))
+        product_line_list_of_lists = [ir.tea_product_list, ir.capsule_product_list, ir.smokeable_product_list,
+                             ir.skincare_product_list, ir.superfood_product_list, ir.honey_product_list,
+                             ir.tincture_product_list]
+        product_line_strings = ['Tea', 'Capsules', 'Smokeables', 'Skincare', 'Superfood', 'Honey', 'Tinctures']
+        product_line_append_list = []
+        line_index_counter = 0
+        for product_line in product_line_list_of_lists:
+            line_list = []
+            line_list.append(year)
+            line_list.append(month)
+            data_slice = self.analysis_frame.loc[self.analysis_frame['month'] == month].loc[
+                self.analysis_frame['year'] == year].loc[self.analysis_frame['product'].isin(product_line)]
+            if month > 1:
+                last_month_frame = self.analysis_frame.loc[self.analysis_frame['month'] == (month - 1)].loc[
+                    self.analysis_frame['year'] == year].loc[self.analysis_frame['product'].isin(product_line)]
+            else:
+                last_month_frame = self.analysis_frame.loc[self.analysis_frame['month'] == 12].loc[
+                    self.analysis_frame['year'] == (year - 1)].loc[self.analysis_frame['product'].isin(product_line)]
+            last_month_revenue = last_month_frame['revenue'].sum()
+            this_month_revenue = data_slice['revenue'].sum()
+            avg_change_over_month = (this_month_revenue / last_month_revenue) * 100
+            line_list.append(avg_change_over_month)
+            product_line = product_line_strings[line_index_counter]
+            line_index_counter += 1
+            line_list.append(product_line)
+            product_line_append_list.append(line_list)
+        product_line_analysis_frame = pd.DataFrame(data=product_line_append_list,
+                                                   columns=['year', 'month', 'avg_change_over_month',
+                                                            'product_line'])
+        product_line_analysis_frame.to_csv('product_line_csv_2021.csv')
+        return product_line_analysis_frame
+
+    def serve_time_span(self):
+        """Returns a list of tuples of unique (year, month) pairs in chronological order based on the
+         monthly_product_frame."""
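+        # e.g. a frame covering Nov 2020 - Feb 2021 yields
+        # [(2020, 11), (2020, 12), (2021, 1), (2021, 2)]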
+        return sorted(sorted(list(set([*zip(self.analysis_frame['year'],self.analysis_frame['month'])])),
+                            key=lambda x:x[1]), key=lambda x:x[0])
+
+    def product_line_change_over_month_graph(self):
+        """Using the product_line_change_over_month_analysis frame, it outputs a graph of the changes over time for
+        the top product lines."""
+        line_change_frame_data = []
+        for i in self.time_span:
+            month_frame = self.product_line_change_over_month_analysis(i[0], i[1])
+            change_list = month_frame['avg_change_over_month']
+            line_change_frame_data.append(change_list)
+        treated_line_change_frame_data = []
+        for i in range(len(line_change_frame_data)): #index of time period/segment
+            if i ==0:
+                treated_line_change_frame_data.append([self.time_span[i][0], self.time_span[i][1],
+                                                       0,0,0,0,0,0,0]) #insert base amounts for the first month
+            else:  # compound this month's ratio on top of the running cumulative change
+                month_cumulative_change_list = []
+                month_cumulative_change_list.append(self.time_span[i][0])
+                month_cumulative_change_list.append(self.time_span[i][1])# append year and month
+                for x in range(len(line_change_frame_data[0])):
+                    prior_change_list = [i[x] for i in line_change_frame_data]
+                    product_cumulative_change = (100+treated_line_change_frame_data[i-1][x+2]) * ((prior_change_list[i]/100))-100
+                    #i-1 for previous time period and x+2 for offset due to year and month category
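+                    # worked example: a line sitting at +10% cumulative change whose revenue is 120%
+                    # of the previous month's moves to (100 + 10) * (120 / 100) - 100 = 32%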
+                    month_cumulative_change_list.append(product_cumulative_change)
+                treated_line_change_frame_data.append(month_cumulative_change_list)
+        graph_frame = pd.DataFrame(data=treated_line_change_frame_data, columns=['Year', 'Month', 'Tea', 'Capsules', 'Smokeables','Skincare',
+                                                                           'Superfood', 'Honey', 'Tinctures'])
+        print(graph_frame.head(7))
+        x = [str(i) for i in graph_frame['Month']]
+        y1 = graph_frame['Tea']
+        y2 = graph_frame['Capsules']
+        y3 = graph_frame['Superfood']
+        y4 = graph_frame['Honey']
+        y5 = graph_frame['Smokeables']
+        graph = figure(x_range=x,title='Cumulative Percentage Change of Product Lines',x_axis_label='Month', y_axis_label='Percentage Change')
+        graph.line(x, y1, legend_label ='Tea', color='red', line_width=3)
+        graph.line(x, y2, legend_label ='Capsules', color='blue', line_width=3)
+        graph.line(x, y3, legend_label ='Superfood', color='orange', line_width=3)
+        graph.line(x, y4, legend_label ='Honey', color='yellow', line_width=3)
+        graph.line(x, y5, legend_label ='Smokeables', color='green', line_width=3)
+        output_file('product_line_change_over_month.html')
+        save(graph)
+        return show(graph)
+
+
+class InventoryPredictor:
+    """Inventory volume prediction using a product sales csv as the raw data."""
+    def __init__(self):
+        import information_repository as ir
+        self.unit_counts = self.sales_unit_count_dictionaries()
+        self.ingredients = self.ingredient_dictionary()
+        self.recipes = ir.unit_recipes
+
+        print('initiating')
+        pass
+
+    def sales_unit_count_dictionaries(self):
+        """Creates a set of dictionaries for each product and the cumulative quantity of units across all SKUs."""
+        import information_repository as ir
+        product_sales_frame = pd.read_csv('product_sales.csv')
+        product_sales_frame = product_sales_frame.where(pd.notnull(product_sales_frame), 'None')
+        product_unit_amounts = []
+        for i in ir.p_list:
+            product_dict = dict(name=i, quantity=0)
+            for x, row in product_sales_frame.iterrows():
+                if i in row['Product Name']:
+                    if i in ir.tea_product_list:
+                        if '1' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold']
+                        elif '3' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 3
+                        elif '20' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 20
+                        else:
+                            pass
+                            # print('Something unexpected occurred', row['Product Name'], row['Variation Attributes'])
+                    elif i in ir.superfood_product_list:
+                        if '3' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold']
+                        elif '9' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 3
+                        else:
+                            product_dict['quantity'] += 1
+                    elif i in ir.capsule_product_list:
+                        if '1' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold']
+                        if '4' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 4
+                    elif i in ir.smokeable_product_list:
+                        if '7' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 7
+                        elif 'prerolls' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 2
+                        else:
+                            product_dict['quantity'] += row['Quantity Sold'] * 4
+                    elif i in ir.honey_product_list:
+                        if '3' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 3
+                        elif '5' in row['Variation Attributes']:
+                            product_dict['quantity'] += row['Quantity Sold'] * 5
+                        elif '2' in row['Variation Attributes']:
+                            pass
+                            # print('Reminder that packet honeys and jars need to be separated')
+                    else:
+                        product_dict['quantity'] += row['Quantity Sold']
+            product_unit_amounts.append(product_dict)
+        return product_unit_amounts
+
+    def ingredient_dictionary(self):
+        """Creates a ingredient dictionary with all ingredients as keys and the cumulative volume across all
+        products as values."""
+        inventory = pd.read_csv('craftybase-export-material.csv')
+        ingredient_dictionary = {}
+        for i in list(inventory['name']):
+            ingredient_dictionary[i]=0
+        return ingredient_dictionary
+
+    def ingredient_volume_table(self):
+        """Creates a csv with ingredients and the cumulative volume used across a time span."""
+        for x in self.unit_counts:
+            for y in self.recipes:
+                if x['name'] == y['name']:
+                    for k, v in y.items():
+                        if k != 'name':
+                            self.ingredients[k] += v * x['quantity']
+        sorted_ingredient_volumes = sorted(self.ingredients.items(), key=lambda x: x[1], reverse=True)
+        output_frame = pd.DataFrame(data = sorted_ingredient_volumes, columns= ['Ingredient', 'Volume (gram or oz)'])
+        output_frame = output_frame[output_frame['Volume (gram or oz)'] !=0]
+        output_frame.to_csv('ingredient_volume_table.csv')
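+
+# Example usage (a sketch; assumes product_sales.csv and craftybase-export-material.csv
+# are present in the working directory):
+#     predictor = InventoryPredictor()
+#     predictor.ingredient_volume_table()  # writes ingredient_volume_table.csv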
+

+ 662 - 0
data/purposeCombined/BI/practica3.py

@@ -0,0 +1,662 @@
+# -*- coding: utf-8 -*-
+"""
+Autor:
+    Francisco Solano López Rodríguez
+Fecha:
+    Noviembre/2018
+Contenido:
+    Práctica 3
+    Inteligencia de Negocio
+    Grado en Ingeniería Informática
+    Universidad de Granada
+"""
+
+''' -------------------- IMPORT LIBRARY -------------------- '''
+
+import pandas as pd
+import numpy as np
+import time
+import matplotlib.pyplot as plt
+import seaborn as sns
+from collections import Counter
+
+import datetime
+
+from sklearn.model_selection import StratifiedKFold, KFold
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.feature_selection import VarianceThreshold
+from sklearn import ensemble
+
+''' --- classifiers import --- '''
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import ExtraTreesClassifier
+from sklearn.neural_network import MLPClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn import svm
+import xgboost as xgb
+import lightgbm as lgb
+from sklearn import tree
+
+from sklearn.svm import SVC, LinearSVC, NuSVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
+from sklearn.naive_bayes import GaussianNB
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+
+from catboost import Pool, CatBoostClassifier
+
+''' --- preprocessing import --- '''
+from sklearn import preprocessing
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.preprocessing import StandardScaler  
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.preprocessing import scale
+from sklearn.preprocessing import Normalizer
+
+''' --- metrics import --- '''
+from sklearn import metrics
+from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+
+from math import sin, cos, sqrt, atan2, radians
+
+
+# Extract features from the recording date and compute the age of the well
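+# e.g. a row with date_recorded '2013-02-04' and construction_year 1996 gets
+# year_recorder=2013, month_recorder=2, weekday_recorder=1 (Monday) and age=17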
+def date_parser(df):
+    date_recorder = list(map(lambda x: datetime.datetime.strptime(str(x), '%Y-%m-%d'),
+                             df['date_recorded'].values))
+    df['year_recorder'] = list(map(lambda x: int(x.strftime('%Y')), date_recorder))
+    df['weekday_recorder'] = list(map(lambda x: int(x.strftime('%w')), date_recorder))
+    df['yearly_week_recorder'] = list(map(lambda x: int(x.strftime('%W')), date_recorder))
+    df['month_recorder'] = list(map(lambda x: int(x.strftime('%m')), date_recorder))
+    df['age'] = df['year_recorder'].values - df['construction_year'].values
+    del df['date_recorded']
+    return df
+
+
+# Get the great-circle distance (km) to the coordinate (lon2, lat2)
+def distancia(lon1, lat1, lon2, lat2):
+    # Haversine formula; the inputs are in degrees, so convert them to radians first
+    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))
+    dlon = lon2 - lon1
+    dlat = lat2 - lat1
+
+    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
+    c = 2 * atan2(sqrt(a), sqrt(1 - a))
+    R = 6371
+
+    return R * c
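+# e.g. distancia(35.0, -6.0, 0, 0) gives the great-circle distance in km from a
+# point in Tanzania to the (0, 0) reference used as a feature below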
+
+# Get the Cartesian x coordinate from the longitude and latitude
+def cartesian_x(lon, lat):
+    lat=radians(lat)
+    lon=radians(lon)
+    R=6371.0
+    x = R * cos(lat) * cos(lon)
+    return x
+
+# Get the Cartesian y coordinate from the longitude and latitude
+def cartesian_y(lon, lat):
+    lat=radians(lat)
+    lon=radians(lon)
+    R=6371.0
+    y = R * cos(lat) * sin(lon)
+    return y
+
+# Confusion matrix
+def plot_confusion_matrix(y_test, predictions):
+    cm = metrics.confusion_matrix(y_test, predictions)
+    plt.figure(figsize=(9,9))
+    sns.heatmap(cm, annot=True, fmt=".3f", linewidths=.5, square = True)
+    plt.ylabel('Actual label')
+    plt.xlabel('Predicted label')
+    plt.show()
+
+# Function to perform the cross-validation with optional preprocessing
+def cross_validation(clf, X, y, cv = None, min_max_scaler = False, scaled = False, standard_scaler = False, normalizer = False, poly = False, m_confusion = False):
+
+    if cv is None:
+        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123456)
+
+    iteration = 0
+
+    for train, test in cv.split(X, y):
+
+        X_train, X_test = X[train], X[test]
+        y_train, y_test = y[train], y[test]
+
+
+        if min_max_scaler:
+            X_train = MinMaxScaler().fit_transform(X_train)
+            X_test = MinMaxScaler().fit_transform(X_test)
+
+        if scaled:
+            X_train = scale(X_train)
+            X_test = scale(X_test)
+
+        if poly:
+            X_train = PolynomialFeatures(degree = 2, interaction_only=True).fit_transform(X_train)
+            X_test = PolynomialFeatures(degree = 2, interaction_only=True).fit_transform(X_test)
+
+        if standard_scaler:
+            transformer = StandardScaler().fit(X_train)
+            X_train = transformer.transform(X_train)
+            X_test = transformer.transform(X_test)
+
+        if normalizer:
+            transformer = Normalizer().fit(X_train)
+            X_train = transformer.transform(X_train)
+            X_test = transformer.transform(X_test)
+
+        t = time.time()
+        clf = clf.fit(X_train,y_train)
+        training_time = time.time() - t
+
+        predictions_train = clf.predict(X_train)
+        predictions = clf.predict(X_test)
+
+        print("--------- Iteración ", iteration, " --------- ")
+        print("Tiempo :: ", training_time)
+        print ("Train Accuracy :: ", accuracy_score(y_train, predictions_train))
+        print ("Test Accuracy  :: ", accuracy_score(y_test, predictions))
+        print("")
+
+        if m_confusion:
+            plot_confusion_matrix(y_test, predictions)
+
+        iteration += 1
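+
+# Example (sketch): 5-fold stratified CV of a random forest with standardised features
+#     clf = RandomForestClassifier(n_estimators=125, max_depth=20)
+#     cross_validation(clf, X, y, standard_scaler=True)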
+
+''' ------------------------------------------------------------------ '''
+''' --------------------------- READ DATA ---------------------------- '''
+''' ------------------------------------------------------------------ '''
+
+print("\nWATER PUMP COMPETITION\n")
+
+print("Leyendo datos...")
+
+# the .csv files were prepared beforehand to replace ,, and "Not known" with NaN (missing values)
+data_x_orig = pd.read_csv('data/water_pump_tra.csv')
+data_y = pd.read_csv('data/water_pump_tra_target.csv')
+data_x_tst = pd.read_csv('data/water_pump_tst.csv')
+
+print(data_x_orig.shape)
+print(data_x_tst.shape)
+
+print("Lectura completada.\n")
+
+
+''' ------------------------------------------------------------------ '''
+''' -------------------------- LOOK AT DATA -------------------------- '''
+''' ------------------------------------------------------------------ '''
+
+print('Looking at the data:\n')
+
+data_x = data_x_orig
+
+print('num_private:')
+print(data_x['num_private'].value_counts()[0:3])
+print('recorded_by:')
+print(data_x['recorded_by'].value_counts())
+print(data_y.status_group.value_counts()/len(data_y))
+
+data_y.status_group.value_counts().plot(kind='bar')
+plt.xticks(rotation = 0)
+plt.show()
+
+print('Examples with longitude = 0')
+print(len(data_x.loc[data_x['longitude']==0,'longitude']))
+
+print('Examples with latitude = 0')
+print(len(data_x.loc[data_x['latitude']==-0.00000002,'latitude']))
+
+print('Examples with construction_year = 0')
+print(len(data_x.loc[data_x['construction_year']==0,'construction_year']))
+
+
+corr = data_x.corr()
+sns.heatmap(corr)
+plt.xticks(rotation=45)
+plt.show()
+
+print("Valores perdidos:")
+print(data_x.isnull().sum())
+
+data_x.isnull().sum().plot.bar()
+plt.show()
+
+print('funder:\n')
+print(data_x['funder'].value_counts()[0:6])
+print('\ninstaller:\n')
+print(data_x['installer'].value_counts()[0:6])
+print('\npublic_meeting:\n')
+print(data_x['public_meeting'].value_counts()[0:6])
+print('\nscheme_management:\n')
+print(data_x['scheme_management'].value_counts()[0:6])
+print('\npermit:\n')
+print(data_x['permit'].value_counts()[0:6])
+print('\nsubvillage:\n')
+print(data_x['subvillage'].value_counts()[0:6])
+print('\nwpt_name:\n')
+print(data_x['wpt_name'].value_counts()[0:6])
+
+'''
+data_x['funder'].value_counts()[0:10].plot.bar()
+plt.show()
+data_x['installer'].value_counts().plot.bar()
+plt.show()
+data_x['public_meeting'].value_counts().plot.bar()
+plt.show()
+data_x['scheme_management'].value_counts().plot.bar()
+plt.show()
+data_x['permit'].value_counts().plot.bar()
+plt.show()
+data_x['subvillage'].value_counts().plot.bar()
+plt.show()
+data_x['wpt_name'].value_counts().plot.bar()
+plt.show()
+'''
+
+''' ------------------------------------------------------------------ '''
+''' ------------------------- PREPROCESSING -------------------------- '''
+''' ------------------------------------------------------------------ '''
+
+print("\nPreprocesando datos...")
+
+data_x = pd.concat([data_x_orig, data_x_tst])
+
+
+''' ------------------ DROP COLUMNS ------------------ '''
+
+print("  Borrando columnas...")
+columns_to_drop = ['id', 'num_private', 'recorded_by', 'scheme_name']
+data_x.drop(labels=columns_to_drop, axis=1, inplace = True)
+data_y.drop(labels=['id'], axis=1,inplace = True)
+
+
+
+''' ------------------ MISSING VALUES ------------------ '''
+
+print("  Modificando valores nan...")
+data_x['funder'] = data_x['funder'].fillna('Government Of Tanzania')
+data_x['installer'] = data_x['installer'].fillna('DWE')
+data_x['public_meeting'] = data_x['public_meeting'].fillna(True)
+data_x['scheme_management'] = data_x['scheme_management'].fillna('VWC')
+data_x['permit'] = data_x['permit'].fillna(True)
+data_x['subvillage'] = data_x['subvillage'].fillna('Unknown')
+data_x['wpt_name'] = data_x['wpt_name'].fillna('none')
+
+data_x.loc[data_x['latitude']>-0.1,'latitude']=None
+data_x.loc[data_x['longitude']==0,'longitude']=None
+data_x["longitude"] = data_x.groupby("region_code").transform(lambda x: x.fillna(x.median())).longitude
+data_x["latitude"] = data_x.groupby("region_code").transform(lambda x: x.fillna(x.median())).latitude
+
+data_x.construction_year=pd.to_numeric(data_x.construction_year)
+data_x.loc[data_x.construction_year <= 0, data_x.columns=='construction_year'] = 1950
+
+# mean() takes much longer, but improves the results slightly compared to median()
+#data_x=data_x.fillna(data_x.mean())
+#data_x = data_x.fillna(data_x.median())
+
+''' ------------------ RARE VALUES ------------------ '''
+
+print("  Etiquetando casos raros...")
+columns_other = [x for x in data_x.columns if x not in ['latitude','longitude','gps_height','age','population','construction_year','month_recorder']]
+
+for col in columns_other:
+    value_counts = data_x[col].value_counts()
+    lessthen = value_counts[value_counts < 20]
+    listnow = data_x[col].isin(list(lessthen.keys()))
+    data_x.loc[listnow,col] = 'Others'
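+
+# values that occur fewer than 20 times in a column are collapsed into a single
+# 'Others' label, so the label encoding below does not create near-unique categories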
+
+
+''' ------------------ CARTESIAN ------------------ '''
+
+print("  Preprocesando coordenadas y distancias...")
+data_x['dist'] = data_x.apply(lambda row: distancia(row['longitude'], row['latitude'], 0, 0), axis=1)
+data_x['cartesian_x'] = data_x.apply(lambda row: cartesian_x(row['longitude'], row['latitude']), axis=1)
+data_x['cartesian_y'] = data_x.apply(lambda row: cartesian_y(row['longitude'], row['latitude']), axis=1)
+data_x.drop(labels=['longitude', 'latitude'], axis=1, inplace = True)
+
+''' ------------------ DATES ------------------ '''
+
+print("  Preprocesando fechas...")
+data_x = date_parser(data_x)
+
+
+
+data_x.population = data_x.population.apply(lambda x: np.log10(x+1))
+
+print("  Convirtiendo categóricas a numéricas...")
+data_x = data_x.astype(str).apply(LabelEncoder().fit_transform)
+
+data_x_tst = data_x[len(data_x_orig):]
+data_x = data_x[:len(data_x_orig)]
+
+X = data_x.values
+y = np.ravel(data_y.values)
+#y = le.fit(y).transform(y)
+X_tst = data_x_tst.values
+
+print("Datos preprocesados con éxito.\n")
+
+
+''' -------------------- CROSS VALIDATION -------------------- '''
+
+'''
+print("Validación cruzada:\n")
+
+print('\nKNN\n')
+knn = KNeighborsClassifier(n_neighbors=5)
+cross_validation(clf=knn, X = X, y = y, cv = None, min_max_scaler = True)
+
+print('\nXGB\n')
+clf = xgb.XGBClassifier(n_estimators = 200)
+cross_validation(clf, X, y)
+
+print('\nLGB\n')
+clf = lgb.LGBMClassifier(objective='binary', n_estimators=200, num_leaves=31)
+cross_validation(clf, X, y)
+
+print('\nRandomForest\n')
+clf = RandomForestClassifier(n_estimators=125, max_depth = 20, random_state = 10)
+cross_validation(clf, X, y)
+
+print('\nExtraTreesClassifier\n')
+clf = ExtraTreesClassifier(n_estimators = 125, max_depth = 20)
+cross_validation(clf, X, y)
+'''
+
+''' -------------------- SUBMISSION 1 -------------------- '''
+'''
+clf = xgb.XGBClassifier(n_estimators = 200)
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission1.csv", index=False)
+'''
+''' ---------------------------------------------------- '''
+
+''' -------------------- SUBMISSION 2 -------------------- '''
+'''
+clf = RandomForestClassifier(n_estimators = 125)
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission2.csv", index=False)
+'''
+''' ---------------------------------------------------- '''
+
+''' -------------------- SUBMISSION 3 -------------------- '''
+'''
+clf = RandomForestClassifier()
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission3.csv", index=False)
+'''
+''' ---------------------------------------------------- '''
+
+
+''' -------------------- SUBMISSION 6 -------------------- '''
+'''
+# Eliminated features:
+# 'num_private', 'recorded_by', 'region', 'scheme_name', 'scheme_management'
+
+clf = RandomForestClassifier(max_features = 'sqrt', n_estimators = 500, random_state=10)
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission6.csv", index=False)
+'''
+''' ---------------------------------------------------- '''
+
+
+''' -------------------- SUBMISSION 8 -------------------- '''
+'''
+print("Submission 8")
+
+clf = RandomForestClassifier(max_features = 'sqrt', n_estimators = 200, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission9.csv", index=False)
+'''
+''' ---------------------------------------------------- '''
+
+''' -------------------- SUBMISSION 11 -------------------- '''
+'''
+print("Submission 11")
+
+clf = RandomForestClassifier(n_estimators=200, max_depth = 20, random_state = 10)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission11.csv", index=False)
+'''
+
+''' -------------------- SUBMISSION 12 -------------------- '''
+'''
+print("Submission 12")
+
+clf = RandomForestClassifier(n_estimators=125, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission12.csv", index=False)
+'''
+
+''' -------------------- SUBMISSION 13 -------------------- '''
+'''
+print("Submission 13")
+
+fit_rf = RandomForestClassifier(max_features = 'sqrt', max_depth=20)
+estimators = range(25,201,25)
+param_dist = {'n_estimators': estimators}
+
+clf= GridSearchCV(fit_rf, cv = 5, scoring = 'accuracy', param_grid=param_dist, n_jobs = 3)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission13.csv", index=False)
+'''
+
+''' -------------------- SUBMISSION 15 -------------------- '''
+'''
+print("Submission 15")
+
+clf = RandomForestClassifier(n_estimators=125, max_depth = 22)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission15.csv", index=False)
+'''
+''' -------------------- SUBMISSION 16 -------------------- '''
+'''
+print("Submission 16")
+
+clf = RandomForestClassifier(n_estimators=500)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission16.csv", index=False)
+
+# Note: this experiment worsens the results, possibly due to overfitting
+'''
+
+''' -------------------- SUBMISSION 17 -------------------- '''
+'''
+print("Submission 17")
+
+clf = RandomForestClassifier(n_estimators=120, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission17.csv", index=False)
+
+'''
+
+''' -------------------- SUBMISSION 18 -------------------- '''
+'''
+# fillna() with the most frequent value
+print("Submission 18")
+
+clf = RandomForestClassifier(n_estimators=160, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission18.csv", index=False)
+'''
+
+''' -------------------- SUBMISSION 19 -------------------- '''
+'''
+# fillna() with the most frequent value
+print("Submission 19")
+
+clf = RandomForestClassifier(n_estimators=150, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission19.csv", index=False)
+'''
+
+''' -------------------- SUBMISSION 22 -------------------- '''
+'''
+print("Submission 22")
+
+fit_rf = RandomForestClassifier(max_features = 'sqrt', max_depth=20)
+estimators = range(25,201,25)
+param_dist = {'n_estimators': estimators}
+
+clf= GridSearchCV(fit_rf, cv = 5, scoring = 'accuracy', param_grid=param_dist, n_jobs = 3)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission22.csv", index=False)
+
+best_param = clf.best_params_['n_estimators']
+print ("Mejor valor para n_estimators: ", best_param)
+'''
+''' -------------------- SUBMISSION 23 -------------------- '''
+'''
+print("Submission 23")
+
+fit_rf = RandomForestClassifier(max_features = 'sqrt', max_depth=25)
+estimators = range(100,1101,25)
+param_dist = {'n_estimators': estimators}
+
+clf= GridSearchCV(fit_rf, cv = 5, scoring = 'accuracy', param_grid=param_dist, n_jobs = 3)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission23.csv", index=False)
+
+best_param = clf.best_params_['n_estimators']
+print ("Mejor valor para n_estimators: ", best_param)
+'''
+
+
+''' -------------------- SUBMISSION 24 -------------------- '''
+'''
+print("Submission 24")
+
+clf = RandomForestClassifier(n_estimators=100, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission24.csv", index=False)
+
+'''
+''' -------------------- SUBMISSION 25 -------------------- '''
+'''
+print("Submission 25")
+
+clf = RandomForestClassifier(n_estimators=150, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission25.csv", index=False)
+'''
+
+
+''' ------------------- FINAL SUBMISSION ------------------ '''
+
+''' -------------------- SUBMISSION 26 -------------------- '''
+
+print("Submission 26")
+
+clf = RandomForestClassifier(n_estimators = 125, max_depth = 20)
+
+clf = clf.fit(X,y)
+
+y_pred_tst = clf.predict(X_tst)
+
+df_submission = pd.read_csv('data/water_pump_submissionformat.csv')
+df_submission['status_group'] = y_pred_tst
+df_submission.to_csv("submission26.csv", index=False)

+ 98 - 0
data/purposeCombined/Directory/IOTA2Directory.py

@@ -0,0 +1,98 @@
+#!/usr/bin/python
+#-*- coding: utf-8 -*-
+
+# =========================================================================
+#   Program:   iota2
+#
+#   Copyright (c) CESBIO. All rights reserved.
+#
+#   See LICENSE for details.
+#
+#   This software is distributed WITHOUT ANY WARRANTY; without even
+#   the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+#   PURPOSE.  See the above copyright notices for more information.
+#
+# =========================================================================
+
+
+import os
+import shutil
+from Common import ServiceConfigFile as SCF
+
+
+def GenerateDirectories(cfg):
+    """
+    generate IOTA2 output directories
+    """
+    if not isinstance(cfg, SCF.serviceConfigFile):
+        cfg = SCF.serviceConfigFile(cfg)
+
+    root = cfg.getParam('chain', 'outputPath')
+    rm_PathTEST = cfg.getParam("chain", "remove_outputPath")
+    start_step = cfg.getParam("chain", "firstStep")
+
+    if os.path.exists(root) and root != "/" and rm_PathTEST and start_step == "init":
+        shutil.rmtree(root,ignore_errors=False)
+    os.mkdir(root)
+    if os.path.exists(root+"/logs"):
+        shutil.rmtree(root+"/logs")
+    os.mkdir(root+"/logs")
+    if os.path.exists(root+"/samplesSelection"):
+        shutil.rmtree(root+"/samplesSelection")
+    os.mkdir(root+"/samplesSelection")
+    if os.path.exists(root+"/model"):
+        shutil.rmtree(root+"/model")
+    os.mkdir(root+"/model")
+    if os.path.exists(root+"/formattingVectors"):
+        shutil.rmtree(root+"/formattingVectors")
+    os.mkdir(root+"/formattingVectors")
+    if os.path.exists(root+"/config_model"):
+        shutil.rmtree(root+"/config_model")
+    os.mkdir(root+"/config_model")
+    if os.path.exists(root+"/envelope"):
+        shutil.rmtree(root+"/envelope")
+    os.mkdir(root+"/envelope")
+    if os.path.exists(root+"/classif"):
+        shutil.rmtree(root+"/classif")
+    os.mkdir(root+"/classif")
+    if os.path.exists(root+"/shapeRegion"):
+        shutil.rmtree(root+"/shapeRegion")
+    os.mkdir(root+"/shapeRegion")
+    if os.path.exists(root+"/final"):
+        shutil.rmtree(root+"/final")
+    os.mkdir(root+"/final")
+    os.mkdir(root+"/final/simplification")
+    os.mkdir(root+"/final/simplification/tiles")
+    os.mkdir(root+"/final/simplification/vectors")    
+    os.mkdir(root+"/final/simplification/tmp")
+    if os.path.exists(root+"/features"):
+        shutil.rmtree(root+"/features")
+    os.mkdir(root+"/features")
+    if os.path.exists(root+"/dataRegion"):
+        shutil.rmtree(root+"/dataRegion")
+    os.mkdir(root+"/dataRegion")
+    if os.path.exists(root+"/learningSamples"):
+        shutil.rmtree(root+"/learningSamples")
+    os.mkdir(root+"/learningSamples")
+    if os.path.exists(root+"/dataAppVal"):
+        shutil.rmtree(root+"/dataAppVal")
+    os.mkdir(root+"/dataAppVal")
+    if os.path.exists(root+"/stats"):
+        shutil.rmtree(root+"/stats")
+    os.mkdir(root+"/stats")
+    
+    if os.path.exists(root+"/cmd"):
+        shutil.rmtree(root+"/cmd")
+    os.mkdir(root+"/cmd")
+    os.mkdir(root+"/cmd/stats")
+    os.mkdir(root+"/cmd/train")
+    os.mkdir(root+"/cmd/cla")
+    os.mkdir(root+"/cmd/confusion")
+    os.mkdir(root+"/cmd/features")
+    os.mkdir(root+"/cmd/fusion")
+    os.mkdir(root+"/cmd/splitShape")
+
+    merge_final_classifications = cfg.getParam('chain', 'merge_final_classifications')
+    if merge_final_classifications:
+        if os.path.exists(root+"/final/merge_final_classifications"):
+            shutil.rmtree(root+"/final/merge_final_classifications")

+ 31 - 0
data/purposeCombined/Directory/advance_touch.py

@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Libraries
+import os
+import click
+
+@click.command()
+@click.argument('paths', nargs=-1)
+@click.option('-cd/--change', is_flag=True, default=False, help='After creating the directories, change to the new deeper directory.')
+def advance_touch(paths, cd):
+    """ Make folders and files """
+    for path in paths:
+        # Make folders
+        new_dirs = '/'.join(path.split('/')[0:-1])
+        if not os.path.exists(new_dirs) and new_dirs != '':
+            os.makedirs(new_dirs)
+        # Change directory
+        if cd:
+            cd_path = os.path.join(os.getcwd(), new_dirs) + '/'
+            os.chdir(cd_path)
+
+        # Make file
+        if not path.endswith('/') and not os.path.isfile(path):
+            try:
+                open(path, 'w+').close()
+            except IsADirectoryError:
+                pass
+
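+# Example (sketch): `python advance_touch.py docs/notes/todo.txt` creates the
+# docs/notes/ directories (if missing) and an empty todo.txt inside them.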
+if __name__ == '__main__':
+    advance_touch()

+ 213 - 0
data/purposeCombined/Directory/augmentation_main.py

@@ -0,0 +1,213 @@
+from __future__ import print_function, unicode_literals
+import os
+from twisted.python import filepath
+from twisted.trial import unittest
+from .. import database
+from ..database import (CHANNELDB_TARGET_VERSION, USAGEDB_TARGET_VERSION,
+                        _get_db, dump_db, DBError)
+
+class Get(unittest.TestCase):
+    def test_create_default(self):
+        db_url = ":memory:"
+        db = _get_db(db_url, "channel", CHANNELDB_TARGET_VERSION)
+        rows = db.execute("SELECT * FROM version").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["version"], CHANNELDB_TARGET_VERSION)
+
+    def test_open_existing_file(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "normal.db")
+        db = _get_db(fn, "channel", CHANNELDB_TARGET_VERSION)
+        rows = db.execute("SELECT * FROM version").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["version"], CHANNELDB_TARGET_VERSION)
+        db2 = _get_db(fn, "channel", CHANNELDB_TARGET_VERSION)
+        rows = db2.execute("SELECT * FROM version").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["version"], CHANNELDB_TARGET_VERSION)
+
+    def test_open_bad_version(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "old.db")
+        db = _get_db(fn, "channel", CHANNELDB_TARGET_VERSION)
+        db.execute("UPDATE version SET version=999")
+        db.commit()
+
+        with self.assertRaises(DBError) as e:
+            _get_db(fn, "channel", CHANNELDB_TARGET_VERSION)
+        self.assertIn("Unable to handle db version 999", str(e.exception))
+
+    def test_open_corrupt(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "corrupt.db")
+        with open(fn, "wb") as f:
+            f.write(b"I am not a database")
+        with self.assertRaises(DBError) as e:
+            _get_db(fn, "channel", CHANNELDB_TARGET_VERSION)
+        self.assertIn("not a database", str(e.exception))
+
+    def test_failed_create_allows_subsequent_create(self):
+        patch = self.patch(database, "get_schema", lambda version: b"this is a broken schema")
+        dbfile = filepath.FilePath(self.mktemp())
+        self.assertRaises(Exception, lambda: _get_db(dbfile.path))
+        patch.restore()
+        _get_db(dbfile.path, "channel", CHANNELDB_TARGET_VERSION)
+
+    def test_upgrade(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "upgrade.db")
+        self.assertNotEqual(USAGEDB_TARGET_VERSION, 1)
+
+        # create an old-version DB in a file
+        db = _get_db(fn, "usage", 1)
+        rows = db.execute("SELECT * FROM version").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["version"], 1)
+        del db
+
+        # then upgrade the file to the latest version
+        dbA = _get_db(fn, "usage", USAGEDB_TARGET_VERSION)
+        rows = dbA.execute("SELECT * FROM version").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["version"], USAGEDB_TARGET_VERSION)
+        dbA_text = dump_db(dbA)
+        del dbA
+
+        # make sure the upgrades got committed to disk
+        dbB = _get_db(fn, "usage", USAGEDB_TARGET_VERSION)
+        dbB_text = dump_db(dbB)
+        del dbB
+        self.assertEqual(dbA_text, dbB_text)
+
+        # The upgraded schema should be equivalent to that of a new DB.
+        latest_db = _get_db(":memory:", "usage", USAGEDB_TARGET_VERSION)
+        latest_text = dump_db(latest_db)
+        with open("up.sql","w") as f: f.write(dbA_text)
+        with open("new.sql","w") as f: f.write(latest_text)
+        # debug with "diff -u _trial_temp/up.sql _trial_temp/new.sql"
+        self.assertEqual(dbA_text, latest_text)
+
+    def test_upgrade_fails(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "upgrade.db")
+        self.assertNotEqual(USAGEDB_TARGET_VERSION, 1)
+
+        # create an old-version DB in a file
+        db = _get_db(fn, "usage", 1)
+        rows = db.execute("SELECT * FROM version").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["version"], 1)
+        del db
+
+        # then upgrade the file to a too-new version, for which we have no
+        # upgrader
+        with self.assertRaises(DBError):
+            _get_db(fn, "usage", USAGEDB_TARGET_VERSION+1)
+
+class CreateChannel(unittest.TestCase):
+    def test_memory(self):
+        db = database.create_channel_db(":memory:")
+        latest_text = dump_db(db)
+        self.assertIn("CREATE TABLE", latest_text)
+
+    def test_preexisting(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "preexisting.db")
+        with open(fn, "w"):
+            pass
+        with self.assertRaises(database.DBAlreadyExists):
+            database.create_channel_db(fn)
+
+    def test_create(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        db = database.create_channel_db(fn)
+        latest_text = dump_db(db)
+        self.assertIn("CREATE TABLE", latest_text)
+
+    def test_create_or_upgrade(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        db = database.create_or_upgrade_channel_db(fn)
+        latest_text = dump_db(db)
+        self.assertIn("CREATE TABLE", latest_text)
+
+class CreateUsage(unittest.TestCase):
+    def test_memory(self):
+        db = database.create_usage_db(":memory:")
+        latest_text = dump_db(db)
+        self.assertIn("CREATE TABLE", latest_text)
+
+    def test_preexisting(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "preexisting.db")
+        with open(fn, "w"):
+            pass
+        with self.assertRaises(database.DBAlreadyExists):
+            database.create_usage_db(fn)
+
+    def test_create(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        db = database.create_usage_db(fn)
+        latest_text = dump_db(db)
+        self.assertIn("CREATE TABLE", latest_text)
+
+    def test_create_or_upgrade(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        db = database.create_or_upgrade_usage_db(fn)
+        latest_text = dump_db(db)
+        self.assertIn("CREATE TABLE", latest_text)
+
+    def test_create_or_upgrade_disabled(self):
+        db = database.create_or_upgrade_usage_db(None)
+        self.assertIs(db, None)
+
+class OpenChannel(unittest.TestCase):
+    def test_open(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        db1 = database.create_channel_db(fn)
+        latest_text = dump_db(db1)
+        self.assertIn("CREATE TABLE", latest_text)
+        db2 = database.open_existing_db(fn)
+        self.assertIn("CREATE TABLE", dump_db(db2))
+
+    def test_doesnt_exist(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        with self.assertRaises(database.DBDoesntExist):
+            database.open_existing_db(fn)
+
+class OpenUsage(unittest.TestCase):
+    def test_open(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        db1 = database.create_usage_db(fn)
+        latest_text = dump_db(db1)
+        self.assertIn("CREATE TABLE", latest_text)
+        db2 = database.open_existing_db(fn)
+        self.assertIn("CREATE TABLE", dump_db(db2))
+
+    def test_doesnt_exist(self):
+        basedir = self.mktemp()
+        os.mkdir(basedir)
+        fn = os.path.join(basedir, "created.db")
+        with self.assertRaises(database.DBDoesntExist):
+            database.open_existing_db(fn)
+

+ 92 - 0
data/purposeCombined/Directory/conftest.py

@@ -0,0 +1,92 @@
+import os
+import shutil
+
+import pytest
+
+
+def create_file(path: str, content: str):
+    """Create txt file with specific content"""
+    with open(f"{path}", "w") as file:
+        file.write(content)
+
+
+@pytest.fixture
+def create_files():
+    """Create files with equal or non-equal content"""
+    create_file("tests/file1.txt", "hello, world")
+    create_file("tests/file2.txt", "hello, world!")
+    create_file("tests/file3.txt", "hello, world")
+    yield
+    os.remove("tests/file1.txt")
+    os.remove("tests/file2.txt")
+    os.remove("tests/file3.txt")
+
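+# Example (sketch): a test requests this fixture simply by naming it as a parameter:
+#     def test_equal_content(create_files):
+#         assert open("tests/file1.txt").read() == open("tests/file3.txt").read()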
+
+@pytest.fixture
+def create_dirs_and_files():
+    os.makedirs("tests/dir1/dir2")
+    os.makedirs("tests/dir3/dir4")
+    create_file("tests/dir1/file1.txt", "aaa")
+    create_file("tests/dir3/file2.txt", "bbb")
+    yield
+    shutil.rmtree("tests/dir1")
+    shutil.rmtree("tests/dir3")
+
+
+@pytest.fixture
+def create_nested_dirs_and_files_first_case():
+    """Create common case for synch function"""
+    os.makedirs("tests/source/dir1")
+    os.mkdir("tests/source/dir2")
+    os.mkdir("tests/source/dir3")
+    create_file("tests/source/dir1/file1.txt", "abacaba")
+    os.makedirs("tests/replica/dir1")
+    os.mkdir("tests/replica/dir4")
+    yield
+    shutil.rmtree("tests/source")
+    shutil.rmtree("tests/replica")
+
+
+@pytest.fixture
+def create_nested_dirs_and_files_second_case():
+    """Create common case for synch function"""
+    os.makedirs("tests/source/dir1/dir2")
+    create_file("tests/source/dir1/dir2/file1.txt", "hello")
+    os.makedirs("tests/replica/dir1")
+    os.mkdir("tests/replica/dir4")
+    create_file("tests/replica/dir4/file2.txt", "hello")
+    yield
+    shutil.rmtree("tests/source")
+    shutil.rmtree("tests/replica")
+
+
+@pytest.fixture
+def create_two_different_files():
+    """Create two different files"""
+    os.mkdir("tests/source")
+    os.mkdir("tests/replica")
+    create_file("tests/source/file1.txt", "aaa")
+    create_file("tests/replica/file1.txt", "bbb")
+    yield
+    shutil.rmtree("tests/source")
+    shutil.rmtree("tests/replica")
+
+
+@pytest.fixture
+def create_empty_source_dir():
+    """Create empty source dir and non-empty replic's one"""
+    os.mkdir("tests/source")
+    os.makedirs("tests/replica/dir1")
+    yield
+    shutil.rmtree("tests/source")
+    shutil.rmtree("tests/replica")
+
+
+@pytest.fixture
+def create_empty_replica_dir():
+    """Create empty replica dir and non-empty source one"""
+    os.makedirs("tests/source/dir1/dir2")
+    os.mkdir("tests/replica")
+    yield
+    shutil.rmtree("tests/source")
+    shutil.rmtree("tests/replica")

+ 394 - 0
data/purposeCombined/Directory/data_preprocessing_utils.py

@@ -0,0 +1,394 @@
+# Customary Imports:
+import tensorflow as tf
+assert '2.' in tf.__version__  # make sure you're using tf 2.0
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import sklearn
+import skimage
+import cv2 as cv
+import os
+import datetime
+import scipy
+from skimage.morphology import reconstruction
+from skimage import exposure
+import scipy.io as sio
+import h5py
+import random
+import shutil
+import PIL
+import imageio
+import pydot 
+import graphviz
+import plotly.graph_objects as go
+import preprocess_crop
+from pathlib import Path
+from tensorflow.keras import backend as K
+from PIL import Image
+from keras.preprocessing.image import ImageDataGenerator
+from tensorflow.keras.layers import Dense, Flatten, Conv2D
+from tensorflow.keras import Model
+#from keras.utils import CustomObjectScope
+from mpl_toolkits.mplot3d import Axes3D
+import data_preprocessing_utils
+##################################################################################################################################
+'''
+DATA PREPROCESSING UTILS:
+'''
+##################################################################################################################################
+# Converting MAP Files:
+def convert_MAP(directory, output_directory, min_shape, file_format = '.npy', search_keys = None, dtype = np.float32):
+    '''
+    Loops through the given raw-data directory and converts each .mat file
+    into the requested file_format (.npy by default); arrays smaller than min_shape are rejected
+    '''
+    new_dir = os.path.join(os.getcwd(), output_directory)
+    if not os.path.exists(new_dir):
+        os.mkdir(new_dir)
+    else:
+        shutil.rmtree(new_dir)
+        os.mkdir(new_dir)
+    for file in os.listdir(directory):
+        filename = os.fsdecode(file)
+        if filename.endswith(".mat"): 
+            #print(os.path.join(directory, filename))
+            filepath = os.path.join(directory, filename)
+            array_dict = {}
+            try:
+                f = h5py.File(filepath, 'r')
+            except:
+                f = sio.loadmat(filepath)
+            for k, v in f.items():
+                array_dict[k] = np.array(v, dtype = np.float32)
+            # As we only need image info from dict (the last key) we do this
+            if search_keys is None:
+                # out of the struct of .mat files we only want the "map" entries; use a
+                # literal so the parameter is not overwritten for the following files
+                filtered_dict = dict(filter(lambda item: 'map' in item[0], array_dict.items()))
+            else:
+                filtered_dict = {}
+                for i in range(len(search_keys)):
+                    search_key = search_keys[i]
+                    if search_key in array_dict:
+                        filtered_dict[search_key] = array_dict[search_key]
+            if len(filtered_dict) == 0:
+                print('No Data to Meet Search Key Requirements: Datapoint Rejected -> ' + filepath)
+            else:
+                #print(list(array_dict.keys()))
+                #print(filtered_dict)
+                arrays = []
+                for k, v in filtered_dict.items():
+                    temp = np.transpose(v.astype(np.float32))
+                    # To normalize data between [-1,1], use -> arrays = arrays/(np.max(arrays)/2) - 1
+                    # To normalize data between [0,1], use -> arrays = arrays/(np.max(arrays))
+                    # To normalize data between [0,255], 
+                    #     use -> arrays = (arrays/(np.max(arrays))*255).astype(np.uint8)
+                    temp = temp/(np.max(temp))
+                    arrays.append(temp)
+                for i in range(len(arrays)):
+                    if len(arrays[i].shape) > 2:
+                        #print(arrays[i].shape)
+                        arrays[i] = np.mean(arrays[i], axis = 2)
+
+                for i in range(len(arrays)):
+                    # os.path.splitext drops the '.mat' suffix (str.strip would strip characters)
+                    new_dir_filepath = os.path.join(new_dir, os.path.splitext(filename)[0]
+                                                    + '_index' + str(i) + file_format)
+                    array = arrays[i]
+                    if array.shape[0] >= min_shape[0] and array.shape[1] >= min_shape[1]:
+                        if file_format == '.npy':
+                            np.save(new_dir_filepath, array, allow_pickle=True, fix_imports=True)
+                        else:
+                            imageio.imwrite(new_dir_filepath, array)
+                    elif i == 0:
+                        print('Min Size Not Met: Datapoint Rejected -> ' + filepath)
+    return os.path.join(os.getcwd(), output_directory)
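+
+# Example (sketch; hypothetical folder names):
+#     converted_dir = convert_MAP('raw_data', 'converted_data', min_shape=(128, 128))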
+
+##################################################################################################################################
+# Data Cleaning Procedures:
+def data_clean_func(image = None):
+    if image is not None:
+        #print(len(np.unique(image)))
+        #clean_image = image
+        '''
+        plt.hist(image)
+        plt.show()
+        '''
+        '''
+        plt.imshow(image, cmap='gray')
+        plt.title('Original Image')
+        plt.show()
+        '''
+        threshold = 0.85
+        default_fill = 0.0
+        frac_of_high_clip = 1/9
+        image[image > threshold] = default_fill
+        image[image < frac_of_high_clip*(1.0-threshold)] = default_fill
+        '''
+        plt.imshow(image, cmap='gray')
+        plt.title('After Clipping')
+        plt.show()
+        '''
+        image = scipy.ndimage.median_filter(image, size=(4, 4))
+        '''
+        plt.imshow(image, cmap='gray')
+        plt.title('After Median Filter')
+        plt.show()
+        '''
+        image = skimage.filters.gaussian(image, sigma=0.01, output=None, mode='reflect', preserve_range=True)
+        ####################################################################
+        # Added to ensure negligible loss when converting to int16 
+        # within exposure.equalize_adapthist
+        image = (image/np.max(image)*(2**16)).astype(np.uint16)
+        # A "Monkey Patch" could possibly be used as a cleaner solution, 
+        # but would be more involved than is necessary for my application
+        ####################################################################
+        image = exposure.equalize_adapthist(image,kernel_size=image.shape[0]//8, clip_limit=0.005, nbins=2**13)
+        image = image.astype(np.float64)
+        '''
+        plt.imshow(image, cmap='gray')
+        plt.title('After Local Adapt Hist')
+        plt.show()
+        '''
+        image = scipy.ndimage.median_filter(image, size=(3, 1))
+        image = scipy.ndimage.median_filter(image, size=(1, 3))
+        image = skimage.filters.gaussian(image, sigma=0.1, output=None, mode='reflect', preserve_range=True)
+        image = exposure.rescale_intensity(image, in_range='image', out_range=(0.0,1.0))
+        '''
+        plt.imshow(image, cmap='gray')
+        plt.title('Final Image')
+        plt.show()
+        '''
+        '''
+        plt.hist(image)
+        plt.show()
+        '''
+        clean_image = image.astype(np.float32)
+    else:
+        clean_image = image
+    return clean_image
+
+def data_cleaning(input_dir = 'converted_data', output_dir_name = 'cleaned_data',
+                  output_file_format ='.npy', delete_previous = True):
+    '''
+     Removes some noise from the data and makes the underlying vessel
+     structure more prominent
+     Input: input_dir -> directory that holds the data to be cleaned
+            output_dir_name -> name of the directory that will hold the cleaned data
+     Output: None
+    '''
+    file_list = os.listdir(input_dir)
+    clean_dir = os.path.join(os.getcwd(), output_dir_name)
+    if not os.path.exists(clean_dir):
+        os.mkdir(clean_dir)
+    elif delete_previous == True:
+        shutil.rmtree(clean_dir)
+        os.mkdir(clean_dir)
+    for file in file_list:
+        filename = os.fsdecode(file)
+        filepath = os.path.join(input_dir, filename)
+        if filepath.endswith('.npy'):
+            array = np.load(filepath)
+        else:
+            array = imageio.imread(filepath)
+            
+        # Defined data clean function above:
+        array = data_preprocessing_utils.data_clean_func(array)
+    
+        new_filepath = os.path.join(clean_dir, filename)
+        if output_file_format == '.npy':
+            new_filepath = Path(new_filepath)
+            new_filepath = new_filepath.with_suffix('')
+            new_filepath = new_filepath.with_suffix(output_file_format)
+            np.save(new_filepath, array, allow_pickle=True, fix_imports=True)
+        else:
+            new_filepath = Path(new_filepath)
+            new_filepath = new_filepath.with_suffix('')
+            new_filepath = new_filepath.with_suffix(output_file_format)
+            imageio.imwrite(new_filepath, array)
+    return  
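+
+# Example (sketch): clean every converted map into 'cleaned_data'
+#     data_cleaning(input_dir='converted_data', output_dir_name='cleaned_data')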
+
+    
+##################################################################################################################################
+# Data Separation / Validation Split Procedures:
+def data_seperation(input_dir, dataset_percentages, 
+                    delete_previous = False, file_format = '.npy', 
+                    scale = 1):
+    '''
+    Takes a directory of numpy arrays (or images) and creates a 'data' folder with separate
+    sections for training, validation, and testing according to the given percentages
+    Input: input_dir -> path to the folder of numpy (or image) files
+           dataset_percentages -> (% train, % test) such that % train + % test = 100
+           OR
+           dataset_percentages -> (% train, % val, % test) such that % train + % val + % test = 100
+    Output: new folders for training and testing or training/validation/testing
+    '''
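+    # Example (sketch): data_seperation('cleaned_data', (80, 10, 10)) creates
+    # data/train/input, data/val/input and data/test/input with an 80/10/10 split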
+    
+    # If just train and test
+    if len(dataset_percentages) == 2:
+        # Making Main data folder
+        new_dir = os.path.join(os.getcwd(), 'data')
+        if not os.path.exists(new_dir):
+            os.mkdir(new_dir)
+        
+        # Making train subfolder
+        train_dir = os.path.join(new_dir, 'train')
+        if not os.path.exists(train_dir):
+            os.mkdir(train_dir)
+            train_dir = os.path.join(train_dir, 'input')
+            os.mkdir(train_dir)
+        elif delete_previous == True:
+            shutil.rmtree(train_dir)
+            os.mkdir(train_dir)
+            train_dir = os.path.join(train_dir, 'input')
+            os.mkdir(train_dir)
+        
+        # Making test subfolder
+        test_dir = os.path.join(new_dir, 'test')
+        if not os.path.exists(test_dir):
+            os.mkdir(test_dir)
+            test_dir = os.path.join(test_dir, 'input')
+            os.mkdir(test_dir)
+        elif delete_previous == True:
+            shutil.rmtree(test_dir)
+            os.mkdir(test_dir)
+            test_dir = os.path.join(test_dir, 'input')
+            os.mkdir(test_dir)
+
+
+        file_list = os.listdir(input_dir)
+        total_num_imgs = len(file_list)
+        train_percent = dataset_percentages[0]
+        test_percent = dataset_percentages[1]
+        valid_inputs = (train_percent >= test_percent and train_percent <= 100 and
+                        test_percent <= 100 and train_percent > 0 and test_percent > 0 and
+                        train_percent + test_percent == 100)
+        if valid_inputs:
+            num_train = int(round(total_num_imgs * train_percent//100))
+        else:
+            num_train = int(round(total_num_imgs * 0.9))
+            print('ERROR: Please input valid percentages for dataset division')
+            print('In place of valid input the ratio 90% train, 10% test was used')
+        
+        index = 0
+        random.shuffle(file_list)
+        for file in file_list:
+            filename = os.fsdecode(file)
+            filepath = os.path.join(input_dir, filename)
+            # Loads File
+            if filepath.endswith('.npy'):
+                array = np.load(filepath)
+                array = array/np.max(array)*scale
+            else:
+                array = imageio.imread(filepath)
+                array = array/np.max(array)*scale
+            if index < num_train:
+                new_filepath = os.path.join(train_dir, filename)
+            else:
+                new_filepath = os.path.join(test_dir, filename)
+            # Saves File
+            if file_format == '.npy':
+                new_filepath = Path(new_filepath)
+                new_filepath = new_filepath.with_suffix('')
+                new_filepath = new_filepath.with_suffix(file_format)
+                np.save(new_filepath, array, allow_pickle=True, fix_imports=True)
+            else:
+                new_filepath = Path(new_filepath)
+                new_filepath = new_filepath.with_suffix('')
+                new_filepath = new_filepath.with_suffix(file_format)
+                imageio.imwrite(new_filepath, array)
+            index += 1
+        return train_dir, test_dir
+    # If train, val, and test
+    elif len(dataset_percentages) == 3:
+        # Making Main data folder
+        new_dir = os.path.join(os.getcwd(), 'data')
+        if not os.path.exists(new_dir):
+            os.mkdir(new_dir)
+            
+        # Making train subfolder
+        train_dir = os.path.join(new_dir, 'train')
+        if not os.path.exists(train_dir):
+            os.mkdir(train_dir)
+            train_dir = os.path.join(train_dir, 'input')
+            os.mkdir(train_dir)
+        elif delete_previous == True:
+            shutil.rmtree(train_dir)
+            os.mkdir(train_dir)
+            train_dir = os.path.join(train_dir, 'input')
+            os.mkdir(train_dir)
+        
+        # Making val subfolder
+        val_dir = os.path.join(new_dir, 'val')
+        if not os.path.exists(val_dir):
+            os.mkdir(val_dir)
+            val_dir = os.path.join(val_dir, 'input')
+            os.mkdir(val_dir)
+        elif delete_previous == True:
+            shutil.rmtree(val_dir)
+            os.mkdir(val_dir)
+            val_dir = os.path.join(val_dir, 'input')
+            os.mkdir(val_dir)
+        
+        # Making test subfolder
+        test_dir = os.path.join(new_dir, 'test')
+        if not os.path.exists(test_dir):
+            os.mkdir(test_dir)
+            test_dir = os.path.join(test_dir, 'input')
+            os.mkdir(test_dir)
+        elif delete_previous == True:
+            shutil.rmtree(test_dir)
+            os.mkdir(test_dir)
+            test_dir = os.path.join(test_dir, 'input')
+            os.mkdir(test_dir)
+            
+        file_list = os.listdir(input_dir)
+        total_num_imgs = len(file_list)
+        train_percent = dataset_percentages[0]
+        val_percent = dataset_percentages[1]
+        test_percent = dataset_percentages[2]
+        valid_inputs = (train_percent >= test_percent and train_percent >= val_percent 
+                        and train_percent <= 100 and val_percent <= 100 and test_percent <= 100
+                        and train_percent > 0 and val_percent > 0 and test_percent > 0 and
+                        train_percent + val_percent + test_percent == 100)
+        if valid_inputs:
+            num_train = int(round(total_num_imgs * train_percent//100))
+            num_val = int(round(total_num_imgs * val_percent//100))
+        else:
+            num_train = int(round(total_num_imgs * 0.9))
+            num_val = int(round((total_num_imgs - num_train)/2))
+            print('ERROR: Please input valid percentages for dataset division')
+            print('In place of a valid input the ratio 90% train, 5% val, 5% test was used')
+        
+        index = 0
+        random.shuffle(file_list)
+        for file in file_list:
+            filename = os.fsdecode(file)
+            filepath = os.path.join(input_dir, filename)
+            # Loads File
+            if filepath.endswith('.npy'):
+                array = np.load(filepath)
+                array = array/np.max(array)*scale
+            else:
+                array = imageio.imread(filepath)
+                array = array/np.max(array)*scale
+            if index < num_train:
+                new_filepath = os.path.join(train_dir, filename)
+            elif index < num_train + num_val:
+                new_filepath = os.path.join(val_dir, filename)
+            else:
+                new_filepath = os.path.join(test_dir, filename)
+            # Saves File
+            if file_format == '.npy':
+                new_filepath = Path(new_filepath)
+                new_filepath = new_filepath.with_suffix('')
+                new_filepath = new_filepath.with_suffix(file_format)
+                np.save(new_filepath, array, allow_pickle=True, fix_imports=True)
+            else:
+                new_filepath = Path(new_filepath)
+                new_filepath = new_filepath.with_suffix('')
+                new_filepath = new_filepath.with_suffix(file_format)
+                imageio.imwrite(new_filepath, array)
+            index += 1
+        return train_dir, val_dir, test_dir
+    else:
+        print('ERROR: Please divide into train/test or train/val/test')
+        return None
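+# Illustrative usage sketch (not part of the original script): the directory name
+# 'raw_arrays' and the 80/10/10 split below are assumptions for demonstration only.
+# train_dir, val_dir, test_dir = data_seperation('raw_arrays', (80, 10, 10),
+#                                                delete_previous=True, file_format='.npy')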

+ 122 - 0
data/purposeCombined/Directory/diml_to_interiornet.py

@@ -0,0 +1,122 @@
+import cv2
+import os
+import shutil
+import numpy as np
+
+
+def sample_to_interiornet():
+    diml_path = "/nvme/datasets/diml_depth/scenes"
+    hd7_path = "/nvme/datasets/diml_depth/HD7"
+
+    depth_paths = [
+        "/nvme/datasets/diml_depth/train/HR/11. Bedroom/depth_filled",
+        "/nvme/datasets/diml_depth/train/HR/12. Livingroom/depth_filled"]
+
+    depth_images = []
+    for path in depth_paths:
+        depth_images += [os.path.join(path, name) for name in os.listdir(path)
+                         if os.path.isfile(os.path.join(path, name))]
+
+    scene_paths = [os.path.join(diml_path, name) for name in os.listdir(diml_path)
+                   if os.path.isdir(os.path.join(diml_path, name))]
+
+    for scene_path in scene_paths:
+        frame_paths = [os.path.join(scene_path, name) for name in os.listdir(scene_path)
+                       if os.path.isfile(os.path.join(scene_path, name))]
+
+        new_frame_path = os.path.join(hd7_path, scene_path.split('/')[-1])
+        os.mkdir(new_frame_path)
+        os.mkdir(os.path.join(new_frame_path, "cam0"))
+        os.mkdir(os.path.join(new_frame_path, "depth0"))
+        os.mkdir(os.path.join(new_frame_path, "label0"))
+        os.mkdir(os.path.join(new_frame_path, "cam0", "data"))
+        os.mkdir(os.path.join(new_frame_path, "depth0", "data"))
+        os.mkdir(os.path.join(new_frame_path, "label0", "data"))
+        print(new_frame_path)
+        for i, frame_path in enumerate(frame_paths):
+            file_name = frame_path.split('/')[-1][:-6]
+            img = cv2.imread(frame_path, cv2.IMREAD_UNCHANGED)
+            print(file_name)
+            depth_path = [path for path in depth_images if file_name in path][0]
+            depth_img = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
+
+            img = cv2.resize(img, dsize=(img.shape[1] // 2, img.shape[0] // 2), interpolation=cv2.INTER_LINEAR)
+            depth_img = cv2.resize(depth_img, dsize=(depth_img.shape[1] // 2, depth_img.shape[0] // 2),
+                                   interpolation=cv2.INTER_LINEAR)
+            label_img = depth_img.copy()
+            label_img[:, :] = 3
+
+            cv2.imwrite(os.path.join(new_frame_path, "cam0", "data", "{}.png".format(i)), img)
+            cv2.imwrite(os.path.join(new_frame_path, "depth0", "data", "{}.png".format(i)), depth_img)
+            cv2.imwrite(os.path.join(new_frame_path, "label0", "data", "{}_instance.png".format(i)), label_img)
+            cv2.imwrite(os.path.join(new_frame_path, "label0", "data", "{}_nyu.png".format(i)), label_img)
+
+
+def full_to_interiornet():
+    scene_file_path = "/nvme/datasets/diml_depth/scenes.txt"
+    base_path = "/nvme/datasets/diml_depth/"
+    out_path = "/nvme/datasets/diml_depth/HD7/"
+    cam0_render = "/nvme/datasets/interiornet/3FO4IDEI1LAV_Bedroom/cam0.render"
+    num_frames = 20
+    shape = (672, 378)
+    np.random.seed(123)
+
+    with open(scene_file_path, 'r') as f:
+        scene_lines = f.readlines()
+
+    scene_lines = [sn.split('\n')[0] for sn in scene_lines]
+    scene_paths = [os.path.join(base_path, sn.split('-')[0]) for sn in scene_lines]
+    scene_ranges = [sn.split('-')[1] for sn in scene_lines]
+    scene_ranges = [(int(rn[1:-1].split(':')[0]), int(rn[1:-1].split(':')[1])) for rn in scene_ranges]
+
+    for i, scene_path in enumerate(scene_paths):
+        file_list = []
+        for j in range(scene_ranges[i][0], scene_ranges[i][1]+1):
+            scene_path_col = os.path.join(scene_path, "{}/col".format(j))
+            if os.path.exists(scene_path_col):
+                file_list += [os.path.join(scene_path_col, dn) for dn in os.listdir(scene_path_col)]
+
+        scene_count = len(os.listdir(out_path))
+        scene_out_path = "{:02d}DIML_{}".format(scene_count + 1, scene_path.split('/')[-2].split(' ')[1])
+        scene_out_path = os.path.join(out_path, scene_out_path)
+
+        if os.path.exists(scene_out_path):
+            shutil.rmtree(scene_out_path)
+        os.mkdir(scene_out_path)
+        os.mkdir(os.path.join(scene_out_path, "cam0"))
+        os.mkdir(os.path.join(scene_out_path, "depth0"))
+        os.mkdir(os.path.join(scene_out_path, "label0"))
+        os.mkdir(os.path.join(scene_out_path, "cam0", "data"))
+        os.mkdir(os.path.join(scene_out_path, "depth0", "data"))
+        os.mkdir(os.path.join(scene_out_path, "label0", "data"))
+        shutil.copyfile(cam0_render, os.path.join(scene_out_path, "cam0.render"))
+        print(scene_out_path)
+
+        frame_paths = np.random.choice(file_list, num_frames, False)
+        for j, frame_path in enumerate(frame_paths):
+            img = cv2.imread(frame_path, cv2.IMREAD_UNCHANGED)
+            depth_path = frame_path.replace('/col/', '/up_png/')
+            depth_path = depth_path.replace('_c.png', '_ud.png')
+            depth_img = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
+
+            if depth_img is None:
+                print(depth_path)
+                exit()
+            if img is None:
+                print(frame_path)
+                exit()
+
+            img = cv2.resize(img, dsize=shape, interpolation=cv2.INTER_LINEAR)
+            depth_img = cv2.resize(depth_img, dsize=shape,
+                                   interpolation=cv2.INTER_LINEAR)
+            label_img = depth_img.copy()
+            label_img[:, :] = 3
+
+            cv2.imwrite(os.path.join(scene_out_path, "cam0", "data", "{}.png".format(j)), img)
+            cv2.imwrite(os.path.join(scene_out_path, "depth0", "data", "{}.png".format(j)), depth_img)
+            cv2.imwrite(os.path.join(scene_out_path, "label0", "data", "{}_instance.png".format(j)), label_img)
+            cv2.imwrite(os.path.join(scene_out_path, "label0", "data", "{}_nyu.png".format(j)), label_img)
+
+
+if __name__ == '__main__':
+    full_to_interiornet()

+ 177 - 0
data/purposeCombined/Directory/ego_to_json.py

@@ -0,0 +1,177 @@
+import os 
+import shutil 
+import json
+import scipy.io
+import random
+
+# ego_to_json.py prepares egohands_data so that it can be converted into the
+# different formats used by the networks.
+# Three folders: train, test, val
+
+# |-- train
+# |  | -- images
+# |  | -- annotations.json
+# |-- val
+# |  | -- images
+# |  | -- annotations.json
+# |-- test
+# |  | -- images
+# |  | -- annotations.json
+
+
+# annotations.json:
+# { 
+#     "CARDS_OFFICE_H_T_frame_0001.jpg": 
+#     {
+#         "name": "CARDS_OFFICE_H_T_frame_0001.jpg",
+#         "objects": [[]]
+#     },
+#     "CARDS_OFFICE_H_T_frame_0002.jpg":
+#     {
+#         "name": "CARDS_OFFICE_H_T_frame_0002.jpg",
+#         "objects": [[]]
+#     }
+# }
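+# Illustrative sketch (not part of the original script) of how the annotations.json
+# produced below can be consumed; the path 'train/annotations.json' is an assumption.
+# import json
+# with open('train/annotations.json') as f:
+#     annotations = json.load(f)
+# for name, entry in annotations.items():
+#     print(name, len(entry["objects"]), "hand polygons")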
+
+ROOT_DIR = "../egohands_data"
+ANNOTATION_FILE = "polygons.mat"
+SAVE_FILE = "annotations.json"
+
+
+def split_test():
+    os.makedirs(os.path.join(ROOT_DIR, "test"))
+    os.makedirs(os.path.join(ROOT_DIR, "val"))
+    os.makedirs(os.path.join(ROOT_DIR, "train"))
+    
+    shutil.move(os.path.join(ROOT_DIR, "CARDS_COURTYARD_B_T"), os.path.join(ROOT_DIR, "test", "CARDS_COURTYARD_B_T"))
+    shutil.move(os.path.join(ROOT_DIR, "CARDS_OFFICE_S_B"), os.path.join(ROOT_DIR, "test", "CARDS_OFFICE_S_B"))
+    shutil.move(os.path.join(ROOT_DIR, "CHESS_COURTYARD_B_T"), os.path.join(ROOT_DIR, "test", "CHESS_COURTYARD_B_T"))
+    shutil.move(os.path.join(ROOT_DIR, "CHESS_LIVINGROOM_T_H"), os.path.join(ROOT_DIR, "test", "CHESS_LIVINGROOM_T_H"))   
+    shutil.move(os.path.join(ROOT_DIR, "JENGA_LIVINGROOM_S_T"), os.path.join(ROOT_DIR, "test", "JENGA_LIVINGROOM_S_T"))
+    shutil.move(os.path.join(ROOT_DIR, "JENGA_OFFICE_H_T"), os.path.join(ROOT_DIR, "test", "JENGA_OFFICE_H_T"))
+    shutil.move(os.path.join(ROOT_DIR, "PUZZLE_COURTYARD_H_T"), os.path.join(ROOT_DIR, "test", "PUZZLE_COURTYARD_H_T"))
+    shutil.move(os.path.join(ROOT_DIR, "PUZZLE_LIVINGROOM_T_B"), os.path.join(ROOT_DIR, "test", "PUZZLE_LIVINGROOM_T_B"))
+
+    shutil.move(os.path.join(ROOT_DIR, "CARDS_LIVINGROOM_S_H"), os.path.join(ROOT_DIR, "val", "CARDS_LIVINGROOM_S_H"))
+    shutil.move(os.path.join(ROOT_DIR, "CHESS_COURTYARD_H_S"), os.path.join(ROOT_DIR, "val", "CHESS_COURTYARD_H_S"))
+    shutil.move(os.path.join(ROOT_DIR, "JENGA_COURTYARD_T_S"), os.path.join(ROOT_DIR, "val", "JENGA_COURTYARD_T_S"))
+    shutil.move(os.path.join(ROOT_DIR, "PUZZLE_OFFICE_S_T"), os.path.join(ROOT_DIR, "val", "PUZZLE_OFFICE_S_T"))
+
+    train = ['CARDS_COURTYARD_H_S','CARDS_COURTYARD_S_H','CARDS_COURTYARD_T_B','CARDS_LIVINGROOM_B_T','CARDS_LIVINGROOM_H_S','CARDS_LIVINGROOM_T_B','CARDS_OFFICE_B_S','CARDS_OFFICE_H_T','CARDS_OFFICE_T_H','CHESS_COURTYARD_S_H','CHESS_COURTYARD_T_B','CHESS_LIVINGROOM_B_S','CHESS_LIVINGROOM_H_T','CHESS_LIVINGROOM_S_B','CHESS_OFFICE_B_S','CHESS_OFFICE_H_T','CHESS_OFFICE_S_B','CHESS_OFFICE_T_H','JENGA_COURTYARD_B_H','JENGA_COURTYARD_H_B','JENGA_COURTYARD_S_T','JENGA_LIVINGROOM_B_H','JENGA_LIVINGROOM_H_B','JENGA_LIVINGROOM_T_S','JENGA_OFFICE_B_S','JENGA_OFFICE_S_B','JENGA_OFFICE_T_H','PUZZLE_COURTYARD_B_S','PUZZLE_COURTYARD_S_B','PUZZLE_COURTYARD_T_H','PUZZLE_LIVINGROOM_B_T','PUZZLE_LIVINGROOM_H_S','PUZZLE_LIVINGROOM_S_H','PUZZLE_OFFICE_B_H','PUZZLE_OFFICE_H_B','PUZZLE_OFFICE_T_S']
+
+    for folder in train:
+        shutil.move(os.path.join(ROOT_DIR, folder), os.path.join(ROOT_DIR, "train", folder))
+
+def json_test():
+    # test_dir = os.path.join(ROOT_DIR, "test")
+    # os.makedirs(os.path.join(test_dir, "images"))
+    # img_dir = os.path.join(test_dir, "images")
+
+    # create_annotations(test_dir,img_dir)
+
+    # val_dir = os.path.join(ROOT_DIR, "val")
+    # os.makedirs(os.path.join(val_dir, "images"))
+    # img_dir = os.path.join(val_dir, "images")
+
+    # create_annotations(val_dir,img_dir)
+
+    train_dir = os.path.join(ROOT_DIR, "train")
+    # os.makedirs(os.path.join(train_dir, "images"))
+    img_dir = os.path.join(train_dir, "images")
+
+    create_annotations(train_dir,img_dir)
+
+   
+def json_train_val():
+    os.makedirs(os.path.join(ROOT_DIR, "tmp"))
+    tmp_dir = os.path.join(ROOT_DIR, "tmp")
+    os.makedirs(os.path.join(tmp_dir, "images"))
+    img_dir = os.path.join(tmp_dir, "images")
+
+    for dir_name in os.listdir(ROOT_DIR):
+        if not (dir_name == "tmp" or dir_name == "test"):
+            shutil.move(os.path.join(ROOT_DIR, dir_name), os.path.join(tmp_dir, dir_name))
+
+    create_annotations(tmp_dir, img_dir)
+
+def create_annotations(directory, img_dir):
+    annotations = {}
+    for dir_name in os.listdir(directory):
+        if not (dir_name == "images"):
+            for _, _, files in os.walk(os.path.join(directory, dir_name)):
+                mat = scipy.io.loadmat(os.path.join(directory, dir_name, ANNOTATION_FILE))
+
+                for i, img_file in enumerate(sorted(files)):
+                    if not (img_file.endswith(".mat")):
+                        new_img_file = dir_name + "_" + img_file
+
+                        image = {
+                            "name":     new_img_file,
+                            "objects":  []
+                        }
+
+                        for segmentation in mat["polygons"][0][i]:
+                            if segmentation.any():
+                                image["objects"].append(segmentation.tolist())
+                        
+                        annotations[new_img_file] = image
+
+                        shutil.move(os.path.join(directory, dir_name, img_file), os.path.join(img_dir, new_img_file))
+
+    with open(os.path.join(directory, SAVE_FILE), 'w') as output_json_file:
+        json.dump(annotations, output_json_file)
+
+    for dir_name in os.listdir(directory):
+        if not (dir_name == "images" or dir_name == "annotations.json"):
+            shutil.rmtree(os.path.join(directory, dir_name))
+
+def split_train_val():    
+    tmp_dir = os.path.join(ROOT_DIR, "tmp")
+    
+    os.makedirs(os.path.join(ROOT_DIR, "train"))
+    train_dir = os.path.join(ROOT_DIR, "train")
+    os.makedirs(os.path.join(train_dir, "images"))
+    
+    os.makedirs(os.path.join(ROOT_DIR, "val"))
+    val_dir = os.path.join(ROOT_DIR, "val")
+    os.makedirs(os.path.join(val_dir, "images"))
+
+    # Opening JSON file
+    with open(os.path.join(tmp_dir, 'annotations.json')) as json_file:
+        data = json.load(json_file)
+
+        # 0.1765 of the remaining data is roughly 15% of the full dataset, since the test split already holds about 20% (not exactly)
+        val_keys = random.sample(list(data), round(len(data) * 0.1765))
+
+        validation = {k: v for k, v in data.items() if k in val_keys}
+        train = {k: v for k, v in data.items() if k not in val_keys}
+
+    with open(os.path.join(val_dir, SAVE_FILE), 'w') as output_json_file:
+        json.dump(validation, output_json_file)
+
+    with open(os.path.join(train_dir, SAVE_FILE), 'w') as output_json_file:
+        json.dump(train, output_json_file)
+        
+    for key, _ in validation.items():
+        shutil.move(os.path.join(tmp_dir, "images", key), os.path.join(val_dir, "images", key))
+
+    for key, _ in train.items():
+        shutil.move(os.path.join(tmp_dir, "images", key), os.path.join(train_dir, "images"))
+
+    shutil.rmtree(tmp_dir)
+
+def move_to_folder():
+    os.makedirs(os.path.join(ROOT_DIR, "json"))
+    json_dir = os.path.join(ROOT_DIR, "json")
+    shutil.move(os.path.join(ROOT_DIR, "test"), json_dir)
+    shutil.move(os.path.join(ROOT_DIR, "val"), json_dir)
+    shutil.move(os.path.join(ROOT_DIR, "train"), json_dir)
+
+    shutil.move(ROOT_DIR, "../data")
+
+
+# split_test()
+json_test()
+# json_train_val()
+# split_train_val()
+move_to_folder()

+ 107 - 0
data/purposeCombined/Directory/esquema.py

@@ -0,0 +1,107 @@
+import errno
+import os
+from flask import jsonify
+
+def crearFacultad(request):
+    
+    try:
+        
+        json_req = request.json
+        fac_nombre = json_req['fac_nombre']
+        
+        os.mkdir('resources/'+fac_nombre)
+         
+    except OSError:
+        
+        return jsonify({"message":"error al crear facultad"}),500
+    
+    else:
+        
+        return jsonify({"message":"facultad creada"}),200
+    
+
+def crearCarrera(request):
+    
+    try:
+        
+        json_req = request.json
+        fac_nombre = json_req['fac_nombre']
+        car_nombre= json_req['car_nombre']
+        
+        os.mkdir('resources/'+fac_nombre+'/'+car_nombre)
+        
+    except OSError:
+        
+        return jsonify({"message":"error al crear carrera"}),500
+    
+    else:
+        
+        return jsonify({"message":"carrera creada"}),200
+
+
+def crearAsignatura(request):
+    
+    try:
+        
+        json_req = request.json
+        fac_nombre = json_req['fac_nombre']
+        car_nombre= json_req['car_nombre']
+        asig_identificador=json_req['asig_identificador']
+        
+        path=('resources/'+fac_nombre+'/'+car_nombre+'/'+asig_identificador+"/")
+        
+        if not os.path.isdir(path):
+            os.makedirs(path)
+        
+        os.mkdir(path+'Portafolios')
+        
+    except OSError as e:
+        print(e.strerror)
+        return jsonify({"message":"error al crear asignatura"}),500
+
+    else:
+        
+        return jsonify({"message":"asignatura creada"}),200
+    
+
+def crearPortafolio(request):
+    
+    try:
+        
+        json_req = request.json
+        fac_nombre = json_req['fac_nombre']
+        car_nombre= json_req['car_nombre']
+        asig_identificador=json_req['asig_identificador']
+        per_cedula=json_req['per_cedula']
+        
+        pathCedula=('resources/'+fac_nombre+'/'+car_nombre+'/'+asig_identificador+'/Portafolios/'+per_cedula)
+        os.mkdir(pathCedula)
+        
+        pathDatosInf=(pathCedula+'/1. Datos informativos')
+        os.mkdir(pathDatosInf)
+        
+        pathElmentosCurri=(pathCedula+'/2. Elementos curriculares')
+        os.mkdir(pathElmentosCurri)
+        os.mkdir(pathElmentosCurri+'/a. Syllabus')
+        os.mkdir(pathElmentosCurri+'/b. Expectativas')
+        os.mkdir(pathElmentosCurri+'/c. Apuntes de clase')
+        os.mkdir(pathElmentosCurri+'/d. Evaluaciones')
+        os.mkdir(pathElmentosCurri+'/e. Investigaciones')
+        os.mkdir(pathElmentosCurri+'/f. Actividades de experimentación')
+        os.mkdir(pathElmentosCurri+'/g. Proyectos')
+        os.mkdir(pathElmentosCurri+'/h. Estudios de caso')
+        os.mkdir(pathElmentosCurri+'/i. Planteamiento de problemas')
+        os.mkdir(pathElmentosCurri+'/j. Registro de asistencia')
+        os.mkdir(pathElmentosCurri+'/k. Registro de observaciones')
+        os.mkdir(pathElmentosCurri+'/l. Tareas intraclases')
+        os.mkdir(pathElmentosCurri+'/m. Tareas autónomas')
+        os.mkdir(pathElmentosCurri+'/n. Tareas de refuerzo')
+        
+        pathInformeFin=(pathCedula+'/3. Informe final')
+        os.mkdir(pathInformeFin)
+    
+    except OSError as error:
+        print(error)
+        return jsonify({"message":"error al crear portafolio"}),500
+    else:
+        return jsonify({"message":"portafolio creado"}),200  

+ 41 - 0
data/purposeCombined/Directory/file_handler.py

@@ -0,0 +1,41 @@
+import os
+import time
+import traceback
+
+def file_storage(file_path,suffix):
+    r"""
+        file_path :: The file absolute path
+        suffix :: filename
+
+        file_path=C:\Users\Desktop\video_
+        filename = abc.py
+        return C:\Users\Desktop\video_2020\12\12\abc.py
+    """
+    tm = time.localtime(time.time())
+    # Get the current system year, month, and day
+    year = time.strftime('%Y', tm)
+    month = time.strftime('%m', tm)
+    day = time.strftime('%d', tm)
+    # Build the dated storage directories from the current date
+    file_year = file_path + '/' + year
+    file_month = file_year + '/' + month
+    file_day = file_month + '/' + day
+    # Check whether each path exists and create it if it does not
+    if not os.path.exists(file_path):
+        os.makedirs(file_path)
+        os.mkdir(file_year)
+        os.mkdir(file_month)
+        os.mkdir(file_day)
+    else:
+        if not os.path.exists(file_year):
+            os.mkdir(file_year)
+            os.mkdir(file_month)
+            os.mkdir(file_day)
+        else:
+            if not os.path.exists(file_month):
+                os.mkdir(file_month)
+                os.mkdir(file_day)
+            else:
+                if not os.path.exists(file_day):
+                    os.mkdir(file_day)
+    return os.path.join(file_day,suffix)
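+# Illustrative usage sketch (not part of the original module); the base path and
+# filename below are assumptions.
+# target = file_storage(r"C:\Users\Desktop\video_", "abc.py")
+# print(target)  # -> <base>/<year>/<month>/<day>/abc.py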

+ 130 - 0
data/purposeCombined/Directory/generate_directories.py

@@ -0,0 +1,130 @@
+"""
+Taken from - https://github.com/alexhamiltonRN
+"""
+from pathlib import Path
+
+def generate_patient_ids(dataset_type):
+    """
+    This function generates the patient_ids for the directories to be created below. 
+    Ids are extracted from the raw dataset file structure.
+    """
+    
+    patient_ids = []
+    path_to_date = Path()
+    
+    if dataset_type == str(1):
+        path_to_data = Path('E:/Memoire/ProstateX/train-data')
+    else:
+        path_to_data = Path('E:/Memoire/ProstateX/test-data')
+    
+    # Get list of patient_ids in folder
+    patient_folders = [x for x in path_to_data.iterdir() if x.is_dir()]
+    for patient_folder in patient_folders:
+        patient_ids.append(str(patient_folder.stem))
+    return patient_ids 
+
+def generate_nifti_ds(patient_ids, dataset_type):
+    """
+    This function generates the directory structure for the nifti files
+    generated from the dicom files.
+
+    Directory structure for generated data:
+    ProstateX/generated/train/nifti
+    ProstateX/generated/test/nifti
+    """
+    for patient_id in patient_ids:
+        if dataset_type == str(1):
+            new_path = Path(str('E:/Memoire/ProstateX/generated/train/nifti/' + patient_id))
+            new_path.mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('t2').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('bval').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('adc').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('ktrans').mkdir(parents = True, exist_ok = True)
+
+        else:
+            new_path = Path(str('E:/Memoire/ProstateX/generated/test/nifti/' + patient_id))
+            new_path.mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('t2').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('bval').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('adc').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('ktrans').mkdir(parents = True, exist_ok = True)
+
+def generate_nifti_resampled_ds(patient_ids, dataset_type):
+    """
+    This function generates the directory structure for the nifti files
+    generated from the dicom files.
+
+    Directory structure for generated data:
+    ProstateX/generated/train/nifti_resampled
+    ProstateX/generated/test/nifti_resampled
+    """
+    for patient_id in patient_ids:
+        if dataset_type == str(1):
+            new_path = Path(str('E:/Memoire/ProstateX/generated/train/nifti_resampled/' + patient_id))
+            new_path.mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('t2').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('bval').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('adc').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('ktrans').mkdir(parents = True, exist_ok = True)
+
+        else:
+            new_path = Path(str('E:/Memoire/ProstateX/generated/test/nifti_resampled/' + patient_id))
+            new_path.mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('t2').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('bval').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('adc').mkdir(parents = True, exist_ok = True)
+            new_path.joinpath('ktrans').mkdir(parents = True, exist_ok = True)
+
+def generate_numpy_ds(dataset_type):
+    """
+    This function generates the directory structure for the final numpy
+    arrays for the training and test sets. 
+    
+    Directory structure for processed data:
+    ProstateX/generated/train/numpy
+    ProstateX/generated/test/numpy
+    """
+    if dataset_type == str(1):
+        new_path = Path('E:/Memoire/ProstateX/generated/train/numpy/')
+        new_path.mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('t2').mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('bval').mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('adc').mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('ktrans').mkdir(parents = True, exist_ok = True)
+    else:
+        new_path = Path('E:/Memoire/ProstateX/generated/test/numpy/')
+        new_path.mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('t2').mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('bval').mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('adc').mkdir(parents = True, exist_ok = True)
+        new_path.joinpath('ktrans').mkdir(parents = True, exist_ok = True)
+        
+def generate_dataframe_ds(dataset_type):
+    if dataset_type == str(1):
+        new_path = Path('E:/Memoire/ProstateX/generated/train/dataframes/')
+        new_path.mkdir(parents = True, exist_ok = True)
+
+    else:
+        new_path = Path('E:/Memoire/ProstateX/generated/test/dataframes/')
+        new_path.mkdir(parents = True, exist_ok = True)
+
+def generate_logs_ds(dataset_type):
+    if dataset_type == str(1):
+        new_path = Path('E:/Memoire/ProstateX/generated/train/logs/')
+        new_path.mkdir(parents = True, exist_ok = True)
+
+    else:
+        new_path = Path('E:/Memoire/ProstateX/generated/test/logs/')
+        new_path.mkdir(parents = True, exist_ok = True)
+
+def main():
+    dataset_type = input('Generate directory structure for which type of data (1-Train; 2-Test):')
+    patient_ids = generate_patient_ids(dataset_type)
+    generate_nifti_ds(patient_ids, dataset_type)
+    generate_nifti_resampled_ds(patient_ids, dataset_type)
+    generate_numpy_ds(dataset_type)
+    generate_dataframe_ds(dataset_type)
+    generate_logs_ds(dataset_type)
+    print('Done creating directory structure...')
+
+main()

+ 167 - 0
data/purposeCombined/Directory/logging.py

@@ -0,0 +1,167 @@
+import logging
+import os
+import traceback
+
+from datetime import datetime
+from django.conf import settings
+from django.core.files import File
+
+
+def set():
+    if not os.path.exists(settings.MEDIA_ROOT):
+        try:
+            os.mkdir(settings.MEDIA_ROOT)
+        except OSError:
+            return
+
+    if not os.path.exists(settings.MEDIA_ROOT+'/download'):
+        try:
+            os.mkdir(settings.MEDIA_ROOT+'/download')
+        except OSError:
+            return
+
+    if not os.path.exists(settings.BASE_DIR + "/log"):
+        try:
+            os.mkdir(settings.BASE_DIR + "/log")
+        except OSError:
+            return
+    if not os.path.exists(settings.BASE_DIR + "/log/message"):
+        try:
+            os.mkdir(settings.BASE_DIR + "/log/message")
+        except OSError:
+            return
+    if not os.path.exists(settings.BASE_DIR + "/log/error"):
+        try:
+            os.mkdir(settings.BASE_DIR + "/log/error")
+        except OSError:
+            return
+    if not os.path.exists(settings.BASE_DIR + "/log/log"):
+        try:
+            os.mkdir(settings.BASE_DIR + "/log/log")
+        except OSError:
+            return
+    if not os.path.exists(settings.MEDIA_ROOT + "/tgbot"):
+        try:
+            os.mkdir(settings.MEDIA_ROOT + "/tgbot")
+        except OSError:
+            return
+
+
+
+def message(message):
+    DirLogs = settings.BASE_DIR + "/log"
+    if not os.path.exists(DirLogs):
+        try:
+            os.mkdir(DirLogs)
+        except OSError:
+            return
+    DirLogs = settings.BASE_DIR + "/log/message"
+    if not os.path.exists(DirLogs):
+        try:
+            os.mkdir(DirLogs)
+        except OSError:
+            return
+    date = datetime.now()
+    month = "0" if date.month < 10 else ""
+    month += str(date.month)
+    day = "0" if date.day < 10 else ""
+    day += str(date.day)
+    StrDate = "%s%s%s" % (str(date.year), month, day)
+    file = open(DirLogs + '/message_' + StrDate + '.log', 'a')
+    my_file = File(file)
+    my_file.write("[%s]: %s\n" % (
+        str(datetime.now().strftime("%d-%m-%Y %H:%M:%S")),
+        str(message)
+        ))
+    my_file.close()
+    file.close()
+
+
+def log(message):
+    DirLogs = settings.BASE_DIR + "/log"
+    if not os.path.exists(DirLogs):
+        try:
+            os.mkdir(DirLogs)
+        except OSError:
+            return
+    DirLogs = settings.BASE_DIR + "/log/log"
+    if not os.path.exists(DirLogs):
+        try:
+            os.mkdir(DirLogs)
+        except OSError:
+            return
+    date = datetime.now()
+    month = "0" if date.month < 10 else ""
+    month += str(date.month)
+    day = "0" if date.day < 10 else ""
+    day += str(date.day)
+    StrDate = "%s%s%s" % (str(date.year), month, day)
+    file = open(DirLogs + '/message_' + StrDate + '.log', 'a')
+    my_file = File(file)
+    my_file.write("[%s]: %s\n" % (
+        str(datetime.now().strftime("%d-%m-%Y %H:%M:%S")),
+        str(message)))
+    my_file.close()
+    file.close()
+
+
+def error(message):
+    DirLogs = settings.BASE_DIR + "/log"
+    if not os.path.exists(DirLogs):
+        try:
+            os.mkdir(DirLogs)
+        except OSError:
+            return
+    DirLogs = settings.BASE_DIR + "/log/error"
+    if not os.path.exists(DirLogs):
+        try:
+            os.mkdir(DirLogs)
+        except OSError:
+            return
+    date = datetime.now()
+    month = "0" if date.month < 10 else ""
+    month += str(date.month)
+    day = "0" if date.day < 10 else ""
+    day += str(date.day)
+    StrDate = "%s%s%s" % (str(date.year), month, day)
+    file = open(DirLogs + '/errors_' + StrDate + '.log', 'a')
+    my_file = File(file)
+    my_file.write("[%s]: %s\n" % (
+        str(datetime.now().strftime("%d-%m-%Y %H:%M:%S")),
+        str(message)))
+    my_file.close()
+    file.close()
+
+
+def check_dir():
+    try:
+        if not os.path.exists(settings.MEDIA_ROOT):
+            try:
+                os.mkdir(settings.MEDIA_ROOT)
+            except OSError:
+                logging.error(traceback.format_exc())
+                return
+        if not os.path.exists(settings.MEDIA_ROOT+"/att"):
+            try:
+                os.mkdir(settings.MEDIA_ROOT+"/att")
+            except OSError:
+                logging.error(traceback.format_exc())
+                return
+        if not os.path.exists(settings.MEDIA_ROOT+"/att/biophoto"):
+            try:
+                os.mkdir(settings.MEDIA_ROOT+"/att/biophoto")
+            except OSError:
+                logging.error(traceback.format_exc())
+                return
+        if not os.path.exists(settings.ATT_ROOT):
+            try:
+                os.mkdir(settings.ATT_ROOT)
+            except OSError:
+                logging.error(traceback.format_exc())
+                return
+        if not os.path.exists(settings.ATT_ROOT+"/USERPIC"):
+            try:
+                os.mkdir(settings.ATT_ROOT+"/USERPIC")
+            except OSError:
+                logging.error(traceback.format_exc())
+                return
+    except Exception as err:
+        logging.error('%s\n%s' % (traceback.format_exc(), str(err)))
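+# Illustrative usage sketch (not part of the original module); requires configured
+# Django settings (MEDIA_ROOT, BASE_DIR, ATT_ROOT).
+# set()                     # create the media/ and log/ folder layout
+# message("bot started")    # appended to log/message/message_<date>.log
+# error("something broke")  # appended to log/error/errors_<date>.log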

+ 27 - 0
data/purposeCombined/Directory/make_folder.py

@@ -0,0 +1,27 @@
+import os
+def make_folder(dealername):
+    os.getcwd()
+    #'C:\\Users\\corcoras\\Desktop\\FY14 INSTALLS'
+    install_dir = 'C:\\Users\\corcoras\\Desktop\\FY14 INSTALLS'
+    os.chdir(install_dir)
+    #dealername = "Rene motors"
+    dealername_no_space = dealername.replace(" ", "_")
+    dealername_no_space
+    #'Don_Ayres_Honda'
+    dealer_folder = dealername_no_space[:1]
+    dealer_folder
+    #'D'
+    os.chdir(dealer_folder)
+    os.getcwd()
+    #'C:\\Users\\corcoras\\Desktop\\FY14 INSTALLS\\D'
+    dealername_spaces = dealername_no_space.replace("_", " ")
+    dealername_spaces
+    #'Don Ayres Honda'
+    os.mkdir(dealername_spaces)
+    os.chdir(dealername_spaces)
+    os.getcwd()
+    #'C:\\Users\\corcoras\\Desktop\\FY14 INSTALLS\\D\\Don Ayres Honda'
+    os.mkdir("config")
+    os.mkdir("original")
+    os.mkdir("final")
+    print(f"\nFolder was created : {install_dir}\{dealer_folder}\{dealername_spaces}")

+ 90 - 0
data/purposeCombined/Directory/mkdir.py

@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+
+
+"""
+Pydir is mkdir for Python modules.
+
+Example:
+    $ pydir -v myproject/module/etc
+    Created directory myproject/module/etc
+    Created file myproject/__init__.py
+    Created file myproject/module/__init__.py
+    Created file myproject/module/etc/__init__.py
+"""
+
+
+from optparse import OptionParser, make_option
+import os
+import os.path
+import sys
+
+
+VERSION = (0, 2, 1)
+
+
+def version_string():
+    return '.'.join(str(component) for component in VERSION)
+
+
+def main():
+    usage = '%prog path [path2] [path3] [pathN]\n\n' + __doc__.strip()
+    parser = OptionParser(usage=usage, option_list=(
+        make_option('-v', '--verbose', default=False, action='store_true'),
+    ))
+    
+    options, args = parser.parse_args()
+    
+    if len(args) == 0:
+        parser.error('No paths given.')
+    
+    output = sys.stdout if options.verbose else None
+    
+    for index, path in enumerate(args):
+        path = path.replace('.', os.path.sep)
+        
+        if output and index > 0:
+            output.write('\n')
+        
+        try:
+            pydir(path, output=output)
+        except BaseException as exc:
+            print ('Couldn\'t create %s: %s' % (path, exc,))
+
+
+def pydir(path, output=None):
+    """
+    Create a directory structure for a Python module, including __init__.py
+    files. Converts existing directories into modules.
+    """
+    
+    def info(line):
+        if output:
+            output.write(line)
+            output.write('\n')
+    
+    try:
+        os.makedirs(path)
+    except (OSError, IOError) as exc:
+        if os.path.isdir(path):
+            info('Path already exists: %s' % path)
+        else:
+            raise
+    else:
+        info('Created directory %s' % path)
+    
+    segments = path.split(os.path.sep)
+    for i in range(len(segments)):
+        init_filename = os.path.sep.join(segments[:i+1] + ['__init__.py'])
+        if not os.path.isfile(init_filename):
+            try:
+                open(init_filename, 'w').close()
+            except (OSError, IOError) as exc:
+                raise
+            else:
+                info('Created file %s' % (init_filename,))
+        else:
+            info('File already exists: %s' % (init_filename,))
+
+
+if __name__ == '__main__':
+    main()

+ 135 - 0
data/purposeCombined/Directory/mkdirPypi.py

@@ -0,0 +1,135 @@
+
+
+                        #********************************************************************************#
+                        #                                                                                #
+                        #                                  нεℓℓσ,вαтεs!                                  #
+                        #                                                                                #
+                        #   filename: mkdirPypi.py                                                       #
+                        #   created: 2022-03-10                                                          #
+                        #   system: Windows                                                              #
+                        #   version: 64bit                                                               #
+                        #                                       by: Bates <https://github.com/batestin1> #
+                        #********************************************************************************#
+                        #                           import your librarys below                           #
+                        #********************************************************************************#
+
+from pathlib import Path
+from datetime import date
+import getpass
+import platform
+import subprocess
+
+def mkdirPypi(file):
+    users=getpass.getuser()
+    res = subprocess.run(["git", "config", "user.name"], stdout=subprocess.PIPE)
+    git_username = res.stdout.strip().decode()
+    filename = file.replace(' ', '_')
+    #create a home directory#
+    cd = 'Codigo fonte'
+    dw = 'Download'
+    linkGit = f'https://github.com/{git_username}/'
+    codigo_fonte = f"{cd} : {linkGit}"
+    download = f"{dw} : {linkGit}"
+    project_urls = {codigo_fonte, download}
+    path = Path(f"./{filename}")
+    path.mkdir(parents=True, exist_ok=True)
+    data_atual = date.today()
+    data = f"""{data_atual.strftime('%Y-%m-%d')}"""
+
+    #### create a LICENSE ####
+    textLic ="""
+MIT License
+Copyright (c) 2018 Yan Orestes
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge,publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+    """
+    with open(f"{filename}/LICENSE", "w") as licensa:
+        licensa.write(textLic)
+
+    #### create a README.md ###
+
+    textReadm = f"""
+<h1 align="center">
+<img src="https://img.shields.io/static/v1?label={filename.upper()}%20POR&message={users}&color=7159c1&style=flat-square&logo=ghost"/>
+<h3> <p align="center">{filename.upper()} </p> </h3>
+<h3> <p align="center"> ================= </p> </h3>
+>> <h3> Resume </h3>
+<p> text here </p>
+>> <h3> How install </h3>
+```
+code here
+```
+>> <h3> How Works </h3>
+```
+code here
+```
+    """
+    with open(f"{filename}/README.md", "w") as readme:
+        readme.write(textReadm)
+
+    ###setup.cfg###
+
+    cfgTxt = """
+[metadata]
+description-file = README.md
+license_file = LICENSE.txt
+"""
+    with open(f"{filename}/setup.cfg", "w") as cfgsetup:
+        cfgsetup.write(cfgTxt)
+
+    ###setup.py ######
+
+    setupyT = f"""
+from setuptools import setup
+setup(
+    name = '{filename}',
+    version = '1.0.0',
+    author = '{users}',
+    author_email = '{users}@mailer.com.br',
+    packages = ['{filename}'],
+    description = 'a way to make your life easier',
+    long_description = 'file: README.md',
+    url = 'https://github.com/{git_username}/',
+    project_urls = {project_urls},
+    keywords = 'a way to make your life easier',
+    classifiers = []
+)"""
+
+    with open(f"{filename}/setup.py", "w") as setupy:
+        setupy.write(setupyT)
+
+    #### create dir #####
+
+    path = Path(f"./{filename}/{filename}")
+    path.mkdir(parents=True, exist_ok=True)
+    txtnull=f"""
+#############################################################################################################################
+#   filename:{filename}.py                                                       
+#   created: {data}                                                              
+#   import your librarys below                                                    
+#############################################################################################################################
+
+
+def {filename}():
+    pass
+    """
+
+    with open(f"{filename}/{filename}/{filename}.py", "w") as main:
+        main.write(txtnull)
+
+
+    txtnull2=f"""
+#############################################################################################################################
+#   filename:{filename}.py                                                       
+#   created: {data}                                                              
+#   import your librarys below                                                    
+#############################################################################################################################
+
+
+
+from .{filename} import *
+
+    """
+    with open(f"{filename}/{filename}/__init__.py", "w") as init:
+        init.write(txtnull2)
+
+    print(f"your project call {filename} was create to be upper on Pypi")

+ 12 - 0
data/purposeCombined/Directory/mkdir_p.py

@@ -0,0 +1,12 @@
+import os
+import errno
+
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError as exc:
+        if exc.errno == errno.EEXIST and os.path.isdir(path):
+            pass
+        else:
+            raise  
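+# Note (not part of the original module): on Python 3.2+ the standard library offers
+# the same behaviour directly:
+# os.makedirs(path, exist_ok=True)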

+ 80 - 0
data/purposeCombined/Directory/project_creator.py

@@ -0,0 +1,80 @@
+############################################################################
+##### Transposon Annotator reasonaTE - part of Transposon Ultimate #########
+##### Kevin Riehl (kevin.riehl.de@gmail.com, 2021) #########################
+############################################################################
+
+# Imports
+from Bio.SeqRecord import SeqRecord
+from Bio import SeqIO
+from os import path
+import os.path
+
+# Methods
+def make_rc_record(record):
+    return SeqRecord(seq = record.seq.reverse_complement(), id = record.id, description="")
+
+def copySequenceClean(fromFile,projectFolderPath):
+    # Copy sequence and clean heads
+    f1 = open(fromFile,"r")
+    f2 = open(os.path.join(projectFolderPath,"sequence.fasta"),"w+")
+    f3 = open(os.path.join(projectFolderPath,"sequence_heads.txt"),"w+")
+    line = f1.readline()
+    counter = 0
+    while line!="":
+        if(line.startswith(">")):
+            counter += 1
+            f3.write(">seq"+str(counter)+"\t"+line)
+            f2.write(">seq"+str(counter)+"\n")
+        else:
+            f2.write(line.upper())
+        line = f1.readline()
+    f1.close()
+    f2.close()
+    f3.close()
+    # Create reverse complement Fasta file
+    records = map(make_rc_record, SeqIO.parse(os.path.join(projectFolderPath,"sequence.fasta"), "fasta"))
+    SeqIO.write(records, os.path.join(projectFolderPath,"sequence_rc.fasta"), "fasta")
+    records = map(make_rc_record, SeqIO.parse(os.path.join(projectFolderPath,"sequence_rc.fasta"), "fasta"))
+    SeqIO.write(records, os.path.join(projectFolderPath,"sequence.fasta"), "fasta")
+    
+def createProject(projectFolder, projectName, inputFasta):
+    # Check if project folder exists
+    if(not path.isdir(projectFolder)):
+        os.mkdir(projectFolder)    
+    # Check if given project already exits
+    projectFolderPath = os.path.join(projectFolder,projectName)
+    if(path.isdir(projectFolderPath)):
+        print("Project already exists, process aborted")
+        return "EXIT"
+    os.mkdir(projectFolderPath)
+    # Create folder structure for annotation softwares
+    os.mkdir(os.path.join(projectFolderPath,"tirvish"))
+    os.mkdir(os.path.join(projectFolderPath,"tirvish_rc"))
+    os.mkdir(os.path.join(projectFolderPath,"sinescan"))
+    os.mkdir(os.path.join(projectFolderPath,"sinefind"))
+    os.mkdir(os.path.join(projectFolderPath,"sinefind_rc"))
+    os.mkdir(os.path.join(projectFolderPath,"repMasker"))
+    os.mkdir(os.path.join(projectFolderPath,"repeatmodel"))
+    os.mkdir(os.path.join(projectFolderPath,"must"))
+    os.mkdir(os.path.join(projectFolderPath,"mitetracker"))
+    os.mkdir(os.path.join(projectFolderPath,"mitetracker_rc"))
+    os.mkdir(os.path.join(projectFolderPath,"mitefind"))
+    os.mkdir(os.path.join(projectFolderPath,"mitefind_rc"))
+    os.mkdir(os.path.join(projectFolderPath,"ltrPred"))
+    os.mkdir(os.path.join(projectFolderPath,"ltrHarvest"))
+    os.mkdir(os.path.join(projectFolderPath,"helitronScanner"))
+    os.mkdir(os.path.join(projectFolderPath,"helitronScanner_rc")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonPSI")) 
+    os.mkdir(os.path.join(projectFolderPath,"NCBICDD1000")) 
+    os.mkdir(os.path.join(projectFolderPath,"parsedAnnotations")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonCandA")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonCandB")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonCandC")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonCandD")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonCandE")) 
+    os.mkdir(os.path.join(projectFolderPath,"transposonCandF")) 
+    os.mkdir(os.path.join(projectFolderPath,"finalResults")) 
+    # Copy DNA into folder
+    copySequenceClean(inputFasta,projectFolderPath)
+
+#createProject("projects", "testProject", "G:/CambridgeGenData/GenSeq/RHIZIPHAGUS_IRR/rir17contigs.fasta")

+ 206 - 0
data/purposeCombined/Directory/setup.py

@@ -0,0 +1,206 @@
+import os
+from pathlib import Path
+import shutil
+import glob
+
+def setup_folders(num_vincs=6, num_sites=6):
+    """
+    DESCRIPTION:
+    Sets up directory structure for storing plotfiles.
+    
+    
+    CALLING SEQUENCE: 
+    setup_folders(num_vincs=6, num_sites=6)
+    
+    KEYWORDS:
+    ## num_vincs: number of velocity increments (default 6; +0-5 km/s)
+    ## num_sites: number of specific collision sites (default 6)
+    
+    
+    Directory Structure:
+    Plots
+        - all_ejecta
+            - vincs_separate
+                - 0vinc
+                    - all_planets
+                    - per_planet
+                        - cols_v_time
+                        - cols_v_time_fits
+                        - inc_v_a
+                        - e_v_a
+                - 1vinc
+                - 2vinc
+                  ...
+                  ...
+            - vincs_compared
+                - histograms
+                - cols_v_time
+                - inc_v_a
+                - e_v_a
+                
+        - specific_collision_sites
+            - site1
+                - vincs_separate
+                    - 0vinc
+                        - all_planets
+                        - per_planet
+                            - cols_v_time
+                            - cols_v_time_fits
+                            - inc_v_a
+                            - e_v_a
+                    - 1vinc
+                    - 2vinc
+                      ...
+                      ...
+                - vincs_compared
+                    - histograms
+                    - cols_v_time
+                    - inc_v_a
+                    - e_v_a
+            - site2
+              ...
+              ...
+              
+        - single_ejecta
+            - 0vinc
+            - 1vinc
+              ...
+              ...
+    
+    """
+    
+    object_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
+    
+    parent = os.getcwd()
+    plotspath = parent + "/Plots"
+    all_ejecta_path = plotspath + "/all_ejecta"
+    specific_sites_path = plotspath + "/specific_collision_sites"
+    single_ejecta_path = plotspath + "/single_ejecta"
+    
+    #create Plots directory
+    Path(plotspath).mkdir(parents=True, exist_ok=True)
+    
+    
+    
+    #create all_ejecta folder
+    Path(all_ejecta_path).mkdir(parents=True, exist_ok=True)
+     
+    #populate all_ejecta_folder:
+    
+    ###1. vincs_separate folder
+    Path(all_ejecta_path + "/vincs_separate").mkdir(parents=True, exist_ok=True)
+    for i in range(num_vincs):
+        
+        #make vincs_separate
+        vinc_folder = all_ejecta_path + "/vincs_separate/" + str(i) + "vinc"
+        Path(vinc_folder).mkdir(parents=True, exist_ok=True)
+        
+        #make all_planets
+        Path(vinc_folder + "/all_planets").mkdir(parents=True, exist_ok=True)
+        Path(vinc_folder + "/all_planets/inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        Path(vinc_folder + "/all_planets/e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        
+        #make and populate per_planet
+        per_p_folder = vinc_folder + "/per_planet"
+        Path(per_p_folder).mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/cols_v_time").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/cols_v_time_fits").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/inc_v_a").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/e_v_a").mkdir(parents=True, exist_ok=True)
+        for o in object_names[1:]:
+            Path(per_p_folder + "/inc_v_a/" + o + "_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/e_v_a/" + o + "_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        
+        Path(per_p_folder + "/inc_v_a/remaining_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/e_v_a/remaining_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/inc_v_a/esc_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/e_v_a/esc_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/inc_v_a/mixed_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        Path(per_p_folder + "/e_v_a/mixed_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+    
+    
+    ###2. vincs_compared folder
+    Path(all_ejecta_path + "/vincs_compared").mkdir(parents=True, exist_ok=True)
+    Path(all_ejecta_path + "/vincs_compared/histograms").mkdir(parents=True, exist_ok=True)
+    Path(all_ejecta_path + "/vincs_compared/cols_v_time").mkdir(parents=True, exist_ok=True)
+    Path(all_ejecta_path + "/vincs_compared/inc_v_a").mkdir(parents=True, exist_ok=True)
+    Path(all_ejecta_path + "/vincs_compared/e_v_a").mkdir(parents=True, exist_ok=True)
+    
+    
+    
+    #create specific_collision_sites folder
+    Path(specific_sites_path).mkdir(parents=True, exist_ok=True)
+    
+    #populate specific_collision_sites folder
+    for j in range(num_sites):
+        
+        #folder for each site
+        site_path = specific_sites_path + "/site" + str(j) 
+        Path(site_path).mkdir(parents=True, exist_ok=True)
+        
+        #1. vincs_separate folder
+        for i in range(num_vincs):
+        
+            #make vincs_separate
+            vinc_folder = site_path + "/vincs_separate/" + str(i) + "vinc"
+            Path(vinc_folder).mkdir(parents=True, exist_ok=True)
+
+            #make all_planets
+            Path(vinc_folder + "/all_planets").mkdir(parents=True, exist_ok=True)
+            Path(vinc_folder + "/all_planets/inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(vinc_folder + "/all_planets/e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            
+            #make and populate per_planet
+            per_p_folder = vinc_folder + "/per_planet"
+            Path(per_p_folder).mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/cols_v_time").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/cols_v_time_fits").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/inc_v_a").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/e_v_a").mkdir(parents=True, exist_ok=True)
+            for o in object_names[1:]:
+                Path(per_p_folder + "/inc_v_a/" + o + "_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+                Path(per_p_folder + "/e_v_a/" + o + "_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/inc_v_a/remaining_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/e_v_a/remaining_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/inc_v_a/esc_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/e_v_a/esc_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/inc_v_a/mixed_inc_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+            Path(per_p_folder + "/e_v_a/mixed_e_v_a_snapshots").mkdir(parents=True, exist_ok=True)
+        
+        ###2. vincs_compared folder
+        Path(site_path + "/vincs_compared").mkdir(parents=True, exist_ok=True)
+        Path(site_path + "/vincs_compared/histograms").mkdir(parents=True, exist_ok=True)
+        Path(site_path + "/vincs_compared/cols_v_time").mkdir(parents=True, exist_ok=True)
+        Path(site_path + "/vincs_compared/inc_v_a").mkdir(parents=True, exist_ok=True)
+        Path(site_path + "/vincs_compared/e_v_a").mkdir(parents=True, exist_ok=True)
+        
+    
+    #create single_ejecta_path folder
+    Path(single_ejecta_path).mkdir(parents=True, exist_ok=True)
+    #populate
+    for i in range(num_vincs):
+        Path(single_ejecta_path + '/' + str(i) + 'vinc').mkdir(parents=True, exist_ok=True)
+        
+        
+def sort_data(num_vincs=6):
+    """
+    DESCRIPTION:
+    Sorts data folders in Ejecta_Simulation_Data by vinc.
+    
+    CALLING SEQUENCE:
+    sort_data(num_vincs=6)
+    
+    KEYWORDS:
+    ## num_vincs: number of velocity increments (default 6; +0-5 km/s)
+    """
+    
+    parent = os.getcwd()
+    folders = sorted(glob.glob(parent + '/Ejecta_Simulation_Data/5000e*'))
+    for i in range(num_vincs):
+        Path(parent + '/Ejecta_Simulation_Data/'+str(i)+'vinc').mkdir(parents=True, exist_ok=True)
+    for folder in folders:
+        vincnum = folder.split('/')[-1].split('_')[2][0]
+        shutil.move(folder, parent + '/Ejecta_Simulation_Data/' + str(vincnum) + 'vinc')
+    
+    
+    

+ 49 - 0
data/purposeCombined/Directory/split_data_in_k_folds.py

@@ -0,0 +1,49 @@
+import os
+import shutil
+
+
+def populate_kfold_directories(data_dir, K_FOLDS):
+
+    alarmed_images = os.listdir(f"{data_dir}/Alarmed")
+    annoyed_images = os.listdir(f"{data_dir}/Annoyed")
+    curious_images = os.listdir(f"{data_dir}/Curious")
+    relaxed_images = os.listdir(f"{data_dir}/Relaxed")
+
+    for i in range(K_FOLDS):
+        validation_range = (i*20, i*20 + 20)
+
+        for j in range(0, 100):
+            if validation_range[0] <= j < validation_range[1]:
+                shutil.copy(f"{data_dir}/Alarmed/{alarmed_images[j]}", f"folds/fold{i}/validation/Alarmed/")
+                shutil.copy(f"{data_dir}/Annoyed/{annoyed_images[j]}", f"folds/fold{i}/validation/Annoyed/")
+                shutil.copy(f"{data_dir}/Curious/{curious_images[j]}", f"folds/fold{i}/validation/Curious/")
+                shutil.copy(f"{data_dir}/Relaxed/{relaxed_images[j]}", f"folds/fold{i}/validation/Relaxed/")
+            else:
+                shutil.copy(f"{data_dir}/Alarmed/{alarmed_images[j]}", f"folds/fold{i}/train/Alarmed/")
+                shutil.copy(f"{data_dir}/Annoyed/{annoyed_images[j]}", f"folds/fold{i}/train/Annoyed/")
+                shutil.copy(f"{data_dir}/Curious/{curious_images[j]}", f"folds/fold{i}/train/Curious/")
+                shutil.copy(f"{data_dir}/Relaxed/{relaxed_images[j]}", f"folds/fold{i}/train/Relaxed/")
+
+
+def create_kfold_directories(K_FOLDS):
+
+    try:
+        os.mkdir("folds")
+    except FileExistsError:
+        print("Directory 'folds' already exists")
+
+    for i in range(K_FOLDS):
+        try:
+            os.mkdir(f"folds/fold{i}/")
+            os.mkdir(f"folds/fold{i}/train")
+            os.mkdir(f"folds/fold{i}/validation")
+            os.mkdir(f"folds/fold{i}/train/Alarmed")
+            os.mkdir(f"folds/fold{i}/train/Annoyed")
+            os.mkdir(f"folds/fold{i}/train/Curious")
+            os.mkdir(f"folds/fold{i}/train/Relaxed")
+            os.mkdir(f"folds/fold{i}/validation/Alarmed")
+            os.mkdir(f"folds/fold{i}/validation/Annoyed")
+            os.mkdir(f"folds/fold{i}/validation/Curious")
+            os.mkdir(f"folds/fold{i}/validation/Relaxed")
+        except FileExistsError:
+            print(f"Directories for fold{i} already exist")

+ 80 - 0
data/purposeCombined/Directory/stc_vid2frames.py

@@ -0,0 +1,80 @@
+import sys
+import os
+import numpy as np
+import shutil
+import argparse
+import torch
+import torchvision
+from tqdm import tqdm
+
+def main():
+    parser = argparse.ArgumentParser(add_help=True)
+    parser.add_argument('--dataroot',
+                        default='.',
+                        help='Dataset root directory')
+    parser.add_argument('--src_vid_path', default='archive/training/videos/',
+                        help='Name of folder where `avi` files exist')
+    parser.add_argument('--tar_vid_frame_path', default='converted/train',
+                        help='Name of folder to save extracted frames.')
+    parser.add_argument('--src_npy_path', default='archive/test_pixel_mask/',
+                        help='Name of folder where `npy` frame mask exist')
+    parser.add_argument('--tar_anno_path', default='converted/pixel_mask',
+                        help='Name of folder to save extracted frame annotation')
+    parser.add_argument('--extension', default='jpg',
+                        help="File extension format for the output image")
+
+    args = parser.parse_args()
+
+    src_dir = os.path.join(args.dataroot, args.src_vid_path)
+    tar_dir = os.path.join(args.dataroot, args.tar_vid_frame_path)
+
+    try:
+        os.makedirs(tar_dir)
+    except FileExistsError:
+        print(F'{tar_dir} already exists, remove whole tree and recompose ...')
+        shutil.rmtree(tar_dir)
+        os.makedirs(tar_dir)
+
+    vid_list = os.listdir(src_dir)
+
+    for vidname in tqdm(vid_list):
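+        # torchvision.io.read_video returns (video_frames, audio_frames, info);
+        # indexing [0] keeps the T x H x W x C uint8 frame tensor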
+        vid = torchvision.io.read_video(os.path.join(src_dir, vidname), pts_unit='sec')[0]
+        target_folder = os.path.join(tar_dir, vidname[:-4])
+   
+        try: 
+            os.makedirs(target_folder)
+        except FileExistsError:
+            print(F'{target_folder} already exists, remove the directory and recompose ...')
+            shutil.rmtree(target_folder)
+            os.makedirs(target_folder) 
+            
+        for i, frame in enumerate(vid):
+            frame = (frame / 255.).permute(2, 0, 1) #HWC2CHW
+            torchvision.utils.save_image(frame,
+                                         F'{target_folder}/{i:03}.{args.extension}') 
+    
+    src_dir = os.path.join(args.dataroot, args.src_npy_path)    
+    tar_dir = os.path.join(args.dataroot, args.tar_anno_path)
+
+    try:
+        os.makedirs(tar_dir)
+    except FileExistsError:
+        print(F"{tar_dir} already exists, remove whole tree and recompose ...")
+        shutil.rmtree(tar_dir)
+        os.makedirs(tar_dir)
+
+    frame_anno = os.listdir(src_dir)
+
+    for _f in tqdm(frame_anno):
+        fn = _f[:-4]
+        target_folder = os.path.join(tar_dir, fn)
+        os.makedirs(target_folder)
+        # np.float was removed from NumPy; use an explicit dtype instead
+        px_anno = np.load(F"{src_dir}/{fn}.npy").astype(np.float32)
+
+        for i, px_frame in enumerate(px_anno):
+            torchvision.utils.save_image(torch.from_numpy(px_frame).unsqueeze(0), # CHW, 1 channel
+                                         F"{target_folder}/{i:03}.{args.extension}")
+
+
+if __name__ == '__main__':
+    main()

+ 197 - 0
data/purposeCombined/Directory/test_archive.py

@@ -0,0 +1,197 @@
+## Copyright (c) 2012 Aldebaran Robotics. All rights reserved.
+## Use of this source code is governed by a BSD-style license that can be
+## found in the COPYING file.
+
+"""Automatic testing for handling archives
+
+"""
+
+import os
+import sys
+import stat
+import errno
+import unittest
+import tempfile
+
+import qibuild
+
+class ArchiveTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tmp = tempfile.mkdtemp(prefix="tmp-archive-test")
+
+    def tearDown(self):
+        qibuild.sh.rm(self.tmp)
+
+    def test_zip_extract(self):
+        # Create some files in the temp dir:
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        # Create a dir called a containing two files
+        # named b and c
+        a = os.path.join(src, "a")
+        os.mkdir(a)
+        b = os.path.join(a, "b")
+        with open(b, "w") as fp:
+            fp.write("b\n")
+        c = os.path.join(a, "c")
+        with open(c, "w") as fp:
+            fp.write("c\n")
+        archive = qibuild.archive.zip(a)
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        qibuild.archive.extract(archive, dest)
+        ls_r = qibuild.sh.ls_r(dest)
+        self.assertEquals(ls_r, ["a/b", "a/c"])
+
+    def test_zip_extract_ro(self):
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        # Create a dir called a containing a single
+        # read-only file named ro
+        a = os.path.join(src, "a")
+        os.mkdir(a)
+        ro = os.path.join(a, "ro")
+        with open(ro, "w") as fp:
+            fp.write("ro\n")
+        # make the file read-only for the owner (0o400):
+        os.chmod(ro, stat.S_IRUSR)
+        archive = qibuild.archive.zip(a)
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        qibuild.archive.extract(archive, dest)
+        ls_r = qibuild.sh.ls_r(dest)
+        self.assertEquals(ls_r, ["a/ro"])
+        dest_ro = os.path.join(dest, "a", "ro")
+        # check that the dest is readonly:
+        error = None
+        try:
+            open(dest_ro, "w")
+        except IOError as e:
+            error = e
+        self.assertFalse(error is None)
+        self.assertEquals(error.errno,  errno.EACCES)
+
+    def test_zip_extract_ro_dir(self):
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        ro1 = os.path.join(src, "ro1")
+        os.mkdir(ro1)
+        ro2 = os.path.join(ro1, "ro2")
+        os.mkdir(ro2)
+        a = os.path.join(ro2, "a")
+        with open(a, "w") as fp:
+            fp.write("a\n")
+        # RO dir inside an other RO dir
+        os.chmod(ro2, stat.S_IRUSR | stat.S_IXUSR)
+        os.chmod(ro1, stat.S_IRUSR | stat.S_IXUSR)
+        archive = qibuild.archive.zip(src)
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        qibuild.archive.extract(archive, dest)
+        ls_r = qibuild.sh.ls_r(dest)
+        self.assertEquals(ls_r, ["src/ro1/ro2/a"])
+
+    def test_extract_preserve_executables_from_zip(self):
+        zip = qibuild.command.find_program("zip")
+        if not zip:
+            return
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        a_exe = os.path.join(src, "a.exe")
+        with open(a_exe, "w") as fp:
+            fp.write("a_exe\n")
+        st_700 = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR
+        os.chmod(a_exe, st_700)
+        qibuild.command.call(["zip", "-r", "src.zip", "src"],
+            cwd=self.tmp)
+        archive = os.path.join(self.tmp, "src.zip")
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        qibuild.archive.extract_zip(archive, dest)
+        dest_exe = os.path.join(dest, "src", "a.exe")
+        st_mode = os.stat(dest_exe).st_mode
+        self.assertEquals(st_mode, 0o100700)
+
+    def test_extract_change_topdir(self):
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        a_long_dir = os.path.join(src, "a_long_dir")
+        os.mkdir(a_long_dir)
+        b = os.path.join(a_long_dir, "b")
+        with open(b, "w") as fp:
+            fp.write("b\n")
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        tar_gz = qibuild.archive.zip_unix(a_long_dir)
+        qibuild.archive.extract(tar_gz, dest, topdir="a")
+        a = os.path.join(dest, "a")
+        ls_r = qibuild.sh.ls_r(a)
+        self.assertEquals(ls_r, ["b"])
+        a_zip = qibuild.archive.zip_win(a_long_dir)
+        qibuild.archive.extract(a_zip, dest, topdir="aa")
+        aa = os.path.join(dest, "aa")
+        ls_r = qibuild.sh.ls_r(aa)
+        self.assertEquals(ls_r, ["b"])
+
+    def test_extract_change_topdir_already_correct(self):
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        a_dir = os.path.join(src, "a")
+        os.mkdir(a_dir)
+        tar_gz = qibuild.archive.zip_unix(a_dir)
+        dest = os.path.join(self.tmp, "dest")
+        qibuild.archive.extract(tar_gz, dest, topdir="a")
+        ls_r = qibuild.sh.ls_r(dest)
+        self.assertEquals(ls_r, ["a/"])
+
+    def test_extract_with_symlink(self):
+        if sys.platform.startswith("win"):
+            return
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        a_dir = os.path.join(src, "a_dir")
+        os.mkdir(a_dir)
+        a_file = os.path.join(a_dir, "a_file")
+        with open(a_file, "w") as fp:
+            fp.write("a_file\n")
+        a_link = os.path.join(a_dir, "a_link")
+        os.symlink("a_file", a_link)
+        tar_gz = qibuild.archive.zip_unix(a_dir)
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        qibuild.archive.extract(tar_gz, dest)
+        ls_r = qibuild.sh.ls_r(dest)
+        self.assertEquals(ls_r,
+            ['a_dir/a_file', 'a_dir/a_link'])
+        dest_link = os.path.join(dest, "a_dir", "a_link")
+        self.assertTrue(os.path.islink(dest_link))
+        dest_target = os.readlink(dest_link)
+        self.assertEquals(dest_target, "a_file")
+
+    def test_extract_with_symlink_and_change_topdir(self):
+        if sys.platform.startswith("win"):
+            return
+        src = os.path.join(self.tmp, "src")
+        os.mkdir(src)
+        a_long_dir = os.path.join(src, "a_long_dir")
+        os.mkdir(a_long_dir)
+        a_file = os.path.join(a_long_dir, "a_file")
+        with open(a_file, "w") as fp:
+            fp.write("a_file\n")
+        a_link = os.path.join(a_long_dir, "a_link")
+        os.symlink("a_file", a_link)
+        tar_gz = qibuild.archive.zip_unix(a_long_dir)
+        dest = os.path.join(self.tmp, "dest")
+        os.mkdir(dest)
+        qibuild.archive.extract(tar_gz, dest, topdir="a_dir")
+        ls_r = qibuild.sh.ls_r(dest)
+        self.assertEquals(ls_r,
+            ['a_dir/a_file', 'a_dir/a_link'])
+        dest_link = os.path.join(dest, "a_dir", "a_link")
+        self.assertTrue(os.path.islink(dest_link))
+        dest_target = os.readlink(dest_link)
+        self.assertEquals(dest_target, "a_file")
+
+
+if __name__ == "__main__":
+    unittest.main() 

+ 306 - 0
data/purposeCombined/Directory/test_tool.py

@@ -0,0 +1,306 @@
+import unittest
+from unittest.mock import patch
+import os
+import shutil
+from programy.admin.tool import AdminTool
+
+
+class MockAdminTool(AdminTool):
+
+    def __init__(self):
+        AdminTool.__init__(self)
+        self.text = ""
+
+    def display(self, text):
+        self.text += text
+
+
+class AdminToolTests(unittest.TestCase):
+
+    def get_temp_dir(self):
+        if os.name == 'posix':
+            return '/tmp'
+        elif os.name == 'nt':
+            import tempfile
+            return tempfile.gettempdir()
+        else:
+            raise Exception("Unknown operating system [%s]" % os.name)
+
+    def create_file(self, filename):
+        with open(filename, "w+") as file:
+            file.writelines(["line1", "line2", "line3"])
+            file.flush()
+            file.close()
+
+    def test_recursive_copy(self):
+        tmp_dir = self.get_temp_dir() + os.sep +"programy"
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+        os.mkdir(tmp_dir)
+
+        src_dir =  tmp_dir + os.sep + "src"
+        os.mkdir(src_dir)
+        src_sub_dir = tmp_dir + os.sep + "src" + os.sep + "sub"
+        os.mkdir(src_sub_dir)
+        src_sub_dir2 = tmp_dir + os.sep + "src" + os.sep + "sub2"
+        os.mkdir(src_sub_dir2)
+        dest_dir = tmp_dir + os.sep + "dest"
+        os.mkdir(dest_dir)
+
+        self.create_file(src_dir + os.sep + "file1.txt")
+        self.create_file(src_dir + os.sep + "file2.txt")
+        self.create_file(src_dir + os.sep + "file3.txt")
+        self.create_file(src_dir + os.sep + "sub" + os.sep + "file4.txt")
+
+        AdminTool.recursive_copy(src_dir, dest_dir)
+
+        self.assertTrue(os.path.exists(src_dir + os.sep + "file1.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "file2.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "file3.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "sub" + os.sep + "file4.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "sub2"))
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+    def test_delete_folder_contents(self):
+        tmp_dir = self.get_temp_dir() + os.sep +"programy"
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+        os.mkdir(tmp_dir)
+
+        src_dir =  tmp_dir + os.sep + "src"
+        os.mkdir(src_dir)
+        src_sub_dir = tmp_dir + os.sep + "src" + os.sep + "sub"
+        os.mkdir(src_sub_dir)
+        dest_dir = tmp_dir + os.sep + "dest"
+        os.mkdir(dest_dir)
+
+        self.create_file(src_dir + os.sep + "file1.txt")
+        self.create_file(src_dir + os.sep + "file2.txt")
+        self.create_file(src_dir + os.sep + "file3.txt")
+        self.create_file(src_dir + os.sep + "sub" + os.sep + "file4.txt")
+
+        self.assertTrue(os.path.exists(src_dir + os.sep + "file1.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "file2.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "file3.txt"))
+        self.assertTrue(os.path.exists(src_dir + os.sep + "sub" + os.sep + "file4.txt"))
+
+        AdminTool.delete_folder_contents(tmp_dir)
+
+        self.assertFalse(os.path.exists(src_dir + os.sep + "file1.txt"))
+        self.assertFalse(os.path.exists(src_dir + os.sep + "file2.txt"))
+        self.assertFalse(os.path.exists(src_dir + os.sep + "file3.txt"))
+        self.assertFalse(os.path.exists(src_dir + os.sep + "sub" + os.sep + "file4.txt"))
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+    def test_make_executable(self):
+        tmp_dir = self.get_temp_dir() + os.sep +"programy"
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+        os.mkdir(tmp_dir)
+
+        filepath = tmp_dir + os.sep + "file1.txt"
+        self.create_file(filepath)
+
+        self.assertTrue(os.path.exists(filepath))
+
+        AdminTool.make_executable(filepath)
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+    def test_make_all_executable(self):
+        tmp_dir = self.get_temp_dir() + os.sep +"programy"
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+        os.mkdir(tmp_dir)
+
+        src_dir =  tmp_dir + os.sep + "src"
+        os.mkdir(src_dir)
+        src_sub_dir = tmp_dir + os.sep + "src" + os.sep + "sub"
+        os.mkdir(src_sub_dir)
+        dest_dir = tmp_dir + os.sep + "dest"
+        os.mkdir(dest_dir)
+
+        self.create_file(src_dir + os.sep + "file1.txt")
+        self.create_file(src_dir + os.sep + "file2.txt")
+        self.create_file(src_dir + os.sep + "file3.txt")
+        self.create_file(src_dir + os.sep + "sub" + os.sep + "file4.txt")
+
+        AdminTool.make_all_executable(tmp_dir)
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+
+    def test_list_bots(self):
+        tool = MockAdminTool()
+        self.assertEqual("", tool.text)
+
+        tool.list_bots()
+
+        self.assertEquals("""Available bots are:
+	alice2-y	professor-y	rosie-y	talk-y	y-bot	servusai-y	template-y	traintimes-y
+	To download use 'python3 -m programy.admin.tool download <bot-name>'
+Additional components are:
+	textblob
+	To install use 'python3 -m programy.admin.tool install <component>'""", tool.text)
+
+    def patch_wget_download(self, url):
+        return "mock.bot"
+
+    @patch("programy.admin.tool.AdminTool.wget_download", patch_wget_download)
+    def test_download_bot(self):
+        tool = MockAdminTool()
+        self.assertEqual("", tool.text)
+
+        filename = tool.download_bot("y-bot")
+        self.assertEqual("mock.bot", filename)
+
+        self.assertEqual("""Downloading [y-bot] from [https://github.com/keiffster/y-bot/archive/master.zip]
+Download complete""", tool.text)
+
+    def test_zip_dir_name_from_filename(self):
+        self.assertEqual("filename", AdminTool.zip_dir_name_from_filename('filename.zip'))
+        self.assertEqual("filename", AdminTool.zip_dir_name_from_filename('filename'))
+
+    def test_extract_bot_no_remove(self):
+        tool = AdminTool()
+
+        tmp_dir = self.get_temp_dir() + os.sep +"programy"
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+        os.mkdir(tmp_dir)
+        shutil.copyfile(os.path.dirname(__file__) + os.sep + "bot.zip", tmp_dir + os.sep + "bot.zip")
+
+        tool.extract_bot(tmp_dir + os.sep + "bot.zip", path=tmp_dir, remove_after=False)
+
+        self.assertTrue(os.path.exists(tmp_dir + os.sep + "bot.zip"))
+        self.assertTrue(os.path.exists(tmp_dir + os.sep + "test1.txt"))
+        self.assertTrue(os.path.exists(tmp_dir + os.sep + "test2.txt"))
+
+        shutil.rmtree(tmp_dir)
+
+    def test_extract_bot_with_remove(self):
+        tool = AdminTool()
+
+        tmp_dir = self.get_temp_dir() + os.sep +"programy"
+
+        if os.path.exists(tmp_dir):
+            shutil.rmtree(tmp_dir)
+        os.mkdir(tmp_dir)
+        shutil.copyfile(os.path.dirname(__file__) + os.sep + "bot.zip", tmp_dir + os.sep + "bot.zip")
+
+        tool.extract_bot(tmp_dir + os.sep + "bot.zip", path=tmp_dir, remove_after=True)
+
+        self.assertFalse(os.path.exists(tmp_dir + os.sep + "bot.zip"))
+        self.assertTrue(os.path.exists(tmp_dir + os.sep + "test1.txt"))
+        self.assertTrue(os.path.exists(tmp_dir + os.sep + "test2.txt"))
+
+        shutil.rmtree(tmp_dir)
+
+    def patch_download_and_make_active(self, bot_name):
+        pass # Do nothing
+
+    @patch("programy.admin.tool.AdminTool.download_and_make_active", patch_download_and_make_active)
+    def test_install_bot(self):
+        tool = MockAdminTool()
+        self.assertEquals("", tool.text)
+
+        tool.install_bot(["test", "y-bot"])
+        self.assertEqual("""
+To run y-bot bot in console mode, use the following commands
+\tcd scripts/xnix\t./y-bot.sh""", tool.text)
+
+    def test_install_bot_unknown(self):
+        tool = MockAdminTool()
+        self.assertEquals("", tool.text)
+
+        with self.assertRaises(Exception):
+            tool.install_bot(["test", "unknown"])
+
+    def patch_install_textblob(self):
+        pass # Do nothing
+
+    @patch("programy.admin.tool.AdminTool.install_textblob", patch_install_textblob)
+    def test_install_additional(self):
+        tool = MockAdminTool()
+        self.assertEquals("", tool.text)
+
+        tool.install_additional(["test", "textblob"])
+        self.assertEqual("Installing additional components for textblob", tool.text)
+
+    def test_install_additional_invalid(self):
+        tool = MockAdminTool()
+        self.assertEquals("", tool.text)
+
+        with self.assertRaises(Exception):
+            tool.install_additional(["test", "xxxxxxx"])
+
+    def test_show_execute_help(self):
+        tool = MockAdminTool()
+        self.assertEqual("", tool.text)
+
+        tool.show_execute_help("y-bot")
+
+        self.assertEqual("""
+To run y-bot bot in console mode, use the following commands
+\tcd scripts/xnix\t./y-bot.sh""", tool.text)
+
+    def test_show_help(self):
+        tool = MockAdminTool()
+        self.assertEqual("", tool.text)
+
+        tool.show_help()
+
+        self.assertEqual("""Available commands are:
+\thelp	list	download <bot-name>	install <component>""", tool.text)
+
+    def test_run_no_words(self):
+        tool = MockAdminTool()
+        tool.run([])
+        self.assertIsNotNone(tool.text)
+        self.assertTrue(tool.text.startswith("Available commands are:"))
+
+    def test_run_unknown_primary_command(self):
+        tool = MockAdminTool()
+        tool.run(['unknown'])
+        self.assertIsNotNone(tool.text)
+        self.assertTrue(tool.text.startswith("Unknown primary command [unknown]"))
+
+    def test_run_missing_bot_name(self):
+        tool = MockAdminTool()
+        tool.run(['download'])
+        self.assertIsNotNone(tool.text)
+        self.assertTrue(tool.text.startswith("Missing bot name from download command"))
+
+    def test_run_list(self):
+        tool = MockAdminTool()
+        tool.run(['list'])
+        self.assertIsNotNone(tool.text)
+
+    def test_run_download(self):
+        tool = MockAdminTool()
+        tool.run(['download'])
+        self.assertIsNotNone(tool.text)
+
+    def test_run_install(self):
+        tool = MockAdminTool()
+        tool.run(['install'])
+        self.assertIsNotNone(tool.text)
+
+    def test_run_help(self):
+        tool = MockAdminTool()
+        tool.run(['help'])
+        self.assertIsNotNone(tool.text)

+ 272 - 0
data/purposeCombined/Directory/tutorial.py

@@ -0,0 +1,272 @@
+import csv
+import os
+import re
+import shutil
+
+def del_create_analytics_folder():
+    # del the analytics folder including subfolder
+    # mkdir the analytics folder (only mkdir)
+    if os.path.exists('analytics'):
+        shutil.rmtree('analytics')
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+
+def course():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    if os.path.exists('analytics/course'):
+        shutil.rmtree('analytics/course')
+    d = {'01':'btech',
+    '11':'mtech',
+    '21':'phd',
+    '12':'msc'}
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/course'):
+            os.mkdir('analytics/course')
+        for row in reader:
+            if len(row)==0:
+                # skip blank rows
+                continue
+            l = list(row.values())
+            head = list(row.keys())
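+            # assumed roll-number layout (enforced by the regex below): 2-digit year,
+            # 2-digit degree code, 2-letter branch, 2-digit serial, e.g. '1901CS07' (hypothetical)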
+            stream = str(row['id'][-4:-2]).lower()
+            yr = str(row['id'][:2])
+            if str(row['id'][2:4]) in list(d.keys()):
+                degree = d[str(row['id'][2:4])]
+            else:
+                with open('analytics/course/' + 'misc.csv' , mode = 'a') as f:
+                    f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                    if os.path.getsize('analytics/course/' + 'misc.csv')==0:
+                        f_write.writerow(head)
+                    f_write.writerow(l)
+                f.close()
+                continue
+            csv_name = f'{yr}_{stream}_{degree}.csv'
+            p = re.compile(r'\d\d\d\d\D\D\d\d')
+            k = re.fullmatch(p,row['id'])
+            if k:
+                if not os.path.exists('analytics/course/'+ stream):
+                    os.mkdir('analytics/course/'+ stream) 
+                if not os.path.exists('analytics/course/'+ stream + '/' + degree):
+                    os.mkdir('analytics/course/'+ stream + '/' + degree ) 
+                with open('analytics/course/'+ stream + '/' + degree + '/' + csv_name , mode = 'a') as f:
+                    f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                    if os.path.getsize('analytics/course/'+ stream + '/' + degree + '/' + csv_name)==0:
+                        f_write.writerow(head)
+                    f_write.writerow(l)
+                f.close()
+            else:
+                with open('analytics/course/' + 'misc.csv' , mode = 'a') as f:
+                    f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                    if os.path.getsize('analytics/course/' + 'misc.csv')==0:
+                        f_write.writerow(head)
+                    f_write.writerow(l)
+                f.close()
+    csvfile.close()
+
+
+def country():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')    
+    if os.path.exists('analytics/country'):
+        shutil.rmtree('analytics/country')    
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/country'):
+            os.mkdir('analytics/country')
+        for row in reader:
+            l = list(row.values())
+            head = list(row.keys())
+            with open('analytics/country/'+row['country'].lower()+ '.csv', mode = 'a') as f:
+                f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                if os.path.getsize('analytics/country/'+row['country'].lower() + '.csv')==0:
+                  f_write.writerow(head) 
+                f_write.writerow(l)
+            f.close()
+    csvfile.close()
+
+
+
+
+def email_domain_extract():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    if os.path.exists('analytics/email'):
+        shutil.rmtree('analytics/email')
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/email'):
+            os.mkdir('analytics/email')
+        for row in reader:
+            l = list(row.values())
+            head = list(row.keys())
+            if '@' in row['email'] and '.' in row['email']:
+                domain = row['email'].split('@')[1].split('.')[0]
+                with open('analytics/email/'+domain+ '.csv', mode = 'a') as f:
+                    f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                    if os.path.getsize('analytics/email/'+ domain + '.csv')==0:
+                        f_write.writerow(head) 
+                    f_write.writerow(l)
+                f.close()
+
+            else:
+                with open('analytics/email/'+'misc'+ '.csv', mode = 'a') as f:
+                    f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                    if os.path.getsize('analytics/email/misc.csv')==0:
+                        f_write.writerow(head) 
+                    f_write.writerow(l)
+                f.close()
+    csvfile.close()
+
+
+
+
+
+def gender():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    if os.path.exists('analytics/gender'):
+        shutil.rmtree('analytics/gender')
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/gender'):
+            os.mkdir('analytics/gender')
+        for row in reader:
+            l = list(row.values())
+            head = list(row.keys())
+            gender = row['gender'].lower()
+            with open('analytics/gender/'+gender+'.csv', mode='a') as f:
+                f_write = csv.writer(f, delimiter=',', lineterminator='\r')
+                if os.path.getsize('analytics/gender/'+gender+'.csv') == 0:
+                    f_write.writerow(head)
+                f_write.writerow(l)
+            f.close()
+    csvfile.close()
+
+
+def dob():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    if os.path.exists('analytics/dob'):
+        shutil.rmtree('analytics/dob')
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/dob'):
+            os.mkdir('analytics/dob')
+        for row in reader:
+            l = list(row.values())
+            head = list(row.keys())
+            x = str(re.sub(r"\D","-",row['dob']))
+            yr = int(x.split('-')[-1])
+            k = int(yr)%10
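+            # bucket birth years into 5-year ranges keyed on the last digit,
+            # e.g. a 1997 birth year falls in 'bday_1995_1999'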
+            if k>4:
+                name = 'bday_' + str(yr - k + 5) + '_' + str(yr - k + 9)
+            else:
+                name = 'bday_' + str(yr - k ) + '_' + str(yr - k + 4)
+            if yr > 2014:
+                name = 'bday_2015_2020'
+            with open('analytics/dob/'+name+'.csv', mode='a') as f:
+                f_write = csv.writer(f, delimiter=',', lineterminator='\r')
+                if os.path.getsize('analytics/dob/'+name+'.csv') == 0:
+                    f_write.writerow(head)
+                f_write.writerow(l)
+            f.close()
+        
+
+
+
+def state():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    if os.path.exists('analytics/state'):
+        shutil.rmtree('analytics/state')
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/state'):
+            os.mkdir('analytics/state')
+        for row in reader:
+            l = list(row.values())
+            head = list(row.keys())
+            with open('analytics/state/'+row['state'].lower()+ '.csv', mode = 'a') as f:
+                f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                if os.path.getsize('analytics/state/'+row['state'].lower() + '.csv')==0:
+                  f_write.writerow(head) 
+                f_write.writerow(l)
+            f.close()
+    csvfile.close()
+
+
+def blood_group():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    if os.path.exists('analytics/blood_group'):
+        shutil.rmtree('analytics/blood_group')
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)
+        if not os.path.exists('analytics/blood_group'):
+            os.mkdir('analytics/blood_group')
+        for row in reader:
+            l = list(row.values())
+            head = list(row.keys())
+            with open('analytics/blood_group/'+row['blood_group']+ '.csv', mode = 'a') as f:
+                f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+                if os.path.getsize('analytics/blood_group/'+row['blood_group'] + '.csv')==0:
+                    f_write.writerow(head) 
+                f_write.writerow(l)
+            f.close()
+    csvfile.close()
+
+
+# Create the new file here and also sort it in this function only.
+def new_file_sort():
+    if not os.path.exists('analytics'):
+        os.mkdir('analytics')
+    new = []
+    head = []
+    with open('studentinfo_cs384.csv', newline='') as csvfile:
+        reader = csv.DictReader(csvfile)       
+        for row in reader:
+            head = list(row.keys())
+            del head[1]
+            head.insert(1,'first_name')
+            head.insert(2,'last_name')
+            k = list(row.values())
+            del k[1]
+            k.insert(1,row['full_name'].split()[0])
+            k.insert(2,' '.join(row['full_name'].split()[1:]))
+            new.append(k)
+    csvfile.close()
+    with open('analytics/studentinfo_cs384_names_split.csv', newline='',mode='w') as f:
+        f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+        f_write.writerow(head)
+        for i in new:
+            f_write.writerow(i)
+    f.close()
+    #sorting
+    dic = {}
+    for i in new:
+        dic[i[1]]='#$%^&*'.join(i)
+    new = []
+    # truncate the sorted-output file before the rows are appended below
+    open('analytics/studentinfo_cs384_names_split_sorted_first_name.csv', mode='w').close()
+    for i in sorted(dic.items()):
+        new.append(i[1].split('#$%^&*'))
+    with open('analytics/studentinfo_cs384_names_split_sorted_first_name.csv', mode = 'a') as f:
+        f_write = csv.writer(f, delimiter=',',lineterminator='\r')
+        f_write.writerow(head)
+        for i in new:
+            f_write.writerow(i)
+    f.close()
+
+#if __name__ == "__main__":
+#     del_create_analytics_folder()
+#     course()
+#     blood_group()
+#     new_file_sort()s
+#     state()
+#     email_domain_extract()
+#     state()
+#     gender()
+#     dob()

Some files were not shown because of the large number of changes