
solve conflict

Sjim, 2 years ago
Parent commit: 7f5c9939d1
100 changed files with 9769 additions and 5 deletions
  1. .idea/Data.iml (+1, -1)
  2. .idea/misc.xml (+1, -4)
  3. File/DataManagment.py (+272, -0)
  4. File/checkpoint.py (+139, -0)
  5. File/file.py (+224, -0)
  6. File/file_1.py (+155, -0)
  7. File/file_util.py (+33, -0)
  8. File/files.py (+398, -0)
  9. File/filesystem.py (+305, -0)
  10. File/preprocess.py (+168, -0)
  11. File/saver.py (+75, -0)
  12. File/transform_cuhk03.py (+154, -0)
  13. File/utils.py (+34, -0)
  14. Hash/03-HT-Get.py (+37, -0)
  15. Hash/GeneralHashFunctions.py (+99, -0)
  16. Hash/Reducible.py (+226, -0)
  17. Hash/bignum.py (+42, -0)
  18. Hash/common_substring.py (+0, -0)
  19. Hash/crypto_hash.py (+70, -0)
  20. Hash/des_crypt.py (+607, -0)
  21. Hash/digests.py (+146, -0)
  22. Hash/generate_hash_macro.py (+157, -0)
  23. Hash/hash_1.py (+997, -0)
  24. Hash/hash_functions.py (+117, -0)
  25. Hash/hash_util.py (+173, -0)
  26. Hash/hash_v.1.0.py (+127, -0)
  27. Hash/hasher.py (+34, -0)
  28. Hash/hashes.py (+255, -0)
  29. Hash/hashing.py (+298, -0)
  30. Hash/headers.py (+181, -0)
  31. Hash/matching_with_mismatches.py (+109, -0)
  32. Hash/optimize-pngs.py (+77, -0)
  33. Hash/password_hashers.py (+117, -0)
  34. Hash/security.py (+105, -0)
  35. Hash/tab_hash.py (+69, -0)
  36. Hash/test_hash.py (+346, -0)
  37. Hash/verifier.py (+92, -0)
  38. Hash/windows.py (+334, -0)
  39. Hash/xor.py (+337, -0)
  40. Hash/zorbristHash.py (+68, -0)
  41. Pseudonym/03-pseudonymize-data-in-power-bi-python.py (+127, -0)
  42. Pseudonym/dataset_pseudonymizer.py (+196, -0)
  43. Pseudonym/doc2pseudo.py (+65, -0)
  44. Pseudonym/main.py (+1227, -0)
  45. Pseudonym/process_file.py (+184, -0)
  46. Target/File/DataManagment_3.py (+17, -0)
  47. Target/File/checkpoint_2.py (+19, -0)
  48. Target/File/checkpoint_4.py (+3, -0)
  49. Target/File/checkpoint_6.py (+4, -0)
  50. Target/File/checkpoint_7.py (+2, -0)
  51. Target/File/file_1_3.py (+23, -0)
  52. Target/File/file_util_3.py (+8, -0)
  53. Target/File/file_util_4.py (+7, -0)
  54. Target/File/files_13.py (+12, -0)
  55. Target/File/files_25.py (+7, -0)
  56. Target/File/filesystem_13.py (+33, -0)
  57. Target/File/hash_1_16.py (+35, -0)
  58. Target/File/preprocess_2.py (+12, -0)
  59. Target/File/preprocess_6.py (+19, -0)
  60. Target/File/transform_cuhk03_1.py (+40, -0)
  61. Target/File/utils_1.py (+16, -0)
  62. Target/File/utils_2.py (+11, -0)
  63. Target/Hash/03-HT-Get_2.py (+5, -0)
  64. Target/Hash/EncrypC_3.py (+26, -0)
  65. Target/Hash/EncrypC_6.py (+29, -0)
  66. Target/Hash/EncryptionDecryption_2.py (+20, -0)
  67. Target/Hash/GeneralHashFunctions_1.py (+8, -0)
  68. Target/Hash/GeneralHashFunctions_10.py (+7, -0)
  69. Target/Hash/GeneralHashFunctions_11.py (+8, -0)
  70. Target/Hash/GeneralHashFunctions_2.py (+5, -0)
  71. Target/Hash/GeneralHashFunctions_3.py (+14, -0)
  72. Target/Hash/GeneralHashFunctions_4.py (+9, -0)
  73. Target/Hash/GeneralHashFunctions_5.py (+6, -0)
  74. Target/Hash/GeneralHashFunctions_6.py (+5, -0)
  75. Target/Hash/GeneralHashFunctions_7.py (+5, -0)
  76. Target/Hash/GeneralHashFunctions_8.py (+5, -0)
  77. Target/Hash/GeneralHashFunctions_9.py (+5, -0)
  78. Target/Hash/Reducible_2.py (+6, -0)
  79. Target/Hash/base64_2.py (+10, -0)
  80. Target/Hash/base64_3.py (+14, -0)
  81. Target/Hash/base64_4.py (+5, -0)
  82. Target/Hash/base64_5.py (+3, -0)
  83. Target/Hash/bignum_1.py (+12, -0)
  84. Target/Hash/biometry_hash_5.py (+69, -0)
  85. Target/Hash/biometry_hash_8.py (+27, -0)
  86. Target/Hash/crypto_11.py (+15, -0)
  87. Target/Hash/crypto_13.py (+23, -0)
  88. Target/Hash/crypto_14.py (+17, -0)
  89. Target/Hash/crypto_4.py (+12, -0)
  90. Target/Hash/crypto_6.py (+2, -0)
  91. Target/Hash/crypto_7.py (+2, -0)
  92. Target/Hash/crypto_hash_1.py (+13, -0)
  93. Target/Hash/crypto_hash_2.py (+13, -0)
  94. Target/Hash/crypto_hash_3.py (+13, -0)
  95. Target/Hash/des_crypt_1.py (+13, -0)
  96. Target/Hash/des_crypt_2.py (+29, -0)
  97. Target/Hash/des_crypt_27.py (+29, -0)
  98. Target/Hash/des_crypt_4.py (+23, -0)
  99. Target/Hash/digests_2.py (+4, -0)
  100. Target/Hash/digests_4.py (+12, -0)

+ 1 - 1
.idea/Data.iml

@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">

+ 1 - 4
.idea/misc.xml

@@ -1,10 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (base) (2)" project-jdk-type="Python SDK" />
-<<<<<<< HEAD
-=======
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
->>>>>>> c7f56cb0b54d04e8c4eb2ced24023454d0721d31
 </project>

+ 272 - 0
File/DataManagment.py

@@ -0,0 +1,272 @@
+import omdb
+import os
+import pickle
+import random
+import json
+import copy
+from django.conf import settings
+
+class DataManagment:
+    save_file = None
+
+    def __init__(self):
+        # Should check if the api is up
+        self.load()
+
+    def load(self):
+        # Query omdb and create gamefile
+        try:
+            if __name__ == '__main__':
+                f = open("/tmp/test", "rb")
+            else:
+                f = open(settings.SAVE_FILE, "rb")
+        except Exception:
+            self.save_file = self.init_save()
+        else:
+            self.save_file = pickle.load(f)
+            f.close()
+
+    def dump(self, save=False):
+        try:
+            if __name__ == '__main__':
+                f = open("/tmp/test", "wb")
+            else:
+                f = open(settings.SAVE_FILE, "wb")
+        except Exception:
+            # TODO : Custom exception for saving error
+            raise Exception("Error while saving : data may be unavailable")
+        if self.save_file == None:
+            self.save_file = self.init_save()
+        pickle.dump(self.save_file, f)
+        f.close()
+
+        f = open('/tmp/bidule', 'w')
+        f.write(json.dumps(self.save_file))
+        f.close()
+
+    def load_default_settings(self):
+        # Load the settings into the game object
+        pass
+
+    def init_save(self, name = ''):
+        if __name__ == '__main__':
+            c = [5, 5]
+        else:
+            c = [settings.START_X, settings.START_Y]
+
+        save_file = {
+            'player' : {
+                'strength' : 1,
+                'dir' : 0,
+                'ball' : 5000,
+                'coord' : c,
+            },
+            'movie' : {},
+        }
+        return {
+            'active' : -1,
+            'current' : save_file,
+            'curser' : 0,
+            0: None,
+            1: None,
+            2: None,
+        }
+
+    ### Getter
+    def get_strength(self):
+        return self.save_file['current']['player']['strength']
+
+    def get_movie(self, id):
+        # Return movie based on id
+        save = self.save_file['current']
+        try:
+            return save['movie'][id]
+        except Exception:
+            return None
+    def get_all_movie(self):
+        return self.save_file['current']['movie']
+
+    def get_movies_array(self):
+        tab = []
+        movies = self.save_file['current']['movie']
+        for index, movie in enumerate(self.save_file['current']['movie']):
+            tab.append(movie)
+        return tab
+
+    def get_movie_rand(self):
+        tab = []
+        for index, movie in enumerate(self.save_file['current']['movie']):
+            if movie['catched'] == False:
+                tab.append(index)
+        if (len(tab) == 0):
+            return None
+        return random.choice(tab)
+
+    def get_coord(self):
+        save = self.save_file['current']
+        return save['player']['coord']
+
+    #def get_coord_percent(self):
+    #    coord = self.save_file['current']['player']['coord']
+    #    coord = [coord[0]/ settings.SIZE_X, coord[1] / settings.SIZE_Y]
+
+    def get_coord_percentY(self):
+        coord = self.save_file['current']['player']['coord']
+        return coord[1] / settings.SIZE_Y * 87
+
+    def get_coord_percentX(self):
+        coord = self.save_file['current']['player']['coord']
+        return coord[0] / settings.SIZE_X * 91
+
+    def get_balls(self):
+        return self.save_file['current']['player']['ball']
+
+    def get_strength(self):
+        return self.save_file['current']['player']['strength']
+
+    def add_ball(self):
+        if self.save_file['current']['player']['ball'] < 9:
+            self.save_file['current']['player']['ball'] += 1
+
+    def get_save_file(self):
+        return self.save_file
+
+    def get_movie_catched(self, id):
+        n = 0
+
+        for movie in self.save_file[id]['movie']:
+            if movie['catched']:
+                n += 1
+        return n
+
+    def get_movie_count(self, id):
+        return len(self.save_file[id]['movie'])
+
+    def del_ball(self):
+        if self.save_file['current']['player']['ball'] > 0:
+            self.save_file['current']['player']['ball'] -= 1
+
+    # Setter
+    def set_active_file(self, num):
+        print("******* LOADING NEW SAVE *******")
+        self.save_file['active'] = num
+        self.save_file['current'] = self.save_file[num]
+
+    def set_save(self, num):
+        print("******* SAVING *******")
+        #self.save_file['active'] = num
+        #print(self.save_file[num]['player']['dir'])
+        self.save_file[num] = copy.deepcopy(self.save_file['current'])
+        #print(self.save_file[num]['player']['dir'])
+
+
+
+    ### Model action
+    def new_file(self, num):
+        if __name__ == '__main__':
+            c = [5, 5]
+        else:
+            c = [settings.START_X, settings.START_Y]
+        self.save_file['current'] = {
+            'player' : {
+                'strength' : 1,
+                'dir' : 0,
+                'ball' : 5000,
+                'coord' : c,
+            },
+            'movie' : {},
+        }
+
+        omdb.set_default('apikey', settings.OMDBAPI_KEY)
+        movies = []
+        for movie in settings.MOVIE_LIST:
+            data = omdb.imdbid(movie)
+            movies.append({
+                'name': data['title'],
+                'strength': int(float(data['imdb_rating'])),
+                'rating': data['imdb_rating'],
+                'actors': data['actors'],
+                'year': data['released'].split(' ')[2],
+                'image': data['poster'],
+                'director': data['director'],
+                'catched': False,
+            })
+
+        self.save_file['current']['movie'] = movies
+
+    def strength_up(self):
+        self.save_file['current']['player']['strength'] += 1
+
+    def getdir(self):
+        return self.save_file['current']['player']['dir']
+
+    def setdir(self, dir):
+        self.save_file['current']['player']['dir'] = dir
+
+    def catch_movie(self, id):
+        self.save_file['current']['movie'][id]['catched'] = True
+
+    # Movement
+    def go_up(self):
+        if self.save_file['current']['player']['coord'][1] > 0:
+            self.save_file['current']['player']['coord'][1] -= 1
+
+    def go_down(self):
+        if self.save_file['current']['player']['coord'][1] < settings.SIZE_Y:
+            self.save_file['current']['player']['coord'][1] += 1
+
+    def go_left(self):
+        if self.save_file['current']['player']['coord'][0] > 0:
+            self.save_file['current']['player']['coord'][0] -= 1
+
+    def go_right(self):
+        if self.save_file['current']['player']['coord'][0] < settings.SIZE_X:
+            self.save_file['current']['player']['coord'][0] += 1
+
+    def find_random(self):
+        return (5 == random.randint(2, 8))
+
+
+if __name__ == '__main__':
+    c = DataManagment()
+    settings.configure()
+
+    # Load without file
+    c.load()
+
+    c.set_active_file(0)
+
+    print("Coord :", c.get_coord())
+    print("Strength :", c.get_strength())
+    c.strength_up()
+    print("Strength up :", c.get_strength())
+
+    c.dump()
+
+    c.load()
+    print("\nRound 2:\nCoord :", c.get_coord())
+    print("Strength :", c.get_strength())
+    c.strength_up()
+    print("Strength up :", c.get_strength())
+
+    c.go_up()
+    print("Coord :", c.get_coord())
+    c.go_up()
+    c.go_up()
+    c.go_up()
+    c.go_up()
+    print("Coord :", c.get_coord())
+    c.go_up()
+    c.go_up()
+    c.go_up()
+    print("Coord :", c.get_coord())
+
+    print(c.get_movie_rand())
+    #c.new_file(0)
+
+    #m = c.get_movie(0)
+    #print(m['name'], m['strength'], m['catched'])
+
+    #c.catch_movie(0)
+    #m = c.get_movie(0)
+    #print(m['name'], m['strength'], m['catched'])

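For orientation, a minimal usage sketch of the DataManagment store added above. The import path, save-file location, and settings values are hypothetical; the class reads its configuration from django.conf.settings, so the Django and omdb packages imported at the top of the module must be available.

    from django.conf import settings
    # Hypothetical settings; DataManagment looks these names up on django.conf.settings.
    settings.configure(SAVE_FILE="/tmp/demo_save", START_X=5, START_Y=5, SIZE_X=20, SIZE_Y=20)

    from File.DataManagment import DataManagment  # hypothetical import path mirroring the repo layout

    store = DataManagment()    # loads settings.SAVE_FILE, or initializes a fresh save structure
    store.go_right()           # moves the player, bounded by settings.SIZE_X
    store.strength_up()
    print(store.get_coord(), store.get_strength(), store.get_balls())
    store.dump()               # pickles the save structure back to settings.SAVE_FILE
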
+ 139 - 0
File/checkpoint.py

@@ -0,0 +1,139 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+import logging
+import os
+
+import torch
+
+from maskrcnn_benchmark.utils.model_serialization import load_state_dict
+from maskrcnn_benchmark.utils.c2_model_loading import load_c2_format
+from maskrcnn_benchmark.utils.imports import import_file
+from maskrcnn_benchmark.utils.model_zoo import cache_url
+
+
+class Checkpointer(object):
+    def __init__(
+        self,
+        model,
+        optimizer=None,
+        scheduler=None,
+        save_dir="",
+        save_to_disk=None,
+        logger=None,
+    ):
+        self.model = model
+        self.optimizer = optimizer
+        self.scheduler = scheduler
+        self.save_dir = save_dir
+        self.save_to_disk = save_to_disk
+        if logger is None:
+            logger = logging.getLogger(__name__)
+        self.logger = logger
+
+    def save(self, name, **kwargs):
+        if not self.save_dir:
+            return
+
+        if not self.save_to_disk:
+            return
+
+        data = {}
+        data["model"] = self.model.state_dict()
+        if self.optimizer is not None:
+            data["optimizer"] = self.optimizer.state_dict()
+        if self.scheduler is not None:
+            data["scheduler"] = self.scheduler.state_dict()
+        data.update(kwargs)
+
+        save_file = os.path.join(self.save_dir, "{}.pth".format(name))
+        self.logger.info("Saving checkpoint to {}".format(save_file))
+        torch.save(data, save_file)
+        self.tag_last_checkpoint(save_file)
+
+    def load(self, f=None, use_latest=True):
+        if self.has_checkpoint() and use_latest:
+            # override argument with existing checkpoint
+            f = self.get_checkpoint_file()
+        if not f:
+            # no checkpoint could be found
+            self.logger.info("No checkpoint found. Initializing model from scratch")
+            return {}
+        self.logger.info("Loading checkpoint from {}".format(f))
+        checkpoint = self._load_file(f)
+        self._load_model(checkpoint)
+        if "optimizer" in checkpoint and self.optimizer:
+            self.logger.info("Loading optimizer from {}".format(f))
+            self.optimizer.load_state_dict(checkpoint.pop("optimizer"))
+        if "scheduler" in checkpoint and self.scheduler:
+            self.logger.info("Loading scheduler from {}".format(f))
+            self.scheduler.load_state_dict(checkpoint.pop("scheduler"))
+
+        # return any further checkpoint data
+        return checkpoint
+
+    def has_checkpoint(self):
+        save_file = os.path.join(self.save_dir, "last_checkpoint")
+        return os.path.exists(save_file)
+
+    def get_checkpoint_file(self):
+        save_file = os.path.join(self.save_dir, "last_checkpoint")
+        try:
+            with open(save_file, "r") as f:
+                last_saved = f.read()
+                last_saved = last_saved.strip()
+        except IOError:
+            # if file doesn't exist, maybe because it has just been
+            # deleted by a separate process
+            last_saved = ""
+        return last_saved
+
+    def tag_last_checkpoint(self, last_filename):
+        save_file = os.path.join(self.save_dir, "last_checkpoint")
+        with open(save_file, "w") as f:
+            f.write(last_filename)
+
+    def _load_file(self, f):
+        return torch.load(f, map_location=torch.device("cpu"))
+
+    def _load_model(self, checkpoint):
+        load_state_dict(self.model, checkpoint.pop("model"))
+
+
+class DetectronCheckpointer(Checkpointer):
+    def __init__(
+        self,
+        cfg,
+        model,
+        optimizer=None,
+        scheduler=None,
+        save_dir="",
+        save_to_disk=None,
+        logger=None,
+    ):
+        super(DetectronCheckpointer, self).__init__(
+            model, optimizer, scheduler, save_dir, save_to_disk, logger
+        )
+        self.cfg = cfg.clone()
+
+    def _load_file(self, f):
+        # catalog lookup
+        if f.startswith("catalog://"):
+            paths_catalog = import_file(
+                "maskrcnn_benchmark.config.paths_catalog", self.cfg.PATHS_CATALOG, True
+            )
+            catalog_f = paths_catalog.ModelCatalog.get(f[len("catalog://") :])
+            self.logger.info("{} points to {}".format(f, catalog_f))
+            f = catalog_f
+        # download url files
+        if f.startswith("http"):
+            # if the file is a url path, download it and cache it
+            cached_f = cache_url(f)
+            self.logger.info("url {} cached in {}".format(f, cached_f))
+            f = cached_f
+        # convert Caffe2 checkpoint from pkl
+        if f.endswith(".pkl"):
+            return load_c2_format(self.cfg, f)
+        # load native detectron.pytorch checkpoint
+        loaded = super(DetectronCheckpointer, self)._load_file(f)
+        if "model" not in loaded:
+            loaded = dict(model=loaded)
+        return loaded

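A minimal sketch of driving the Checkpointer added above with a toy model. The save directory and import path are hypothetical; it assumes PyTorch and the maskrcnn_benchmark utilities imported at the top of the file are installed.

    import os
    import torch
    from File.checkpoint import Checkpointer  # hypothetical import path mirroring the repo layout

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    os.makedirs("/tmp/ckpts", exist_ok=True)
    ckpt = Checkpointer(model, optimizer=optimizer, save_dir="/tmp/ckpts", save_to_disk=True)
    ckpt.save("model_0001", iteration=1)  # writes model_0001.pth and updates the last_checkpoint tag
    extra = ckpt.load()                   # restores the latest checkpoint; extra == {"iteration": 1}
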
+ 224 - 0
File/file.py

@@ -0,0 +1,224 @@
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+import os
+import re
+import zipfile
+from pathlib import Path
+from typing import Dict, Generator, List, Optional, Pattern
+
+from airflow.configuration import conf
+
+log = logging.getLogger(__name__)
+
+
+def TemporaryDirectory(*args, **kwargs):  # pylint: disable=invalid-name
+    """This function is deprecated. Please use `tempfile.TemporaryDirectory`"""
+    import warnings
+    from tempfile import TemporaryDirectory as TmpDir
+
+    warnings.warn(
+        "This function is deprecated. Please use `tempfile.TemporaryDirectory`",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return TmpDir(*args, **kwargs)
+
+
+def mkdirs(path, mode):
+    """
+    Creates the directory specified by path, creating intermediate directories
+    as necessary. If directory already exists, this is a no-op.
+
+    :param path: The directory to create
+    :type path: str
+    :param mode: The mode to give to the directory e.g. 0o755, ignores umask
+    :type mode: int
+    """
+    import warnings
+
+    warnings.warn(
+        f"This function is deprecated. Please use `pathlib.Path({path}).mkdir`",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    Path(path).mkdir(mode=mode, parents=True, exist_ok=True)
+
+
+ZIP_REGEX = re.compile(r'((.*\.zip){})?(.*)'.format(re.escape(os.sep)))
+
+
+def correct_maybe_zipped(fileloc):
+    """
+    If the path contains a folder with a .zip suffix, then
+    the folder is treated as a zip archive and path to zip is returned.
+    """
+    _, archive, _ = ZIP_REGEX.search(fileloc).groups()
+    if archive and zipfile.is_zipfile(archive):
+        return archive
+    else:
+        return fileloc
+
+
+def open_maybe_zipped(fileloc, mode='r'):
+    """
+    Opens the given file. If the path contains a folder with a .zip suffix, then
+    the folder is treated as a zip archive, opening the file inside the archive.
+
+    :return: a file object, as in `open`, or as in `ZipFile.open`.
+    """
+    _, archive, filename = ZIP_REGEX.search(fileloc).groups()
+    if archive and zipfile.is_zipfile(archive):
+        return zipfile.ZipFile(archive, mode=mode).open(filename)
+    else:
+        return open(fileloc, mode=mode)
+
+
+def find_path_from_directory(base_dir_path: str, ignore_file_name: str) -> Generator[str, None, None]:
+    """
+    Search the file and return the path of the file that should not be ignored.
+    :param base_dir_path: the base path to be searched for.
+    :param ignore_file_name: the file name in which specifies a regular expression pattern is written.
+
+    :return : file path not to be ignored.
+    """
+    patterns_by_dir: Dict[str, List[Pattern[str]]] = {}
+
+    for root, dirs, files in os.walk(str(base_dir_path), followlinks=True):
+        patterns: List[Pattern[str]] = patterns_by_dir.get(root, [])
+
+        ignore_file_path = os.path.join(root, ignore_file_name)
+        if os.path.isfile(ignore_file_path):
+            with open(ignore_file_path) as file:
+                lines_no_comments = [re.sub(r"\s*#.*", "", line) for line in file.read().split("\n")]
+                patterns += [re.compile(line) for line in lines_no_comments if line]
+                patterns = list(set(patterns))
+
+        dirs[:] = [
+            subdir
+            for subdir in dirs
+            if not any(
+                p.search(os.path.join(os.path.relpath(root, str(base_dir_path)), subdir)) for p in patterns
+            )
+        ]
+
+        patterns_by_dir.update({os.path.join(root, sd): patterns.copy() for sd in dirs})
+
+        for file in files:  # type: ignore
+            if file == ignore_file_name:
+                continue
+            abs_file_path = os.path.join(root, str(file))
+            rel_file_path = os.path.join(os.path.relpath(root, str(base_dir_path)), str(file))
+            if any(p.search(rel_file_path) for p in patterns):
+                continue
+            yield str(abs_file_path)
+
+
+def list_py_file_paths(
+    directory: str,
+    safe_mode: bool = conf.getboolean('core', 'DAG_DISCOVERY_SAFE_MODE', fallback=True),
+    include_examples: Optional[bool] = None,
+    include_smart_sensor: Optional[bool] = conf.getboolean('smart_sensor', 'use_smart_sensor'),
+):
+    """
+    Traverse a directory and look for Python files.
+
+    :param directory: the directory to traverse
+    :type directory: unicode
+    :param safe_mode: whether to use a heuristic to determine whether a file
+        contains Airflow DAG definitions. If not provided, use the
+        core.DAG_DISCOVERY_SAFE_MODE configuration setting. If not set, default
+        to safe.
+    :type safe_mode: bool
+    :param include_examples: include example DAGs
+    :type include_examples: bool
+    :param include_smart_sensor: include smart sensor native control DAGs
+    :type include_smart_sensor: bool
+    :return: a list of paths to Python files in the specified directory
+    :rtype: list[unicode]
+    """
+    if include_examples is None:
+        include_examples = conf.getboolean('core', 'LOAD_EXAMPLES')
+    file_paths: List[str] = []
+    if directory is None:
+        file_paths = []
+    elif os.path.isfile(directory):
+        file_paths = [directory]
+    elif os.path.isdir(directory):
+        find_dag_file_paths(directory, file_paths, safe_mode)
+    if include_examples:
+        from airflow import example_dags
+
+        example_dag_folder = example_dags.__path__[0]  # type: ignore
+        file_paths.extend(list_py_file_paths(example_dag_folder, safe_mode, False, False))
+    if include_smart_sensor:
+        from airflow import smart_sensor_dags
+
+        smart_sensor_dag_folder = smart_sensor_dags.__path__[0]  # type: ignore
+        file_paths.extend(list_py_file_paths(smart_sensor_dag_folder, safe_mode, False, False))
+    return file_paths
+
+
+def find_dag_file_paths(directory: str, file_paths: list, safe_mode: bool):
+    """Finds file paths of all DAG files."""
+    for file_path in find_path_from_directory(directory, ".airflowignore"):
+        try:
+            if not os.path.isfile(file_path):
+                continue
+            _, file_ext = os.path.splitext(os.path.split(file_path)[-1])
+            if file_ext != '.py' and not zipfile.is_zipfile(file_path):
+                continue
+            if not might_contain_dag(file_path, safe_mode):
+                continue
+
+            file_paths.append(file_path)
+        except Exception:  # noqa pylint: disable=broad-except
+            log.exception("Error while examining %s", file_path)
+
+
+COMMENT_PATTERN = re.compile(r"\s*#.*")
+
+
+def might_contain_dag(file_path: str, safe_mode: bool, zip_file: Optional[zipfile.ZipFile] = None):
+    """
+    Heuristic that guesses whether a Python file contains an Airflow DAG definition.
+
+    :param file_path: Path to the file to be checked.
+    :param safe_mode: Is safe mode active?. If no, this function always returns True.
+    :param zip_file: if passed, checks the archive. Otherwise, check local filesystem.
+    :return: True, if file might contain DAGS.
+    """
+    if not safe_mode:
+        return True
+    if zip_file:
+        with zip_file.open(file_path) as current_file:
+            content = current_file.read()
+    else:
+        if zipfile.is_zipfile(file_path):
+            return True
+        with open(file_path, 'rb') as dag_file:
+            content = dag_file.read()
+    content = content.lower()
+    return all(s in content for s in (b'dag', b'airflow'))
+
+
+def get_sha1hash(file_path: str) -> str:
+    import hashlib
+    with open(file_path, 'rb') as file:
+        return hashlib.sha1(file.read()).hexdigest()

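A small sketch of the path helpers added above. The DAG paths are hypothetical, and an Airflow installation is assumed so that airflow.configuration resolves when the module is imported.

    from File.file import correct_maybe_zipped, open_maybe_zipped, might_contain_dag  # hypothetical import path

    # For a path that points inside a .zip archive, the archive path is returned instead.
    print(correct_maybe_zipped("/dags/bundle.zip/my_dag.py"))  # "/dags/bundle.zip" when that archive is a valid zip

    # Plain files are opened with open(); files inside archives go through ZipFile.open().
    with open_maybe_zipped("/dags/plain_dag.py") as f:
        print(f.read()[:80])

    # In safe mode a file is only a DAG candidate if its contents mention both "dag" and "airflow".
    print(might_contain_dag("/dags/plain_dag.py", safe_mode=True))
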
+ 155 - 0
File/file_1.py

@@ -0,0 +1,155 @@
+import json
+import os
+import random
+from os import getcwd
+from os.path import isfile
+from typing import Optional
+
+import pandas as pd
+from PyQt5.QtWidgets import QFileDialog, QInputDialog, QMessageBox
+
+
+def dict_from_json(filename: str) -> dict:
+    if isfile(filename):
+        with open(filename) as f:
+            return json.load(f)
+    else:
+        return {}
+
+
+def mass_from_xlsx(path: str) -> dict:
+    logs = {}
+    xl = pd.ExcelFile(path)
+    df = xl.parse(xl.sheet_names[0])
+    for name in df.keys():
+        logs[name] = list(df[name].values)
+    return logs
+
+
+def save_dict_as_json(data: dict, path: str = os.getcwd(),
+                      filename: str = f'/temp_files/lay_name{random.randint(1, 1000)}') -> str:
+    if path.__contains__('.json'):
+        path_save = path
+    elif filename.__contains__('.json'):
+        path_save = filename
+    elif filename.__contains__('.'):
+        path_save = path + '/' + filename.split('/')[-1]
+    else:
+        path_save = path + f'/{filename}.json'
+    path_save.replace("\\", '/')
+    path_save.replace('//', '/')
+
+    try:
+        json_file = open(path_save, mode='x')
+    except FileNotFoundError:
+        os.mkdir(path_save.split('lay_name')[0])
+        json_file = open(path_save, mode='x')
+    except FileExistsError:
+        json_file = open(path_save, mode='w')
+    json.dump(data, json_file)
+    json_file.close()
+    return path_save
+
+
+class FileEdit:
+    # Messages
+    create_project_default = 'Введите название проекта:'
+    create_file_default = 'Введите название файла:'
+    create_file_error = 'Не удалось создать файл,  \nфайл с таким именем уже существует'
+    data_log = 'log.log'
+    data_model_name = 'data.model'
+    data_polygon_model = 'polygon.model'
+
+    def __init__(self, project_path=''):
+        self.project_path = project_path
+
+    @property
+    def model_path(self):
+        if os.path.isdir(self.project_path):
+            if os.path.isfile(self.project_path + '/' + FileEdit.data_model_name):
+                pass
+            else:
+                save_dict_as_json({}, self.project_path, FileEdit.data_model_name)
+            return self.project_path + '/' + FileEdit.data_model_name
+        return ''
+
+    @property
+    def polygon_model_path(self):
+        if os.path.isdir(self.project_path):
+            return self.project_path + '/' + FileEdit.data_polygon_model
+
+    @property
+    def log_path(self):
+        if os.path.isdir(self.project_path):
+            return self.project_path + '/' + FileEdit.data_log
+
+    def save_log(self, data):
+        if not self.log_path:
+            self.open_project()
+        if self.log_path:
+            save_dict_as_json(data=data, path=self.project_path, filename=self.log_path)
+
+    def save_model_file(self, data) -> str:
+        if not self.model_path:
+            self.create_project()
+        if self.model_path:
+            return save_dict_as_json(data=data, path=self.project_path, filename=self.model_path)
+        else:
+            return ''
+
+    def save_polygon_model(self, data) -> str:
+        if not self.polygon_model_path:
+            self.create_project()
+        if self.polygon_model_path:
+            return save_dict_as_json(data=data, path=self.project_path,
+                                     filename=self.polygon_model_path)
+        else:
+            return ''
+
+    def save_file(self, data: dict):
+        if not self.project_path:
+            self.create_file()
+        save_dict_as_json(data=data, filename=self.project_path)
+
+    def open_file(self, file_extension='json'):
+        message = f'{file_extension} Files (*.{file_extension})'
+        self.project_path, _ = QFileDialog.getOpenFileName(None, '', getcwd(), message)
+        return self.project_path
+
+    def create_file(self, message=None, extension: str = '', filename: str = '') -> Optional[str]:
+        if not message:
+            message = self.create_file_default
+        filename, ok = QInputDialog.getText(None, 'Input Dialog', str(message)) \
+                           if not filename else (filename, True)
+
+        if ok and filename and filename != '':
+            path = QFileDialog.getExistingDirectory(None, getcwd())
+            if filename.__contains__('.') or extension is None:
+                return f'{path}/{filename}'
+            else:
+                return f'{path}/{filename}{"." + extension.replace(".", "")}'
+
+        return None
+
+    def open_project(self):
+        self.project_path = QFileDialog.getExistingDirectory(None, '', getcwd())
+        if self.project_path:
+            return self.project_path
+
+    def create_project(self) -> Optional[str]:
+        name, ok = QInputDialog.getText(None, 'Input Dialog', str(FileEdit.create_project_default))
+        if not ok or name == '':
+            return
+
+        project_name = f'/{name.replace(" ", "_")}.oilcase'
+        self.project_path = QFileDialog.getExistingDirectory(None, getcwd()) + project_name
+
+        try:
+            os.mkdir(self.project_path)
+            save_dict_as_json({}, self.project_path, 'data.model')
+            return self.project_path
+        except FileExistsError as e:
+            msg = QMessageBox()
+            msg.setWindowTitle("Project create error")
+            msg.setText(str(e))
+            msg.exec_()

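A minimal sketch of the JSON helpers added above. The path is hypothetical; PyQt5 and pandas are assumed to be installed, since the module imports them at the top, and the FileEdit dialogs additionally need a running Qt application.

    from File.file_1 import save_dict_as_json, dict_from_json  # hypothetical import path

    saved_path = save_dict_as_json({"wells": [1, 2, 3]}, filename="/tmp/example.json")
    print(saved_path)                  # "/tmp/example.json"
    print(dict_from_json(saved_path))  # {'wells': [1, 2, 3]}
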
+ 33 - 0
File/file_util.py

@@ -0,0 +1,33 @@
+import os
+
+def read_dir(root):
+	file_path_list = []
+	for file_path, dirs, files in os.walk(root):
+		for file in files:
+			file_path_list.append(os.path.join(file_path, file).replace('\\', '/'))
+	file_path_list.sort()
+	return file_path_list
+
+def read_file(file_path):
+	file_object = open(file_path, 'r')
+	file_content = file_object.read()
+	file_object.close()
+	return file_content
+
+def write_file(file_path, file_content):
+	if file_path.find('/') != -1:
+		father_dir = '/'.join(file_path.split('/')[0:-1])
+		if not os.path.exists(father_dir):
+			os.makedirs(father_dir)
+	file_object = open(file_path, 'w')
+	file_object.write(file_content)
+	file_object.close()
+
+
+def write_file_not_cover(file_path, file_content):
+	father_dir = '/'.join(file_path.split('/')[0:-1])
+	if not os.path.exists(father_dir):
+		os.makedirs(father_dir)
+	file_object = open(file_path, 'a')
+	file_object.write(file_content)
+	file_object.close()

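A quick sketch of the helpers added above (the directory is hypothetical):

    from File.file_util import write_file, read_file, read_dir  # hypothetical import path

    write_file('/tmp/demo/notes.txt', 'hello')  # creates /tmp/demo if needed, then (over)writes the file
    print(read_file('/tmp/demo/notes.txt'))     # 'hello'
    print(read_dir('/tmp/demo'))                # ['/tmp/demo/notes.txt']
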
+ 398 - 0
File/files.py

@@ -0,0 +1,398 @@
+import datetime
+import os
+
+import django.utils.copycompat as copy
+
+from django.conf import settings
+from django.db.models.fields import Field
+from django.core.files.base import File, ContentFile
+from django.core.files.storage import default_storage
+from django.core.files.images import ImageFile, get_image_dimensions
+from django.core.files.uploadedfile import UploadedFile
+from django.utils.functional import curry
+from django.db.models import signals
+from django.utils.encoding import force_unicode, smart_str
+from django.utils.translation import ugettext_lazy, ugettext as _
+from django import forms
+from django.db.models.loading import cache
+
+class FieldFile(File):
+    def __init__(self, instance, field, name):
+        super(FieldFile, self).__init__(None, name)
+        self.instance = instance
+        self.field = field
+        self.storage = field.storage
+        self._committed = True
+
+    def __eq__(self, other):
+        # Older code may be expecting FileField values to be simple strings.
+        # By overriding the == operator, it can remain backwards compatibility.
+        if hasattr(other, 'name'):
+            return self.name == other.name
+        return self.name == other
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        # Required because we defined a custom __eq__.
+        return hash(self.name)
+
+    # The standard File contains most of the necessary properties, but
+    # FieldFiles can be instantiated without a name, so that needs to
+    # be checked for here.
+
+    def _require_file(self):
+        if not self:
+            raise ValueError("The '%s' attribute has no file associated with it." % self.field.name)
+
+    def _get_file(self):
+        self._require_file()
+        if not hasattr(self, '_file') or self._file is None:
+            self._file = self.storage.open(self.name, 'rb')
+        return self._file
+
+    def _set_file(self, file):
+        self._file = file
+
+    def _del_file(self):
+        del self._file
+
+    file = property(_get_file, _set_file, _del_file)
+
+    def _get_path(self):
+        self._require_file()
+        return self.storage.path(self.name)
+    path = property(_get_path)
+
+    def _get_url(self):
+        self._require_file()
+        return self.storage.url(self.name)
+    url = property(_get_url)
+
+    def _get_size(self):
+        self._require_file()
+        if not self._committed:
+            return self.file.size
+        return self.storage.size(self.name)
+    size = property(_get_size)
+
+    def open(self, mode='rb'):
+        self._require_file()
+        self.file.open(mode)
+    # open() doesn't alter the file's contents, but it does reset the pointer
+    open.alters_data = True
+
+    # In addition to the standard File API, FieldFiles have extra methods
+    # to further manipulate the underlying file, as well as update the
+    # associated model instance.
+
+    def save(self, name, content, save=True):
+        name = self.field.generate_filename(self.instance, name)
+        self.name = self.storage.save(name, content)
+        setattr(self.instance, self.field.name, self.name)
+
+        # Update the filesize cache
+        self._size = content.size
+        self._committed = True
+
+        # Save the object because it has changed, unless save is False
+        if save:
+            self.instance.save()
+    save.alters_data = True
+
+    def delete(self, save=True):
+        # Only close the file if it's already open, which we know by the
+        # presence of self._file
+        if hasattr(self, '_file'):
+            self.close()
+            del self.file
+
+        self.storage.delete(self.name)
+
+        self.name = None
+        setattr(self.instance, self.field.name, self.name)
+
+        # Delete the filesize cache
+        if hasattr(self, '_size'):
+            del self._size
+        self._committed = False
+
+        if save:
+            self.instance.save()
+    delete.alters_data = True
+
+    def _get_closed(self):
+        file = getattr(self, '_file', None)
+        return file is None or file.closed
+    closed = property(_get_closed)
+
+    def close(self):
+        file = getattr(self, '_file', None)
+        if file is not None:
+            file.close()
+
+    def __getstate__(self):
+        # FieldFile needs access to its associated model field and an instance
+        # it's attached to in order to work properly, but the only necessary
+        # data to be pickled is the file's name itself. Everything else will
+        # be restored later, by FileDescriptor below.
+        return {'name': self.name, 'closed': False, '_committed': True, '_file': None}
+
+class FileDescriptor(object):
+    """
+    The descriptor for the file attribute on the model instance. Returns a
+    FieldFile when accessed so you can do stuff like::
+
+        >>> instance.file.size
+
+    Assigns a file object on assignment so you can do::
+
+        >>> instance.file = File(...)
+
+    """
+    def __init__(self, field):
+        self.field = field
+
+    def __get__(self, instance=None, owner=None):
+        if instance is None:
+            raise AttributeError(
+                "The '%s' attribute can only be accessed from %s instances."
+                % (self.field.name, owner.__name__))
+
+        # This is slightly complicated, so worth an explanation.
+        # instance.file`needs to ultimately return some instance of `File`,
+        # probably a subclass. Additionally, this returned object needs to have
+        # the FieldFile API so that users can easily do things like
+        # instance.file.path and have that delegated to the file storage engine.
+        # Easy enough if we're strict about assignment in __set__, but if you
+        # peek below you can see that we're not. So depending on the current
+        # value of the field we have to dynamically construct some sort of
+        # "thing" to return.
+
+        # The instance dict contains whatever was originally assigned
+        # in __set__.
+        file = instance.__dict__[self.field.name]
+
+        # If this value is a string (instance.file = "path/to/file") or None
+        # then we simply wrap it with the appropriate attribute class according
+        # to the file field. [This is FieldFile for FileFields and
+        # ImageFieldFile for ImageFields; it's also conceivable that user
+        # subclasses might also want to subclass the attribute class]. This
+        # object understands how to convert a path to a file, and also how to
+        # handle None.
+        if isinstance(file, basestring) or file is None:
+            attr = self.field.attr_class(instance, self.field, file)
+            instance.__dict__[self.field.name] = attr
+
+        # Other types of files may be assigned as well, but they need to have
+        # the FieldFile interface added to them. Thus, we wrap any other type of
+        # File inside a FieldFile (well, the field's attr_class, which is
+        # usually FieldFile).
+        elif isinstance(file, File) and not isinstance(file, FieldFile):
+            file_copy = self.field.attr_class(instance, self.field, file.name)
+            file_copy.file = file
+            file_copy._committed = False
+            instance.__dict__[self.field.name] = file_copy
+
+        # Finally, because of the (some would say boneheaded) way pickle works,
+        # the underlying FieldFile might not actually itself have an associated
+        # file. So we need to reset the details of the FieldFile in those cases.
+        elif isinstance(file, FieldFile) and not hasattr(file, 'field'):
+            file.instance = instance
+            file.field = self.field
+            file.storage = self.field.storage
+
+        # That was fun, wasn't it?
+        return instance.__dict__[self.field.name]
+
+    def __set__(self, instance, value):
+        instance.__dict__[self.field.name] = value
+
+class FileField(Field):
+    # The class to wrap instance attributes in. Accessing the file object off
+    # the instance will always return an instance of attr_class.
+    attr_class = FieldFile
+
+    # The descriptor to use for accessing the attribute off of the class.
+    descriptor_class = FileDescriptor
+
+    description = ugettext_lazy("File path")
+
+    def __init__(self, verbose_name=None, name=None, upload_to='', storage=None, **kwargs):
+        for arg in ('primary_key', 'unique'):
+            if arg in kwargs:
+                raise TypeError("'%s' is not a valid argument for %s." % (arg, self.__class__))
+
+        self.storage = storage or default_storage
+        self.upload_to = upload_to
+        if callable(upload_to):
+            self.generate_filename = upload_to
+
+        kwargs['max_length'] = kwargs.get('max_length', 100)
+        super(FileField, self).__init__(verbose_name, name, **kwargs)
+
+    def get_internal_type(self):
+        return "FileField"
+
+    def get_prep_lookup(self, lookup_type, value):
+        if hasattr(value, 'name'):
+            value = value.name
+        return super(FileField, self).get_prep_lookup(lookup_type, value)
+
+    def get_prep_value(self, value):
+        "Returns field's value prepared for saving into a database."
+        # Need to convert File objects provided via a form to unicode for database insertion
+        if value is None:
+            return None
+        return unicode(value)
+
+    def pre_save(self, model_instance, add):
+        "Returns field's value just before saving."
+        file = super(FileField, self).pre_save(model_instance, add)
+        if file and not file._committed:
+            # Commit the file to storage prior to saving the model
+            file.save(file.name, file, save=False)
+        return file
+
+    def contribute_to_class(self, cls, name):
+        super(FileField, self).contribute_to_class(cls, name)
+        setattr(cls, self.name, self.descriptor_class(self))
+
+    def get_directory_name(self):
+        return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
+
+    def get_filename(self, filename):
+        return os.path.normpath(self.storage.get_valid_name(os.path.basename(filename)))
+
+    def generate_filename(self, instance, filename):
+        return os.path.join(self.get_directory_name(), self.get_filename(filename))
+
+    def save_form_data(self, instance, data):
+        # Important: None means "no change", other false value means "clear"
+        # This subtle distinction (rather than a more explicit marker) is
+        # needed because we need to consume values that are also sane for a
+        # regular (non Model-) Form to find in its cleaned_data dictionary.
+        if data is not None:
+            # This value will be converted to unicode and stored in the
+            # database, so leaving False as-is is not acceptable.
+            if not data:
+                data = ''
+            setattr(instance, self.name, data)
+
+    def formfield(self, **kwargs):
+        defaults = {'form_class': forms.FileField, 'max_length': self.max_length}
+        # If a file has been provided previously, then the form doesn't require
+        # that a new file is provided this time.
+        # The code to mark the form field as not required is used by
+        # form_for_instance, but can probably be removed once form_for_instance
+        # is gone. ModelForm uses a different method to check for an existing file.
+        if 'initial' in kwargs:
+            defaults['required'] = False
+        defaults.update(kwargs)
+        return super(FileField, self).formfield(**defaults)
+
+class ImageFileDescriptor(FileDescriptor):
+    """
+    Just like the FileDescriptor, but for ImageFields. The only difference is
+    assigning the width/height to the width_field/height_field, if appropriate.
+    """
+    def __set__(self, instance, value):
+        previous_file = instance.__dict__.get(self.field.name)
+        super(ImageFileDescriptor, self).__set__(instance, value)
+
+        # To prevent recalculating image dimensions when we are instantiating
+        # an object from the database (bug #11084), only update dimensions if
+        # the field had a value before this assignment.  Since the default
+        # value for FileField subclasses is an instance of field.attr_class,
+        # previous_file will only be None when we are called from
+        # Model.__init__().  The ImageField.update_dimension_fields method
+        # hooked up to the post_init signal handles the Model.__init__() cases.
+        # Assignment happening outside of Model.__init__() will trigger the
+        # update right here.
+        if previous_file is not None:
+            self.field.update_dimension_fields(instance, force=True)
+
+class ImageFieldFile(ImageFile, FieldFile):
+    def delete(self, save=True):
+        # Clear the image dimensions cache
+        if hasattr(self, '_dimensions_cache'):
+            del self._dimensions_cache
+        super(ImageFieldFile, self).delete(save)
+
+class ImageField(FileField):
+    attr_class = ImageFieldFile
+    descriptor_class = ImageFileDescriptor
+    description = ugettext_lazy("File path")
+
+    def __init__(self, verbose_name=None, name=None, width_field=None, height_field=None, **kwargs):
+        self.width_field, self.height_field = width_field, height_field
+        FileField.__init__(self, verbose_name, name, **kwargs)
+
+    def contribute_to_class(self, cls, name):
+        super(ImageField, self).contribute_to_class(cls, name)
+        # Attach update_dimension_fields so that dimension fields declared
+        # after their corresponding image field don't stay cleared by
+        # Model.__init__, see bug #11196.
+        signals.post_init.connect(self.update_dimension_fields, sender=cls)
+
+    def update_dimension_fields(self, instance, force=False, *args, **kwargs):
+        """
+        Updates field's width and height fields, if defined.
+
+        This method is hooked up to model's post_init signal to update
+        dimensions after instantiating a model instance.  However, dimensions
+        won't be updated if the dimensions fields are already populated.  This
+        avoids unnecessary recalculation when loading an object from the
+        database.
+
+        Dimensions can be forced to update with force=True, which is how
+        ImageFileDescriptor.__set__ calls this method.
+        """
+        # Nothing to update if the field doesn't have dimension fields.
+        has_dimension_fields = self.width_field or self.height_field
+        if not has_dimension_fields:
+            return
+
+        # getattr will call the ImageFileDescriptor's __get__ method, which
+        # coerces the assigned value into an instance of self.attr_class
+        # (ImageFieldFile in this case).
+        file = getattr(instance, self.attname)
+
+        # Nothing to update if we have no file and not being forced to update.
+        if not file and not force:
+            return
+
+        dimension_fields_filled = not(
+            (self.width_field and not getattr(instance, self.width_field))
+            or (self.height_field and not getattr(instance, self.height_field))
+        )
+        # When both dimension fields have values, we are most likely loading
+        # data from the database or updating an image field that already had
+        # an image stored.  In the first case, we don't want to update the
+        # dimension fields because we are already getting their values from the
+        # database.  In the second case, we do want to update the dimensions
+        # fields and will skip this return because force will be True since we
+        # were called from ImageFileDescriptor.__set__.
+        if dimension_fields_filled and not force:
+            return
+
+        # file should be an instance of ImageFieldFile or should be None.
+        if file:
+            width = file.width
+            height = file.height
+        else:
+            # No file, so clear dimensions fields.
+            width = None
+            height = None
+
+        # Update the width and height fields.
+        if self.width_field:
+            setattr(instance, self.width_field, width)
+        if self.height_field:
+            setattr(instance, self.height_field, height)
+
+    def formfield(self, **kwargs):
+        defaults = {'form_class': forms.ImageField}
+        defaults.update(kwargs)
+        return super(ImageField, self).formfield(**defaults)

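The module above mirrors the legacy Django 1.x FileField/ImageField implementation (it still relies on Python 2 names such as unicode and basestring). A hypothetical model definition showing how these fields are declared, assuming such an environment:

    from django.db import models
    from File.files import FileField, ImageField  # hypothetical import path

    class Report(models.Model):
        document = FileField(upload_to='reports/%Y/%m/')
        cover = ImageField(upload_to='covers/', width_field='cover_width', height_field='cover_height')
        cover_width = models.IntegerField(null=True)   # populated automatically by ImageField
        cover_height = models.IntegerField(null=True)
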
+ 305 - 0
File/filesystem.py

@@ -0,0 +1,305 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2017 sqlmap developers (http://sqlmap.org/)
+See the file 'doc/COPYING' for copying permission
+"""
+
+import os
+import sys
+
+from lib.core.agent import agent
+from lib.core.common import dataToOutFile
+from lib.core.common import Backend
+from lib.core.common import checkFile
+from lib.core.common import decloakToTemp
+from lib.core.common import decodeHexValue
+from lib.core.common import getUnicode
+from lib.core.common import isNumPosStrValue
+from lib.core.common import isListLike
+from lib.core.common import isStackingAvailable
+from lib.core.common import isTechniqueAvailable
+from lib.core.common import readInput
+from lib.core.data import conf
+from lib.core.data import kb
+from lib.core.data import logger
+from lib.core.enums import DBMS
+from lib.core.enums import CHARSET_TYPE
+from lib.core.enums import EXPECTED
+from lib.core.enums import PAYLOAD
+from lib.core.exception import SqlmapUndefinedMethod
+from lib.core.settings import UNICODE_ENCODING
+from lib.request import inject
+
+class Filesystem:
+    """
+    This class defines generic OS file system functionalities for plugins.
+    """
+
+    def __init__(self):
+        self.fileTblName = "sqlmapfile"
+        self.tblField = "data"
+
+    def _checkFileLength(self, localFile, remoteFile, fileRead=False):
+        if Backend.isDbms(DBMS.MYSQL):
+            lengthQuery = "LENGTH(LOAD_FILE('%s'))" % remoteFile
+
+        elif Backend.isDbms(DBMS.PGSQL) and not fileRead:
+            lengthQuery = "SELECT SUM(LENGTH(data)) FROM pg_largeobject WHERE loid=%d" % self.oid
+
+        elif Backend.isDbms(DBMS.MSSQL):
+            self.createSupportTbl(self.fileTblName, self.tblField, "VARBINARY(MAX)")
+            inject.goStacked("INSERT INTO %s(%s) SELECT %s FROM OPENROWSET(BULK '%s', SINGLE_BLOB) AS %s(%s)" % (self.fileTblName, self.tblField, self.tblField, remoteFile, self.fileTblName, self.tblField));
+
+            lengthQuery = "SELECT DATALENGTH(%s) FROM %s" % (self.tblField, self.fileTblName)
+
+        try:
+            localFileSize = os.path.getsize(localFile)
+        except OSError:
+            warnMsg = "file '%s' is missing" % localFile
+            logger.warn(warnMsg)
+            localFileSize = 0
+
+        if fileRead and Backend.isDbms(DBMS.PGSQL):
+            logger.info("length of read file '%s' cannot be checked on PostgreSQL" % remoteFile)
+            sameFile = True
+        else:
+            logger.debug("checking the length of the remote file '%s'" % remoteFile)
+            remoteFileSize = inject.getValue(lengthQuery, resumeValue=False, expected=EXPECTED.INT, charsetType=CHARSET_TYPE.DIGITS)
+            sameFile = None
+
+            if isNumPosStrValue(remoteFileSize):
+                remoteFileSize = long(remoteFileSize)
+                localFile = getUnicode(localFile, encoding=sys.getfilesystemencoding() or UNICODE_ENCODING)
+                sameFile = False
+
+                if localFileSize == remoteFileSize:
+                    sameFile = True
+                    infoMsg = "the local file '%s' and the remote file " % localFile
+                    infoMsg += "'%s' have the same size (%d B)" % (remoteFile, localFileSize)
+                elif remoteFileSize > localFileSize:
+                    infoMsg = "the remote file '%s' is larger (%d B) than " % (remoteFile, remoteFileSize)
+                    infoMsg += "the local file '%s' (%dB)" % (localFile, localFileSize)
+                else:
+                    infoMsg = "the remote file '%s' is smaller (%d B) than " % (remoteFile, remoteFileSize)
+                    infoMsg += "file '%s' (%d B)" % (localFile, localFileSize)
+
+                logger.info(infoMsg)
+            else:
+                sameFile = False
+                warnMsg = "it looks like the file has not been written (usually "
+                warnMsg += "occurs if the DBMS process user has no write "
+                warnMsg += "privileges in the destination path)"
+                logger.warn(warnMsg)
+
+        return sameFile
+
+    def fileToSqlQueries(self, fcEncodedList):
+        """
+        Called by MySQL and PostgreSQL plugins to write a file on the
+        back-end DBMS underlying file system
+        """
+
+        counter = 0
+        sqlQueries = []
+
+        for fcEncodedLine in fcEncodedList:
+            if counter == 0:
+                sqlQueries.append("INSERT INTO %s(%s) VALUES (%s)" % (self.fileTblName, self.tblField, fcEncodedLine))
+            else:
+                updatedField = agent.simpleConcatenate(self.tblField, fcEncodedLine)
+                sqlQueries.append("UPDATE %s SET %s=%s" % (self.fileTblName, self.tblField, updatedField))
+
+            counter += 1
+
+        return sqlQueries
+
+    def fileEncode(self, fileName, encoding, single, chunkSize=256):
+        """
+        Called by MySQL and PostgreSQL plugins to write a file on the
+        back-end DBMS underlying file system
+        """
+
+        with open(fileName, "rb") as f:
+            content = f.read()
+
+        return self.fileContentEncode(content, encoding, single, chunkSize)
+
+    def fileContentEncode(self, content, encoding, single, chunkSize=256):
+        retVal = []
+
+        if encoding:
+            content = content.encode(encoding).replace("\n", "")
+
+        if not single:
+            if len(content) > chunkSize:
+                for i in xrange(0, len(content), chunkSize):
+                    _ = content[i:i + chunkSize]
+
+                    if encoding == "hex":
+                        _ = "0x%s" % _
+                    elif encoding == "base64":
+                        _ = "'%s'" % _
+
+                    retVal.append(_)
+
+        if not retVal:
+            if encoding == "hex":
+                content = "0x%s" % content
+            elif encoding == "base64":
+                content = "'%s'" % content
+
+            retVal = [content]
+
+        return retVal
+
+    def askCheckWrittenFile(self, localFile, remoteFile, forceCheck=False):
+        output = None
+
+        if forceCheck is not True:
+            message = "do you want confirmation that the local file '%s' " % localFile
+            message += "has been successfully written on the back-end DBMS "
+            message += "file system ('%s')? [Y/n] " % remoteFile
+            output = readInput(message, default="Y")
+
+        if forceCheck or (output and output.lower() == "y"):
+            return self._checkFileLength(localFile, remoteFile)
+
+        return True
+
+    def askCheckReadFile(self, localFile, remoteFile):
+        message = "do you want confirmation that the remote file '%s' " % remoteFile
+        message += "has been successfully downloaded from the back-end "
+        message += "DBMS file system? [Y/n] "
+        output = readInput(message, default="Y")
+
+        if not output or output in ("y", "Y"):
+            return self._checkFileLength(localFile, remoteFile, True)
+
+        return None
+
+    def nonStackedReadFile(self, remoteFile):
+        errMsg = "'nonStackedReadFile' method must be defined "
+        errMsg += "inside the specific DBMS plugin"
+        raise SqlmapUndefinedMethod(errMsg)
+
+    def stackedReadFile(self, remoteFile):
+        errMsg = "'stackedReadFile' method must be defined "
+        errMsg += "inside the specific DBMS plugin"
+        raise SqlmapUndefinedMethod(errMsg)
+
+    def unionWriteFile(self, localFile, remoteFile, fileType, forceCheck=False):
+        errMsg = "'unionWriteFile' method must be defined "
+        errMsg += "inside the specific DBMS plugin"
+        raise SqlmapUndefinedMethod(errMsg)
+
+    def stackedWriteFile(self, localFile, remoteFile, fileType, forceCheck=False):
+        errMsg = "'stackedWriteFile' method must be defined "
+        errMsg += "inside the specific DBMS plugin"
+        raise SqlmapUndefinedMethod(errMsg)
+
+    def readFile(self, remoteFiles):
+        localFilePaths = []
+
+        self.checkDbmsOs()
+
+        for remoteFile in remoteFiles.split(","):
+            fileContent = None
+            kb.fileReadMode = True
+
+            if conf.direct or isStackingAvailable():
+                if isStackingAvailable():
+                    debugMsg = "going to read the file with stacked query SQL "
+                    debugMsg += "injection technique"
+                    logger.debug(debugMsg)
+
+                fileContent = self.stackedReadFile(remoteFile)
+            elif Backend.isDbms(DBMS.MYSQL):
+                debugMsg = "going to read the file with a non-stacked query "
+                debugMsg += "SQL injection technique"
+                logger.debug(debugMsg)
+
+                fileContent = self.nonStackedReadFile(remoteFile)
+            else:
+                errMsg = "none of the SQL injection techniques detected can "
+                errMsg += "be used to read files from the underlying file "
+                errMsg += "system of the back-end %s server" % Backend.getDbms()
+                logger.error(errMsg)
+
+                fileContent = None
+
+            kb.fileReadMode = False
+
+            if fileContent in (None, "") and not Backend.isDbms(DBMS.PGSQL):
+                self.cleanup(onlyFileTbl=True)
+            elif isListLike(fileContent):
+                newFileContent = ""
+
+                for chunk in fileContent:
+                    if isListLike(chunk):
+                        if len(chunk) > 0:
+                            chunk = chunk[0]
+                        else:
+                            chunk = ""
+
+                    if chunk:
+                        newFileContent += chunk
+
+                fileContent = newFileContent
+
+            if fileContent is not None:
+                fileContent = decodeHexValue(fileContent, True)
+
+                if fileContent:
+                    localFilePath = dataToOutFile(remoteFile, fileContent)
+
+                    if not Backend.isDbms(DBMS.PGSQL):
+                        self.cleanup(onlyFileTbl=True)
+
+                    sameFile = self.askCheckReadFile(localFilePath, remoteFile)
+
+                    if sameFile is True:
+                        localFilePath += " (same file)"
+                    elif sameFile is False:
+                        localFilePath += " (size differs from remote file)"
+
+                    localFilePaths.append(localFilePath)
+                else:
+                    errMsg = "no data retrieved"
+                    logger.error(errMsg)
+
+        return localFilePaths
+
+    def writeFile(self, localFile, remoteFile, fileType=None, forceCheck=False):
+        written = False
+
+        checkFile(localFile)
+
+        self.checkDbmsOs()
+
+        if localFile.endswith('_'):
+            localFile = decloakToTemp(localFile)
+
+        if conf.direct or isStackingAvailable():
+            if isStackingAvailable():
+                debugMsg = "going to upload the file '%s' with " % fileType
+                debugMsg += "stacked query SQL injection technique"
+                logger.debug(debugMsg)
+
+            written = self.stackedWriteFile(localFile, remoteFile, fileType, forceCheck)
+            self.cleanup(onlyFileTbl=True)
+        elif isTechniqueAvailable(PAYLOAD.TECHNIQUE.UNION) and Backend.isDbms(DBMS.MYSQL):
+            debugMsg = "going to upload the file '%s' with " % fileType
+            debugMsg += "UNION query SQL injection technique"
+            logger.debug(debugMsg)
+
+            written = self.unionWriteFile(localFile, remoteFile, fileType, forceCheck)
+        else:
+            errMsg = "none of the SQL injection techniques detected can "
+            errMsg += "be used to write files to the underlying file "
+            errMsg += "system of the back-end %s server" % Backend.getDbms()
+            logger.error(errMsg)
+
+            return None
+
+        return written
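For orientation, fileToSqlQueries turns the chunk list produced by fileContentEncode into one INSERT followed by UPDATEs that append the remaining chunks. A minimal sketch of the resulting statements, assuming hex encoding and a MySQL-style CONCAT (the table/column names are the runtime values of self.fileTblName and self.tblField; the literals below are made up):

    # hypothetical output of fileContentEncode(content, "hex", single=False)
    chunks = ["0x48656c6c6f20", "0x776f726c64"]
    queries = [
        "INSERT INTO sqlmapfile(data) VALUES (0x48656c6c6f20)",      # first chunk seeds the row
        "UPDATE sqlmapfile SET data=CONCAT(data,0x776f726c64)",      # later chunks are appended
    ]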

+ 168 - 0
File/preprocess.py

@@ -0,0 +1,168 @@
+from config import FLAGS
+import numpy as np
+import math
+import h5py
+import glob
+from util.utils import load_nifti, save_nifti
+import os
+
+
+
+def rotate_flip(data, r=0, f_lf=False):
+
+	#rotate 90
+	data = np.rot90(data,r)
+
+	if f_lf:
+		data = np.fliplr(data)
+
+	return data
+
+
+
+def create_hdf5(img_data, t2_data, img_label, save_path):
+	assert img_data.shape == img_label.shape, 'shape of data and label must be the same..'
+	f = h5py.File(save_path, "w")
+	dset = f.create_dataset("t1data", img_data.shape, dtype=np.int16)
+	tset = f.create_dataset("t2data", t2_data.shape, dtype=np.int16)
+	lset = f.create_dataset("label", img_data.shape, dtype=np.uint8)
+
+	dset[...] = img_data
+	lset[...] = img_label
+	tset[...] = t2_data
+	print('saved hdf5 file in %s' % (save_path, ))
+	f.close()
+
+
+
+def get_nifti_path():
+	t1_path, t2_path, label_path = '', '', ''
+
+	dir_list = glob.glob('%s/*/' %(FLAGS.train_data_dir,))
+	# print dir_list, '....'
+	for _dir in dir_list:
+		# file_list = glob.glob('%s/*.nii' % (_dir, ))
+		img_id = _dir.split('/')[-2]
+		t1_path = '%s%s-T1.nii.gz' %(_dir, img_id)
+		t2_path = '%s%s-T2.nii.gz' %(_dir, img_id)
+		label_path = '%s%s-label.nii.gz' %(_dir, img_id)
+
+		yield t1_path, t2_path, label_path
+		
+		
+
+
+def remove_backgrounds(img_data, t2_data, img_label):
+	nonzero_label = img_label != 0
+	nonzero_label = np.asarray(nonzero_label)
+
+	nonzero_index = np.nonzero(nonzero_label)
+	nonzero_index = np.asarray(nonzero_index)
+
+	x_min, x_max = nonzero_index[0,:].min(), nonzero_index[0,:].max()
+	y_min, y_max = nonzero_index[1,:].min(), nonzero_index[1,:].max()
+	z_min, z_max = nonzero_index[2,:].min(), nonzero_index[2,:].max()
+
+	# print x_min, x_max
+	# print y_min, y_max
+	# print z_min, z_max
+
+	x_min = x_min - FLAGS.prepost_pad if x_min-FLAGS.prepost_pad>=0 else 0
+	y_min = y_min - FLAGS.prepost_pad if y_min-FLAGS.prepost_pad>=0 else 0
+	z_min = z_min - FLAGS.prepost_pad if z_min-FLAGS.prepost_pad>=0 else 0
+
+	x_max = x_max + FLAGS.prepost_pad if x_max+FLAGS.prepost_pad<=img_data.shape[0] else img_data.shape[0]
+	y_max = y_max + FLAGS.prepost_pad if y_max+FLAGS.prepost_pad<=img_data.shape[1] else img_data.shape[1]
+	z_max = z_max + FLAGS.prepost_pad if z_max+FLAGS.prepost_pad<=img_data.shape[2] else img_data.shape[2]
+
+
+	return (img_data[x_min:x_max, y_min:y_max, z_min:z_max], t2_data[x_min:x_max, y_min:y_max, z_min:z_max],
+					img_label[x_min:x_max, y_min:y_max, z_min:z_max])
+
+
+
+
+def generate_nifti_data():
+
+	for img_path, t2_path, label_path in get_nifti_path():
+		nifti_data, nifti_img = load_nifti(img_path)
+		t2_data, t2_img = load_nifti(t2_path)
+		nifti_label, _label = load_nifti(label_path)
+
+		img_id = img_path.split('/')[-2]
+
+		if len(nifti_data.shape)==3:
+			pass
+		elif len(nifti_data.shape)==4:
+			nifti_data = nifti_data[:,:,:,0]
+			t2_data = t2_data[:,:,:,0]
+			nifti_label = nifti_label[:,:,:,0]
+		
+
+		t1_data = np.asarray(nifti_data, np.int16)
+		t2_data = np.asarray(t2_data, np.int16)
+		
+		nifti_label = np.asarray(nifti_label, np.uint8)
+
+		nifti_label[nifti_label==10] = 1
+		nifti_label[nifti_label==150] = 2
+		nifti_label[nifti_label==250] = 3
+		croped_data, t2_data, croped_label = remove_backgrounds(t1_data,t2_data, nifti_label)
+		
+
+		t1_name = img_path.split('/')[-1].replace('.nii.gz', '')
+		t2_name = t2_path.split('/')[-1].replace('.nii.gz', '')
+		
+		
+		for _r in xrange(4):
+			for flip in [True, False]:
+				save_path = '%s/%s_r%d_f%d.h5' %(FLAGS.hdf5_dir, img_id, _r, flip)
+				print ('>> start to create hdf5: %s' % (save_path,))
+				aug_data = rotate_flip(croped_data, r=_r, f_lf=flip )
+				aug_label = rotate_flip(croped_label, r=_r, f_lf=flip )
+				aug_t2_data = rotate_flip(t2_data,  r=_r, f_lf=flip)
+				
+				create_hdf5(aug_data,aug_t2_data, aug_label, save_path)
+
+				save_nifti_path = '%s/%s_r%d_f%d_data.nii' % (FLAGS.hdf5_dir, t1_name, _r, flip)
+				save_nifti_label_path = '%s/%s_r%d_f%d_label.nii' % (FLAGS.hdf5_dir, img_id, _r, flip)
+				t2_path = '%s/%s_r%d_f%d_data.nii' % (FLAGS.hdf5_dir, t2_name, _r, flip)
+
+		# break
+
+
+
+
+def generate_file_list():
+	# if os.pa
+	file_list = glob.glob('%s/*.h5' %(FLAGS.hdf5_dir,))
+	file_list.sort()
+	with open(FLAGS.hdf5_list_path, 'w') as _file:
+		for _file_path in file_list:
+			_file.write(_file_path)
+			_file.write('\n')
+
+
+	with open(FLAGS.hdf5_train_list_path, 'w') as _file:
+		for _file_path in file_list[8:]:
+			_file.write(_file_path)
+			_file.write('\n')
+
+	with open(FLAGS.hdf5_validation_list_path, 'w') as _file:
+		for _file_path in file_list[0:8]:
+			_file.write(_file_path)
+			_file.write('\n')
+
+	
+
+def main():
+	
+	generate_nifti_data()
+	generate_file_list()
+
+	
+
+	
+
+if __name__ == '__main__':
+	main()
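rotate_flip above is just np.rot90 plus an optional np.fliplr; a tiny standalone sketch of what one augmentation step does (the array values are arbitrary):

    import numpy as np

    a = np.arange(4).reshape(2, 2)     # [[0, 1], [2, 3]]
    print(np.rot90(a, 1))              # one 90-degree rotation: [[1, 3], [0, 2]]
    print(np.fliplr(np.rot90(a, 1)))   # rotation followed by left-right flip: [[3, 1], [2, 0]]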

+ 75 - 0
File/saver.py

@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import logging
+
+
+class InvalidSaveConfigsError(Exception):
+    def __init__(self, message="invalid save configs", errors=None):
+        # Call the base class constructor with the parameters it needs
+        super(InvalidSaveConfigsError, self).__init__(message)
+        self.errors = errors
+
+
+class Saver(object):
+    """
+    get items from the item queue, then save each item to local storage
+    """
+
+    def __init__(self, save_configs=None):
+        self._save_configs = save_configs
+
+        if save_configs['type'] == 'file':
+            write_path = self._save_configs['path']
+            self._file_obj = open(write_path, 'w')
+        elif save_configs['type'] == 'db':
+            self._conn_pool = None
+
+        return
+
+    async def save(self, url, item):
+        """
+        try to save result according to save configs
+        """
+        try:
+            configs = self._save_configs
+            if configs['type'] == 'file':
+                await self.save_to_file(url, item, self._file_obj)
+            elif configs['type'] == 'db':
+                await self.save_to_db()
+            else:
+                raise InvalidSaveConfigsError
+        except InvalidSaveConfigsError:
+            logging.error("invalid save configs, update 'save' field in the config.json")
+        return
+
+
+    async def save_to_file(self, url, item, file_obj):
+        """
+        save the item to a file; the body must stay wrapped in try/except, and the parameters and return value must not be changed
+        :return status:
+        """
+        logging.debug("%r start: url=%r", self.__class__.__name__, url)
+        status = 0
+
+        try:
+            write_path = self._save_configs['path']
+            item_str = ", ".join(str(x) for x in item)
+            line = url + ": " + item_str + '\n'
+            file_obj.write(line)
+
+        # TODO: substitute with a more specific exception
+        except Exception as e:
+            status = -1
+            logging.error("%r error: %r, url=%r",
+                          self.__class__.__name__, e, url)
+
+        logging.debug("%r end: status=%r, url=%r",
+                      self.__class__.__name__, status, url)
+        return status
+
+
+    async def save_to_db(self):
+        """
+        save the item to a database
+        """
+        raise NotImplementedError
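A minimal usage sketch for the Saver above, using the 'file' branch (the URL, item, and output path are made up; save() itself returns None, and the per-item status is logged inside save_to_file):

    import asyncio

    saver = Saver(save_configs={"type": "file", "path": "results.txt"})
    asyncio.run(saver.save("http://example.com", ["title", 200]))
    # results.txt now contains: http://example.com: title, 200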

+ 154 - 0
File/transform_cuhk03.py

@@ -0,0 +1,154 @@
+"""Refactor file directories, save/rename images and partition the 
+train/val/test set, in order to support the unified dataset interface.
+"""
+
+from __future__ import print_function
+
+import sys
+sys.path.insert(0, '.')
+
+from zipfile import ZipFile
+import os.path as osp
+import h5py
+from scipy.misc import imsave
+from itertools import chain
+
+from bpm.utils.utils import may_make_dir
+from bpm.utils.utils import load_pickle
+from bpm.utils.utils import save_pickle
+
+from bpm.utils.dataset_utils import partition_train_val_set
+from bpm.utils.dataset_utils import new_im_name_tmpl
+from bpm.utils.dataset_utils import parse_im_name
+
+
+def save_images(mat_file, save_dir, new_im_name_tmpl):
+  def deref(mat, ref):
+    return mat[ref][:].T
+
+  def dump(mat, refs, pid, cam, im_dir):
+    """Save the images of a person under one camera."""
+    for i, ref in enumerate(refs):
+      im = deref(mat, ref)
+      if im.size == 0 or im.ndim < 2: break
+      fname = new_im_name_tmpl.format(pid, cam, i)
+      imsave(osp.join(im_dir, fname), im)
+
+  mat = h5py.File(mat_file, 'r')
+  labeled_im_dir = osp.join(save_dir, 'labeled/images')
+  detected_im_dir = osp.join(save_dir, 'detected/images')
+  all_im_dir = osp.join(save_dir, 'all/images')
+
+  may_make_dir(labeled_im_dir)
+  may_make_dir(detected_im_dir)
+  may_make_dir(all_im_dir)
+
+  # loop through camera pairs
+  pid = 0
+  for labeled, detected in zip(mat['labeled'][0], mat['detected'][0]):
+    labeled, detected = deref(mat, labeled), deref(mat, detected)
+    assert labeled.shape == detected.shape
+    # loop through ids in a camera pair
+    for i in range(labeled.shape[0]):
+      # We don't care whether different persons appear under the same camera;
+      # we only care whether the same person appears under different cameras.
+      dump(mat, labeled[i, :5], pid, 0, labeled_im_dir)
+      dump(mat, labeled[i, 5:], pid, 1, labeled_im_dir)
+      dump(mat, detected[i, :5], pid, 0, detected_im_dir)
+      dump(mat, detected[i, 5:], pid, 1, detected_im_dir)
+      dump(mat, chain(detected[i, :5], labeled[i, :5]), pid, 0, all_im_dir)
+      dump(mat, chain(detected[i, 5:], labeled[i, 5:]), pid, 1, all_im_dir)
+      pid += 1
+      if pid % 100 == 0:
+        sys.stdout.write('\033[F\033[K')
+        print('Saving images {}/{}'.format(pid, 1467))
+
+
+def transform(zip_file, train_test_partition_file, save_dir=None):
+  """Save images and partition the train/val/test set.
+  """
+  print("Extracting zip file")
+  root = osp.dirname(osp.abspath(zip_file))
+  if save_dir is None:
+    save_dir = root
+  may_make_dir(save_dir)
+  with ZipFile(zip_file) as z:
+    z.extractall(path=save_dir)
+  print("Extracting zip file done")
+  mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
+
+  save_images(mat_file, save_dir, new_im_name_tmpl)
+
+  if osp.exists(train_test_partition_file):
+    train_test_partition = load_pickle(train_test_partition_file)
+  else:
+    raise RuntimeError('Train/test partition file should be provided.')
+
+  for im_type in ['detected', 'labeled']:
+    trainval_im_names = train_test_partition[im_type]['train_im_names']
+    trainval_ids = list(set([parse_im_name(n, 'id')
+                             for n in trainval_im_names]))
+    # Sort ids, so that id-to-label mapping remains the same when running
+    # the code on different machines.
+    trainval_ids.sort()
+    trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
+    train_val_partition = \
+      partition_train_val_set(trainval_im_names, parse_im_name, num_val_ids=100)
+    train_im_names = train_val_partition['train_im_names']
+    train_ids = list(set([parse_im_name(n, 'id')
+                          for n in train_val_partition['train_im_names']]))
+    # Sort ids, so that id-to-label mapping remains the same when running
+    # the code on different machines.
+    train_ids.sort()
+    train_ids2labels = dict(zip(train_ids, range(len(train_ids))))
+
+    # A mark is used to denote whether the image is from
+    #   query (mark == 0), or
+    #   gallery (mark == 1), or
+    #   multi query (mark == 2) set
+
+    val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
+                + [1, ] * len(train_val_partition['val_gallery_im_names'])
+    val_im_names = list(train_val_partition['val_query_im_names']) \
+                   + list(train_val_partition['val_gallery_im_names'])
+    test_im_names = list(train_test_partition[im_type]['query_im_names']) \
+                    + list(train_test_partition[im_type]['gallery_im_names'])
+    test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
+                 + [1, ] * len(
+      train_test_partition[im_type]['gallery_im_names'])
+    partitions = {'trainval_im_names': trainval_im_names,
+                  'trainval_ids2labels': trainval_ids2labels,
+                  'train_im_names': train_im_names,
+                  'train_ids2labels': train_ids2labels,
+                  'val_im_names': val_im_names,
+                  'val_marks': val_marks,
+                  'test_im_names': test_im_names,
+                  'test_marks': test_marks}
+    partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
+    save_pickle(partitions, partition_file)
+    print('Partition file for "{}" saved to {}'.format(im_type, partition_file))
+
+
+if __name__ == '__main__':
+  import argparse
+
+  parser = argparse.ArgumentParser(description="Transform CUHK03 Dataset")
+  parser.add_argument(
+    '--zip_file',
+    type=str,
+    default='~/Dataset/cuhk03/cuhk03_release.zip')
+  parser.add_argument(
+    '--save_dir',
+    type=str,
+    default='~/Dataset/cuhk03')
+  parser.add_argument(
+    '--train_test_partition_file',
+    type=str,
+    default='~/Dataset/cuhk03/re_ranking_train_test_split.pkl')
+  args = parser.parse_args()
+  zip_file = osp.abspath(osp.expanduser(args.zip_file))
+  train_test_partition_file = osp.abspath(osp.expanduser(
+    args.train_test_partition_file))
+  save_dir = osp.abspath(osp.expanduser(args.save_dir))
+  transform(zip_file, train_test_partition_file, save_dir)
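Going by the argparse defaults above, a typical invocation would look like the following (the paths may need adjusting to wherever the CUHK03 release zip and the re-ranking split file actually live):

    python transform_cuhk03.py --zip_file ~/Dataset/cuhk03/cuhk03_release.zip --save_dir ~/Dataset/cuhk03 --train_test_partition_file ~/Dataset/cuhk03/re_ranking_train_test_split.pkl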

+ 34 - 0
File/utils.py

@@ -0,0 +1,34 @@
+import torch
+import shutil
+import os
+
+
+def save_checkpoint(state, is_best, file_path, file_name='checkpoint.pth.tar'):
+    """
+    Saves the current state of the model. Does a copy of the file
+    in case the model performed better than previously.
+
+    Parameters:
+        state (dict): Includes optimizer and model state dictionaries.
+        is_best (bool): True if model is best performing model.
+        file_path (str): Path to save the file.
+        file_name (str): File name with extension (default: checkpoint.pth.tar).
+    """
+
+    save_path = os.path.join(file_path, file_name)
+    torch.save(state, save_path)
+    if is_best:
+        shutil.copyfile(save_path, os.path.join(file_path, 'model_best.pth.tar'))
+
+
+def save_task_checkpoint(file_path, task_num):
+    """
+    Saves the current state of the model for a given task by copying existing checkpoint created by the
+    save_checkpoint function.
+
+    Parameters:
+        file_path (str): Path to save the file.
+        task_num (int): Number of task increment.
+    """
+    save_path = os.path.join(file_path, 'checkpoint_task_' + str(task_num) + '.pth.tar')
+    shutil.copyfile(os.path.join(file_path, 'checkpoint.pth.tar'), save_path)
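A short sketch of how the two helpers above are typically wired into a training loop (the model, optimizer, and is_best criterion are placeholders):

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    state = {"model": model.state_dict(), "optimizer": optimizer.state_dict(), "epoch": 1}
    save_checkpoint(state, is_best=True, file_path=".")   # writes checkpoint.pth.tar and copies it to model_best.pth.tar
    save_task_checkpoint(".", task_num=1)                 # copies checkpoint.pth.tar to checkpoint_task_1.pth.tar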

+ 37 - 0
Hash/03-HT-Get.py

@@ -0,0 +1,37 @@
+class HashTable:
+    def __init__(self, size = 7):
+        self.data_map = [None] * size
+      
+    def __hash(self, key):
+        my_hash = 0
+        for letter in key:
+            my_hash = (my_hash + ord(letter) * 23) % len(self.data_map)
+        return my_hash  
+
+    def print_table(self):
+        for i, val in enumerate(self.data_map): 
+            print(i, ": ", val)
+    
+    def set_item(self, key, value):
+        index = self.__hash(key)
+        if self.data_map[index] is None:
+            self.data_map[index] = []
+        self.data_map[index].append([key, value])
+    
+    def get_item(self, key):
+        index = self.__hash(key)
+        if self.data_map[index] is not None:
+            for i in range(len(self.data_map[index])):
+                if self.data_map[index][i][0] == key:
+                    return self.data_map[index][i][1]
+        return None
+             
+
+my_hash_table = HashTable()
+
+my_hash_table.set_item('bolts', 1400)
+my_hash_table.set_item('washers', 50)
+
+print(my_hash_table.get_item('bolts'))
+print(my_hash_table.get_item('washers'))
+print(my_hash_table.get_item('lumber'))
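With the default table size of 7 and the per-slot chaining used in set_item, the three lookups at the end print 1400, 50 and None ('lumber' was never inserted).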

+ 99 - 0
Hash/GeneralHashFunctions.py

@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+def rs_hash(key):
+    a = 378551
+    b = 63689
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = hash_value * a + ord(key[i])
+        a = a * b
+    return hash_value
+
+
+def js_hash(key):
+    hash_value = 1315423911
+    for i in range(len(key)):
+        hash_value ^= ((hash_value << 5) + ord(key[i]) + (hash_value >> 2))
+    return hash_value
+
+
+def pjw_hash(key):
+    bits_in_unsigned_int = 4 * 8
+    three_quarters = (bits_in_unsigned_int * 3) / 4
+    one_eighth = bits_in_unsigned_int / 8
+    high_bits = 0xFFFFFFFF << int(bits_in_unsigned_int - one_eighth)
+    hash_value = 0
+    test = 0
+
+    for i in range(len(key)):
+        hash_value = (hash_value << int(one_eighth)) + ord(key[i])
+        test = hash_value & high_bits
+    if test != 0:
+        hash_value = ((hash_value ^ (test >> int(three_quarters))) & (~high_bits))
+    return hash_value & 0x7FFFFFFF
+
+
+def elf_hash(key):
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = (hash_value << 4) + ord(key[i])
+        x = hash_value & 0xF0000000
+        if x != 0:
+            hash_value ^= (x >> 24)
+        hash_value &= ~x
+    return hash_value
+
+
+def bkdr_hash(key):
+    seed = 131  # 31 131 1313 13131 131313 etc..
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = (hash_value * seed) + ord(key[i])
+    return hash_value
+
+
+def sdbm_hash(key):
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = ord(key[i]) + (hash_value << 6) + (hash_value << 16) - hash_value
+    return hash_value
+
+
+def djb_hash(key):
+    hash_value = 5381
+    for i in range(len(key)):
+        hash_value = ((hash_value << 5) + hash_value) + ord(key[i])
+    return hash_value
+
+
+def dek_hash(key):
+    hash_value = len(key)
+    for i in range(len(key)):
+        hash_value = ((hash_value << 5) ^ (hash_value >> 27)) ^ ord(key[i])
+    return hash_value
+
+
+def bp_hash(key):
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = hash_value << 7 ^ ord(key[i])
+    return hash_value
+
+
+def fnv_hash(key):
+    fnv_prime = 0x811C9DC5
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value *= fnv_prime
+        hash_value ^= ord(key[i])
+    return hash_value
+
+
+def ap_hash(key):
+    hash_value = 0xAAAAAAAA
+    for i in range(len(key)):
+        if (i & 1) == 0:
+            hash_value ^= ((hash_value << 7) ^ ord(key[i]) * (hash_value >> 3))
+        else:
+            hash_value ^= (~((hash_value << 11) + ord(key[i]) ^ (hash_value >> 5)))
+    return hash_value
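Most of these functions return unbounded Python ints (the C originals rely on unsigned 32-bit overflow); a quick sketch of calling a few of them and masking the result when a 32-bit value is wanted:

    key = "hello"
    for fn in (rs_hash, js_hash, djb_hash, fnv_hash):
        print(fn.__name__, fn(key) & 0xFFFFFFFF)   # emulate unsigned 32-bit wrap-around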

+ 226 - 0
Hash/Reducible.py

@@ -0,0 +1,226 @@
+#  File: Reducible.py
+
+#  Description: Read a list of words and find the longest words that can be
+#  reduced, one letter at a time, to smaller valid words. Then print all the
+#  words of this maximum size in alphabetical order.
+
+#  Student Name: Anna Dougharty
+
+#  Student UT EID: amd5933
+
+#  Course Name: CS 313E
+
+#  Unique Number: 52600
+
+#  Date Created: 10/21/2021
+
+#  Date Last Modified: 10/21/2021
+
+import sys
+
+
+# Input: takes as input a positive integer n
+# Output: returns True if n is prime and False otherwise
+def is_prime(n):
+    if n == 1:
+        return False
+
+    limit = int(n ** 0.5) + 1
+    div = 2
+    while div < limit:
+        if n % div == 0:
+            return False
+        div += 1
+    return True
+
+
+# Input: takes as input a string in lower case and the size
+#        of the hash table
+# Output: returns the index the string will hash into
+def hash_word(s, size):
+    hash_idx = 0
+    for j in range(len(s)):
+        letter = ord(s[j]) - 96
+        hash_idx = (hash_idx * 26 + letter) % size
+    return hash_idx
+
+
+# Input: takes as input a string in lower case and the constant
+#        for double hashing
+# Output: returns the step size for that string
+
+# USE A SMALL PRIME NUMBER FOR CONSTANT
+
+def step_size(s, const):
+    return const - (hash_word(s, const))
+
+
+# Input: takes as input a string and a hash table
+# Output: no output; the function enters the string in the hash table,
+#         it resolves collisions by double hashing
+def insert_word(s, hash_table):
+    # find index of new word that string will hash into
+    word_idx = hash_word(s, len(hash_table))
+    # if this location in hash table is empty
+    if hash_table[word_idx] == '':
+        # hash word into table
+        hash_table[word_idx] = s
+    else:
+        # find step size for double hashing (if collision occurs)
+        step_val = step_size(s, 3)
+        # only increase index by step value if table at index is NOT empty
+        while hash_table[word_idx] != '':
+            word_idx = (word_idx + step_val) % len(hash_table)
+        # once an empty index has been found, hash word into the table
+        hash_table[word_idx] = s
+
+
+# Input: takes as input a string and a hash table
+# Output: returns True if the string is in the hash table
+#         and False otherwise
+def find_word(s, hash_table):
+    word_idx = hash_word(s, len(hash_table))
+    tracking_idx = word_idx
+    if hash_table[tracking_idx] == s:
+        return True
+    else:
+        # find step size for double hashing (if collision occurs)
+        step_val = step_size(s, 1)
+        count = 0
+        # increase by step size until s at index is found
+        while tracking_idx < len(hash_table) and hash_table[tracking_idx] != s and count < 3:
+            tracking_idx = (tracking_idx + step_val) % (len(hash_table))
+            if tracking_idx == word_idx:
+                return False
+            count += 1
+        if tracking_idx >= len(hash_table):
+            return False
+    # return T/F if word is in hash table at tracking index
+    return s == hash_table[tracking_idx]
+
+
+# Input: string s, a hash table, and a hash_memo
+#        recursively finds if the string is reducible
+# Output: if the string is reducible it enters it into the hash memo
+#         and returns True and False otherwise
+
+# LOOK INTO MEMOIZATION FOR THIS
+# Avoiding extra recursions in is_reducible by
+# checking for cases where it is impossible for
+# the word to be reducible
+
+def is_reducible(s, hash_table, hash_memo):
+    # if the word 's' is the letter 'a' or 'i' or 'o',
+    # then it has reached its final form
+    if s == 'a' or s == 'i' or s == 'o':
+        return True
+    # if the word is found in the hash memo, then it is
+    # by definition already reducible
+    elif find_word(s, hash_memo):
+        return True
+    # if it still contains at least one of the three letters
+    # 'a', 'i', or 'o', then it has potential to be reducible
+    elif 'a' in s or 'i' in s or 'o' in s:
+        # if word cannot be found
+        if find_word(s, hash_table) is False:
+            return False
+        # check each of the smaller words
+        for i in range(len(s)):
+            new_word = s[:i] + s[i + 1:]
+            # recursively call for each sub-word
+            if is_reducible(new_word, hash_table, hash_memo):
+                insert_word(new_word, hash_memo)
+                return True
+    # if the word fails all criteria above then it is not reducible
+    else:
+        return False
+
+
+# Input: string_list a list of words
+# Output: returns a list of words that have the maximum length
+def get_longest_words(string_list):
+    max_word_length = 0
+    # increase max length until largest size is found
+    for word in string_list:
+        if len(word) > max_word_length:
+            max_word_length = len(word)
+
+    # append words of max size to the list
+    max_word_list = []
+    for word in string_list:
+        if len(word) == max_word_length:
+            max_word_list.append(word)
+
+    # sort list alphabetically
+    max_word_list.sort()
+    return max_word_list
+
+
+def main():
+    # create an empty word_list
+    word_list = []
+
+    # read words from words.txt and append to word_list
+    for line in sys.stdin:
+        line = line.strip()
+        word_list.append(line)
+
+    # find length of word_list
+    length_words = len(word_list)
+
+    # determine prime number N that is greater than twice
+    # the length of the word_list
+    prime_num = (length_words * 2) + 1
+    while is_prime(prime_num) is False:
+        prime_num += 1
+
+    # create an empty hash_list
+    hash_table = []
+
+    # populate the hash_list with N blank strings
+    for i in range(prime_num):
+        hash_table.append('')
+
+    # hash each word in word_list into hash_list
+    # for collisions use double hashing
+    for word in word_list:
+        insert_word(word, hash_table)
+
+    # create an empty hash_memo of size M
+    # we do not know a priori how many words will be reducible
+    # let us assume it is 20 percent (fairly safe) of the words
+    # then M is a prime number that is slightly greater than
+    # 0.2 * size of word_list
+    hash_memo = []
+    hash_memo_size = int((len(word_list) * 0.2) + 1)
+    while is_prime(hash_memo_size) is False:
+        hash_memo_size += 1
+
+    # populate the hash_memo with M blank strings
+    for i in range(hash_memo_size):
+        hash_memo.append('')
+
+    # create an empty list reducible_words
+    reducible_word_list = []
+
+    # for each word in the word_list recursively determine
+    # if it is reducible, if it is, add it to reducible_words
+    # as you recursively remove one letter at a time check
+    # first if the sub-word exists in the hash_memo. if it does
+    # then the word is reducible and you do not have to test
+    # any further. add the word to the hash_memo.
+    for word in word_list:
+        if is_reducible(word, hash_table, hash_memo):
+            reducible_word_list.append(word)
+
+    # find the largest reducible words in reducible_words
+    reducible_word_largest = get_longest_words(reducible_word_list)
+
+    # print the reducible words in alphabetical order
+    # one word per line
+    for word in reducible_word_largest:
+        print(word)
+
+
+if __name__ == "__main__":
+    main()
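The script reads its word list from standard input (e.g. python Reducible.py < words.txt). The double-hashing helpers above can also be exercised on their own; a small sketch with an arbitrary word list:

    table = [''] * 7                   # table size should be prime, as in main()
    for w in ("cat", "cart", "carts"):
        insert_word(w, table)
    print(find_word("cart", table))    # True
    print(find_word("dog", table))     # False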

+ 42 - 0
Hash/bignum.py

@@ -0,0 +1,42 @@
+#
+# These tests require gmpy and test the limits of the 32-bit build. The
+# limits of the 64-bit build are so large that they cannot be tested
+# on accessible hardware.
+#
+
+import sys
+from decimal import *
+from gmpy import mpz
+
+
+_PyHASH_MODULUS = sys.hash_info.modulus
+# hash values to use for positive and negative infinities, and nans
+_PyHASH_INF = sys.hash_info.inf
+_PyHASH_NAN = sys.hash_info.nan
+
+# _PyHASH_10INV is the inverse of 10 modulo the prime _PyHASH_MODULUS
+_PyHASH_10INV = pow(10, _PyHASH_MODULUS - 2, _PyHASH_MODULUS)
+
+def xhash(coeff, exp):
+    sign = 1
+    if coeff < 0:
+        sign = -1
+        coeff = -coeff
+    if exp >= 0:
+        exp_hash = pow(10, exp, _PyHASH_MODULUS)
+    else:
+        exp_hash = pow(_PyHASH_10INV, -exp, _PyHASH_MODULUS)
+    hash_ = coeff * exp_hash % _PyHASH_MODULUS
+    ans = hash_ if sign == 1 else -hash_
+    return -2 if ans == -1 else ans
+
+
+x = mpz(10) ** 425000000 - 1
+coeff = int(x)
+
+d = Decimal('9' * 425000000 + 'e-849999999')
+
+h1 = xhash(coeff, -849999999)
+h2 = hash(d)
+
+assert h2 == h1
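The same xhash routine can be sanity-checked without gmpy on a small value first; for example, on CPython 3 (where Decimal hashing follows sys.hash_info, and 123.45 = 12345 * 10**-2):

    from decimal import Decimal

    assert (10 * _PyHASH_10INV) % _PyHASH_MODULUS == 1      # _PyHASH_10INV really is 10**-1 mod the modulus
    assert xhash(12345, -2) == hash(Decimal("123.45"))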

+ 0 - 0
Hash/common_substring.py


+ 70 - 0
Hash/crypto_hash.py

@@ -0,0 +1,70 @@
+# Copyright 2013 Donald Stufft and individual contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+from nacl import exceptions as exc
+from nacl._sodium import ffi, lib
+from nacl.exceptions import ensure
+
+
+# crypto_hash_BYTES = lib.crypto_hash_bytes()
+crypto_hash_BYTES = lib.crypto_hash_sha512_bytes()
+crypto_hash_sha256_BYTES = lib.crypto_hash_sha256_bytes()
+crypto_hash_sha512_BYTES = lib.crypto_hash_sha512_bytes()
+
+
+def crypto_hash(message):
+    """
+    Hashes and returns the message ``message``.
+
+    :param message: bytes
+    :rtype: bytes
+    """
+    digest = ffi.new("unsigned char[]", crypto_hash_BYTES)
+    rc = lib.crypto_hash(digest, message, len(message))
+    ensure(rc == 0,
+           'Unexpected library error',
+           raising=exc.RuntimeError)
+    return ffi.buffer(digest, crypto_hash_BYTES)[:]
+
+
+def crypto_hash_sha256(message):
+    """
+    Hashes and returns the message ``message``.
+
+    :param message: bytes
+    :rtype: bytes
+    """
+    digest = ffi.new("unsigned char[]", crypto_hash_sha256_BYTES)
+    rc = lib.crypto_hash_sha256(digest, message, len(message))
+    ensure(rc == 0,
+           'Unexpected library error',
+           raising=exc.RuntimeError)
+    return ffi.buffer(digest, crypto_hash_sha256_BYTES)[:]
+
+
+def crypto_hash_sha512(message):
+    """
+    Hashes and returns the message ``message``.
+
+    :param message: bytes
+    :rtype: bytes
+    """
+    digest = ffi.new("unsigned char[]", crypto_hash_sha512_BYTES)
+    rc = lib.crypto_hash_sha512(digest, message, len(message))
+    ensure(rc == 0,
+           'Unexpected library error',
+           raising=exc.RuntimeError)
+    return ffi.buffer(digest, crypto_hash_sha512_BYTES)[:]
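A quick usage sketch of the three wrappers above (the digest lengths follow the *_BYTES constants defined at the top of the file):

    msg = b"The quick brown fox"
    assert len(crypto_hash(msg)) == crypto_hash_BYTES                 # SHA-512, 64 bytes
    assert len(crypto_hash_sha256(msg)) == crypto_hash_sha256_BYTES   # 32 bytes
    assert len(crypto_hash_sha512(msg)) == crypto_hash_sha512_BYTES   # 64 bytes
    print(crypto_hash_sha256(msg).hex())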

+ 607 - 0
Hash/des_crypt.py

@@ -0,0 +1,607 @@
+"""passlib.handlers.des_crypt - traditional unix (DES) crypt and variants"""
+#=============================================================================
+# imports
+#=============================================================================
+# core
+import re
+import logging; log = logging.getLogger(__name__)
+from warnings import warn
+# site
+# pkg
+from passlib.utils import safe_crypt, test_crypt, to_unicode
+from passlib.utils.binary import h64, h64big
+from passlib.utils.compat import byte_elem_value, u, uascii_to_str, unicode, suppress_cause
+from passlib.crypto.des import des_encrypt_int_block
+import passlib.utils.handlers as uh
+# local
+__all__ = [
+    "des_crypt",
+    "bsdi_crypt",
+    "bigcrypt",
+    "crypt16",
+]
+
+#=============================================================================
+# pure-python backend for des_crypt family
+#=============================================================================
+_BNULL = b'\x00'
+
+def _crypt_secret_to_key(secret):
+    """convert secret to 64-bit DES key.
+
+    this only uses the first 8 bytes of the secret,
+    and discards the high 8th bit of each byte at that.
+    a null parity bit is inserted after every 7th bit of the output.
+    """
+    # NOTE: this would set the parity bits correctly,
+    #       but des_encrypt_int_block() would just ignore them...
+    ##return sum(expand_7bit(byte_elem_value(c) & 0x7f) << (56-i*8)
+    ##           for i, c in enumerate(secret[:8]))
+    return sum((byte_elem_value(c) & 0x7f) << (57-i*8)
+               for i, c in enumerate(secret[:8]))
+
+def _raw_des_crypt(secret, salt):
+    """pure-python backend for des_crypt"""
+    assert len(salt) == 2
+
+    # NOTE: some OSes will accept non-HASH64 characters in the salt,
+    #       but what value they assign these characters varies wildly,
+    #       so just rejecting them outright.
+    #       the same goes for single-character salts...
+    #       some OSes duplicate the char, some insert a '.' char,
+    #       and openbsd does (something) which creates an invalid hash.
+    salt_value = h64.decode_int12(salt)
+
+    # gotta do something - no official policy since this predates unicode
+    if isinstance(secret, unicode):
+        secret = secret.encode("utf-8")
+    assert isinstance(secret, bytes)
+
+    # forbidding NULL char because underlying crypt() rejects them too.
+    if _BNULL in secret:
+        raise uh.exc.NullPasswordError(des_crypt)
+
+    # convert first 8 bytes of secret string into an integer
+    key_value = _crypt_secret_to_key(secret)
+
+    # run data through des using input of 0
+    result = des_encrypt_int_block(key_value, 0, salt_value, 25)
+
+    # run h64 encode on result
+    return h64big.encode_int64(result)
+
+def _bsdi_secret_to_key(secret):
+    """convert secret to DES key used by bsdi_crypt"""
+    key_value = _crypt_secret_to_key(secret)
+    idx = 8
+    end = len(secret)
+    while idx < end:
+        next = idx + 8
+        tmp_value = _crypt_secret_to_key(secret[idx:next])
+        key_value = des_encrypt_int_block(key_value, key_value) ^ tmp_value
+        idx = next
+    return key_value
+
+def _raw_bsdi_crypt(secret, rounds, salt):
+    """pure-python backend for bsdi_crypt"""
+
+    # decode salt
+    salt_value = h64.decode_int24(salt)
+
+    # gotta do something - no official policy since this predates unicode
+    if isinstance(secret, unicode):
+        secret = secret.encode("utf-8")
+    assert isinstance(secret, bytes)
+
+    # forbidding NULL char because underlying crypt() rejects them too.
+    if _BNULL in secret:
+        raise uh.exc.NullPasswordError(bsdi_crypt)
+
+    # convert secret string into an integer
+    key_value = _bsdi_secret_to_key(secret)
+
+    # run data through des using input of 0
+    result = des_encrypt_int_block(key_value, 0, salt_value, rounds)
+
+    # run h64 encode on result
+    return h64big.encode_int64(result)
+
+#=============================================================================
+# handlers
+#=============================================================================
+class des_crypt(uh.TruncateMixin, uh.HasManyBackends, uh.HasSalt, uh.GenericHandler):
+    """This class implements the des-crypt password hash, and follows the :ref:`password-hash-api`.
+
+    It supports a fixed-length salt.
+
+    The :meth:`~passlib.ifc.PasswordHash.using` method accepts the following optional keywords:
+
+    :type salt: str
+    :param salt:
+        Optional salt string.
+        If not specified, one will be autogenerated (this is recommended).
+        If specified, it must be 2 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
+
+    :param bool truncate_error:
+        By default, des_crypt will silently truncate passwords larger than 8 bytes.
+        Setting ``truncate_error=True`` will cause :meth:`~passlib.ifc.PasswordHash.hash`
+        to raise a :exc:`~passlib.exc.PasswordTruncateError` instead.
+
+        .. versionadded:: 1.7
+
+    :type relaxed: bool
+    :param relaxed:
+        By default, providing an invalid value for one of the other
+        keywords will result in a :exc:`ValueError`. If ``relaxed=True``,
+        and the error can be corrected, a :exc:`~passlib.exc.PasslibHashWarning`
+        will be issued instead. Correctable errors include
+        ``salt`` strings that are too long.
+
+        .. versionadded:: 1.6
+    """
+    #===================================================================
+    # class attrs
+    #===================================================================
+
+    #--------------------
+    # PasswordHash
+    #--------------------
+    name = "des_crypt"
+    setting_kwds = ("salt", "truncate_error")
+
+    #--------------------
+    # GenericHandler
+    #--------------------
+    checksum_chars = uh.HASH64_CHARS
+    checksum_size = 11
+
+    #--------------------
+    # HasSalt
+    #--------------------
+    min_salt_size = max_salt_size = 2
+    salt_chars = uh.HASH64_CHARS
+
+    #--------------------
+    # TruncateMixin
+    #--------------------
+    truncate_size = 8
+
+    #===================================================================
+    # formatting
+    #===================================================================
+    # FORMAT: 2 chars of H64-encoded salt + 11 chars of H64-encoded checksum
+
+    _hash_regex = re.compile(u(r"""
+        ^
+        (?P<salt>[./a-z0-9]{2})
+        (?P<chk>[./a-z0-9]{11})?
+        $"""), re.X|re.I)
+
+    @classmethod
+    def from_string(cls, hash):
+        hash = to_unicode(hash, "ascii", "hash")
+        salt, chk = hash[:2], hash[2:]
+        return cls(salt=salt, checksum=chk or None)
+
+    def to_string(self):
+        hash = u("%s%s") % (self.salt, self.checksum)
+        return uascii_to_str(hash)
+
+    #===================================================================
+    # digest calculation
+    #===================================================================
+    def _calc_checksum(self, secret):
+        # check for truncation (during .hash() calls only)
+        if self.use_defaults:
+            self._check_truncate_policy(secret)
+
+        return self._calc_checksum_backend(secret)
+
+    #===================================================================
+    # backend
+    #===================================================================
+    backends = ("os_crypt", "builtin")
+
+    #---------------------------------------------------------------
+    # os_crypt backend
+    #---------------------------------------------------------------
+    @classmethod
+    def _load_backend_os_crypt(cls):
+        if test_crypt("test", 'abgOeLfPimXQo'):
+            cls._set_calc_checksum_backend(cls._calc_checksum_os_crypt)
+            return True
+        else:
+            return False
+
+    def _calc_checksum_os_crypt(self, secret):
+        # NOTE: we let safe_crypt() encode unicode secret -> utf8;
+        #       no official policy since des-crypt predates unicode
+        hash = safe_crypt(secret, self.salt)
+        if hash:
+            assert hash.startswith(self.salt) and len(hash) == 13
+            return hash[2:]
+        else:
+            # py3's crypt.crypt() can't handle non-utf8 bytes.
+            # fallback to builtin alg, which is always available.
+            return self._calc_checksum_builtin(secret)
+
+    #---------------------------------------------------------------
+    # builtin backend
+    #---------------------------------------------------------------
+    @classmethod
+    def _load_backend_builtin(cls):
+        cls._set_calc_checksum_backend(cls._calc_checksum_builtin)
+        return True
+
+    def _calc_checksum_builtin(self, secret):
+        return _raw_des_crypt(secret, self.salt.encode("ascii")).decode("ascii")
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+class bsdi_crypt(uh.HasManyBackends, uh.HasRounds, uh.HasSalt, uh.GenericHandler):
+    """This class implements the BSDi-Crypt password hash, and follows the :ref:`password-hash-api`.
+
+    It supports a fixed-length salt, and a variable number of rounds.
+
+    The :meth:`~passlib.ifc.PasswordHash.using` method accepts the following optional keywords:
+
+    :type salt: str
+    :param salt:
+        Optional salt string.
+        If not specified, one will be autogenerated (this is recommended).
+        If specified, it must be 4 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
+
+    :type rounds: int
+    :param rounds:
+        Optional number of rounds to use.
+        Defaults to 5001, must be between 1 and 16777215, inclusive.
+
+    :type relaxed: bool
+    :param relaxed:
+        By default, providing an invalid value for one of the other
+        keywords will result in a :exc:`ValueError`. If ``relaxed=True``,
+        and the error can be corrected, a :exc:`~passlib.exc.PasslibHashWarning`
+        will be issued instead. Correctable errors include ``rounds``
+        that are too small or too large, and ``salt`` strings that are too long.
+
+        .. versionadded:: 1.6
+
+    .. versionchanged:: 1.6
+        :meth:`hash` will now issue a warning if an even number of rounds is used
+        (see :ref:`bsdi-crypt-security-issues` regarding weak DES keys).
+    """
+    #===================================================================
+    # class attrs
+    #===================================================================
+    #--GenericHandler--
+    name = "bsdi_crypt"
+    setting_kwds = ("salt", "rounds")
+    checksum_size = 11
+    checksum_chars = uh.HASH64_CHARS
+
+    #--HasSalt--
+    min_salt_size = max_salt_size = 4
+    salt_chars = uh.HASH64_CHARS
+
+    #--HasRounds--
+    default_rounds = 5001
+    min_rounds = 1
+    max_rounds = 16777215 # (1<<24)-1
+    rounds_cost = "linear"
+
+    # NOTE: OpenBSD login.conf reports 7250 as minimum allowed rounds,
+    # but that seems to be an OS policy, not an algorithm limitation.
+
+    #===================================================================
+    # parsing
+    #===================================================================
+    _hash_regex = re.compile(u(r"""
+        ^
+        _
+        (?P<rounds>[./a-z0-9]{4})
+        (?P<salt>[./a-z0-9]{4})
+        (?P<chk>[./a-z0-9]{11})?
+        $"""), re.X|re.I)
+
+    @classmethod
+    def from_string(cls, hash):
+        hash = to_unicode(hash, "ascii", "hash")
+        m = cls._hash_regex.match(hash)
+        if not m:
+            raise uh.exc.InvalidHashError(cls)
+        rounds, salt, chk = m.group("rounds", "salt", "chk")
+        return cls(
+            rounds=h64.decode_int24(rounds.encode("ascii")),
+            salt=salt,
+            checksum=chk,
+        )
+
+    def to_string(self):
+        hash = u("_%s%s%s") % (h64.encode_int24(self.rounds).decode("ascii"),
+                               self.salt, self.checksum)
+        return uascii_to_str(hash)
+
+    #===================================================================
+    # validation
+    #===================================================================
+
+    # NOTE: keeping this flag for admin/choose_rounds.py script.
+    #       want to eventually expose rounds logic to that script in better way.
+    _avoid_even_rounds = True
+
+    @classmethod
+    def using(cls, **kwds):
+        subcls = super(bsdi_crypt, cls).using(**kwds)
+        if not subcls.default_rounds & 1:
+            # issue warning if caller set an even 'rounds' value.
+            warn("bsdi_crypt rounds should be odd, as even rounds may reveal weak DES keys",
+                 uh.exc.PasslibSecurityWarning)
+        return subcls
+
+    @classmethod
+    def _generate_rounds(cls):
+        rounds = super(bsdi_crypt, cls)._generate_rounds()
+        # ensure autogenerated rounds are always odd
+        # NOTE: doing this even for default_rounds so needs_update() doesn't get
+        #       caught in a loop.
+        # FIXME: this technically might generate a rounds value 1 larger
+        # than the requested upper bound - but better to err on side of safety.
+        return rounds|1
+
+    #===================================================================
+    # migration
+    #===================================================================
+
+    def _calc_needs_update(self, **kwds):
+        # mark bsdi_crypt hashes as deprecated if they have even rounds.
+        if not self.rounds & 1:
+            return True
+        # hand off to base implementation
+        return super(bsdi_crypt, self)._calc_needs_update(**kwds)
+
+    #===================================================================
+    # backends
+    #===================================================================
+    backends = ("os_crypt", "builtin")
+
+    #---------------------------------------------------------------
+    # os_crypt backend
+    #---------------------------------------------------------------
+    @classmethod
+    def _load_backend_os_crypt(cls):
+        if test_crypt("test", '_/...lLDAxARksGCHin.'):
+            cls._set_calc_checksum_backend(cls._calc_checksum_os_crypt)
+            return True
+        else:
+            return False
+
+    def _calc_checksum_os_crypt(self, secret):
+        config = self.to_string()
+        hash = safe_crypt(secret, config)
+        if hash:
+            assert hash.startswith(config[:9]) and len(hash) == 20
+            return hash[-11:]
+        else:
+            # py3's crypt.crypt() can't handle non-utf8 bytes.
+            # fallback to builtin alg, which is always available.
+            return self._calc_checksum_builtin(secret)
+
+    #---------------------------------------------------------------
+    # builtin backend
+    #---------------------------------------------------------------
+    @classmethod
+    def _load_backend_builtin(cls):
+        cls._set_calc_checksum_backend(cls._calc_checksum_builtin)
+        return True
+
+    def _calc_checksum_builtin(self, secret):
+        return _raw_bsdi_crypt(secret, self.rounds, self.salt.encode("ascii")).decode("ascii")
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+class bigcrypt(uh.HasSalt, uh.GenericHandler):
+    """This class implements the BigCrypt password hash, and follows the :ref:`password-hash-api`.
+
+    It supports a fixed-length salt.
+
+    The :meth:`~passlib.ifc.PasswordHash.using` method accepts the following optional keywords:
+
+    :type salt: str
+    :param salt:
+        Optional salt string.
+        If not specified, one will be autogenerated (this is recommended).
+        If specified, it must be 22 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
+
+    :type relaxed: bool
+    :param relaxed:
+        By default, providing an invalid value for one of the other
+        keywords will result in a :exc:`ValueError`. If ``relaxed=True``,
+        and the error can be corrected, a :exc:`~passlib.exc.PasslibHashWarning`
+        will be issued instead. Correctable errors include
+        ``salt`` strings that are too long.
+
+        .. versionadded:: 1.6
+    """
+    #===================================================================
+    # class attrs
+    #===================================================================
+    #--GenericHandler--
+    name = "bigcrypt"
+    setting_kwds = ("salt",)
+    checksum_chars = uh.HASH64_CHARS
+    # NOTE: checksum chars must be multiple of 11
+
+    #--HasSalt--
+    min_salt_size = max_salt_size = 2
+    salt_chars = uh.HASH64_CHARS
+
+    #===================================================================
+    # internal helpers
+    #===================================================================
+    _hash_regex = re.compile(u(r"""
+        ^
+        (?P<salt>[./a-z0-9]{2})
+        (?P<chk>([./a-z0-9]{11})+)?
+        $"""), re.X|re.I)
+
+    @classmethod
+    def from_string(cls, hash):
+        hash = to_unicode(hash, "ascii", "hash")
+        m = cls._hash_regex.match(hash)
+        if not m:
+            raise uh.exc.InvalidHashError(cls)
+        salt, chk = m.group("salt", "chk")
+        return cls(salt=salt, checksum=chk)
+
+    def to_string(self):
+        hash = u("%s%s") % (self.salt, self.checksum)
+        return uascii_to_str(hash)
+
+    def _norm_checksum(self, checksum, relaxed=False):
+        checksum = super(bigcrypt, self)._norm_checksum(checksum, relaxed=relaxed)
+        if len(checksum) % 11:
+            raise uh.exc.InvalidHashError(self)
+        return checksum
+
+    #===================================================================
+    # backend
+    #===================================================================
+    def _calc_checksum(self, secret):
+        if isinstance(secret, unicode):
+            secret = secret.encode("utf-8")
+        chk = _raw_des_crypt(secret, self.salt.encode("ascii"))
+        idx = 8
+        end = len(secret)
+        while idx < end:
+            next = idx + 8
+            chk += _raw_des_crypt(secret[idx:next], chk[-11:-9])
+            idx = next
+        return chk.decode("ascii")
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+class crypt16(uh.TruncateMixin, uh.HasSalt, uh.GenericHandler):
+    """This class implements the crypt16 password hash, and follows the :ref:`password-hash-api`.
+
+    It supports a fixed-length salt.
+
+    The :meth:`~passlib.ifc.PasswordHash.using` method accepts the following optional keywords:
+
+    :type salt: str
+    :param salt:
+        Optional salt string.
+        If not specified, one will be autogenerated (this is recommended).
+        If specified, it must be 2 characters, drawn from the regexp range ``[./0-9A-Za-z]``.
+
+    :param bool truncate_error:
+        By default, crypt16 will silently truncate passwords larger than 16 bytes.
+        Setting ``truncate_error=True`` will cause :meth:`~passlib.ifc.PasswordHash.hash`
+        to raise a :exc:`~passlib.exc.PasswordTruncateError` instead.
+
+        .. versionadded:: 1.7
+
+    :type relaxed: bool
+    :param relaxed:
+        By default, providing an invalid value for one of the other
+        keywords will result in a :exc:`ValueError`. If ``relaxed=True``,
+        and the error can be corrected, a :exc:`~passlib.exc.PasslibHashWarning`
+        will be issued instead. Correctable errors include
+        ``salt`` strings that are too long.
+
+        .. versionadded:: 1.6
+    """
+    #===================================================================
+    # class attrs
+    #===================================================================
+
+    #--------------------
+    # PasswordHash
+    #--------------------
+    name = "crypt16"
+    setting_kwds = ("salt", "truncate_error")
+
+    #--------------------
+    # GenericHandler
+    #--------------------
+    checksum_size = 22
+    checksum_chars = uh.HASH64_CHARS
+
+    #--------------------
+    # HasSalt
+    #--------------------
+    min_salt_size = max_salt_size = 2
+    salt_chars = uh.HASH64_CHARS
+
+    #--------------------
+    # TruncateMixin
+    #--------------------
+    truncate_size = 16
+
+    #===================================================================
+    # internal helpers
+    #===================================================================
+    _hash_regex = re.compile(u(r"""
+        ^
+        (?P<salt>[./a-z0-9]{2})
+        (?P<chk>[./a-z0-9]{22})?
+        $"""), re.X|re.I)
+
+    @classmethod
+    def from_string(cls, hash):
+        hash = to_unicode(hash, "ascii", "hash")
+        m = cls._hash_regex.match(hash)
+        if not m:
+            raise uh.exc.InvalidHashError(cls)
+        salt, chk = m.group("salt", "chk")
+        return cls(salt=salt, checksum=chk)
+
+    def to_string(self):
+        hash = u("%s%s") % (self.salt, self.checksum)
+        return uascii_to_str(hash)
+
+    #===================================================================
+    # backend
+    #===================================================================
+    def _calc_checksum(self, secret):
+        if isinstance(secret, unicode):
+            secret = secret.encode("utf-8")
+
+        # check for truncation (during .hash() calls only)
+        if self.use_defaults:
+            self._check_truncate_policy(secret)
+
+        # parse salt value
+        try:
+            salt_value = h64.decode_int12(self.salt.encode("ascii"))
+        except ValueError: # pragma: no cover - caught by class
+            raise suppress_cause(ValueError("invalid chars in salt"))
+
+        # convert first 8 bytes of secret string into an integer
+        key1 = _crypt_secret_to_key(secret)
+
+        # run data through des using input of 0
+        result1 = des_encrypt_int_block(key1, 0, salt_value, 20)
+
+        # convert next 8 bytes of secret string into integer (key=0 if secret < 8 chars)
+        key2 = _crypt_secret_to_key(secret[8:16])
+
+        # run data through des using input of 0
+        result2 = des_encrypt_int_block(key2, 0, salt_value, 5)
+
+        # done
+        chk = h64big.encode_int64(result1) + h64big.encode_int64(result2)
+        return chk.decode("ascii")
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+#=============================================================================
+# eof
+#=============================================================================
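
A minimal usage sketch for the handler above, assuming it is exposed as passlib.hash.crypt16 as its name attribute suggests (the import path is an assumption; the keywords are the ones documented in the docstring):

    from passlib.hash import crypt16  # assumed import path

    # hash with an auto-generated two-character salt (recommended)
    h = crypt16.hash("password")
    assert crypt16.verify("password", h)

    # pin the salt and refuse secrets longer than the 16-byte truncate_size
    strict = crypt16.using(salt="ab", truncate_error=True)
    h2 = strict.hash("password")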

+ 146 - 0
Hash/digests.py

@@ -0,0 +1,146 @@
+"""passlib.handlers.digests - plain hash digests
+"""
+#=============================================================================
+# imports
+#=============================================================================
+# core
+import hashlib
+import logging; log = logging.getLogger(__name__)
+# site
+# pkg
+from passlib.utils import to_native_str, to_bytes, render_bytes, consteq
+from passlib.utils.compat import unicode, str_to_uascii
+import passlib.utils.handlers as uh
+from passlib.crypto.digest import lookup_hash
+# local
+__all__ = [
+    "create_hex_hash",
+    "hex_md4",
+    "hex_md5",
+    "hex_sha1",
+    "hex_sha256",
+    "hex_sha512",
+]
+
+#=============================================================================
+# helpers for hexadecimal hashes
+#=============================================================================
+class HexDigestHash(uh.StaticHandler):
+    """this provides a template for supporting passwords stored as plain hexadecimal hashes"""
+    #===================================================================
+    # class attrs
+    #===================================================================
+    _hash_func = None # hash function to use - filled in by create_hex_hash()
+    checksum_size = None # filled in by create_hex_hash()
+    checksum_chars = uh.HEX_CHARS
+
+    #===================================================================
+    # methods
+    #===================================================================
+    @classmethod
+    def _norm_hash(cls, hash):
+        return hash.lower()
+
+    def _calc_checksum(self, secret):
+        if isinstance(secret, unicode):
+            secret = secret.encode("utf-8")
+        return str_to_uascii(self._hash_func(secret).hexdigest())
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+def create_hex_hash(digest, module=__name__):
+    # NOTE: could set digest_name=hash.name for cpython, but not for some other platforms.
+    info = lookup_hash(digest)
+    name = "hex_" + info.name
+    return type(name, (HexDigestHash,), dict(
+        name=name,
+        __module__=module, # so ABCMeta won't clobber it
+        _hash_func=staticmethod(info.const), # sometimes it's a function, sometimes not. so wrap it.
+        checksum_size=info.digest_size*2,
+        __doc__="""This class implements a plain hexadecimal %s hash, and follows the :ref:`password-hash-api`.
+
+It supports no optional or contextual keywords.
+""" % (info.name,)
+    ))
+
+#=============================================================================
+# predefined handlers
+#=============================================================================
+hex_md4     = create_hex_hash("md4")
+hex_md5     = create_hex_hash("md5")
+hex_md5.django_name = "unsalted_md5"
+hex_sha1    = create_hex_hash("sha1")
+hex_sha256  = create_hex_hash("sha256")
+hex_sha512  = create_hex_hash("sha512")
+
+#=============================================================================
+# htdigest
+#=============================================================================
+class htdigest(uh.MinimalHandler):
+    """htdigest hash function.
+
+    .. todo::
+        document this hash
+    """
+    name = "htdigest"
+    setting_kwds = ()
+    context_kwds = ("user", "realm", "encoding")
+    default_encoding = "utf-8"
+
+    @classmethod
+    def hash(cls, secret, user, realm, encoding=None):
+        # NOTE: this was deliberately written so that raw bytes are passed through
+        # unchanged; the encoding kwd is only used to handle unicode values.
+        if not encoding:
+            encoding = cls.default_encoding
+        uh.validate_secret(secret)
+        if isinstance(secret, unicode):
+            secret = secret.encode(encoding)
+        user = to_bytes(user, encoding, "user")
+        realm = to_bytes(realm, encoding, "realm")
+        data = render_bytes("%s:%s:%s", user, realm, secret)
+        return hashlib.md5(data).hexdigest()
+
+    @classmethod
+    def _norm_hash(cls, hash):
+        """normalize hash to native string, and validate it"""
+        hash = to_native_str(hash, param="hash")
+        if len(hash) != 32:
+            raise uh.exc.MalformedHashError(cls, "wrong size")
+        for char in hash:
+            if char not in uh.LC_HEX_CHARS:
+                raise uh.exc.MalformedHashError(cls, "invalid chars in hash")
+        return hash
+
+    @classmethod
+    def verify(cls, secret, hash, user, realm, encoding="utf-8"):
+        hash = cls._norm_hash(hash)
+        other = cls.hash(secret, user, realm, encoding)
+        return consteq(hash, other)
+
+    @classmethod
+    def identify(cls, hash):
+        try:
+            cls._norm_hash(hash)
+        except ValueError:
+            return False
+        return True
+
+    @uh.deprecated_method(deprecated="1.7", removed="2.0")
+    @classmethod
+    def genconfig(cls):
+        return cls.hash("", "", "")
+
+    @uh.deprecated_method(deprecated="1.7", removed="2.0")
+    @classmethod
+    def genhash(cls, secret, config, user, realm, encoding=None):
+        # NOTE: 'config' is ignored, as this hash has no salting / other configuration.
+        #       just have to make sure it's valid.
+        cls._norm_hash(config)
+        return cls.hash(secret, user, realm, encoding)
+
+#=============================================================================
+# eof
+#=============================================================================
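
As hash() above shows, the htdigest scheme is just an MD5 digest over "user:realm:secret". A hashlib-only sketch of the same computation (the helper name is illustrative, not part of passlib):

    import hashlib

    def htdigest_equivalent(user, realm, secret, encoding="utf-8"):
        # MD5 over "user:realm:secret", hex-encoded, matching htdigest.hash() above
        data = ("%s:%s:%s" % (user, realm, secret)).encode(encoding)
        return hashlib.md5(data).hexdigest()

    # e.g. htdigest_equivalent("alice", "example.org", "s3cret")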

+ 157 - 0
Hash/generate_hash_macro.py

@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+# Copyright 2020 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Generates a C macro for the PW tokenizer 65599 fixed length hash."""
+
+from __future__ import print_function
+
+import datetime
+import os
+
+HASH_CONSTANT = 65599
+HASH_NAME = 'pw_tokenizer_65599_fixed_length'
+HASH_LENGTHS = 80, 96, 128
+
+FILE_HEADER = """\
+// Copyright {year} The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+// AUTOGENERATED - DO NOT EDIT
+//
+// This file was generated by {script}.
+// To make changes, update the script and run it to regenerate the files.
+#pragma once
+
+#include <stdint.h>
+
+// {hash_length}-character version of the tokenizer hash function.
+//
+// The argument must be a string literal. It is concatenated with "" to ensure
+// that this is the case.
+//
+// clang-format off
+
+"""
+
+
+def generate_pw_tokenizer_65599_fixed_length_hash_macro(hash_length):
+    """Generate macro that hashes a string literal using a modified x65599 hash.
+
+    The macros generated by this function only operate on string literals.
+
+    Since macros can only operate on fixed-length strings, the hash macro only
+    hashes up to a fixed length, and characters beyond that length are ignored.
+    To eliminate some collisions, the length of the string is hashed as if it
+    were the first character.
+
+    This hash is calculated with the following equation, where s is the string
+    and k is the maximum hash length:
+
+       H(s, k) = len(s) + 65599 * s[0] + 65599^2 * s[1] + ... + 65599^k * s[k-1]
+
+    The hash algorithm is a modified version of the x65599 hash used by the SDBM
+    open source project. This hash has the following differences from x65599:
+      - Characters are only hashed up to a fixed maximum string length.
+      - Characters are hashed in reverse order.
+      - The string length is hashed as the first character in the string.
+
+    The code generated by this function is intentionally sparse. Each character
+    appears hash_length times per log message, so using fewer characters results
+    in faster compilation times.
+
+    Args:
+      hash_length: maximum string size to hash; extra characters are ignored
+
+    Returns:
+      the macro header file as a string
+  """
+
+    first_hash_term = ('(uint32_t)(sizeof(str "") - 1 + '
+                       '/* The argument must be a string literal. */ \\\n')
+
+    # Use this to add the aligned backslash at the end of the macro lines.
+    line_format = '{{:<{}}}\\\n'.format(len(first_hash_term))
+
+    lines = [
+        FILE_HEADER.format(script=os.path.basename(__file__),
+                           hash_length=hash_length,
+                           year=datetime.date.today().year)
+    ]
+
+    lines.append(
+        line_format.format('#define {}_{}_HASH(str)'.format(
+            HASH_NAME.upper(), hash_length)))
+    lines.append('  ' + first_hash_term)  # add indentation and the macro line
+
+    indent = ' ' * len('  (uint32_t)(')
+    coefficient_format = '0x{coefficient:0>8x}u'
+
+    # The string will have at least a null terminator
+    lines.append(
+        line_format.format('{}0x{:0>8x}u * (uint8_t)str[0] +'.format(
+            indent, HASH_CONSTANT)))
+
+    # Format string to use for the remaining terms.
+    term_format = (
+        '{indent}{coefficient} * '
+        '(uint8_t)({index} < sizeof(str) ? str[{index}] : 0) +').format(
+            indent=indent,
+            coefficient=coefficient_format,
+            index='{{index:>{}}}'.format(len(str(hash_length - 1))))
+
+    for i in range(1, hash_length):
+        coefficient = HASH_CONSTANT**(i + 1) % 2**32
+        term = term_format.format(index=i, coefficient=coefficient)
+        lines.append(line_format.format(term))
+
+    # Remove the extra + and \ and add the closing )
+    lines[-1] = lines[-1].rstrip(' +\\\n') + ')'
+
+    lines.append('\n\n// clang-format on\n')
+
+    return ''.join(lines)
+
+
+def _main():
+    base = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), '..', 'public', 'pw_tokenizer',
+                     'internal'))
+
+    # Generate macros for hashes of the specified lengths.
+    for hash_length in HASH_LENGTHS:
+        path = os.path.join(
+            base, '{}_{}_hash_macro.h'.format(HASH_NAME, hash_length))
+
+        with open(path, 'w') as header_file:
+            header_file.write(
+                generate_pw_tokenizer_65599_fixed_length_hash_macro(
+                    hash_length))
+
+        print('Generated {}-character hash macro at {}'.format(
+            hash_length, path))
+
+
+if __name__ == '__main__':
+    _main()
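
The generated macro simply evaluates the H(s, k) sum from the docstring in unsigned 32-bit arithmetic. A short Python sketch of the same calculation, handy for spot-checking a generated header (the function name is illustrative):

    HASH_CONSTANT = 65599

    def fixed_length_65599_hash(string, hash_length):
        # len(s) is hashed first, then each character times 65599**(i+1), all mod 2**32
        data = string.encode()
        value = len(data)
        coefficient = 1
        for i in range(hash_length):
            coefficient = (coefficient * HASH_CONSTANT) % 2**32
            char = data[i] if i < len(data) else 0  # characters past the end contribute 0
            value = (value + coefficient * char) % 2**32
        return value

    # e.g. fixed_length_65599_hash("The answer is: %d", 96)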

+ 997 - 0
Hash/hash_1.py

@@ -0,0 +1,997 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2017 sqlmap developers (http://sqlmap.org/)
+See the file 'doc/COPYING' for copying permission
+"""
+
+try:
+    from crypt import crypt
+except ImportError:
+    from thirdparty.fcrypt.fcrypt import crypt
+
+_multiprocessing = None
+try:
+    import multiprocessing
+
+    # problems on FreeBSD (Reference: http://www.eggheadcafe.com/microsoft/Python/35880259/multiprocessing-on-freebsd.aspx)
+    _ = multiprocessing.Queue()
+except (ImportError, OSError):
+    pass
+else:
+    try:
+        if multiprocessing.cpu_count() > 1:
+            _multiprocessing = multiprocessing
+    except NotImplementedError:
+        pass
+
+import gc
+import os
+import re
+import tempfile
+import time
+import zipfile
+
+from hashlib import md5
+from hashlib import sha1
+from hashlib import sha224
+from hashlib import sha384
+from hashlib import sha512
+from Queue import Queue
+
+from lib.core.common import Backend
+from lib.core.common import checkFile
+from lib.core.common import clearConsoleLine
+from lib.core.common import dataToStdout
+from lib.core.common import getFileItems
+from lib.core.common import getPublicTypeMembers
+from lib.core.common import getSafeExString
+from lib.core.common import getUnicode
+from lib.core.common import hashDBRetrieve
+from lib.core.common import hashDBWrite
+from lib.core.common import normalizeUnicode
+from lib.core.common import paths
+from lib.core.common import readInput
+from lib.core.common import singleTimeLogMessage
+from lib.core.common import singleTimeWarnMessage
+from lib.core.convert import hexdecode
+from lib.core.convert import hexencode
+from lib.core.convert import utf8encode
+from lib.core.data import conf
+from lib.core.data import kb
+from lib.core.data import logger
+from lib.core.enums import DBMS
+from lib.core.enums import HASH
+from lib.core.enums import MKSTEMP_PREFIX
+from lib.core.exception import SqlmapDataException
+from lib.core.exception import SqlmapUserQuitException
+from lib.core.settings import COMMON_PASSWORD_SUFFIXES
+from lib.core.settings import COMMON_USER_COLUMNS
+from lib.core.settings import DUMMY_USER_PREFIX
+from lib.core.settings import HASH_MOD_ITEM_DISPLAY
+from lib.core.settings import HASH_RECOGNITION_QUIT_THRESHOLD
+from lib.core.settings import IS_WIN
+from lib.core.settings import ITOA64
+from lib.core.settings import NULL
+from lib.core.settings import UNICODE_ENCODING
+from lib.core.settings import ROTATING_CHARS
+from lib.core.wordlist import Wordlist
+from thirdparty.colorama.initialise import init as coloramainit
+from thirdparty.pydes.pyDes import des
+from thirdparty.pydes.pyDes import CBC
+
+def mysql_passwd(password, uppercase=True):
+    """
+    Reference(s):
+        http://csl.sublevel3.org/mysql-password-function/
+
+    >>> mysql_passwd(password='testpass', uppercase=True)
+    '*00E247AC5F9AF26AE0194B41E1E769DEE1429A29'
+    """
+
+    retVal = "*%s" % sha1(sha1(password).digest()).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def mysql_old_passwd(password, uppercase=True):  # prior to version '4.1'
+    """
+    Reference(s):
+        http://www.sfr-fresh.com/unix/privat/tpop3d-1.5.5.tar.gz:a/tpop3d-1.5.5/password.c
+        http://voidnetwork.org/5ynL0rd/darkc0de/python_script/darkMySQLi.html
+
+    >>> mysql_old_passwd(password='testpass', uppercase=True)
+    '7DCDA0D57290B453'
+    """
+
+    a, b, c = 1345345333, 7, 0x12345671
+
+    for d in password:
+        if d == ' ' or d == '\t':
+            continue
+
+        e = ord(d)
+        a ^= (((a & 63) + b) * e) + (a << 8)
+        c += (c << 8) ^ a
+        b += e
+
+    retVal = "%08lx%08lx" % (a & ((1 << 31) - 1), c & ((1 << 31) - 1))
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def postgres_passwd(password, username, uppercase=False):
+    """
+    Reference(s):
+        http://pentestmonkey.net/blog/cracking-postgres-hashes/
+
+    >>> postgres_passwd(password='testpass', username='testuser', uppercase=False)
+    'md599e5ea7a6f7c3269995cba3927fd0093'
+    """
+
+
+    if isinstance(username, unicode):
+        username = unicode.encode(username, UNICODE_ENCODING)
+
+    if isinstance(password, unicode):
+        password = unicode.encode(password, UNICODE_ENCODING)
+
+    retVal = "md5%s" % md5(password + username).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def mssql_passwd(password, salt, uppercase=False):
+    """
+    Reference(s):
+        http://www.leidecker.info/projects/phrasendrescher/mssql.c
+        https://www.evilfingers.com/tools/GSAuditor.php
+
+    >>> mssql_passwd(password='testpass', salt='4086ceb6', uppercase=False)
+    '0x01004086ceb60c90646a8ab9889fe3ed8e5c150b5460ece8425a'
+    """
+
+    binsalt = hexdecode(salt)
+    unistr = "".join(map(lambda c: ("%s\0" if ord(c) < 256 else "%s") % utf8encode(c), password))
+
+    retVal = "0100%s%s" % (salt, sha1(unistr + binsalt).hexdigest())
+
+    return "0x%s" % (retVal.upper() if uppercase else retVal.lower())
+
+def mssql_old_passwd(password, salt, uppercase=True):  # prior to version '2005'
+    """
+    Reference(s):
+        www.exploit-db.com/download_pdf/15537/
+        http://www.leidecker.info/projects/phrasendrescher/mssql.c
+        https://www.evilfingers.com/tools/GSAuditor.php
+
+    >>> mssql_old_passwd(password='testpass', salt='4086ceb6', uppercase=True)
+    '0x01004086CEB60C90646A8AB9889FE3ED8E5C150B5460ECE8425AC7BB7255C0C81D79AA5D0E93D4BB077FB9A51DA0'
+    """
+
+    binsalt = hexdecode(salt)
+    unistr = "".join(map(lambda c: ("%s\0" if ord(c) < 256 else "%s") % utf8encode(c), password))
+
+    retVal = "0100%s%s%s" % (salt, sha1(unistr + binsalt).hexdigest(), sha1(unistr.upper() + binsalt).hexdigest())
+
+    return "0x%s" % (retVal.upper() if uppercase else retVal.lower())
+
+def mssql_new_passwd(password, salt, uppercase=False):
+    """
+    Reference(s):
+        http://hashcat.net/forum/thread-1474.html
+
+    >>> mssql_new_passwd(password='testpass', salt='4086ceb6', uppercase=False)
+    '0x02004086ceb6eb051cdbc5bdae68ffc66c918d4977e592f6bdfc2b444a7214f71fa31c35902c5b7ae773ed5f4c50676d329120ace32ee6bc81c24f70711eb0fc6400e85ebf25'
+    """
+
+    binsalt = hexdecode(salt)
+    unistr = "".join(map(lambda c: ("%s\0" if ord(c) < 256 else "%s") % utf8encode(c), password))
+
+    retVal = "0200%s%s" % (salt, sha512(unistr + binsalt).hexdigest())
+
+    return "0x%s" % (retVal.upper() if uppercase else retVal.lower())
+
+def oracle_passwd(password, salt, uppercase=True):
+    """
+    Reference(s):
+        https://www.evilfingers.com/tools/GSAuditor.php
+        http://www.notesbit.com/index.php/scripts-oracle/oracle-11g-new-password-algorithm-is-revealed-by-seclistsorg/
+        http://seclists.org/bugtraq/2007/Sep/304
+
+    >>> oracle_passwd(password='SHAlala', salt='1B7B5F82B7235E9E182C', uppercase=True)
+    'S:2BFCFDF5895014EE9BB2B9BA067B01E0389BB5711B7B5F82B7235E9E182C'
+    """
+
+    binsalt = hexdecode(salt)
+
+    retVal = "s:%s%s" % (sha1(utf8encode(password) + binsalt).hexdigest(), salt)
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def oracle_old_passwd(password, username, uppercase=True):  # prior to version '11g'
+    """
+    Reference(s):
+        http://www.notesbit.com/index.php/scripts-oracle/oracle-11g-new-password-algorithm-is-revealed-by-seclistsorg/
+
+    >>> oracle_old_passwd(password='tiger', username='scott', uppercase=True)
+    'F894844C34402B67'
+    """
+
+    IV, pad = "\0" * 8, "\0"
+
+    if isinstance(username, unicode):
+        username = unicode.encode(username, UNICODE_ENCODING)
+
+    if isinstance(password, unicode):
+        password = unicode.encode(password, UNICODE_ENCODING)
+
+    unistr = "".join("\0%s" % c for c in (username + password).upper())
+
+    cipher = des(hexdecode("0123456789ABCDEF"), CBC, IV, pad)
+    encrypted = cipher.encrypt(unistr)
+    cipher = des(encrypted[-8:], CBC, IV, pad)
+    encrypted = cipher.encrypt(unistr)
+
+    retVal = hexencode(encrypted[-8:])
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def md5_generic_passwd(password, uppercase=False):
+    """
+    >>> md5_generic_passwd(password='testpass', uppercase=False)
+    '179ad45c6ce2cb97cf1029e212046e81'
+    """
+
+    retVal = md5(password).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def sha1_generic_passwd(password, uppercase=False):
+    """
+    >>> sha1_generic_passwd(password='testpass', uppercase=False)
+    '206c80413b9a96c1312cc346b7d2517b84463edd'
+    """
+
+    retVal = sha1(password).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def sha224_generic_passwd(password, uppercase=False):
+    """
+    >>> sha224_generic_passwd(password='testpass', uppercase=False)
+    '648db6019764b598f75ab6b7616d2e82563a00eb1531680e19ac4c6f'
+    """
+
+    retVal = sha224(password).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def sha384_generic_passwd(password, uppercase=False):
+    """
+    >>> sha384_generic_passwd(password='testpass', uppercase=False)
+    '6823546e56adf46849343be991d4b1be9b432e42ed1b4bb90635a0e4b930e49b9ca007bc3e04bf0a4e0df6f1f82769bf'
+    """
+
+    retVal = sha384(password).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def sha512_generic_passwd(password, uppercase=False):
+    """
+    >>> sha512_generic_passwd(password='testpass', uppercase=False)
+    '78ddc8555bb1677ff5af75ba5fc02cb30bb592b0610277ae15055e189b77fe3fda496e5027a3d99ec85d54941adee1cc174b50438fdc21d82d0a79f85b58cf44'
+    """
+
+    retVal = sha512(password).hexdigest()
+
+    return retVal.upper() if uppercase else retVal.lower()
+
+def crypt_generic_passwd(password, salt, uppercase=False):
+    """
+    Reference(s):
+        http://docs.python.org/library/crypt.html
+        http://helpful.knobs-dials.com/index.php/Hashing_notes
+        http://php.net/manual/en/function.crypt.php
+        http://carey.geek.nz/code/python-fcrypt/
+
+    >>> crypt_generic_passwd(password='rasmuslerdorf', salt='rl', uppercase=False)
+    'rl.3StKT.4T8M'
+    """
+
+    retVal = crypt(password, salt)
+
+    return retVal.upper() if uppercase else retVal
+
+def wordpress_passwd(password, salt, count, prefix, uppercase=False):
+    """
+    Reference(s):
+        http://packetstormsecurity.org/files/74448/phpassbrute.py.txt
+        http://scriptserver.mainframe8.com/wordpress_password_hasher.php
+
+    >>> wordpress_passwd(password='testpass', salt='aD9ZLmkp', count=2048, prefix='$P$9aD9ZLmkp', uppercase=False)
+    '$P$9aD9ZLmkpsN4A83G8MefaaP888gVKX0'
+    """
+
+    def _encode64(input_, count):
+        output = ''
+        i = 0
+
+        while i < count:
+            value = ord(input_[i])
+            i += 1
+            output = output + ITOA64[value & 0x3f]
+
+            if i < count:
+                value = value | (ord(input_[i]) << 8)
+
+            output = output + ITOA64[(value >> 6) & 0x3f]
+
+            i += 1
+            if i >= count:
+                break
+
+            if i < count:
+                value = value | (ord(input_[i]) << 16)
+
+            output = output + ITOA64[(value >> 12) & 0x3f]
+
+            i += 1
+            if i >= count:
+                break
+
+            output = output + ITOA64[(value >> 18) & 0x3f]
+
+        return output
+
+    if isinstance(password, unicode):
+        password = password.encode(UNICODE_ENCODING)
+
+    cipher = md5(salt)
+    cipher.update(password)
+    hash_ = cipher.digest()
+
+    for i in xrange(count):
+        _ = md5(hash_)
+        _.update(password)
+        hash_ = _.digest()
+
+    retVal = prefix + _encode64(hash_, 16)
+
+    return retVal.upper() if uppercase else retVal
+
+__functions__ = {
+                    HASH.MYSQL: mysql_passwd,
+                    HASH.MYSQL_OLD: mysql_old_passwd,
+                    HASH.POSTGRES: postgres_passwd,
+                    HASH.MSSQL: mssql_passwd,
+                    HASH.MSSQL_OLD: mssql_old_passwd,
+                    HASH.MSSQL_NEW: mssql_new_passwd,
+                    HASH.ORACLE: oracle_passwd,
+                    HASH.ORACLE_OLD: oracle_old_passwd,
+                    HASH.MD5_GENERIC: md5_generic_passwd,
+                    HASH.SHA1_GENERIC: sha1_generic_passwd,
+                    HASH.SHA224_GENERIC: sha224_generic_passwd,
+                    HASH.SHA384_GENERIC: sha384_generic_passwd,
+                    HASH.SHA512_GENERIC: sha512_generic_passwd,
+                    HASH.CRYPT_GENERIC: crypt_generic_passwd,
+                    HASH.WORDPRESS: wordpress_passwd,
+                }
+
+def storeHashesToFile(attack_dict):
+    if not attack_dict:
+        return
+
+    if kb.storeHashesChoice is None:
+        message = "do you want to store hashes to a temporary file "
+        message += "for eventual further processing with other tools [y/N] "
+        test = readInput(message, default="N")
+        kb.storeHashesChoice = test[0] in ("y", "Y")
+
+    if not kb.storeHashesChoice:
+        return
+
+    handle, filename = tempfile.mkstemp(prefix=MKSTEMP_PREFIX.HASHES, suffix=".txt")
+    os.close(handle)
+
+    infoMsg = "writing hashes to a temporary file '%s' " % filename
+    logger.info(infoMsg)
+
+    items = set()
+
+    with open(filename, "w+") as f:
+        for user, hashes in attack_dict.items():
+            for hash_ in hashes:
+                hash_ = hash_.split()[0] if hash_ and hash_.strip() else hash_
+                if hash_ and hash_ != NULL and hashRecognition(hash_):
+                    item = None
+                    if user and not user.startswith(DUMMY_USER_PREFIX):
+                        item = "%s:%s\n" % (user.encode(UNICODE_ENCODING), hash_.encode(UNICODE_ENCODING))
+                    else:
+                        item = "%s\n" % hash_.encode(UNICODE_ENCODING)
+
+                    if item and item not in items:
+                        f.write(item)
+                        items.add(item)
+
+def attackCachedUsersPasswords():
+    if kb.data.cachedUsersPasswords:
+        results = dictionaryAttack(kb.data.cachedUsersPasswords)
+
+        lut = {}
+        for (_, hash_, password) in results:
+            lut[hash_.lower()] = password
+
+        for user in kb.data.cachedUsersPasswords.keys():
+            for i in xrange(len(kb.data.cachedUsersPasswords[user])):
+                if (kb.data.cachedUsersPasswords[user][i] or "").strip():
+                    value = kb.data.cachedUsersPasswords[user][i].lower().split()[0]
+                    if value in lut:
+                        kb.data.cachedUsersPasswords[user][i] += "%s    clear-text password: %s" % ('\n' if kb.data.cachedUsersPasswords[user][i][-1] != '\n' else '', lut[value])
+
+def attackDumpedTable():
+    if kb.data.dumpedTable:
+        table = kb.data.dumpedTable
+        columns = table.keys()
+        count = table["__infos__"]["count"]
+
+        if not count:
+            return
+
+        infoMsg = "analyzing table dump for possible password hashes"
+        logger.info(infoMsg)
+
+        found = False
+        col_user = ''
+        col_passwords = set()
+        attack_dict = {}
+
+        for column in columns:
+            if column and column.lower() in COMMON_USER_COLUMNS:
+                col_user = column
+                break
+
+        for i in xrange(count):
+            if not found and i > HASH_RECOGNITION_QUIT_THRESHOLD:
+                break
+
+            for column in columns:
+                if column == col_user or column == '__infos__':
+                    continue
+
+                if len(table[column]['values']) <= i:
+                    continue
+
+                value = table[column]['values'][i]
+
+                if hashRecognition(value):
+                    found = True
+
+                    if col_user and i < len(table[col_user]['values']):
+                        if table[col_user]['values'][i] not in attack_dict:
+                            attack_dict[table[col_user]['values'][i]] = []
+
+                        attack_dict[table[col_user]['values'][i]].append(value)
+                    else:
+                        attack_dict['%s%d' % (DUMMY_USER_PREFIX, i)] = [value]
+
+                    col_passwords.add(column)
+
+        if attack_dict:
+            infoMsg = "recognized possible password hashes in column%s " % ("s" if len(col_passwords) > 1 else "")
+            infoMsg += "'%s'" % ", ".join(col for col in col_passwords)
+            logger.info(infoMsg)
+
+            storeHashesToFile(attack_dict)
+
+            message = "do you want to crack them via a dictionary-based attack? %s" % ("[y/N/q]" if conf.multipleTargets else "[Y/n/q]")
+            test = readInput(message, default="N" if conf.multipleTargets else "Y")
+
+            if test[0] in ("n", "N"):
+                return
+            elif test[0] in ("q", "Q"):
+                raise SqlmapUserQuitException
+
+            results = dictionaryAttack(attack_dict)
+            lut = dict()
+
+            for (_, hash_, password) in results:
+                if hash_:
+                    lut[hash_.lower()] = password
+
+            infoMsg = "postprocessing table dump"
+            logger.info(infoMsg)
+
+            for i in xrange(count):
+                for column in columns:
+                    if not (column == col_user or column == '__infos__' or len(table[column]['values']) <= i):
+                        value = table[column]['values'][i]
+
+                        if value and value.lower() in lut:
+                            table[column]['values'][i] = "%s (%s)" % (getUnicode(table[column]['values'][i]), getUnicode(lut[value.lower()]))
+                            table[column]['length'] = max(table[column]['length'], len(table[column]['values'][i]))
+
+def hashRecognition(value):
+    retVal = None
+
+    isOracle, isMySQL = Backend.isDbms(DBMS.ORACLE), Backend.isDbms(DBMS.MYSQL)
+
+    if isinstance(value, basestring):
+        for name, regex in getPublicTypeMembers(HASH):
+            # Hashes for Oracle and old MySQL look the same, hence these checks
+            if isOracle and regex == HASH.MYSQL_OLD:
+                continue
+            elif isMySQL and regex == HASH.ORACLE_OLD:
+                continue
+            elif regex == HASH.CRYPT_GENERIC:
+                if any((value.lower() == value, value.upper() == value)):
+                    continue
+            elif re.match(regex, value):
+                retVal = regex
+                break
+
+    return retVal
+
+def _bruteProcessVariantA(attack_info, hash_regex, suffix, retVal, proc_id, proc_count, wordlists, custom_wordlist):
+    if IS_WIN:
+        coloramainit()
+
+    count = 0
+    rotator = 0
+    hashes = set([item[0][1] for item in attack_info])
+
+    wordlist = Wordlist(wordlists, proc_id, getattr(proc_count, "value", 0), custom_wordlist)
+
+    try:
+        for word in wordlist:
+            if not attack_info:
+                break
+
+            if not isinstance(word, basestring):
+                continue
+
+            if suffix:
+                word = word + suffix
+
+            try:
+                current = __functions__[hash_regex](password=word, uppercase=False)
+
+                count += 1
+
+                if current in hashes:
+                    for item in attack_info[:]:
+                        ((user, hash_), _) = item
+
+                        if hash_ == current:
+                            retVal.put((user, hash_, word))
+
+                            clearConsoleLine()
+
+                            infoMsg = "\r[%s] [INFO] cracked password '%s'" % (time.strftime("%X"), word)
+
+                            if user and not user.startswith(DUMMY_USER_PREFIX):
+                                infoMsg += " for user '%s'\n" % user
+                            else:
+                                infoMsg += " for hash '%s'\n" % hash_
+
+                            dataToStdout(infoMsg, True)
+
+                            attack_info.remove(item)
+
+                elif (proc_id == 0 or getattr(proc_count, "value", 0) == 1) and count % HASH_MOD_ITEM_DISPLAY == 0 or hash_regex == HASH.ORACLE_OLD or hash_regex == HASH.CRYPT_GENERIC and IS_WIN:
+                    rotator += 1
+
+                    if rotator >= len(ROTATING_CHARS):
+                        rotator = 0
+
+                    status = 'current status: %s... %s' % (word.ljust(5)[:5], ROTATING_CHARS[rotator])
+
+                    if not hasattr(conf, "api"):
+                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status))
+
+            except KeyboardInterrupt:
+                raise
+
+            except (UnicodeEncodeError, UnicodeDecodeError):
+                pass  # ignore possible encoding problems caused by some words in custom dictionaries
+
+            except Exception as e:
+                warnMsg = "there was a problem while hashing entry: %s (%s). " % (repr(word), e)
+                warnMsg += "Please report by e-mail to 'dev@sqlmap.org'"
+                logger.critical(warnMsg)
+
+    except KeyboardInterrupt:
+        pass
+
+    finally:
+        if hasattr(proc_count, "value"):
+            with proc_count.get_lock():
+                proc_count.value -= 1
+
+def _bruteProcessVariantB(user, hash_, kwargs, hash_regex, suffix, retVal, found, proc_id, proc_count, wordlists, custom_wordlist):
+    if IS_WIN:
+        coloramainit()
+
+    count = 0
+    rotator = 0
+
+    wordlist = Wordlist(wordlists, proc_id, getattr(proc_count, "value", 0), custom_wordlist)
+
+    try:
+        for word in wordlist:
+            if found.value:
+                break
+
+            current = __functions__[hash_regex](password=word, uppercase=False, **kwargs)
+            count += 1
+
+            if not isinstance(word, basestring):
+                continue
+
+            if suffix:
+                word = word + suffix
+
+            try:
+                if hash_ == current:
+                    if hash_regex == HASH.ORACLE_OLD:  # only for cosmetic purposes
+                        word = word.upper()
+
+                    retVal.put((user, hash_, word))
+
+                    clearConsoleLine()
+
+                    infoMsg = "\r[%s] [INFO] cracked password '%s'" % (time.strftime("%X"), word)
+
+                    if user and not user.startswith(DUMMY_USER_PREFIX):
+                        infoMsg += " for user '%s'\n" % user
+                    else:
+                        infoMsg += " for hash '%s'\n" % hash_
+
+                    dataToStdout(infoMsg, True)
+
+                    found.value = True
+
+                elif (proc_id == 0 or getattr(proc_count, "value", 0) == 1) and count % HASH_MOD_ITEM_DISPLAY == 0:
+                    rotator += 1
+                    if rotator >= len(ROTATING_CHARS):
+                        rotator = 0
+                    status = 'current status: %s... %s' % (word.ljust(5)[:5], ROTATING_CHARS[rotator])
+
+                    if user and not user.startswith(DUMMY_USER_PREFIX):
+                        status += ' (user: %s)' % user
+
+                    if not hasattr(conf, "api"):
+                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status))
+
+            except KeyboardInterrupt:
+                raise
+
+            except (UnicodeEncodeError, UnicodeDecodeError):
+                pass  # ignore possible encoding problems caused by some words in custom dictionaries
+
+            except Exception as e:
+                warnMsg = "there was a problem while hashing entry: %s (%s). " % (repr(word), e)
+                warnMsg += "Please report by e-mail to 'dev@sqlmap.org'"
+                logger.critical(warnMsg)
+
+    except KeyboardInterrupt:
+        pass
+
+    finally:
+        if hasattr(proc_count, "value"):
+            with proc_count.get_lock():
+                proc_count.value -= 1
+
+def dictionaryAttack(attack_dict):
+    suffix_list = [""]
+    custom_wordlist = [""]
+    hash_regexes = []
+    results = []
+    resumes = []
+    user_hash = []
+    processException = False
+    foundHash = False
+
+    for (_, hashes) in attack_dict.items():
+        for hash_ in hashes:
+            if not hash_:
+                continue
+
+            hash_ = hash_.split()[0] if hash_ and hash_.strip() else hash_
+            regex = hashRecognition(hash_)
+
+            if regex and regex not in hash_regexes:
+                hash_regexes.append(regex)
+                infoMsg = "using hash method '%s'" % __functions__[regex].func_name
+                logger.info(infoMsg)
+
+    for hash_regex in hash_regexes:
+        keys = set()
+        attack_info = []
+
+        for (user, hashes) in attack_dict.items():
+            for hash_ in hashes:
+                if not hash_:
+                    continue
+
+                foundHash = True
+                hash_ = hash_.split()[0] if hash_ and hash_.strip() else hash_
+
+                if re.match(hash_regex, hash_):
+                    item = None
+
+                    if hash_regex not in (HASH.CRYPT_GENERIC, HASH.WORDPRESS):
+                        hash_ = hash_.lower()
+
+                    if hash_regex in (HASH.MYSQL, HASH.MYSQL_OLD, HASH.MD5_GENERIC, HASH.SHA1_GENERIC):
+                        item = [(user, hash_), {}]
+                    elif hash_regex in (HASH.ORACLE_OLD, HASH.POSTGRES):
+                        item = [(user, hash_), {'username': user}]
+                    elif hash_regex in (HASH.ORACLE,):
+                        item = [(user, hash_), {'salt': hash_[-20:]}]
+                    elif hash_regex in (HASH.MSSQL, HASH.MSSQL_OLD, HASH.MSSQL_NEW):
+                        item = [(user, hash_), {'salt': hash_[6:14]}]
+                    elif hash_regex in (HASH.CRYPT_GENERIC,):
+                        item = [(user, hash_), {'salt': hash_[0:2]}]
+                    elif hash_regex in (HASH.WORDPRESS,):
+                        if ITOA64.index(hash_[3]) < 32:
+                            item = [(user, hash_), {'salt': hash_[4:12], 'count': 1 << ITOA64.index(hash_[3]), 'prefix': hash_[:12]}]
+                        else:
+                            warnMsg = "invalid hash '%s'" % hash_
+                            logger.warn(warnMsg)
+
+                    if item and hash_ not in keys:
+                        resumed = hashDBRetrieve(hash_)
+                        if not resumed:
+                            attack_info.append(item)
+                            user_hash.append(item[0])
+                        else:
+                            infoMsg = "resuming password '%s' for hash '%s'" % (resumed, hash_)
+                            if user and not user.startswith(DUMMY_USER_PREFIX):
+                                infoMsg += " for user '%s'" % user
+                            logger.info(infoMsg)
+                            resumes.append((user, hash_, resumed))
+                        keys.add(hash_)
+
+        if not attack_info:
+            continue
+
+        if not kb.wordlists:
+            while not kb.wordlists:
+
+                # the slowest of all methods hence smaller default dict
+                if hash_regex in (HASH.ORACLE_OLD, HASH.WORDPRESS):
+                    dictPaths = [paths.SMALL_DICT]
+                else:
+                    dictPaths = [paths.WORDLIST]
+
+                message = "what dictionary do you want to use?\n"
+                message += "[1] default dictionary file '%s' (press Enter)\n" % dictPaths[0]
+                message += "[2] custom dictionary file\n"
+                message += "[3] file with list of dictionary files"
+                choice = readInput(message, default="1")
+
+                try:
+                    if choice == "2":
+                        message = "what's the custom dictionary's location?\n"
+                        dictPaths = [readInput(message)]
+
+                        logger.info("using custom dictionary")
+                    elif choice == "3":
+                        message = "what's the list file location?\n"
+                        listPath = readInput(message)
+                        checkFile(listPath)
+                        dictPaths = getFileItems(listPath)
+
+                        logger.info("using custom list of dictionaries")
+                    else:
+                        logger.info("using default dictionary")
+
+                    dictPaths = filter(None, dictPaths)
+
+                    for dictPath in dictPaths:
+                        checkFile(dictPath)
+
+                        if os.path.splitext(dictPath)[1].lower() == ".zip":
+                            _ = zipfile.ZipFile(dictPath, 'r')
+                            if len(_.namelist()) == 0:
+                                errMsg = "no file(s) inside '%s'" % dictPath
+                                raise SqlmapDataException(errMsg)
+                            else:
+                                _.open(_.namelist()[0])
+
+                    kb.wordlists = dictPaths
+
+                except Exception as ex:
+                    warnMsg = "there was a problem while loading dictionaries"
+                    warnMsg += " ('%s')" % getSafeExString(ex)
+                    logger.critical(warnMsg)
+
+            message = "do you want to use common password suffixes? (slow!) [y/N] "
+            test = readInput(message, default="N")
+
+            if test[0] in ("y", "Y"):
+                suffix_list += COMMON_PASSWORD_SUFFIXES
+
+        infoMsg = "starting dictionary-based cracking (%s)" % __functions__[hash_regex].func_name
+        logger.info(infoMsg)
+
+        for item in attack_info:
+            ((user, _), _) = item
+            if user and not user.startswith(DUMMY_USER_PREFIX):
+                custom_wordlist.append(normalizeUnicode(user))
+
+        if hash_regex in (HASH.MYSQL, HASH.MYSQL_OLD, HASH.MD5_GENERIC, HASH.SHA1_GENERIC):
+            for suffix in suffix_list:
+                if not attack_info or processException:
+                    break
+
+                if suffix:
+                    clearConsoleLine()
+                    infoMsg = "using suffix '%s'" % suffix
+                    logger.info(infoMsg)
+
+                retVal = None
+                processes = []
+
+                try:
+                    if _multiprocessing:
+                        if _multiprocessing.cpu_count() > 1:
+                            infoMsg = "starting %d processes " % _multiprocessing.cpu_count()
+                            singleTimeLogMessage(infoMsg)
+
+                        gc.disable()
+
+                        retVal = _multiprocessing.Queue()
+                        count = _multiprocessing.Value('i', _multiprocessing.cpu_count())
+
+                        for i in xrange(_multiprocessing.cpu_count()):
+                            p = _multiprocessing.Process(target=_bruteProcessVariantA, args=(attack_info, hash_regex, suffix, retVal, i, count, kb.wordlists, custom_wordlist))
+                            processes.append(p)
+
+                        for p in processes:
+                            p.daemon = True
+                            p.start()
+
+                        while count.value > 0:
+                            time.sleep(0.5)
+
+                    else:
+                        warnMsg = "multiprocessing hash cracking is currently "
+                        warnMsg += "not supported on this platform"
+                        singleTimeWarnMessage(warnMsg)
+
+                        retVal = Queue()
+                        _bruteProcessVariantA(attack_info, hash_regex, suffix, retVal, 0, 1, kb.wordlists, custom_wordlist)
+
+                except KeyboardInterrupt:
+                    print
+                    processException = True
+                    warnMsg = "user aborted during dictionary-based attack phase (Ctrl+C was pressed)"
+                    logger.warn(warnMsg)
+
+                    for process in processes:
+                        try:
+                            process.terminate()
+                            process.join()
+                        except (OSError, AttributeError):
+                            pass
+
+                finally:
+                    if _multiprocessing:
+                        gc.enable()
+
+                    if retVal:
+                        conf.hashDB.beginTransaction()
+
+                        while not retVal.empty():
+                            user, hash_, word = item = retVal.get(block=False)
+                            attack_info = filter(lambda _: _[0][0] != user or _[0][1] != hash_, attack_info)
+                            hashDBWrite(hash_, word)
+                            results.append(item)
+
+                        conf.hashDB.endTransaction()
+
+            clearConsoleLine()
+
+        else:
+            for ((user, hash_), kwargs) in attack_info:
+                if processException:
+                    break
+
+                if any(_[0] == user and _[1] == hash_ for _ in results):
+                    continue
+
+                count = 0
+                found = False
+
+                for suffix in suffix_list:
+                    if found or processException:
+                        break
+
+                    if suffix:
+                        clearConsoleLine()
+                        infoMsg = "using suffix '%s'" % suffix
+                        logger.info(infoMsg)
+
+                    retVal = None
+                    processes = []
+
+                    try:
+                        if _multiprocessing:
+                            if _multiprocessing.cpu_count() > 1:
+                                infoMsg = "starting %d processes " % _multiprocessing.cpu_count()
+                                singleTimeLogMessage(infoMsg)
+
+                            gc.disable()
+
+                            retVal = _multiprocessing.Queue()
+                            found_ = _multiprocessing.Value('i', False)
+                            count = _multiprocessing.Value('i', _multiprocessing.cpu_count())
+
+                            for i in xrange(_multiprocessing.cpu_count()):
+                                p = _multiprocessing.Process(target=_bruteProcessVariantB, args=(user, hash_, kwargs, hash_regex, suffix, retVal, found_, i, count, kb.wordlists, custom_wordlist))
+                                processes.append(p)
+
+                            for p in processes:
+                                p.daemon = True
+                                p.start()
+
+                            while count.value > 0:
+                                time.sleep(0.5)
+
+                            found = found_.value != 0
+
+                        else:
+                            warnMsg = "multiprocessing hash cracking is currently "
+                            warnMsg += "not supported on this platform"
+                            singleTimeWarnMessage(warnMsg)
+
+                            class Value():
+                                pass
+
+                            retVal = Queue()
+                            found_ = Value()
+                            found_.value = False
+
+                            _bruteProcessVariantB(user, hash_, kwargs, hash_regex, suffix, retVal, found_, 0, 1, kb.wordlists, custom_wordlist)
+
+                            found = found_.value
+
+                    except KeyboardInterrupt:
+                        print
+                        processException = True
+                        warnMsg = "user aborted during dictionary-based attack phase (Ctrl+C was pressed)"
+                        logger.warn(warnMsg)
+
+                        for process in processes:
+                            try:
+                                process.terminate()
+                                process.join()
+                            except (OSError, AttributeError):
+                                pass
+
+                    finally:
+                        if _multiprocessing:
+                            gc.enable()
+
+                        if retVal:
+                            conf.hashDB.beginTransaction()
+
+                            while not retVal.empty():
+                                user, hash_, word = item = retVal.get(block=False)
+                                hashDBWrite(hash_, word)
+                                results.append(item)
+
+                            conf.hashDB.endTransaction()
+
+                clearConsoleLine()
+
+    results.extend(resumes)
+
+    if foundHash and len(hash_regexes) == 0:
+        warnMsg = "unknown hash format"
+        logger.warn(warnMsg)
+
+    if len(results) == 0:
+        warnMsg = "no clear password(s) found"
+        logger.warn(warnMsg)
+
+    return results
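
For the most common case above, mysql_passwd(), the same value can be reproduced on Python 3 with hashlib alone; a small sketch (the helper name is illustrative, not part of sqlmap):

    import hashlib

    def mysql41_hash(password):
        # '*' followed by uppercase hex of SHA1(SHA1(password)), as in mysql_passwd()
        inner = hashlib.sha1(password.encode("utf-8")).digest()
        return "*" + hashlib.sha1(inner).hexdigest().upper()

    # mysql41_hash('testpass') == '*00E247AC5F9AF26AE0194B41E1E769DEE1429A29'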

+ 117 - 0
Hash/hash_functions.py

@@ -0,0 +1,117 @@
+#
+#**************************************************************************
+#*                                                                        *
+#*          General Purpose Hash Function Algorithms Library              *
+#*                                                                        *
+#* Author: Arash Partow - 2002                                            *
+#* URL: http://www.partow.net                                             *
+#* URL: http://www.partow.net/programming/hashfunctions/index.html        *
+#*                                                                        *
+#* Copyright notice:                                                      *
+#* Free use of the General Purpose Hash Function Algorithms Library is    *
+#* permitted under the guidelines and in accordance with the most current *
+#* version of the Common Public License.                                  *
+#* http://www.opensource.org/licenses/cpl1.0.php                          *
+#*                                                                        *
+#**************************************************************************
+#
+
+def RSHash(key):
+    a    = 378551
+    b    =  63689
+    hash =      0
+    for i in range(len(key)):
+      hash = hash * a + ord(key[i])
+      a = a * b
+    return hash
+
+
+def JSHash(key):
+    hash = 1315423911
+    for i in range(len(key)):
+      hash ^= ((hash << 5) + ord(key[i]) + (hash >> 2))
+    return hash
+
+
+def PJWHash(key):
+   BitsInUnsignedInt = 4 * 8
+   ThreeQuarters     = long((BitsInUnsignedInt  * 3) / 4)
+   OneEighth         = long(BitsInUnsignedInt / 8)
+   HighBits          = (0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth)
+   hash              = 0
+   test              = 0
+
+   for i in range(len(key)):
+     hash = (hash << OneEighth) + ord(key[i])
+     test = hash & HighBits
+     if test != 0:
+       hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits))
+   return (hash & 0x7FFFFFFF)
+
+
+def ELFHash(key):
+    hash = 0
+    x    = 0
+    for i in range(len(key)):
+      hash = (hash << 4) + ord(key[i])
+      x = hash & 0xF0000000
+      if x != 0:
+        hash ^= (x >> 24)
+      hash &= ~x
+    return hash
+
+
+def BKDRHash(key):
+    seed = 131 # 31 131 1313 13131 131313 etc..
+    hash = 0
+    for i in range(len(key)):
+      hash = (hash * seed) + ord(key[i])
+    return hash
+
+
+def SDBMHash(key):
+    hash = 0
+    for i in range(len(key)):
+      hash = ord(key[i]) + (hash << 6) + (hash << 16) - hash
+    return hash
+
+
+def DJBHash(key):
+    hash = 5381
+    for i in range(len(key)):
+       hash = ((hash << 5) + hash) + ord(key[i])
+    return hash
+
+
+def DEKHash(key):
+    hash = len(key)
+    for i in range(len(key)):
+      hash = ((hash << 5) ^ (hash >> 27)) ^ ord(key[i])
+    return hash
+
+
+def BPHash(key):
+    hash = 0
+    for i in range(len(key)):
+       hash = hash << 7 ^ ord(key[i])
+    return hash
+
+
+def FNVHash(key):
+    fnv_prime = 0x811C9DC5
+    hash = 0
+    for i in range(len(key)):
+      hash *= fnv_prime
+      hash ^= ord(key[i])
+    return hash
+
+
+def APHash(key):
+    hash = 0xAAAAAAAA
+    for i in range(len(key)):
+      if ((i & 1) == 0):
+        hash ^= ((hash <<  7) ^ ord(key[i]) * (hash >> 3))
+      else:
+        hash ^= (~((hash << 11) + ord(key[i]) ^ (hash >> 5)))
+    return hash
+
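
The ports above operate on Python's unbounded integers, so several of them (RSHash, BKDRHash, SDBMHash, DJBHash, among others) return values that grow beyond 32 bits rather than wrapping as the C originals do. A typical way to use them for table indexing is to reduce the value explicitly; a small sketch (TABLE_SIZE and the key are arbitrary):

    TABLE_SIZE = 1024
    key = "example"
    for fn in (RSHash, JSHash, BKDRHash, SDBMHash, DJBHash):
        print("%-8s -> bucket %d" % (fn.__name__, fn(key) % TABLE_SIZE))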

+ 173 - 0
Hash/hash_util.py

@@ -0,0 +1,173 @@
+from pandas import read_csv
+from numpy import float64
+
+class HashString(object):
+    AMINO_ACIDS = {'A':0, 'R': 1, 'N':2,'D':3,'C':4, 'Q':5,'E':6,'G':7, 'H':8, 'I':9, 'L':10, 'K': 11, 'M': 12, 'F':13,'P':14, 'S':15, 'T':16, 'W':17,'Y':18,'V':19}
+    MOD = 2**64-1
+    BASE = 20
+    def __init__(self, string_to_hash=""):
+        """
+        Class to compute rolling hash values for peptide sequences. 
+        The hash function can be rolled over longer sequences by repeatedly
+            using the pop_front and insert methods.
+        The size and hash (probably) uniquely identify the peptide.
+        Use the get-methods to fetch attributes. 
+
+        Parameters
+        ----------
+        string_to_hash : str, optional
+            Optional starting peptide sequence. The default is "".
+
+        Returns
+        -------
+        None.
+
+        """
+        string_to_hash = string_to_hash.upper()
+        self.hash_value = 0
+        self.first_index = 0
+        self.size = 0
+        self.charstring = []
+        
+        while(self.size < len(string_to_hash)):
+            self.insert(string_to_hash[self.size])
+
+    def _po(self,a,b):
+        """
+        Fast way of computing large powers.
+
+        Parameters
+        ----------
+        a : float
+            Base.
+        b : int
+            Exponent.
+
+        Returns
+        -------
+        int
+            a**b modulo self.MOD.
+
+        """
+        if b == 0:
+            return 1
+        c = self._po(a,b // 2)
+        if b % 2 == 1:
+            return (((c*c)%self.MOD)*a) % self.MOD
+        else:
+            return (c*c)%self.MOD
+
+    def __eq__(self,other):
+        return (self.size == other.size) and (self.hash_value == other.hash_value)
+    
+    def __str__(self):
+        return ''.join(self.charstring) + " : " + str(self.hash_value)
+
+    def insert(self,char):
+        """
+        Inserts a character at the end of the sequence.
+
+        Parameters
+        ----------
+        char : char
+            Character to insert.
+
+        Returns
+        -------
+        None.
+
+        """
+        char = char.upper()
+        self.hash_value *= self.BASE
+        self.hash_value += self.AMINO_ACIDS[char]
+        self.hash_value %= self.MOD
+        self.charstring.append(char)
+        self.size += 1
+
+    def pop_front(self):
+        """
+        Removes the first character in the string.
+
+        Raises
+        ------
+        IndexError
+            When the hash string already is of length 0.
+
+        Returns
+        -------
+        char
+            The removed character.
+        """
+        
+        
+        if self.size == 0:
+            raise IndexError('Unable to pop HashString of length 0')
+            
+        self.hash_value -= self.AMINO_ACIDS[self.charstring[self.first_index]]*self._po(self.BASE, self.size - 1)
+        while(self.hash_value < 0):
+            self.hash_value += self.MOD
+        self.hash_value %= self.MOD
+        self.first_index += 1
+        self.size -= 1
+        return self.charstring[self.first_index-1]
+
+    def pop_back(self):
+        """
+        Removes last character in sequence.
+
+        Raises
+        ------
+        IndexError
+            When sequence already is of length 0.
+
+        Returns
+        -------
+        char
+            The removed character.
+
+        """
+        if self.size == 0:
+            raise IndexError('Unable to pop HashString of length 0')
+            
+        self.hash_value -= (self.AMINO_ACIDS[self.charstring[self.size-1]])
+        self.hash_value = self.hash_value // self.BASE 
+        while(self.hash_value < 0):
+            self.hash_value += self.MOD 
+        self.hash_value %= self.MOD 
+        self.size -= 1
+        return self.charstring.pop()
+
+    def getString(self):
+        return ''.join(self.charstring)
+    
+    def getHash(self):
+        return self.hash_value
+    
+    def getSize(self):
+        return self.size
+
+class PeptideSequence(HashString):
+    def __init__(self, sequence):
+        """
+        Utility class to easily fetch and hash peptide sequences. 
+        When querying the clean dataframes, use the loc method as follows:
+            peptide_sequence = PeptideSequence('GATCA')
+            info = clean_df.loc[peptide_sequence.loc(),:]
+
+        Parameters
+        ----------
+        sequence : string
+            Peptide sequence to be hashed.
+
+        Returns
+        -------
+        None.
+        """
+        super().__init__(sequence)
+    
+    def loc(self):
+        return (self.getSize(), self.getHash())
+    
+    
+def import_clean_data(path):
+    return read_csv(path, index_col=[0,1], header=0)
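A minimal usage sketch of the rolling hash above (assuming the AMINO_ACIDS, BASE and MOD class attributes defined earlier in this file, with standard one-letter amino-acid keys):

    window = HashString("GATC")                   # hash of the starting window
    print(window.getHash(), window.getSize())     # (rolling hash, 4)
    window.insert("A")                            # logical window is now "GATCA"
    window.pop_front()                            # logical window is now "ATCA"
    print(window == HashString("ATCA"))           # True: equality compares size and rolling hash

    peptide = PeptideSequence("GATCA")            # wraps the same hash for DataFrame lookups
    key = peptide.loc()                           # (length, hash) tuple matching clean_df's index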

+ 127 - 0
Hash/hash_v.1.0.py

@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+import sys
+from argparse import ArgumentParser
+import hashlib
+import os.path
+import time
+from multiprocessing import Process, current_process, Queue
+
+
+# Several hash types(other existing hash types can be added)
+def hash_function(hash_type, data):
+    hash_constructors = {
+        'sha256': hashlib.sha256,
+        'sha512': hashlib.sha512,
+        'md5': hashlib.md5
+    }
+    hash_type = hash_type.lower()
+    if hash_type in hash_constructors:
+        return hash_constructors[hash_type](data).hexdigest()
+    else:
+        print('[!]  Wrong or non-existing hash type')
+        sys.exit(1)
+
+
+# Get the CheckSum of the whole file
+def hash_of_file(hash_type, path):
+    with open(path, 'rb') as file:
+        print('[+]  Hash of file:\n'+hash_function(hash_type, file.read()))
+
+
+# Hashed passwords list generator
+def read_file_with_passwds(start_index, end_index, queue, hash_type, input_path):
+    print('[+]  Process: '+current_process().name+' Started')
+    hash = []
+    # Open file with passwords in plain text and read lines
+    with open(input_path, 'r') as input_file:
+        passwds = input_file.read().splitlines()
+        # Make Password:Hash pairs (e.g. qwerty:65e84be3...)
+        for word in range(start_index, end_index):
+            hash.append(passwds[word] + ':' + hash_function(hash_type, passwds[word].encode()) + '\n')
+    # To read the file correctly while processing put our passwd:hash pair to queue
+    queue.put(hash)
+    print('[+]  Process: '+current_process().name + ' Finished')
+
+
+# After creating list of passwd:hash pair write it in output file
+def write_hashes_to_file(hash_list, output_path):
+    with open(output_path, 'a') as output_file:
+        for word in hash_list:
+            output_file.write(word)
+
+
+# Main function to create and start multiple processes to generate the rainbow table
+def processing_rainbow_generator(hash_type, input_path, output_path, count):
+    # Check if file exists before writing in
+    if os.path.exists(output_path):
+        print('[!]  Output file cannot be used because it already exists')
+        sys.exit(1)
+    else:
+        # Read the whole file to detect number of passwords
+        with open(input_path, 'r') as file:
+            file_len = len(file.readlines())
+
+        # Arguments to get number of passwords for each process
+        # For example, number of process: 5, number of passwords: 1000
+        # 1000/5=200, 200 passwords for each process
+        start_index, end_index = 0, 0
+
+        # List of processes
+        procs = []
+        q = Queue()
+
+        # Creating number(count) of processes
+        for i in range(count):
+            end_index += file_len / count
+            p = Process(target=read_file_with_passwds, args=(int(start_index), int(end_index), q, hash_type, input_path))
+            procs.append(p)
+            start_index += file_len / count
+
+        # Starting processes
+        for i in range(count):
+            procs[i].start()
+
+        # Writing passwd:hash pair to output file
+        for i in range(count):
+            # Get and write the value in queue
+            write_hashes_to_file(q.get(), output_path)
+
+        # Waiting for processes to finish
+        for i in range(count):
+            procs[i].join()
+
+        print('[+]  Rainbow table generated successfully')
+
+
+if __name__ == '__main__':
+    # Available arguments(in cmd type -h for help)
+    parser = ArgumentParser()
+    parser.add_argument('-s', '--hash', help="sha256/md5/sha512", default='', required=True)
+    parser.add_argument('-w', '--word', help="Random string to get hash", default='')
+    parser.add_argument('-f', '--file', help="/home/kali/somefile", default='')
+    parser.add_argument('-i', '--input', help="Rainbow table generator input file: /home/kali/passwords.txt", default='')
+    parser.add_argument('-o', '--output', help="Rainbow table generator output file: /home/kali/rainbow.txt(default rainbow_table.txt)", default='rainbow_table.txt')
+    parser.add_argument('-p', '--procs', help="Rainbow table generator: Number of used processes(default 5)", type=int, default=5)
+    args = parser.parse_args()
+
+    # Hash of simple string
+    if args.word != '':
+        print('[+]  Hash of word:\n'+hash_function(args.hash, args.word.encode()))
+    # Hash of file
+    elif args.file != '':
+        try:
+            hash_of_file(args.hash, args.file)
+        except FileNotFoundError:
+            print('[!]  Not existing file or path')
+    # Creating rainbow table(passwd:hash pair), options output and procs have default values
+    elif args.input != '' and args.output != '':
+        try:
+            # Starting timer
+            start = time.time()
+            processing_rainbow_generator(args.hash, args.input, args.output, args.procs)
+            stop = time.time()
+            print('Time used: ', stop - start)
+        except FileNotFoundError:
+            print('[!]  Not existing file or path')
+    else:
+        print('[!]  No word, file or input file was given')
+        sys.exit(1)
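Typical invocations of this script, with illustrative paths and values only:

    python3 hash_v.1.0.py -s sha256 -w qwerty                              # hash a single word
    python3 hash_v.1.0.py -s md5 -f /home/kali/somefile                    # hash a whole file
    python3 hash_v.1.0.py -s sha512 -i passwords.txt -o rainbow.txt -p 4   # build a rainbow table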

+ 34 - 0
Hash/hasher.py

@@ -0,0 +1,34 @@
+import hashlib
+
+def hash_with_md5(string):
+    hash = hashlib.md5()
+    string = string.encode('utf-8')
+    hash.update(string)
+    return hash
+
+def hash_with_sha1(string):
+    hash = hashlib.sha1()
+    string = string.encode('utf-8')
+    hash.update(string)
+    return hash
+
+def hash_with_sha224(string):
+    hash = hashlib.sha224()
+    string = string.encode('utf-8')
+    hash.update(string)
+    return hash
+
+def hash_with_sha256(string):
+    hash = hashlib.sha256()
+    string = string.encode('utf-8')
+    hash.update(string)
+    return hash
+
+def hash_with_sha512(string):
+    hash = hashlib.sha512()
+    string = string.encode('utf-8')
+    hash.update(string)
+    return hash
+
+def display_hash(hash):
+    print(hash.hexdigest())
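A minimal usage sketch of these helpers:

    digest = hash_with_sha256("hello world")
    display_hash(digest)                     # prints the hex digest
    print(digest.hexdigest()[:8])            # the hashlib object can also be used directly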

+ 255 - 0
Hash/hashes.py

@@ -0,0 +1,255 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+
+from __future__ import absolute_import, division, print_function
+
+import abc
+
+import six
+
+from cryptography import utils
+from cryptography.exceptions import (
+    AlreadyFinalized, UnsupportedAlgorithm, _Reasons
+)
+from cryptography.hazmat.backends.interfaces import HashBackend
+
+
+@six.add_metaclass(abc.ABCMeta)
+class HashAlgorithm(object):
+    @abc.abstractproperty
+    def name(self):
+        """
+        A string naming this algorithm (e.g. "sha256", "md5").
+        """
+
+    @abc.abstractproperty
+    def digest_size(self):
+        """
+        The size of the resulting digest in bytes.
+        """
+
+
+@six.add_metaclass(abc.ABCMeta)
+class HashContext(object):
+    @abc.abstractproperty
+    def algorithm(self):
+        """
+        A HashAlgorithm that will be used by this context.
+        """
+
+    @abc.abstractmethod
+    def update(self, data):
+        """
+        Processes the provided bytes through the hash.
+        """
+
+    @abc.abstractmethod
+    def finalize(self):
+        """
+        Finalizes the hash context and returns the hash digest as bytes.
+        """
+
+    @abc.abstractmethod
+    def copy(self):
+        """
+        Return a HashContext that is a copy of the current context.
+        """
+
+
+@six.add_metaclass(abc.ABCMeta)
+class ExtendableOutputFunction(object):
+    """
+    An interface for extendable output functions.
+    """
+
+
+@utils.register_interface(HashContext)
+class Hash(object):
+    def __init__(self, algorithm, backend, ctx=None):
+        if not isinstance(backend, HashBackend):
+            raise UnsupportedAlgorithm(
+                "Backend object does not implement HashBackend.",
+                _Reasons.BACKEND_MISSING_INTERFACE
+            )
+
+        if not isinstance(algorithm, HashAlgorithm):
+            raise TypeError("Expected instance of hashes.HashAlgorithm.")
+        self._algorithm = algorithm
+
+        self._backend = backend
+
+        if ctx is None:
+            self._ctx = self._backend.create_hash_ctx(self.algorithm)
+        else:
+            self._ctx = ctx
+
+    algorithm = utils.read_only_property("_algorithm")
+
+    def update(self, data):
+        if self._ctx is None:
+            raise AlreadyFinalized("Context was already finalized.")
+        utils._check_byteslike("data", data)
+        self._ctx.update(data)
+
+    def copy(self):
+        if self._ctx is None:
+            raise AlreadyFinalized("Context was already finalized.")
+        return Hash(
+            self.algorithm, backend=self._backend, ctx=self._ctx.copy()
+        )
+
+    def finalize(self):
+        if self._ctx is None:
+            raise AlreadyFinalized("Context was already finalized.")
+        digest = self._ctx.finalize()
+        self._ctx = None
+        return digest
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA1(object):
+    name = "sha1"
+    digest_size = 20
+    block_size = 64
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA512_224(object):  # noqa: N801
+    name = "sha512-224"
+    digest_size = 28
+    block_size = 128
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA512_256(object):  # noqa: N801
+    name = "sha512-256"
+    digest_size = 32
+    block_size = 128
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA224(object):
+    name = "sha224"
+    digest_size = 28
+    block_size = 64
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA256(object):
+    name = "sha256"
+    digest_size = 32
+    block_size = 64
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA384(object):
+    name = "sha384"
+    digest_size = 48
+    block_size = 128
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA512(object):
+    name = "sha512"
+    digest_size = 64
+    block_size = 128
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA3_224(object):  # noqa: N801
+    name = "sha3-224"
+    digest_size = 28
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA3_256(object):  # noqa: N801
+    name = "sha3-256"
+    digest_size = 32
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA3_384(object):  # noqa: N801
+    name = "sha3-384"
+    digest_size = 48
+
+
+@utils.register_interface(HashAlgorithm)
+class SHA3_512(object):  # noqa: N801
+    name = "sha3-512"
+    digest_size = 64
+
+
+@utils.register_interface(HashAlgorithm)
+@utils.register_interface(ExtendableOutputFunction)
+class SHAKE128(object):
+    name = "shake128"
+
+    def __init__(self, digest_size):
+        if not isinstance(digest_size, six.integer_types):
+            raise TypeError("digest_size must be an integer")
+
+        if digest_size < 1:
+            raise ValueError("digest_size must be a positive integer")
+
+        self._digest_size = digest_size
+
+    digest_size = utils.read_only_property("_digest_size")
+
+
+@utils.register_interface(HashAlgorithm)
+@utils.register_interface(ExtendableOutputFunction)
+class SHAKE256(object):
+    name = "shake256"
+
+    def __init__(self, digest_size):
+        if not isinstance(digest_size, six.integer_types):
+            raise TypeError("digest_size must be an integer")
+
+        if digest_size < 1:
+            raise ValueError("digest_size must be a positive integer")
+
+        self._digest_size = digest_size
+
+    digest_size = utils.read_only_property("_digest_size")
+
+
+@utils.register_interface(HashAlgorithm)
+class MD5(object):
+    name = "md5"
+    digest_size = 16
+    block_size = 64
+
+
+@utils.register_interface(HashAlgorithm)
+class BLAKE2b(object):
+    name = "blake2b"
+    _max_digest_size = 64
+    _min_digest_size = 1
+    block_size = 128
+
+    def __init__(self, digest_size):
+
+        if digest_size != 64:
+            raise ValueError("Digest size must be 64")
+
+        self._digest_size = digest_size
+
+    digest_size = utils.read_only_property("_digest_size")
+
+
+@utils.register_interface(HashAlgorithm)
+class BLAKE2s(object):
+    name = "blake2s"
+    block_size = 64
+    _max_digest_size = 32
+    _min_digest_size = 1
+
+    def __init__(self, digest_size):
+
+        if digest_size != 32:
+            raise ValueError("Digest size must be 32")
+
+        self._digest_size = digest_size
+
+    digest_size = utils.read_only_property("_digest_size")

+ 298 - 0
Hash/hashing.py

@@ -0,0 +1,298 @@
+"""
+data hash pandas / numpy objects
+"""
+import itertools
+from typing import Optional
+
+import numpy as np
+
+import pandas._libs.hashing as hashing
+
+from pandas.core.dtypes.common import (
+    is_categorical_dtype,
+    is_extension_array_dtype,
+    is_list_like,
+)
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCIndexClass,
+    ABCMultiIndex,
+    ABCSeries,
+)
+
+# 16 byte long hashing key
+_default_hash_key = "0123456789123456"
+
+
+def _combine_hash_arrays(arrays, num_items: int):
+    """
+    Parameters
+    ----------
+    arrays : generator
+    num_items : int
+
+    Should be the same as CPython's tupleobject.c
+    """
+    try:
+        first = next(arrays)
+    except StopIteration:
+        return np.array([], dtype=np.uint64)
+
+    arrays = itertools.chain([first], arrays)
+
+    mult = np.uint64(1000003)
+    out = np.zeros_like(first) + np.uint64(0x345678)
+    for i, a in enumerate(arrays):
+        inverse_i = num_items - i
+        out ^= a
+        out *= mult
+        mult += np.uint64(82520 + inverse_i + inverse_i)
+    assert i + 1 == num_items, "Fed in wrong num_items"
+    out += np.uint64(97531)
+    return out
+
+
+def hash_pandas_object(
+    obj,
+    index: bool = True,
+    encoding: str = "utf8",
+    hash_key: Optional[str] = _default_hash_key,
+    categorize: bool = True,
+):
+    """
+    Return a data hash of the Index/Series/DataFrame.
+
+    Parameters
+    ----------
+    index : bool, default True
+        Include the index in the hash (if Series/DataFrame).
+    encoding : str, default 'utf8'
+        Encoding for data & key when strings.
+    hash_key : str, default _default_hash_key
+        Hash_key for string key to encode.
+    categorize : bool, default True
+        Whether to first categorize object arrays before hashing. This is more
+        efficient when the array contains duplicate values.
+
+    Returns
+    -------
+    Series of uint64, same length as the object
+    """
+    from pandas import Series
+
+    if hash_key is None:
+        hash_key = _default_hash_key
+
+    if isinstance(obj, ABCMultiIndex):
+        return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)
+
+    elif isinstance(obj, ABCIndexClass):
+        h = hash_array(obj._values, encoding, hash_key, categorize).astype(
+            "uint64", copy=False
+        )
+        h = Series(h, index=obj, dtype="uint64", copy=False)
+
+    elif isinstance(obj, ABCSeries):
+        h = hash_array(obj._values, encoding, hash_key, categorize).astype(
+            "uint64", copy=False
+        )
+        if index:
+            index_iter = (
+                hash_pandas_object(
+                    obj.index,
+                    index=False,
+                    encoding=encoding,
+                    hash_key=hash_key,
+                    categorize=categorize,
+                )._values
+                for _ in [None]
+            )
+            arrays = itertools.chain([h], index_iter)
+            h = _combine_hash_arrays(arrays, 2)
+
+        h = Series(h, index=obj.index, dtype="uint64", copy=False)
+
+    elif isinstance(obj, ABCDataFrame):
+        hashes = (hash_array(series._values) for _, series in obj.items())
+        num_items = len(obj.columns)
+        if index:
+            index_hash_generator = (
+                hash_pandas_object(
+                    obj.index,
+                    index=False,
+                    encoding=encoding,
+                    hash_key=hash_key,
+                    categorize=categorize,
+                )._values
+                for _ in [None]
+            )
+            num_items += 1
+
+            # keep `hashes` specifically a generator to keep mypy happy
+            _hashes = itertools.chain(hashes, index_hash_generator)
+            hashes = (x for x in _hashes)
+        h = _combine_hash_arrays(hashes, num_items)
+
+        h = Series(h, index=obj.index, dtype="uint64", copy=False)
+    else:
+        raise TypeError(f"Unexpected type for hashing {type(obj)}")
+    return h
+
+
+def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key):
+    """
+    Hash a MultiIndex / list-of-tuples efficiently
+
+    Parameters
+    ----------
+    vals : MultiIndex, list-of-tuples, or single tuple
+    encoding : str, default 'utf8'
+    hash_key : str, default _default_hash_key
+
+    Returns
+    -------
+    ndarray of hashed values array
+    """
+    is_tuple = False
+    if isinstance(vals, tuple):
+        vals = [vals]
+        is_tuple = True
+    elif not is_list_like(vals):
+        raise TypeError("must be convertible to a list-of-tuples")
+
+    from pandas import Categorical, MultiIndex
+
+    if not isinstance(vals, ABCMultiIndex):
+        vals = MultiIndex.from_tuples(vals)
+
+    # create a list-of-Categoricals
+    vals = [
+        Categorical(vals.codes[level], vals.levels[level], ordered=False, fastpath=True)
+        for level in range(vals.nlevels)
+    ]
+
+    # hash the list-of-ndarrays
+    hashes = (
+        _hash_categorical(cat, encoding=encoding, hash_key=hash_key) for cat in vals
+    )
+    h = _combine_hash_arrays(hashes, len(vals))
+    if is_tuple:
+        h = h[0]
+
+    return h
+
+
+def _hash_categorical(c, encoding: str, hash_key: str):
+    """
+    Hash a Categorical by hashing its categories, and then mapping the codes
+    to the hashes
+
+    Parameters
+    ----------
+    c : Categorical
+    encoding : str
+    hash_key : str
+
+    Returns
+    -------
+    ndarray of hashed values array, same size as len(c)
+    """
+    # Convert ExtensionArrays to ndarrays
+    values = np.asarray(c.categories._values)
+    hashed = hash_array(values, encoding, hash_key, categorize=False)
+
+    # we have uint64, as we don't directly support missing values
+    # we don't want to use take_nd which will coerce to float
+    # instead, directly construct the result with a
+    # max(np.uint64) as the missing value indicator
+    #
+    # TODO: GH 15362
+
+    mask = c.isna()
+    if len(hashed):
+        result = hashed.take(c.codes)
+    else:
+        result = np.zeros(len(mask), dtype="uint64")
+
+    if mask.any():
+        result[mask] = np.iinfo(np.uint64).max
+
+    return result
+
+
+def hash_array(
+    vals,
+    encoding: str = "utf8",
+    hash_key: str = _default_hash_key,
+    categorize: bool = True,
+):
+    """
+    Given a 1d array, return an array of deterministic integers.
+
+    Parameters
+    ----------
+    vals : ndarray, Categorical
+    encoding : str, default 'utf8'
+        Encoding for data & key when strings.
+    hash_key : str, default _default_hash_key
+        Hash_key for string key to encode.
+    categorize : bool, default True
+        Whether to first categorize object arrays before hashing. This is more
+        efficient when the array contains duplicate values.
+
+    Returns
+    -------
+    1d uint64 numpy array of hash values, same length as the vals
+    """
+    if not hasattr(vals, "dtype"):
+        raise TypeError("must pass a ndarray-like")
+    dtype = vals.dtype
+
+    # For categoricals, we hash the categories, then remap the codes to the
+    # hash values. (This check is above the complex check so that we don't ask
+    # numpy if categorical is a subdtype of complex, as it will choke).
+    if is_categorical_dtype(dtype):
+        return _hash_categorical(vals, encoding, hash_key)
+    elif is_extension_array_dtype(dtype):
+        vals, _ = vals._values_for_factorize()
+        dtype = vals.dtype
+
+    # we'll be working with everything as 64-bit values, so handle this
+    # 128-bit value early
+    if np.issubdtype(dtype, np.complex128):
+        return hash_array(np.real(vals)) + 23 * hash_array(np.imag(vals))
+
+    # First, turn whatever array this is into unsigned 64-bit ints, if we can
+    # manage it.
+    elif isinstance(dtype, bool):
+        vals = vals.astype("u8")
+    elif issubclass(dtype.type, (np.datetime64, np.timedelta64)):
+        vals = vals.view("i8").astype("u8", copy=False)
+    elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8:
+        vals = vals.view(f"u{vals.dtype.itemsize}").astype("u8")
+    else:
+        # With repeated values, it's MUCH faster to categorize object dtypes,
+        # then hash and rename categories. We allow skipping the categorization
+        # when the values are known/likely to be unique.
+        if categorize:
+            from pandas import Categorical, Index, factorize
+
+            codes, categories = factorize(vals, sort=False)
+            cat = Categorical(codes, Index(categories), ordered=False, fastpath=True)
+            return _hash_categorical(cat, encoding, hash_key)
+
+        try:
+            vals = hashing.hash_object_array(vals, hash_key, encoding)
+        except TypeError:
+            # we have mixed types
+            vals = hashing.hash_object_array(
+                vals.astype(str).astype(object), hash_key, encoding
+            )
+
+    # Then, redistribute these 64-bit ints within the space of 64-bit ints
+    vals ^= vals >> 30
+    vals *= np.uint64(0xBF58476D1CE4E5B9)
+    vals ^= vals >> 27
+    vals *= np.uint64(0x94D049BB133111EB)
+    vals ^= vals >> 31
+    return vals
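These helpers back the public pandas.util API; a short sketch of the expected behaviour:

    import pandas as pd

    s = pd.Series(["a", "b", "c"])
    h_with_index = pd.util.hash_pandas_object(s)                 # one uint64 per row
    h_values_only = pd.util.hash_pandas_object(s, index=False)   # index excluded from the hash
    assert h_with_index.equals(pd.util.hash_pandas_object(s))    # deterministic for equal input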

+ 181 - 0
Hash/headers.py

@@ -0,0 +1,181 @@
+
+from typing import (
+    cast,
+    overload,
+)
+
+import rlp
+from rlp.sedes import (
+    big_endian_int,
+    Binary,
+    binary,
+)
+
+from eth_typing import (
+    Address,
+    BlockNumber,
+    Hash32,
+)
+
+from eth_hash.auto import keccak
+
+from eth_utils import (
+    encode_hex,
+)
+
+from eth._utils.headers import (
+    new_timestamp_from_parent,
+)
+from eth.abc import (
+    BlockHeaderAPI,
+    MiningHeaderAPI,
+)
+from eth.constants import (
+    ZERO_ADDRESS,
+    ZERO_HASH32,
+    EMPTY_UNCLE_HASH,
+    GENESIS_NONCE,
+    GENESIS_PARENT_HASH,
+    BLANK_ROOT_HASH,
+)
+from eth.typing import HeaderParams
+
+from .sedes import (
+    address,
+    hash32,
+    uint256,
+    trie_root,
+)
+
+
+class MiningHeader(rlp.Serializable, MiningHeaderAPI):
+    fields = [
+        ('parent_hash', hash32),
+        ('uncles_hash', hash32),
+        ('coinbase', address),
+        ('state_root', trie_root),
+        ('transaction_root', trie_root),
+        ('receipt_root', trie_root),
+        ('bloom', uint256),
+        ('difficulty', big_endian_int),
+        ('block_number', big_endian_int),
+        ('gas_limit', big_endian_int),
+        ('gas_used', big_endian_int),
+        ('timestamp', big_endian_int),
+        ('extra_data', binary),
+    ]
+
+
+class BlockHeader(rlp.Serializable, BlockHeaderAPI):
+    fields = [
+        ('parent_hash', hash32),
+        ('uncles_hash', hash32),
+        ('coinbase', address),
+        ('state_root', trie_root),
+        ('transaction_root', trie_root),
+        ('receipt_root', trie_root),
+        ('bloom', uint256),
+        ('difficulty', big_endian_int),
+        ('block_number', big_endian_int),
+        ('gas_limit', big_endian_int),
+        ('gas_used', big_endian_int),
+        ('timestamp', big_endian_int),
+        ('extra_data', binary),
+        ('mix_hash', binary),
+        ('nonce', Binary(8, allow_empty=True))
+    ]
+
+    @overload
+    def __init__(self, **kwargs: HeaderParams) -> None:
+        ...
+
+    @overload  # noqa: F811
+    def __init__(self,
+                 difficulty: int,
+                 block_number: BlockNumber,
+                 gas_limit: int,
+                 timestamp: int = None,
+                 coinbase: Address = ZERO_ADDRESS,
+                 parent_hash: Hash32 = ZERO_HASH32,
+                 uncles_hash: Hash32 = EMPTY_UNCLE_HASH,
+                 state_root: Hash32 = BLANK_ROOT_HASH,
+                 transaction_root: Hash32 = BLANK_ROOT_HASH,
+                 receipt_root: Hash32 = BLANK_ROOT_HASH,
+                 bloom: int = 0,
+                 gas_used: int = 0,
+                 extra_data: bytes = b'',
+                 mix_hash: Hash32 = ZERO_HASH32,
+                 nonce: bytes = GENESIS_NONCE) -> None:
+        ...
+
+    def __init__(self,              # type: ignore  # noqa: F811
+                 difficulty: int,
+                 block_number: BlockNumber,
+                 gas_limit: int,
+                 timestamp: int = None,
+                 coinbase: Address = ZERO_ADDRESS,
+                 parent_hash: Hash32 = ZERO_HASH32,
+                 uncles_hash: Hash32 = EMPTY_UNCLE_HASH,
+                 state_root: Hash32 = BLANK_ROOT_HASH,
+                 transaction_root: Hash32 = BLANK_ROOT_HASH,
+                 receipt_root: Hash32 = BLANK_ROOT_HASH,
+                 bloom: int = 0,
+                 gas_used: int = 0,
+                 extra_data: bytes = b'',
+                 mix_hash: Hash32 = ZERO_HASH32,
+                 nonce: bytes = GENESIS_NONCE) -> None:
+        if timestamp is None:
+            if parent_hash == ZERO_HASH32:
+                timestamp = new_timestamp_from_parent(None)
+            else:
+                # without access to the parent header, we cannot select a new timestamp correctly
+                raise ValueError("Must set timestamp explicitly if this is not a genesis header")
+
+        super().__init__(
+            parent_hash=parent_hash,
+            uncles_hash=uncles_hash,
+            coinbase=coinbase,
+            state_root=state_root,
+            transaction_root=transaction_root,
+            receipt_root=receipt_root,
+            bloom=bloom,
+            difficulty=difficulty,
+            block_number=block_number,
+            gas_limit=gas_limit,
+            gas_used=gas_used,
+            timestamp=timestamp,
+            extra_data=extra_data,
+            mix_hash=mix_hash,
+            nonce=nonce,
+        )
+
+    def __str__(self) -> str:
+        return f'<BlockHeader #{self.block_number} {encode_hex(self.hash)[2:10]}>'
+
+    _hash = None
+
+    @property
+    def hash(self) -> Hash32:
+        if self._hash is None:
+            self._hash = keccak(rlp.encode(self))
+        return cast(Hash32, self._hash)
+
+    @property
+    def mining_hash(self) -> Hash32:
+        result = keccak(rlp.encode(self[:-2], MiningHeader))
+        return cast(Hash32, result)
+
+    @property
+    def hex_hash(self) -> str:
+        return encode_hex(self.hash)
+
+    @property
+    def is_genesis(self) -> bool:
+        # if removing the block_number == 0 test, consider the validation consequences.
+        # validate_header stops trying to check the current header against a parent header.
+        # Can someone trick us into following a high difficulty header with genesis parent hash?
+        return self.parent_hash == GENESIS_PARENT_HASH and self.block_number == 0
+
+    @property
+    def base_fee_per_gas(self) -> int:
+        raise AttributeError("Base fee per gas not available until London fork")
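A rough sketch of constructing a genesis-style header with this class; the numeric values below are illustrative only, and the defaults take the genesis branch of __init__, so no explicit timestamp is required:

    header = BlockHeader(
        difficulty=17_179_869_184,    # illustrative values, not tied to a specific chain config
        block_number=0,
        gas_limit=5_000,
    )
    assert header.is_genesis           # parent_hash defaults to the genesis parent hash
    print(header.hex_hash)             # keccak of the RLP-encoded header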

+ 109 - 0
Hash/matching_with_mismatches.py

@@ -0,0 +1,109 @@
+# python3
+
+import sys
+import random
+import fileinput
+
+## for preComputeHash and getHashValue functions, refer to the 4th program - substring equality
+## compute the hash values for all the combinations in the given string
+def preComputeHash(string, m, x):
+    string_length = len(string)
+    hash_values = [0]
+    for i in range(1, string_length + 1):
+        # subtracting the ASCII code of 'a' to keep character values within 0 to 25
+        val = ((x * hash_values[i - 1]) % m + (ord(string[i - 1]) - ord('a')) % m) % m
+        hash_values.append(val)
+    return hash_values
+
+## return the hash value using the precomputed hash values of the given index and length
+def getHashValue(hash_values, index_start, length, x, m, power_multiplier):
+	return (hash_values[index_start + length] % m - (power_multiplier * hash_values[index_start]) % m) % m
+
+
+"""
+	Precompute powers of X modulo M1 and M2. This saves time when computing the hash
+	of a substring; otherwise each computation costs an extra O(log(n)) for pow(X, n, M).
+"""
+def getPower(x,length,m):
+	pow_lst = []
+	for i in range(length+1):
+		pow_lst.append(pow(x,i,m))
+	return pow_lst
+
+
+
+class Mismatches:
+	def __init__(self, text, pattern):
+		self.text = text
+		self.pattern = pattern
+		self.m1 = pow(10, 15) + 7
+		self.m2 = pow(10, 15) + 456
+		self.x = 26
+
+		self.pattern_length = len(pattern)
+
+		self.hash_values_t1 = preComputeHash(self.text, self.m1, self.x)
+		self.hash_values_t2 = preComputeHash(self.text, self.m2, self.x)
+
+		self.hash_values_p1 = preComputeHash(self.pattern, self.m1, self.x)
+		self.hash_values_p2 = preComputeHash(self.pattern, self.m2, self.x)
+
+		self.power_lst_m1 = getPower(self.x, self.pattern_length, self.m1)
+		self.power_lst_m2 = getPower(self.x, self.pattern_length, self.m2)
+
+	def solve(self, k, text, pattern):
+		pattern_length = len(pattern)
+		text_length = len(text)
+		result = []
+		for i in range(text_length-pattern_length+1):
+			value = self.checkMatches(text[i:i+pattern_length], pattern, k, i, i+pattern_length, 0, pattern_length)
+			if value <= k:
+				result.append(i)
+		return result
+
+
+	def checkMatches(self, subtext, pattern, k, start_index_t, end_index_t, start_index_p, end_index_p):
+		mismatch_count = 0
+
+		left = 0
+		right = len(pattern)
+		mid = left + (right - left)//2
+		#print("    ",subtext, pattern, start_index_t, end_index_t, start_index_p, end_index_p)  #left, right, mid, mismatch_count)
+
+
+		if mid == 0:
+			if ord(subtext) != ord(pattern):
+				return 1
+			else:
+				return 0
+
+		if self.checkHash(start_index_t, end_index_t, start_index_p, end_index_p):
+			#mismatch_count = mismatch_count + self.checkMatches(subtext[mid:right], pattern[mid:right], k)
+			return mismatch_count
+		else:
+			mid_index_t = (start_index_t + end_index_t)//2  ## uses this index to get the has values of the pattern from the original text
+			mid_index_p = (start_index_p + end_index_p)//2  ## keep track of the index for the pattern to get the hash values
+			mismatch_count = self.checkMatches(subtext[left:mid], pattern[left:mid], k, start_index_t, mid_index_t, start_index_p, mid_index_p) + \
+							 self.checkMatches(subtext[mid:right], pattern[mid:right], k, mid_index_t, end_index_t, mid_index_p, end_index_p)
+			return mismatch_count
+		#return mismatch_count
+
+
+
+	def checkHash(self, index_start_t, index_end_t, index_start_p, index_end_p):
+		length = index_end_t - index_start_t
+		hash_value_st1 = getHashValue(self.hash_values_t1, index_start_t, length, self.x, self.m1, self.power_lst_m1[length])
+		hash_value_st2 = getHashValue(self.hash_values_t2, index_start_t, length, self.x, self.m2, self.power_lst_m2[length])
+
+		hash_value_sp1 = getHashValue(self.hash_values_p1, index_start_p, length, self.x, self.m1, self.power_lst_m1[length])
+		hash_value_sp2 = getHashValue(self.hash_values_p2, index_start_p, length, self.x, self.m2, self.power_lst_m2[length])
+
+		return (hash_value_st1 == hash_value_sp1 and hash_value_st2 == hash_value_sp2)
+
+
+for line in fileinput.input():
+	if line != '\n':
+		k, t, p = line.split()
+		process = Mismatches(t,p)
+		ans = process.solve(int(k), t, p)
+		print(len(ans), *ans)
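The script reads lines of the form "k text pattern" from standard input; a small worked example of the expected I/O:

    # input line:   0 abacaba aba
    # output:       2 0 4        (two matches with at most 0 mismatches, at positions 0 and 4)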

+ 77 - 0
Hash/optimize-pngs.py

@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# Copyright (c) 2014-2017 The Bitcoin Core developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+'''
+Run this script every time you change one of the png files. Using pngcrush, it will optimize the png files, remove various color profiles, remove ancillary chunks (alla) and text chunks (text).
+#pngcrush -brute -ow -rem gAMA -rem cHRM -rem iCCP -rem sRGB -rem alla -rem text
+'''
+import os
+import sys
+import subprocess
+import hashlib
+from PIL import Image
+
+def file_hash(filename):
+    '''Return hash of raw file contents'''
+    with open(filename, 'rb') as f:
+        return hashlib.sha256(f.read()).hexdigest()
+
+def content_hash(filename):
+    '''Return hash of RGBA contents of image'''
+    i = Image.open(filename)
+    i = i.convert('RGBA')
+    data = i.tobytes()
+    return hashlib.sha256(data).hexdigest()
+
+pngcrush = 'pngcrush'
+git = 'git'
+folders = ["src/qt/res/movies", "src/qt/res/icons", "share/pixmaps"]
+basePath = subprocess.check_output([git, 'rev-parse', '--show-toplevel']).rstrip('\n')
+totalSaveBytes = 0
+noHashChange = True
+
+outputArray = []
+for folder in folders:
+    absFolder=os.path.join(basePath, folder)
+    for file in os.listdir(absFolder):
+        extension = os.path.splitext(file)[1]
+        if extension.lower() == '.png':
+            print("optimizing "+file+"..."),
+            file_path = os.path.join(absFolder, file)
+            fileMetaMap = {'file' : file, 'osize': os.path.getsize(file_path), 'sha256Old' : file_hash(file_path)}
+            fileMetaMap['contentHashPre'] = content_hash(file_path)
+        
+            pngCrushOutput = ""
+            try:
+                pngCrushOutput = subprocess.check_output(
+                        [pngcrush, "-brute", "-ow", "-rem", "gAMA", "-rem", "cHRM", "-rem", "iCCP", "-rem", "sRGB", "-rem", "alla", "-rem", "text", file_path],
+                        stderr=subprocess.STDOUT).rstrip('\n')
+            except:
+                
+                sys.exit(0)
+        
+            #verify
+            if "Not a PNG file" in subprocess.check_output([pngcrush, "-n", "-v", file_path], stderr=subprocess.STDOUT):
+                
+                sys.exit(1)
+            
+            fileMetaMap['sha256New'] = file_hash(file_path)
+            fileMetaMap['contentHashPost'] = content_hash(file_path)
+
+            if fileMetaMap['contentHashPre'] != fileMetaMap['contentHashPost']:
+                
+                sys.exit(1)
+
+            fileMetaMap['psize'] = os.path.getsize(file_path)
+            outputArray.append(fileMetaMap)
+            print("done\n"),
+
+print ("summary:\n+++++++++++++++++")
+for fileDict in outputArray:
+    oldHash = fileDict['sha256Old']
+    newHash = fileDict['sha256New']
+    totalSaveBytes += fileDict['osize'] - fileDict['psize']
+    noHashChange = noHashChange and (oldHash == newHash)
+    print (fileDict['file']+"\n  size diff from: "+str(fileDict['osize'])+" to: "+str(fileDict['psize'])+"\n  old sha256: "+oldHash+"\n  new sha256: "+newHash+"\n")
+    

+ 117 - 0
Hash/password_hashers.py

@@ -0,0 +1,117 @@
+import hashlib
+import hmac
+
+from persistent.mapping import PersistentMapping
+from zope.annotation.interfaces import IAnnotations
+from zope.interface import implements
+
+from AccessControl.AuthEncoding import pw_encrypt
+from AccessControl.AuthEncoding import pw_validate
+
+from Products.remember.config import ANNOT_KEY
+from Products.remember.interfaces import IHashPW
+
+
+class BaseHash(object):
+    """
+    Abstract base class for actual hashing implementations.
+    """
+    implements(IHashPW)
+
+    def __init__(self, context):
+        self.context = context
+
+    def isAvailable(self):
+        return True
+
+    def hashPassword(self, password):
+        raise NotImplementedError
+
+    def validate(self, reference, attempt):
+        """
+        Check to see if the reference is a hash of the attempt.
+        """
+        return self.hashPassword(attempt) == reference
+
+
+class BCryptHash(BaseHash):
+    """
+    Adapts from IAnnotatable to IHashPW. Uses bcrypt to hash the
+    password
+    """
+    try:
+        import bcrypt
+    except ImportError:
+        bcrypt = None
+
+    def __init__(self, context):
+        self.context = context
+        if self.bcrypt is None:
+            return
+        annotations = IAnnotations(context)
+        storage = annotations.setdefault(ANNOT_KEY,
+                                         PersistentMapping())
+        storage.setdefault('bcrypt_salt', self.bcrypt.gensalt())
+        self.storage = storage
+
+    def isAvailable(self):
+        return self.bcrypt is not None
+
+    def hashPassword(self, password):
+        """
+        Return a hashed version of password using bcrypt
+        """
+        return self.bcrypt.hashpw(password, self.storage['bcrypt_salt'])
+
+
+class SHAHash(BaseHash):
+    """
+    Adapts from IAnnotatable to IHashPW. Uses SHA to hash the password
+    """
+
+    def hashPassword(self, password):
+        """
+        Return a hashed version of password using SHA
+        """
+        return hashlib.sha1(password).hexdigest()
+
+
+class HMACHash(BaseHash):
+    """
+    Adapts from IAnnotatable to IHashPW. Uses HMAC-SHA1 to hash the password
+    """
+
+    def __init__(self, context):
+        self.context = context
+        key = str(context)
+        annotations = IAnnotations(context)
+        storage = annotations.setdefault(ANNOT_KEY,
+                                         PersistentMapping())
+        storage.setdefault('hmac_key', key)
+        self.storage = storage
+
+    def hashPassword(self, password):
+        """
+        Return a hashed version of password using HMAC-SHA1 keyed with the stored hmac_key
+        """
+        return hmac.new(self.storage['hmac_key'], password, hashlib.sha1
+                        ).hexdigest()
+
+
+class ZAuthHash(BaseHash):
+    """
+    Adapts from IAnnotatable to IHashPW. Uses Zope 2's
+    AccessControl.AuthEncoding module to hash the password.
+    """
+
+    def hashPassword(self, password):
+        """
+        Delegate to AccessControl.AuthEncoding.
+        """
+        return pw_encrypt(password)
+
+    def validate(self, reference, attempt):
+        """
+        Check to see if the reference is a hash of the attempt.
+        """
+        return pw_validate(reference, attempt)

+ 105 - 0
Hash/security.py

@@ -0,0 +1,105 @@
+# ---------------------------
+#File : security.py
+#Date : 14.10.2020
+#Purpose :
+#Notes :
+#------------------------------
+import random, string, base64, hmac, re
+
+
+SCRYPT_N = 65536
+SCRYPT_SALT_LEN = 16
+API_ID_LEN = 24
+
+def implode(pw_hash, salt):
+    """
+    Implodes the given hash and its arguments into a string
+
+    :param pw_hash: password hash
+    :param salt: hash salt
+    :returns: hash string with arguments in order (salt,hash)
+    """
+    return '$' + salt + '$' + pw_hash
+
+def explode(hashstr):
+    """
+    Explodes a hash string whose values are separated by '$' into an array
+
+    :param hashstr: hash string to explode
+    :returns: elements of the given hash string as an array
+    """
+
+    return hashstr.split('$')
+
+def pw_complexity(password):
+    """
+    Checks the password complexity before using it
+
+    :param password: password to verify complexity
+    :returns: True if the password is complex enough, False otherwise
+    """
+    # Reject passwords shorter than 8 characters
+    if (len(password) < 8):
+        return False
+    # Reject passwords that do not contain a digit
+    if (re.search(r"\d", password) is None):
+        return False
+    # Reject passwords without an uppercase letter
+    if (re.search(r"[A-Z]", password) is None):
+        return False
+    # Reject passwords without a lowercase letter
+    if (re.search(r"[a-z]", password) is None):
+        return False
+
+    return True
+
+
+def hash_pw(password):
+    """
+    Hashes the given password using HMAC-SHA256 and a random salt
+
+    :param password: password to hash
+    :returns: hashed password with arguments
+    """
+
+    salt = gen_rand_string()
+    pw_hash = base64.b64encode(
+        hmac.new(
+            key=salt.encode('utf8'),
+            msg=password.encode('utf8'),
+            digestmod='SHA256'
+        ).digest()
+    ).decode('utf8')
+
+    return implode(pw_hash, salt)
+
+def check_pw(password, pw_hash):
+    """
+    Checks whether the specified password and parameters match the given hash
+
+    :param password: password to check
+    :param pw_hash: hash string to compare with
+    :returns: True if the password is correct, else False
+    """
+
+    hashvars = explode(pw_hash)
+
+    return hmac.compare_digest(hashvars[2].encode('utf8'), base64.b64encode(hmac.new(
+        key=hashvars[1].encode('utf8'),
+        msg=password.encode('utf8'),
+        digestmod='SHA256'
+    ).digest()))
+
+def gen_rand_string(prefix=None):
+    """
+    Generates a random string of 24 characters (alphanumeric case-sensitive)
+
+    :param prefix: prefix to prepend to the random string
+    :returns: random string of 24 alphanumeric characters (case-sensitive)
+    """
+    return (prefix + '_' if prefix != None else '') + ''.join(
+        random.SystemRandom().choice(
+            string.ascii_letters + string.digits
+        ) for _ in range(API_ID_LEN)
+    )
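A small usage sketch of the helpers above:

    stored = hash_pw("S3curePass")          # "$<salt>$<base64 HMAC-SHA256>"
    print(pw_complexity("S3curePass"))      # True: length, digit, upper- and lower-case present
    print(check_pw("S3curePass", stored))   # True
    print(check_pw("WrongPass1", stored))   # False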

+ 69 - 0
Hash/tab_hash.py

@@ -0,0 +1,69 @@
+import random
+import string
+
+# bin(random.getrandbits(64))
+
+# tab hashing
+n = 11
+seed = 31
+key = 'hellp'
+
+
+def fimurhash(key, seed, n):
+    hash = seed
+    for c in key:
+        c_int = ord(c)
+        hash *= (11400714819323198485 * (c_int << 15)) & 0xFFFFFFFFFFFFFFFF
+        hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
+    return (hash >> (64 - n))
+
+    
+hmap = [random.getrandbits(n + 7) for r in range(256)]
+
+def tab_hash(key):
+    hash = 0
+    i = 0
+    for c in key:
+        c_int = ord(c)
+        hash_partial = hmap[c_int] ^ (c_int << i)
+        hash ^= hash_partial
+        i += 1
+        # print(f"{c} : {hash_partial} : {hash}")
+        # i+= hmap[i & 0xFF]
+    return hash
+
+def tab_gash(key):
+    hash = 0
+    # i = 0
+    for c in key:
+        c_int = ord(c)
+        hash_partial = hmap[(c_int + hash) & 0xFF] ^ c_int
+        hash ^= hash_partial
+        # print(f"{c} : {hash_partial} : {hash}")
+        # i+= hmap[i & 0xFF]
+    return hash
+
+def bad_hash(key):
+    hash = 0
+    for c in key:
+        c_int = ord(c)
+        hash_partial = hmap[c_int] ^ c_int
+        hash ^= hash_partial
+        # print(f"{c} : {hash_partial} : {hash}")
+    return hash
+
+# generate random keys
+# def random_key(size=6, charset=string.digits):
+# def random_key(size=6, charset=string.ascii_letters + string.digits):
+def random_key(size=6, charset="abc"):
+    return ''.join(random.choices(charset, k=size))
+
+# test drive tab hashing
+for i in range(2**(n-1)):
+    # key = random_key(random.randint(1, 10))
+    key = random_key(8)
+    t_hash = tab_gash(key)
+    fimur_hash = fimurhash(key, seed, n)
+    # mixed_hash = tab_hash(str(fimur_hash))
+    # print(f"{key} ; {t_hash} ; {fimur_hash} ; {mixed_hash}")
+    print(f"{key} ; {t_hash} ; {fimur_hash}")

+ 346 - 0
Hash/test_hash.py

@@ -0,0 +1,346 @@
+# test the invariant that
+#   iff a==b then hash(a)==hash(b)
+#
+# Also test that hash implementations are inherited as expected
+
+import datetime
+import os
+import sys
+import unittest
+from test.support.script_helper import assert_python_ok
+from collections.abc import Hashable
+
+IS_64BIT = sys.maxsize > 2**32
+
+def lcg(x, length=16):
+    """Linear congruential generator"""
+    if x == 0:
+        return bytes(length)
+    out = bytearray(length)
+    for i in range(length):
+        x = (214013 * x + 2531011) & 0x7fffffff
+        out[i] = (x >> 16) & 0xff
+    return bytes(out)
+
+def pysiphash(uint64):
+    """Convert SipHash24 output to Py_hash_t
+    """
+    assert 0 <= uint64 < (1 << 64)
+    # simple unsigned to signed int64
+    if uint64 > (1 << 63) - 1:
+        int64 = uint64 - (1 << 64)
+    else:
+        int64 = uint64
+    # mangle uint64 to uint32
+    uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff
+    # simple unsigned to signed int32
+    if uint32 > (1 << 31) - 1:
+        int32 = uint32 - (1 << 32)
+    else:
+        int32 = uint32
+    return int32, int64
+
+def skip_unless_internalhash(test):
+    """Skip decorator for tests that depend on SipHash24 or FNV"""
+    ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
+    msg = "Requires SipHash24 or FNV"
+    return test if ok else unittest.skip(msg)(test)
+
+
+class HashEqualityTestCase(unittest.TestCase):
+
+    def same_hash(self, *objlist):
+        # Hash each object given and fail if
+        # the hash values are not all the same.
+        hashed = list(map(hash, objlist))
+        for h in hashed[1:]:
+            if h != hashed[0]:
+                self.fail("hashed values differ: %r" % (objlist,))
+
+    def test_numeric_literals(self):
+        self.same_hash(1, 1, 1.0, 1.0+0.0j)
+        self.same_hash(0, 0.0, 0.0+0.0j)
+        self.same_hash(-1, -1.0, -1.0+0.0j)
+        self.same_hash(-2, -2.0, -2.0+0.0j)
+
+    def test_coerced_integers(self):
+        self.same_hash(int(1), int(1), float(1), complex(1),
+                       int('1'), float('1.0'))
+        self.same_hash(int(-2**31), float(-2**31))
+        self.same_hash(int(1-2**31), float(1-2**31))
+        self.same_hash(int(2**31-1), float(2**31-1))
+        # for 64-bit platforms
+        self.same_hash(int(2**31), float(2**31))
+        self.same_hash(int(-2**63), float(-2**63))
+        self.same_hash(int(2**63), float(2**63))
+
+    def test_coerced_floats(self):
+        self.same_hash(int(1.23e300), float(1.23e300))
+        self.same_hash(float(0.5), complex(0.5, 0.0))
+
+    def test_unaligned_buffers(self):
+        # The hash function for bytes-like objects shouldn't have
+        # alignment-dependent results (example in issue #16427).
+        b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128
+        for i in range(16):
+            for j in range(16):
+                aligned = b[i:128+j]
+                unaligned = memoryview(b)[i:128+j]
+                self.assertEqual(hash(aligned), hash(unaligned))
+
+
+_default_hash = object.__hash__
+class DefaultHash(object): pass
+
+_FIXED_HASH_VALUE = 42
+class FixedHash(object):
+    def __hash__(self):
+        return _FIXED_HASH_VALUE
+
+class OnlyEquality(object):
+    def __eq__(self, other):
+        return self is other
+
+class OnlyInequality(object):
+    def __ne__(self, other):
+        return self is not other
+
+class InheritedHashWithEquality(FixedHash, OnlyEquality): pass
+class InheritedHashWithInequality(FixedHash, OnlyInequality): pass
+
+class NoHash(object):
+    __hash__ = None
+
+class HashInheritanceTestCase(unittest.TestCase):
+    default_expected = [object(),
+                        DefaultHash(),
+                        OnlyInequality(),
+                       ]
+    fixed_expected = [FixedHash(),
+                      InheritedHashWithEquality(),
+                      InheritedHashWithInequality(),
+                      ]
+    error_expected = [NoHash(),
+                      OnlyEquality(),
+                      ]
+
+    def test_default_hash(self):
+        for obj in self.default_expected:
+            self.assertEqual(hash(obj), _default_hash(obj))
+
+    def test_fixed_hash(self):
+        for obj in self.fixed_expected:
+            self.assertEqual(hash(obj), _FIXED_HASH_VALUE)
+
+    def test_error_hash(self):
+        for obj in self.error_expected:
+            self.assertRaises(TypeError, hash, obj)
+
+    def test_hashable(self):
+        objects = (self.default_expected +
+                   self.fixed_expected)
+        for obj in objects:
+            self.assertIsInstance(obj, Hashable)
+
+    def test_not_hashable(self):
+        for obj in self.error_expected:
+            self.assertNotIsInstance(obj, Hashable)
+
+
+# Issue #4701: Check that some builtin types are correctly hashable
+class DefaultIterSeq(object):
+    seq = range(10)
+    def __len__(self):
+        return len(self.seq)
+    def __getitem__(self, index):
+        return self.seq[index]
+
+class HashBuiltinsTestCase(unittest.TestCase):
+    hashes_to_check = [enumerate(range(10)),
+                       iter(DefaultIterSeq()),
+                       iter(lambda: 0, 0),
+                      ]
+
+    def test_hashes(self):
+        _default_hash = object.__hash__
+        for obj in self.hashes_to_check:
+            self.assertEqual(hash(obj), _default_hash(obj))
+
+class HashRandomizationTests:
+
+    # Each subclass should define a field "repr_", containing the repr() of
+    # an object to be tested
+
+    def get_hash_command(self, repr_):
+        return 'print(hash(eval(%a)))' % repr_
+
+    def get_hash(self, repr_, seed=None):
+        env = os.environ.copy()
+        env['__cleanenv'] = True  # signal to assert_python not to do a copy
+                                  # of os.environ on its own
+        if seed is not None:
+            env['PYTHONHASHSEED'] = str(seed)
+        else:
+            env.pop('PYTHONHASHSEED', None)
+        out = assert_python_ok(
+            '-c', self.get_hash_command(repr_),
+            **env)
+        stdout = out[1].strip()
+        return int(stdout)
+
+    def test_randomized_hash(self):
+        # two runs should return different hashes
+        run1 = self.get_hash(self.repr_, seed='random')
+        run2 = self.get_hash(self.repr_, seed='random')
+        self.assertNotEqual(run1, run2)
+
+class StringlikeHashRandomizationTests(HashRandomizationTests):
+    repr_ = None
+    repr_long = None
+
+    # 32bit little, 64bit little, 32bit big, 64bit big
+    known_hashes = {
+        'djba33x': [ # only used for small strings
+            # seed 0, 'abc'
+            [193485960, 193485960,  193485960, 193485960],
+            # seed 42, 'abc'
+            [-678966196, 573763426263223372, -820489388, -4282905804826039665],
+            ],
+        'siphash24': [
+            # NOTE: PyUCS2 layout depends on endianess
+            # seed 0, 'abc'
+            [1198583518, 4596069200710135518, 1198583518, 4596069200710135518],
+            # seed 42, 'abc'
+            [273876886, -4501618152524544106, 273876886, -4501618152524544106],
+            # seed 42, 'abcdefghijk'
+            [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975],
+            # seed 0, 'äú∑ℇ'
+            [493570806, 5749986484189612790, -1006381564, -5915111450199468540],
+            # seed 42, 'äú∑ℇ'
+            [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017],
+        ],
+        'fnv': [
+            # seed 0, 'abc'
+            [-1600925533, 1453079729188098211, -1600925533,
+             1453079729188098211],
+            # seed 42, 'abc'
+            [-206076799, -4410911502303878509, -1024014457,
+             -3570150969479994130],
+            # seed 42, 'abcdefghijk'
+            [811136751, -5046230049376118746, -77208053 ,
+             -4779029615281019666],
+            # seed 0, 'äú∑ℇ'
+            [44402817, 8998297579845987431, -1956240331,
+             -782697888614047887],
+            # seed 42, 'äú∑ℇ'
+            [-283066365, -4576729883824601543, -271871407,
+             -3927695501187247084],
+        ]
+    }
+
+    def get_expected_hash(self, position, length):
+        if length < sys.hash_info.cutoff:
+            algorithm = "djba33x"
+        else:
+            algorithm = sys.hash_info.algorithm
+        if sys.byteorder == 'little':
+            platform = 1 if IS_64BIT else 0
+        else:
+            assert(sys.byteorder == 'big')
+            platform = 3 if IS_64BIT else 2
+        return self.known_hashes[algorithm][position][platform]
+
+    def test_null_hash(self):
+        # PYTHONHASHSEED=0 disables the randomized hash
+        known_hash_of_obj = self.get_expected_hash(0, 3)
+
+        # Randomization is enabled by default:
+        self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
+
+        # It can also be disabled by setting the seed to 0:
+        self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
+
+    @skip_unless_internalhash
+    def test_fixed_hash(self):
+        # test a fixed seed for the randomized hash
+        # Note that all types share the same values:
+        h = self.get_expected_hash(1, 3)
+        self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+
+    @skip_unless_internalhash
+    def test_long_fixed_hash(self):
+        if self.repr_long is None:
+            return
+        h = self.get_expected_hash(2, 11)
+        self.assertEqual(self.get_hash(self.repr_long, seed=42), h)
+
+
+class StrHashRandomizationTests(StringlikeHashRandomizationTests,
+                                unittest.TestCase):
+    repr_ = repr('abc')
+    repr_long = repr('abcdefghijk')
+    repr_ucs2 = repr('äú∑ℇ')
+
+    @skip_unless_internalhash
+    def test_empty_string(self):
+        self.assertEqual(hash(""), 0)
+
+    @skip_unless_internalhash
+    def test_ucs2_string(self):
+        h = self.get_expected_hash(3, 6)
+        self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
+        h = self.get_expected_hash(4, 6)
+        self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)
+
+class BytesHashRandomizationTests(StringlikeHashRandomizationTests,
+                                  unittest.TestCase):
+    repr_ = repr(b'abc')
+    repr_long = repr(b'abcdefghijk')
+
+    @skip_unless_internalhash
+    def test_empty_string(self):
+        self.assertEqual(hash(b""), 0)
+
+class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests,
+                                       unittest.TestCase):
+    repr_ = "memoryview(b'abc')"
+    repr_long = "memoryview(b'abcdefghijk')"
+
+    @skip_unless_internalhash
+    def test_empty_string(self):
+        self.assertEqual(hash(memoryview(b"")), 0)
+
+class DatetimeTests(HashRandomizationTests):
+    def get_hash_command(self, repr_):
+        return 'import datetime; print(hash(%s))' % repr_
+
+class DatetimeDateTests(DatetimeTests, unittest.TestCase):
+    repr_ = repr(datetime.date(1066, 10, 14))
+
+class DatetimeDatetimeTests(DatetimeTests, unittest.TestCase):
+    repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
+
+class DatetimeTimeTests(DatetimeTests, unittest.TestCase):
+    repr_ = repr(datetime.time(0))
+
+
+class HashDistributionTestCase(unittest.TestCase):
+
+    def test_hash_distribution(self):
+        # check for hash collision
+        base = "abcdefghabcdefg"
+        for i in range(1, len(base)):
+            prefix = base[:i]
+            with self.subTest(prefix=prefix):
+                s15 = set()
+                s255 = set()
+                for c in range(256):
+                    h = hash(prefix + chr(c))
+                    s15.add(h & 0xf)
+                    s255.add(h & 0xff)
+                # SipHash24 distribution depends on key, usually > 60%
+                self.assertGreater(len(s15), 8, prefix)
+                self.assertGreater(len(s255), 128, prefix)
+
+if __name__ == "__main__":
+    unittest.main()

+ 92 - 0
Hash/verifier.py

@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+#
+# Electrum - Lightweight Bitcoin Client
+# Copyright (c) 2012 Thomas Voegtlin
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+from util import ThreadJob
+from bitcoin import *
+
+
+class SPV(ThreadJob):
+    """ Simple Payment Verification """
+
+    def __init__(self, network, wallet):
+        self.wallet = wallet
+        self.network = network
+        # Keyed by tx hash.  Value is None if the merkle branch was
+        # requested, and the merkle root once it has been verified
+        self.merkle_roots = {}
+
+    def run(self):
+        lh = self.network.get_local_height()
+        unverified = self.wallet.get_unverified_txs()
+        for tx_hash, tx_height in unverified.items():
+            # do not request merkle branch before headers are available
+            if tx_hash not in self.merkle_roots and tx_height <= lh:
+                request = ('blockchain.transaction.get_merkle',
+                           [tx_hash, tx_height])
+                self.network.send([request], self.verify_merkle)
+                self.print_error('requested merkle', tx_hash)
+                self.merkle_roots[tx_hash] = None
+
+    def verify_merkle(self, r):
+        if r.get('error'):
+            self.print_error('received an error:', r)
+            return
+
+        params = r['params']
+        merkle = r['result']
+
+        # Verify the hash of the server-provided merkle branch to a
+        # transaction matches the merkle root of its block
+        tx_hash = params[0]
+        tx_height = merkle.get('block_height')
+        pos = merkle.get('pos')
+        merkle_root = self.hash_merkle_root(merkle['merkle'], tx_hash, pos)
+        header = self.network.get_header(tx_height)
+        if not header or header.get('merkle_root') != merkle_root:
+            # FIXME: we should make a fresh connection to a server to
+            # recover from this, as this TX will now never verify
+            self.print_error("merkle verification failed for", tx_hash)
+            return
+
+        # we passed all the tests
+        self.merkle_roots[tx_hash] = merkle_root
+        self.print_error("verified %s" % tx_hash)
+        self.wallet.add_verified_tx(tx_hash, (tx_height, header.get('timestamp'), pos))
+
+
+    def hash_merkle_root(self, merkle_s, target_hash, pos):
+        h = hash_decode(target_hash)
+        for i in range(len(merkle_s)):
+            item = merkle_s[i]
+            h = Hash( hash_decode(item) + h ) if ((pos >> i) & 1) else Hash( h + hash_decode(item) )
+        return hash_encode(h)
+
+
+    def undo_verifications(self, height):
+        tx_hashes = self.wallet.undo_verifications(height)
+        for tx_hash in tx_hashes:
+            self.print_error("redoing", tx_hash)
+            self.merkle_roots.pop(tx_hash, None)
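hash_merkle_root above folds the server-supplied branch over the transaction hash, choosing the concatenation order from the bits of pos. The helpers Hash, hash_decode and hash_encode come from Electrum's bitcoin module; a self-contained sketch with those helpers re-implemented here (double SHA-256 and byte-reversed hex, stated as assumptions about that module rather than imported from it):

import hashlib

def sha256d(data: bytes) -> bytes:
    # Bitcoin's double SHA-256 (what Electrum's Hash() computes)
    return hashlib.sha256(hashlib.sha256(data).digest()).digest()

def hash_decode(hex_str: str) -> bytes:
    # displayed tx/merkle hashes are byte-reversed hex
    return bytes.fromhex(hex_str)[::-1]

def hash_encode(raw: bytes) -> str:
    return raw[::-1].hex()

def merkle_root_from_branch(branch, tx_hash: str, pos: int) -> str:
    h = hash_decode(tx_hash)
    for i, item in enumerate(branch):
        sibling = hash_decode(item)
        # bit i of pos says whether our node is the right (1) or left (0) child
        h = sha256d(sibling + h) if (pos >> i) & 1 else sha256d(h + sibling)
    return hash_encode(h)

The verifier then compares the folded result with header.get('merkle_root') for the block at tx_height.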

+ 334 - 0
Hash/windows.py

@@ -0,0 +1,334 @@
+"""passlib.handlers.nthash - Microsoft Windows -related hashes"""
+#=============================================================================
+# imports
+#=============================================================================
+# core
+from binascii import hexlify
+import logging; log = logging.getLogger(__name__)
+from warnings import warn
+# site
+# pkg
+from passlib.utils import to_unicode, right_pad_string
+from passlib.utils.compat import unicode
+from passlib.crypto.digest import lookup_hash
+md4 = lookup_hash("md4").const
+import passlib.utils.handlers as uh
+# local
+__all__ = [
+    "lmhash",
+    "nthash",
+    "bsd_nthash",
+    "msdcc",
+    "msdcc2",
+]
+
+#=============================================================================
+# lanman hash
+#=============================================================================
+class lmhash(uh.TruncateMixin, uh.HasEncodingContext, uh.StaticHandler):
+    """This class implements the Lan Manager Password hash, and follows the :ref:`password-hash-api`.
+
+    It has no salt and a single fixed round.
+
+    The :meth:`~passlib.ifc.PasswordHash.using` method accepts a single
+    optional keyword:
+
+    :param bool truncate_error:
+        By default, this will silently truncate passwords larger than 14 bytes.
+        Setting ``truncate_error=True`` will cause :meth:`~passlib.ifc.PasswordHash.hash`
+        to raise a :exc:`~passlib.exc.PasswordTruncateError` instead.
+
+        .. versionadded:: 1.7
+
+    The :meth:`~passlib.ifc.PasswordHash.hash` and :meth:`~passlib.ifc.PasswordHash.verify` methods accept a single
+    optional keyword:
+
+    :type encoding: str
+    :param encoding:
+
+        This specifies what character encoding LMHASH should use when
+        calculating the digest. It defaults to ``cp437``, the most
+        commonly encountered encoding.
+
+    Note that while this class outputs digests in lower-case hexadecimal,
+    it will accept upper-case as well.
+    """
+    #===================================================================
+    # class attrs
+    #===================================================================
+
+    #--------------------
+    # PasswordHash
+    #--------------------
+    name = "lmhash"
+    setting_kwds = ("truncate_error",)
+
+    #--------------------
+    # GenericHandler
+    #--------------------
+    checksum_chars = uh.HEX_CHARS
+    checksum_size = 32
+
+    #--------------------
+    # TruncateMixin
+    #--------------------
+    truncate_size = 14
+
+    #--------------------
+    # custom
+    #--------------------
+    default_encoding = "cp437"
+
+    #===================================================================
+    # methods
+    #===================================================================
+    @classmethod
+    def _norm_hash(cls, hash):
+        return hash.lower()
+
+    def _calc_checksum(self, secret):
+        # check for truncation (during .hash() calls only)
+        if self.use_defaults:
+            self._check_truncate_policy(secret)
+
+        return hexlify(self.raw(secret, self.encoding)).decode("ascii")
+
+    # magic constant used by LMHASH
+    _magic = b"KGS!@#$%"
+
+    @classmethod
+    def raw(cls, secret, encoding=None):
+        """encode password using LANMAN hash algorithm.
+
+        :type secret: unicode or utf-8 encoded bytes
+        :arg secret: secret to hash
+        :type encoding: str
+        :arg encoding:
+            optional encoding to use for unicode inputs.
+            this defaults to ``cp437``, which is the most
+            common case.
+
+        :returns: returns string of raw bytes
+        """
+        if not encoding:
+            encoding = cls.default_encoding
+        # some nice empirical data re: different encodings is at...
+        # http://www.openwall.com/lists/john-dev/2011/08/01/2
+        # http://www.freerainbowtables.com/phpBB3/viewtopic.php?t=387&p=12163
+        from passlib.crypto.des import des_encrypt_block
+        MAGIC = cls._magic
+        if isinstance(secret, unicode):
+            # perform uppercasing while we're still unicode,
+            # to give a better shot at getting non-ascii chars right.
+            # (though some codepages do NOT upper-case the same as unicode).
+            secret = secret.upper().encode(encoding)
+        elif isinstance(secret, bytes):
+            # FIXME: just trusting ascii upper will work?
+            # and if not, how to do codepage specific case conversion?
+            # we could decode first using <encoding>,
+            # but *that* might not always be right.
+            secret = secret.upper()
+        else:
+            raise TypeError("secret must be unicode or bytes")
+        secret = right_pad_string(secret, 14)
+        return des_encrypt_block(secret[0:7], MAGIC) + \
+               des_encrypt_block(secret[7:14], MAGIC)
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+#=============================================================================
+# ntlm hash
+#=============================================================================
+class nthash(uh.StaticHandler):
+    """This class implements the NT Password hash, and follows the :ref:`password-hash-api`.
+
+    It has no salt and a single fixed round.
+
+    The :meth:`~passlib.ifc.PasswordHash.hash` and :meth:`~passlib.ifc.PasswordHash.genconfig` methods accept no optional keywords.
+
+    Note that while this class outputs lower-case hexadecimal digests,
+    it will accept upper-case digests as well.
+    """
+    #===================================================================
+    # class attrs
+    #===================================================================
+    name = "nthash"
+    checksum_chars = uh.HEX_CHARS
+    checksum_size = 32
+
+    #===================================================================
+    # methods
+    #===================================================================
+    @classmethod
+    def _norm_hash(cls, hash):
+        return hash.lower()
+
+    def _calc_checksum(self, secret):
+        return hexlify(self.raw(secret)).decode("ascii")
+
+    @classmethod
+    def raw(cls, secret):
+        """encode password using MD4-based NTHASH algorithm
+
+        :arg secret: secret as unicode or utf-8 encoded bytes
+
+        :returns: returns string of raw bytes
+        """
+        secret = to_unicode(secret, "utf-8", param="secret")
+        # XXX: found refs that say only first 128 chars are used.
+        return md4(secret.encode("utf-16-le")).digest()
+
+    @classmethod
+    def raw_nthash(cls, secret, hex=False):
+        warn("nthash.raw_nthash() is deprecated, and will be removed "
+             "in Passlib 1.8, please use nthash.raw() instead",
+             DeprecationWarning)
+        ret = nthash.raw(secret)
+        return hexlify(ret).decode("ascii") if hex else ret
+
+    #===================================================================
+    # eoc
+    #===================================================================
+
+bsd_nthash = uh.PrefixWrapper("bsd_nthash", nthash, prefix="$3$$", ident="$3$$",
+    doc="""The class support FreeBSD's representation of NTHASH
+    (which is compatible with the :ref:`modular-crypt-format`),
+    and follows the :ref:`password-hash-api`.
+
+    It has no salt and a single fixed round.
+
+    The :meth:`~passlib.ifc.PasswordHash.hash` and :meth:`~passlib.ifc.PasswordHash.genconfig` methods accept no optional keywords.
+    """)
+
+##class ntlm_pair(object):
+##    "combined lmhash & nthash"
+##    name = "ntlm_pair"
+##    setting_kwds = ()
+##    _hash_regex = re.compile(u"^(?P<lm>[0-9a-f]{32}):(?P<nt>[0-9a-f]{32})$",
+##                             re.I)
+##
+##    @classmethod
+##    def identify(cls, hash):
+##        hash = to_unicode(hash, "latin-1", "hash")
+##        return len(hash) == 65 and cls._hash_regex.match(hash) is not None
+##
+##    @classmethod
+##    def hash(cls, secret, config=None):
+##        if config is not None and not cls.identify(config):
+##            raise uh.exc.InvalidHashError(cls)
+##        return lmhash.hash(secret) + ":" + nthash.hash(secret)
+##
+##    @classmethod
+##    def verify(cls, secret, hash):
+##        hash = to_unicode(hash, "ascii", "hash")
+##        m = cls._hash_regex.match(hash)
+##        if not m:
+##            raise uh.exc.InvalidHashError(cls)
+##        lm, nt = m.group("lm", "nt")
+##        # NOTE: verify against both in case encoding issue
+##        # causes one not to match.
+##        return lmhash.verify(secret, lm) or nthash.verify(secret, nt)
+
+#=============================================================================
+# msdcc v1
+#=============================================================================
+class msdcc(uh.HasUserContext, uh.StaticHandler):
+    """This class implements Microsoft's Domain Cached Credentials password hash,
+    and follows the :ref:`password-hash-api`.
+
+    It has a fixed number of rounds, and uses the associated
+    username as the salt.
+
+    The :meth:`~passlib.ifc.PasswordHash.hash`, :meth:`~passlib.ifc.PasswordHash.genhash`, and :meth:`~passlib.ifc.PasswordHash.verify` methods
+    have the following optional keywords:
+
+    :type user: str
+    :param user:
+        String containing name of user account this password is associated with.
+        This is required to properly calculate the hash.
+
+        This keyword is case-insensitive, and should contain just the username
+        (e.g. ``Administrator``, not ``SOMEDOMAIN\\Administrator``).
+
+    Note that while this class outputs lower-case hexadecimal digests,
+    it will accept upper-case digests as well.
+    """
+    name = "msdcc"
+    checksum_chars = uh.HEX_CHARS
+    checksum_size = 32
+
+    @classmethod
+    def _norm_hash(cls, hash):
+        return hash.lower()
+
+    def _calc_checksum(self, secret):
+        return hexlify(self.raw(secret, self.user)).decode("ascii")
+
+    @classmethod
+    def raw(cls, secret, user):
+        """encode password using mscash v1 algorithm
+
+        :arg secret: secret as unicode or utf-8 encoded bytes
+        :arg user: username to use as salt
+
+        :returns: returns string of raw bytes
+        """
+        secret = to_unicode(secret, "utf-8", param="secret").encode("utf-16-le")
+        user = to_unicode(user, "utf-8", param="user").lower().encode("utf-16-le")
+        return md4(md4(secret).digest() + user).digest()
+
+#=============================================================================
+# msdcc2 aka mscash2
+#=============================================================================
+class msdcc2(uh.HasUserContext, uh.StaticHandler):
+    """This class implements version 2 of Microsoft's Domain Cached Credentials
+    password hash, and follows the :ref:`password-hash-api`.
+
+    It has a fixed number of rounds, and uses the associated
+    username as the salt.
+
+    The :meth:`~passlib.ifc.PasswordHash.hash`, :meth:`~passlib.ifc.PasswordHash.genhash`, and :meth:`~passlib.ifc.PasswordHash.verify` methods
+    have the following extra keyword:
+
+    :type user: str
+    :param user:
+        String containing name of user account this password is associated with.
+        This is required to properly calculate the hash.
+
+        This keyword is case-insensitive, and should contain just the username
+        (e.g. ``Administrator``, not ``SOMEDOMAIN\\Administrator``).
+    """
+    name = "msdcc2"
+    checksum_chars = uh.HEX_CHARS
+    checksum_size = 32
+
+    @classmethod
+    def _norm_hash(cls, hash):
+        return hash.lower()
+
+    def _calc_checksum(self, secret):
+        return hexlify(self.raw(secret, self.user)).decode("ascii")
+
+    @classmethod
+    def raw(cls, secret, user):
+        """encode password using msdcc v2 algorithm
+
+        :type secret: unicode or utf-8 bytes
+        :arg secret: secret
+
+        :type user: str
+        :arg user: username to use as salt
+
+        :returns: returns string of raw bytes
+        """
+        from passlib.crypto.digest import pbkdf2_hmac
+        secret = to_unicode(secret, "utf-8", param="secret").encode("utf-16-le")
+        user = to_unicode(user, "utf-8", param="user").lower().encode("utf-16-le")
+        tmp = md4(md4(secret).digest() + user).digest()
+        return pbkdf2_hmac("sha1", tmp, user, 10240, 16)
+
+#=============================================================================
+# eof
+#=============================================================================
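These handlers are normally consumed through passlib's uniform hash/verify interface rather than by calling raw() directly. A short usage sketch, assuming passlib is installed and registers the handlers above under passlib.hash (MD4 availability depends on the passlib build):

from passlib.hash import lmhash, msdcc2, nthash

lm = lmhash.hash("password")                           # 32 hex chars; input silently truncated at 14 bytes
nt = nthash.hash("password")                           # MD4 over the UTF-16-LE encoding
dcc2 = msdcc2.hash("password", user="Administrator")   # the username acts as the salt

assert nthash.verify("password", nt)
assert msdcc2.verify("password", dcc2, user="Administrator")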

+ 337 - 0
Hash/xor.py

@@ -0,0 +1,337 @@
+entry = 0x7FC66B580010
+head = 0x5586267F2080
+tail = 0x55862759E700
+
+bin(head ^ tail)
+hex(head ^ tail)
+(head ^ tail) ^ head == tail
+(head ^ tail) ^ tail == head
+
+
+# force odd
+for n in range(100):
+    n |= 1
+    print(n)
+
+bin(11400714819323198486)
+bin(11400714819323198485)
+bin(11400714819323198485 ^ 11400714819323198486)
+bin(11400714819323198486 ^ 10)
+11400714819323198486 & 1
+11400714819323198485
+
+import math
+
+int(2 ** 16 / ((1 + math.sqrt(5)) / 2))
+int(2 ** 32 / ((1 + math.sqrt(5)) / 2))
+int(2 ** 64 / ((1 + math.sqrt(5)) / 2))
+int(2 ** 128 / ((1 + math.sqrt(5)) / 2))
+
+
+# fib hash
+for key in range(100):
+    (key * 11400714819323198485) >> 54
+
+
+fimur_hash = [
+    8613972110907996638,
+    15358817489599691552,
+    3656918988786498657,
+    10401764430870904227,
+    17146609988017651429,
+    5444711761147035686,
+    12189556917607727464,
+    487658412734448298,
+    7232503855452193771,
+    13977349419385324845,
+    2275450786074068591,
+    9020296620023908272,
+    15765141844109363442,
+    4063243553980254771,
+    10808088789751040885,
+    17552934303797120183,
+    5851035703310487032,
+    12595881237862518586,
+    893982664156516476,
+    14383673675207217919,
+    9426620913003140482,
+    16171466353514682052,
+    4469567715245758469,
+    11214413148748618055,
+]
+
+murmur3_x64 = [
+    1584096894626881991315877671151210123721,
+    54336250343350542782176903032023148998,
+    855158039471493497310317094450549862077,
+    1039127549861262650713404564393879311435,
+    93136154732261704556933925320311062615,
+    74312781558375054611426997378294798738,
+    1807475194205470988210194190753005636218,
+    1619970959750794755710948967737182030885,
+    128772317988171237053664783826596130169,
+    1550580235102978885114018615696532709555,
+    1154421219551708742117692178646737305277,
+    70824873152156146165017857262847459992,
+    86718167958849630914322687166258693395,
+    5145779186172661644318506030192103308,
+    1112946414600426880511747594572229236485,
+    82534992099906578798463907845345498967,
+    413297339436388227811327427340427008806,
+    1814992898938886923516075192548260744819,
+    540449371664412957912350958338390321963,
+    38622926961033010357625350269841354667,
+    56363050618590338737050388589839385361,
+    86807990082632946749662700404111625938,
+    162281206466706151118429615108459332167,
+    170811608674138578702126145761342232142,
+]
+
+# for h in fimurhash:
+#     bin(h)
+
+def hamming(a, b, bitdepth):
+    distance = 0
+    for i in range(bitdepth - 1, -1, -1):
+        bit_a = (a >> i) & 1
+        bit_b = (b >> i) & 1
+        distance += not(bit_a == bit_b)
+    return distance
+
+
+for i, hash in enumerate(fimur_hash):
+    distance = hamming(hash, fimur_hash[i-1], 64)
+    print(f"distance  [{i}]<->[{i-1}] \t {distance}/64")
+
+for i, hash in enumerate(murmur3_x64):
+    distance = hamming(hash, murmur3_x64[i-1], 128)
+    print(f"distance  [{i}]<->[{i-1}] \t {distance}/128")
+
+
+# fimurhash
+# distance  [0]<->[-1]     =28/64
+# distance  [1]<->[0]      =35/64
+# distance  [2]<->[1]      =31/64
+# distance  [3]<->[2]      =26/64
+# distance  [4]<->[3]      =33/64
+# distance  [5]<->[4]      =37/64
+# distance  [6]<->[5]      =24/64
+# distance  [7]<->[6]      =34/64
+# distance  [8]<->[7]      =27/64
+# distance  [9]<->[8]      =28/64
+# distance  [10]<->[9]     =32/64
+# distance  [11]<->[10]    =32/64
+# distance  [12]<->[11]    =33/64
+# distance  [13]<->[12]    =23/64
+# distance  [14]<->[13]    =28/64
+# distance  [15]<->[14]    =29/64
+# distance  [16]<->[15]    =27/64
+# distance  [17]<->[16]    =34/64
+# distance  [18]<->[17]    =32/64
+# distance  [19]<->[18]    =29/64
+# distance  [20]<->[19]    =29/64
+# distance  [21]<->[20]    =31/64
+# distance  [22]<->[21]    =32/64
+# distance  [23]<->[22]    =24/64
+
+# murmur3_x64
+# distance  [0]<->[-1]     =67/128
+# distance  [1]<->[0]      =55/128
+# distance  [2]<->[1]      =66/128
+# distance  [3]<->[2]      =62/128
+# distance  [4]<->[3]      =59/128
+# distance  [5]<->[4]      =63/128
+# distance  [6]<->[5]      =66/128
+# distance  [7]<->[6]      =62/128
+# distance  [8]<->[7]      =65/128
+# distance  [9]<->[8]      =64/128
+# distance  [10]<->[9]     =70/128
+# distance  [11]<->[10]    =65/128
+# distance  [12]<->[11]    =66/128
+# distance  [13]<->[12]    =67/128
+# distance  [14]<->[13]    =59/128
+# distance  [15]<->[14]    =55/128
+# distance  [16]<->[15]    =67/128
+# distance  [17]<->[16]    =64/128
+# distance  [18]<->[17]    =65/128
+# distance  [19]<->[18]    =61/128
+# distance  [20]<->[19]    =66/128
+# distance  [21]<->[20]    =58/128
+# distance  [22]<->[21]    =63/128
+# distance  [23]<->[22]    =65/128
+
+
+
+def fimurhash(number):
+    hash = (31 * 11400714819323198485 * (number << 15)) & 0xFFFFFFFFFFFFFFFF
+    hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
+    return hash
+
+def fimurhalt(number):
+    hash = (31 * 102334155 * (number << 15)) & 0xFFFFFFFFFFFFFFFF
+    hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
+    return hash
+
+mean = 0
+pool = 10000
+for i in range(pool):
+    hash_a = fimurhash(i)
+    hash_b = fimurhash(i + 1)
+    distance = hamming(hash_a, hash_b, 64)
+    mean += distance/64
+    print(f"fimurhash [{i}]<->[{i+1}] \t {distance/64}     \t {(hash_a & 0xFFFFFFFFFFFFFFFF)>> (64 - 14)} ")
+
+print(f"mean = {mean/pool}")
+
+for i in range(0, 544, 34):
+    hash_a = fimurhash(i)
+    hash_b = fimurhash(i + 1)
+    distance = hamming(hash_a, hash_b, 64)
+    print(f"fimurhash [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
+
+mean = 0
+pool = 10000
+for i in range(pool):
+    hash_a = fimurhalt(i)
+    hash_b = fimurhalt(i + 1)
+    distance = hamming(hash_a, hash_b, 64)
+    mean += distance/64
+    print(f"fimurhalt [{i}]<->[{i+1}] \t {distance/64}     \t {(hash_a & 0xFFFFFFFFFFFFFFFF)>> (64 - 14)} ")
+
+print(f"mean = {mean/pool}")
+
+for i in range(0, 544, 34):
+    hash_a = fimurhalt(i)
+    hash_b = fimurhalt(i + 1)
+    distance = hamming(hash_a, hash_b, 64)
+    print(f"fimurhalt [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
+
+
+
+pool = 1000
+for i in range(pool):
+    print(f"{fimurhash(i) >> 54} \t: "
+    f"{fimurhash(fimurhash(i >> 54) + i) >> 54} \t: "
+    f"{fimurhash(fimurhash(i >> 54) ^ i) >> 54}")
+
+# for i in range(pool):
+#     print(f"{fimurhash(i)} \t: "
+#     f"{fimurhash(fimurhash(i) + i)} \t: "
+#     f"{fimurhash(fimurhash(i) ^ i)}")
+    
+
+def fimurhash_seed(number, seed):
+    hash = (seed * 11400714819323198485 * (number << 15)) & 0xFFFFFFFFFFFFFFFF
+    hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
+    return hash
+
+# for i in range(0, 545, 34):
+#     print(f"{i} ; "
+#         f"{fimurhash_seed(i, 31) >> 54} ; "
+#     f"{fimurhash_seed(i, 31 + fimurhash_seed(i, 31)) >> 54} ; "
+#     f"{fimurhash_seed(i, 31 ^ fimurhash_seed(i, 31)) >> 54}")
+
+
+# nested hash
+n = 14
+top_n = 0
+seed = 31 << 7
+collisions = 0
+entry_count = (2**(n-1))
+
+hmap = [[] for n in range(2**top_n)]
+for i in range(1, entry_count):
+    hash = fimurhash_seed(i, seed)
+    top_level = hash >> (64 - top_n)
+    # nested_seed =fimurhash_seed(i ^ hash, seed + hash) >> (64 - n + top_n)
+    # nested_hash = fimurhash_seed(i, (seed * hash) & 0xFFFFFFFFFFFFFFFF )
+    # nested_hash = fimurhash_seed(i, seed << 7)
+    nested_hash = fimurhash_seed(i, seed)
+    nested_level = nested_hash >> (64 - n + top_n)
+    print(f"{i} : { hash } : { nested_hash } : {top_level} : {nested_level}")
+    hmap[top_level].append(nested_level)
+
+
+for i, top_level in enumerate(hmap):
+    entries = len(top_level)
+    unique = len(set(top_level))
+    collisions += entries-unique
+    print(f"toplevel[{i}] -> {unique}/{entries} : {entries-unique} collisions")
+
+print(f"{collisions} / {entry_count} : {collisions/entry_count}")
+
+
+
+# for i in range(100):
+#     print(f"{i} : { fimurhash_seed(i, 31) >> 54}")
+# def fimurhash(number):
+#     hash = 31 * 11400714819323198486 * (number << 15)
+#     hash = (hash << 7) | (hash >> (32 - 7))
+#     return hash
+
+# def fimurhash(number):
+#     hash = 31 * 62831853071 * (number << 15)
+#     hash = (hash << 7) | (hash >> (32 - 7))
+#     return hash
+
+# def fimurhash(number):
+#     hash = 31 * 102334155 * (number << 15)
+#     hash = (hash << 7) | (hash >> (32 - 7))
+#     return hash
+
+# def fimurhash(number):
+#     hash = 31 * 102334155 * (number << 16)
+#     hash = (hash << 8) | (hash >> (32 - 8))
+#     return hash
+
+# def fimurhash(number):
+#     hash = 31 * 11400714819323198486 * (number << 16)
+#     hash = (hash << 8) | (hash >> (32 - 8))
+#     return hash
+
+# def fimurhash64(number):
+#     hash = 31 * 11400714819323198485 * (number << 31)
+#     hash = (hash << 15) | (hash >> (64 - 19))
+#     return hash
+
+# def merxorhash64(number, n):
+#     hash = 31 * ((2**n)-1 ) * (number << 31)
+#     hash = (hash << 15) | (hash >> (64 - 19))
+#     return hash
+
+# import string
+# for char in string.ascii_lowercase:
+
+
+# mean = 0
+# for i in range(pool):
+#     hash_a = fimurhash64(i)
+#     hash_b = fimurhash64(i + 1)
+#     distance = hamming(hash_a, hash_b, 64)
+#     mean += distance/64
+#     print(f"fimurhash64 [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
+
+# print(f"mean = {mean/pool}")
+
+# mean = 0
+# for i in range(pool):
+#     hash_a = merxorhash64(i, 61)
+#     hash_b = merxorhash64(i + 1, 61)
+#     distance = hamming(hash_a, hash_b, 64)
+#     mean += distance/64
+#     print(f"merxorhash64 [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
+
+# print(f"mean = {mean/pool}")
+
+# mean = 0
+# for i in range(pool):
+#     hash_a = merxorhash64(i, 31)
+#     hash_b = merxorhash64(i + 1, 31)
+#     distance = hamming(hash_a, hash_b, 64)
+#     mean += distance/64
+#     print(f"merxorhash64 [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
+
+# print(f"mean = {mean/pool}")
+
+
+
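The constant 11400714819323198485 used throughout this file is the odd integer closest to 2**64 / phi, the multiplier of Fibonacci (multiplicative) hashing; shifting right by 54 keeps the top 10 bits, i.e. an index into a 1024-slot table. A compact sketch of that mapping, kept separate from the fimurhash experiments above:

# Fibonacci (multiplicative) hashing: multiply by ~2**64/phi and keep the top bits
PHI_MULT = 11400714819323198485
MASK64 = 0xFFFFFFFFFFFFFFFF

def fib_index(key: int, table_bits: int = 10) -> int:
    # the top `table_bits` bits of the 64-bit product select the bucket
    return ((key * PHI_MULT) & MASK64) >> (64 - table_bits)

# consecutive keys spread across the table instead of landing in adjacent buckets
print([fib_index(k) for k in range(8)])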

+ 68 - 0
Hash/zorbristHash.py

@@ -0,0 +1,68 @@
+import time
+from PRNG import *
+# Zobrist hashing class
+class hashTable():
+	def __init__(self, seed = time.time_ns()):
+		self.constVal = [0,1,2,3,4,5,6,7,8,9,10,11]
+		self.pieceChar = 'PRBNQKprbnqk'
+		self.random_val = PRNG.LCG(maxn = 768, seedn = seed, m = 18446744073709551557, a = 2774243619903564593)
+		self.table = []
+		self.black_move = PRNG.LCG(maxn = 1, seedn = 4829959, m = 18446744073709551557, a = 2774243619903564593)[0]
+		self.hashTbl = {}
+
+    #initiating zobrist hash values, one for each
+    #piece and every location
+	def init_zobrist(self):
+		self.table = []
+		for i in range(64):
+			self.table.append([])
+			for j in range(12):
+				self.table[i].append(None)
+
+		for i in range(64):
+			for j in range(12):
+				self.table[i][j] = self.random_val[i*12 + j]
+
+    #hashing algorithm for board
+	def hash(self, board, colour):
+		h = 0
+		if colour == 'black':
+			h = h ^ self.black_move
+
+		p = 0
+		for i in range(128):
+			if (i & 0x88) == 0:
+				if board.board[i] != None:
+					j = self.constVal[self.pieceChar.index(board.board[i].character)]
+					h = h ^ self.table[p][j]
+
+				p += 1
+		return h
+
+	#adding items to the lookup table
+	def append(self, board, colour, score, FEN, hashVal = None):
+		content = [board, score, FEN]
+		if hashVal == None:
+			hashVal = self.hash(board, colour)
+		if hashVal not in self.hashTbl:
+			self.hashTbl[hashVal] = content
+
+		else:
+			if self.hashTbl[hashVal][2] == content[2]:
+				return
+			replace = self.hashTbl[hashVal]
+			# mark the existing entry as probed: the 4th element points to the next slot
+			self.hashTbl[hashVal] = [replace[0], replace[1], replace[2], hashVal + 1]
+			self.append(board, colour, score, FEN, hashVal = hashVal + 1)
+
+	#fetching from lookup table
+	def find(self, content, colour, hashVal = None):
+		if hashVal == None:
+			hashVal = self.hash(content, colour)
+
+		if hashVal in self.hashTbl:
+			if len(self.hashTbl[hashVal]) == 4:
+				if self.hashTbl[hashVal][2] == content[2]:
+					return self.hashTbl[hashVal]
+				return self.find(content, colour, hashVal = hashVal + 1)
+
+			else:
+				return self.hashTbl[hashVal]
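The point of Zobrist hashing is the incremental update: moving a piece only XORs out its key on the old square and XORs in its key on the new one, so the full board never has to be rescanned. A minimal, self-contained sketch of that property, using the standard library's random module in place of the project's PRNG.LCG:

import random

random.seed(4829959)  # fixed seed for reproducible keys
PIECES = 'PRBNQKprbnqk'
ZKEYS = [[random.getrandbits(64) for _ in PIECES] for _ in range(64)]  # one key per (square, piece)
BLACK_TO_MOVE = random.getrandbits(64)

def zobrist(board, black_to_move=False):
    """board: dict mapping a square index (0-63) to a piece character."""
    h = BLACK_TO_MOVE if black_to_move else 0
    for square, piece in board.items():
        h ^= ZKEYS[square][PIECES.index(piece)]
    return h

h = zobrist({0: 'R', 1: 'N', 63: 'k'})
h ^= ZKEYS[0][PIECES.index('R')]   # remove the rook from square 0
h ^= ZKEYS[8][PIECES.index('R')]   # place it on square 8
assert h == zobrist({8: 'R', 1: 'N', 63: 'k'})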

+ 127 - 0
Pseudonym/03-pseudonymize-data-in-power-bi-python.py

@@ -0,0 +1,127 @@
+import os
+import pickle
+import pandas as pd
+
+from presidio_analyzer import AnalyzerEngine
+from presidio_anonymizer import AnonymizerEngine
+
+from faker import Faker
+from faker.providers import internet
+
+
+# Function used to pseudonymize a text containing emails
+def anonymizeEmail(text_to_anonymize, country):
+    # Initialize Faker
+    fake = Faker(faker_locales_dict[country])
+    fake.add_provider(internet)
+
+    analyzer_results = analyzer.analyze(text=text_to_anonymize, entities=["EMAIL_ADDRESS"], language='en')
+
+    matched_emails = {}
+    for match in analyzer_results:
+        email = text_to_anonymize[match.start:match.end]
+
+        if email not in emails_dict:
+            fake_email = fake.safe_email()
+
+            while (fake_email in emails_dict.values()) or (fake_email in emails_dict):
+                fake_email = fake.safe_email()
+
+            emails_dict[email] = fake_email
+            matched_emails[email] = fake_email
+        else:
+            fake_email = emails_dict[email]
+            matched_emails[email] = fake_email
+
+    anonymized_result = text_to_anonymize
+    for email in matched_emails:
+        anonymized_result = anonymized_result.replace(email, matched_emails[email])
+
+    return anonymized_result
+
+
+# Function used to pseudonymize a text containing names
+def anonymizeName(text_to_anonymize, country):
+    # Initialize Faker
+    fake = Faker(faker_locales_dict[country])
+
+    analyzer_results = analyzer.analyze(text=text_to_anonymize, entities=["PERSON"], language='en')
+
+    matched_names = {}
+    for match in analyzer_results:
+        name = text_to_anonymize[match.start:match.end]
+
+        if name not in names_dict:
+            fake_name = fake.name()
+
+            while (fake_name in names_dict.values()) or (fake_name in names_dict):
+                fake_name = fake.name()
+
+            names_dict[name] = fake_name
+            matched_names[name] = fake_name
+        else:
+            fake_name = names_dict[name]
+            matched_names[name] = fake_name
+
+    anonymized_result = text_to_anonymize
+    for name in matched_names:
+        anonymized_result = anonymized_result.replace(name, matched_names[name])
+
+    return anonymized_result
+
+
+# For testing purpose you can load the Excel content directly here.
+# Just uncomment the following 2 lines.
+# # Load the Excel content in a dataframe
+# dataset = pd.read_excel(r'D:\<your-path>\Chapter06\CustomersCreditCardAttempts.xlsx', engine='openpyxl')
+
+# Load mapping dictionaries from PKL files if they exist, otherwise create empty dictionaries
+pkls_path = r'D:\<your-path>\Chapter06\pkls'
+emails_dict_pkl_path = os.path.join(pkls_path, 'emails_dict.pkl')
+names_dict_pkl_path = os.path.join(pkls_path, 'names_dict.pkl')
+
+if os.path.isfile(emails_dict_pkl_path):
+    emails_dict = pickle.load(open(emails_dict_pkl_path, "rb"))
+else:
+    emails_dict = {}
+
+if os.path.isfile(names_dict_pkl_path):
+    names_dict = pickle.load(open(names_dict_pkl_path, "rb"))
+else:
+    names_dict = {}
+
+# Define locale and language dictionaries
+faker_locales_dict = {'UNITED STATES': 'en_US', 'ITALY': 'it_IT', 'GERMANY': 'de_DE'}
+
+# Initialize Presidio's analyzer and anonymizer
+# https://microsoft.github.io/presidio/supported_entities/
+analyzer = AnalyzerEngine()
+anonymizer = AnonymizerEngine()
+
+# Create a copy of the source dataset
+df = dataset.copy()
+
+# Apply the function anonymizeName for each value of the Name column
+df.Name = pd.Series([anonymizeName(text, country) for (text, country) in zip(df['Name'], df['Country'])])
+
+# Apply the function anonymizeEmail for each value of the Email column
+df.Email = pd.Series([anonymizeEmail(text, country) for (text, country) in zip(df['Email'], df['Country'])])
+
+# The Notes column has the 'object' data type because it contains a lot of NaN
+# values, so Pandas doesn't recognize it as string. It has to be cast to string
+# in order to be anonymized, and is then replaced with its anonymized version
+df.Notes = pd.Series(
+    [anonymizeName(text, country) for (text, country) in zip(df['Notes'].astype('str'), df['Country'])])
+df.Notes = pd.Series(
+    [anonymizeEmail(text, country) for (text, country) in zip(df['Notes'].astype('str'), df['Country'])])
+
+# # Prevent Pandas from truncating strings in cells
+# pd.set_option('display.max_colwidth', None)
+
+# # Show both the dataframes
+# dataset
+# df
+
+# Write emails and names dictionaries to PKL files
+pickle.dump(emails_dict, open(emails_dict_pkl_path, "wb"))
+pickle.dump(names_dict, open(names_dict_pkl_path, "wb"))
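The script's core round trip is: Presidio's analyzer locates entity spans, Faker supplies stable replacements. A reduced sketch of that flow on a single string, assuming presidio-analyzer, its default spaCy model and Faker are installed (the sample text and variable names are illustrative only):

from faker import Faker
from presidio_analyzer import AnalyzerEngine

analyzer = AnalyzerEngine()
fake = Faker('en_US')

text = "Contact Jane Doe at jane.doe@example.com"
results = analyzer.analyze(text=text, entities=["EMAIL_ADDRESS", "PERSON"], language='en')

pseudonymized = text
# replace from the end of the string so earlier spans keep their offsets
for match in sorted(results, key=lambda r: r.start, reverse=True):
    replacement = fake.safe_email() if match.entity_type == "EMAIL_ADDRESS" else fake.name()
    pseudonymized = pseudonymized[:match.start] + replacement + pseudonymized[match.end:]

print(pseudonymized)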

+ 196 - 0
Pseudonym/dataset_pseudonymizer.py

@@ -0,0 +1,196 @@
+import logging
+from typing import Tuple, Union
+
+import microdata_validator
+
+from job_executor.exception import BuilderStepError
+from job_executor.adapter import pseudonym_service
+from job_executor.model import Metadata
+
+logger = logging.getLogger()
+
+
+def _get_unit_types(
+    metadata: Metadata
+) -> Tuple[Union[str, None], Union[str, None]]:
+    return (
+        metadata.get_identifier_key_type_name(),
+        metadata.get_measure_key_type_name()
+    )
+
+
+def _pseudonymize_identifier_only(
+    input_csv_path: str,
+    unit_id_type: str,
+    job_id: str
+) -> str:
+    unique_identifiers = set()
+    with open(input_csv_path, newline='', encoding='utf8') as csv_file:
+        for line in csv_file:
+            unit_id = line.strip().split(';')[1]
+            unique_identifiers.add(unit_id)
+    identifier_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_identifiers), unit_id_type, job_id
+    )
+    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
+    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            line_number: int = row[0]
+            unit_id: str = row[1]
+            value: str = row[2]
+            start_date: str = row[3]
+            stop_date: str = row[4]
+            target_file.write(
+                ';'.join([
+                    str(line_number),
+                    str(identifier_to_pseudonym[unit_id]),
+                    value,
+                    start_date, stop_date
+                ]) + '\n'
+            )
+    target_file.close()
+    return output_csv_path
+
+
+def _pseudonymize_measure_only(
+    input_csv_path: str,
+    unit_id_type: str,
+    job_id: str
+) -> str:
+    unique_measure_values = set()
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            value = line.strip().split(';')[2]
+            unique_measure_values.add(value)
+    value_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_measure_values), unit_id_type, job_id
+    )
+    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
+    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            line_number: int = row[0]
+            unit_id: str = row[1]
+            value: str = row[2]
+            start_date: str = row[3]
+            stop_date: str = row[4]
+            target_file.write(
+                ';'.join([
+                    str(line_number),
+                    unit_id,
+                    str(value_to_pseudonym[value]),
+                    start_date, stop_date
+                ]) + '\n'
+            )
+    target_file.close()
+    return output_csv_path
+
+
+def _pseudonymize_identifier_and_measure(
+    input_csv_path: str,
+    identifier_unit_id_type: str,
+    measure_unit_id_type: str,
+    job_id: str
+) -> str:
+    unique_idents = set()
+    unique_measure_values = set()
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            unit_id = row[1]
+            value = row[2]
+            unique_idents.add(unit_id)
+            unique_measure_values.add(value)
+    identifier_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_idents), identifier_unit_id_type, job_id
+    )
+    value_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_measure_values), measure_unit_id_type, job_id
+    )
+    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
+    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            line_number: int = row[0]
+            unit_id: str = row[1]
+            value: str = row[2]
+            start_date: str = row[3]
+            stop_date: str = row[4]
+            target_file.write(
+                ';'.join([
+                    str(line_number),
+                    str(identifier_to_pseudonym[unit_id]),
+                    str(value_to_pseudonym[value]),
+                    start_date, stop_date
+                ]) + '\n'
+            )
+    target_file.close()
+    return output_csv_path
+
+
+def _pseudonymize_csv(
+    input_csv_path: str,
+    identifier_unit_id_type: Union[str, None],
+    measure_unit_id_type: Union[str, None],
+    job_id: str
+) -> str:
+    if identifier_unit_id_type and not measure_unit_id_type:
+        logger.info('Pseudonymizing identifier')
+        return _pseudonymize_identifier_only(
+            input_csv_path, identifier_unit_id_type, job_id
+        )
+    elif measure_unit_id_type and not identifier_unit_id_type:
+        logger.info('Pseudonymizing measure')
+        return _pseudonymize_measure_only(
+            input_csv_path, measure_unit_id_type, job_id
+        )
+    elif identifier_unit_id_type and measure_unit_id_type:
+        logger.info('Pseudonymizing identifier and measure')
+        return _pseudonymize_identifier_and_measure(
+            input_csv_path,
+            identifier_unit_id_type,
+            measure_unit_id_type,
+            job_id
+        )
+    else:
+        logger.info('No pseudonymization')
+        return input_csv_path
+
+
+def run(input_csv_path: str, metadata: Metadata, job_id: str) -> str:
+    """
+    Pseudonymizes the identifier column of the dataset (and the measure column
+    when it also contains unit identifiers). Requests pseudonyms from an
+    external service and replaces the original values with them.
+    """
+    try:
+        logger.info(f'Pseudonymizing data {input_csv_path}')
+        identifier_unit_type, measure_unit_type = (
+            _get_unit_types(metadata)
+        )
+        identifier_unit_id_type = (
+            None if identifier_unit_type is None
+            else microdata_validator.get_unit_id_type_for_unit_type(
+                identifier_unit_type
+            )
+        )
+        measure_unit_id_type = (
+            None if measure_unit_type is None
+            else microdata_validator.get_unit_id_type_for_unit_type(
+                measure_unit_type
+            )
+        )
+        output_file = _pseudonymize_csv(
+            input_csv_path,
+            identifier_unit_id_type,
+            measure_unit_id_type,
+            job_id
+        )
+        logger.info(f'Pseudonymization step done {output_file}')
+        return output_file
+    except Exception as e:
+        logger.error(f'Error during pseudonymization: {str(e)}')
+        raise BuilderStepError('Failed to pseudonymize dataset') from e
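All three helpers assume the same five-column, semicolon-separated layout (line_number;unit_id;value;start_date;stop_date) and delegate pseudonym generation to the pseudonym_service adapter. A toy sketch of the identifier-only path with that adapter replaced by an in-memory counter (the counter and sample rows are assumptions, purely for illustration):

from itertools import count

def fake_pseudonymize(values):
    # stand-in for pseudonym_service.pseudonymize(): stable integer pseudonyms
    counter = count(1)
    return {v: next(counter) for v in sorted(values)}

rows = [
    "1;id-001;100;2020-01-01;2020-12-31",
    "2;id-002;250;2020-01-01;2020-12-31",
    "3;id-001;300;2021-01-01;2021-12-31",
]
mapping = fake_pseudonymize({r.split(';')[1] for r in rows})
for r in rows:
    line_no, unit_id, value, start, stop = r.split(';')
    print(';'.join([line_no, str(mapping[unit_id]), value, start, stop]))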

+ 65 - 0
Pseudonym/doc2pseudo.py

@@ -0,0 +1,65 @@
+"""
+Pseudonymize a doc file. It takes as input a .doc file, converts it to txt, pseudonymizes it and outputs a
+pseudonymized txt file.
+
+Usage:
+    doc2pseudo.py <input_file_path> <model_folder> [options]
+
+Arguments:
+    <input_file_path>       A required path parameter
+    <model_folder>          A folder with a model inside
+"""
+from pathlib import Path
+
+from argopt import argopt
+from flair.models import SequenceTagger
+from tqdm import tqdm
+
+from data_ETL import pseudonymize
+
+
+def doc2txt(doc_path: Path):
+    if doc_path.suffix == ".doc":
+        try:
+            import textract
+        except ImportError:
+            raise Exception("Textract is not installed. Cannot convert .doc file")
+        text = textract.process(doc_path.as_posix()).decode("utf-8").replace("|", "")
+        return text
+    elif doc_path.suffix == ".txt":
+        with open(doc_path.as_posix()) as filo:
+            return filo.read()
+    else:
+        raise Exception("File type not handled: either .doc or .txt")
+
+
+def save_text_file(text: str, output_file: Path):
+    with open(output_file.as_posix(), "w") as out:
+        out.write(text)
+
+
+def run(doc_path: Path):
+    text = doc2txt(doc_path=doc_path)
+    output_text = Path(doc_path.stem + "_anon.txt")
+    tags, pseudo = pseudonymize(text=text, tagger=TAGGER)
+    save_text_file(pseudo, output_file=Path(output_text))
+    print(pseudo)
+
+
+def main(input_file_path: Path, model_folder: Path):
+    global TAGGER
+
+    doc_paths = []
+    TAGGER = SequenceTagger.load(model_folder)
+    job_output = []
+    tqdm.write(f"Converting file {input_file_path}")
+    job_output.append(run(input_file_path))
+
+    return doc_paths
+
+
+if __name__ == "__main__":
+    parser = argopt(__doc__).parse_args()
+    input_file_path = Path(parser.input_file_path)
+    model_folder = parser.model_folder
+    main(input_file_path=input_file_path, model_folder=model_folder)

+ 1227 - 0
Pseudonym/main.py

@@ -0,0 +1,1227 @@
+def pseudonymize(colname):
+    if colname not in key:
+        sha3 = hashlib.sha3_512()
+        data = salt + colname
+        sha3.update(data.encode('utf-8'))
+        hexdigest = sha3.hexdigest()
+        key[colname] = hexdigest
+        return hexdigest
+    else:
+        return key[colname]
+
+
+import logging
+from typing import Tuple, Union, List
+
+import microdata_validator
+
+from job_executor.exception import BuilderStepError
+from job_executor.adapter import pseudonym_service
+from job_executor.model import Metadata
+
+logger = logging.getLogger()
+
+
+def _get_unit_types(
+        metadata: Metadata
+) -> Tuple[Union[str, None], Union[str, None]]:
+    return (
+        metadata.get_identifier_key_type_name(),
+        metadata.get_measure_key_type_name()
+    )
+
+
+def _pseudonymize_identifier_only(
+        input_csv_path: str,
+        unit_id_type: str,
+        job_id: str
+) -> str:
+    unique_identifiers = set()
+    with open(input_csv_path, newline='', encoding='utf8') as csv_file:
+        for line in csv_file:
+            unit_id = line.strip().split(';')[1]
+            unique_identifiers.add(unit_id)
+    identifier_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_identifiers), unit_id_type, job_id
+    )
+    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
+    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            line_number: int = row[0]
+            unit_id: str = row[1]
+            value: str = row[2]
+            start_date: str = row[3]
+            stop_date: str = row[4]
+            target_file.write(
+                ';'.join([
+                    str(line_number),
+                    str(identifier_to_pseudonym[unit_id]),
+                    value,
+                    start_date, stop_date
+                ]) + '\n'
+            )
+    target_file.close()
+    return output_csv_path
+
+
+def _pseudonymize_measure_only(
+        input_csv_path: str,
+        unit_id_type: str,
+        job_id: str
+) -> str:
+    unique_measure_values = set()
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            value = line.strip().split(';')[2]
+            unique_measure_values.add(value)
+    value_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_measure_values), unit_id_type, job_id
+    )
+    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
+    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            line_number: int = row[0]
+            unit_id: str = row[1]
+            value: str = row[2]
+            start_date: str = row[3]
+            stop_date: str = row[4]
+            target_file.write(
+                ';'.join([
+                    str(line_number),
+                    unit_id,
+                    str(value_to_pseudonym[value]),
+                    start_date, stop_date
+                ]) + '\n'
+            )
+    target_file.close()
+    return output_csv_path
+
+
+def _pseudonymize_identifier_and_measure(
+        input_csv_path: str,
+        identifier_unit_id_type: str,
+        measure_unit_id_type: str,
+        job_id: str
+) -> str:
+    unique_idents = set()
+    unique_measure_values = set()
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            unit_id = row[1]
+            value = row[2]
+            unique_idents.add(unit_id)
+            unique_measure_values.add(value)
+    identifier_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_idents), identifier_unit_id_type, job_id
+    )
+    value_to_pseudonym = pseudonym_service.pseudonymize(
+        list(unique_measure_values), measure_unit_id_type, job_id
+    )
+    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
+    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
+    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
+        for line in csv_file:
+            row = line.strip().split(';')
+            line_number: int = row[0]
+            unit_id: str = row[1]
+            value: str = row[2]
+            start_date: str = row[3]
+            stop_date: str = row[4]
+            target_file.write(
+                ';'.join([
+                    str(line_number),
+                    str(identifier_to_pseudonym[unit_id]),
+                    str(value_to_pseudonym[value]),
+                    start_date, stop_date
+                ]) + '\n'
+            )
+    target_file.close()
+    return output_csv_path
+
+
+def _pseudonymize_csv(
+        input_csv_path: str,
+        identifier_unit_id_type: Union[str, None],
+        measure_unit_id_type: Union[str, None],
+        job_id: str
+) -> str:
+    if identifier_unit_id_type and not measure_unit_id_type:
+        logger.info('Pseudonymizing identifier')
+        return _pseudonymize_identifier_only(
+            input_csv_path, identifier_unit_id_type, job_id
+        )
+    elif measure_unit_id_type and not identifier_unit_id_type:
+        logger.info('Pseudonymizing measure')
+        return _pseudonymize_measure_only(
+            input_csv_path, measure_unit_id_type, job_id
+        )
+    elif identifier_unit_id_type and measure_unit_id_type:
+        logger.info('Pseudonymizing identifier and measure')
+        return _pseudonymize_identifier_and_measure(
+            input_csv_path,
+            identifier_unit_id_type,
+            measure_unit_id_type,
+            job_id
+        )
+    else:
+        logger.info('No pseudonymization')
+        return input_csv_path
+
+
+def run(input_csv_path: str, metadata: Metadata, job_id: str) -> str:
+    """
+    Pseudonymizes the identifier column of the dataset (and the measure column
+    when it also contains unit identifiers). Requests pseudonyms from an
+    external service and replaces the original values with them.
+    """
+    try:
+        logger.info(f'Pseudonymizing data {input_csv_path}')
+        identifier_unit_type, measure_unit_type = (
+            _get_unit_types(metadata)
+        )
+        identifier_unit_id_type = (
+            None if identifier_unit_type is None
+            else microdata_validator.get_unit_id_type_for_unit_type(
+                identifier_unit_type
+            )
+        )
+        measure_unit_id_type = (
+            None if measure_unit_type is None
+            else microdata_validator.get_unit_id_type_for_unit_type(
+                measure_unit_type
+            )
+        )
+        output_file = _pseudonymize_csv(
+            input_csv_path,
+            identifier_unit_id_type,
+            measure_unit_id_type,
+            job_id
+        )
+        logger.info(f'Pseudonymization step done {output_file}')
+        return output_file
+    except Exception as e:
+        logger.error(f'Error during pseudonymization: {str(e)}')
+        raise BuilderStepError('Failed to pseudonymize dataset') from e
+
+
+def pseudonymize_1(self, df, schema):  #: list[list[str]]):
+    """ Performs pseudonymization of the given dataframe based on the provided schema.
+        For example, if the given df is for an entity called person,
+        2 dataframes will be returned, one called person that has hashed ids and masked fields,
+        and one called person_lookup that contains the original person_id, person_id_pseudo,
+        and the non-masked values for columns marked to be masked."""
+
+    df_pseudo = df_lookup = df
+
+    for col_name, dtype, op in schema:
+        if op == "hash-no-lookup" or op == "hnl":
+            # This means that the lookup can be performed against a different table so no lookup is needed.
+            df_pseudo = df_pseudo.withColumn(col_name, F.sha2(F.concat(F.col(col_name), F.lit(self.salt)),
+                                                              256)).withColumnRenamed(col_name,
+                                                                                      col_name + "_pseudonym")
+            df_lookup = df_lookup.drop(col_name)
+        elif op == "hash" or op == 'h':
+            df_pseudo = df_pseudo.withColumn(col_name, F.sha2(F.concat(F.col(col_name), F.lit(self.salt)),
+                                                              256)).withColumnRenamed(col_name,
+                                                                                      col_name + "_pseudonym")
+            df_lookup = df_lookup.withColumn(col_name + "_pseudonym",
+                                             F.sha2(F.concat(F.col(col_name), F.lit(self.salt)), 256))
+        elif op == "mask" or op == 'm':
+            df_pseudo = df_pseudo.withColumn(col_name, F.lit('*'))
+        elif op == "partition-by":
+            pass  # make no changes for this column so that it will be in both dataframes and can be used for partitioning
+        elif op == "no-op" or op == 'x':
+            df_lookup = df_lookup.drop(col_name)
+
+    df_pseudo = self.fix_column_names(df_pseudo)
+    df_lookup = self.fix_column_names(df_lookup)
+
+    return (df_pseudo, df_lookup)
+
+
+def pseudonymize_2(value, salt=SALT_KEY):
+    """Pseudonymize value with salt, using HMAC-SHA256 encoding
+    Parameters
+    ----------
+    value: value to be pseudonymized
+    salt: hazard salt for additional protection
+    Returns
+    -------
+    pseudonymized value using HMAC-SHA256
+    """
+
+    # NOTE: empty or None values must be bypassed here, as they would
+    # otherwise all map to one specific hash value
+    if value is None or value is np.nan or value == '':
+        return None
+
+    return hmac.new(
+        key=salt.encode('utf-8'),  # the secret key
+        msg=str(value).encode('utf-8'),  # the value to pseudonymize
+        digestmod=hashlib.sha256  # the hash function
+    ).hexdigest()  # hex-encoded digest
+
+
+def pseudonymize_row(row):
+    """
+    Replace some identifying information with others:
+    - Fake name
+    - Birthdate is replaced with the age
+    """
+    anonymized_row = row.copy()
+
+    # using Faker (https://faker.readthedocs.io/en/master/), we generate fake names
+    if anonymized_row['Gender'] == 'Female':
+        anonymized_row['Fullname'] = faker.name_female()
+    else:
+        anonymized_row['Fullname'] = faker.name_male()
+
+    del anonymized_row['Birthdate']
+    birthdate = datetime.strptime(row['Birthdate'], '%Y-%m-%d')
+    age = today.year - birthdate.year - ((today.month, today.day) < (birthdate.month, birthdate.day))
+    anonymized_row['Age'] = age
+
+    return anonymized_row
+
+
+def anonymize_one(self, column, delete: bool, pattern: AnonymizationPattern = None):
+    if column is None:
+        return Logger.log_none_type_error('column')
+
+    Logger.log_info_table_manipulation_started(self.filename, f'Anonymize One ({column})')
+    # delete column in every dataset if found
+    error_count = 0
+    if delete:
+        for ds in self.datasets:
+            out = ds.delete_column(column)
+            if out < 1:
+                error_count += 1
+        self.remove_columnnames([column])
+    else:
+        # if column is not deleted: generate a value for column, random or by pattern
+        if pattern is None:
+            for ds in self.datasets:
+                out = ds.set_columnvalue_random(column)
+                if out < 1:
+                    error_count += 1
+        else:
+            for ds in self.datasets:
+                out = ds.set_columnvalue_by_pattern(column, pattern)
+                if out < 1:
+                    error_count += 1
+
+    Logger.log_info_table_manipulation_finished(error_count)
+    return error_count
+
+
+def pseudonymize_3(field):
+    return sha256(field.encode() + get_seed(seed).encode()).hexdigest()[:20]
+
+
+def pseudonymize_columns(dataframe, cols,
+                         ps_key='test',
+                         api_key=SHARED_KEY):
+    actions = [
+        {"name": "pseudonymize-{}".format(c),
+         "transform-value": {
+             "key": c,
+             "pseudonymize": {
+                 "method": "merengue",
+                 "key": ps_key,
+             }
+         }
+         } for c in cols]
+    items = dataframe.fillna('').T.to_dict()
+    item_list = list(items.values())
+    data = requests.post(
+        'https://api.kiprotect.com/v1/transform',
+        data=json.dumps(
+            {"actions": actions, "items": item_list},
+            allow_nan=False),
+        headers={
+            'Authorization': 'Bearer {}'.format(api_key)})
+    return pd.DataFrame(data.json()['items'])
+
+
+def _parse_url_parts(self, tld_extractor: TLDExtract, url_str: str) -> dict:
+    url = tld_extractor(url_str)
+
+    parts = {}
+    parts["scheme"] = self._find_first(r"^([a-z0-9]+)\:\/\/", url_str)
+    parts["auth"] = self._find_first(r"(?:.*\/\/|^)(.*:.*)@.*", url_str)
+    parts["domain"] = url.domain
+    parts["subdomain"] = url.subdomain
+    parts["suffix"] = url.suffix
+    url_list = ".".join(list(url))
+    parts["path"] = self._find_first(
+        rf"(?:^[a-z0-9]+\:\/\/)?{url_list}(?:\:\d+)?([^#^\?]*).*", url_str
+    )
+    parts["query"] = self._find_first(r".*(\?\w+=[a-zA-Z0-9](?:&\w+=[a-zA-Z0-9]+)*).*", url_str)
+    parts["fragment"] = self._find_first(r".*#(.*)", url_str)
+
+    return parts
+
+
+def _pseudonymize_value(self, value: str, pseudonyms: list[dict]) -> str:
+    hash_string = self._hasher.hash_str(value, salt=self._config.hash_salt)
+    if self._cache.requires_storing(hash_string):
+        encrypted_origin = self._encrypter.encrypt(value)
+        pseudonyms.append({"pseudonym": hash_string, "origin": encrypted_origin})
+    return self._wrap_hash(hash_string)
+
+
+def base64_method(data_path, columns):
+    data = pd.read_csv(data_path)
+    data.dropna()
+    data.reset_index(drop=True, inplace=True)
+    existing_columns = list(data)
+    for column in columns:
+        if column in existing_columns:
+            data[column] = data[column].apply(str)
+            data[column] = data[column].apply(lambda x: base64.b64encode(bytes(x, 'utf-8')))
+    return data
+
+
+def pseudonymize_4(self, s):
+    sl = len(s) / self.__byte
+    return struct.unpack('<%dh' % sl, s)
+
+
+def _replace_name(item, value, field, dicom):
+    sex = dicom.get("PatientSex")
+    sex = {"F": "Female", "M": "Male", "O": "Other", "": "Unk"}[sex]
+    age = Deider._round_to_nearest(parse_AS_as_int(dicom.get("PatientAge")), 5)
+    return f"{sex} {age:03d}Y {dicom.get('Modality')}"
+
+
+def apply(config, val):
+    """ Pseudonymize using format preserving encryption.
+    Example config:
+    {
+        'func': 'fpe',
+        'key': 'some-secret-key',
+        'alphabet': string.ascii_letters
+    }
+    """
+    validate_func_params(config, MANDATORY_CONFIG_PARAMS)
+    try:
+        alphabet = config.get('alphabet', string.printable)
+        e = pyffx.String(config['key'].encode("utf-8"), alphabet, length=len(val))
+        return e.encrypt(val)
+    except ValueError:
+        raise PseudoFuncError("Could not pseudonymize '{0}'. Check alphabet compatibility ({1})".format(val, alphabet))
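A short usage sketch of this format-preserving path, assuming the pyffx package imported above; the key and input value are illustrative, and the output keeps the input's length and alphabet:

import string

import pyffx

e = pyffx.String(b"some-secret-key", alphabet=string.digits, length=10)
token = e.encrypt("4242424242")
print(token)             # ten digits, but not the original value
print(e.decrypt(token))  # "4242424242" again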
+
+
+def pseudonymize_6(text: str, tagger: SequenceTagger) -> Tuple[str, str]:
+    """
+    Perform the pseudonymization action and return both the tagged version (see function "tag_entities") and the pseudonymized version
+    Args:
+        text (str): the input text to pseudonymize
+        tagger (SequenceTagger): the flair model for NER
+    Returns:
+        Tuple[str, str]: the original text with tags, and the pseudonymized text
+    """
+    with sw.timer("root"):
+        text_sentences = [Sentence(t.strip()) for t in text.split("\n") if t.strip()]
+        with sw.timer("model_annotation"):
+            # inplace function
+            tagger.predict(
+                sentences=text_sentences,
+                mini_batch_size=32,
+                embedding_storage_mode="none",
+                verbose=True,
+            )
+        return tag_entities(sentences=text_sentences)
+
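+# Illustrative usage sketch, assuming the flair package is available and that the surrounding
+# module provides sw and tag_entities; "fr-ner" is the model name used elsewhere in this repository:
+# >>> from flair.models import SequenceTagger
+# >>> tagger = SequenceTagger.load("fr-ner")
+# >>> tagged, pseudonymized = pseudonymize_6("Jean Dupont habite à Lyon.", tagger)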
+
+def get_replacement_stock() -> List[str]:
+    """
+    A list of faked names to replace the information you want to hide
+    """
+    stock = [f"{letter}..." for letter in ascii_uppercase] + [
+        f"{a}{b}..." for a, b in list(itertools.combinations(ascii_uppercase, 2))
+    ]
+    random.shuffle(stock)
+    return stock
+
+
+def apply_tagging_sentence(
+        starts: List[int],
+        ends: List[int],
+        tags: List[str],
+        entities: List[str],
+        plain_text: str,
+        replacement_dict: Dict[str, str],
+) -> Tuple[str, str]:
+    """
+    Args:
+        starts, ends, tags, entity texts of the entities found in the sentence + the text of the sentence + the prepared replacement dictionary for pseudo
+    Returns:
+        str, str: a text where the entities have a XML tag, and a text where entities have been pseudonymized
+    """
+
+    assert (
+            len(starts) == len(ends) == len(tags) == len(entities)
+    ), "Input lists mast be of the same length"
+    shift_tags_start, shift_tags_end = 0, 0  # shift due to the added tags
+    shift_pseudo_start, shift_pseudo_end = 0, 0
+    tagged_sentence, pseudo_sentence = plain_text, plain_text
+    n_entities = len(starts)
+
+    for i in range(n_entities):
+        start, end, entity, tag = starts[i], ends[i], entities[i], tags[i]
+        replacement = replacement_dict[entity]
+
+        pseudo_sentence = (
+                pseudo_sentence[: start + shift_pseudo_start]
+                + replacement
+                + pseudo_sentence[end + shift_pseudo_end:]
+        )
+        shift_pseudo_start += len(replacement) - (end - start)
+        shift_pseudo_end += len(replacement) - (end - start)
+        tagged_sentence = (
+                tagged_sentence[: start + shift_tags_start]
+                + "</a>"
+                + f"<{tag}>"
+                + plain_text[start:end]
+                + f"</{tag}>"
+                + "<a>"
+                + tagged_sentence[end + shift_tags_end:]
+        )
+        shift_tags_start += (
+                5 + 6 + 3 + 4
+        )  # 5 characters for tag <PER> (or LOC or ORG) + 6 for </PER> + 3 for <a> and 4 for </a>
+        shift_tags_end += (
+                5 + 6 + 3 + 4
+        )  # 5 characters for tag <PER> (or LOC or ORG) + 6 for </PER> + 3 for <a> and 4 for </a>
+    tagged_sentence = "<a>" + tagged_sentence + "</a>"
+    tagged_sentence = tagged_sentence.replace("<a></a>", "")
+    return (
+        f"<sentence>{tagged_sentence}</sentence>",
+        pseudo_sentence,
+    )
+
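+# Worked example (illustrative): one PER entity "Jean" at offsets 8-12 of the sentence below.
+# >>> apply_tagging_sentence([8], [12], ["PER"], ["Jean"], "Bonjour Jean !", {"Jean": "A..."})
+# ('<sentence><a>Bonjour </a><PER>Jean</PER><a> !</a></sentence>', 'Bonjour A... !')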
+
+def english_pseudo(text):
+    anon = AnonymizerChain(Anonymization('en_US'))
+    anon.add_anonymizers(EmailAnonymizer, NamedEntitiesAnonymizer('en_core_web_lg'))
+    clean_text, patch = anon.pseudonymize(text)
+    # assumed from context: the caller needs both the cleaned text and the revert patch
+    return clean_text, patch
+
+
+def pseudonymize_user_name(self, user_name: UserName) -> PseudoUserName:
+    hasher = hashlib.sha256()
+    hasher.update(user_name.encode('utf-8'))
+
+    # salt
+    hasher.update(b'\0')
+    hasher.update(self.salt)
+
+    pseudonymized = base64.b64encode(hasher.digest()).decode('utf-8')
+    return PseudoUserName(pseudonymized)
+
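+# Minimal standalone sketch of the same pattern (salted SHA-256, base64-encoded), assuming a
+# bytes salt; the UserName/PseudoUserName wrappers from the surrounding code are omitted:
+# >>> import base64, hashlib
+# >>> salt = b"example-salt"
+# >>> base64.b64encode(hashlib.sha256(b"alice" + b"\0" + salt).digest()).decode("utf-8")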
+
+def parse_lines(text):
+    lines = []
+    for m in LINE_RE.finditer(text):
+        ln = {"TIMESTAMP": parse_date(m.group(1).strip("\n").strip()),
+              "SPEAKER": m.group(2).strip(),
+              "MESSAGE": m.group(3).strip()}
+        lines.append(ln)
+    return lines
+
+
+def pseudonymize_7(graph: ProvDocument) -> ProvDocument:
+    log.info(f"pseudonymize agents in {graph=}")
+
+    # get all records except for agents and relations
+    records = list(graph.get_records((ProvActivity, ProvEntity)))
+
+    pseudonyms = dict()
+    for agent in graph.get_records(ProvAgent):
+        name = get_attribute(agent, USERNAME)
+        mail = get_attribute(agent, USEREMAIL)
+
+        if name is None:
+            raise ValueError("ProvAgent representing a user has to have a name!")
+
+        # hash name & mail if present
+        namehash = hashlib.sha256(bytes(name, "utf-8")).hexdigest()
+        mailhash = hashlib.sha256(bytes(mail, "utf-8")).hexdigest() if mail else None
+        # create a new id as a pseudonym using the hashes
+        pseudonym = qualified_name(f"User?name={namehash}&email={mailhash}")
+
+        # map the old id to the pseudonym
+        pseudonyms[agent.identifier] = pseudonym
+
+        # keep only prov role & prov type
+        # replace name & mail with hashes
+        pseudonymized = pseudonymize_agent(
+            agent,
+            identifier=pseudonym,
+            keep=[PROV_ROLE, PROV_TYPE],
+            replace={USERNAME: namehash, USEREMAIL: mailhash},
+        )
+        # add pseudonymized agent to the list of records
+        records.append(pseudonymized)
+
+    # replace old id occurences with the pseudonymized id
+    for relation in graph.get_records(ProvRelation):
+        formal = [(key, pseudonyms.get(val, val)) for key, val in relation.formal_attributes]
+        extra = [(key, pseudonyms.get(val, val)) for key, val in relation.extra_attributes]
+        r_type = PROV_REC_CLS.get(relation.get_type())
+        records.append(r_type(relation.bundle, relation.identifier, formal + extra))
+
+    return graph_factory(records)
+
+
+def _make_sentence(self, tokens_left, tokens_right, seq_length=128):
+    len_left = len(tokens_left)
+    len_right = len(tokens_right)
+
+    cut_len = len_left + len_right - (seq_length - 1)
+    if cut_len > 0:
+        cut_left = len_left - seq_length // 2
+        cut_right = len_right - (seq_length - 1) // 2
+        if cut_left < 0:
+            cut_left, cut_right = 0, cut_left + cut_right
+        elif cut_right < 0:
+            cut_left, cut_right = cut_left + cut_right, 0
+    else:
+        cut_left, cut_right = 0, 0
+
+    tokens_left = tokens_left[cut_left:]
+    # tokens_right = tokens_right[:-cut_right]
+    tokens_right = tokens_right[:len(tokens_right) - cut_right]
+
+    tokens = tokens_left + [self.bert_tokenizer.mask_token] + tokens_right
+    attention_mask = [1] * len(tokens_left) + [1] + [1] * len(tokens_right)
+
+    if len(tokens) < seq_length:
+        num_padding = seq_length - len(tokens)
+        tokens += [self.bert_tokenizer.pad_token] * num_padding
+        attention_mask += [0] * num_padding
+
+    # return value assumed from context: the assembled token sequence and its attention mask
+    return tokens, attention_mask
+
+
+def _random_word_context(self, text, max_trial=10):
+    puncs = list("[]!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-")
+    words = text.split()
+
+    trial = 0
+    done = False
+    while trial < max_trial and not done:
+        trial += 1
+        w_idx = random.randint(0, len(words) - 1)
+        word, left_res, right_res = words[w_idx], [], []
+
+        # If the word is already in vocab, it's good to go.
+        if len(word) >= self.min_word_len and \
+                (word.lower() in self.dictionary) and \
+                len(word) < DEFAULT_MAX_CHARACTER_POSITIONS - 4:
+            done = True
+        else:
+            # Otherwise, detach puncs at the first and the last char, and check again
+            if word[0] in puncs:
+                word, left_res = word[1:], [word[0]]
+            else:
+                word, left_res = word, []
+            if not word: continue  # The word was just a punc
+
+            if word[-1] in puncs:
+                word, right_res = word[:-1], [word[-1]]
+            else:
+                word, right_res = word, []
+
+            if len(word) < self.min_word_len or \
+                    (not word.lower() in self.dictionary) or \
+                    len(word) >= DEFAULT_MAX_CHARACTER_POSITIONS - 4:
+                continue
+
+            # Check whether it's anonymized field
+            right_snip = ' '.join(words[w_idx + 1:w_idx + 5])
+            if '**]' in right_snip and '[**' not in right_snip:
+                continue
+            left_snip = ' '.join(words[w_idx - 4:w_idx])
+            if '[**' in left_snip and '**]' not in left_snip:
+                continue
+
+            # Pass!
+            done = True
+
+    if done:
+        return word, ' '.join(words[:w_idx] + left_res), ' '.join(right_res + words[w_idx + 1:])
+    else:
+        raise ValueError('failed to choose word')
+
+
+def _random_word_context(self, text, max_trial=10):
+    puncs = list("[]!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-")
+    words = text.split()
+
+    trial = 0
+    done = False
+    while trial < max_trial and not done:
+        trial += 1
+        w_idx = random.randint(0, len(words) - 1)
+        word, left_res, right_res = words[w_idx], [], []
+
+        # If the word is already in vocab, it's good to go.
+        if len(word) >= self.min_word_len and \
+                (word.lower() in self.dictionary) and \
+                len(word) < DEFAULT_MAX_CHARACTER_POSITIONS - 4:
+            done = True
+        else:
+            # Otherwise, detach puncs at the first and the last char, and check again
+            if word[0] in puncs:
+                word, left_res = word[1:], [word[0]]
+            else:
+                word, left_res = word, []
+            if not word: continue  # The word was just a punc
+
+            if word[-1] in puncs:
+                word, right_res = word[:-1], [word[-1]]
+            else:
+                word, right_res = word, []
+
+            if len(word) < self.min_word_len or \
+                    (not word.lower() in self.dictionary) or \
+                    len(word) >= DEFAULT_MAX_CHARACTER_POSITIONS - 4:
+                continue
+
+            # Check whether it's anonymized field
+            right_snip = ' '.join(words[w_idx + 1:w_idx + 5])
+            if '**]' in right_snip and '[**' not in right_snip:
+                continue
+            left_snip = ' '.join(words[w_idx - 4:w_idx])
+            if '[**' in left_snip and '**]' not in left_snip:
+                continue
+
+            # Pass!
+            done = True
+
+    if done:
+        return word, ' '.join(words[:w_idx] + left_res), ' '.join(right_res + words[w_idx + 1:])
+    else:
+        raise ValueError('failed to choose word')
+
+
+def __next__(self):
+    # Select next note (length >= 2000)
+    while True:
+        try:
+            _, row = next(self.note_iterrows)
+        except StopIteration:
+            self._load_random_csv()
+            _, row = next(self.note_iterrows)
+        note_id = int(row.ROW_ID)
+        note = row.TEXT.strip()
+        # if len(note) >= 2000:
+        # break
+        if len(note) < 2000:
+            continue
+
+        try:
+            correct, left, right = self._random_word_context(note)
+        except:
+            # import traceback; traceback.print_exc();
+            continue
+        break
+
+    # Corrupt and pseudonymize
+    correct = correct.lower()
+    if random.uniform(0, 1) >= self.no_corruption_prob:
+        typo = self.word_corrupter.corrupt_word(correct)
+    else:
+        typo = correct
+    left = self.mimic_pseudo.pseudonymize(left)
+    left = self._process_note(left)
+    left = ' '.join(left.split(' ')[-128:])
+    right = self.mimic_pseudo.pseudonymize(right)
+    right = self._process_note(right)
+    right = ' '.join(right.split(' ')[:128])
+
+    # Parse
+    temp_csv_row = [-1, note_id, typo, left, right, correct]
+    # print(f'{self.csv_fname}({note_id}, {_}/{len(self.df_note)}): {correct} -> {typo}')
+    example = self._parse_row(temp_csv_row)
+
+    return example
+
+
+def pseudonymize_8(self, s):
+    return struct.unpack(">" + ("I" * (len(s) / self.__stride)), s)
+
+
+def pseudonymize(field):
+    return sha256(field.encode() + salt.encode()).hexdigest()[:16]
+
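+# Illustrative usage, assuming a module-level `salt` string is defined as used above:
+# >>> salt = "example-salt"
+# >>> pseudonymize("jane.doe@example.com")   # 16-character hex prefix of the salted SHA-256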
+
+def pseudonymize(
+        self,
+        original_text: str,
+        presidio_response: List[RecognizerResult],
+        count: int,
+):
+    """
+    :param original_text: str containing the original text
+    :param presidio_response: list of results from Presidio, to be used to know where entities are
+    :param count: number of perturbations to return
+    :return: List[str] with fake perturbations of original text
+    """
+
+    presidio_response = sorted(presidio_response, key=lambda resp: resp.start)
+
+    anonymizer_engine = AnonymizerEngine()
+    anonymized_result = anonymizer_engine.anonymize(
+        text=original_text, analyzer_results=presidio_response
+    )
+
+    templated_text = anonymized_result.text
+    templated_text = templated_text.replace(">", "}}").replace("<", "{{")
+    fake_texts = [self.parse(templated_text, add_spans=False) for _ in range(count)]
+    return fake_texts
+
+
+def pseudonymize(
+        self, key_file: KeyFile, identifiers: List["Identifier"]
+) -> List["Key"]:
+    """Get a pseudonym for each identifier. If identifier is known in PIMS,
+    return this. Otherwise, have PIMS generate a new pseudonym and return that.
+    Parameters
+    ----------
+    identifiers: List[Identifier]
+        The identifiers to get pseudonyms for
+    key_file: KeyFile
+        The key_file to use
+    Notes
+    -----
+    Each call to this function calls the PIMS API twice for each unique source in
+    identifiers. This is a result of the way the API can be called.
+    Returns
+    -------
+    List[Key]
+        The PIMS pseudonym for each identifier
+    """
+    keys = []
+    # Each call to process a list of identifiers only allows a single source.
+    # Split identifiers by source
+    per_source = defaultdict(list)
+    for x in identifiers:
+        per_source[x.source].append(x)
+    for source, items in per_source.items():
+        keys = keys + self.deidentify(key_file, [x.value for x in items], source)
+
+    return keys
+
+
+def pseudonymize(self, s):
+    sl = len(s) // 2
+    return struct.unpack('<%dh' % sl, s)
+
+
+def regex_anonymizer(self, text: str, regex: Pattern, provider: str) -> str:
+    '''
+    Anonymize all substring matching a specific regex using a Faker provider
+    '''
+    matchs = re.findall(regex, text)
+    return self.replace_all(text, matchs, provider)
+
+
+def psdnmyz_2():
+    # load the two CSVs to be pseudonymized
+    # metrics_df=pd.read_csv('/home/arasan/testrep/psmd/jureca/TOTAL_METRICS_Skel_header.csv')
+    seg_df = pd.read_csv('/home/arasan/testrep/psmd/jureca/psmd_seg_vols.csv')
+    # add random id column to both df
+    # below line is a disaster
+    # metrics_df['RNDNAME'] = metrics_df['NAME'].apply(lambda x: gocept.pseudonymize.integer(x, 'secret'))
+    # seg_df['RNDNAME'] = seg_df['NAME'].apply(lambda x: gocept.pseudonymize.integer(x, 'secret'))
+    #    a=np.random.randint(100000,999999,metrics_df.NAME.values.size)
+    #    metrics_df['RNDNAME']=a
+    #    print 'after rqndom id has been added'
+    #    flagg=True
+    #    while(flagg):
+    #        try:
+    #            print pd.concat(g for _, g in metrics_df.groupby("RNDNAME") if len(g) > 1)
+    #        except ValueError:
+    #            print 'NO DUPLICAtes'
+    #            metrics_df.to_csv('/home/arasan/testrep/psmd/jureca/TOTAL_rnd_temp.csv')
+    #            flagg=False
+    #        else:
+    #            print 'DUPES'
+    #            metrics_df=metrics_df.drop('RNDNAME', axis=1)
+    #            a=np.random.randint(100000,999999,metrics_df.NAME.values.size)
+    #            metrics_df['RNDNAME']=a
+    # load double-checked randomized df: 1) above try/except 2) using np.unique
+    metrnd = pd.read_csv('/home/arasan/testrep/psmd/jureca/TOTAL_rnd_temp.csv')
+    seg_df['SNO'] = seg_df.index + 1
+    metrnd['SNO'] = seg_df.index + 1
+    # add RNDNAME column to seg_df
+    seg_df['RNDNAME'] = metrnd.RNDNAME.values
+    # rename columns NAME to ID and RNDNAME to NAME
+    seg_df = seg_df.rename(index=str, columns={"NAME": "ID"})
+    seg_df = seg_df.rename(index=str, columns={"RNDNAME": "NAME"})
+    metrnd = metrnd.rename(index=str, columns={"NAME": "ID"})
+    metrnd = metrnd.rename(index=str, columns={"RNDNAME": "NAME"})
+    # dump map out with 3 columns ID,NAME,SNO
+    mapdf = metrnd[['ID', 'NAME', 'SNO']]
+    mapdf.to_csv('/home/arasan/testrep/psmd/jureca/bordeaux_packet2/psdnmyz_map.csv', index=False)
+    # drop ID and SNO
+    seg_df = seg_df.drop(['ID', 'SNO'], axis=1)
+    metrnd = metrnd.drop(['ID', 'SNO'], axis=1)
+    # move NAME column to first position
+    metrnd = metrnd[['NAME', 'mean_skel_MD_LH_RH', 'sd_skel_MD_LH_RH', 'Pw90S_skel_MD_LH_RH', 'mean_skel_FA_LH_RH',
+                     'sd_skel_FA_LH_RH', 'mean_skel_AD_LH_RH', 'sd_skel_AD_LH_RH', 'mean_skel_RD_LH_RH',
+                     'sd_skel_RD_LH_RH']]
+    seg_df = seg_df[['NAME', 'AGE', 'SEX', 'GMV', 'WMV', 'CSFV', 'ICV']]
+    # if pd.concat(g for _, g in metrics_df.groupby("RNDNAME") if len(g) > 1).RNDNAME.values.size:
+    #    print 'NOT OK'
+    # else:
+    #    print 'OK'
+    metrnd.to_csv('/home/arasan/testrep/psmd/jureca/bordeaux_packet2/TOTAL_METRICS_Skel_header.csv', index=False)
+    seg_df.to_csv('/home/arasan/testrep/psmd/jureca/bordeaux_packet2/psmd_seg_vols.csv', index=False)
+
+
+def psdnmyz_3():
+    # load the two CSVs to be pseudonymized
+    # metrics_df=pd.read_csv('/home/arasan/testrep/psmd/jureca/TOTAL_METRICS_Skel_header.csv')
+    seg_df = pd.read_csv('/home/arasan/testrep/psmd/jureca/psmd_seg2_vols.csv')
+    # add random id column to both df
+    # below line is a disaster
+    # metrics_df['RNDNAME'] = metrics_df['NAME'].apply(lambda x: gocept.pseudonymize.integer(x, 'secret'))
+    # seg_df['RNDNAME'] = seg_df['NAME'].apply(lambda x: gocept.pseudonymize.integer(x, 'secret'))
+    #    a=np.random.randint(100000,999999,metrics_df.NAME.values.size)
+    #    metrics_df['RNDNAME']=a
+    #    print 'after rqndom id has been added'
+    #    flagg=True
+    #    while(flagg):
+    #        try:
+    #            print pd.concat(g for _, g in metrics_df.groupby("RNDNAME") if len(g) > 1)
+    #        except ValueError:
+    #            print 'NO DUPLICAtes'
+    #            metrics_df.to_csv('/home/arasan/testrep/psmd/jureca/TOTAL_rnd_temp.csv')
+    #            flagg=False
+    #        else:
+    #            print 'DUPES'
+    #            metrics_df=metrics_df.drop('RNDNAME', axis=1)
+    #            a=np.random.randint(100000,999999,metrics_df.NAME.values.size)
+    #            metrics_df['RNDNAME']=a
+    # load double-checked randomized df: 1) above try/except 2) using np.unique
+    metrnd = pd.read_csv('/home/arasan/testrep/psmd/jureca/TOTAL_rnd_temp.csv')
+    seg_df['SNO'] = seg_df.index + 1
+    # metrnd['SNO']=seg_df.index+1
+    # add RNDNAME column to seg_df
+    seg_df['RNDNAME'] = metrnd.RNDNAME.values
+    # rename columns NAME to ID and RNDNAME to NAME
+    # seg_df=seg_df.rename(index=str, columns={"NAME": "ID"})
+    seg_df = seg_df.rename(index=str, columns={"RNDNAME": "NAME"})
+    # metrnd=metrnd.rename(index=str, columns={"NAME": "ID"})
+    # metrnd=metrnd.rename(index=str, columns={"RNDNAME": "NAME"})
+    # dump map out with 3 columns ID,NAME,SNO
+    # mapdf=metrnd[['ID','NAME','SNO']]
+    # mapdf.to_csv('/home/arasan/testrep/psmd/jureca/bordeaux_packet2/psdnmyz_map.csv',index=False)
+    # drop ID and SNO
+    seg_df = seg_df.drop(['ID', 'SNO'], axis=1)
+    # metrnd=metrnd.drop(['ID','SNO'],axis=1)
+    # move NAME column to first position
+    # metrnd=metrnd[['NAME','mean_skel_MD_LH_RH','sd_skel_MD_LH_RH','Pw90S_skel_MD_LH_RH','mean_skel_FA_LH_RH','sd_skel_FA_LH_RH','mean_skel_AD_LH_RH','sd_skel_AD_LH_RH','mean_skel_RD_LH_RH','sd_skel_RD_LH_RH']]
+    seg_df = seg_df[['NAME', 'AGE', 'SEX', 'ICV']]
+    # if pd.concat(g for _, g in metrics_df.groupby("RNDNAME") if len(g) > 1).RNDNAME.values.size:
+    #    print 'NOT OK'
+    # else:
+    #    print 'OK'
+    # metrnd.to_csv('/home/arasan/testrep/psmd/jureca/bordeaux_packet2/TOTAL_METRICS_Skel_header.csv',index=False)
+    seg_df.to_csv('/home/arasan/testrep/psmd/jureca/bordeaux_packet3/psmd_seg2_vols.csv', index=False)
+
+
+def hashPseudonym(self, i, key, tile):
+    digest = hashes.Hash(hashes.SHA256(), default_backend())
+    # for i in range (0,len(plainTail)):    # {
+    _digest = digest.copy()
+    # key = secrets.token_bytes(32)
+    _digest.update(bytes(i))
+    _digest.update(key)
+    _digest.update(bytes(tile))
+    p = _digest.finalize()  # }
+    # digest.finalize()
+    return p
+
+
+def test_localization_of_pseudonym(self):
+    name = b" a 16 byte name "
+    target = b"PEP3 storage_facility"
+
+    pp = pep3_pb2.Pseudonymizable(data=name,
+                                  state=pep3_pb2.Pseudonymizable.UNENCRYPTED_NAME)
+
+    self.collector.pseudonymize([pp])
+    self.collector.relocalize([pp],
+                              self.config.collector.warrants.to_sf)
+
+    sfp = elgamal.Triple.unpack(pp.data) \
+        .decrypt(self.sf.private_keys['pseudonym'])
+
+    pseudonym_secrets = {}
+    for peer_secrets in self.secrets.peers.values():
+        for shard, shard_secrets in peer_secrets.by_shard.items():
+            pseudonym_secrets[shard] \
+                = shard_secrets.pseudonym_component_secret
+
+    s = 1
+    e = ed25519.scalar_unpack(common.sha256(target))
+    for secret in pseudonym_secrets.values():
+        s *= pow(ed25519.scalar_unpack(secret), e, ed25519.l)
+        s %= ed25519.l
+
+    self.assertEqual(
+        sfp * ed25519.scalar_inv(s),
+        ed25519.Point.lizard(name))
+
+
+def test_store_and_retrieve(self):
+    # first store a record with random source and target ip addresses,
+    # and see if we can recover it.
+    col_request = pep3_pb2.StoreRequest()
+    col_request.id = os.urandom(16)
+
+    flowrecord = col_request.records.add()
+    flowrecord.source_ip.data = os.urandom(16)
+    flowrecord.source_ip.state = pep3_pb2.Pseudonymizable.UNENCRYPTED_NAME
+    flowrecord.destination_ip.data = os.urandom(16)
+    flowrecord.destination_ip.state = \
+        pep3_pb2.Pseudonymizable.UNENCRYPTED_NAME
+
+    flowrecord.anonymous_part.number_of_bytes = 123
+    flowrecord.anonymous_part.number_of_packets = 456
+
+    updates = list(self.collector.connect_to('collector').Store(
+        iter([col_request])))
+    self.assertEqual(len(updates), 1)
+    self.assertEqual(updates[0].stored_id, col_request.id)
+
+    # store the same flowrecord twice, to see if that causes troubles
+    col_request.id = os.urandom(16)
+    updates = list(self.collector.connect_to('collector').Store(
+        iter([col_request])))
+    self.assertEqual(len(updates), 1)
+    self.assertEqual(updates[0].stored_id, col_request.id)
+
+    query = pep3_pb2.SqlQuery()
+
+    # manually compute storage_facility-local pseudonyms for query
+    sf_name = b"PEP3 storage_facility"
+
+    pseudonym_secrets = {}
+    for peer_secrets in self.secrets.peers.values():
+        for shard, shard_secrets in peer_secrets.by_shard.items():
+            pseudonym_secrets[shard] \
+                = shard_secrets.pseudonym_component_secret
+
+    s = 1
+    e = ed25519.scalar_unpack(common.sha256(sf_name))
+    for secret in pseudonym_secrets.values():
+        s *= pow(ed25519.scalar_unpack(secret), e, ed25519.l)
+        s %= ed25519.l
+
+    # see if the record was stored correctly by querying the
+    # database directly.
+    query.query = """SELECT peped_flows.p_dst_ip FROM peped_flows
+            WHERE peped_flows.p_src_ip=:ip"""
+    ip = query.parameters['ip'].pseudonymizable_value
+    ip.data = (ed25519.Point.lizard(
+        flowrecord.source_ip.data) * s).pack()
+    ip.state = pep3_pb2.Pseudonymizable.UNENCRYPTED_PSEUDONYM
+
+    row = self.sf.connect_to('database') \
+        .Query(query).next().rows[0]
+
+    self.assertEqual(row.cells[0].pseudonymizable_value.data,
+                     (ed25519.Point.lizard(flowrecord.destination_ip.data) * s
+                      ).pack())
+
+    # manually compute researcher-local pseudonyms for query
+    researcher_name = b"PEP3 researcher"
+
+    pseudonym_secrets = {}
+    for peer_secrets in self.secrets.peers.values():
+        for shard, shard_secrets in peer_secrets.by_shard.items():
+            pseudonym_secrets[shard] \
+                = shard_secrets.pseudonym_component_secret
+
+    s = 1
+    e = ed25519.scalar_unpack(common.sha256(researcher_name))
+    for secret in pseudonym_secrets.values():
+        s *= pow(ed25519.scalar_unpack(secret), e, ed25519.l)
+        s %= ed25519.l
+
+    # now query via the researcher
+    query.parameters['ip'].pseudonymizable_value.data \
+        = (ed25519.Point.lizard(flowrecord.source_ip.data) * s).pack()
+
+    row = self.researcher.connect_to('researcher') \
+        .Query(query).next().rows[0]
+
+    self.assertEqual(row.cells[0].pseudonymizable_value.data,
+                     (ed25519.Point.lizard(flowrecord.destination_ip.data) * s
+                      ).pack())
+
+
+def test_depseudonymize(self):
+    ip = os.urandom(16)
+
+    # manually compute investigator-local pseudonym
+    pseudonym_secrets = {}
+    for peer_secrets in self.secrets.peers.values():
+        for shard, shard_secrets in peer_secrets.by_shard.items():
+            pseudonym_secrets[shard] \
+                = shard_secrets.pseudonym_component_secret
+
+    s = 1
+    e = ed25519.scalar_unpack(common.sha256(b"PEP3 investigator"))
+    for secret in pseudonym_secrets.values():
+        s *= pow(ed25519.scalar_unpack(secret), e, ed25519.l)
+        s %= ed25519.l
+
+    investigator_local_ip = (ed25519.Point.lizard(ip) * s).pack()
+
+    # manually create warrant
+    warrant = pep3_pb2.DepseudonymizationRequest.Warrant()
+    warrant.act.actor = b"PEP3 investigator"
+    warrant.act.name.state = pep3_pb2.Pseudonymizable.UNENCRYPTED_PSEUDONYM
+    warrant.act.name.data = investigator_local_ip
+
+    self.investigator.encrypt([warrant.act.name],
+                              self.investigator.public_keys['pseudonym'])
+
+    warrant.signature = crypto.sign(
+        crypto.load_privatekey(crypto.FILETYPE_PEM,
+                               self.secrets.root_certificate_keys.warrants),
+        warrant.act.SerializeToString(), 'sha256')
+
+    result = self.investigator.connect_to("investigator") \
+        .Depseudonymize(warrant)
+
+    self.assertEqual(result.data, ip)
+
+
+def anonymize(cls, user, ldap_attrs, **kwargs):
+    # type: (User, Dict[AnyStr, Any], **Any) -> Dict[AnyStr, AnyStr]
+    """
+    Change values of function arguments to anonymize/pseudonymize user if
+    UCRV asm/attributes/<staff/student>/anonymize is true. Will return
+    unchanged function arguments otherwise.
+    :param User user: user object
+    :param dict ldap_attrs: dictionary with the users LDAP attributes
+    :return: dictionary with [modified] function arguments
+    :rtype: dict
+    :raises NotImplementedError: if cls.ucr_anonymize_key_base is unset
+    """
+    ucr = get_ucr()
+    if ucr.is_true(cls.ucr_anonymize_key_base):
+        for k, v in cls.anonymize_mapping().items():
+            if v and v.startswith('%'):
+                attr = v[1:].strip()
+                try:
+                    v = ldap_attrs[attr][0]
+                except KeyError:
+                    raise ValueError('Attribute {!r} not found in LDAP object of {}.'.format(attr, user))
+                except IndexError:
+                    raise ValueError('Attribute {!r} empty in LDAP object of {}.'.format(attr, user))
+            kwargs[k] = v
+    return kwargs
+
+
+def _modify_dataset(
+        self,
+        anonymizer: Anonymizer,
+        pseudonym: str,
+        ds: Dataset,
+) -> None:
+    """Optionally pseudonymize an incoming dataset with the given pseudonym
+    and add the trial ID and name to the DICOM header if specified."""
+    if pseudonym:
+        # All dates get pseudonymized, but we want to retain the study date.
+        study_date = ds.StudyDate
+
+        anonymizer.anonymize(ds)
+
+        ds.StudyDate = study_date
+
+        ds.PatientID = pseudonym
+        ds.PatientName = pseudonym
+
+    trial_protocol_id = self.transfer_task.job.trial_protocol_id
+    trial_protocol_name = self.transfer_task.job.trial_protocol_name
+
+    if trial_protocol_id:
+        ds.ClinicalTrialProtocolID = trial_protocol_id
+
+    if trial_protocol_name:
+        ds.ClinicalTrialProtocolName = trial_protocol_name
+
+    if pseudonym and trial_protocol_id:
+        session_id = f"{ds.StudyDate}-{ds.StudyTime}"
+        ds.PatientComments = f"Project:{trial_protocol_id} Subject:{pseudonym} Session:{pseudonym}_{session_id}"
+
+
+def _psc1(psc1, psc2_from_psc1):
+    if 'TEST' in psc1.upper():
+        # skip test subjects
+        logging.debug('skipping test subject "%s"', psc1)
+    else:
+        # find and skip subjects with invalid identifier
+        if psc1[-3:] in {'FU2', 'FU3'}:
+            psc1 = psc1[:-3]
+        elif psc1[-2:] == 'SB':
+            psc1 = psc1[:-2]
+        if psc1 in psc2_from_psc1:
+            return psc1
+        elif psc1 in {'0x0000xxxxxx'}:
+            logging.info('skipping known invalid subject identifier "%s"',
+                         psc1)
+        else:
+            logging.error('invalid subject identifier "%s"', psc1)
+    return None
+
+
+def pseudonymize_node_name(name):
+    """Replace Node.Name (detector ID) by a hash with secret key"""
+    h = hashlib.md5((app.secret_key + name).encode('utf-8'))
+    return 'node.' + h.hexdigest()[:6]
+
+
+def pseudonymize(self, size=None):
+    """
+    Return pseudonymized values for this attribute, which is used to
+    substitute identifiable data with a reversible, consistent value.
+    """
+    size = size or self.size
+    if size != self.size:
+        attr = Series(np.random.choice(self.bins, size=size, p=self.prs))
+    else:
+        attr = self
+    if self.categorical:
+        mapping = {b: utils.pseudonymise_string(b) for b in self.bins}
+        return attr.map(lambda x: mapping[x])
+
+    if self.type == 'string':
+        return attr.map(utils.pseudonymise_string)
+    elif self.is_numerical or self.type == 'datetime':
+        return attr.map(str).map(utils.pseudonymise_string)
+
+
+def pseudonymize(self, content):
+    if not content: return content
+    content_modified = ''
+    start = 0
+    for mo in re.finditer(r"\[\*\*[^\[]*\*\*\]", content):
+        replacement = self.mapper.get_mapping(mo.group(0))
+        content_modified += content[start: mo.start()]
+        content_modified += replacement
+        start = mo.end()
+
+    if start < len(content):
+        content_modified += content[start: len(content)]
+    return content_modified

+ 184 - 0
Pseudonym/process_file.py

@@ -0,0 +1,184 @@
+import textract
+from docx import Document
+import re
+from string import ascii_uppercase
+from flair.data import Sentence
+import dash_html_components as html
+import run.tokenization as tkz
+import itertools
+
+
+def load_text(doc_path):
+    return textract.process(doc_path, encoding='utf-8').decode("utf8").replace("|", "\t")
+
+
+def flair_predict_tags(text, tagger):
+    """ Predict using flair tagger"""
+    sentence = Sentence(text)
+    tagger.predict(sentence)
+    return sentence
+
+
+def build_pseudonymisation_map_flair(sentences, pseudos, acceptance_score, tags="all"):
+    """
+    Gets all replacements to be made in the pseudonymized text using flair-tagged sentences
+
+    :param sentences: list of tuples (flair tagged sentence, original sentence)
+    :param pseudos: list of pseudos to be used
+    :param acceptance_score: minimum confidence score to accept a NER tag
+    :param tags: list of entity tags to pseudonymize, or "all"
+    :return: dict: keys are spans in the sentence, values are pseudos
+    """
+
+    replacements = {}
+    mapping = {}
+    for sentence in sentences:
+        for entity in sentence[0].get_spans('ner'):
+            if entity.score > acceptance_score and entity.tag != '0' and (entity.tag in tags or tags == "all"):
+                # add the score as a parameter
+
+                # TODO: rework the handling of B and I tags
+                for token in entity.tokens:
+                    if token.text.lower() not in mapping:
+                        mapping[token.text.lower()] = pseudos.pop(0)
+
+                    replacements[sentence[1][token.idx - 1]] = mapping[token.text.lower()]
+
+    return replacements
+
+
+def pseudonymize_text(replacements, text):
+    """ Create new text with pseudos in place of NER """
+    index = 0
+    pseudonymized_text = ''
+    for key in sorted(replacements.keys()):
+        chunk = text[index:key[0]]
+        # print(text[key[0]:key[1]])
+        pseudonymized_text += chunk
+        pseudonymized_text += replacements[key]
+        index = key[1]
+
+    pseudonymized_text += text[index:]
+
+    return pseudonymized_text
+
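+# Worked example (illustrative): a single replacement spanning characters 8-12 of the text.
+# >>> pseudonymize_text({(8, 12): "A..."}, "Bonjour Jean !")
+# 'Bonjour A... !'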
+
+def pseudonymize_html_text(replacements, text):
+    """ Create html blocs with pseudos in place of NER for dash tool"""
+
+    index = 0
+    pseudonymized_text = ''
+    for key in sorted(replacements.keys()):
+        chunk = text[index:key[0]]
+        # print(text[key[0]:key[1]])
+        pseudonymized_text += chunk
+        pseudonymized_text += "<ano>" + replacements[key] + "</ano>"
+        index = key[1]
+
+    pseudonymized_text += text[index:]
+
+    return pseudonymized_text
+
+
+def write_docx_file(text, path):
+    """Write pseudonimized file to docx"""
+    document = Document()
+    paragraph = document.add_paragraph(text)
+    document.save(path)
+
+
+def create_html_file(text, sent_tokenizer, word_tokenizer, tagger, acceptance_score=0.5):
+    """ Create HMTL files for the Dash tool """
+    singles = ["{}...".format(letter) for letter in ascii_uppercase]
+    doubles = ["{}{}...".format(d[0], d[1]) for d in list(itertools.combinations(ascii_uppercase, 2))]
+    pseudos = singles + doubles
+    sentences = tkz.tokenize_text(text, sent_tokenizer, word_tokenizer)
+
+    tagged_sentences = []
+    for sentence in sentences:
+        pseudo_sentence = " ".join([text[word[0]: word[1]] for word in sentence])
+        tagged_sentence = flair_predict_tags(pseudo_sentence, tagger)
+        tagged_sentences.append((tagged_sentence, sentence))
+
+    replacements = build_pseudonymisation_map_flair(tagged_sentences, pseudos, acceptance_score)
+    pseudonymized_text = pseudonymize_html_text(replacements, text)
+
+    html_text = []
+
+    for p in pseudonymized_text.split("\n"):
+        html_text.append(highlight_pseudo(p))
+
+    return html_text
+
+
+def highlight_pseudo(paragraph):
+    """ Hghlight pseudonymized text for Dash tool """
+    index = 0
+
+    new_str = []
+    for change in re.finditer('<ano>(.*?)</ano>', paragraph):
+        b = change.start(0)
+        e = change.end(0)
+        new_str.append(paragraph[index:b])
+        new_str.append(html.Mark(change.group(1), style={'color': 'blue'}))
+        index = e
+
+    new_str.append(paragraph[index:])
+
+    return html.P(new_str)
+
+
+def create_CoNLL(tagged_sentences, path):
+    """ Write CoNLL file """
+    with open(path, "w") as file:
+        for sent in tagged_sentences:
+            for token in sent[0]:
+                file.write(f"{token.text}\t{token.get_tag('ner').value}\n")
+
+
+def process_file(path, sent_tokenizer, word_tokenizer, tagger, acceptance_score=0.5, docx_path=False, CoNLL_path=False, tags="all"):
+    """
+    Pseudonymization of a text file. Can create a CoNLL file, HTML or docx output. Only the NLTK tokenizers are supported for the moment.
+    :param acceptance_score: minimum confidence score to accept a NER tag
+    :param tagger: a NER tagger
+    :param path: original file path
+    :param sent_tokenizer: sentence tokenizer
+    :param word_tokenizer: word tokenizer
+    :param docx_path: if a path is given, a DOCX file will be written
+    :param CoNLL_path: if a path is given, a CoNLL file will be written
+    :param tags: list of entity tags to pseudonymize, or "all"
+    :return: pseudonymized text
+    """
+
+    text = load_text(path)
+    singles = ["{}...".format(letter) for letter in ascii_uppercase]
+    doubles = ["{}{}...".format(d[0], d[1]) for d in list(itertools.combinations(ascii_uppercase, 2))]
+    pseudos = singles + doubles
+
+    sentences = tkz.tokenize_text(text, sent_tokenizer, word_tokenizer)
+
+    tagged_sentences = []
+    for sentence in sentences:
+        pseudo_sentence = " ".join([text[word[0]: word[1]] for word in sentence])
+        tagged_sentence = flair_predict_tags(pseudo_sentence, tagger)
+        tagged_sentences.append((tagged_sentence, sentence))
+
+    if CoNLL_path:
+        create_CoNLL(tagged_sentences, CoNLL_path)
+
+    replacements = build_pseudonymisation_map_flair(tagged_sentences, pseudos, acceptance_score, tags)
+    pseudonymized_text = pseudonymize_text(replacements, text)
+
+    if docx_path:
+        write_docx_file(pseudonymized_text, docx_path)
+
+    return pseudonymized_text
+
+
+if __name__ == '__main__':
+    from nltk.tokenize import WordPunctTokenizer, PunktSentenceTokenizer
+    from flair.models import SequenceTagger
+
+    word_tokenizer = WordPunctTokenizer()
+    tagger = SequenceTagger.load('fr-ner')
+    sent_tokenizer = PunktSentenceTokenizer("nltk_data/tokenizers/punkt/french.pickle")
+    path = "path_to_doc"
+    process_file(path, sent_tokenizer, word_tokenizer, tagger, docx_path=False, CoNLL_path=False, tags=["PER"])

+ 17 - 0
Target/File/DataManagment_3.py

@@ -0,0 +1,17 @@
+def dump(self, save=False):
+        try:
+            if __name__ == '__main__':
+                f = open("/tmp/test", "wb")
+            else:
+                f = open(settings.SAVE_FILE, "wb")
+        except Exception:
+            # TODO : Custom exeption for saving error
+            raise Exception("Error while saving : data may be unavailable")
+        if self.save_file is None:
+            self.save_file = self.init_save()
+        pickle.dump(self.save_file, f)
+        f.close()
+
+        f = open('/tmp/bidule', 'w')
+        f.write(json.dumps(self.save_file))
+        f.close()

+ 19 - 0
Target/File/checkpoint_2.py

@@ -0,0 +1,19 @@
+def save(self, name, **kwargs):
+        if not self.save_dir:
+            return
+
+        if not self.save_to_disk:
+            return
+
+        data = {}
+        data["model"] = self.model.state_dict()
+        if self.optimizer is not None:
+            data["optimizer"] = self.optimizer.state_dict()
+        if self.scheduler is not None:
+            data["scheduler"] = self.scheduler.state_dict()
+        data.update(kwargs)
+
+        save_file = os.path.join(self.save_dir, "{}.pth".format(name))
+        self.logger.info("Saving checkpoint to {}".format(save_file))
+        torch.save(data, save_file)
+        self.tag_last_checkpoint(save_file)

+ 3 - 0
Target/File/checkpoint_4.py

@@ -0,0 +1,3 @@
+def has_checkpoint(self):
+        save_file = os.path.join(self.save_dir, "last_checkpoint")
+        return os.path.exists(save_file)

+ 4 - 0
Target/File/checkpoint_6.py

@@ -0,0 +1,4 @@
+def tag_last_checkpoint(self, last_filename):
+        save_file = os.path.join(self.save_dir, "last_checkpoint")
+        with open(save_file, "w") as f:
+            f.write(last_filename)

+ 2 - 0
Target/File/checkpoint_7.py

@@ -0,0 +1,2 @@
+def _load_file(self, f):
+        return torch.load(f, map_location=torch.device("cpu"))

+ 23 - 0
Target/File/file_1_3.py

@@ -0,0 +1,23 @@
+def save_dict_as_json(data: dict, path: str = os.getcwd(),
+                      filename: str = f'/temp_files/lay_name{random.randint(1, 1000)}') -> str:
+    if '.json' in path:
+        path_save = path
+    elif '.json' in filename:
+        path_save = filename
+    elif '.' in filename:
+        path_save = path + '/' + filename.split('/')[-1]
+    else:
+        path_save = path + f'/{filename}.json'
+    path_save = path_save.replace("\\", '/')
+    path_save = path_save.replace('//', '/')
+
+    try:
+        json_file = open(path_save, mode='x')
+    except FileNotFoundError:
+        os.mkdir(path_save.split('lay_name')[0])
+        json_file = open(path_save, mode='x')
+    except FileExistsError:
+        json_file = open(path_save, mode='w')
+    json.dump(data, json_file)
+    json_file.close()
+    return path_save

+ 8 - 0
Target/File/file_util_3.py

@@ -0,0 +1,8 @@
+def write_file(file_path, file_content):
+	if file_path.find('/') != -1:
+		father_dir = '/'.join(file_path.split('/')[0:-1])
+		if not os.path.exists(father_dir):
+			os.makedirs(father_dir)
+	file_object = open(file_path, 'w')
+	file_object.write(file_content)
+	file_object.close()

+ 7 - 0
Target/File/file_util_4.py

@@ -0,0 +1,7 @@
+def write_file_not_cover(file_path, file_content):
+	father_dir = '/'.join(file_path.split('/')[0:-1])
+	if not os.path.exists(father_dir):
+		os.makedirs(father_dir)
+	file_object = open(file_path, 'a')
+	file_object.write(file_content)
+	file_object.close()

+ 12 - 0
Target/File/files_13.py

@@ -0,0 +1,12 @@
+def save(self, name, content, save=True):
+        name = self.field.generate_filename(self.instance, name)
+        self.name = self.storage.save(name, content)
+        setattr(self.instance, self.field.name, self.name)
+
+        # Update the filesize cache
+        self._size = content.size
+        self._committed = True
+
+        # Save the object because it has changed, unless save is False
+        if save:
+            self.instance.save()

+ 7 - 0
Target/File/files_25.py

@@ -0,0 +1,7 @@
+def pre_save(self, model_instance, add):
+        "Returns field's value just before saving."
+        file = super(FileField, self).pre_save(model_instance, add)
+        if file and not file._committed:
+            # Commit the file to storage prior to saving the model
+            file.save(file.name, file, save=False)
+        return file

+ 33 - 0
Target/File/filesystem_13.py

@@ -0,0 +1,33 @@
+def writeFile(self, localFile, remoteFile, fileType=None, forceCheck=False):
+        written = False
+
+        checkFile(localFile)
+
+        self.checkDbmsOs()
+
+        if localFile.endswith('_'):
+            localFile = decloakToTemp(localFile)
+
+        if conf.direct or isStackingAvailable():
+            if isStackingAvailable():
+                debugMsg = "going to upload the file '%s' with " % fileType
+                debugMsg += "stacked query SQL injection technique"
+                logger.debug(debugMsg)
+
+            written = self.stackedWriteFile(localFile, remoteFile, fileType, forceCheck)
+            self.cleanup(onlyFileTbl=True)
+        elif isTechniqueAvailable(PAYLOAD.TECHNIQUE.UNION) and Backend.isDbms(DBMS.MYSQL):
+            debugMsg = "going to upload the file '%s' with " % fileType
+            debugMsg += "UNION query SQL injection technique"
+            logger.debug(debugMsg)
+
+            written = self.unionWriteFile(localFile, remoteFile, fileType, forceCheck)
+        else:
+            errMsg = "none of the SQL injection techniques detected can "
+            errMsg += "be used to write files to the underlying file "
+            errMsg += "system of the back-end %s server" % Backend.getDbms()
+            logger.error(errMsg)
+
+            return None
+
+        return written

+ 35 - 0
Target/File/hash_1_16.py

@@ -0,0 +1,35 @@
+def storeHashesToFile(attack_dict):
+    if not attack_dict:
+        return
+
+    if kb.storeHashesChoice is None:
+        message = "do you want to store hashes to a temporary file "
+        message += "for eventual further processing with other tools [y/N] "
+        test = readInput(message, default="N")
+        kb.storeHashesChoice = test[0] in ("y", "Y")
+
+    if not kb.storeHashesChoice:
+        return
+
+    handle, filename = tempfile.mkstemp(prefix=MKSTEMP_PREFIX.HASHES, suffix=".txt")
+    os.close(handle)
+
+    infoMsg = "writing hashes to a temporary file '%s' " % filename
+    logger.info(infoMsg)
+
+    items = set()
+
+    with open(filename, "w+") as f:
+        for user, hashes in attack_dict.items():
+            for hash_ in hashes:
+                hash_ = hash_.split()[0] if hash_ and hash_.strip() else hash_
+                if hash_ and hash_ != NULL and hashRecognition(hash_):
+                    item = None
+                    if user and not user.startswith(DUMMY_USER_PREFIX):
+                        item = "%s:%s\n" % (user.encode(UNICODE_ENCODING), hash_.encode(UNICODE_ENCODING))
+                    else:
+                        item = "%s\n" % hash_.encode(UNICODE_ENCODING)
+
+                    if item and item not in items:
+                        f.write(item)
+                        items.add(item)

+ 12 - 0
Target/File/preprocess_2.py

@@ -0,0 +1,12 @@
+def create_hdf5(img_data, t2_data, img_label, save_path):
+	assert img_data.shape == img_label.shape, 'shape of data and label must be the same..'
+	f = h5py.File(save_path, "w")
+	dset = f.create_dataset("t1data", img_data.shape, dtype=np.int16)
+	tset = f.create_dataset("t2data", t2_data.shape, dtype=np.int16)
+	lset = f.create_dataset("label", img_data.shape, dtype=np.uint8)
+
+	dset[...] = img_data
+	lset[...] = img_label
+	tset[...] = t2_data
+	print('saved hdf5 file in %s' % (save_path, ))
+	f.close()

+ 19 - 0
Target/File/preprocess_6.py

@@ -0,0 +1,19 @@
+def generate_file_list():
+	# if os.pa
+	file_list = glob.glob('%s/*.h5' %(FLAGS.hdf5_dir,))
+	file_list.sort()
+	with open(FLAGS.hdf5_list_path, 'w') as _file:
+		for _file_path in file_list:
+			_file.write(_file_path)
+			_file.write('\n')
+
+
+	with open(FLAGS.hdf5_train_list_path, 'w') as _file:
+		for _file_path in file_list[8:]:
+			_file.write(_file_path)
+			_file.write('\n')
+
+	with open(FLAGS.hdf5_validation_list_path, 'w') as _file:
+		for _file_path in file_list[0:8]:
+			_file.write(_file_path)
+			_file.write('\n')

+ 40 - 0
Target/File/transform_cuhk03_1.py

@@ -0,0 +1,40 @@
+def save_images(mat_file, save_dir, new_im_name_tmpl):
+  def deref(mat, ref):
+    return mat[ref][:].T
+
+  def dump(mat, refs, pid, cam, im_dir):
+    """Save the images of a person under one camera."""
+    for i, ref in enumerate(refs):
+      im = deref(mat, ref)
+      if im.size == 0 or im.ndim < 2: break
+      fname = new_im_name_tmpl.format(pid, cam, i)
+      imsave(osp.join(im_dir, fname), im)
+
+  mat = h5py.File(mat_file, 'r')
+  labeled_im_dir = osp.join(save_dir, 'labeled/images')
+  detected_im_dir = osp.join(save_dir, 'detected/images')
+  all_im_dir = osp.join(save_dir, 'all/images')
+
+  may_make_dir(labeled_im_dir)
+  may_make_dir(detected_im_dir)
+  may_make_dir(all_im_dir)
+
+  # loop through camera pairs
+  pid = 0
+  for labeled, detected in zip(mat['labeled'][0], mat['detected'][0]):
+    labeled, detected = deref(mat, labeled), deref(mat, detected)
+    assert labeled.shape == detected.shape
+    # loop through ids in a camera pair
+    for i in range(labeled.shape[0]):
+      # We don't care about whether different persons are under same cameras,
+      # we only care about the same person being under different cameras or not.
+      dump(mat, labeled[i, :5], pid, 0, labeled_im_dir)
+      dump(mat, labeled[i, 5:], pid, 1, labeled_im_dir)
+      dump(mat, detected[i, :5], pid, 0, detected_im_dir)
+      dump(mat, detected[i, 5:], pid, 1, detected_im_dir)
+      dump(mat, chain(detected[i, :5], labeled[i, :5]), pid, 0, all_im_dir)
+      dump(mat, chain(detected[i, 5:], labeled[i, 5:]), pid, 1, all_im_dir)
+      pid += 1
+      if pid % 100 == 0:
+        sys.stdout.write('\033[F\033[K')
+        print('Saving images {}/{}'.format(pid, 1467))

+ 16 - 0
Target/File/utils_1.py

@@ -0,0 +1,16 @@
+def save_checkpoint(state, is_best, file_path, file_name='checkpoint.pth.tar'):
+    """
+    Saves the current state of the model. Does a copy of the file
+    in case the model performed better than previously.
+
+    Parameters:
+        state (dict): Includes optimizer and model state dictionaries.
+        is_best (bool): True if model is best performing model.
+        file_path (str): Path to save the file.
+        file_name (str): File name with extension (default: checkpoint.pth.tar).
+    """
+
+    save_path = os.path.join(file_path, file_name)
+    torch.save(state, save_path)
+    if is_best:
+        shutil.copyfile(save_path, os.path.join(file_path, 'model_best.pth.tar'))

+ 11 - 0
Target/File/utils_2.py

@@ -0,0 +1,11 @@
+def save_task_checkpoint(file_path, task_num):
+    """
+    Saves the current state of the model for a given task by copying existing checkpoint created by the
+    save_checkpoint function.
+
+    Parameters:
+        file_path (str): Path to save the file,
+        task_num (int): Number of task increment.
+    """
+    save_path = os.path.join(file_path, 'checkpoint_task_' + str(task_num) + '.pth.tar')
+    shutil.copyfile(os.path.join(file_path, 'checkpoint.pth.tar'), save_path)

+ 5 - 0
Target/Hash/03-HT-Get_2.py

@@ -0,0 +1,5 @@
+def __hash(self, key):
+        my_hash = 0
+        for letter in key:
+            my_hash = (my_hash + ord(letter) * 23) % len(self.data_map)
+        return my_hash

+ 26 - 0
Target/Hash/EncrypC_3.py

@@ -0,0 +1,26 @@
+def encrypt(self):
+
+        # create a cipher object
+
+        cipher_object = AES.new(
+            self.hashed_key_salt["key"], AES.MODE_CFB, self.hashed_key_salt["salt"]
+        )
+
+        self.abort()  # if the output file already exists, remove it first
+
+        input_file = open(self.user_file, "rb")
+        output_file = open(self.encrypt_output_file, "ab")
+        done_chunks = 0
+
+        for piece in self.read_in_chunks(input_file, self.chunk_size):
+            encrypted_content = cipher_object.encrypt(piece)
+            output_file.write(encrypted_content)
+            done_chunks += 1
+            yield done_chunks / self.total_chunks * 100
+
+        input_file.close()
+        output_file.close()
+
+        # clean up the cipher object
+
+        del cipher_object

+ 29 - 0
Target/Hash/EncrypC_6.py

@@ -0,0 +1,29 @@
+def hash_key_salt(self):
+
+        # --- convert key to hash
+        #  create a new hash object
+
+        hasher = hashlib.new(self.hash_type)
+        hasher.update(self.user_key)
+
+        # turn the output key hash into 32 bytes (256 bits)
+
+        self.hashed_key_salt["key"] = bytes(hasher.hexdigest()[:32], "utf-8")
+
+        # clean up hash object
+
+        del hasher
+
+        # --- convert salt to hash
+        #  create a new hash object
+
+        hasher = hashlib.new(self.hash_type)
+        hasher.update(self.user_salt)
+
+        # turn the output salt hash into 16 bytes (128 bits)
+
+        self.hashed_key_salt["salt"] = bytes(hasher.hexdigest()[:16], "utf-8")
+
+        # clean up hash object
+
+        del hasher

+ 20 - 0
Target/Hash/EncryptionDecryption_2.py

@@ -0,0 +1,20 @@
+def encrypt(self, key, filename):
+        chunksize = 128 * 1024
+        outFile = os.path.join(os.path.dirname(filename), "(Secured)" + os.path.basename(filename))
+        filesize = str(os.path.getsize(filename)).zfill(16)
+        IV = Random.new().read(AES.block_size)
+        print(IV, len(IV))
+        encryptor = AES.new(key, AES.MODE_CBC, IV)
+
+        with open(filename, "rb") as infile:
+            with open(outFile, "wb") as outfile:
+                outfile.write(filesize.encode('utf-8'))
+                outfile.write(IV)
+                while True:
+                    chunk = infile.read(chunksize)
+                    if len(chunk) == 0:
+                        break
+                    elif len(chunk) % 16 != 0:
+                        chunk += b' ' * (16 - (len(chunk) % 16))
+                    outfile.write(encryptor.encrypt(chunk))
+        return outFile

+ 8 - 0
Target/Hash/GeneralHashFunctions_1.py

@@ -0,0 +1,8 @@
+def rs_hash(key):
+    a = 378551
+    b = 63689
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = hash_value * a + ord(key[i])
+        a = a * b
+    return hash_value

+ 7 - 0
Target/Hash/GeneralHashFunctions_10.py

@@ -0,0 +1,7 @@
+def fnv_hash(key):
+    fnv_prime = 0x811C9DC5
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value *= fnv_prime
+        hash_value ^= ord(key[i])
+    return hash_value

+ 8 - 0
Target/Hash/GeneralHashFunctions_11.py

@@ -0,0 +1,8 @@
+def ap_hash(key):
+    hash_value = 0xAAAAAAAA
+    for i in range(len(key)):
+        if (i & 1) == 0:
+            hash_value ^= ((hash_value << 7) ^ ord(key[i]) * (hash_value >> 3))
+        else:
+            hash_value ^= (~((hash_value << 11) + ord(key[i]) ^ (hash_value >> 5)))
+    return hash_value

+ 5 - 0
Target/Hash/GeneralHashFunctions_2.py

@@ -0,0 +1,5 @@
+def js_hash(key):
+    hash_value = 1315423911
+    for i in range(len(key)):
+        hash_value ^= ((hash_value << 5) + ord(key[i]) + (hash_value >> 2))
+    return hash_value

+ 14 - 0
Target/Hash/GeneralHashFunctions_3.py

@@ -0,0 +1,14 @@
+def pjw_hash(key):
+    bits_in_unsigned_int = 4 * 8
+    three_quarters = (bits_in_unsigned_int * 3) / 4
+    one_eighth = bits_in_unsigned_int / 8
+    high_bits = 0xFFFFFFFF << int(bits_in_unsigned_int - one_eighth)
+    hash_value = 0
+    test = 0
+
+    for i in range(len(key)):
+        hash_value = (hash_value << int(one_eighth)) + ord(key[i])
+        test = hash_value & high_bits
+    if test != 0:
+        hash_value = ((hash_value ^ (test >> int(three_quarters))) & (~high_bits))
+    return hash_value & 0x7FFFFFFF

+ 9 - 0
Target/Hash/GeneralHashFunctions_4.py

@@ -0,0 +1,9 @@
+def elf_hash(key):
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = (hash_value << 4) + ord(key[i])
+        x = hash_value & 0xF0000000
+        if x != 0:
+            hash_value ^= (x >> 24)
+        hash_value &= ~x
+    return hash_value

+ 6 - 0
Target/Hash/GeneralHashFunctions_5.py

@@ -0,0 +1,6 @@
+def bkdr_hash(key):
+    seed = 131  # 31 131 1313 13131 131313 etc..
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = (hash_value * seed) + ord(key[i])
+    return hash_value

+ 5 - 0
Target/Hash/GeneralHashFunctions_6.py

@@ -0,0 +1,5 @@
+def sdbm_hash(key):
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = ord(key[i]) + (hash_value << 6) + (hash_value << 16) - hash_value
+    return hash_value

+ 5 - 0
Target/Hash/GeneralHashFunctions_7.py

@@ -0,0 +1,5 @@
+def djb_hash(key):
+    hash_value = 5381
+    for i in range(len(key)):
+        hash_value = ((hash_value << 5) + hash_value) + ord(key[i])
+    return hash_value

+ 5 - 0
Target/Hash/GeneralHashFunctions_8.py

@@ -0,0 +1,5 @@
+def dek_hash(key):
+    hash_value = len(key)
+    for i in range(len(key)):
+        hash_value = ((hash_value << 5) ^ (hash_value >> 27)) ^ ord(key[i])
+    return hash_value

+ 5 - 0
Target/Hash/GeneralHashFunctions_9.py

@@ -0,0 +1,5 @@
+def bp_hash(key):
+    hash_value = 0
+    for i in range(len(key)):
+        hash_value = hash_value << 7 ^ ord(key[i])
+    return hash_value

+ 6 - 0
Target/Hash/Reducible_2.py

@@ -0,0 +1,6 @@
+def hash_word(s, size):
+    hash_idx = 0
+    for j in range(len(s)):
+        letter = ord(s[j]) - 96
+        hash_idx = (hash_idx * 26 + letter) % size
+    return hash_idx

+ 10 - 0
Target/Hash/base64_2.py

@@ -0,0 +1,10 @@
+def encryptFile():
+    myFile = input("enter file to encrypt: ")
+    file = open(myFile,"r")
+    contents = file.read()
+    contents = contents.encode()
+    file = open(myFile, "w")
+    encoded = base64.b64encode(contents)
+    # the .decode() converts the bytes to str, taking off the b'...'
+    file.write(encoded.decode())
+    print ("File is now encrypted... and the contents is unreadable")

+ 14 - 0
Target/Hash/base64_3.py

@@ -0,0 +1,14 @@
+def decryptMessage():
+    pwd = "N3VIQUJmZ2pyNDVkZDRvMzNkZmd0NzBkZzlLOWRmcjJ0NWhCdmRm"
+    key = base64.b64decode(pwd) #the decoded version of this is the key.
+    value = input("Enter the decryption key: ").encode()
+    if value == key:
+        time.sleep(1)
+        message = input("Enter the message to decode: ")
+        decoded = base64.b64decode(message)
+        print (decoded)
+        menu()
+        
+    else:
+        print("Decryption key is wrong.")
+        menu()

+ 5 - 0
Target/Hash/base64_4.py

@@ -0,0 +1,5 @@
+def encrypt():
+    password = input("Enter a message: ").encode()
+    encoded = base64.b64encode(password)
+    print (encoded.decode()) 
+    menu()

+ 3 - 0
Target/Hash/base64_5.py

@@ -0,0 +1,3 @@
+def hashing(password):
+    hash1 = hashlib.md5(str.encode(password)).hexdigest()
+    print ("your hashed password is:", hash1,"\n")

+ 12 - 0
Target/Hash/bignum_1.py

@@ -0,0 +1,12 @@
+def xhash(coeff, exp):
+    sign = 1
+    if coeff < 0:
+        sign = -1
+        coeff = -coeff
+    if exp >= 0:
+        exp_hash = pow(10, exp, _PyHASH_MODULUS)
+    else:
+        exp_hash = pow(_PyHASH_10INV, -exp, _PyHASH_MODULUS)
+    hash_ = coeff * exp_hash % _PyHASH_MODULUS
+    ans = hash_ if sign == 1 else -hash_
+    return -2 if ans == -1 else ans
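
Note: xhash mirrors how CPython hashes a Decimal written as coeff * 10**exp; a hedged sketch of exercising it (both constants below are assumptions matching CPython's 64-bit build):

    _PyHASH_MODULUS = (1 << 61) - 1                                # assumed prime modulus
    _PyHASH_10INV = pow(10, _PyHASH_MODULUS - 2, _PyHASH_MODULUS)  # modular inverse of 10

    # 1.5 can be written as 15 * 10**-1, so coeff=15, exp=-1
    print(xhash(15, -1))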

+ 69 - 0
Target/Hash/biometry_hash_5.py

@@ -0,0 +1,69 @@
+def fp_search():
+        
+    """
+    PyFingerprint
+    Copyright (C) 2015 Bastian Raschke <bastian.raschke@posteo.de>
+    All rights reserved.
+
+    @author: Bastian Raschke <bastian.raschke@posteo.de>
+    """
+
+
+    ## Search for a finger
+    ##
+
+    ## Tries to initialize the sensor
+    try:
+        f = PyFingerprint('/dev/ttyUSB0', 57600, 0xFFFFFFFF, 0x00000000)
+
+        if ( f.verifyPassword() == False ):
+            raise ValueError('The given fingerprint sensor password is wrong!')
+
+    except Exception as e:
+        print('The fingerprint sensor could not be initialized!')
+        print('Exception message: ' + str(e))
+        exit(1)
+
+    ## Gets some sensor information
+    print('Currently stored templates: ' + str(f.getTemplateCount()))
+
+    ## Tries to search the finger and calculate hash
+    try:
+        print('Waiting for finger...')
+
+        ## Wait that finger is read
+        while ( f.readImage() == False ):
+            pass
+
+        ## Converts read image to characteristics and stores it in charbuffer 1
+        f.convertImage(0x01)
+
+        ## Searchs template
+        result = f.searchTemplate()
+
+        positionNumber = result[0]
+        accuracyScore = result[1]
+
+        if ( positionNumber == -1 ):
+            print('No match found!')
+            exit(0)
+        else:
+            print('Found template at position #' + str(positionNumber))
+            print('The accuracy score is: ' + str(accuracyScore))
+
+        ## OPTIONAL stuff
+        ##
+
+        ## Loads the found template to charbuffer 1
+        f.loadTemplate(positionNumber, 0x01)
+
+        ## Downloads the characteristics of template loaded in charbuffer 1
+        characterics = str(f.downloadCharacteristics(0x01))
+
+        ## Hashes characteristics of template
+        print('SHA-2 hash of template: ' + hashlib.sha256(characterics.encode('utf-8')).hexdigest())
+
+    except Exception as e:
+        print('Operation failed!')
+        print('Exception message: ' + str(e))
+        exit(1)

+ 27 - 0
Target/Hash/biometry_hash_8.py

@@ -0,0 +1,27 @@
+def encrypt(key, filename):
+	chunksize = 64*1024
+	#print filename
+	#print "4th time: ", key
+	outputFile = "(encrypted)"+filename
+	filesize = str(os.path.getsize(filename)).zfill(16)
+	IV = ''
+
+	for i in range(16):
+		IV += chr(random.randint(0, 0xFF))
+
+	encryptor = AES.new(key, AES.MODE_CBC, IV)
+
+	with open(filename, 'rb') as infile:
+		with open(outputFile, 'wb') as outfile:
+			outfile.write(filesize)
+			outfile.write(IV)
+			
+			while True:
+				chunk = infile.read(chunksize)
+				
+				if len(chunk) == 0:
+					break
+				elif len(chunk) % 16 != 0:
+					chunk += ' ' * (16 - (len(chunk) % 16))
+
+				outfile.write(encryptor.encrypt(chunk))

+ 15 - 0
Target/Hash/crypto_11.py

@@ -0,0 +1,15 @@
+def __sec_key(self, data):
+        """ returns secret key and block id
+
+            Args
+                data: string
+        """
+        h = SHA256d(data)
+        if not self.__convergence_secret:
+            self.__warn_convergence()
+        else:
+            h.update(self.__convergence_secret)
+        key = h.digest()
+        del h
+        id = SHA256d(key).digest()
+        return key, id

+ 23 - 0
Target/Hash/crypto_13.py

@@ -0,0 +1,23 @@
+def decrypt(self, key, ciphertext, verify=False):
+        """ decrypt data with convergence encryption.
+        
+            Args
+                key: str, encryption key
+                ciphertext: str, the ciphertext
+                verify: bool, verify decrypted data, default: False
+        
+            Returns
+                the plain text
+        """
+        plain = aes(key, ciphertext)
+        if verify:
+            h = SHA256d(plain)
+            if self.__convergence_secret:
+                h.update(self.__convergence_secret)
+            digest = h.digest()
+            # can verify only if convergence secret is known!
+            if self.__convergence_secret and not key == digest:
+                msg = "Block verification error on %s." % SHA256d(key).hexdigest()
+                log.error(msg)
+                raise CryptError(msg)
+        return plain

+ 17 - 0
Target/Hash/crypto_14.py

@@ -0,0 +1,17 @@
+def encrypt_key(key, nonce, data):
+    """ use "key" and "nonce" to generate a one time key and en-/decrypt
+        "data" with the one time key.
+
+        Args
+            key: encryption key
+            nonce: exactly-once-used string (try a time-based UUID)
+            data: the data to en-/decrypt
+        Returns
+            ciphertext: AES256 encrypted data
+    """
+
+    key = clean_string(key)
+    key = SHA256d(key).digest()
+    nonce_hash = SHA256d(nonce).digest()  # assert 32 bytes key
+    enc_key = aes(key, nonce_hash)      # generate encryption key
+    return aes(enc_key, data)
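
Note: the docstring's "en-/decrypt" wording suggests the aes helper is its own inverse for a given key; if so, a hedged round-trip sketch would be:

    # hypothetical round-trip with the same key and nonce
    ct = encrypt_key("my key", "2024-01-01-uuid", b"secret data")
    pt = encrypt_key("my key", "2024-01-01-uuid", ct)
    assert pt == b"secret data"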

+ 12 - 0
Target/Hash/crypto_4.py

@@ -0,0 +1,12 @@
+def __init__(self, data=None, truncate_to=None):
+        """ SHA-265d against length-extensions-attacks
+            with optional truncation of the hash
+
+        Args:
+            data: Initial string, optional
+            truncate_to: length to truncate the hash to, optional
+        """
+        self.h = sha256()
+        self.truncate_to = truncate_to
+        if data:
+            self.h.update(data)

+ 2 - 0
Target/Hash/crypto_6.py

@@ -0,0 +1,2 @@
+def digest(self):
+        return sha256(self.h.digest()).digest()[:self.truncate_to]

+ 2 - 0
Target/Hash/crypto_7.py

@@ -0,0 +1,2 @@
+def hexdigest(self):
+        return self.digest().encode('hex')

+ 13 - 0
Target/Hash/crypto_hash_1.py

@@ -0,0 +1,13 @@
+def crypto_hash(message):
+    """
+    Hashes and returns the message ``message``.
+
+    :param message: bytes
+    :rtype: bytes
+    """
+    digest = ffi.new("unsigned char[]", crypto_hash_BYTES)
+    rc = lib.crypto_hash(digest, message, len(message))
+    ensure(rc == 0,
+           'Unexpected library error',
+           raising=exc.RuntimeError)
+    return ffi.buffer(digest, crypto_hash_BYTES)[:]

+ 13 - 0
Target/Hash/crypto_hash_2.py

@@ -0,0 +1,13 @@
+def crypto_hash_sha256(message):
+    """
+    Hashes and returns the message ``message``.
+
+    :param message: bytes
+    :rtype: bytes
+    """
+    digest = ffi.new("unsigned char[]", crypto_hash_sha256_BYTES)
+    rc = lib.crypto_hash_sha256(digest, message, len(message))
+    ensure(rc == 0,
+           'Unexpected library error',
+           raising=exc.RuntimeError)
+    return ffi.buffer(digest, crypto_hash_sha256_BYTES)[:]

+ 13 - 0
Target/Hash/crypto_hash_3.py

@@ -0,0 +1,13 @@
+def crypto_hash_sha512(message):
+    """
+    Hashes and returns the message ``message``.
+
+    :param message: bytes
+    :rtype: bytes
+    """
+    digest = ffi.new("unsigned char[]", crypto_hash_sha512_BYTES)
+    rc = lib.crypto_hash_sha512(digest, message, len(message))
+    ensure(rc == 0,
+           'Unexpected library error',
+           raising=exc.RuntimeError)
+    return ffi.buffer(digest, crypto_hash_sha512_BYTES)[:]
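
Note: libsodium's crypto_hash_sha256/crypto_hash_sha512 compute standard SHA-2 digests, so a hedged sanity check against hashlib (assuming these wrappers are importable) could be:

    import hashlib

    msg = b"hello"
    assert crypto_hash_sha256(msg) == hashlib.sha256(msg).digest()
    assert crypto_hash_sha512(msg) == hashlib.sha512(msg).digest()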

+ 13 - 0
Target/Hash/des_crypt_1.py

@@ -0,0 +1,13 @@
+def _crypt_secret_to_key(secret):
+    """convert secret to 64-bit DES key.
+
+    this only uses the first 8 bytes of the secret,
+    and discards the high 8th bit of each byte at that.
+    a null parity bit is inserted after every 7th bit of the output.
+    """
+    # NOTE: this would set the parity bits correctly,
+    #       but des_encrypt_int_block() would just ignore them...
+    ##return sum(expand_7bit(byte_elem_value(c) & 0x7f) << (56-i*8)
+    ##           for i, c in enumerate(secret[:8]))
+    return sum((byte_elem_value(c) & 0x7f) << (57-i*8)
+               for i, c in enumerate(secret[:8]))

+ 29 - 0
Target/Hash/des_crypt_2.py

@@ -0,0 +1,29 @@
+def _raw_des_crypt(secret, salt):
+    """pure-python backed for des_crypt"""
+    assert len(salt) == 2
+
+    # NOTE: some OSes will accept non-HASH64 characters in the salt,
+    #       but what value they assign these characters varies wildly,
+    #       so just rejecting them outright.
+    #       the same goes for single-character salts...
+    #       some OSes duplicate the char, some insert a '.' char,
+    #       and openbsd does (something) which creates an invalid hash.
+    salt_value = h64.decode_int12(salt)
+
+    # gotta do something - no official policy since this predates unicode
+    if isinstance(secret, unicode):
+        secret = secret.encode("utf-8")
+    assert isinstance(secret, bytes)
+
+    # forbidding NULL char because underlying crypt() rejects them too.
+    if _BNULL in secret:
+        raise uh.exc.NullPasswordError(des_crypt)
+
+    # convert first 8 bytes of secret string into an integer
+    key_value = _crypt_secret_to_key(secret)
+
+    # run data through des using input of 0
+    result = des_encrypt_int_block(key_value, 0, salt_value, 25)
+
+    # run h64 encode on result
+    return h64big.encode_int64(result)
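
Note: this backend feeds the traditional des_crypt scheme (12-bit salt, 25 DES rounds on a zero block, hash64-encoded); a hedged sketch of the public API it serves, assuming passlib is installed:

    from passlib.hash import des_crypt

    h = des_crypt.hash("password")             # 13-character traditional crypt string
    print(h, des_crypt.verify("password", h))  # prints the hash and True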

+ 29 - 0
Target/Hash/des_crypt_27.py

@@ -0,0 +1,29 @@
+def _calc_checksum(self, secret):
+        if isinstance(secret, unicode):
+            secret = secret.encode("utf-8")
+
+        # check for truncation (during .hash() calls only)
+        if self.use_defaults:
+            self._check_truncate_policy(secret)
+
+        # parse salt value
+        try:
+            salt_value = h64.decode_int12(self.salt.encode("ascii"))
+        except ValueError: # pragma: no cover - caught by class
+            raise suppress_cause(ValueError("invalid chars in salt"))
+
+        # convert first 8 bytes of secret string into an integer
+        key1 = _crypt_secret_to_key(secret)
+
+        # run data through des using input of 0
+        result1 = des_encrypt_int_block(key1, 0, salt_value, 20)
+
+        # convert next 8 bytes of secret string into integer (key=0 if secret < 8 chars)
+        key2 = _crypt_secret_to_key(secret[8:16])
+
+        # run data through des using input of 0
+        result2 = des_encrypt_int_block(key2, 0, salt_value, 5)
+
+        # done
+        chk = h64big.encode_int64(result1) + h64big.encode_int64(result2)
+        return chk.decode("ascii")

+ 23 - 0
Target/Hash/des_crypt_4.py

@@ -0,0 +1,23 @@
+def _raw_bsdi_crypt(secret, rounds, salt):
+    """pure-python backend for bsdi_crypt"""
+
+    # decode salt
+    salt_value = h64.decode_int24(salt)
+
+    # gotta do something - no official policy since this predates unicode
+    if isinstance(secret, unicode):
+        secret = secret.encode("utf-8")
+    assert isinstance(secret, bytes)
+
+    # forbidding NULL char because underlying crypt() rejects them too.
+    if _BNULL in secret:
+        raise uh.exc.NullPasswordError(bsdi_crypt)
+
+    # convert secret string into an integer
+    key_value = _bsdi_secret_to_key(secret)
+
+    # run data through des using input of 0
+    result = des_encrypt_int_block(key_value, 0, salt_value, rounds)
+
+    # run h64 encode on result
+    return h64big.encode_int64(result)

+ 4 - 0
Target/Hash/digests_2.py

@@ -0,0 +1,4 @@
+def _calc_checksum(self, secret):
+        if isinstance(secret, unicode):
+            secret = secret.encode("utf-8")
+        return str_to_uascii(self._hash_func(secret).hexdigest())

+ 12 - 0
Target/Hash/digests_4.py

@@ -0,0 +1,12 @@
+def hash(cls, secret, user, realm, encoding=None):
+        # NOTE: this was deliberately written so that raw bytes are passed through
+        # unchanged, the encoding kwd is only used to handle unicode values.
+        if not encoding:
+            encoding = cls.default_encoding
+        uh.validate_secret(secret)
+        if isinstance(secret, unicode):
+            secret = secret.encode(encoding)
+        user = to_bytes(user, encoding, "user")
+        realm = to_bytes(realm, encoding, "realm")
+        data = render_bytes("%s:%s:%s", user, realm, secret)
+        return hashlib.md5(data).hexdigest()
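
Note: the value returned above corresponds to the HTTP Digest HA1 = MD5("user:realm:password"); a minimal standalone sketch with hashlib (the names below are made up for the demo):

    import hashlib

    user, realm, password = "alice", "example.org", "s3cret"
    ha1 = hashlib.md5(("%s:%s:%s" % (user, realm, password)).encode()).hexdigest()
    print(ha1)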

Some files are not shown in this diff because too many files have changed.