archive.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. import hashlib
  2. import json
  3. import os
  4. import zipfile
  5. import io
  6. from os.path import basename, splitext
  7. import slackviewer
  8. from slackviewer.constants import SLACKVIEWER_TEMP_PATH
  9. from slackviewer.utils.six import to_unicode, to_bytes
  10. def SHA1_file(filepath, extra=b''):
  11. """
  12. Returns hex digest of SHA1 hash of file at filepath
  13. :param str filepath: File to hash
  14. :param bytes extra: Extra content added to raw read of file before taking hash
  15. :return: hex digest of hash
  16. :rtype: str
  17. """
  18. h = hashlib.sha1()
  19. with io.open(filepath, 'rb') as f:
  20. for chunk in iter(lambda: f.read(h.block_size), b''):
  21. h.update(chunk)
  22. h.update(extra)
  23. return h.hexdigest()
  24. def extract_archive(filepath):
  25. """
  26. Returns the path of the archive
  27. :param str filepath: Path to file to extract or read
  28. :return: path of the archive
  29. :rtype: str
  30. """
  31. # Checks if file path is a directory
  32. if os.path.isdir(filepath):
  33. path = os.path.abspath(filepath)
  34. print("Archive already extracted. Viewing from {}...".format(path))
  35. return path
  36. # Checks if the filepath is a zipfile and continues to extract if it is
  37. # if not it raises an error
  38. elif not zipfile.is_zipfile(filepath):
  39. # Misuse of TypeError? :P
  40. raise TypeError("{} is not a zipfile".format(filepath))
  41. archive_sha = SHA1_file(
  42. filepath=filepath,
  43. # Add version of slackviewer to hash as well so we can invalidate the cached copy
  44. # if there are new features added
  45. extra=to_bytes(slackviewer.__version__)
  46. )
  47. extracted_path = os.path.join(SLACKVIEWER_TEMP_PATH, archive_sha)
  48. if os.path.exists(extracted_path):
  49. print("{} already exists".format(extracted_path))
  50. else:
  51. # Extract zip
  52. with zipfile.ZipFile(filepath) as zip:
  53. print("{} extracting to {}...".format(filepath, extracted_path))
  54. zip.extractall(path=extracted_path)
  55. print("{} extracted to {}".format(filepath, extracted_path))
  56. # Add additional file with archive info
  57. create_archive_info(filepath, extracted_path, archive_sha)
  58. return extracted_path
  59. # Saves archive info
  60. # When loading empty dms and there is no info file then this is called to
  61. # create a new archive file
  62. def create_archive_info(filepath, extracted_path, archive_sha=None):
  63. """
  64. Saves archive info to a json file
  65. :param str filepath: Path to directory of archive
  66. :param str extracted_path: Path to directory of archive
  67. :param str archive_sha: SHA string created when archive was extracted from zip
  68. """
  69. archive_info = {
  70. "sha1": archive_sha,
  71. "filename": os.path.split(filepath)[1],
  72. }
  73. with io.open(
  74. os.path.join(
  75. extracted_path,
  76. ".slackviewer_archive_info.json",
  77. ), 'w+', encoding="utf-8"
  78. ) as f:
  79. s = json.dumps(archive_info, ensure_ascii=False)
  80. s = to_unicode(s)
  81. f.write(s)
  82. def get_export_info(archive_name):
  83. """
  84. Given a file or directory, extract it and return information that will be used in
  85. an export printout: the basename of the file, the name stripped of its extension, and
  86. our best guess (based on Slack's current naming convention) of the name of the
  87. workspace that this is an export of.
  88. """
  89. extracted_path = extract_archive(archive_name)
  90. base_filename = basename(archive_name)
  91. (noext_filename, _) = splitext(base_filename)
  92. # Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018"
  93. # If that's not the format, we will just fall back to the extension-free filename.
  94. (workspace_name, _) = noext_filename.split(" Slack export ", 1)
  95. return {
  96. "readable_path": extracted_path,
  97. "basename": base_filename,
  98. "stripped_name": noext_filename,
  99. "workspace_name": workspace_name,
  100. }