#!/bin/python
# Usage: python export.py <path_to_database> <path_for_new_folder>
# Output: folder in given directory
import argparse
import datetime
import json
import logging
import os
import sqlite3
import time

from six import iteritems
  12. # Used in conjunction with sqlite3 to generate JSON-like format
  13. def dict_factory(cursor, row):
  14. d = {}
  15. for index, column in enumerate(cursor.description):
  16. d[column[0]] = row[index]
  17. return d
  18. # Turns unicode into text
  19. def byteify(inp):
  20. if isinstance(inp, dict):
  21. return {byteify(key): byteify(value) for key, value in iteritems(inp)}
  22. if isinstance(inp, list):
  23. return [byteify(element) for element in inp]
  24. if "unicode" in vars(globals()["__builtins__"]) and isinstance(inp, unicode):
  25. return inp.encode("utf-8")
  26. return inp
  27. def get_channel_name(channel_id):
  28. return ENV["id_channel"].get(channel_id, "None")
  29. def get_date(ts):
  30. return datetime.datetime.fromtimestamp(int(ts)).strftime("%Y-%m-%d")
  31. # Uncomment time in the future if running daily (Used to export last days of messages)
  32. # time = time.time() - 86400 # One full day in seconds
  33. parser = argparse.ArgumentParser()
  34. parser.add_argument(
  35. "-d",
  36. "--database-path",
  37. default="slack.sqlite",
  38. help=("path to the SQLite database. (default = ./slack.sqlite)"),
  39. )
  40. parser.add_argument(
  41. "-a",
  42. "--archive_path",
  43. default="export",
  44. help=("path to export to (default ./export)"),
  45. )
  46. parser.add_argument(
  47. "-l",
  48. "--log-level",
  49. default="debug",
  50. help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"),
  51. )
  52. args = parser.parse_args()
  53. database_path = args.database_path
  54. archive_path = args.archive_path
  55. log_level = args.log_level.upper()
  56. assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
  57. logging.basicConfig(level=getattr(logging, log_level))
  58. logger = logging.getLogger(__name__)
  59. time = 0.0
  60. if not os.path.isdir(archive_path):
  61. os.makedirs(archive_path)
  62. time = 0.0 # Full export instead of day export
  63. # Uncomment if you need to export entire archive or make this choice
  64. # getAll = raw_input("Do you want to export all messages instead of last day?(y/N) ").lower()
  65. # if (getAll=='y'):
  66. # time = 0.0
  67. # Establish connection to SQL database
  68. connection = sqlite3.connect(database_path)
  69. connection.row_factory = dict_factory
  70. cursor = connection.cursor()
  71. # Get channel and user data
  72. cursor.execute("SELECT * FROM channels")
  73. channels = byteify(cursor.fetchall())
  74. cursor.execute("SELECT * FROM users")
  75. users = byteify(cursor.fetchall())
  76. for u in users:
  77. u["profile"] = {}
  78. u["profile"]["image_72"] = u.pop("avatar")
  79. # Save channel and user data files to archive folder
  80. channel_file = os.path.join(archive_path, "channels.json")
  81. with open(channel_file, "w") as outfile:
  82. json.dump(channels, outfile)
  83. outfile.close()
  84. user_file = os.path.join(archive_path, "users.json")
  85. with open(user_file, "w") as outfile:
  86. json.dump(users, outfile)
  87. outfile.close()
  88. # Define the names associated with each channel id
  89. ENV = {
  90. "channel_id": {},
  91. "id_channel": {},
  92. }
  93. ENV["channel_id"] = dict([(m["name"], m["id"]) for m in channels])
  94. ENV["id_channel"] = dict([(m["id"], m["name"]) for m in channels])
  95. # Get all messages after given time (in seconds since the Epoch)
  96. command = (
  97. "SELECT * FROM messages WHERE timestamp > %s ORDER BY channel, timestamp"
  98. ) % time
  99. cursor.execute(command)
  100. results = byteify(cursor.fetchall())
  101. # Clean and store message results in Slack-ish format
  102. channel_msgs = dict([(c["name"], {}) for c in channels])
  103. for message in results:
  104. message["text"] = message["message"]
  105. message["ts"] = message["timestamp"]
  106. message["type"] = "message"
  107. message.pop("message")
  108. message.pop("timestamp")
  109. channel_name = get_channel_name(message["channel"])
  110. if channel_name == "None":
  111. continue
  112. # timestamp format is #########.######
  113. day = get_date(message["ts"].split(".")[0])
  114. if channel_msgs[channel_name].get(day, None):
  115. channel_msgs[channel_name][day].append(message)
  116. else:
  117. channel_msgs[channel_name][day] = [message]
  118. # Go to channel folder and title message collection as <date>.json
  119. update_count = 0
  120. for channel_name in channel_msgs.keys():
  121. # Checks for any messages from today
  122. if not channel_msgs[channel_name]:
  123. continue
  124. else:
  125. update_count += 1
  126. logger.info("%s has been updated" % channel_name)
  127. directory = os.path.join(archive_path, channel_name)
  128. if "None" in directory:
  129. logger.warning("Channel not found: %s" % channel_name)
  130. continue
  131. if not os.path.isdir(directory):
  132. os.makedirs(directory)
  133. for day in channel_msgs[channel_name].keys():
  134. file = os.path.join(directory, "%s.json") % day
  135. with open(file, "w") as outfile:
  136. json.dump(channel_msgs[channel_name][day], outfile)
  137. outfile.close()
  138. logger.info("Updated %s channels" % update_count)
  139. connection.close()