# reader.py (10.0 KB)

from collections import OrderedDict, defaultdict
import glob
import io
import json
import os

from slackviewer.formatter import SlackFormatter
from slackviewer.message import Message
from slackviewer.user import User, deleted_user


  9. class Reader(object):
  10. """
  11. Reader object will read all of the archives' data from the json files
  12. """
  13. def __init__(self, PATH):
  14. self._PATH = PATH
  15. # TODO: Make sure this works
  16. with io.open(os.path.join(self._PATH, "users.json"), encoding="utf8") as f:
  17. self.__USER_DATA = {u["id"]: User(u) for u in json.load(f)}
  18. slackbot = {
  19. "id": "USLACKBOT",
  20. "name": "slackbot",
  21. "profile": {
  22. "image_24": "https://a.slack-edge.com/0180/img/slackbot_24.png",
  23. "image_32": "https://a.slack-edge.com/2fac/plugins/slackbot/assets/service_32.png",
  24. "image_48": "https://a.slack-edge.com/2fac/plugins/slackbot/assets/service_48.png",
  25. "image_72": "https://a.slack-edge.com/0180/img/slackbot_72.png",
  26. "image_192": "https://a.slack-edge.com/66f9/img/slackbot_192.png",
  27. "image_512": "https://a.slack-edge.com/1801/img/slackbot_512.png",
  28. }
  29. }
  30. self.__USER_DATA.setdefault("USLACKBOT", User(slackbot))
  31. ##################
  32. # Public Methods #
  33. ##################
  34. def compile_channels(self, channels=None):
  35. if isinstance(channels, str):
  36. channels = channels.split(',')
  37. channel_data = self._read_from_json("channels.json")
  38. channel_names = [c["name"] for c in channel_data.values() if not channels or c["name"] in channels]
  39. return self._create_messages(channel_names, channel_data)
  40. def compile_groups(self):
  41. group_data = self._read_from_json("groups.json")
  42. group_names = [c["name"] for c in group_data.values()]
  43. return self._create_messages(group_names, group_data)
  44. def compile_dm_messages(self):
  45. # Gets list of dm objects with dm ID and array of members ids
  46. dm_data = self._read_from_json("dms.json")
  47. dm_ids = [c["id"] for c in dm_data.values()]
  48. # True is passed here to let the create messages function know that
  49. # it is dm data being passed to it
  50. return self._create_messages(dm_ids, dm_data, True)
  51. def compile_dm_users(self):
  52. """
  53. Gets the info for the members within the dm
  54. Returns a list of all dms with the members that have ever existed
  55. :rtype: [object]
  56. {
  57. id: <id>
  58. users: [<user_id>]
  59. }
  60. """
  61. dm_data = self._read_from_json("dms.json")
  62. dms = dm_data.values()
  63. all_dms_users = []
  64. for dm in dms:
  65. # checks if messages actually exist
  66. if dm["id"] not in self._EMPTY_DMS:
  67. # added try catch for users from shared workspaces not in current workspace
  68. try:
  69. if "members" in dm:
  70. users = dm["members"]
  71. if "user" in dm:
  72. users = [dm["user"]]
  73. dm_members = {"id": dm["id"], "users": [self.__USER_DATA.setdefault(m, deleted_user(m)) for m in users]}
  74. all_dms_users.append(dm_members)
  75. except KeyError:
  76. dm_members = None
  77. return all_dms_users
  78. def compile_mpim_messages(self):
  79. mpim_data = self._read_from_json("mpims.json")
  80. mpim_names = [c["name"] for c in mpim_data.values()]
  81. return self._create_messages(mpim_names, mpim_data)
  82. def compile_mpim_users(self):
  83. """
  84. Gets the info for the members within the multiple person instant message
  85. Returns a list of all dms with the members that have ever existed
  86. :rtype: [object]
  87. {
  88. name: <name>
  89. users: [<user_id>]
  90. }
  91. """
  92. mpim_data = self._read_from_json("mpims.json")
  93. mpims = [c for c in mpim_data.values()]
  94. all_mpim_users = []
  95. for mpim in mpims:
  96. mpim_members = {"name": mpim["name"], "users": [] if "members" not in mpim.keys() else [self.__USER_DATA.setdefault(m, deleted_user(m)) for m in mpim["members"]]}
  97. all_mpim_users.append(mpim_members)
  98. return all_mpim_users
  99. @staticmethod
  100. def _extract_time(json):
  101. try:
  102. # Convert the timestamp part to float
  103. return float(json['ts'])
  104. except KeyError:
  105. return 0
  106. ###################
  107. # Private Methods #
  108. ###################
  109. def _create_messages(self, names, data, isDms=False):
  110. """
  111. Creates object of arrays of messages from each json file specified by the names or ids
  112. :param [str] names: names of each group of messages
  113. :param [object] data: array of objects detailing where to get the messages from in
  114. the directory structure
  115. :param bool isDms: boolean value used to tell if the data is dm data so the function can
  116. collect the empty dm directories and store them in memory only
  117. :return: object of arrays of messages
  118. :rtype: object
  119. """
  120. chats = {}
  121. empty_dms = []
  122. formatter = SlackFormatter(self.__USER_DATA, data)
  123. for name in names:
  124. # gets path to dm directory that holds the json archive
  125. dir_path = os.path.join(self._PATH, name)
  126. messages = []
  127. # array of all days archived
  128. day_files = glob.glob(os.path.join(dir_path, "*.json"))
  129. # this is where it's skipping the empty directories
  130. if not day_files:
  131. if isDms:
  132. empty_dms.append(name)
  133. continue
  134. for day in sorted(day_files):
  135. with io.open(os.path.join(self._PATH, day), encoding="utf8") as f:
  136. # loads all messages
  137. day_messages = json.load(f)
  138. # sorts the messages in the json file
  139. day_messages.sort(key=Reader._extract_time)
  140. messages.extend([Message(formatter, d) for d in day_messages])
  141. chats[name] = messages
  142. chats = self._build_threads(chats)
  143. if isDms:
  144. self._EMPTY_DMS = empty_dms
  145. return chats
  146. def _build_threads(self, channel_data):
  147. """
  148. Re-orders the JSON to allow for thread building.
  149. :param [dict] channel_data: dictionary of all Slack channels and messages
  150. :return: None
  151. """
  152. for channel_name in channel_data.keys():
  153. replies = {}
  154. user_ts_lookup = {}
  155. items_to_remove = []
  156. for i, m in enumerate(channel_data[channel_name]):
  157. user = m._message.get('user')
  158. ts = m._message.get('ts')
  159. if user is None or ts is None:
  160. continue
  161. k = (user, ts)
  162. if k not in user_ts_lookup:
  163. user_ts_lookup[k] = []
  164. user_ts_lookup[k].append((i, m))
  165. for location, message in enumerate(channel_data[channel_name]):
  166. # If there's a "reply_count" key, generate a list of user and timestamp dictionaries
  167. if 'reply_count' in message._message or 'replies' in message._message:
  168. # Identify and save where we are
  169. reply_list = []
  170. for reply in message._message.get('replies', []):
  171. reply_list.append(reply)
  172. reply_objects = []
  173. for item in reply_list:
  174. item_lookup_key = (item['user'], item['ts'])
  175. item_replies = user_ts_lookup.get(item_lookup_key)
  176. if item_replies is not None:
  177. reply_objects.extend(item_replies)
  178. if not reply_objects:
  179. continue
  180. sorted_reply_objects = sorted(reply_objects, key=lambda tup: tup[0])
  181. for reply_obj_tuple in sorted_reply_objects:
  182. items_to_remove.append(reply_obj_tuple[0])
  183. replies[location] = [tup[1] for tup in sorted_reply_objects]
  184. # Create an OrderedDict of thread locations and replies in reverse numerical order
  185. sorted_threads = OrderedDict(sorted(replies.items(), reverse=True))
  186. for idx_to_remove in sorted(items_to_remove, reverse=True):
  187. # threads location hotfix
  188. channel_data[channel_name][idx_to_remove] = {'user': -1}
  189. # Iterate through the threads and insert them back into channel_data[channel_name] in response order
  190. for grouping in sorted_threads.items():
  191. location = grouping[0] + 1
  192. for reply in grouping[1]:
  193. msgtext = reply._message.get("text")
  194. if not msgtext or not msgtext.startswith("**Thread Reply:**"):
  195. reply._message["text"] = "**Thread Reply:** {}".format(msgtext)
  196. channel_data[channel_name].insert(location, reply)
  197. location += 1
  198. # threads location hotfix
  199. data_with_sorted_threads = []
  200. for i, item in enumerate(channel_data[channel_name]):
  201. if isinstance(item, Message):
  202. data_with_sorted_threads.append(item)
  203. channel_data[channel_name] = data_with_sorted_threads.copy()
  204. return channel_data
  205. def _read_from_json(self, file):
  206. """
  207. Reads the file specified from json and creates an object based on the id of each element
  208. :param str file: Path to file of json to read
  209. :return: object of data read from json file
  210. :rtype: object
  211. """
  212. try:
  213. with io.open(os.path.join(self._PATH, file), encoding="utf8") as f:
  214. return {u["id"]: u for u in json.load(f)}
  215. except IOError:
  216. return {}