# index.py
  1. import os
  2. import json
  3. import glob
  4. import requests
  5. from threading import Thread, RLock
  6. import logging
  7. import datetime
  8. import time
  9. import re
  10. from .constants import TOKYO_TZ
  11. __all__ = ['ShowroomIndex']
  12. index_logger = logging.getLogger('showroom.index')
  13. _filename_re = re.compile(
  14. r'''
  15. (?:\d{6}\ Showroom\ -\ )?
  16. (.+)
  17. (?:\ \d{4,6})
  18. (?:\.mp4)?$
  19. ''',
  20. re.VERBOSE
  21. )
  22. def _get_index_path():
  23. abspath = os.path.abspath(__file__)
  24. return os.path.abspath(os.path.dirname(abspath) + '/../index')
  25. DEFAULT_INDEX_PATH = _get_index_path()
  26. # maps numerical genre_ids to human readable names
  27. # when fetching from the API via a script it seems to return English names
  28. # genre_id: (english name, japanese api name, english api name)
  29. full_genre_map = {
  30. 0: ("Popular", "人気", "Popularity"),
  31. 101: ("Music", "ミュージック", "Music"),
  32. 102: ("Idol", "アイドル", "Idol"),
  33. 103: ("Talent/Model", "タレント・モデル", "Talent Model"),
  34. 104: ("Voice Actor/Anime", "声優・アニメ", "Voice Actors & Anime"),
  35. 105: ("Comedy/Talk Show", "お笑い・トーク", "Comedians/Talk Show"),
  36. 106: ("Sports", "スポーツ", "Sports"),
  37. 107: ("Virtual", "バーチャル", "Virtual"),
  38. 200: ("Amateur", "アマチュア", "Non-Professionals"),
  39. 703: ("Karaoke", "カラオケ", "Karaoke"),
  40. 704: ("Men's", "メンズ", "MEN'S"),
  41. 701: ("Birthday", "誕生日", "DOB")
  42. }
  43. genre_map = {key: val[0] for key, val in full_genre_map.items()}
  44. class Room(object):
  45. def __init__(self, room_info=None, mod_time=0, language='eng', wanted=True):
  46. """
  47. :param room_info: Dictionary describing the room, from an index file
  48. :param mod_time: Time the source file was last modified
  49. """
  50. self._mod_time = mod_time
  51. room_info = self._fix_room_info(room_info)
  52. self._room_info = room_info
  53. self.set_language(language)
  54. self._wanted = wanted
  55. self._lock = RLock()
  56. @staticmethod
  57. def _fix_room_info(info):
  58. if 'showroom_id' in info:
  59. info['room_id'] = info.pop('showroom_id')
  60. if 'engGroup' not in info:
  61. team = info['engTeam']
  62. if '48' in team and len(team) > 5 and 'Gen' not in team:
  63. info["engGroup"] = info["engTeam"][:5].strip()
  64. info["jpnGroup"] = info["jpnTeam"][:5].strip()
  65. info["engTeam"] = info["engTeam"][5:].strip()
  66. info["jpnTeam"] = info["jpnTeam"][5:].strip()
  67. else:
  68. info['engGroup'] = info['engTeam']
  69. info['jpnGroup'] = info['jpnTeam']
  70. info['engTeam'] = ""
  71. info['jpnTeam'] = ""
  72. # TODO: genre_id
  73. # TODO: save fixed rooms
  74. return info
  75. def __getitem__(self, key):
  76. return self._room_info[key]
  77. def __bool__(self):
  78. return bool(self._room_info)
  79. def set_priority(self, new_priority, mod_time):
  80. with self._lock:
  81. self._mod_time = mod_time
  82. self._room_info['priority'] = new_priority
  83. @property
  84. def mod_time(self):
  85. return self._mod_time
  86. @property
  87. def short_url(self):
  88. return self._room_info['web_url'].split('/')[-1]
  89. @property
  90. def long_url(self):
  91. if self._room_info['web_url'].startswith('https://'):
  92. return self._room_info['web_url']
  93. else:
  94. return 'https://www.showroom-live.com/' + self._room_info['web_url'].strip('/')
  95. @property
  96. def room_id(self):
  97. return self._room_info['room_id']
  98. @property
  99. def priority(self):
  100. return self._room_info['priority']
  101. @property
  102. def name(self):
  103. return self._room_info[self._language + 'Name']
  104. @property
  105. def group(self):
  106. return self._room_info[self._language + 'Group']
  107. @property
  108. def team(self):
  109. return self._room_info[self._language + 'Team']
  110. @property
  111. def handle(self):
  112. # Don't duplicate group/team names if already in the room name
  113. # Are there any edge cases where this does the wrong thing?
  114. return ' '.join((*(x for x in (self.group, self.team) if x and x not in self.name), self.name))
  115. def is_wanted(self):
  116. return self._wanted
  117. def set_wanted(self, truth_value):
  118. with self._lock:
  119. if truth_value:
  120. self._wanted = True
  121. else:
  122. self._wanted = False
  123. def set_language(self, new_language):
  124. if new_language.lower() in ('eng', 'jpn'):
  125. self._language = new_language.lower()
  126. elif new_language.lower() in ('english', 'en'):
  127. self._language = 'eng'
  128. elif new_language.lower() in ('japanese', 'jp'):
  129. self._language = 'jpn'
  130. else:
  131. index_logger.debug('Unknown language: {}'.format(new_language))
  132. def get_language(self):
  133. if self._language == 'eng':
  134. return 'English'
  135. elif self._language == 'jpn':
  136. return 'Japanese'
  137. def get_info(self):
  138. with self._lock:
  139. return {"name": self.name,
  140. "group": self.group,
  141. "team": self.team,
  142. "room_id": self.room_id,
  143. "priority": self.priority,
  144. "web_url": self.long_url,
  145. "wanted": self.is_wanted()}
  146. # TODO: periodically check filter
  147. class ShowroomIndex(object):
  148. def __init__(self, index_directory: str=None,
  149. session: requests.Session = None,
  150. record_all: bool = False,
  151. language: str = 'eng'):
  152. self.room_dict = {}
  153. self._room_url_lookup = None
  154. self._room_name_lookup = None
  155. self._room_handle_lookup = None
  156. if session:
  157. self.session = session
  158. else:
  159. self.session = requests.Session()
  160. # TODO: test validity
  161. self.language = language
  162. # read index_directory
  163. # make note of modification times and file sizes for all *.jdex files
  164. # load data from all jdex files, creating Room objects for each unique room
  165. # updating rooms as necessary (include mod date w/ Room object? include source jdex?)
  166. # Including the source_jdex is superfluous since a room can be in multiple files,
  167. # and should only be removed if it's removed from all files
  168. # (and I don't want that to be a regular event)
  169. # mod_date is semi-useful when building the initial index
  170. self.directory = index_directory or DEFAULT_INDEX_PATH
  171. self.known_files = {}
  172. self.wanted_default = record_all
  173. self._thread = None
  174. self._build()
  175. self._lock = RLock()
  176. self._quitting = False
  177. def __len__(self):
  178. return len(self.room_dict)
  179. def __contains__(self, room_id):
  180. if room_id in self.room_dict:
  181. return True
  182. else:
  183. return False
  184. def __getitem__(self, room_id):
  185. if room_id in self.room_dict:
  186. return self.room_dict[room_id]
  187. else:
  188. return None
  189. def wants(self, room_id):
  190. try:
  191. return self.room_dict[room_id].is_wanted()
  192. except KeyError:
  193. return False
  194. def find_room(self, room_id=None, url=None, name=None, file_name=None):
  195. """
  196. Find a room matching one criterion.
  197. The first provided (non-None, non-False, non-"", etc.) criterion
  198. to match a room will be used.
  199. Args:
  200. room_id: id of the room to search for
  201. url: last part of room url,
  202. 48_Tomu_Mutou
  203. name: member name in the index's language,
  204. "Muto Tomu"
  205. file_name: either a filename, or the unique component of a filename,
  206. "161018 Showroom - AKB48 Team K Muto Tomu 2104.mp4"
  207. "AKB48 Team K Muto Tomu"
  208. Returns:
  209. A room object matching one of the given criteria, else None if no match is found.
  210. """
  211. if room_id:
  212. try:
  213. return self.room_dict[room_id]
  214. except KeyError:
  215. index_logger.debug("Failed to find ID {}".format(room_id))
  216. if url:
  217. # TODO: use the full https://www.showroom-live.com url
  218. # Primary hangup here is that the "url" could be a number of things...
  219. # but let us limit it to the end of the url, after the final /
  220. try:
  221. return self.room_url_lookup[url]
  222. except KeyError:
  223. index_logger.debug("Failed to find Room URL {}".format(url))
  224. if name:
  225. # TODO: support separating group/teams from string
  226. try:
  227. return self.room_name_lookup[name]
  228. except KeyError:
  229. index_logger.debug("Failed to find Room Name {}".format(name))
  230. if file_name:
  231. match = _filename_re.match(file_name)
  232. if match:
  233. handle = match.groups()[0]
  234. try:
  235. return self.room_handle_lookup[handle]
  236. except KeyError:
  237. index_logger.debug("Failed to find Room Handle {}".format(handle))
  238. return None
  239. # Filter methods
  240. def filter_add(self, names_to_add):
  241. for room_id in self.room_dict:
  242. if self.room_dict[room_id].name in names_to_add:
  243. self.room_dict[room_id].set_wanted(True)
  244. def filter_remove(self, names_to_remove):
  245. for room_id in self.room_dict:
  246. if self.room_dict[room_id].name in names_to_remove:
  247. self.room_dict[room_id].set_wanted(False)
  248. def filter_all(self):
  249. for room_id in self.room_dict:
  250. self.room_dict[room_id].set_wanted(True)
  251. self.wanted_default = True
  252. def filter_none(self):
  253. for room_id in self.room_dict:
  254. self.room_dict[room_id].set_wanted(False)
  255. self.wanted_default = False
  256. def filter_get_list(self):
  257. wanted = [e for e in self.room_dict if self.room_dict[e].is_wanted()]
  258. unwanted = [e for e in self.room_dict if not self.room_dict[e].is_wanted()]
  259. # is it better to process them here or at the caller?
  260. '''
  261. if len(wanted) == len(self.room_dict):
  262. return {"index_filters": {"wanted": "all", "unwanted": None}}
  263. elif len(unwanted) == len(self.room_dict):
  264. return {"index_filters": {"wanted": None, "unwanted": "all"}}
  265. elif len(wanted) > len(unwanted):
  266. result = [self.room_dict[e].name for e in unwanted]
  267. return {"index_filters": {"wanted": "remaining", "unwanted": result}}
  268. else:
  269. result = [self.room_dict[e].name for e in wanted]
  270. return {"index_filters": {"wanted": result, "unwanted": "remaining"}}
  271. '''
  272. return {"wanted": [self.room_dict[e].name for e in wanted],
  273. "unwanted": [self.room_dict[e].name for e in unwanted]}
  274. # Index methods
  275. def _build(self):
  276. index_logger.debug("Building index...")
  277. # TODO: apply record_all setting
  278. # get list of *.jdex files in index_directory
  279. found_files = glob.glob("{}/{}".format(self.directory, "*.jdex"))
  280. # need to keep a record of each file + mod_date and file_size
  281. for e in found_files:
  282. statinfo = os.stat(e)
  283. self.known_files[e] = {"mod_time": statinfo.st_mtime,
  284. "file_size": statinfo.st_size}
  285. found_files = sorted(self.known_files, key=lambda x: self.known_files[x]['mod_time'])
  286. index_logger.debug('\n'.join(['Found index files: '] + found_files))
  287. new_room_dict = {}
  288. for jdex in found_files:
  289. mod_time = self.known_files[jdex]['mod_time']
  290. # open the jdex
  291. try:
  292. with open(jdex, encoding='utf8') as infp:
  293. temp_data = json.load(infp)
  294. except json.JSONDecodeError as e:
  295. index_logger.warning('{} could not be read: {}'.format(jdex, e))
  296. continue
  297. # add each room to the room_dict and the room_url_lookup
  298. # perhaps in this phase it is not necessary to update existing
  299. # rooms but simply overwrite but later we will need to update
  300. for room in temp_data:
  301. # will overwrite duplicate rooms
  302. new_room = Room(room_info=room, mod_time=mod_time,
  303. wanted=self.wanted_default, language=self.language)
  304. new_room_dict[new_room.room_id] = new_room
  305. self.room_dict.clear()
  306. self.room_dict.update(new_room_dict)
  307. def rebuild(self):
  308. self.known_files = {}
  309. self._build()
  310. def update(self):
  311. # index_logger.debug("Checking local index")
  312. found_files = glob.glob("{}/{}".format(self.directory, "*.jdex"))
  313. changed_files = []
  314. for e in found_files:
  315. statinfo = os.stat(e)
  316. if e in self.known_files:
  317. # known file w/ changes
  318. if (statinfo.st_mtime > self.known_files[e]['mod_time'] or
  319. statinfo.st_size != self.known_files[e]['file_size']):
  320. changed_files.append(e)
  321. self.known_files[e] = {'mod_time': statinfo.st_mtime,
  322. 'file_size': statinfo.st_size}
  323. else:
  324. # new file
  325. changed_files.append(e)
  326. self.known_files[e] = {'mod_time': statinfo.st_mtime,
  327. 'file_size': statinfo.st_size}
  328. if len(changed_files) > 0:
  329. index_logger.info("Updating index")
  330. else:
  331. return
  332. changed_files = [e for e in sorted(self.known_files,
  333. key=lambda x: self.known_files[x]['mod_time'])
  334. if e in changed_files]
  335. # TODO: update information for existing rooms, not just priority
  336. for jdex in changed_files:
  337. # is it faster to assume new priorities or to check if they have changed?
  338. mod_time = self.known_files[jdex]['mod_time']
  339. try:
  340. with open(jdex, encoding='utf8') as infp:
  341. temp_data = json.load(infp)
  342. except json.JSONDecodeError:
  343. index_logger.warning('{} could not be read'.format(jdex))
  344. continue
  345. # check if room exists, if it does, check priority
  346. # if different, update priority and mod_time
  347. # if room does not exist, add
  348. for room in temp_data:
  349. room_id = room.get('room_id') or room.get('showroom_id')
  350. if room_id in self.room_dict:
  351. # is this check necessary?
  352. if room['priority'] != self.room_dict[room_id]['priority']:
  353. self.room_dict[room_id].set_priority(room['priority'], mod_time)
  354. else:
  355. new_room = Room(room_info=room, mod_time=mod_time,
  356. wanted=self.wanted_default, language=self.language)
  357. self.room_dict[new_room.room_id] = new_room
  358. if self._room_url_lookup:
  359. self._room_url_lookup[new_room.short_url] = new_room
  360. if self._room_name_lookup:
  361. self._room_name_lookup[new_room.name] = new_room
  362. if self._room_handle_lookup:
  363. self._room_handle_lookup[new_room.handle] = new_room
  364. # adding a custom room is managed by something else
  365. # likewise the option to create rooms from an event or campaign page
  366. def update_from_web(self, update_url=None):
  367. """
  368. :param update_url: URL to a list of JDEX files, w/ name, modtime, and path for each.
  369. Defaults to https://wlerin.github.io/showroom-index/list.json
  370. which see for an example.
  371. Modtime of each file is compared against the local copy and if newer, the contents are
  372. compared. Priorities and names are not changed. Group and Team may be updated, and any
  373. new rooms will be added.
  374. TODO: decide how to handle ignored rooms, i.e. say someone removes a file from the index
  375. Or just find a better way to manage who gets downloaded, e.g. a config.json option.
  376. """
  377. if not update_url:
  378. update_url = "https://wlerin.github.io/showroom-index/list.json"
  379. update_data = self.session.get(update_url).json()
  380. # TODO: Catch the error this raises when decoding fails
  381. # TODO: finish this method
  382. # 1) compare mod times
  383. # 2) get updated and new files
  384. # 3) compare contents
  385. def start(self):
  386. self._quitting = False
  387. self._thread = Thread(target=self.run, name='ShowroomIndex')
  388. self._thread.start()
  389. def stop(self):
  390. self._quitting = True
  391. def run(self):
  392. last_update = datetime.datetime.now(tz=TOKYO_TZ)
  393. update_interval = 120.0
  394. while not self._quitting:
  395. curr_time = datetime.datetime.now(tz=TOKYO_TZ)
  396. if (curr_time - last_update).total_seconds() > update_interval:
  397. self.update()
  398. time.sleep(0.9)
  399. # TODO: update from web
  400. # TODO: make last_update an attribute so it can be updated by other methods
  401. # TODO: make sure this is all thread-safe, in particular any index methods
  402. # that can be called from outside the index thread
  403. def _build_name_lookup(self):
  404. self._room_name_lookup = {}
  405. for room_id, room in self.room_dict.items():
  406. self._room_name_lookup[room.name] = room
  407. # without spaces
  408. self._room_name_lookup[re.sub(r'\s', '', room.name)] = room
  409. def _build_handle_lookup(self):
  410. self._room_handle_lookup = {}
  411. for room_id, room in self.room_dict.items():
  412. # TODO: make this a
  413. self._room_handle_lookup[room.handle] = room
  414. def _build_url_lookup(self):
  415. self._room_url_lookup = {}
  416. for room_id, room in self.room_dict.items():
  417. self._room_name_lookup[room.url] = room
  418. @property
  419. def room_name_lookup(self):
  420. with self._lock:
  421. if not self._room_name_lookup:
  422. self._build_name_lookup()
  423. return self._room_name_lookup
  424. @property
  425. def room_handle_lookup(self):
  426. with self._lock:
  427. if not self._room_handle_lookup:
  428. self._build_handle_lookup()
  429. return self._room_handle_lookup
  430. @property
  431. def room_url_lookup(self):
  432. with self._lock:
  433. if not self._room_url_lookup:
  434. self._build_url_lookup()
  435. return self._room_url_lookup