# concat.py
  1. #!/usr/bin/env python3
  2. """
  3. python3 concat.py [-h] [--generate] [--merge] [--both] [--max-gap MAX_GAP]
  4. [-e EXT]
  5. [TARGET_DIR]
  6. Generates concat files for merging. Creates separate videos for separate
  7. broadcasts and incompatible resolutions, ignores very broken videos.
  8. positional arguments:
  9. TARGET_DIR Optional. defaults to the current working directory.
  10. optional arguments:
  11. -h, --help show this help message and exit
  12. --generate generates concat files in TARGET_DIR, runs by default
  13. --merge merges videos in TARGET_DIR according to existing concat
  14. files
  15. --both both generates concat files and merges videos
  16. --max-gap MAX_GAP maximum gap between merged videos, in seconds. anything
  17. larger is treated as a separate broadcast
  18. -e EXT extension to merge, defaults to mp4
  19. When merging, watch the output for "Non-monotonous DTS in output stream" -- A
  20. few of these are harmless but a wall of them means that video is probably
  21. corrupted.
  22. scene detection with ffprobe
  23. https://lists.ffmpeg.org/pipermail/ffmpeg-user/2012-November/011101.html
  24. ffprobe -show_frames -of compact=p=0 -f lavfi \
  25. "movie=$F,select=gt(scene\,.8)" | gsed -r \
  26. 's/.*pkt_pts_time=([0-9.]{8,})\|.*/\1/' >> scenes-0.8
  27. that doesn't work though.
  28. https://pypi.python.org/pypi/PySceneDetect/
  29. https://pyscenedetect.readthedocs.io/en/latest/features/
  30. """
  31. import os
  32. import glob
  33. import json
  34. import shutil
  35. import argparse
  36. from math import floor
  37. from subprocess import check_output, run, CalledProcessError
  38. from showroom.settings import settings as config
# Known broadcast resolutions seen in the wild:
#   352x198
#   640x360
#   704x396
#   960x540   (march kimi dare episodes)
#   1280x720  (a single kimi dare episode)
#   1920x1080 (ann vr)
# GOOD_HEIGHTS = (180, 198, 270, 360, 396, 720, 1080)
# Heights that usually indicate a broken/placeholder recording (see the
# validity check in generate_concat_files).
BAD_HEIGHTS = (540,)
# Default bitrate for upscaled videos; this is actually a bit too high.
DEF_BITRATE = '300k'
# old version
  51. """
  52. def create_concat_files(target_dir, target_ext):
  53. oldcwd = os.getcwd()
  54. os.chdir(target_dir)
  55. # TODO: use ffprobe to separate files with incompatible resolutions and those with a gap greater than ~10 minutes
  56. files = sorted(glob.glob('{}/*.{}'.format(target_dir, target_ext)))
  57. member_dict = {}
  58. for file in files:
  59. member_name = file.rsplit(' ', 1)[0]
  60. if member_name not in member_dict:
  61. member_dict[member_name] = []
  62. member_dict[member_name].append(file)
  63. concat_files = {}
  64. for key in member_dict.keys():
  65. filename = key +' ' + member_dict[key][0].rsplit(' ', 1)[1][:4] + '.mp4.concat'
  66. text = ""
  67. for item in member_dict[key]:
  68. text += "file '" + item + "'\n"
  69. concat_files.update({filename:text})
  70. for key in concat_files.keys():
  71. with open(key, 'w', encoding='utf8') as outfp:
  72. _ = outfp.write(concat_files[key])
  73. os.chdir(oldcwd)
  74. """
  75. """
  76. {
  77. "member1" : [
  78. video1,
  79. video2,
  80. video3,
  81. video4
  82. ],
  83. "member2" : [
  84. ]
  85. }
  86. video: {
  87. "start_time" : parsed from file name; in seconds,
  88. "duration" : parsed from ffprobe,
  89. "height" : parsed from ffprobe,
  90. "valid" : true or false (false for stuff with no video content),
  91. "file" : location of file
  92. }
  93. "duration"
  94. "height"
  95. sample ffprobe output:
  96. {
  97. "programs": [
  98. ],
  99. "streams": [
  100. {
  101. "height": 198,
  102. "duration": "499.654000"
  103. }
  104. ]
  105. }
  106. for member in members:
  107. """
  108. # TODO: set this in some other module, perhaps constants
  109. if os.name == 'nt':
  110. _iswin32 = True
  111. else:
  112. _iswin32 = False
  113. _ffmpeg = config.ffmpeg.path
  114. _ffprobe = os.path.join(os.path.split(_ffmpeg)[0], 'ffprobe')
  115. def probe_file(filename):
  116. if _iswin32:
  117. extra_args = dict(shell=True)
  118. else:
  119. extra_args = dict()
  120. # So, I need to get both audio and video stream data
  121. # Simplest way to do that is to fetch all the streams
  122. # and map the audio stream to an audio key and the video to a video key etc.
  123. try:
  124. data = check_output(
  125. [
  126. _ffprobe,
  127. '-loglevel', '16',
  128. # '-show_entries', 'stream={}'.format(','.join(streams)),
  129. # '-select_streams', 'v,a',
  130. '-show_streams',
  131. '-i', filename,
  132. '-of', 'json'
  133. ],
  134. universal_newlines=True,
  135. **extra_args
  136. )
  137. except CalledProcessError:
  138. return None
  139. try:
  140. streams = json.loads(data)['streams']
  141. except KeyError:
  142. # TODO: log this
  143. return None
  144. results = {}
  145. for stream in streams:
  146. if stream['codec_type'] == 'video':
  147. if 'video' in results:
  148. # TODO: log this
  149. print('Found multiple video streams in {}, ignoring extra stream info'.format(filename))
  150. else:
  151. results['video'] = stream
  152. elif stream['codec_type'] == 'audio':
  153. if 'audio' in results:
  154. print('Found multiple audio streams in {}, ignoring extra stream info'.format(filename))
  155. else:
  156. results['audio'] = stream
  157. else:
  158. print('Found unknown stream type in {}: {}'.format(filename, stream['codec_type']))
  159. if len(results) == 1:
  160. print('Found only one stream in', filename)
  161. print(json.dumps(results, indent=2))
  162. return results
  163. def get_source_videos(target_ext):
  164. # TODO: properly support ts -> mp4 conversions
  165. # going from ts -> mp4 requires more logic than this (in particular, need to check video and audio codecs)
  166. # also, completed files should be excluded from this, no?
  167. files = sorted(glob.glob('*.{}'.format(target_ext)))
  168. if target_ext == 'mp4' and len(files) == 0:
  169. # kludge to support ts source files
  170. files = sorted(glob.glob('*.{}'.format('ts')))
  171. return files
  172. def resize_videos(target_dir, target_ext, copytb=1, target_bitrate='300k'):
  173. # TODO: scale up to the tallest video in a "stream"
  174. oldcwd = os.getcwd()
  175. os.chdir(target_dir)
  176. files = get_source_videos(target_ext)
  177. members = set()
  178. to_resize = []
  179. for file in files:
  180. results = probe_file(file)
  181. if results:
  182. if float(results['video']['duration']) >= 0.001 and int(results['video']['height']) == 198:
  183. to_resize.append(file)
  184. if len(to_resize) > 0:
  185. os.makedirs('resized', exist_ok=True)
  186. else:
  187. os.chdir(oldcwd)
  188. return
  189. codecs = {'mp4': 'libx264', 'webm': 'libvpx'}
  190. video_codec = codecs[target_ext]
  191. # the concat demuxer is not sufficient to merge files resized this way
  192. for file in to_resize:
  193. low_res_file = 'resized/' + file.replace('.' + target_ext, '_198p.' + target_ext)
  194. shutil.move(file, low_res_file)
  195. members.add(file.rsplit(' ', 1)[0])
  196. run([_ffmpeg,
  197. '-copytb', str(copytb),
  198. '-hide_banner', '-nostats',
  199. '-i', low_res_file,
  200. '-c:v', video_codec,
  201. # '-maxrate', str(target_bitrate),
  202. # '-bufsize', BUFSIZE,
  203. # '-crf', '18',
  204. '-vsync', '0', # leave timestamps unchanged
  205. '-refs', '1', # single reference frame, like the original
  206. '-copyts',
  207. '-b:v', target_bitrate,
  208. '-vf', 'scale=-1:360', # 'scale=-1:360,mpdecimate',
  209. '-c:a', 'copy', file])
  210. with open('resized.json', 'w', encoding='utf8') as outfp:
  211. json.dump(sorted(members), outfp, indent=2, ensure_ascii=False)
  212. os.chdir(oldcwd)
  213. def generate_concat_files(target_dir, target_ext, max_gap):
  214. oldcwd = os.getcwd()
  215. os.chdir(target_dir)
  216. max_gap = float(max_gap)
  217. try:
  218. with open('resized.json', encoding='utf8') as infp:
  219. resized_members = tuple(json.load(infp))
  220. except FileNotFoundError:
  221. resized_members = ()
  222. # TODO: deal with leftovers (from after 24:00)
  223. files = get_source_videos(target_ext)
  224. def get_start_seconds(file):
  225. time_str = file.rsplit(' ', 1)[1].split('.')[0]
  226. hours, minutes, seconds = int(time_str[:2]), int(time_str[2:4]), int(time_str[4:6])
  227. return float(hours * 60 * 60 + minutes * 60 + seconds)
  228. def get_start_hhmm(seconds):
  229. hours = seconds / (60 * 60)
  230. minutes = (hours - floor(hours)) * 60
  231. return '{:02d}{:02d}'.format(floor(hours), floor(minutes))
  232. member_dict = {}
  233. for file in files:
  234. streams = probe_file(file)
  235. if not streams:
  236. continue
  237. member_name = file.rsplit(' ', 1)[0]
  238. if member_name not in member_dict:
  239. member_dict[member_name] = []
  240. new_video = {"start_time": get_start_seconds(file)}
  241. # try:
  242. # stream = json.loads(results)['streams'][0]
  243. # except IndexError:
  244. # new_video['valid'] = False
  245. # print('failed to load ffprobe results')
  246. # print(results)
  247. # else:
  248. new_video['file'] = file
  249. new_video['duration'] = float(streams['video']['duration'])
  250. new_video['bit_rate'] = int(streams['video']['bit_rate'])
  251. new_video['height'] = int(streams['video']['height'])
  252. new_video['audio_sample_rate'] = int(streams['audio']['sample_rate'])
  253. if new_video['duration'] >= 0.001:
  254. if new_video['height'] in BAD_HEIGHTS and new_video['duration'] < 90 and new_video['bit_rate'] < 10000:
  255. new_video['valid'] = False
  256. else:
  257. new_video['valid'] = True
  258. else:
  259. new_video['valid'] = False
  260. if new_video['valid']:
  261. member_dict[member_name].append(new_video)
  262. concat_files = {}
  263. def new_concat_file(member, first_video):
  264. # decide between .proto and .concat based on presence of member_name in resized.json
  265. if member in resized_members:
  266. info_ext = 'proto'
  267. else:
  268. info_ext = 'concat'
  269. filename = '{} {}.{}.{}'.format(member, get_start_hhmm(first_video['start_time']), target_ext, info_ext)
  270. info = {'files': []}
  271. info['height'] = first_video['height']
  272. info['audio_sample_rate'] = first_video['audio_sample_rate']
  273. info['last_time'] = first_video['start_time'] + first_video['duration']
  274. info['files'].append(first_video['file'])
  275. return filename, info
  276. for member in member_dict.keys():
  277. """
  278. file_specifier (name + hhmm) : {
  279. height : 360 or 198,
  280. last_time : start_time + duration of most recently processed video
  281. files: [
  282. list of files
  283. ]
  284. }
  285. """
  286. try:
  287. filename, working = new_concat_file(member, member_dict[member][0])
  288. except IndexError:
  289. # no valid videos
  290. print('Failed to read videos for {}'.format(member))
  291. print(member_dict)
  292. continue
  293. for item in member_dict[member][1:]:
  294. if (item['start_time'] >= working['last_time'] + max_gap
  295. or item['height'] != working['height']
  296. or item['audio_sample_rate'] != working['audio_sample_rate']):
  297. if filename in working:
  298. # This needs to be dealt with by hand for now
  299. print('Tried to add duplicate concat file name: {}'.format(filename))
  300. raise FileExistsError
  301. concat_files[filename] = working
  302. filename, working = new_concat_file(member, item)
  303. else:
  304. if item['start_time'] < working['last_time'] - 5.0:
  305. print('{} overlaps {}'.format(item['file'], working['files'][-1]))
  306. # these have to be dealt with manually
  307. working['files'].append(item['file'])
  308. working['last_time'] = item['start_time'] + item['duration']
  309. concat_files[filename] = working
  310. for file in concat_files.keys():
  311. # skip singleton videos
  312. # if len(concat_files[file]['files']) == 1:
  313. # continue
  314. text = ""
  315. for item in concat_files[file]['files']:
  316. text += "file '" + item + "'\n"
  317. with open(file, 'w', encoding='utf8') as outfp:
  318. outfp.write(text)
  319. os.chdir(oldcwd)
  320. """
  321. #!/bin/bash
  322. # for f in ./*.mp4; do echo "file '$f'" >> mylist.txt; done
  323. # for f in ./*.concat; do echo "$( basename $f )"; done
  324. # for f in ./*.concat; do g="\"$( basename "$f" .mp4)\""; echo $f; echo $g; done
  325. # echo "\"$( basename ./160612\ Showroom\ -\ AKB48\ Team\ K\ Tano\ Yuka\ 124028.mp4 .mp4)\""
  326. for f in ./*.concat; do
  327. g=$( basename "$f" .concat);
  328. #ffmpeg -copytb 1 -f concat -i "$f" -vf "pad=width=640:height=360:(ow-iw)/2:(oh-ih)/2:color=black" -movflags +faststart "$g";
  329. ffmpeg -copytb 1 -f concat -i "$f" -movflags +faststart -c copy "$g";
  330. done
  331. """
  332. def merge_videos(target_dir, output_dir, copyts=False, copytb=1):
  333. oldcwd = os.getcwd()
  334. os.chdir(target_dir)
  335. os.makedirs(output_dir, exist_ok=True)
  336. bTempFiles = False
  337. for ext in ('concat', 'proto'):
  338. for concat_file in glob.glob('*.' + ext):
  339. outfile = '{}/{}'.format(output_dir, os.path.splitext(concat_file)[0])
  340. instructions = ['-hide_banner', '-nostats',
  341. # '-report',
  342. # 'file=logs/concat-{}.log:level=40'.format(os.path.splitext(concat_file)[0]),
  343. '-copytb', str(copytb)]
  344. with open(concat_file, encoding='utf8') as infp:
  345. data = infp.read()
  346. if data.count('file \'') == 0:
  347. print("Empty concat file: {}".format(concat_file))
  348. continue
  349. if data.count('file \'') == 1:
  350. src = data[5:].strip('\'\n./')
  351. instructions.extend(['-i', src])
  352. elif ext == 'concat':
  353. instructions.extend(['-auto_convert', '1', '-f', 'concat', '-safe', '0', '-i', concat_file])
  354. # ts source kludge
  355. with open(concat_file, encoding='utf8') as infp:
  356. for line in concat_file:
  357. if line.strip().endswith('.ts\''):
  358. instructions.extend(['-bsf:a', 'aac_adtstoasc'])
  359. break
  360. else:
  361. break
  362. else:
  363. os.makedirs('temp', exist_ok=True)
  364. src_videos = []
  365. for line in data.split('\n'):
  366. if line.strip():
  367. src_videos.append(line.strip()[6:-1]) # skip blank lines
  368. bTempFiles = True
  369. temp_videos = []
  370. for video in src_videos:
  371. tempfile = 'temp/' + video + '.ts'
  372. run([_ffmpeg,
  373. '-i', video,
  374. '-c', 'copy',
  375. '-bsf:v', 'h264_mp4toannexb',
  376. '-f', 'mpegts',
  377. tempfile])
  378. temp_videos.append(tempfile)
  379. videostring = 'concat:' + '|'.join(temp_videos)
  380. instructions.extend(['-i', videostring, '-bsf:a', 'aac_adtstoasc'])
  381. if copyts:
  382. instructions.append('-copyts')
  383. run([_ffmpeg,
  384. *instructions,
  385. '-movflags', '+faststart',
  386. '-c', 'copy', outfile])
  387. if bTempFiles:
  388. for tempfile in glob.glob('temp/*.ts'):
  389. os.remove(tempfile)
  390. bTempFiles = False
  391. os.chdir(oldcwd)
  392. if __name__ == '__main__':
  393. parser = argparse.ArgumentParser(
  394. description="Generates concat files for merging. Creates separate videos for separate broadcasts and \
  395. incompatible resolutions, ignores very broken videos.",
  396. epilog="When merging, watch the output for \"Non-monotonous DTS in output stream\" -- A few of these are \
  397. harmless but a wall of them means that video is probably corrupted.")
  398. parser.add_argument("--resize", action='store_true',
  399. help='!!EXPERIMENTAL!! resizes 198p videos in TARGET_DIR to 360p, '
  400. 'saves the old videos in a new "resized" subdirectory. Only supports h264 (MP4) and vpx (WEBM)')
  401. parser.add_argument("--generate", action='store_true', help='generates concat files in TARGET_DIR, runs by default')
  402. parser.add_argument("--merge", action='store_true',
  403. help='merges videos in TARGET_DIR according to existing concat files')
  404. parser.add_argument("--both", action='store_true', help='both generates concat files and merges videos')
  405. parser.add_argument("--aggressive", action='store_true', help='!!EXPERIMENTAL!! resizes, generates, and merges')
  406. parser.add_argument("target_dir", nargs='?', default='.',
  407. help='Optional. defaults to the current working directory.', metavar='TARGET_DIR')
  408. parser.add_argument("--max-gap", type=float, default=600.0,
  409. help='maximum gap between merged videos, in seconds. anything larger is treated as a separate \
  410. broadcast. default = 600.0')
  411. parser.add_argument("-e", dest='ext', default='mp4', help='extension to merge, defaults to mp4')
  412. parser.add_argument("--copytb", type=int, choices=[-1, 0, 1], default=1,
  413. help='it may be useful to try setting this to 0 or -1 if a video has timing issues.'
  414. 'Defaults to %(default)s')
  415. parser.add_argument('--copyts', action='store_true', help='Try setting this if there\'s a lot of DTS adjustment. '
  416. 'Only affects merges.')
  417. parser.add_argument("--output-dir", "-o", dest='output_dir', type=str, default='.',
  418. help='Optional, defaults to target directory. Note that relative paths will be relative to \
  419. the target directory, not the current working directory', metavar='OUTPUT_DIR')
  420. parser.add_argument("--bitrate", "-b", type=str, default=DEF_BITRATE,
  421. help='Bitrate for resizing. Defaults to %(default)s')
  422. parser.add_argument("--use-concat-protocol", action="store_true",
  423. help="!!EXPERIMENTAL!! Uses ffmpeg's concat protocol"
  424. " instead of the concat demuxer to allow merging videos with differing timebases (as result from"
  425. " --resize). Creates temporary intermediate .ts files. Used automatically with --aggressive")
  426. args = parser.parse_args()
  427. if args.resize or args.aggressive:
  428. resize_videos(target_dir=args.target_dir, target_ext=args.ext,
  429. copytb=args.copytb, target_bitrate=args.bitrate)
  430. if args.generate or args.both or args.aggressive:
  431. generate_concat_files(target_dir=args.target_dir, target_ext=args.ext,
  432. max_gap=args.max_gap)
  433. if (args.merge or args.both) and not args.use_concat_protocol:
  434. merge_videos(target_dir=args.target_dir, output_dir=args.output_dir, copyts=args.copyts,
  435. copytb=args.copytb)
  436. if args.aggressive or ((args.merge or args.both) and args.use_concat_protocol):
  437. merge_videos(target_dir=args.target_dir, output_dir=args.output_dir, copyts=args.copyts,
  438. copytb=args.copytb)
  439. # 2017-02-02
  440. # Making Aggressive Concat saner
  441. # resize creates a json
  442. # this lists all the names that have been resized. it kind of matters which videos if there was more than one broadcast in a given day,
  443. # but i'm not going to worry about that right now
  444. #