# TubeUp_2.py 3.8 KB

  1. def create_archive_org_metadata_from_youtubedl_meta(vid_meta):
  2. """
  3. Create an archive.org from youtubedl-generated metadata.
  4. :param vid_meta: A dict containing youtubedl-generated metadata.
  5. :return: A dict containing metadata to be used by
  6. internetarchive library.
  7. """
  8. title = '%s' % (vid_meta['title'])
  9. videourl = vid_meta['webpage_url']
  10. collection = TubeUp.determine_collection_type(videourl)
  11. # Some video services don't tell you the uploader,
  12. # use our program's name in that case.
  13. try:
  14. if vid_meta['extractor_key'] == 'TwitchClips' and 'creator' in vid_meta and vid_meta['creator']:
  15. uploader = vid_meta['creator']
  16. elif 'uploader' in vid_meta and vid_meta['uploader']:
  17. uploader = vid_meta['uploader']
  18. elif 'uploader_url' in vid_meta and vid_meta['uploader_url']:
  19. uploader = vid_meta['uploader_url']
  20. else:
  21. uploader = 'tubeup.py'
  22. except TypeError: # apparently uploader is null as well
  23. uploader = 'tubeup.py'
  24. uploader_url = vid_meta.get('uploader_url', videourl)
  25. try: # some videos don't give an upload date
  26. d = datetime.strptime(vid_meta['upload_date'], '%Y%m%d')
  27. upload_date = d.isoformat().split('T')[0]
  28. upload_year = upload_date[:4] # 20150614 -> 2015
  29. except (KeyError, TypeError):
  30. # Use current date and time as default values
  31. upload_date = time.strftime("%Y-%m-%d")
  32. upload_year = time.strftime("%Y")
  33. # load up tags into an IA compatible semicolon-separated string
  34. # example: Youtube;video;
  35. tags_string = '%s;video;' % vid_meta['extractor_key']
  36. if 'categories' in vid_meta:
  37. # add categories as tags as well, if they exist
  38. try:
  39. for category in vid_meta['categories']:
  40. tags_string += '%s;' % category
  41. except Exception:
  42. print("No categories found.")
  43. if 'tags' in vid_meta: # some video services don't have tags
  44. try:
  45. if 'tags' in vid_meta is None:
  46. tags_string += '%s;' % vid_meta['id']
  47. tags_string += '%s;' % 'video'
  48. else:
  49. for tag in vid_meta['tags']:
  50. tags_string += '%s;' % tag
  51. except Exception:
  52. print("Unable to process tags successfully.")
  53. # license
  54. licenseurl = TubeUp.determine_licenseurl(vid_meta)
  55. # if there is no description don't upload the empty .description file
  56. description_text = vid_meta.get('description', '')
  57. if description_text is None:
  58. description_text = ''
  59. # archive.org does not display raw newlines
  60. description_text = re.sub('\r?\n', '<br>', description_text)
  61. description = ('{0} <br/><br/>Source: <a href="{1}">{2}</a>'
  62. '<br/>Uploader: <a href="{3}">{4}</a>').format(
  63. description_text, videourl, videourl, uploader_url, uploader)
  64. metadata = dict(
  65. mediatype=('audio' if collection == 'opensource_audio'
  66. else 'movies'),
  67. creator=uploader,
  68. collection=collection,
  69. title=title,
  70. description=description,
  71. date=upload_date,
  72. year=upload_year,
  73. subject=tags_string,
  74. originalurl=videourl,
  75. licenseurl=licenseurl,
  76. # Set 'scanner' metadata pair to allow tracking of TubeUp
  77. # powered uploads, per request from archive.org
  78. scanner='TubeUp Video Stream Mirroring Application {}'.format(__version__))
  79. return metadata