def create_archive_org_metadata_from_youtubedl_meta(vid_meta):
    """
    Create archive.org item metadata from youtubedl-generated metadata.

    :param vid_meta: A dict containing youtubedl-generated metadata.
                     'title', 'webpage_url' and 'extractor_key' are
                     read unconditionally; every other key is optional.
    :return:         A dict containing metadata to be used by
                     internetarchive library.
    :raises KeyError: if one of the unconditionally read keys is missing.
    """
    title = '%s' % vid_meta['title']
    videourl = vid_meta['webpage_url']
    collection = TubeUp.determine_collection_type(videourl)

    # Some video services don't tell you the uploader,
    # use our program's name in that case.
    try:
        if (vid_meta['extractor_key'] == 'TwitchClips'
                and vid_meta.get('creator')):
            uploader = vid_meta['creator']
        else:
            uploader = (vid_meta.get('uploader')
                        or vid_meta.get('uploader_url')
                        or 'tubeup.py')
    except TypeError:  # apparently uploader is null as well
        uploader = 'tubeup.py'

    # The key may be present with a null value; fall back to the video
    # page in that case too, so the link never points at "None".
    uploader_url = vid_meta.get('uploader_url') or videourl

    try:  # some videos don't give an upload date
        parsed = datetime.strptime(vid_meta['upload_date'], '%Y%m%d')
        upload_date = parsed.date().isoformat()  # 20150614 -> 2015-06-14
        upload_year = upload_date[:4]  # 2015-06-14 -> 2015
    except (KeyError, TypeError, ValueError):
        # Missing, null or malformed date: use the current date instead.
        upload_date = time.strftime("%Y-%m-%d")
        upload_year = time.strftime("%Y")

    # load up tags into an IA compatible semicolon-separated string
    # example: Youtube;video;
    tag_parts = ['%s;video;' % vid_meta['extractor_key']]

    if 'categories' in vid_meta:
        # add categories as tags as well, if they exist
        try:
            for category in vid_meta['categories']:
                tag_parts.append('%s;' % category)
        except Exception:
            # Best effort: bad category data must not abort the upload.
            print("No categories found.")

    if 'tags' in vid_meta:  # some video services don't have tags
        try:
            if vid_meta['tags'] is None:
                # Key present but null: tag the item with its id instead.
                tag_parts.append('%s;' % vid_meta['id'])
                tag_parts.append('%s;' % 'video')
            else:
                for tag in vid_meta['tags']:
                    tag_parts.append('%s;' % tag)
        except Exception:
            # Best effort: bad tag data must not abort the upload.
            print("Unable to process tags successfully.")

    tags_string = ''.join(tag_parts)

    # license
    licenseurl = TubeUp.determine_licenseurl(vid_meta)

    # A missing or null description is treated as empty text.
    description_text = vid_meta.get('description') or ''

    # archive.org does not display raw newlines
    description_text = re.sub(r'\r?\n', '<br>', description_text)

    # NOTE(review): the link markup is reconstructed from the five
    # format arguments ({1} and {3} are the hrefs) -- confirm against a
    # rendered item page.
    description = ('{0} <br/><br/>Source: <a href="{1}">{2}</a>'
                   '<br/>Uploader: <a href="{3}">{4}</a>').format(
        description_text, videourl, videourl, uploader_url, uploader)

    metadata = dict(
        mediatype=('audio' if collection == 'opensource_audio'
                   else 'movies'),
        creator=uploader,
        collection=collection,
        title=title,
        description=description,
        date=upload_date,
        year=upload_year,
        subject=tags_string,
        originalurl=videourl,
        licenseurl=licenseurl,

        # Set 'scanner' metadata pair to allow tracking of TubeUp
        # powered uploads, per request from archive.org
        scanner='TubeUp Video Stream Mirroring Application {}'.format(
            __version__))

    return metadata