def create_archive_org_metadata_from_youtubedl_meta(vid_meta):
    """
    Create archive.org item metadata from youtubedl-generated metadata.

    :param vid_meta: A dict containing youtubedl-generated metadata.
                     'title', 'webpage_url' and 'extractor_key' are read
                     unconditionally; 'creator', 'uploader',
                     'uploader_url', 'upload_date', 'categories', 'tags',
                     'id' and 'description' are used when present.
    :return:         A dict containing metadata to be used by
                     internetarchive library.
    :raises KeyError: If 'title', 'webpage_url' or 'extractor_key' is
                      missing from vid_meta.
    """
    title = str(vid_meta['title'])
    videourl = vid_meta['webpage_url']

    collection = TubeUp.determine_collection_type(videourl)
    extractor_key = vid_meta['extractor_key']

    # Some video services don't tell you the uploader (or report it as
    # null); use our program's name in that case.
    if extractor_key == 'TwitchClips' and vid_meta.get('creator'):
        uploader = vid_meta['creator']
    else:
        uploader = (vid_meta.get('uploader')
                    or vid_meta.get('uploader_url')
                    or 'tubeup.py')

    # The key may exist with a null value, so a plain .get() default is
    # not enough to guarantee a usable link target.
    uploader_url = vid_meta.get('uploader_url') or videourl

    try:
        # Some videos don't give an upload date (missing or null).
        parsed_date = datetime.strptime(vid_meta['upload_date'], '%Y%m%d')
    except (KeyError, TypeError):
        # Use current date and time as default values.
        upload_date = time.strftime("%Y-%m-%d")
        upload_year = time.strftime("%Y")
    else:
        upload_date = parsed_date.date().isoformat()  # 20150614 -> 2015-06-14
        upload_year = upload_date[:4]                 # 2015-06-14 -> 2015

    # Collect tags, later joined into an IA compatible semicolon-terminated
    # string, for example: Youtube;video;
    tags = [extractor_key, 'video']

    # Add categories as tags as well, if they exist.
    if 'categories' in vid_meta:
        try:
            tags.extend(vid_meta['categories'])
        except TypeError:
            # Present but not iterable (e.g. null).
            print("No categories found.")

    # Some video services don't have tags.
    if 'tags' in vid_meta:
        try:
            if vid_meta['tags'] is None:
                # Key present but null: fall back to the item id so the
                # upload still carries an identifying tag.
                tags.extend([vid_meta['id'], 'video'])
            else:
                tags.extend(vid_meta['tags'])
        except (KeyError, TypeError):
            print("Unable to process tags successfully.")

    tags_string = ''.join('{};'.format(tag) for tag in tags)

    # license
    licenseurl = TubeUp.determine_licenseurl(vid_meta)

    # A missing or null description becomes an empty string so the
    # substitution and formatting below always operate on text.
    description_text = vid_meta.get('description') or ''

    # archive.org does not display raw newlines, so emit explicit HTML
    # line breaks instead.
    description_text = re.sub(r'\r?\n', '<br>', description_text)

    description = ('{0} <br/><br/>Source: <a href="{1}">{2}</a>'
                   '<br/>Uploader: <a href="{3}">{4}</a>').format(
        description_text, videourl, videourl, uploader_url, uploader)

    metadata = dict(
        mediatype=('audio' if collection == 'opensource_audio'
                   else 'movies'),
        creator=uploader,
        collection=collection,
        title=title,
        description=description,
        date=upload_date,
        year=upload_year,
        subject=tags_string,
        originalurl=videourl,
        licenseurl=licenseurl,

        # Set 'scanner' metadata pair to allow tracking of TubeUp
        # powered uploads, per request from archive.org
        scanner='TubeUp Video Stream Mirroring Application {}'.format(
            __version__))

    return metadata