123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
def create_archive_org_metadata_from_youtubedl_meta(vid_meta):
    """
    Create archive.org metadata from youtubedl-generated metadata.

    The uploader, upload date, tags and description are all optional in
    the input; each one falls back to a default when it is missing or
    null so that a usable metadata dict is always produced.

    :param vid_meta: A dict containing youtubedl-generated metadata.
                     'title', 'webpage_url' and 'extractor_key' are
                     read unconditionally.
    :return:         A dict containing metadata to be used by
                     internetarchive library.
    :raises KeyError: If 'title', 'webpage_url' or 'extractor_key' is
                      absent from ``vid_meta``.
    """
    title = '%s' % (vid_meta['title'])
    videourl = vid_meta['webpage_url']
    collection = TubeUp.determine_collection_type(videourl)

    # Some video services don't tell you the uploader,
    # use our program's name in that case.
    is_twitch_clip = vid_meta['extractor_key'] == 'TwitchClips'
    if is_twitch_clip and vid_meta.get('creator'):
        uploader = vid_meta['creator']
    else:
        # Null or empty values are skipped, not just missing keys.
        uploader = (vid_meta.get('uploader')
                    or vid_meta.get('uploader_url')
                    or 'tubeup.py')

    # The key may be present but null; link back to the video in that
    # case instead of emitting href="None".
    uploader_url = vid_meta.get('uploader_url') or videourl

    try:  # some videos don't give an upload date
        parsed_date = datetime.strptime(vid_meta['upload_date'], '%Y%m%d')
    except (KeyError, TypeError, ValueError):
        # Missing, null or malformed date:
        # use current date and time as default values
        upload_date = time.strftime("%Y-%m-%d")
        upload_year = time.strftime("%Y")
    else:
        upload_date = parsed_date.date().isoformat()
        upload_year = upload_date[:4]  # 2015-06-14 -> 2015

    # load up tags into an IA compatible semicolon-separated string
    # example: Youtube;video;
    tag_parts = [vid_meta['extractor_key'], 'video']

    if 'categories' in vid_meta:
        # add categories as tags as well, if they exist
        try:
            tag_parts.extend(vid_meta['categories'])
        except TypeError:
            # the key exists but holds null / a non-iterable value
            print("No categories found.")

    if 'tags' in vid_meta:  # some video services don't have tags
        try:
            if vid_meta['tags'] is None:
                # The service reported the field but left it empty:
                # fall back to the video id plus a generic tag.
                tag_parts.extend([vid_meta['id'], 'video'])
            else:
                tag_parts.extend(vid_meta['tags'])
        except (KeyError, TypeError):
            print("Unable to process tags successfully.")

    # Every entry, including the last one, is terminated by ';'.
    tags_string = ';'.join(str(part) for part in tag_parts) + ';'

    # license
    licenseurl = TubeUp.determine_licenseurl(vid_meta)

    # A missing or null description is treated as empty text.
    description_text = vid_meta.get('description') or ''
    # archive.org does not display raw newlines
    description_text = re.sub(r'\r?\n', '<br>', description_text)

    description = ('{0} <br/><br/>Source: <a href="{1}">{2}</a>'
                   '<br/>Uploader: <a href="{3}">{4}</a>').format(
        description_text, videourl, videourl, uploader_url, uploader)

    metadata = dict(
        mediatype=('audio' if collection == 'opensource_audio'
                   else 'movies'),
        creator=uploader,
        collection=collection,
        title=title,
        description=description,
        date=upload_date,
        year=upload_year,
        subject=tags_string,
        originalurl=videourl,
        licenseurl=licenseurl,
        # Set 'scanner' metadata pair to allow tracking of TubeUp
        # powered uploads, per request from archive.org
        scanner='TubeUp Video Stream Mirroring Application {}'.format(
            __version__))

    return metadata
|