iagitup.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import unicode_literals
  4. __author__ = "Giovanni Damiola"
  5. __copyright__ = "Copyright 2018, Giovanni Damiola"
  6. __main_name__ = 'iagitup'
  7. __license__ = 'GPLv3'
  8. __version__ = "v1.6.2"
  9. import os
  10. import sys
  11. import subprocess
  12. import shutil
  13. import json
  14. import internetarchive
  15. import internetarchive.cli
  16. import git
  17. import requests
  18. from datetime import datetime
  19. from markdown2 import markdown_path
  20. def mkdirs(path):
  21. """Make directory, if it doesn't exist."""
  22. if not os.path.exists(path):
  23. os.makedirs(path)
  24. # download the github repo
  25. def repo_download(github_repo_url):
  26. """Downloads a GitHub repo locally.
  27. arguments:
  28. github_repo_url -- the GitHub repo home url
  29. returns:
  30. gh_repo_data, repo_folder - the repo details and the local repo folder
  31. """
  32. download_dir = os.path.expanduser('~/.iagitup/downloads')
  33. mkdirs(os.path.expanduser('~/.iagitup'))
  34. mkdirs(download_dir)
  35. # parsing url to initialize the github api rul and get the repo_data
  36. gh_user, gh_repo = github_repo_url.split('/')[3:]
  37. gh_api_url = "https://api.github.com/repos/{}/{}".format(gh_user, gh_repo)
  38. # delete the temp directory if exists
  39. repo_folder = os.path.join(download_dir, gh_repo)
  40. if os.path.exists(repo_folder):
  41. shutil.rmtree(repo_folder)
  42. # get the data from GitHub api
  43. req = requests.get(gh_api_url)
  44. if req.status_code == 200:
  45. gh_repo_data = json.loads(req.text)
  46. # download the repo from github
  47. repo_folder = os.path.join(download_dir, gh_repo)
  48. try:
  49. git.Git().clone(gh_repo_data['clone_url'], repo_folder)
  50. except Exception as e:
  51. print('Error occurred while downloading: {}'.format(github_repo_url))
  52. print(str(e))
  53. exit(1)
  54. else:
  55. raise ValueError('Error occurred while downloading: {}'.format(github_repo_url))
  56. return gh_repo_data, repo_folder
  57. def get_description_from_readme(gh_repo_folder):
  58. """From the GitHub repo returns html description from the README.md or readme.txt
  59. arguments:
  60. gh_repo_folder -- the repo local folder path
  61. returns:
  62. description -- html description
  63. """
  64. path = os.path.join(gh_repo_folder, 'README.md')
  65. path3 = os.path.join(gh_repo_folder, 'readme.md')
  66. path2 = os.path.join(gh_repo_folder, 'readme.txt')
  67. description = ''
  68. if os.path.exists(path):
  69. description = markdown_path(path)
  70. description = description.replace('\n', '')
  71. elif os.path.exists(path3):
  72. description = markdown_path(path3)
  73. description = description.replace('\n', '')
  74. elif os.path.exists(path2):
  75. with open(path2, 'r') as f:
  76. description = f.readlines()
  77. description =' '.join(description)
  78. return description
  79. def create_bundle(gh_repo_folder, repo_name):
  80. """creates the gir repository bundle to upload
  81. arguments:
  82. gh_repo_folder -- the repo local folder path
  83. repo_name -- the repo name
  84. returns:
  85. bundle_path -- the path to the bundle file
  86. """
  87. print(gh_repo_folder, repo_name)
  88. if os.path.exists(gh_repo_folder):
  89. main_pwd = os.getcwd()
  90. os.chdir(gh_repo_folder)
  91. bundle_name = '{}.bundle'.format(repo_name)
  92. subprocess.check_call(['git', 'bundle', 'create', bundle_name, '--all'])
  93. bundle_path = os.path.join(gh_repo_folder, bundle_name)
  94. os.chdir(main_pwd)
  95. else:
  96. raise ValueError('Error creating bundle, directory does not exist: {}'.format(gh_repo_folder))
  97. return bundle_path
  98. def upload_ia(gh_repo_folder, gh_repo_data, custom_meta=None):
  99. """Uploads the bundle to the Internet Archive.
  100. arguments:
  101. gh_repo_folder -- path to the bundle
  102. gh_repo_data -- repository metadata
  103. custom_meta -- custom metadata
  104. returns:
  105. itemname -- Internet Archive item identifier
  106. meta -- the item metadata
  107. bundle_filename -- the git bundle filename
  108. """
  109. # formatting some dates string
  110. d = datetime.strptime(gh_repo_data['created_at'], '%Y-%m-%dT%H:%M:%SZ')
  111. pushed = datetime.strptime(gh_repo_data['pushed_at'], '%Y-%m-%dT%H:%M:%SZ')
  112. pushed_date = pushed.strftime('%Y-%m-%d_%H-%M-%S')
  113. raw_pushed_date = pushed.strftime('%Y-%m-%d %H:%M:%S')
  114. date = pushed.strftime('%Y-%m-%d')
  115. year = pushed.year
  116. # preparing some names
  117. repo_name = gh_repo_data['full_name'].replace('/', '-')
  118. originalurl = gh_repo_data['html_url']
  119. bundle_filename = '{}_-_{}'.format(repo_name, pushed_date)
  120. # preparing some description
  121. description_footer = 'To restore the repository download the bundle <pre><code>wget https://archive.org/download/github.com-{0}/{0}.bundle</code></pre> and run: <pre><code> git clone {0}.bundle </code></pre>'.format(bundle_filename)
  122. description = '<br/> {0} <br/><br/> {1} <br/>{2}'.format(gh_repo_data['description'], get_description_from_readme(gh_repo_folder), description_footer)
  123. # preparing uploader metadata
  124. uploader_url = gh_repo_data['owner']['html_url']
  125. uploader_name = gh_repo_data['owner']['login']
  126. # let's grab the avatar too
  127. uploader_avatar_url = gh_repo_data['owner']['avatar_url']
  128. pic = requests.get(uploader_avatar_url, stream = True)
  129. uploader_avatar_path = os.path.join(gh_repo_folder, 'cover.jpg')
  130. with open(uploader_avatar_path, 'wb') as f:
  131. pic.raw.decode_content = True
  132. shutil.copyfileobj(pic.raw, f)
  133. # some Internet Archive Metadata
  134. collection = 'open_source_software'
  135. mediatype = 'software'
  136. subject = 'GitHub;code;software;git'
  137. uploader = '{} - {}'.format(__main_name__, __version__)
  138. description = u'{0} <br/><br/>Source: <a href="{1}">{2}</a><br/>Uploader: <a href="{3}">{4}</a><br/>Upload date: {5}'.format(description, originalurl, originalurl, uploader_url, uploader_name, date)
  139. ## Creating bundle file of the git repo
  140. try:
  141. bundle_file = create_bundle(gh_repo_folder, bundle_filename)
  142. except ValueError as err:
  143. print(str(err))
  144. shutil.rmtree(gh_repo_folder)
  145. exit(1)
  146. # inizializing the internet archive item name
  147. # here we set the ia identifier
  148. itemname = '%s-%s_-_%s' % ('github.com', repo_name, pushed_date)
  149. title = '%s' % (itemname)
  150. #initializing the main metadata
  151. meta = dict(mediatype=mediatype, creator=uploader_name, collection=collection, title=title, year=year, date=date, \
  152. subject=subject, uploaded_with=uploader, originalurl=originalurl, pushed_date=raw_pushed_date, description=description)
  153. # override default metadata with any supplemental metadata provided.
  154. if custom_meta != None:
  155. meta.update(custom_meta)
  156. try:
  157. # upload the item to the Internet Archive
  158. print("Creating item on Internet Archive: %s" % meta['title'])
  159. item = internetarchive.get_item(itemname)
  160. # checking if the item already exists:
  161. if not item.exists:
  162. print("Uploading file to the internet archive: %s" % bundle_file)
  163. item.upload(bundle_file, metadata=meta, retries=9001, request_kwargs=dict(timeout=9001), delete=False)
  164. # upload the item to the Internet Archive
  165. print("Uploading avatar...")
  166. item.upload(os.path.join(gh_repo_folder, 'cover.jpg'), retries=9001, request_kwargs=dict(timeout=9001), delete=True)
  167. else:
  168. print("\nSTOP: The same repository seems already archived.")
  169. print("---->> Archived repository URL: \n \thttps://archive.org/details/%s" % itemname)
  170. print("---->> Archived git bundle file: \n \thttps://archive.org/download/{0}/{1}.bundle \n\n".format(itemname, bundle_filename))
  171. shutil.rmtree(gh_repo_folder)
  172. exit(0)
  173. except Exception as e:
  174. print(str(e))
  175. shutil.rmtree(gh_repo_folder)
  176. exit(1)
  177. # return item identifier and metadata as output
  178. return itemname, meta, bundle_filename
  179. def check_ia_credentials():
  180. """checks if the internet archive credentials are present.
  181. returns:
  182. exit(1) if there are no local credentialas.
  183. """
  184. filename = os.path.expanduser('~/.ia')
  185. filename2 = os.path.expanduser('~/.config/ia.ini')
  186. if not os.path.exists(filename) and not os.path.exists(filename2):
  187. msg = '\nWARNING - It looks like you need to configure your Internet Archive account!\n \
  188. for registation go to https://archive.org/account/login.createaccount.php\n'
  189. print(msg)
  190. try:
  191. noauth = subprocess.call(["ia", "configure"])
  192. if noauth:
  193. exit(1)
  194. except Exception as e:
  195. msg = 'Something went wrong trying to configure your internet archive account.\n Error - {}'.format(str(e))
  196. exit(1)