archiver_3.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. import fnmatch
  2. import os
  3. import re
  4. import subprocess
  5. import sys
  6. import tarfile
  7. import shutil
  8. import glob
  9. import locale
  10. import logging
  11. import tempfile
  12. from TarSCM.helpers import Helpers
  13. try:
  14. from io import StringIO
  15. except:
  16. from StringIO import StringIO
  # Matches (with re.match) any path that contains a '.bzr', '.git', '.hg' or
  # '.svn' component preceded by a '/', i.e. the SCM metadata directory itself
  # or anything below it.
  METADATA_PATTERN = re.compile(r'.*/\.(bzr|git|hg|svn)(\/.*|$)')
  class BaseArchive():
      """Common state and helpers shared by the archive writers.

      Instance attributes:
        helpers     -- a ``Helpers`` instance
        archivefile -- initialised to None
        metafile    -- initialised to None
      """

      def __init__(self):
          self.helpers = Helpers()
          self.archivefile = None
          self.metafile = None

      def extract_from_archive(self, repodir, files, outdir):
          """Copy selected files from the repository directly into outdir.

          Each entry of ``files`` is joined onto ``repodir`` and expanded
          with ``glob``; every match is copied with ``shutil.copy2``.

          Arguments:
            repodir -- root directory of the checked-out repository
            files   -- iterable of file names / glob patterns relative to
                       ``repodir``; ``None`` means "nothing to do"
            outdir  -- destination directory

          Exits the program (``sys.exit``) when a pattern matches nothing or
          when a match resolves to a location outside of the repository.
          """
          if files is None:
              return

          # Compare fully resolved paths on a path-separator boundary.  A
          # plain string-prefix test against the unresolved repodir would
          # accept siblings such as "<repodir>-evil/..." and would reject
          # everything when repodir itself is reached through a symlink.
          repo_root = os.path.realpath(repodir)
          repo_prefix = os.path.join(repo_root, '')

          for filename in files:
              path = os.path.join(repodir, filename)
              path_glob = glob.glob(path)
              if not path_glob:
                  sys.exit("%s: No such file or directory" % path)

              for src in path_glob:
                  r_src = os.path.realpath(src)
                  inside = (r_src == repo_root or
                            r_src.startswith(repo_prefix))
                  if not inside:
                      sys.exit("%s: tries to escape the repository" % src)
                  shutil.copy2(src, outdir)
  class ObsCpio(BaseArchive):
      """Archive writer producing an ``.obscpio`` archive and ``.obsinfo``."""

      def create_archive(self, scm_object, **kwargs):
          """Create an OBS cpio archive of repodir in destination directory.

          Keyword arguments read from ``kwargs``:
            basename -- name written to the obsinfo file and used for its
                        file name
            dstname  -- base name of the archive file
            version  -- version string written to the obsinfo file
            cli      -- parsed command line; ``outdir``, ``include``,
                        ``exclude`` and ``package_meta`` are read here

          The archive is written to ``<outdir>/<dstname>.obscpio`` by piping
          a sorted list of paths into ``cpio --create --format=newc``.  On
          success ``self.archivefile`` and ``self.metafile`` hold the paths
          of the two files written.

          Raises SystemExit when cpio returns a non-zero exit status.  The
          working directory is restored even when an error occurs.
          """
          basename = kwargs['basename']
          dstname = kwargs['dstname']
          version = kwargs['version']
          args = kwargs['cli']
          commit = scm_object.get_current_commit()
          package_metadata = args.package_meta
          (workdir, topdir) = os.path.split(scm_object.arch_dir)
          extension = 'obscpio'

          cwd = os.getcwd()
          os.chdir(workdir)
          try:
              archivefilename = os.path.join(args.outdir,
                                             dstname + '.' + extension)

              # detect reproducible support
              params = ['cpio', '--create', '--format=newc',
                        '--owner', '0:0']
              chkcmd = "cpio --create --format=newc --reproducible "
              chkcmd += "</dev/null >/dev/null 2>&1"
              if os.system(chkcmd) == 0:
                  params.append('--reproducible')

              # The archive is binary data, so open it in binary mode; the
              # context manager closes it even when cpio fails.
              with open(archivefilename, "wb") as archivefile:
                  # NOTE(review): stderr is merged into stdout, i.e. into
                  # the archive file itself -- confirm this is intended.
                  proc = subprocess.Popen(
                      params,
                      shell=False,
                      stdin=subprocess.PIPE,
                      stdout=archivefile,
                      stderr=subprocess.STDOUT
                  )

                  # transform glob patterns to regular expressions
                  includes = ''        # empty pattern: matches everything
                  excludes = r'$.'     # can never match
                  # topdir is a literal directory name, not a regex: escape
                  # it so names such as "gtk+-3.0" still match themselves.
                  topdir_re = '(' + re.escape(topdir) + '/)('
                  if args.include:
                      incl_arr = [fnmatch.translate(x + '*')
                                  for x in args.include]
                      includes = topdir_re + r'|'.join(incl_arr) + ')'
                  if args.exclude:
                      excl_arr = [fnmatch.translate(x) for x in args.exclude]
                      excludes = topdir_re + r'|'.join(excl_arr) + ')'
                  include_re = re.compile(includes)
                  exclude_re = re.compile(excludes)

                  # Collect every directory and file below topdir that
                  # passes the exclude/include filters; SCM metadata is
                  # only kept when package_metadata is set.
                  cpiolist = []
                  for root, dirs, files in os.walk(topdir, topdown=False):
                      for entry in dirs + files:
                          name = os.path.join(root, entry)
                          if exclude_re.match(name):
                              continue
                          if not include_re.match(name):
                              continue
                          if (METADATA_PATTERN.match(name) and
                                  not package_metadata):
                              continue
                          cpiolist.append(name)

                  tstamp = self.helpers.get_timestamp(scm_object, args,
                                                      topdir)
                  for name in sorted(cpiolist):
                      # Best effort: give every entry the same mtime.
                      try:
                          os.utime(name, (tstamp, tstamp))
                      except OSError:
                          pass
                      # Fall back to writing the raw name when it cannot
                      # be encoded (Python 2 byte strings).
                      try:
                          proc.stdin.write(
                              name.encode('UTF-8', 'surrogateescape'))
                      except (TypeError, UnicodeDecodeError):
                          proc.stdin.write(name)
                      proc.stdin.write(b"\n")
                  proc.stdin.close()
                  ret_code = proc.wait()

              if ret_code != 0:
                  raise SystemExit("Creating the cpio archive failed!")

              # write meta data
              infofile = os.path.join(args.outdir, basename + '.obsinfo')
              logging.debug("Writing to obsinfo file '%s'", infofile)
              with open(infofile, "w") as metafile:
                  metafile.write("name: " + basename + "\n")
                  metafile.write("version: " + version + "\n")
                  metafile.write("mtime: " + str(tstamp) + "\n")
                  if commit:
                      metafile.write("commit: " + commit + "\n")

              self.archivefile = archivefilename
              self.metafile = infofile
          finally:
              os.chdir(cwd)
  class Tar(BaseArchive):
      """Archive writer producing a plain tarball."""

      def create_archive(self, scm_object, **kwargs):
          """Create a tarball of repodir in destination directory.

          Keyword arguments read from ``kwargs``:
            dstname -- base name of the tarball
            cli     -- parsed command line; ``outdir``, ``extension``
                       (defaults to 'tar'), ``include``, ``exclude`` and
                       ``package_meta`` are read here

          Every member gets uid/gid 0 and owner/group "root"; when the
          timestamp returned by the helpers is non-zero it is used as the
          member mtime.  If include patterns are given only they decide
          which members are kept and the exclude patterns are not consulted.
          On success ``self.archivefile`` holds the tarball name.  The
          working directory is restored even when an error occurs.
          """
          (workdir, topdir) = os.path.split(scm_object.arch_dir)
          args = kwargs['cli']
          outdir = args.outdir
          dstname = kwargs['dstname']
          extension = (args.extension or 'tar')
          package_metadata = args.package_meta

          timestamp = self.helpers.get_timestamp(
              scm_object,
              args,
              scm_object.clone_dir
          )

          # Includes always get a trailing '*' so that they act as prefix
          # matches and the contents of an included directory are kept too.
          # (The previous "is it a pattern?" comparison was always true, so
          # this is the behaviour callers already rely on.)
          incl_patterns = [
              re.compile(fnmatch.translate(os.path.join(topdir, inc + '*')))
              for inc in (args.include or [])
          ]
          excl_patterns = [
              re.compile(fnmatch.translate(os.path.join(topdir, exc)))
              for exc in (args.exclude or [])
          ]

          def tar_exclude(filename):
              """
              Exclude (return True) or add (return False) file to tar archive.
              """
              if not package_metadata and METADATA_PATTERN.match(filename):
                  return True
              if incl_patterns:
                  return not any(p.match(filename) for p in incl_patterns)
              return any(p.match(filename) for p in excl_patterns)

          def reset(tarinfo):
              """Reset uid/gid to 0/0 (root) and apply the timestamp."""
              tarinfo.uid = tarinfo.gid = 0
              tarinfo.uname = tarinfo.gname = "root"
              if timestamp != 0:
                  tarinfo.mtime = timestamp
              return tarinfo

          def tar_filter(tarinfo):
              """Drop excluded members, normalise the remaining ones."""
              if tar_exclude(tarinfo.name):
                  return None
              return reset(tarinfo)

          cwd = os.getcwd()
          os.chdir(workdir)
          try:
              enc = locale.getpreferredencoding()
              out_file = os.path.join(outdir, dstname + '.' + extension)

              # TarFile is only a context manager on Python >= 2.7, where
              # add() always accepts ``filter``; the former 2.6 fallbacks
              # could therefore only hide unrelated TypeErrors.
              with tarfile.open(out_file, "w", encoding=enc) as tar:
                  # The top directory itself is always added, unfiltered.
                  tar.add(topdir, recursive=False, filter=reset)
                  # Sorted for a stable member order.
                  for entry in sorted(os.listdir(topdir)):
                      tar.add(os.path.join(topdir, entry), filter=tar_filter)

              self.archivefile = tar.name
          finally:
              os.chdir(cwd)
  class Gbp(BaseArchive):
      """Archive writer building Debian source artefacts with gbp."""

      @staticmethod
      def _filter_build_args(raw_args):
          """Split user supplied build args into (kept, ignored) lists.

          Prevents potentially dangerous arguments from being passed to
          gbp, e.g. via cleaner, postexport or other hooks: of the
          ``--git-*`` options only a small whitelist is kept; everything
          else that does not look like a gbp or hook option is passed
          through.
          """
          # split() without an argument drops the empty strings that
          # repeated spaces would otherwise turn into empty arguments.
          build_args = raw_args.split()
          safe_args = re.compile(
              '--git-verbose|--git-upstream-tree=.*|--git-no-pristine-tar')
          blocked = re.compile('--git-.*|--hook-.*|--.*-hook=.*')

          gbp_args = [arg for arg in build_args if safe_args.match(arg)]
          dpkg_args = [arg for arg in build_args if not blocked.match(arg)]
          kept = gbp_args + dpkg_args

          ignored = []
          for arg in build_args:
              if arg not in kept and arg not in ignored:
                  ignored.append(arg)
          return kept, ignored

      @staticmethod
      def _set_changelog_version(cl_path, version):
          """Rewrite the version in the first line of debian/changelog.

          Returns True when the file was rewritten, False when it was left
          alone (unparsable first line, or a native version requested for a
          non-native package).
          """
          with open(cl_path, 'r') as cl:
              lines = cl.readlines()

          # A valid debian changelog has 'package (version) release' as the
          # first line.  A malformed one will not build anyway, so just
          # leave it untouched instead of crashing here.
          header = re.search(r'.+ \((.+)\) .+', lines[0]) if lines else None
          if header is None:
              logging.warning("Cannot parse the first line of %s, leaving"
                              " the version unchanged.", cl_path)
              return False

          # non-native packages MUST have a debian revision (-xyz)
          old_version = header.group(1)
          if '-' in old_version and '-' not in version:
              logging.warning("Package is non-native but requested version"
                              " %s is native! Ignoring.", version)
              return False

          logging.debug("Setting version to %s", version)

          def replace_version(match):
              # A callable keeps the version out of the replacement
              # template, so backslashes in it are not interpreted.
              return '%s (%s) %s' % (match.group(1), version,
                                     match.group(2))

          lines[0] = re.sub(r'^(.+) \(.+\) (.+)', replace_version, lines[0])
          with open(cl_path, 'w') as cl:
              cl.write("".join(lines))
          return True

      def create_archive(self, scm_object, **kwargs):
          """Create Debian source artefacts using git-buildpackage.

          Keyword arguments read from ``kwargs``:
            version -- version to set in debian/changelog; '', '_none_',
                       '_auto_' and None leave the changelog untouched
            cli     -- parsed command line; ``revision``, ``submodules``,
                       ``gbp_build_args``, ``gbp_dch_release_update`` and
                       ``outdir`` are read here

          Runs ``gbp buildpackage`` in the clone directory, then copies
          every file listed by ``dpkg-scansources`` for the parent directory
          into ``outdir``.  The working directory is restored even when an
          error occurs.
          """
          args = kwargs['cli']
          version = kwargs['version']
          workdir = os.path.dirname(scm_object.clone_dir)

          cwd = os.getcwd()
          os.chdir(workdir)
          try:
              if not args.revision:
                  revision = 'origin/master'
              else:
                  revision = 'origin/' + args.revision

              command = ['gbp', 'buildpackage', '--git-notify=off',
                         '--git-force-create', '--git-cleaner="true"']

              # we are not on a proper local branch due to using git-reset
              # but we anyway use the --git-export option
              command.extend(['--git-ignore-branch',
                              "--git-export=%s" % revision])

              # gbp can load submodules without having to run the git
              # command, and will ignore submodules even if loaded manually
              # unless this option is passed.
              if args.submodules:
                  command.append('--git-submodules')

              # create local pristine-tar branch if present
              pristine_opt = '--git-no-pristine-tar'
              ret, _ = self.helpers.run_cmd(
                  ['git', 'rev-parse', '--verify', '--quiet',
                   'origin/pristine-tar'],
                  cwd=scm_object.clone_dir)
              if not ret:
                  ret, _ = self.helpers.run_cmd(
                      ['git', 'update-ref', 'refs/heads/pristine-tar',
                       'origin/pristine-tar'],
                      cwd=scm_object.clone_dir)
                  if not ret:
                      pristine_opt = '--git-pristine-tar'
              command.append(pristine_opt)

              if args.gbp_build_args:
                  kept, ignored_args = self._filter_build_args(
                      args.gbp_build_args)
                  if ignored_args:
                      logging.info("Ignoring build_args: %s", ignored_args)
                  command.extend(kept)

              # Set the version in the changelog. Note that we can't simply
              # use --source-option=-Dversion=$ver as it will not change the
              # tarball name, which means dpkg-source -x pkg.dsc will fail
              # as the names and version will not match
              cl_path = os.path.join(scm_object.clone_dir,
                                     'debian', 'changelog')
              skip_versions = ['', '_none_', '_auto_', None]
              if os.path.isfile(cl_path) and version not in skip_versions:
                  # Some characters are legal in Debian's versions but not
                  # in a git tag, so they get substituted
                  version = version.replace('_', '~').replace('%', ':')
                  if self._set_changelog_version(cl_path, version):
                      # gbp by default complains about uncommitted changes
                      command.append("--git-ignore-new")

              logging.debug("Running in %s", scm_object.clone_dir)
              self.helpers.safe_run(command, cwd=scm_object.clone_dir)

              # Use dpkg to find out what source artefacts have been built
              # and copy them back, which allows the script to be
              # future-proof and work with all present and future package
              # formats
              sources = self.helpers.safe_run(
                  ['dpkg-scansources', workdir], cwd=workdir)[1]

              files_pattern = re.compile(
                  r'^Files:(.*(?:\n .*)+)', flags=re.MULTILINE)

              for match in files_pattern.findall(sources):
                  logging.info("Files:")
                  for line in match.strip().split("\n"):
                      # The file name is the third space separated field.
                      fname = line.strip().split(' ')[2]
                      logging.info(" %s", fname)

                      input_file = os.path.join(workdir, fname)
                      output_file = os.path.join(args.outdir, fname)

                      is_dsc = fnmatch.fnmatch(fname, '*.dsc')
                      if args.gbp_dch_release_update and is_dsc:
                          # This tag is used by the build-recipe-dsc to set
                          # the OBS revision:
                          # https://github.com/openSUSE/obs-build/pull/192
                          logging.debug("Setting OBS-DCH-RELEASE in %s",
                                        input_file)
                          with open(input_file, "a") as dsc_file:
                              dsc_file.write("OBS-DCH-RELEASE: 1")

                      shutil.copy(input_file, output_file)
          finally:
              os.chdir(cwd)