X-Git-Url: https://git.distorted.org.uk/~mdw/hippotat/blobdiff_plain/b635cd93477f2c2923f5eb25cce1237423e0c3c2..ff0fc3fa841805cb45013d4fdb3c0ca142b7a330:/hippotatlib/ownsource.py diff --git a/hippotatlib/ownsource.py b/hippotatlib/ownsource.py index b434f32..ac467f0 100644 --- a/hippotatlib/ownsource.py +++ b/hippotatlib/ownsource.py @@ -1,25 +1,64 @@ -# Automatic source code provision (AGPL compliance) +# -*- python -*- +# +# Hippotat - Asinine IP Over HTTP program +# hippotatlib/ownsource.py - Automatic source code provision (AGPL compliance) +# +# Copyright 2017 Ian Jackson +# +# AGPLv3+ + CAFv2+ +# +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU Affero General Public +# License as published by the Free Software Foundation, either +# version 3 of the License, or (at your option) any later version, +# with the "CAF Login Exception" as published by Ian Jackson +# (version 2, or at your option any later version) as an Additional +# Permission. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public +# License and the CAF Login Exception along with this program, in +# the file AGPLv3+CAFv2. If not, email Ian Jackson +# . + import os import sys import fnmatch import stat import subprocess +import tempfile +import shutil + +try: import debian.deb822 +except ImportError: pass class SourceShipmentPreparer(): def __init__(s, destdir): # caller may modify, and should read after calling generate() - s.output_name = 'srcbomb.tar.gz' + s.output_names = ['srcbomb.tar.gz', 'fullsrcbomb.tar'] + s.output_paths = [None,None] # alternatively caller may read this # defaults, caller can modify after creation + s.logger = lambda m: print('SourceShipmentPreparer',m) s.src_filter = s.src_filter_glob - s.src_filter_globs = ['/usr/local/*', '!/usr*', '!/etc/*'] + s.src_package_globs = ['!/usr/local/*', '/usr*'] + s.src_filter_globs = ['!/etc/*'] s.src_likeparent = s.src_likeparent_git + s.src_direxcludes = s.src_direxcludes_git + s.report_from_packages = s.report_from_packages_debian s.cwd = os.getcwd() s.find_rune_base = "find -type f -perm -004 \! -path '*/tmp/*'" - s.excludes = ['*~', '*.bak', '*.tmp', '#*#', - '[0-9][0-9][0-9][0-9]-src.cpio'] + s.ignores = ['*~', '*.bak', '*.tmp', '#*#', '__pycache__', + '[0-9][0-9][0-9][0-9]-src.tar'] s.rune_shell = ['/bin/bash', '-ec'] s.show_pathnames = True + s.download_packages = True + s.stream_stderr = sys.stderr + s.stream_debug = open('/dev/null','w') s.rune_cpio = r''' set -o pipefail ( @@ -31,24 +70,45 @@ class SourceShipmentPreparer(): ) ''' s.rune_portmanteau = r''' - outfile=$1; shift - rm -f "$outfile" - GZIP=-9 tar zcf "$outfile" "$@"' + GZIP=-1 tar zcf - "$@" + ''' + s.rune_portmanteau_uncompressed = r''' + tar cf - "$@" ''' s.manifest_name='0000-MANIFEST.txt' # private s._destdir = destdir s._outcounter = 0 s._manifest = [] + s._dirmap = { } + s._package_files = { } # map filename => infol + s._packages_path = os.path.join(s._destdir, 'packages') + s._package_sources = [] - def src_filter_glob(s, src): # default s.src_filter - for pat in s.src_filter_globs: + def thing_matches_globs(s, thing, globs): + for pat in globs: negate = pat.startswith('!') if negate: pat = pat[1:] - if fnmatch.fnmatch(src, pat): + if fnmatch.fnmatch(thing, pat): return not negate return negate + def src_filter_glob(s, src): # default s.src_filter + return s.thing_matches_globs(src, s.src_filter_globs) + + def src_direxcludes_git(s, d): + try: + excl = open(os.path.join(d, '.gitignore')) + except FileNotFoundError: + return [] + r = [] + for l in excl: + l.strip + if l.startswith('#'): next + if not len(l): next + r += l + return r + def src_likeparent_git(s, src): try: os.stat(os.path.join(src, '.git/.')) @@ -88,35 +148,53 @@ class SourceShipmentPreparer(): # no .git found anywhere return src - def src_prenormaliser(s, d, infol): # callers may monkey-patch away + def path_prenormaliser(s, d, infol): # callers may monkey-patch away return os.path.join(s.cwd, os.path.abspath(d)) - def src_find_rune(s, d): + def srcdir_find_rune(s, d): script = s.find_rune_base - for excl in s.excludes + [s.output_name, s.manifest_name]: + ignores = s.ignores + s.output_names + [s.manifest_name] + ignores += s.src_direxcludes(d) + for excl in ignores: assert("'" not in excl) - script += r" \! -name '%s'" % excl + script += r" \! -name '%s'" % excl + script += r" \! -path '*/%s/*'" % excl script += ' -print0' return script + def manifest_append(s, name, infol): + s._manifest.append({ 'file':name, 'info':' '.join(infol) }) + + def manifest_append_absentfile(s, name, infol): + s._manifest.append({ 'file_print':name, 'info':' '.join(infol) }) + def new_output_name(s, nametail, infol): s._outcounter += 1 name = '%04d-%s' % (s._outcounter, nametail) - s._manifest.append((name, ' '.join(infol))) + s.manifest_append(name, infol) return name - def new_output_fh(s, nametail, infol): - name = s.new_output_name(nametail, infol) - return s.open_output_fh(name, 'wb') - def open_output_fh(s, name, mode): return open(os.path.join(s._destdir, name), mode) - def mk_from_dir(s, d, infol): + def src_dir(s, d, infol): + try: name = s._dirmap[d] + except KeyError: pass + else: + s.manifest_append(name, infol) + return + if s.show_pathnames: infol.append(d) - find_rune = s.src_find_rune(d) + find_rune = s.srcdir_find_rune(d) total_rune = s.rune_cpio % find_rune - fh = s.new_output_fh('src.cpio', infol) + + name = s.new_output_name('src.tar', infol) + s._dirmap[d] = name + fh = s.open_output_fh(name, 'wb') + + s.logger('packing up into %s: %s (because %s)' % + (name, d, ' '.join(infol))) + subprocess.run(s.rune_shell + [total_rune], cwd=d, stdin=subprocess.DEVNULL, @@ -125,32 +203,172 @@ class SourceShipmentPreparer(): check=True) fh.close() - def mk_from_src(s, d, infol): - d = s.src_prenormaliser(d, infol) + def src_indir(s, d, infol): + d = s.path_prenormaliser(d, infol) if not s.src_filter(d): return + d = s.src_parentfinder(d, infol) - s.mk_from_dir(d, infol) + if d is None: return + s.src_dir(d, infol) + + def report_from_packages_debian(s, files): + dpkg_S_in = tempfile.TemporaryFile(mode='w+') + for (file, infols) in files.items(): + assert('\n' not in file) + dpkg_S_in.write(file) + dpkg_S_in.write('\0') + dpkg_S_in.seek(0) + cmdl = ['xargs','-0r','dpkg','-S','--'] + dpkg_S = subprocess.Popen(cmdl, + cwd='/', + stdin=dpkg_S_in, + stdout=subprocess.PIPE, + stderr=sys.stderr, + close_fds=False) + dpkg_show_in = tempfile.TemporaryFile(mode='w+') + pkginfos = { } + for l in dpkg_S.stdout: + l = l.strip(b'\n').decode('utf-8') + (pkgs, fname) = l.split(': ',1) + pks = pkgs.split(', ') + for pk in pks: + pkginfos.setdefault(pk,{'files':[]})['files'].append(fname) + print(pk, file=dpkg_show_in) + assert(dpkg_S.wait() == 0) + dpkg_show_in.seek(0) + cmdl = ['xargs','-r','dpkg-query', + r'-f${binary:Package}\t${Package}\t${Architecture}\t${Version}\t${source:Package}\t${source:Version}\t${source:Upstream-Version}\n', + '--show','--'] + dpkg_show = subprocess.Popen(cmdl, + cwd='/', + stdin=dpkg_show_in, + stdout=subprocess.PIPE, + stderr=sys.stderr, + close_fds=False) + for l in dpkg_show.stdout: + l = l.strip(b'\n').decode('utf-8') + (pk,p,a,v,sp,sv,suv) = l.split('\t') + pkginfos[pk]['binary'] = p + pkginfos[pk]['arch'] = a + pkginfos[pk]['version'] = v + pkginfos[pk]['source'] = sp + pkginfos[pk]['sourceversion'] = sv + pkginfos[pk]['sourceupstreamversion'] = sv + assert(dpkg_show.wait() == 0) + for pk in sorted(pkginfos.keys()): + pi = pkginfos[pk] + debfname = '%s_%s_%s.deb' % (pi['binary'], pi['version'], pi['arch']) + dscfname = '%s_%s.dsc' % (pi['source'], pi['sourceversion']) + s.manifest_append_absentfile(dscfname, [debfname]) + s.logger('mentioning %s and %s because %s' % + (dscfname, debfname, pi['files'][0])) + for fname in pi['files']: + infol = files[fname] + if s.show_pathnames: infol = infol + ['loaded='+fname] + s.manifest_append_absentfile(' \t' + debfname, infol) + + if s.download_packages: + try: os.mkdir(s._packages_path) + except FileExistsError: pass - def mk_from_srcs(s, dirs=sys.path): - s.mk_from_src(sys.argv[0], ['argv[0]']) + cmdl = ['apt-get','--download-only','source', + '%s=%s' % (pi['source'], pi['sourceversion'])] + subprocess.run(cmdl, + cwd=s._packages_path, + stdin=subprocess.DEVNULL, + stdout=s.stream_debug, + stderr=s.stream_stderr, + restore_signals=True, + check=True) + + s._package_sources.append(dscfname) + dsc = debian.deb822.Dsc(open(s._packages_path + '/' + dscfname)) + for indsc in dsc['Files']: + s._package_sources.append(indsc['name']) + + def thing_ought_packaged(s, fname): + return s.thing_matches_globs(fname, s.src_package_globs) + + def src_file_packaged(s, fname, infol): + s._package_files.setdefault(fname,[]).extend(infol) + + def src_file(s, fname, infol): + def fngens(): + yield (infol, fname) + infol_copy = infol.copy() + yield (infol_copy, s.path_prenormaliser(fname, infol_copy)) + yield (infol, os.path.realpath(fname)) + + for (tinfol, tfname) in fngens(): + if s.thing_ought_packaged(tfname): + s.src_file_packaged(tfname, tinfol) + return + + s.src_indir(fname, infol) + + def src_argv0(s, program, infol): + s.src_file(program, infol) + + def src_syspath(s, fname, infol): + if s.thing_ought_packaged(fname): return + s.src_indir(fname, infol) + + def src_module(s, m, infol): + try: fname = m.__file__ + except AttributeError: return + infol.append('module='+m.__name__) + + if s.thing_ought_packaged(fname): + s.src_file_packaged(fname, infol) + else: + s.src_indir(fname, infol) + + def srcs_allitems(s, dirs=sys.path): + s.logger('allitems') + s.src_argv0(sys.argv[0], ['argv[0]']) for d in sys.path: - s.mk_from_src(d, ['sys.path']) + s.src_syspath(d, ['sys.path']) + for m in sys.modules.values(): + s.src_module(m, ['sys.modules']) + s.report_from_packages(s._package_files) + s.logger('allitems done') - def mk_portmanteau(s): - cmdl = s.rune_shell + [ s.rune_portmanteau, 'x', - s.output_name, s.manifest_name ] - mfh = s.open_output_fh(s.manifest_name,'w') - for (name, info) in s._manifest: - cmdl.append(name) - print('%s\t%s' % (name,info), file=mfh) - mfh.close() - subprocess.run(s.rune_shell + cmdl, - cwd=s._destdir, + def _mk_portmanteau(s, ix, rune, cwd, files): + output_name = s.output_names[ix] + s.logger('making portmanteau %s' % output_name) + output_path = os.path.join(s._destdir, output_name) + subprocess.run(s.rune_shell + [ rune, 'x' ] + files, + cwd=cwd, stdin=subprocess.DEVNULL, - stdout=sys.stderr, + stdout=open(output_path, 'wb'), restore_signals=True, check=True) + s.output_paths[ix] = output_path + + def mk_inner_portmanteau(s): + outputs = [s.manifest_name] + outputs_done = { } + mfh = s.open_output_fh(s.manifest_name,'w') + for me in s._manifest: + try: fname = me['file'] + except KeyError: fname = me.get('file_print','') + else: + try: outputs_done[fname] + except KeyError: + outputs.append(fname) + outputs_done[fname] = 1 + print('%s\t%s' % (fname, me['info']), file=mfh) + mfh.close() + + s._mk_portmanteau(0, s.rune_portmanteau, + s._destdir, outputs) + + def mk_packages_portmanteau(s): + s._mk_portmanteau(1, s.rune_portmanteau_uncompressed, + s._packages_path, s._package_sources) def generate(s): - s.mk_from_srcs() - s.mk_portmanteau() + s.srcs_allitems() + s.mk_inner_portmanteau() + s.mk_packages_portmanteau() + s.logger('portmanteau ready in %s %s' % tuple(s.output_paths))