copyright notices
[hippotat] / hippotatlib / ownsource.py
1 # -*- python -*-
2 #
3 # Hippotat - Asinine IP Over HTTP program
4 # hippotatlib/ownsource.py - Automatic source code provision (AGPL compliance)
5 #
6 # Copyright 2017 Ian Jackson
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU Affero General Public License as
10 # published by the Free Software Foundation, either version 3 of the
11 # License, or (at your option) any later version, with the "CAF Login
12 # Exception" as published by Ian Jackson (version 2, or at your option
13 # any later version) as an Additional Permission.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Affero General Public License for more details.
19 #
20 # You should have received a copy of the GNU Affero General Public
21 # License and the CAF Login Exception along with this program, in the
22 # file AGPLv3+CAFv2. If not, email Ian Jackson
23 # <ijackson@chiark.greenend.org.uk>.
24
25
26 import os
27 import sys
28 import fnmatch
29 import stat
30 import subprocess
31 import tempfile
32
class SourceShipmentPreparer():
  """Collect the running program's source into one tarball (AGPL compliance).

  Typical use: construct with a destination directory, optionally adjust
  the public attributes set up in __init__, call generate(), then read
  s.output_path (or join destdir with s.output_name).

  Files which match s.src_package_globs are assumed to come from
  distribution packages and are only *mentioned* in the manifest; other
  files cause their containing source tree to be packed up with cpio.
  """

  def __init__(s, destdir):
    """Set up defaults; `destdir` is where all output files are written."""
    # caller may modify, and should read after calling generate()
    s.output_name = 'srcbomb.tar.gz'
    # s.output_path   alternatively caller may read this (set by
    #                 mk_portmanteau)
    # defaults, caller can modify after creation
    s.logger = lambda m: print('SourceShipmentPreparer',m)
    s.src_filter = s.src_filter_glob
    s.src_package_globs = ['!/usr/local/*', '/usr*']
    s.src_filter_globs = ['!/etc/*']
    s.src_likeparent = s.src_likeparent_git
    s.src_direxcludes = s.src_direxcludes_git
    s.report_from_packages = s.report_from_packages_debian
    s.cwd = os.getcwd()
    # raw string: the backslash is for the shell, not for Python
    s.find_rune_base = r"find -type f -perm -004 \! -path '*/tmp/*'"
    s.ignores = ['*~', '*.bak', '*.tmp', '#*#', '__pycache__',
                 '[0-9][0-9][0-9][0-9]-src.cpio']
    s.rune_shell = ['/bin/bash', '-ec']
    s.show_pathnames = True
    # %s is replaced with the find rune from srcdir_find_rune
    s.rune_cpio = r'''
            set -o pipefail
            (
             %s
             # ^ by default, is find ... -print0
            ) | (
             cpio -Hustar -o --quiet -0 -R 1000:1000 || \
             cpio -Hustar -o --quiet -0
            )
    '''
    # $1 is the output tarball; remaining arguments are its members
    s.rune_portmanteau = r'''
            outfile=$1; shift
            rm -f "$outfile"
            GZIP=-1 tar zcf "$outfile" "$@"
    '''
    s.manifest_name='0000-MANIFEST.txt'
    # private
    s._destdir = destdir
    s._outcounter = 0
    s._manifest = []
    s._dirmap = { }         # map source dir => output file name
    s._package_files = { }  # map filename => infol

  def thing_matches_globs(s, thing, globs):
    """Return whether `thing` is selected by the glob list `globs`.

    Patterns are tried in order; the first fnmatch hit decides, and a
    leading '!' inverts that pattern's verdict.  If nothing matches, the
    result is True iff the *last* pattern was negated.  An empty list
    selects nothing.
    """
    negate = False  # so that an empty `globs` does not crash
    for pat in globs:
      negate = pat.startswith('!')
      if negate: pat = pat[1:]
      if fnmatch.fnmatch(thing, pat):
        return not negate
    return negate

  def src_filter_glob(s, src): # default s.src_filter
    """Return whether `src` passes s.src_filter_globs."""
    return s.thing_matches_globs(src, s.src_filter_globs)

  def src_direxcludes_git(s, d): # default s.src_direxcludes
    """Return the patterns listed in `d`/.gitignore, or [] if there is none.

    Lines are stripped of surrounding whitespace; blank lines and '#'
    comments are skipped.  The remaining lines are returned verbatim;
    gitignore syntax is not otherwise interpreted.
    """
    try:
      excl = open(os.path.join(d, '.gitignore'))
    except FileNotFoundError:
      return []
    r = []
    with excl:
      for l in excl:
        l = l.strip()
        if not l or l.startswith('#'): continue
        r.append(l)
    return r

  def src_likeparent_git(s, src): # default s.src_likeparent
    """Return True iff `src` contains a `.git` directory."""
    try:
      os.stat(os.path.join(src, '.git/.'))
    except (FileNotFoundError, NotADirectoryError):
      # NotADirectoryError: src (or .git) is a file, not a directory
      return False
    else:
      return True

  def src_parentfinder(s, src, infol): # callers may monkey-patch away
    """Find the source tree containing `src`.

    Walks upwards from `src` (first as given, then via its realpath)
    until s.src_likeparent accepts a directory or a mount point is
    reached.  On success, appends 'want=<path relative to the tree>' to
    `infol` (if non-empty) and returns the tree's directory.

    Returns None if `src` does not exist, or `src` unchanged if no
    enclosing tree was found.
    """
    for deref in (False,True):
      xinfo = []  # path components stripped off so far, innermost first

      search = src
      if deref:
        search = os.path.realpath(search)

      def ascend():
        nonlocal search
        xinfo.append(os.path.basename(search))
        search = os.path.dirname(search)

      try:
        stab = os.lstat(search)
      except FileNotFoundError:
        return
      if stat.S_ISREG(stab.st_mode):
        ascend()

      while not os.path.ismount(search):
        if s.src_likeparent(search):
          xinfo.reverse()
          if len(xinfo): infol.append('want=' + os.path.join(*xinfo))
          return search

        if os.path.dirname(search) == search:
          # cannot go any higher (eg, relative path exhausted);
          # without this the loop would never terminate
          break
        ascend()

    # no .git found anywhere
    # NOTE(review): src may be a regular file here, which src_dir would
    # then try to use as a working directory - confirm intended handling
    return src

  def path_prenormaliser(s, d, infol): # callers may monkey-patch away
    """Return `d` as a normalised absolute path, relative paths being
    resolved against s.cwd (not the process's current directory)."""
    return os.path.normpath(os.path.join(s.cwd, d))

  def srcdir_find_rune(s, d):
    """Return the shell `find` command used to list the files in `d`.

    Starts from s.find_rune_base and excludes s.ignores, our own output
    files, and whatever s.src_direxcludes(d) returns.

    Raises ValueError if an exclusion contains a single quote, since it
    could not be safely embedded in the single-quoted shell word.
    """
    script = s.find_rune_base
    ignores = s.ignores + [s.output_name, s.manifest_name]
    ignores += s.src_direxcludes(d)
    for excl in ignores:
      if "'" in excl:
        raise ValueError('exclusion pattern contains quote: %r' % excl)
      script += r" \! -name '%s'"     % excl
      script += r" \! -path '*/%s/*'" % excl
    script += ' -print0'
    return script

  def manifest_append(s, name, infol):
    """Record output file `name` (which will go into the tarball)."""
    s._manifest.append({ 'file':name, 'info':' '.join(infol) })

  def manifest_append_absentfile(s, name, infol):
    """Record `name` in the manifest only; it is not put in the tarball."""
    s._manifest.append({ 'file_print':name, 'info':' '.join(infol) })

  def new_output_name(s, nametail, infol):
    """Allocate, record in the manifest, and return 'NNNN-<nametail>'."""
    s._outcounter += 1
    name = '%04d-%s' % (s._outcounter, nametail)
    s.manifest_append(name, infol)
    return name

  def open_output_fh(s, name, mode):
    """Open `name` inside the destination directory with `mode`."""
    return open(os.path.join(s._destdir, name), mode)

  def src_dir(s, d, infol):
    """Pack up directory `d` into a new cpio output file.

    If `d` has been packed already, just add another manifest line
    referring to the existing output file.
    """
    try: name = s._dirmap[d]
    except KeyError: pass
    else:
      s.manifest_append(name, infol)
      return

    if s.show_pathnames: infol.append(d)
    find_rune = s.srcdir_find_rune(d)
    total_rune = s.rune_cpio % find_rune

    name = s.new_output_name('src.cpio', infol)
    s._dirmap[d] = name

    with s.open_output_fh(name, 'wb') as fh:
      s.logger('packing up into %s: %s (because %s)' %
               (name, d, ' '.join(infol)))

      subprocess.run(s.rune_shell + [total_rune],
                     cwd=d,
                     stdin=subprocess.DEVNULL,
                     stdout=fh,
                     restore_signals=True,
                     check=True)

  def src_indir(s, d, infol):
    """Pack up the source tree containing path `d`, if it passes the
    filter and exists."""
    d = s.path_prenormaliser(d, infol)
    if not s.src_filter(d): return

    d = s.src_parentfinder(d, infol)
    if d is None: return
    s.src_dir(d, infol)

  def report_from_packages_debian(s, files): # default s.report_from_packages
    """Mention in the manifest the Debian packages which own `files`.

    `files` maps filename => infol.  Runs `dpkg -S` to find the owning
    binary packages and `dpkg-query --show` for their versions and
    source packages; for each package a .dsc/.deb line is recorded,
    followed by one line per file.

    Raises ValueError for a filename containing a newline, and
    subprocess.CalledProcessError if either command fails.
    """
    pkginfos = { }

    with tempfile.TemporaryFile(mode='w+') as dpkg_S_in, \
         tempfile.TemporaryFile(mode='w+') as dpkg_show_in:

      for fname in files:
        # dpkg -S output is parsed line by line, below
        if '\n' in fname:
          raise ValueError('filename contains newline: %r' % fname)
        dpkg_S_in.write(fname)
        dpkg_S_in.write('\0')
      dpkg_S_in.seek(0)

      cmdl = ['xargs','-0r','dpkg','-S','--']
      with subprocess.Popen(cmdl,
                            cwd='/',
                            stdin=dpkg_S_in,
                            stdout=subprocess.PIPE,
                            stderr=sys.stderr,
                            close_fds=False) as dpkg_S:
        for l in dpkg_S.stdout:
          l = l.strip(b'\n').decode('utf-8')
          (pkgs, fname) = l.split(': ',1)
          for pk in pkgs.split(', '):
            if pk not in pkginfos:
              pkginfos[pk] = {'files':[]}
              print(pk, file=dpkg_show_in)
            pkginfos[pk]['files'].append(fname)
      # leaving the with block has waited for the process
      if dpkg_S.returncode != 0:
        raise subprocess.CalledProcessError(dpkg_S.returncode, cmdl)

      dpkg_show_in.seek(0)
      cmdl = ['xargs','-r','dpkg-query',
              r'-f${binary:Package}\t${Package}\t${Architecture}\t${Version}\t${source:Package}\t${source:Version}\n',
              '--show','--']
      with subprocess.Popen(cmdl,
                            cwd='/',
                            stdin=dpkg_show_in,
                            stdout=subprocess.PIPE,
                            stderr=sys.stderr,
                            close_fds=False) as dpkg_show:
        for l in dpkg_show.stdout:
          l = l.strip(b'\n').decode('utf-8')
          (pk,p,a,v,sp,sv) = l.split('\t')
          pkginfos[pk]['binary'] = p
          pkginfos[pk]['arch'] = a
          pkginfos[pk]['version'] = v
          pkginfos[pk]['source'] = sp
          pkginfos[pk]['sourceversion'] = sv
      if dpkg_show.returncode != 0:
        raise subprocess.CalledProcessError(dpkg_show.returncode, cmdl)

    for pk in sorted(pkginfos.keys()):
      pi = pkginfos[pk]
      debfname = '%s_%s_%s.deb' % (pi['binary'], pi['version'], pi['arch'])
      dscfname = '%s_%s.dsc' % (pi['source'], pi['sourceversion'])
      s.manifest_append_absentfile(dscfname, [debfname])
      s.logger('mentioning %s and %s because %s' %
               (dscfname, debfname, pi['files'][0]))
      for fname in pi['files']:
        infol = files[fname]
        if s.show_pathnames: infol = infol + ['loaded='+fname]
        s.manifest_append_absentfile(' \t' + debfname, infol)

  def thing_ought_packaged(s, fname):
    """Return whether `fname` matches s.src_package_globs."""
    return s.thing_matches_globs(fname, s.src_package_globs)

  def src_file_packaged(s, fname, infol):
    """Note that `fname` is to be reported via s.report_from_packages."""
    s._package_files.setdefault(fname,[]).extend(infol)

  def src_file(s, fname, infol):
    """Handle one loaded file: report it as packaged if any spelling of
    its name looks packaged, otherwise pack up its source tree."""
    def fngens():
      # candidate spellings: as given, prenormalised, symlinks resolved
      yield (infol, fname)
      infol_copy = infol.copy()
      yield (infol_copy, s.path_prenormaliser(fname, infol_copy))
      yield (infol, os.path.realpath(fname))

    for (tinfol, tfname) in fngens():
      if s.thing_ought_packaged(tfname):
        s.src_file_packaged(tfname, tinfol)
        return

    s.src_indir(fname, infol)

  def src_argv0(s, program, infol):
    """Handle the program's own script (argv[0])."""
    s.src_file(program, infol)

  def src_syspath(s, fname, infol):
    """Handle one module search path entry; packaged ones are skipped."""
    if s.thing_ought_packaged(fname): return
    s.src_indir(fname, infol)

  def src_module(s, m, infol):
    """Handle one loaded module `m`; modules without a file are skipped."""
    # __file__ is missing for builtins and may be None (eg, for
    # namespace packages)
    fname = getattr(m, '__file__', None)
    if fname is None: return
    infol.append('module='+m.__name__)

    if s.thing_ought_packaged(fname):
      s.src_file_packaged(fname, infol)
    else:
      s.src_indir(fname, infol)

  def srcs_allitems(s, dirs=None):
    """Process argv[0], each entry of `dirs` (default: the current
    sys.path) and every loaded module, then report the packaged files."""
    if dirs is None: dirs = sys.path
    s.logger('allitems')
    s.src_argv0(sys.argv[0], ['argv[0]'])
    for d in dirs:
      s.src_syspath(d, ['sys.path'])
    # snapshot, in case anything gets imported while we iterate
    for m in list(sys.modules.values()):
      s.src_module(m, ['sys.modules'])
    s.report_from_packages(s._package_files)
    s.logger('allitems done')

  def mk_portmanteau(s):
    """Write the manifest and tar everything up; sets s.output_path."""
    s.logger('making portmanteau')
    # 'x' becomes the rune's $0
    cmdl = s.rune_shell + [ s.rune_portmanteau, 'x',
                            s.output_name, s.manifest_name ]
    members = set()
    with s.open_output_fh(s.manifest_name,'w') as mfh:
      for me in s._manifest:
        try: fname = me['file']
        except KeyError: fname = me.get('file_print','')
        else:
          # one output file may have several manifest lines, but
          # should be put into the tarball only once
          if fname not in members:
            members.add(fname)
            cmdl.append(fname)
        print('%s\t%s' % (fname, me['info']), file=mfh)
    subprocess.run(cmdl,
                   cwd=s._destdir,
                   stdin=subprocess.DEVNULL,
                   stdout=sys.stderr,
                   restore_signals=True,
                   check=True)
    s.output_path = os.path.join(s._destdir, s.output_name)
    s.logger('portmanteau ready in %s' % s.output_path)

  def generate(s):
    """Do everything: collect all sources, then build the tarball."""
    s.srcs_allitems()
    s.mk_portmanteau()