keys.scala, etc.: Make merging public keys have a progress bar.
[tripe-android] / tar.scala
CommitLineData
c8292b34
MW
1/* -*-scala-*-
2 *
3 * Extract data from `tar' archives
4 *
5 * (c) 2018 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the Trivial IP Encryption (TrIPE) Android app.
11 *
12 * TrIPE is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
16 *
17 * TrIPE is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
24 */
25
26package uk.org.distorted.tripe;
27
28/*----- Imports -----------------------------------------------------------*/
29
30import java.io.{Closeable, InputStream};
31import java.nio.ByteBuffer;
32import java.nio.charset.Charset;
33import java.util.Date;
34
a5ec891a
MW
35import sys.FileInfo;
36import sys.FileInfo.{Value, FIFO, CHR, DIR, BLK, REG, LNK, HDLNK, UNK};
37
0157de02
MW
38import Implicits.truish;
39
c8292b34
MW
40/*----- Main code ---------------------------------------------------------*/
41
42class TarFormatError(msg: String) extends Exception(msg);
43
44trait TarEntry {
45 /* Honestly, I'd rather just have `TarFile#Entry', but Scala doesn't permit
46 * the trait inheritance circularity. So this is a cardboard cutout
47 * version of `Entry'.
48 */
49
50 /* Basic facts about the entry. */
51 def name: String;
52 def size: Long;
a5ec891a 53 def rawtyp: Char;
c8292b34
MW
54 def mode: Int;
55 def mtime: Date;
56 def uid: Int;
57 def gid: Int;
58 def link: String;
59
60 /* Type predicates (intentionally like `FileInfo'). */
a5ec891a
MW
61 def isfifo: Boolean = rawtyp == '6';
62 def ischr: Boolean = rawtyp == '3';
63 def isdir: Boolean = rawtyp == '5';
64 def isblk: Boolean = rawtyp == '4';
65 def isreg: Boolean = rawtyp match {
c8292b34
MW
66 case 0 | '0' | '7' => true
67 case _ => false
68 }
a5ec891a 69 def islnk: Boolean = rawtyp == '2';
c8292b34 70 def issock: Boolean = false;
a5ec891a
MW
71 def ishardlink: Boolean = rawtyp == '1';
72
73 def typ: FileInfo.Value = rawtyp match {
74 case 0 | '0' | '7' => REG
75 case '1' => HDLNK
76 case '2' => LNK
77 case '3' => CHR
78 case '4' => BLK
79 case '5' => DIR
80 case '6' => FIFO
81 case _ => UNK
82 }
c8292b34
MW
83
84 def verbose: String = {
85 /* Encode information about this tar header as a string. */
86
87 val sb = new StringBuilder;
88
89 /* First, the type code. */
a5ec891a 90 sb += (rawtyp match {
c8292b34
MW
91 case 0 | '0' | '7' => '-'
92 case '1' => 'L'
93 case '2' => 'l'
94 case '3' => 'c'
95 case '4' => 'b'
96 case '5' => 'd'
97 case '6' => '|'
98 case _ => '?'
99 })
100
101 /* Then the permissions bits. Ugh, the permissions bits. */
102 def perm(s: Int, r: Int, w: Int, x: Int, schar: Char, Schar: Char) {
0157de02
MW
103 sb += (if (mode&r) 'r' else '-');
104 sb += (if (mode&w) 'w' else '-');
105 sb += (if (mode&s) { if (mode&x) schar else Schar; }
106 else { if (mode&x) 'x' else '-' });
c8292b34
MW
107 }
108 perm(0x800, 0x100, 0x080, 0x040, 's', 'S');
109 perm(0x400, 0x020, 0x010, 0x008, 's', 'S');
110 perm(0x200, 0x004, 0x002, 0x001, 't', 'T');
111
112 /* And the rest, which is easy. */
113 sb ++= f" $uid%8d $gid%8d $size%12d $mtime%tFT%<tT%<tz $name%s";
114
115 /* Done. */
116 sb.result
117 }
118
119 override def toString(): String = s"${getClass.getName}($verbose)";
120
121 def stream: InputStream;
122 def withStream[T](body: InputStream => T): T = {
123 val s = stream;
124 try { body(s) }
125 finally { s.close(); }
126 }
127}
128
129class TarFile(in: InputStream)
130 extends LookaheadIterator[TarEntry] with Closeable { tar =>
131
132 /* Tokens are just objects, meaningful only for their identity. */
133 private[TarFile] class Token;
134
135 /* Some useful state. */
136 private[TarFile] var offset: Long = 0; // current byte offset
137 private[this] var lockp = false; // locked by open entry?
138 private[this] var locktok = new Token; // active lock token
139 private[this] var nexthdr: Long = 0; // byte offset of next header
140 private[this] val hdr = new Array[Byte](512); // header under consideration
141
142 /* Making sure we clean up properly. */
143 override def close() { in.close(); }
144 override protected def finalize() { super.finalize(); close(); }
145
146 private[this] def eoferr()
147 { throw new TarFormatError(s"unexpected EOF (at $offset)"); }
148
149 /* Locking machinery.
150 *
151 * We work from a primitive `InputStream' which we can't seek. From this,
152 * we must be able to extract file contents, as an `InputStream', and parse
153 * file headers. We'll be badly lost if we lose track of where we are in
154 * the archive.
155 *
156 * So, there's a lock, which can be held by at most one actor at a time:
157 * either the `TarFile' itself, while it's (hopefully) reading a header
158 * block, or by the `Stream' object which lets the caller read an
159 * individual entry's content. Furthermore, if we start activating the
160 * per-entry streams out of order, we'll get confused about where we're
161 * meant to be, so there's also a `token' which represents a participant's
162 * right to claim the lock. The `TarFile' itself has special privileges
163 * and doesn't need a token, but the per-entry streams do, and only the
164 * stream associated with the most recently-read header is allowed to claim
165 * the lock.
166 */
167
168 private[this] def lock() {
169 /* Claim exclusive use of the input stream. */
170
171 if (lockp) throw new IllegalArgumentException("tarfile lock still held");
172 lockp = true;
173 }
174
175 private[TarFile] def lock(tok: Token) {
176 /* Claim exclusive use of the input stream, passing a token. */
177
178 if (tok ne locktok)
179 throw new IllegalArgumentException("stale lock token");
180 lock();
181 }
182
183 private[TarFile] def unlock() {
184 /* Release the input stream so someone else can have a go. */
185
186 assert(lockp);
187 lockp = false;
188 locktok = new Token;
189 }
190
191 /* Doing I/O on the input stream.
192 *
193 * Our `Stream' object sneakily grabs single bytes from the input. Given
194 * the way Scala works, we can't prevent that, so roll with it.
195 */
196
197 private[TarFile] def read(buf: Array[Byte], start: Int, len: Int) {
198 /* Read input data into the indicated region of the buffer. Short reads
199 * are diagnosed as errors. Advances the cursor.
200 */
201
202 var pos = start;
203 val limit = start + len;
204 while (pos < len) {
205 val n = in.read(buf, pos, limit - pos);
206 if (n < 0) eoferr();
207 pos += n; offset += n;
208 }
209 }
210
211 private[TarFile] def skip(len: Long) {
212 /* Skip ahead LEN bytes in the archive. (The int/long discrepancy
213 * matches Java's bizarre `InputStream' protocol.)
214 */
215
216 var remain = len;
217 while (remain > 0) {
218 val n = in.skip(remain);
219
220 if (n > 0) { remain -= n; offset += n; }
221 else {
222 /* It's hard to work out whether this means we've hit EOF or not. It
223 * seems best to check. We must have at least one byte left to skip
224 * or we wouldn't have started this iteration, so try to read that.
225 * If that works, then there's more stuff available and skipping
226 * isn't working, so start to read buffers and discard them.
227 */
228
229 if (in.read() == -1) eoferr();
230 remain -= 1; offset += 1;
231
232 /* Ugh. So, buffers it is then. */
233 val buf = new Array[Byte]((remain min 4096).toInt);
234 while (remain >= buf.length) {
235 val n = (remain min buf.length).toInt;
236 read(buf, 0, n);
237 remain -= n;
238 }
239 }
240 }
241 }
242
243 private[TarFile] class Stream(end: Long, tok: Token) extends InputStream {
244 /* An input stream for a single archive entry's content. */
245
246 /* Claim the lock. If we're stale, this won't work. */
247 lock(tok);
248 private[this] var open = true;
249
250 private[this] def checkopen() {
251 /* Complain if the stream is closed. */
252
253 if (!lockp) throw new IllegalArgumentException("stream is closed");
254 }
255
256 override def read(): Int = {
257 /* Read one byte. Don't know why there isn't a default implementation
258 * of this.
259 */
260
261 checkopen();
262 if (offset >= end) -1
263 else {
264 val b = in.read();
265 if (b == -1) eoferr();
266 offset += 1;
267 b
268 }
269 }
270
271 override def read(buf: Array[Byte], start: Int, len: Int): Int = {
272 /* Read a block. */
273
274 checkopen();
275 if (offset >= end) -1
276 else {
277 var n = (len.toLong min (end - offset)).toInt;
278 tar.read(buf, start, n);
279 n
280 }
281 }
282
283 override def close() {
284 /* Release the lock. */
285
286 if (open) { unlock(); open = false; }
287 }
288 }
289
290 private[this] class Entry(val name: String, val size: Long,
a5ec891a 291 val rawtyp: Char, val mode: Int,
c8292b34
MW
292 val mtime: Date,
293 val uid: Int, val gid: Int,
294 val link: String,
295 end: Long, tok: Token)
296 extends TarEntry{
297 /* See `TarEntry' for why we have this silliness. Most of the work is in
298 * the constructor above.
299 */
300
301 lazy val stream: InputStream = new Stream(end, tok);
302 }
303
304 /* Utilities for parsing archive-entry header blocks. */
305
306 private[this] def string(off: Int, len: Int): String = {
307 /* Parse a string from the block header. POSIX.1-2008 says that header
308 * fields should be ISO/IEC 646, but strange things can turn up
309 * especially in filenames. I'm going to translate strings according to
310 * the local character set, because that will map most easily if a
311 * program tries to write out files from the archive with their
312 * associated names.
313 */
314
315 /* First, find the null terminator, if there is one. Scala doesn't make
316 * this especially easy. Rustle up a view to limit the search.
317 */
318 val bview = hdr.view(off, off + len);
319 val n = bview.indexOf(0) match {
320 case -1 => len
321 case nul => nul
322 };
323
324 /* And then decode the relevant portion of the orignal buffer. */
325 val dec = Charset.defaultCharset.newDecoder;
326 val in = ByteBuffer.wrap(hdr, off, n);
327 dec.decode(in).toString
328 }
329
330 private[this] def number(off: Int, len: Int, max: Long): Long = {
331 /* Parse a number from the block header. POSIX.1-2008 says that numbers
332 * are in octal and terminated by space or nul.
333 */
334
335 var n = 0l; // accumulate the value
336 for (i <- off until off + len) {
337 val b = hdr(i);
338
339 /* See if we're done now. */
0157de02 340 if (!b || b == ' ') return n;
c8292b34
MW
341 else if (b < '0' || b > '7')
342 throw new TarFormatError(s"bad octal digit (at ${offset + off + i})");
343
344 /* Convert to a digit. */
345 val m = b - '0';
346
347 /* Check for overflow -- without overflowing.
348 *
349 * Write max 8 N + M. We overflow if 8 n + m > 8 N + M, i.e., 8 n >
350 * 8 N + (M - m), so n > N + (M - m)/8. This last calculation is a
351 * little fraught because Scala has the wrong semantics when dividing
352 * negative integers.
353 */
354 if (n > max/8 + (8 + max%8 - m)/8 - 1)
355 throw new TarFormatError(s"number out of range (at ${offset + off})");
356
357 /* Accumulate and go round again. */
358 n = 8*n + (b - '0');
359 }
360 unreachable;
361 }
362
363 override protected def fetch(): Option[TarEntry] = {
364 /* Collect the next archive header and return it as a file entry. */
365
366 /* Make sure that we can actually do this. */
367 withCleaner { clean =>
368 lock(); clean { unlock(); }
369
370 /* Skip ahead to the next header. */
371 skip(nexthdr - offset);
372
373 /* Read the header. The end of the archive is marked by two zero
374 * blocks, so the archive is broken if there isn't at least one here.
375 */
376 read(hdr, 0, 512);
377 }
378
379 /* If the block is entirely zero-filled then declare this file at an
380 * end. No good can come from checking the next block.
381 */
382 if (hdr.forall(_ == 0)) return None;
383
384 /* Verify the checksum. Pay attention because Java's bytes are
385 * (idiotically) signed.
386 */
387 var ck: Int = 8*' '; // pretend chksum field is spaces
388 for (i <- 0 until 148) ck += hdr(i)&0xff;
389 for (i <- 156 until 512) ck += hdr(i)&0xff;
390 val wantck = number(148, 8, 0x20000);
391 if (ck != wantck) {
392 throw new TarFormatError(
393 s"invalid checksum $ck /= $wantck (at $nexthdr)");
394 }
395
396 /* Fetch the `magic' and `version' fields. If this is a proper POSIX
397 * `ustar' file then special processing will apply.
398 */
399 val magic = string(257, 6);
400 val version = string(263, 2);
401 val posixp = magic == "ustar" && version == "00";
402
403 /* Figure out this entry's name. If this is a POSIX archive, then part
404 * of the name is stashed at the end of the header because of old, bad
405 * decisions. But don't look there unless we're sure because old GNU
406 * `tar' used that space for other things.
407 */
408 val name = {
409 val tail = string(0, 100);
0157de02 410 if (!posixp || !hdr(345)) tail
c8292b34
MW
411 else {
412 val prefix = string(345, 155);
413 prefix + '/' + tail
414 }
415 }
416
417 /* Read some other easy stuff. */
418 val mode = number(100, 8, 0xfff).toInt;
419 val uid = number(108, 8, Int.MaxValue).toInt;
420 val gid = number(116, 8, Int.MaxValue).toInt;
421 val typ = hdr(156).toChar;
422 val mtime = number(136, 12, Long.MaxValue);
423
424 /* The size is irrelevant, and possibly even misleading, for some entry
425 * types. We're not interested, for example, in systems where
426 * directories need to be preallocated.
427 */
428 val size = typ match {
429 case '1' | '2' | '3' | '4' | '5' | '6' => 0
430 case _ => number(124, 12, Long.MaxValue)
431 }
432
433 /* Maybe fetch the link name. */
434 val link = typ match {
435 case '1' | '2' => string(157, 100)
436 case _ => ""
437 }
438
439 /* Figure out where the next header ought to be. */
440 nexthdr = (offset + size + 511)& -512;
441
442 /* Return the finished archive entry. */
443 Some(new Entry(name, size, typ, mode,
444 new Date(1000*mtime), uid, gid, link,
445 offset + size, locktok));
446 }
447}
448
449/* Example:
450 *
451 * for (e <- TarFile(new GZIPInputStream(tarball.open())); if e.isreg)
452 * e withStream { in =>
453 * val h = java.security.MessageDigest.getInstance("SHA-256");
454 * for ((buf, n) <- in.blocks) h.update(b, 0, n);
455 * val hex = new String(h.digest flatMap { _.formatted("%02x") });
456 * println("s$hex ${e.name}");
457 * }
458 */
459
460/*----- That's all, folks -------------------------------------------------*/