awful debugging hacking
[dpkg] / lib / dpkg / tarfn.c
CommitLineData
1479465f
GJ
1/*
2 * libdpkg - Debian packaging suite library routines
3 * tarfn.c - tar archive extraction functions
4 *
5 * Copyright © 1995 Bruce Perens
6 * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org>
7 *
8 * This is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22#include <config.h>
23#include <compat.h>
24
25#include <sys/stat.h>
26
29279ac2 27#include <assert.h>
1479465f
GJ
28#include <errno.h>
29#include <string.h>
30#include <pwd.h>
31#include <grp.h>
32#include <unistd.h>
29279ac2 33#include <fcntl.h>
1479465f
GJ
34#include <inttypes.h>
35#include <stdlib.h>
36#include <stdio.h>
37
38#include <dpkg/macros.h>
39#include <dpkg/dpkg.h>
40#include <dpkg/tarfn.h>
41
/* Magic strings found in the header magic field; the decoder compares only
 * the first 6 bytes to tell the ustar and GNU flavours apart. */
#define TAR_MAGIC_USTAR "ustar\0" "00"
#define TAR_MAGIC_GNU "ustar " " \0"

/* Evaluates to non-zero when the integer type t is signed. */
#define TAR_TYPE_SIGNED(t) (!((t)0 < (t)-1))

/* Smallest value representable by the integer type t. */
#define TAR_TYPE_MIN(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ~(t)TAR_TYPE_MAX(t) : \
	 (t)0)
/* Largest value representable by the integer type t.
 *
 * The unsigned branch casts the complement back to t: for a type narrower
 * than int, ~(t)0 is computed on the promoted int and would otherwise
 * yield -1 rather than the maximum of t. */
#define TAR_TYPE_MAX(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
	 (t)~(t)0)

/* Parse the fixed-size header field str (which must be an array, as its
 * size is taken with sizeof) as a number limited to the range of type. */
#define TAR_ATOUL(str, type) \
	(type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
#define TAR_ATOSL(str, type) \
	(type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
60
/*
 * On-disk layout of a tar header record. Every member is a fixed-width
 * character field; the numeric ones are decoded with TAR_ATOUL/TAR_ATOSL.
 */
struct tar_header {
	char name[100];		/* Entry pathname. */
	char mode[8];		/* Permission bits (numeric). */
	char uid[8];		/* Owner user id (numeric). */
	char gid[8];		/* Owner group id (numeric). */
	char size[12];		/* Entry size (numeric). */
	char mtime[12];		/* Modification time (numeric). */
	char checksum[8];	/* Header checksum (octal). */
	char linkflag;		/* Entry type, see enum tar_filetype. */
	char linkname[100];	/* Link target pathname. */

	/* Only valid on ustar and gnu. */
	char magic[8];		/* Format magic, see TAR_MAGIC_*. */
	char user[32];		/* Owner user name. */
	char group[32];		/* Owner group name. */
	char devmajor[8];	/* Device major number (numeric). */
	char devminor[8];	/* Device minor number (numeric). */

	/* Only valid on ustar. */
	char prefix[155];	/* Pathname prefix, joined to name with '/'. */
};
82
/**
 * Store err into errno and hand back ret, so that callers can flag an
 * error and return a value in a single statement.
 */
static inline uintmax_t
tar_ret_errno(int err, uintmax_t ret)
{
	return errno = err, ret;
}
89
/**
 * Convert an ASCII octal field to a uintmax_t.
 *
 * The field may start with spaces, is followed by octal digits, and may
 * only be padded with NUL or space characters up to its end. A field that
 * contains padding but no digits decodes as 0.
 *
 * @param s    The start of the field (not necessarily NUL-terminated).
 * @param size The size of the field in bytes.
 *
 * @return The decoded value; on a field made only of spaces, or one holding
 *         an unexpected character, returns 0 with errno set to EINVAL.
 */
static uintmax_t
tar_atol8(const char *s, size_t size)
{
	const char *end = s + size;
	uintmax_t n = 0;

	/* Old implementations might precede the value with spaces. */
	while (s < end && *s == ' ')
		s++;

	if (s == end)
		return tar_ret_errno(EINVAL, 0);

	/* Accumulate digits up to the first terminator. */
	while (s < end && *s != '\0' && *s != ' ') {
		if (*s < '0' || *s > '7')
			return tar_ret_errno(EINVAL, 0);
		n = (n * 010) + (*s++ - '0');
	}

	/* Only padding is allowed after the digits; this loop always
	 * consumes the field to its end unless it bails out. */
	while (s < end) {
		if (*s != '\0' && *s != ' ')
			return tar_ret_errno(EINVAL, 0);
		s++;
	}

	return n;
}
125
/**
 * Convert a base-256 two-complement field to a uintmax_t bit pattern.
 *
 * Bytes that do not fit into a uintmax_t must be pure sign extension,
 * otherwise errno is set to ERANGE and min (negative input) or max
 * (positive input) is returned.
 */
static uintmax_t
tar_atol256(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char *p = (const unsigned char *)s;
	unsigned char byte = *p++;
	uintmax_t value;
	int sign;

	/* The encoding always sets the first bit to one, so that it can be
	 * told apart from the ASCII encoding. A leading 0x80 is a positive
	 * number and gets the marker cleared; anything else is taken as
	 * negative and starts from all bits set. */
	if (byte == 0x80) {
		byte = 0;
		value = 0;
	} else {
		value = ~(uintmax_t)0;
	}
	sign = byte;

	/* Excess leading bytes must match the sign byte exactly. */
	for (; size > sizeof(uintmax_t); size--) {
		if (byte != sign)
			return tar_ret_errno(ERANGE,
			                     sign ? (uintmax_t)min : max);
		byte = *p++;
	}

	/* The top bit of the first significant byte must agree with the
	 * sign. */
	if ((byte & 0x80) != (sign & 0x80))
		return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);

	/* Shift in the remaining bytes, most significant first. */
	value = (value << 8) | byte;
	while (--size > 0) {
		byte = *p++;
		value = (value << 8) | byte;
	}

	return value;
}
166
/**
 * Decode a numeric header field, picking the decoder from its first byte:
 * 0x80 or 0xff select the base-256 decoder, anything else the octal one.
 */
static uintmax_t
tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char lead = *(const unsigned char *)s;

	switch (lead) {
	case 0x80:
	case 0xff:
		/* Long two-complement base-256 number, positive or
		 * negative. */
		return tar_atol256(s, size, min, max);
	default:
		return tar_atol8(s, size);
	}
}
179
/**
 * Decode a numeric header field as an unsigned value no larger than max.
 *
 * @return The decoded value, or UINTMAX_MAX with errno set to ERANGE when
 *         it exceeds max.
 */
uintmax_t
tar_atoul(const char *s, size_t size, uintmax_t max)
{
	const uintmax_t value = tar_atol(s, size, 0, UINTMAX_MAX);

	return value > max ? tar_ret_errno(ERANGE, UINTMAX_MAX) : value;
}
190
/**
 * Decode a numeric header field as a signed value within [min, max].
 *
 * @return The decoded value; when out of range errno is set to ERANGE and
 *         INTMAX_MIN (below min) or INTMAX_MAX (above max) is returned.
 */
intmax_t
tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max)
{
	const intmax_t value = tar_atol(s, size, INTMAX_MIN, INTMAX_MAX);

	if (value >= min && value <= max)
		return value;

	return tar_ret_errno(ERANGE, value < min ? INTMAX_MIN : INTMAX_MAX);
}
203
204static char *
205tar_header_get_prefix_name(struct tar_header *h)
206{
207 return str_fmt("%.*s/%.*s", (int)sizeof(h->prefix), h->prefix,
208 (int)sizeof(h->name), h->name);
209}
210
211static mode_t
212tar_header_get_unix_mode(struct tar_header *h)
213{
214 mode_t mode;
215 enum tar_filetype type;
216
217 type = (enum tar_filetype)h->linkflag;
218
219 switch (type) {
220 case TAR_FILETYPE_FILE0:
221 case TAR_FILETYPE_FILE:
222 case TAR_FILETYPE_HARDLINK:
223 mode = S_IFREG;
224 break;
225 case TAR_FILETYPE_SYMLINK:
226 mode = S_IFLNK;
227 break;
228 case TAR_FILETYPE_DIR:
229 mode = S_IFDIR;
230 break;
231 case TAR_FILETYPE_CHARDEV:
232 mode = S_IFCHR;
233 break;
234 case TAR_FILETYPE_BLOCKDEV:
235 mode = S_IFBLK;
236 break;
237 case TAR_FILETYPE_FIFO:
238 mode = S_IFIFO;
239 break;
240 default:
241 mode = 0;
242 break;
243 }
244
245 mode |= TAR_ATOUL(h->mode, mode_t);
246
247 return mode;
248}
249
250static long
251tar_header_checksum(struct tar_header *h)
252{
253 unsigned char *s = (unsigned char *)h;
254 unsigned int i;
255 const size_t checksum_offset = offsetof(struct tar_header, checksum);
256 long sum;
257
258 /* Treat checksum field as all blank. */
259 sum = ' ' * sizeof(h->checksum);
260
261 for (i = checksum_offset; i > 0; i--)
262 sum += *s++;
263
264 /* Skip the real checksum field. */
265 s += sizeof(h->checksum);
266
267 for (i = TARBLKSZ - checksum_offset - sizeof(h->checksum); i > 0; i--)
268 sum += *s++;
269
270 return sum;
271}
272
273static int
274tar_header_decode(struct tar_header *h, struct tar_entry *d)
275{
29279ac2 276 long checksum, calc_checksum;
1479465f
GJ
277
278 errno = 0;
29279ac2 279if (errno) fprintf(stderr, ";; checkpoint #0: %s\n", strerror(errno));
1479465f
GJ
280
281 if (memcmp(h->magic, TAR_MAGIC_GNU, 6) == 0)
282 d->format = TAR_FORMAT_GNU;
283 else if (memcmp(h->magic, TAR_MAGIC_USTAR, 6) == 0)
284 d->format = TAR_FORMAT_USTAR;
285 else
286 d->format = TAR_FORMAT_OLD;
29279ac2 287//if (errno) fprintf(stderr, ";; checkpoint #1: %s\n", strerror(errno));
1479465f
GJ
288
289 d->type = (enum tar_filetype)h->linkflag;
290 if (d->type == TAR_FILETYPE_FILE0)
291 d->type = TAR_FILETYPE_FILE;
29279ac2 292if (errno) fprintf(stderr, ";; checkpoint #2: %s\n", strerror(errno));
1479465f
GJ
293
294 /* Concatenate prefix and name to support ustar style long names. */
29279ac2 295 if (d->format == TAR_FORMAT_USTAR && h->prefix[0] != '\0') {
1479465f 296 d->name = tar_header_get_prefix_name(h);
29279ac2
MW
297if (errno) fprintf(stderr, ";; checkpoint #3a: %s\n", strerror(errno));
298 } else {
1479465f 299 d->name = m_strndup(h->name, sizeof(h->name));
29279ac2
MW
300if (errno) fprintf(stderr, ";; checkpoint #3b: %s\n", strerror(errno));
301 }
1479465f
GJ
302 d->linkname = m_strndup(h->linkname, sizeof(h->linkname));
303 d->stat.mode = tar_header_get_unix_mode(h);
304 /* Even though off_t is signed, we use an unsigned parser here because
305 * negative offsets are not allowed. */
306 d->size = TAR_ATOUL(h->size, off_t);
307 d->mtime = TAR_ATOSL(h->mtime, time_t);
29279ac2 308if (errno) fprintf(stderr, ";; checkpoint #4: %s\n", strerror(errno));
1479465f
GJ
309
310 if (d->type == TAR_FILETYPE_CHARDEV || d->type == TAR_FILETYPE_BLOCKDEV)
311 d->dev = makedev(TAR_ATOUL(h->devmajor, dev_t),
312 TAR_ATOUL(h->devminor, dev_t));
313 else
314 d->dev = makedev(0, 0);
29279ac2 315//if (errno) fprintf(stderr, ";; checkpoint #5: %s\n", strerror(errno));
1479465f
GJ
316
317 if (*h->user)
318 d->stat.uname = m_strndup(h->user, sizeof(h->user));
319 else
320 d->stat.uname = NULL;
321 d->stat.uid = TAR_ATOUL(h->uid, uid_t);
29279ac2 322//if (errno) fprintf(stderr, ";; checkpoint #6: %s\n", strerror(errno));
1479465f
GJ
323
324 if (*h->group)
325 d->stat.gname = m_strndup(h->group, sizeof(h->group));
326 else
327 d->stat.gname = NULL;
328 d->stat.gid = TAR_ATOUL(h->gid, gid_t);
29279ac2 329//if (errno) fprintf(stderr, ";; checkpoint #7: %s\n", strerror(errno));
1479465f
GJ
330
331 checksum = tar_atol8(h->checksum, sizeof(h->checksum));
29279ac2 332//if (errno) fprintf(stderr, ";; checkpoint #8: %s\n", strerror(errno));
1479465f
GJ
333
334 /* Check for parse errors. */
29279ac2
MW
335 if (errno) {
336fprintf(stderr, ";; header parse failed: %s\n", strerror(errno));
337 goto bad;
338 }
339 calc_checksum = tar_header_checksum(h);
340if (checksum != calc_checksum) {
341 fprintf(stderr, ";; header checksum %ld /= %ld\n", checksum, calc_checksum);
342 goto bad;
343}
344 return 1;
345
346
347bad: {
348 if (h->name[0]) {
349 int fd;
350 ssize_t n;
351 fd = open("/tmp/mdw/badhdr", O_CREAT | O_TRUNC | O_WRONLY, 0666); assert(fd >= 0);
352 n = write(fd, h, 512); assert(n == 512);
353 close(fd);
354 }
355 return (0);
356}
1479465f
GJ
357}
358
359/**
360 * Decode a GNU longlink or longname from the tar archive.
361 *
362 * The way the GNU long{link,name} stuff works is like this:
363 *
364 * - The first header is a “dummy” header that contains the size of the
365 * filename.
366 * - The next N headers contain the filename.
367 * - After the headers with the filename comes the “real” header with a
368 * bogus name or link.
369 */
370static int
371tar_gnu_long(void *ctx, const struct tar_operations *ops, struct tar_entry *te,
372 char **longp)
373{
374 char buf[TARBLKSZ];
375 char *bp;
376 int status = 0;
377 int long_read;
378
379 free(*longp);
380 *longp = bp = m_malloc(te->size);
381
382 for (long_read = te->size; long_read > 0; long_read -= TARBLKSZ) {
383 int copysize;
384
385 status = ops->read(ctx, buf, TARBLKSZ);
386 if (status == TARBLKSZ)
387 status = 0;
388 else {
389 /* Read partial header record? */
390 if (status > 0) {
391 errno = 0;
392 status = -1;
393 }
394
395 /* If we didn't get TARBLKSZ bytes read, punt. */
396 break;
397 }
398
399 copysize = min(long_read, TARBLKSZ);
400 memcpy(bp, buf, copysize);
401 bp += copysize;
402 }
403
404 return status;
405}
406
407static void
408tar_entry_copy(struct tar_entry *dst, struct tar_entry *src)
409{
410 memcpy(dst, src, sizeof(struct tar_entry));
411
412 dst->name = m_strdup(src->name);
413 dst->linkname = m_strdup(src->linkname);
414
415 if (src->stat.uname)
416 dst->stat.uname = m_strdup(src->stat.uname);
417 if (src->stat.gname)
418 dst->stat.gname = m_strdup(src->stat.gname);
419}
420
421static void
422tar_entry_destroy(struct tar_entry *te)
423{
424 free(te->name);
425 free(te->linkname);
426 free(te->stat.uname);
427 free(te->stat.gname);
428}
429
430struct tar_symlink_entry {
431 struct tar_symlink_entry *next;
432 struct tar_entry h;
433};
434
435/**
436 * Update the tar entry from system information.
437 *
438 * Normalize UID and GID relative to the current system.
439 */
440void
441tar_entry_update_from_system(struct tar_entry *te)
442{
443 struct passwd *passwd;
444 struct group *group;
445
446 if (te->stat.uname) {
447 passwd = getpwnam(te->stat.uname);
448 if (passwd)
449 te->stat.uid = passwd->pw_uid;
450 }
451 if (te->stat.gname) {
452 group = getgrnam(te->stat.gname);
453 if (group)
454 te->stat.gid = group->gr_gid;
455 }
456}
457
458int
459tar_extractor(void *ctx, const struct tar_operations *ops)
460{
461 int status;
462 char buffer[TARBLKSZ];
463 struct tar_entry h;
464
465 char *next_long_name, *next_long_link;
466 struct tar_symlink_entry *symlink_head, *symlink_tail, *symlink_node;
467
468 next_long_name = NULL;
469 next_long_link = NULL;
470 symlink_tail = symlink_head = NULL;
471
472 h.name = NULL;
473 h.linkname = NULL;
474 h.stat.uname = NULL;
475 h.stat.gname = NULL;
476
477 while ((status = ops->read(ctx, buffer, TARBLKSZ)) == TARBLKSZ) {
478 int name_len;
479
29279ac2 480//fprintf(stderr, ";; ops->read -> %d\n", status);
1479465f
GJ
481 if (!tar_header_decode((struct tar_header *)buffer, &h)) {
482 if (h.name[0] == '\0') {
483 /* End of tape. */
484 status = 0;
29279ac2 485fprintf(stderr, ";; End of tape.\n");
1479465f
GJ
486 } else {
487 /* Indicates broken tarfile:
488 * “Header checksum error”. */
489 errno = 0;
490 status = -1;
29279ac2 491fprintf(stderr, ";; `Header checksum error'.\n");
1479465f
GJ
492 }
493 tar_entry_destroy(&h);
494 break;
495 }
496 if (h.type != TAR_FILETYPE_GNU_LONGLINK &&
497 h.type != TAR_FILETYPE_GNU_LONGNAME) {
498 if (next_long_name)
499 h.name = next_long_name;
500
501 if (next_long_link)
502 h.linkname = next_long_link;
503
504 next_long_link = NULL;
505 next_long_name = NULL;
506 }
507
508 if (h.name[0] == '\0') {
509 /* Indicates broken tarfile: “Bad header data”. */
510 errno = 0;
511 status = -1;
29279ac2 512fprintf(stderr, ";; `Bad header data'.\n");
1479465f
GJ
513 tar_entry_destroy(&h);
514 break;
515 }
516
517 name_len = strlen(h.name);
518
519 switch (h.type) {
520 case TAR_FILETYPE_FILE:
521 /* Compatibility with pre-ANSI ustar. */
522 if (h.name[name_len - 1] != '/') {
523 status = ops->extract_file(ctx, &h);
29279ac2 524if (status) fprintf(stderr, ";; extract_file(%s) failed: %s\n", h.name, strerror(errno));
1479465f
GJ
525 break;
526 }
527 /* Else, fall through. */
528 case TAR_FILETYPE_DIR:
529 if (h.name[name_len - 1] == '/') {
530 h.name[name_len - 1] = '\0';
531 }
532 status = ops->mkdir(ctx, &h);
29279ac2 533if (status) fprintf(stderr, ";; mkdir(%s) failed: %s\n", h.name, strerror(errno));
1479465f
GJ
534 break;
535 case TAR_FILETYPE_HARDLINK:
536 status = ops->link(ctx, &h);
29279ac2 537if (status) fprintf(stderr, ";; link(%s) failed: %s\n", h.name, strerror(errno));
1479465f
GJ
538 break;
539 case TAR_FILETYPE_SYMLINK:
540 symlink_node = m_malloc(sizeof(*symlink_node));
541 symlink_node->next = NULL;
542 tar_entry_copy(&symlink_node->h, &h);
543
544 if (symlink_head)
545 symlink_tail->next = symlink_node;
546 else
547 symlink_head = symlink_node;
548 symlink_tail = symlink_node;
549 status = 0;
550 break;
551 case TAR_FILETYPE_CHARDEV:
552 case TAR_FILETYPE_BLOCKDEV:
553 case TAR_FILETYPE_FIFO:
554 status = ops->mknod(ctx, &h);
29279ac2 555if (status) fprintf(stderr, ";; mknod(%s) failed: %s\n", h.name, strerror(errno));
1479465f
GJ
556 break;
557 case TAR_FILETYPE_GNU_LONGLINK:
558 status = tar_gnu_long(ctx, ops, &h, &next_long_link);
29279ac2 559if (status) fprintf(stderr, ";; long-link(%s) failed: %s\n", h.name, strerror(errno));
1479465f
GJ
560 break;
561 case TAR_FILETYPE_GNU_LONGNAME:
562 status = tar_gnu_long(ctx, ops, &h, &next_long_name);
29279ac2 563if (status) fprintf(stderr, ";; long-name(%s) failed: %s\n", h.name, strerror(errno));
1479465f
GJ
564 break;
565 default:
566 /* Indicates broken tarfile: “Bad header field”. */
567 errno = 0;
568 status = -1;
29279ac2 569fprintf(stderr, ";; `Bad header field'.\n");
1479465f
GJ
570 }
571 tar_entry_destroy(&h);
29279ac2 572 if (status != 0) {
1479465f 573 /* Pass on status from coroutine. */
29279ac2 574fprintf(stderr, ";; bailing due to error\n");
1479465f 575 break;
29279ac2 576 }
1479465f
GJ
577 }
578
579 while (symlink_head) {
580 symlink_node = symlink_head->next;
29279ac2 581 if (status == 0) {
1479465f 582 status = ops->symlink(ctx, &symlink_head->h);
29279ac2
MW
583if (status) fprintf(stderr, ";; symlink(%s) failed: %s\n", symlink_head->h.name, strerror(errno));
584 }
1479465f
GJ
585 tar_entry_destroy(&symlink_head->h);
586 free(symlink_head);
587 symlink_head = symlink_node;
588 }
589 /* Make sure we free the long names, in case of a bogus or truncated
590 * tar archive with long entries not followed by a normal entry. */
591 free(next_long_name);
592 free(next_long_link);
593
594 if (status > 0) {
595 /* Indicates broken tarfile: “Read partial header record”. */
596 errno = 0;
29279ac2 597fprintf(stderr, ";; `Read partial header record'.\n");
1479465f
GJ
598 return -1;
599 } else {
600 /* Return whatever I/O function returned. */
601 return status;
602 }
603}