dpkg (1.18.25) stretch; urgency=medium
[dpkg] / lib / dpkg / tarfn.c
CommitLineData
1479465f
GJ
1/*
2 * libdpkg - Debian packaging suite library routines
3 * tarfn.c - tar archive extraction functions
4 *
5 * Copyright © 1995 Bruce Perens
6 * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org>
7 *
8 * This is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22#include <config.h>
23#include <compat.h>
24
25#include <sys/stat.h>
26
27#include <errno.h>
28#include <string.h>
29#include <pwd.h>
30#include <grp.h>
31#include <unistd.h>
32#include <inttypes.h>
33#include <stdlib.h>
34#include <stdio.h>
35
36#include <dpkg/macros.h>
37#include <dpkg/dpkg.h>
38#include <dpkg/tarfn.h>
39
/*
 * Signatures compared against the first 6 bytes of the header magic field
 * to tell GNU and ustar archives apart.
 */
#define TAR_MAGIC_USTAR "ustar\0" "00"
#define TAR_MAGIC_GNU "ustar " " \0"

/* Evaluates to non-zero if the integer type t is signed. */
#define TAR_TYPE_SIGNED(t) (!((t)0 < (t)-1))

/*
 * Smallest and largest values representable in the integer type t.
 *
 * The unsigned maximum is cast back to t: for types narrower than int the
 * operand of ~ is promoted to int, so a bare ~(t)0 would be the int -1,
 * which turns into UINTMAX_MAX once passed as an uintmax_t limit.
 */
#define TAR_TYPE_MIN(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ~(t)TAR_TYPE_MAX(t) : \
	 (t)0)
#define TAR_TYPE_MAX(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
	 (t)~(t)0)

/*
 * Parse a numeric header field into the given integer type, bounded by the
 * range of that type. The str argument must be the field array itself, as
 * its width is taken with sizeof.
 */
#define TAR_ATOUL(str, type) \
	(type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
#define TAR_ATOSL(str, type) \
	(type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
58
/**
 * Raw layout of a tar header record.
 *
 * Every member is a fixed-width character field; the numeric ones are
 * decoded with TAR_ATOUL() / TAR_ATOSL() and the textual ones are not
 * necessarily NUL-terminated, so they are always copied with an explicit
 * length bound.
 */
struct tar_header {
	char name[100];		/* Entry pathname. */
	char mode[8];		/* Permission bits (numeric). */
	char uid[8];		/* Owner user id (numeric). */
	char gid[8];		/* Owner group id (numeric). */
	char size[12];		/* Entry size (numeric). */
	char mtime[12];		/* Modification time (numeric). */
	char checksum[8];	/* Header checksum (octal). */
	char linkflag;		/* Entry type, see enum tar_filetype. */
	char linkname[100];	/* Link target. */

	/* Only valid on ustar and gnu. */
	char magic[8];		/* Format signature. */
	char user[32];		/* Owner user name. */
	char group[32];		/* Owner group name. */
	char devmajor[8];	/* Device major number (numeric). */
	char devminor[8];	/* Device minor number (numeric). */

	/* Only valid on ustar. */
	char prefix[155];	/* Leading part of a long pathname. */
};
80
/**
 * Set errno to err and hand back ret unchanged.
 *
 * Lets the numeric parsers flag an error and return a value in a single
 * statement.
 */
static inline uintmax_t
tar_ret_errno(int err, uintmax_t ret)
{
	errno = err;

	return ret;
}
87
/**
 * Convert an ASCII octal field to an uintmax_t.
 *
 * The field is size bytes long and need not be NUL-terminated. Leading
 * spaces are skipped, then octal digits are accumulated up to the first
 * NUL or space; everything after that must be NULs or spaces.
 *
 * @param s    Start of the field.
 * @param size Width of the field in bytes.
 *
 * @return The parsed value, or 0 with errno set to EINVAL if the field
 *         consists only of spaces, contains a non-octal character, or has
 *         trailing garbage.
 */
static uintmax_t
tar_atol8(const char *s, size_t size)
{
	const char *end = s + size;
	uintmax_t n = 0;

	/* Old implementations might precede the value with spaces. */
	while (s < end && *s == ' ')
		s++;

	/* Nothing but spaces: there is no value to parse. */
	if (s == end)
		return tar_ret_errno(EINVAL, 0);

	/* Accumulate digits up to the first terminator. */
	for (; s < end && *s != '\0' && *s != ' '; s++) {
		if (*s < '0' || *s > '7')
			return tar_ret_errno(EINVAL, 0);
		n = (n * 010) + (*s - '0');
	}

	/* Only terminators may follow the digits. */
	for (; s < end; s++) {
		if (*s != '\0' && *s != ' ')
			return tar_ret_errno(EINVAL, 0);
	}

	return n;
}
123
/**
 * Convert a base-256 two-complement field to an uintmax_t.
 *
 * @param s    Start of the field; its first byte is the encoding marker.
 * @param size Width of the field in bytes.
 * @param min  Value returned (cast to uintmax_t) on negative overflow.
 * @param max  Value returned on positive overflow.
 *
 * @return The decoded value as a two-complement bit pattern, or min / max
 *         with errno set to ERANGE if it does not fit.
 */
static uintmax_t
tar_atol256(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	uintmax_t value;
	unsigned char byte;
	int sign;

	/* The encoding always sets the first bit to one, so that it can be
	 * distinguished from the ASCII encoding. For positive numbers the
	 * marker is cleared; for negative numbers the accumulator starts
	 * with all bits set. */
	byte = *s++;
	if (byte == 0x80) {
		byte = 0;
		value = 0;
	} else {
		value = ~(uintmax_t)0;
	}
	sign = byte;

	/* Bytes that do not fit in the accumulator must be pure sign
	 * extension, otherwise the value overflows. */
	for (; size > sizeof(uintmax_t); size--) {
		if (byte != sign)
			return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);
		byte = *s++;
	}

	/* The top bit of the first significant byte must agree with the
	 * sign as well. */
	if ((byte & 0x80) != (sign & 0x80))
		return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);

	/* Shift in the remaining bytes, most significant first. */
	value = (value << 8) | byte;
	while (--size > 0) {
		byte = *s++;
		value = (value << 8) | byte;
	}

	return value;
}
164
/**
 * Parse a numeric header field in whichever encoding it uses.
 *
 * A first byte of 0xff or 0x80 selects the base-256 two-complement
 * decoder; anything else is parsed as ASCII octal. The min and max limits
 * are only used by the base-256 decoder.
 */
static uintmax_t
tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char marker = *(const unsigned char *)s;

	/* Long two-complement base-256 number, positive or negative? */
	if (marker == 0x80 || marker == 0xff)
		return tar_atol256(s, size, min, max);

	return tar_atol8(s, size);
}
177
/**
 * Parse an unsigned numeric header field.
 *
 * @param s    Start of the field.
 * @param size Width of the field in bytes.
 * @param max  Largest value accepted.
 *
 * @return The parsed value, or UINTMAX_MAX with errno set to ERANGE if it
 *         exceeds max. Errors from the underlying parser are reported
 *         through errno too.
 */
uintmax_t
tar_atoul(const char *s, size_t size, uintmax_t max)
{
	const uintmax_t value = tar_atol(s, size, 0, UINTMAX_MAX);

	if (value <= max)
		return value;

	return tar_ret_errno(ERANGE, UINTMAX_MAX);
}
188
/**
 * Parse a signed numeric header field.
 *
 * @param s    Start of the field.
 * @param size Width of the field in bytes.
 * @param min  Smallest value accepted.
 * @param max  Largest value accepted.
 *
 * @return The parsed value, or INTMAX_MIN / INTMAX_MAX with errno set to
 *         ERANGE if it falls below min / above max. Errors from the
 *         underlying parser are reported through errno too.
 */
intmax_t
tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max)
{
	const intmax_t value = tar_atol(s, size, INTMAX_MIN, INTMAX_MAX);

	if (value < min)
		return tar_ret_errno(ERANGE, INTMAX_MIN);
	else if (value > max)
		return tar_ret_errno(ERANGE, INTMAX_MAX);
	else
		return value;
}
201
202static char *
203tar_header_get_prefix_name(struct tar_header *h)
204{
205 return str_fmt("%.*s/%.*s", (int)sizeof(h->prefix), h->prefix,
206 (int)sizeof(h->name), h->name);
207}
208
209static mode_t
210tar_header_get_unix_mode(struct tar_header *h)
211{
212 mode_t mode;
213 enum tar_filetype type;
214
215 type = (enum tar_filetype)h->linkflag;
216
217 switch (type) {
218 case TAR_FILETYPE_FILE0:
219 case TAR_FILETYPE_FILE:
220 case TAR_FILETYPE_HARDLINK:
221 mode = S_IFREG;
222 break;
223 case TAR_FILETYPE_SYMLINK:
224 mode = S_IFLNK;
225 break;
226 case TAR_FILETYPE_DIR:
227 mode = S_IFDIR;
228 break;
229 case TAR_FILETYPE_CHARDEV:
230 mode = S_IFCHR;
231 break;
232 case TAR_FILETYPE_BLOCKDEV:
233 mode = S_IFBLK;
234 break;
235 case TAR_FILETYPE_FIFO:
236 mode = S_IFIFO;
237 break;
238 default:
239 mode = 0;
240 break;
241 }
242
243 mode |= TAR_ATOUL(h->mode, mode_t);
244
245 return mode;
246}
247
248static long
249tar_header_checksum(struct tar_header *h)
250{
251 unsigned char *s = (unsigned char *)h;
252 unsigned int i;
253 const size_t checksum_offset = offsetof(struct tar_header, checksum);
254 long sum;
255
256 /* Treat checksum field as all blank. */
257 sum = ' ' * sizeof(h->checksum);
258
259 for (i = checksum_offset; i > 0; i--)
260 sum += *s++;
261
262 /* Skip the real checksum field. */
263 s += sizeof(h->checksum);
264
265 for (i = TARBLKSZ - checksum_offset - sizeof(h->checksum); i > 0; i--)
266 sum += *s++;
267
268 return sum;
269}
270
271static int
272tar_header_decode(struct tar_header *h, struct tar_entry *d)
273{
274 long checksum;
275
276 errno = 0;
277
278 if (memcmp(h->magic, TAR_MAGIC_GNU, 6) == 0)
279 d->format = TAR_FORMAT_GNU;
280 else if (memcmp(h->magic, TAR_MAGIC_USTAR, 6) == 0)
281 d->format = TAR_FORMAT_USTAR;
282 else
283 d->format = TAR_FORMAT_OLD;
284
285 d->type = (enum tar_filetype)h->linkflag;
286 if (d->type == TAR_FILETYPE_FILE0)
287 d->type = TAR_FILETYPE_FILE;
288
289 /* Concatenate prefix and name to support ustar style long names. */
290 if (d->format == TAR_FORMAT_USTAR && h->prefix[0] != '\0')
291 d->name = tar_header_get_prefix_name(h);
292 else
293 d->name = m_strndup(h->name, sizeof(h->name));
294 d->linkname = m_strndup(h->linkname, sizeof(h->linkname));
295 d->stat.mode = tar_header_get_unix_mode(h);
296 /* Even though off_t is signed, we use an unsigned parser here because
297 * negative offsets are not allowed. */
298 d->size = TAR_ATOUL(h->size, off_t);
299 d->mtime = TAR_ATOSL(h->mtime, time_t);
300
301 if (d->type == TAR_FILETYPE_CHARDEV || d->type == TAR_FILETYPE_BLOCKDEV)
302 d->dev = makedev(TAR_ATOUL(h->devmajor, dev_t),
303 TAR_ATOUL(h->devminor, dev_t));
304 else
305 d->dev = makedev(0, 0);
306
307 if (*h->user)
308 d->stat.uname = m_strndup(h->user, sizeof(h->user));
309 else
310 d->stat.uname = NULL;
311 d->stat.uid = TAR_ATOUL(h->uid, uid_t);
312
313 if (*h->group)
314 d->stat.gname = m_strndup(h->group, sizeof(h->group));
315 else
316 d->stat.gname = NULL;
317 d->stat.gid = TAR_ATOUL(h->gid, gid_t);
318
319 checksum = tar_atol8(h->checksum, sizeof(h->checksum));
320
321 /* Check for parse errors. */
322 if (errno)
323 return 0;
324 return tar_header_checksum(h) == checksum;
325}
326
327/**
328 * Decode a GNU longlink or longname from the tar archive.
329 *
330 * The way the GNU long{link,name} stuff works is like this:
331 *
332 * - The first header is a “dummy” header that contains the size of the
333 * filename.
334 * - The next N headers contain the filename.
335 * - After the headers with the filename comes the “real” header with a
336 * bogus name or link.
337 */
338static int
339tar_gnu_long(void *ctx, const struct tar_operations *ops, struct tar_entry *te,
340 char **longp)
341{
342 char buf[TARBLKSZ];
343 char *bp;
344 int status = 0;
345 int long_read;
346
347 free(*longp);
348 *longp = bp = m_malloc(te->size);
349
350 for (long_read = te->size; long_read > 0; long_read -= TARBLKSZ) {
351 int copysize;
352
353 status = ops->read(ctx, buf, TARBLKSZ);
354 if (status == TARBLKSZ)
355 status = 0;
356 else {
357 /* Read partial header record? */
358 if (status > 0) {
359 errno = 0;
360 status = -1;
361 }
362
363 /* If we didn't get TARBLKSZ bytes read, punt. */
364 break;
365 }
366
367 copysize = min(long_read, TARBLKSZ);
368 memcpy(bp, buf, copysize);
369 bp += copysize;
370 }
371
372 return status;
373}
374
375static void
376tar_entry_copy(struct tar_entry *dst, struct tar_entry *src)
377{
378 memcpy(dst, src, sizeof(struct tar_entry));
379
380 dst->name = m_strdup(src->name);
381 dst->linkname = m_strdup(src->linkname);
382
383 if (src->stat.uname)
384 dst->stat.uname = m_strdup(src->stat.uname);
385 if (src->stat.gname)
386 dst->stat.gname = m_strdup(src->stat.gname);
387}
388
389static void
390tar_entry_destroy(struct tar_entry *te)
391{
392 free(te->name);
393 free(te->linkname);
394 free(te->stat.uname);
395 free(te->stat.gname);
396}
397
398struct tar_symlink_entry {
399 struct tar_symlink_entry *next;
400 struct tar_entry h;
401};
402
403/**
404 * Update the tar entry from system information.
405 *
406 * Normalize UID and GID relative to the current system.
407 */
408void
409tar_entry_update_from_system(struct tar_entry *te)
410{
411 struct passwd *passwd;
412 struct group *group;
413
414 if (te->stat.uname) {
415 passwd = getpwnam(te->stat.uname);
416 if (passwd)
417 te->stat.uid = passwd->pw_uid;
418 }
419 if (te->stat.gname) {
420 group = getgrnam(te->stat.gname);
421 if (group)
422 te->stat.gid = group->gr_gid;
423 }
424}
425
426int
427tar_extractor(void *ctx, const struct tar_operations *ops)
428{
429 int status;
430 char buffer[TARBLKSZ];
431 struct tar_entry h;
432
433 char *next_long_name, *next_long_link;
434 struct tar_symlink_entry *symlink_head, *symlink_tail, *symlink_node;
435
436 next_long_name = NULL;
437 next_long_link = NULL;
438 symlink_tail = symlink_head = NULL;
439
440 h.name = NULL;
441 h.linkname = NULL;
442 h.stat.uname = NULL;
443 h.stat.gname = NULL;
444
445 while ((status = ops->read(ctx, buffer, TARBLKSZ)) == TARBLKSZ) {
446 int name_len;
447
448 if (!tar_header_decode((struct tar_header *)buffer, &h)) {
449 if (h.name[0] == '\0') {
450 /* End of tape. */
451 status = 0;
452 } else {
453 /* Indicates broken tarfile:
454 * “Header checksum error”. */
455 errno = 0;
456 status = -1;
457 }
458 tar_entry_destroy(&h);
459 break;
460 }
461 if (h.type != TAR_FILETYPE_GNU_LONGLINK &&
462 h.type != TAR_FILETYPE_GNU_LONGNAME) {
463 if (next_long_name)
464 h.name = next_long_name;
465
466 if (next_long_link)
467 h.linkname = next_long_link;
468
469 next_long_link = NULL;
470 next_long_name = NULL;
471 }
472
473 if (h.name[0] == '\0') {
474 /* Indicates broken tarfile: “Bad header data”. */
475 errno = 0;
476 status = -1;
477 tar_entry_destroy(&h);
478 break;
479 }
480
481 name_len = strlen(h.name);
482
483 switch (h.type) {
484 case TAR_FILETYPE_FILE:
485 /* Compatibility with pre-ANSI ustar. */
486 if (h.name[name_len - 1] != '/') {
487 status = ops->extract_file(ctx, &h);
488 break;
489 }
490 /* Else, fall through. */
491 case TAR_FILETYPE_DIR:
492 if (h.name[name_len - 1] == '/') {
493 h.name[name_len - 1] = '\0';
494 }
495 status = ops->mkdir(ctx, &h);
496 break;
497 case TAR_FILETYPE_HARDLINK:
498 status = ops->link(ctx, &h);
499 break;
500 case TAR_FILETYPE_SYMLINK:
501 symlink_node = m_malloc(sizeof(*symlink_node));
502 symlink_node->next = NULL;
503 tar_entry_copy(&symlink_node->h, &h);
504
505 if (symlink_head)
506 symlink_tail->next = symlink_node;
507 else
508 symlink_head = symlink_node;
509 symlink_tail = symlink_node;
510 status = 0;
511 break;
512 case TAR_FILETYPE_CHARDEV:
513 case TAR_FILETYPE_BLOCKDEV:
514 case TAR_FILETYPE_FIFO:
515 status = ops->mknod(ctx, &h);
516 break;
517 case TAR_FILETYPE_GNU_LONGLINK:
518 status = tar_gnu_long(ctx, ops, &h, &next_long_link);
519 break;
520 case TAR_FILETYPE_GNU_LONGNAME:
521 status = tar_gnu_long(ctx, ops, &h, &next_long_name);
522 break;
523 default:
524 /* Indicates broken tarfile: “Bad header field”. */
525 errno = 0;
526 status = -1;
527 }
528 tar_entry_destroy(&h);
529 if (status != 0)
530 /* Pass on status from coroutine. */
531 break;
532 }
533
534 while (symlink_head) {
535 symlink_node = symlink_head->next;
536 if (status == 0)
537 status = ops->symlink(ctx, &symlink_head->h);
538 tar_entry_destroy(&symlink_head->h);
539 free(symlink_head);
540 symlink_head = symlink_node;
541 }
542 /* Make sure we free the long names, in case of a bogus or truncated
543 * tar archive with long entries not followed by a normal entry. */
544 free(next_long_name);
545 free(next_long_link);
546
547 if (status > 0) {
548 /* Indicates broken tarfile: “Read partial header record”. */
549 errno = 0;
550 return -1;
551 } else {
552 /* Return whatever I/O function returned. */
553 return status;
554 }
555}