/*
 * libdpkg - Debian packaging suite library routines
 * tarfn.c - tar archive extraction functions
 *
 * Copyright © 1995 Bruce Perens
 * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org>
 *
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
36 #include <dpkg/macros.h>
37 #include <dpkg/dpkg.h>
38 #include <dpkg/tarfn.h>
/*
 * Values of the header magic field used to tell the archive flavours apart.
 * tar_header_decode() compares only the first 6 bytes of each against
 * the header.
 *
 * The ustar literal is written as two adjacent strings on purpose: in a
 * single literal the digits after "\0" would be absorbed into the octal
 * escape ("\000" is one NUL character, not NUL followed by "00").
 */
#define TAR_MAGIC_USTAR "ustar\0" "00"
#define TAR_MAGIC_GNU   "ustar "  " \0"
/*
 * Compile-time limits for an arbitrary integer type t, for types such as
 * off_t or uid_t that have no <limits.h> constants of their own.
 *
 * TAR_TYPE_SIGNED(t) is non-zero when t is signed: converting -1 to an
 * unsigned type yields a large positive value, so 0 < (t)-1 only holds
 * for unsigned types.
 *
 * TAR_TYPE_MAX(t), signed case: builds 2^(bits-1) - 1 in two steps
 * ((2^(bits-2) - 1) * 2 + 1) so that no intermediate value overflows.
 * The bit count is taken as sizeof(t) * 8, i.e. 8-bit bytes are assumed.
 *
 * TAR_TYPE_MAX(t), unsigned case: all bits set. The outer cast matters
 * for types narrower than int, where integer promotion would otherwise
 * turn ~(t)0 into the int value -1.
 *
 * TAR_TYPE_MIN(t): zero for unsigned types; for signed types the bitwise
 * complement of the maximum, which is the minimum on two's-complement
 * representations.
 *
 * NOTE(review): the unsigned arms of both conditionals were reconstructed
 * from the macros' evident intent; confirm against the pristine file.
 */
#define TAR_TYPE_SIGNED(t)	(!((t)0 < (t)-1))

#define TAR_TYPE_MIN(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ~(t)TAR_TYPE_MAX(t) : \
	 (t)0)
#define TAR_TYPE_MAX(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
	 (t)~(t)0)
/*
 * Parse a numeric header field into the integer type "type".
 *
 * str must be an array (not a pointer): its width is taken with
 * sizeof(str) and passed as the field size. The limits of the target
 * type are handed to the parser as range bounds, and the parser's result
 * is then cast to the target type.
 *
 * TAR_ATOUL goes through tar_atoul() with only an upper bound;
 * TAR_ATOSL goes through tar_atosl() with both a lower and an upper bound.
 */
#define TAR_ATOUL(str, type) \
	((type)tar_atoul((str), sizeof(str), TAR_TYPE_MAX(type)))
#define TAR_ATOSL(str, type) \
	((type)tar_atosl((str), sizeof(str), \
	                 TAR_TYPE_MIN(type), TAR_TYPE_MAX(type)))
70 /* Only valid on ustar and gnu. */
77 /* Only valid on ustar. */
81 static inline uintmax_t
82 tar_ret_errno(int err
, uintmax_t ret
)
89 * Convert an ASCII octal string to an intmax_t.
92 tar_atol8(const char *s
, size_t size
)
94 const char *end
= s
+ size
;
97 /* Old implementations might precede the value with spaces. */
98 while (s
< end
&& *s
== ' ')
102 return tar_ret_errno(EINVAL
, 0);
105 if (*s
== '\0' || *s
== ' ')
107 if (*s
< '0' || *s
> '7')
108 return tar_ret_errno(EINVAL
, 0);
109 n
= (n
* 010) + (*s
++ - '0');
113 if (*s
!= '\0' && *s
!= ' ')
114 return tar_ret_errno(EINVAL
, 0);
119 return tar_ret_errno(EINVAL
, 0);
125 * Convert a base-256 two-complement number to an intmax_t.
128 tar_atol256(const char *s
, size_t size
, intmax_t min
, uintmax_t max
)
134 /* The encoding always sets the first bit to one, so that it can be
135 * distinguished from the ASCII encoding. For positive numbers we
136 * need to reset it. For negative numbers we initialize n to -1. */
144 /* Check for overflows. */
145 while (size
> sizeof(uintmax_t)) {
147 return tar_ret_errno(ERANGE
, sign ?
(uintmax_t)min
: max
);
152 if ((c
& 0x80) != (sign
& 0x80))
153 return tar_ret_errno(ERANGE
, sign ?
(uintmax_t)min
: max
);
/**
 * Convert a numeric tar header field, choosing the decoder from its
 * first byte.
 *
 * A leading byte of 0x80 or 0xff selects the base-256 decoder
 * (tar_atol256), which receives min and max; any other leading byte
 * selects the ASCII octal decoder (tar_atol8), which does not use them.
 *
 * @param s    Start of the field; s[0] is read unconditionally, so the
 *             field must be at least one byte long.
 * @param size Width of the field in bytes.
 * @param min  Lower bound forwarded to the base-256 decoder.
 * @param max  Upper bound forwarded to the base-256 decoder.
 *
 * @return Whatever the selected decoder returns.
 *
 * NOTE(review): the return-type line was reconstructed; uintmax_t matches
 * tar_ret_errno() and the variable tar_atoul() stores the result in.
 * Confirm type and linkage against the pristine file.
 */
static uintmax_t
tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	/* Inspect the byte as unsigned so the 0x80/0xff comparisons do not
	 * depend on whether plain char is signed. */
	const unsigned char *a = (const unsigned char *)s;

	/* Check if it is a long two-complement base-256 number, positive or
	 * negative. */
	if (*a == 0xff || *a == 0x80)
		return tar_atol256(s, size, min, max);
	else
		return tar_atol8(s, size);
}
179 tar_atoul(const char *s
, size_t size
, uintmax_t max
)
181 uintmax_t n
= tar_atol(s
, size
, 0, UINTMAX_MAX
);
184 return tar_ret_errno(ERANGE
, UINTMAX_MAX
);
190 tar_atosl(const char *s
, size_t size
, intmax_t min
, intmax_t max
)
192 intmax_t n
= tar_atol(s
, size
, INTMAX_MIN
, INTMAX_MAX
);
195 return tar_ret_errno(ERANGE
, INTMAX_MIN
);
197 return tar_ret_errno(ERANGE
, INTMAX_MAX
);
203 tar_header_get_prefix_name(struct tar_header
*h
)
205 return str_fmt("%.*s/%.*s", (int)sizeof(h
->prefix
), h
->prefix
,
206 (int)sizeof(h
->name
), h
->name
);
210 tar_header_get_unix_mode(struct tar_header
*h
)
213 enum tar_filetype type
;
215 type
= (enum tar_filetype
)h
->linkflag
;
218 case TAR_FILETYPE_FILE0
:
219 case TAR_FILETYPE_FILE
:
220 case TAR_FILETYPE_HARDLINK
:
223 case TAR_FILETYPE_SYMLINK
:
226 case TAR_FILETYPE_DIR
:
229 case TAR_FILETYPE_CHARDEV
:
232 case TAR_FILETYPE_BLOCKDEV
:
235 case TAR_FILETYPE_FIFO
:
243 mode
|= TAR_ATOUL(h
->mode
, mode_t
);
249 tar_header_checksum(struct tar_header
*h
)
251 unsigned char *s
= (unsigned char *)h
;
253 const size_t checksum_offset
= offsetof(struct tar_header
, checksum
);
256 /* Treat checksum field as all blank. */
257 sum
= ' ' * sizeof(h
->checksum
);
259 for (i
= checksum_offset
; i
> 0; i
--)
262 /* Skip the real checksum field. */
263 s
+= sizeof(h
->checksum
);
265 for (i
= TARBLKSZ
- checksum_offset
- sizeof(h
->checksum
); i
> 0; i
--)
272 tar_header_decode(struct tar_header
*h
, struct tar_entry
*d
)
278 if (memcmp(h
->magic
, TAR_MAGIC_GNU
, 6) == 0)
279 d
->format
= TAR_FORMAT_GNU
;
280 else if (memcmp(h
->magic
, TAR_MAGIC_USTAR
, 6) == 0)
281 d
->format
= TAR_FORMAT_USTAR
;
283 d
->format
= TAR_FORMAT_OLD
;
285 d
->type
= (enum tar_filetype
)h
->linkflag
;
286 if (d
->type
== TAR_FILETYPE_FILE0
)
287 d
->type
= TAR_FILETYPE_FILE
;
289 /* Concatenate prefix and name to support ustar style long names. */
290 if (d
->format
== TAR_FORMAT_USTAR
&& h
->prefix
[0] != '\0')
291 d
->name
= tar_header_get_prefix_name(h
);
293 d
->name
= m_strndup(h
->name
, sizeof(h
->name
));
294 d
->linkname
= m_strndup(h
->linkname
, sizeof(h
->linkname
));
295 d
->stat
.mode
= tar_header_get_unix_mode(h
);
296 /* Even though off_t is signed, we use an unsigned parser here because
297 * negative offsets are not allowed. */
298 d
->size
= TAR_ATOUL(h
->size
, off_t
);
299 d
->mtime
= TAR_ATOSL(h
->mtime
, time_t);
301 if (d
->type
== TAR_FILETYPE_CHARDEV
|| d
->type
== TAR_FILETYPE_BLOCKDEV
)
302 d
->dev
= makedev(TAR_ATOUL(h
->devmajor
, dev_t
),
303 TAR_ATOUL(h
->devminor
, dev_t
));
305 d
->dev
= makedev(0, 0);
308 d
->stat
.uname
= m_strndup(h
->user
, sizeof(h
->user
));
310 d
->stat
.uname
= NULL
;
311 d
->stat
.uid
= TAR_ATOUL(h
->uid
, uid_t
);
314 d
->stat
.gname
= m_strndup(h
->group
, sizeof(h
->group
));
316 d
->stat
.gname
= NULL
;
317 d
->stat
.gid
= TAR_ATOUL(h
->gid
, gid_t
);
319 checksum
= tar_atol8(h
->checksum
, sizeof(h
->checksum
));
321 /* Check for parse errors. */
324 return tar_header_checksum(h
) == checksum
;
328 * Decode a GNU longlink or longname from the tar archive.
330 * The way the GNU long{link,name} stuff works is like this:
332 * - The first header is a “dummy” header that contains the size of the
334 * - The next N headers contain the filename.
335 * - After the headers with the filename comes the “real” header with a
336 * bogus name or link.
339 tar_gnu_long(void *ctx
, const struct tar_operations
*ops
, struct tar_entry
*te
,
348 *longp
= bp
= m_malloc(te
->size
);
350 for (long_read
= te
->size
; long_read
> 0; long_read
-= TARBLKSZ
) {
353 status
= ops
->read(ctx
, buf
, TARBLKSZ
);
354 if (status
== TARBLKSZ
)
357 /* Read partial header record? */
363 /* If we didn't get TARBLKSZ bytes read, punt. */
367 copysize
= min(long_read
, TARBLKSZ
);
368 memcpy(bp
, buf
, copysize
);
376 tar_entry_copy(struct tar_entry
*dst
, struct tar_entry
*src
)
378 memcpy(dst
, src
, sizeof(struct tar_entry
));
380 dst
->name
= m_strdup(src
->name
);
381 dst
->linkname
= m_strdup(src
->linkname
);
384 dst
->stat
.uname
= m_strdup(src
->stat
.uname
);
386 dst
->stat
.gname
= m_strdup(src
->stat
.gname
);
390 tar_entry_destroy(struct tar_entry
*te
)
394 free(te
->stat
.uname
);
395 free(te
->stat
.gname
);
398 struct tar_symlink_entry
{
399 struct tar_symlink_entry
*next
;
404 * Update the tar entry from system information.
406 * Normalize UID and GID relative to the current system.
409 tar_entry_update_from_system(struct tar_entry
*te
)
411 struct passwd
*passwd
;
414 if (te
->stat
.uname
) {
415 passwd
= getpwnam(te
->stat
.uname
);
417 te
->stat
.uid
= passwd
->pw_uid
;
419 if (te
->stat
.gname
) {
420 group
= getgrnam(te
->stat
.gname
);
422 te
->stat
.gid
= group
->gr_gid
;
427 tar_extractor(void *ctx
, const struct tar_operations
*ops
)
430 char buffer
[TARBLKSZ
];
433 char *next_long_name
, *next_long_link
;
434 struct tar_symlink_entry
*symlink_head
, *symlink_tail
, *symlink_node
;
436 next_long_name
= NULL
;
437 next_long_link
= NULL
;
438 symlink_tail
= symlink_head
= NULL
;
445 while ((status
= ops
->read(ctx
, buffer
, TARBLKSZ
)) == TARBLKSZ
) {
448 if (!tar_header_decode((struct tar_header
*)buffer
, &h
)) {
449 if (h
.name
[0] == '\0') {
453 /* Indicates broken tarfile:
454 * “Header checksum error”. */
458 tar_entry_destroy(&h
);
461 if (h
.type
!= TAR_FILETYPE_GNU_LONGLINK
&&
462 h
.type
!= TAR_FILETYPE_GNU_LONGNAME
) {
464 h
.name
= next_long_name
;
467 h
.linkname
= next_long_link
;
469 next_long_link
= NULL
;
470 next_long_name
= NULL
;
473 if (h
.name
[0] == '\0') {
474 /* Indicates broken tarfile: “Bad header data”. */
477 tar_entry_destroy(&h
);
481 name_len
= strlen(h
.name
);
484 case TAR_FILETYPE_FILE
:
485 /* Compatibility with pre-ANSI ustar. */
486 if (h
.name
[name_len
- 1] != '/') {
487 status
= ops
->extract_file(ctx
, &h
);
490 /* Else, fall through. */
491 case TAR_FILETYPE_DIR
:
492 if (h
.name
[name_len
- 1] == '/') {
493 h
.name
[name_len
- 1] = '\0';
495 status
= ops
->mkdir(ctx
, &h
);
497 case TAR_FILETYPE_HARDLINK
:
498 status
= ops
->link(ctx
, &h
);
500 case TAR_FILETYPE_SYMLINK
:
501 symlink_node
= m_malloc(sizeof(*symlink_node
));
502 symlink_node
->next
= NULL
;
503 tar_entry_copy(&symlink_node
->h
, &h
);
506 symlink_tail
->next
= symlink_node
;
508 symlink_head
= symlink_node
;
509 symlink_tail
= symlink_node
;
512 case TAR_FILETYPE_CHARDEV
:
513 case TAR_FILETYPE_BLOCKDEV
:
514 case TAR_FILETYPE_FIFO
:
515 status
= ops
->mknod(ctx
, &h
);
517 case TAR_FILETYPE_GNU_LONGLINK
:
518 status
= tar_gnu_long(ctx
, ops
, &h
, &next_long_link
);
520 case TAR_FILETYPE_GNU_LONGNAME
:
521 status
= tar_gnu_long(ctx
, ops
, &h
, &next_long_name
);
524 /* Indicates broken tarfile: “Bad header field”. */
528 tar_entry_destroy(&h
);
530 /* Pass on status from coroutine. */
534 while (symlink_head
) {
535 symlink_node
= symlink_head
->next
;
537 status
= ops
->symlink(ctx
, &symlink_head
->h
);
538 tar_entry_destroy(&symlink_head
->h
);
540 symlink_head
= symlink_node
;
542 /* Make sure we free the long names, in case of a bogus or truncated
543 * tar archive with long entries not followed by a normal entry. */
544 free(next_long_name
);
545 free(next_long_link
);
548 /* Indicates broken tarfile: “Read partial header record”. */
552 /* Return whatever I/O function returned. */