Commit | Line | Data |
---|---|---|
1479465f GJ |
1 | /* |
2 | * libdpkg - Debian packaging suite library routines | |
3 | * tarfn.c - tar archive extraction functions | |
4 | * | |
5 | * Copyright © 1995 Bruce Perens | |
6 | * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program. If not, see <https://www.gnu.org/licenses/>. | |
20 | */ | |
21 | ||
22 | #include <config.h> | |
23 | #include <compat.h> | |
24 | ||
25 | #include <sys/stat.h> | |
26 | ||
27 | #include <errno.h> | |
28 | #include <string.h> | |
29 | #include <pwd.h> | |
30 | #include <grp.h> | |
31 | #include <unistd.h> | |
32 | #include <inttypes.h> | |
33 | #include <stdlib.h> | |
34 | #include <stdio.h> | |
35 | ||
36 | #include <dpkg/macros.h> | |
37 | #include <dpkg/dpkg.h> | |
38 | #include <dpkg/tarfn.h> | |
39 | ||
/*
 * Magic strings found in the header. Each literal is split in two so that
 * the digits (or blanks) following the escape are not parsed as part of it.
 */
#define TAR_MAGIC_USTAR "ustar\0" "00"
#define TAR_MAGIC_GNU "ustar " " \0"

/* Non-zero when the integer type t is signed. */
#define TAR_TYPE_SIGNED(t) (!((t)0 < (t)-1))

/*
 * Upper bound for the integer type t. The signed case is built in two
 * steps so that no intermediate result overflows; it assumes 8-bit bytes.
 */
#define TAR_TYPE_MAX(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
	 ~(t)0)

/* Lower bound for the integer type t: zero when unsigned, ~max otherwise. */
#define TAR_TYPE_MIN(t) \
	(TAR_TYPE_SIGNED(t) ? \
	 ~(t)TAR_TYPE_MAX(t) : \
	 (t)0)

/*
 * Parse the fixed-size header field str (which must be an array, as its
 * size is taken with sizeof) into the given unsigned or signed type,
 * clamping against the limits of that type.
 */
#define TAR_ATOUL(str, type) \
	(type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
#define TAR_ATOSL(str, type) \
	(type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
58 | ||
/**
 * Raw layout of a tar header record.
 *
 * Every member is a fixed-width character array (or a single character),
 * so the structure can be overlaid directly on a block read from the
 * archive. Text fields are not guaranteed to be NUL-terminated.
 */
struct tar_header {
	/* Entry pathname. */
	char name[100];
	/* Permission bits, numeric field. */
	char mode[8];
	/* Numeric owner and group ids. */
	char uid[8];
	char gid[8];
	/* Entry size, numeric field. */
	char size[12];
	/* Modification time, numeric field. */
	char mtime[12];
	/* Header checksum, octal. */
	char checksum[8];
	/* Entry type, see enum tar_filetype. */
	char linkflag;
	/* Target of a hard or symbolic link. */
	char linkname[100];

	/* Only valid on ustar and gnu. */
	char magic[8];
	char user[32];
	char group[32];
	char devmajor[8];
	char devminor[8];

	/* Only valid on ustar. */
	char prefix[155];
};
80 | ||
/**
 * Record an error code in errno and hand back a return value.
 *
 * Lets the parsers flag a failure and return in a single statement.
 *
 * @param err The value to store into errno.
 * @param ret The value to return unchanged.
 *
 * @return The ret argument.
 */
static inline uintmax_t
tar_ret_errno(int err, uintmax_t ret)
{
	errno = err;

	return ret;
}
87 | ||
/**
 * Convert an ASCII octal field to an uintmax_t.
 *
 * The field is exactly size bytes long and does not need to be
 * NUL-terminated. Leading spaces are skipped, the octal digits are
 * accumulated, and the remainder of the field may only contain NUL or
 * space characters.
 *
 * @param s    Start of the field.
 * @param size Length of the field in bytes.
 *
 * @return The parsed value. On failure errno is set: EINVAL (returning 0)
 *         when the field is empty, all blanks or contains an invalid
 *         character, ERANGE (returning UINTMAX_MAX) when the digits do not
 *         fit in an uintmax_t. errno is left untouched on success.
 */
static uintmax_t
tar_atol8(const char *s, size_t size)
{
	const char *end = s + size;
	uintmax_t n = 0;

	/* Old implementations might precede the value with spaces. */
	while (s < end && *s == ' ')
		s++;

	/* A zero-length or all-blank field carries no value at all. */
	if (s == end)
		return tar_ret_errno(EINVAL, 0);

	/* Accumulate digits up to the first terminator or the field end. */
	while (s < end && *s != '\0' && *s != ' ') {
		if (*s < '0' || *s > '7')
			return tar_ret_errno(EINVAL, 0);
		/* Three more bits would not fit, refuse to wrap around. */
		if (n > (UINTMAX_MAX >> 3))
			return tar_ret_errno(ERANGE, UINTMAX_MAX);
		n = (n * 010) + (uintmax_t)(*s++ - '0');
	}

	/* Only terminators may follow the digits; this loop either fails or
	 * consumes the whole field, so no further check is needed. */
	for (; s < end; s++) {
		if (*s != '\0' && *s != ' ')
			return tar_ret_errno(EINVAL, 0);
	}

	return n;
}
123 | ||
/**
 * Convert a base-256 two-complement number to an uintmax_t.
 *
 * The result is the two-complement bit pattern of the value, so callers
 * wanting a signed number convert it to intmax_t afterwards.
 *
 * @param s    Start of the field; the first byte is the encoding marker.
 * @param size Length of the field in bytes.
 * @param min  Value to return when a negative number is out of range.
 * @param max  Value to return when a positive number is out of range.
 *
 * @return The decoded value. On failure errno is set: EINVAL (returning 0)
 *         for a zero-length field, ERANGE (returning min or max) when the
 *         number needs more bits than an intmax_t provides.
 */
static uintmax_t
tar_atol256(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	uintmax_t n = 0;
	unsigned char c;
	int sign;

	/* Without at least the marker byte there is nothing to decode, and
	 * the unsigned byte counter below would wrap around. */
	if (size == 0)
		return tar_ret_errno(EINVAL, 0);

	/* The encoding always sets the first bit to one, so that it can be
	 * distinguished from the ASCII encoding. For positive numbers we
	 * need to reset it. For negative numbers we initialize n to -1. */
	c = *s++;
	if (c == 0x80)
		c = 0;
	else
		n = ~(uintmax_t)0;
	sign = c;

	/* Check for overflows: bytes that do not fit in the result must be
	 * pure sign extension. */
	while (size > sizeof(uintmax_t)) {
		if (c != sign)
			return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);
		c = *s++;
		size--;
	}

	/* The top bit of the first significant byte must agree with the
	 * sign, otherwise the value does not fit in an intmax_t. */
	if ((c & 0x80) != (sign & 0x80))
		return tar_ret_errno(ERANGE, sign ? (uintmax_t)min : max);

	/* Fold in the remaining bytes, most significant first; c already
	 * holds the first one. */
	for (;;) {
		n = (n << 8) | c;
		if (--size == 0)
			break;
		c = *s++;
	}

	return n;
}
164 | ||
/**
 * Decode a numeric header field in either of the two supported encodings.
 *
 * A leading 0x80 or 0xff byte selects the base-256 two-complement decoder
 * (positive or negative respectively), anything else is parsed as ASCII
 * octal.
 *
 * @param s    Start of the field.
 * @param size Length of the field in bytes.
 * @param min  Passed on to the base-256 decoder.
 * @param max  Passed on to the base-256 decoder.
 *
 * @return Whatever the selected decoder returns.
 */
static uintmax_t
tar_atol(const char *s, size_t size, intmax_t min, uintmax_t max)
{
	const unsigned char lead = (unsigned char)s[0];

	if (lead != 0xff && lead != 0x80)
		return tar_atol8(s, size);

	return tar_atol256(s, size, min, max);
}
177 | ||
/**
 * Parse an unsigned numeric tar header field.
 *
 * @param s    Start of the field.
 * @param size Length of the field in bytes.
 * @param max  Largest value accepted.
 *
 * @return The parsed value, or UINTMAX_MAX with errno set to ERANGE when
 *         it is larger than max.
 */
uintmax_t
tar_atoul(const char *s, size_t size, uintmax_t max)
{
	uintmax_t value;

	value = tar_atol(s, size, 0, UINTMAX_MAX);

	return (value > max) ? tar_ret_errno(ERANGE, UINTMAX_MAX) : value;
}
188 | ||
/**
 * Parse a signed numeric tar header field.
 *
 * @param s    Start of the field.
 * @param size Length of the field in bytes.
 * @param min  Smallest value accepted.
 * @param max  Largest value accepted.
 *
 * @return The parsed value. When it falls below min or above max, errno is
 *         set to ERANGE and INTMAX_MIN or INTMAX_MAX is returned instead.
 */
intmax_t
tar_atosl(const char *s, size_t size, intmax_t min, intmax_t max)
{
	intmax_t value;

	value = tar_atol(s, size, INTMAX_MIN, INTMAX_MAX);

	if (value < min)
		return tar_ret_errno(ERANGE, INTMAX_MIN);
	else if (value > max)
		return tar_ret_errno(ERANGE, INTMAX_MAX);
	else
		return value;
}
201 | ||
202 | static char * | |
203 | tar_header_get_prefix_name(struct tar_header *h) | |
204 | { | |
205 | return str_fmt("%.*s/%.*s", (int)sizeof(h->prefix), h->prefix, | |
206 | (int)sizeof(h->name), h->name); | |
207 | } | |
208 | ||
209 | static mode_t | |
210 | tar_header_get_unix_mode(struct tar_header *h) | |
211 | { | |
212 | mode_t mode; | |
213 | enum tar_filetype type; | |
214 | ||
215 | type = (enum tar_filetype)h->linkflag; | |
216 | ||
217 | switch (type) { | |
218 | case TAR_FILETYPE_FILE0: | |
219 | case TAR_FILETYPE_FILE: | |
220 | case TAR_FILETYPE_HARDLINK: | |
221 | mode = S_IFREG; | |
222 | break; | |
223 | case TAR_FILETYPE_SYMLINK: | |
224 | mode = S_IFLNK; | |
225 | break; | |
226 | case TAR_FILETYPE_DIR: | |
227 | mode = S_IFDIR; | |
228 | break; | |
229 | case TAR_FILETYPE_CHARDEV: | |
230 | mode = S_IFCHR; | |
231 | break; | |
232 | case TAR_FILETYPE_BLOCKDEV: | |
233 | mode = S_IFBLK; | |
234 | break; | |
235 | case TAR_FILETYPE_FIFO: | |
236 | mode = S_IFIFO; | |
237 | break; | |
238 | default: | |
239 | mode = 0; | |
240 | break; | |
241 | } | |
242 | ||
243 | mode |= TAR_ATOUL(h->mode, mode_t); | |
244 | ||
245 | return mode; | |
246 | } | |
247 | ||
248 | static long | |
249 | tar_header_checksum(struct tar_header *h) | |
250 | { | |
251 | unsigned char *s = (unsigned char *)h; | |
252 | unsigned int i; | |
253 | const size_t checksum_offset = offsetof(struct tar_header, checksum); | |
254 | long sum; | |
255 | ||
256 | /* Treat checksum field as all blank. */ | |
257 | sum = ' ' * sizeof(h->checksum); | |
258 | ||
259 | for (i = checksum_offset; i > 0; i--) | |
260 | sum += *s++; | |
261 | ||
262 | /* Skip the real checksum field. */ | |
263 | s += sizeof(h->checksum); | |
264 | ||
265 | for (i = TARBLKSZ - checksum_offset - sizeof(h->checksum); i > 0; i--) | |
266 | sum += *s++; | |
267 | ||
268 | return sum; | |
269 | } | |
270 | ||
271 | static int | |
272 | tar_header_decode(struct tar_header *h, struct tar_entry *d) | |
273 | { | |
274 | long checksum; | |
275 | ||
276 | errno = 0; | |
277 | ||
278 | if (memcmp(h->magic, TAR_MAGIC_GNU, 6) == 0) | |
279 | d->format = TAR_FORMAT_GNU; | |
280 | else if (memcmp(h->magic, TAR_MAGIC_USTAR, 6) == 0) | |
281 | d->format = TAR_FORMAT_USTAR; | |
282 | else | |
283 | d->format = TAR_FORMAT_OLD; | |
284 | ||
285 | d->type = (enum tar_filetype)h->linkflag; | |
286 | if (d->type == TAR_FILETYPE_FILE0) | |
287 | d->type = TAR_FILETYPE_FILE; | |
288 | ||
289 | /* Concatenate prefix and name to support ustar style long names. */ | |
290 | if (d->format == TAR_FORMAT_USTAR && h->prefix[0] != '\0') | |
291 | d->name = tar_header_get_prefix_name(h); | |
292 | else | |
293 | d->name = m_strndup(h->name, sizeof(h->name)); | |
294 | d->linkname = m_strndup(h->linkname, sizeof(h->linkname)); | |
295 | d->stat.mode = tar_header_get_unix_mode(h); | |
296 | /* Even though off_t is signed, we use an unsigned parser here because | |
297 | * negative offsets are not allowed. */ | |
298 | d->size = TAR_ATOUL(h->size, off_t); | |
299 | d->mtime = TAR_ATOSL(h->mtime, time_t); | |
300 | ||
301 | if (d->type == TAR_FILETYPE_CHARDEV || d->type == TAR_FILETYPE_BLOCKDEV) | |
302 | d->dev = makedev(TAR_ATOUL(h->devmajor, dev_t), | |
303 | TAR_ATOUL(h->devminor, dev_t)); | |
304 | else | |
305 | d->dev = makedev(0, 0); | |
306 | ||
307 | if (*h->user) | |
308 | d->stat.uname = m_strndup(h->user, sizeof(h->user)); | |
309 | else | |
310 | d->stat.uname = NULL; | |
311 | d->stat.uid = TAR_ATOUL(h->uid, uid_t); | |
312 | ||
313 | if (*h->group) | |
314 | d->stat.gname = m_strndup(h->group, sizeof(h->group)); | |
315 | else | |
316 | d->stat.gname = NULL; | |
317 | d->stat.gid = TAR_ATOUL(h->gid, gid_t); | |
318 | ||
319 | checksum = tar_atol8(h->checksum, sizeof(h->checksum)); | |
320 | ||
321 | /* Check for parse errors. */ | |
322 | if (errno) | |
323 | return 0; | |
324 | return tar_header_checksum(h) == checksum; | |
325 | } | |
326 | ||
327 | /** | |
328 | * Decode a GNU longlink or longname from the tar archive. | |
329 | * | |
330 | * The way the GNU long{link,name} stuff works is like this: | |
331 | * | |
332 | * - The first header is a “dummy” header that contains the size of the | |
333 | * filename. | |
334 | * - The next N headers contain the filename. | |
335 | * - After the headers with the filename comes the “real” header with a | |
336 | * bogus name or link. | |
337 | */ | |
338 | static int | |
339 | tar_gnu_long(void *ctx, const struct tar_operations *ops, struct tar_entry *te, | |
340 | char **longp) | |
341 | { | |
342 | char buf[TARBLKSZ]; | |
343 | char *bp; | |
344 | int status = 0; | |
345 | int long_read; | |
346 | ||
347 | free(*longp); | |
348 | *longp = bp = m_malloc(te->size); | |
349 | ||
350 | for (long_read = te->size; long_read > 0; long_read -= TARBLKSZ) { | |
351 | int copysize; | |
352 | ||
353 | status = ops->read(ctx, buf, TARBLKSZ); | |
354 | if (status == TARBLKSZ) | |
355 | status = 0; | |
356 | else { | |
357 | /* Read partial header record? */ | |
358 | if (status > 0) { | |
359 | errno = 0; | |
360 | status = -1; | |
361 | } | |
362 | ||
363 | /* If we didn't get TARBLKSZ bytes read, punt. */ | |
364 | break; | |
365 | } | |
366 | ||
367 | copysize = min(long_read, TARBLKSZ); | |
368 | memcpy(bp, buf, copysize); | |
369 | bp += copysize; | |
370 | } | |
371 | ||
372 | return status; | |
373 | } | |
374 | ||
375 | static void | |
376 | tar_entry_copy(struct tar_entry *dst, struct tar_entry *src) | |
377 | { | |
378 | memcpy(dst, src, sizeof(struct tar_entry)); | |
379 | ||
380 | dst->name = m_strdup(src->name); | |
381 | dst->linkname = m_strdup(src->linkname); | |
382 | ||
383 | if (src->stat.uname) | |
384 | dst->stat.uname = m_strdup(src->stat.uname); | |
385 | if (src->stat.gname) | |
386 | dst->stat.gname = m_strdup(src->stat.gname); | |
387 | } | |
388 | ||
389 | static void | |
390 | tar_entry_destroy(struct tar_entry *te) | |
391 | { | |
392 | free(te->name); | |
393 | free(te->linkname); | |
394 | free(te->stat.uname); | |
395 | free(te->stat.gname); | |
396 | } | |
397 | ||
398 | struct tar_symlink_entry { | |
399 | struct tar_symlink_entry *next; | |
400 | struct tar_entry h; | |
401 | }; | |
402 | ||
403 | /** | |
404 | * Update the tar entry from system information. | |
405 | * | |
406 | * Normalize UID and GID relative to the current system. | |
407 | */ | |
408 | void | |
409 | tar_entry_update_from_system(struct tar_entry *te) | |
410 | { | |
411 | struct passwd *passwd; | |
412 | struct group *group; | |
413 | ||
414 | if (te->stat.uname) { | |
415 | passwd = getpwnam(te->stat.uname); | |
416 | if (passwd) | |
417 | te->stat.uid = passwd->pw_uid; | |
418 | } | |
419 | if (te->stat.gname) { | |
420 | group = getgrnam(te->stat.gname); | |
421 | if (group) | |
422 | te->stat.gid = group->gr_gid; | |
423 | } | |
424 | } | |
425 | ||
426 | int | |
427 | tar_extractor(void *ctx, const struct tar_operations *ops) | |
428 | { | |
429 | int status; | |
430 | char buffer[TARBLKSZ]; | |
431 | struct tar_entry h; | |
432 | ||
433 | char *next_long_name, *next_long_link; | |
434 | struct tar_symlink_entry *symlink_head, *symlink_tail, *symlink_node; | |
435 | ||
436 | next_long_name = NULL; | |
437 | next_long_link = NULL; | |
438 | symlink_tail = symlink_head = NULL; | |
439 | ||
440 | h.name = NULL; | |
441 | h.linkname = NULL; | |
442 | h.stat.uname = NULL; | |
443 | h.stat.gname = NULL; | |
444 | ||
445 | while ((status = ops->read(ctx, buffer, TARBLKSZ)) == TARBLKSZ) { | |
446 | int name_len; | |
447 | ||
448 | if (!tar_header_decode((struct tar_header *)buffer, &h)) { | |
449 | if (h.name[0] == '\0') { | |
450 | /* End of tape. */ | |
451 | status = 0; | |
452 | } else { | |
453 | /* Indicates broken tarfile: | |
454 | * “Header checksum error”. */ | |
455 | errno = 0; | |
456 | status = -1; | |
457 | } | |
458 | tar_entry_destroy(&h); | |
459 | break; | |
460 | } | |
461 | if (h.type != TAR_FILETYPE_GNU_LONGLINK && | |
462 | h.type != TAR_FILETYPE_GNU_LONGNAME) { | |
463 | if (next_long_name) | |
464 | h.name = next_long_name; | |
465 | ||
466 | if (next_long_link) | |
467 | h.linkname = next_long_link; | |
468 | ||
469 | next_long_link = NULL; | |
470 | next_long_name = NULL; | |
471 | } | |
472 | ||
473 | if (h.name[0] == '\0') { | |
474 | /* Indicates broken tarfile: “Bad header data”. */ | |
475 | errno = 0; | |
476 | status = -1; | |
477 | tar_entry_destroy(&h); | |
478 | break; | |
479 | } | |
480 | ||
481 | name_len = strlen(h.name); | |
482 | ||
483 | switch (h.type) { | |
484 | case TAR_FILETYPE_FILE: | |
485 | /* Compatibility with pre-ANSI ustar. */ | |
486 | if (h.name[name_len - 1] != '/') { | |
487 | status = ops->extract_file(ctx, &h); | |
488 | break; | |
489 | } | |
490 | /* Else, fall through. */ | |
491 | case TAR_FILETYPE_DIR: | |
492 | if (h.name[name_len - 1] == '/') { | |
493 | h.name[name_len - 1] = '\0'; | |
494 | } | |
495 | status = ops->mkdir(ctx, &h); | |
496 | break; | |
497 | case TAR_FILETYPE_HARDLINK: | |
498 | status = ops->link(ctx, &h); | |
499 | break; | |
500 | case TAR_FILETYPE_SYMLINK: | |
501 | symlink_node = m_malloc(sizeof(*symlink_node)); | |
502 | symlink_node->next = NULL; | |
503 | tar_entry_copy(&symlink_node->h, &h); | |
504 | ||
505 | if (symlink_head) | |
506 | symlink_tail->next = symlink_node; | |
507 | else | |
508 | symlink_head = symlink_node; | |
509 | symlink_tail = symlink_node; | |
510 | status = 0; | |
511 | break; | |
512 | case TAR_FILETYPE_CHARDEV: | |
513 | case TAR_FILETYPE_BLOCKDEV: | |
514 | case TAR_FILETYPE_FIFO: | |
515 | status = ops->mknod(ctx, &h); | |
516 | break; | |
517 | case TAR_FILETYPE_GNU_LONGLINK: | |
518 | status = tar_gnu_long(ctx, ops, &h, &next_long_link); | |
519 | break; | |
520 | case TAR_FILETYPE_GNU_LONGNAME: | |
521 | status = tar_gnu_long(ctx, ops, &h, &next_long_name); | |
522 | break; | |
523 | default: | |
524 | /* Indicates broken tarfile: “Bad header field”. */ | |
525 | errno = 0; | |
526 | status = -1; | |
527 | } | |
528 | tar_entry_destroy(&h); | |
529 | if (status != 0) | |
530 | /* Pass on status from coroutine. */ | |
531 | break; | |
532 | } | |
533 | ||
534 | while (symlink_head) { | |
535 | symlink_node = symlink_head->next; | |
536 | if (status == 0) | |
537 | status = ops->symlink(ctx, &symlink_head->h); | |
538 | tar_entry_destroy(&symlink_head->h); | |
539 | free(symlink_head); | |
540 | symlink_head = symlink_node; | |
541 | } | |
542 | /* Make sure we free the long names, in case of a bogus or truncated | |
543 | * tar archive with long entries not followed by a normal entry. */ | |
544 | free(next_long_name); | |
545 | free(next_long_link); | |
546 | ||
547 | if (status > 0) { | |
548 | /* Indicates broken tarfile: “Read partial header record”. */ | |
549 | errno = 0; | |
550 | return -1; | |
551 | } else { | |
552 | /* Return whatever I/O function returned. */ | |
553 | return status; | |
554 | } | |
555 | } |