Commit | Line | Data |
---|---|---|
1479465f GJ |
1 | # Copyright © 2007-2010 Raphaël Hertzog <hertzog@debian.org> |
2 | # | |
3 | # This program is free software; you can redistribute it and/or modify | |
4 | # it under the terms of the GNU General Public License as published by | |
5 | # the Free Software Foundation; either version 2 of the License, or | |
6 | # (at your option) any later version. | |
7 | # | |
8 | # This program is distributed in the hope that it will be useful, | |
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 | # GNU General Public License for more details. | |
12 | # | |
13 | # You should have received a copy of the GNU General Public License | |
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
15 | ||
16 | package Dpkg::Shlibs::Objdump; | |
17 | ||
18 | use strict; | |
19 | use warnings; | |
20 | use feature qw(state); | |
21 | ||
22 | our $VERSION = '0.01'; | |
23 | ||
24 | use Dpkg::Gettext; | |
25 | use Dpkg::ErrorHandling; | |
26 | use Dpkg::Path qw(find_command); | |
27 | use Dpkg::Arch qw(debarch_to_gnutriplet get_build_arch get_host_arch); | |
28 | use Dpkg::IPC; | |
29 | ||
30 | # Decide which objdump to call | |
31 | our $OBJDUMP = 'objdump'; | |
32 | if (get_build_arch() ne get_host_arch()) { | |
33 | my $od = debarch_to_gnutriplet(get_host_arch()) . '-objdump'; | |
34 | $OBJDUMP = $od if find_command($od); | |
35 | } | |
36 | ||
37 | ||
38 | sub new { | |
39 | my $this = shift; | |
40 | my $class = ref($this) || $this; | |
41 | my $self = { objects => {} }; | |
42 | bless $self, $class; | |
43 | return $self; | |
44 | } | |
45 | ||
46 | sub add_object { | |
47 | my ($self, $obj) = @_; | |
48 | my $id = $obj->get_id; | |
49 | if ($id) { | |
50 | $self->{objects}{$id} = $obj; | |
51 | } | |
52 | return $id; | |
53 | } | |
54 | ||
55 | sub analyze { | |
56 | my ($self, $file) = @_; | |
57 | my $obj = Dpkg::Shlibs::Objdump::Object->new($file); | |
58 | ||
59 | return $self->add_object($obj); | |
60 | } | |
61 | ||
62 | sub locate_symbol { | |
63 | my ($self, $name) = @_; | |
64 | foreach my $obj (values %{$self->{objects}}) { | |
65 | my $sym = $obj->get_symbol($name); | |
66 | if (defined($sym) && $sym->{defined}) { | |
67 | return $sym; | |
68 | } | |
69 | } | |
70 | return; | |
71 | } | |
72 | ||
73 | sub get_object { | |
74 | my ($self, $objid) = @_; | |
75 | if ($self->has_object($objid)) { | |
76 | return $self->{objects}{$objid}; | |
77 | } | |
78 | return; | |
79 | } | |
80 | ||
81 | sub has_object { | |
82 | my ($self, $objid) = @_; | |
83 | return exists $self->{objects}{$objid}; | |
84 | } | |
85 | ||
86 | use constant { | |
87 | ELF_BITS_NONE => 0, | |
88 | ELF_BITS_32 => 1, | |
89 | ELF_BITS_64 => 2, | |
90 | ||
91 | ELF_ORDER_NONE => 0, | |
92 | ELF_ORDER_2LSB => 1, | |
93 | ELF_ORDER_2MSB => 2, | |
94 | ||
95 | ELF_MACH_SPARC => 2, | |
96 | ELF_MACH_MIPS => 8, | |
97 | ELF_MACH_SPARC64_OLD => 11, | |
98 | ELF_MACH_SPARC32PLUS => 18, | |
99 | ELF_MACH_PPC64 => 21, | |
100 | ELF_MACH_S390 => 22, | |
101 | ELF_MACH_ARM => 40, | |
102 | ELF_MACH_ALPHA_OLD => 41, | |
103 | ELF_MACH_SH => 42, | |
104 | ELF_MACH_SPARC64 => 43, | |
105 | ELF_MACH_IA64 => 50, | |
106 | ELF_MACH_AVR => 83, | |
107 | ELF_MACH_M32R => 88, | |
108 | ELF_MACH_MN10300 => 89, | |
109 | ELF_MACH_MN10200 => 90, | |
110 | ELF_MACH_OR1K => 92, | |
111 | ELF_MACH_XTENSA => 94, | |
112 | ELF_MACH_MICROBLAZE => 189, | |
113 | ELF_MACH_AVR_OLD => 0x1057, | |
114 | ELF_MACH_OR1K_OLD => 0x8472, | |
115 | ELF_MACH_ALPHA => 0x9026, | |
116 | ELF_MACH_M32R_CYGNUS => 0x9041, | |
117 | ELF_MACH_S390_OLD => 0xa390, | |
118 | ELF_MACH_XTENSA_OLD => 0xabc7, | |
119 | ELF_MACH_MICROBLAZE_OLD => 0xbaab, | |
120 | ELF_MACH_MN10300_CYGNUS => 0xbeef, | |
121 | ELF_MACH_MN10200_CYGNUS => 0xdead, | |
122 | ||
123 | ELF_VERSION_NONE => 0, | |
124 | ELF_VERSION_CURRENT => 1, | |
125 | ||
126 | # List of processor flags that might influence the ABI. | |
127 | ||
128 | ELF_FLAG_ARM_ALIGN8 => 0x00000040, | |
129 | ELF_FLAG_ARM_NEW_ABI => 0x00000080, | |
130 | ELF_FLAG_ARM_OLD_ABI => 0x00000100, | |
131 | ELF_FLAG_ARM_SOFT_FLOAT => 0x00000200, | |
132 | ELF_FLAG_ARM_HARD_FLOAT => 0x00000400, | |
133 | ELF_FLAG_ARM_EABI_MASK => 0xff000000, | |
134 | ||
135 | ELF_FLAG_IA64_ABI64 => 0x00000010, | |
136 | ||
137 | ELF_FLAG_MIPS_ABI2 => 0x00000020, | |
138 | ELF_FLAG_MIPS_32BIT => 0x00000100, | |
139 | ELF_FLAG_MIPS_FP64 => 0x00000200, | |
140 | ELF_FLAG_MIPS_NAN2008 => 0x00000400, | |
141 | ELF_FLAG_MIPS_ABI_MASK => 0x0000f000, | |
142 | ELF_FLAG_MIPS_ARCH_MASK => 0xf0000000, | |
143 | ||
144 | ELF_FLAG_PPC64_ABI64 => 0x00000003, | |
145 | ||
146 | ELF_FLAG_SH_MACH_MASK => 0x0000001f, | |
147 | }; | |
148 | ||
149 | # These map alternative or old machine IDs to their canonical form. | |
150 | my %elf_mach_map = ( | |
151 | ELF_MACH_ALPHA_OLD() => ELF_MACH_ALPHA, | |
152 | ELF_MACH_AVR_OLD() => ELF_MACH_AVR, | |
153 | ELF_MACH_M32R_CYGNUS() => ELF_MACH_M32R, | |
154 | ELF_MACH_MICROBLAZE_OLD() => ELF_MACH_MICROBLAZE, | |
155 | ELF_MACH_MN10200_CYGNUS() => ELF_MACH_MN10200, | |
156 | ELF_MACH_MN10300_CYGNUS() => ELF_MACH_MN10300, | |
157 | ELF_MACH_OR1K_OLD() => ELF_MACH_OR1K, | |
158 | ELF_MACH_S390_OLD() => ELF_MACH_S390, | |
159 | ELF_MACH_SPARC32PLUS() => ELF_MACH_SPARC, | |
160 | ELF_MACH_SPARC64_OLD() => ELF_MACH_SPARC64, | |
161 | ELF_MACH_XTENSA_OLD() => ELF_MACH_XTENSA, | |
162 | ); | |
163 | ||
164 | # These masks will try to expose processor flags that are ABI incompatible, | |
165 | # and as such are part of defining the architecture ABI. If uncertain it is | |
166 | # always better to not mask a flag, because that preserves the historical | |
167 | # behavior, and we do not drop dependencies. | |
168 | my %elf_flags_mask = ( | |
169 | ELF_MACH_IA64() => ELF_FLAG_IA64_ABI64, | |
170 | ELF_MACH_MIPS() => ELF_FLAG_MIPS_ABI_MASK | ELF_FLAG_MIPS_ABI2, | |
171 | ELF_MACH_PPC64() => ELF_FLAG_PPC64_ABI64, | |
172 | ); | |
173 | ||
174 | sub get_format { | |
175 | my ($file) = @_; | |
176 | state %format; | |
177 | ||
178 | return $format{$file} if exists $format{$file}; | |
179 | ||
180 | my $header; | |
181 | ||
182 | open my $fh, '<', $file or syserr(g_('cannot read %s'), $file); | |
183 | my $rc = read $fh, $header, 64; | |
184 | if (not defined $rc) { | |
185 | syserr(g_('cannot read %s'), $file); | |
186 | } elsif ($rc != 64) { | |
187 | return; | |
188 | } | |
189 | close $fh; | |
190 | ||
191 | my %elf; | |
192 | ||
193 | # Unpack the identifier field. | |
194 | @elf{qw(magic bits endian vertype osabi verabi)} = unpack 'a4C5', $header; | |
195 | ||
196 | return unless $elf{magic} eq "\x7fELF"; | |
197 | return unless $elf{vertype} == ELF_VERSION_CURRENT; | |
198 | ||
199 | my ($elf_word, $elf_endian); | |
200 | if ($elf{bits} == ELF_BITS_32) { | |
201 | $elf_word = 'L'; | |
202 | } elsif ($elf{bits} == ELF_BITS_64) { | |
203 | $elf_word = 'Q'; | |
204 | } else { | |
205 | return; | |
206 | } | |
207 | if ($elf{endian} == ELF_ORDER_2LSB) { | |
208 | $elf_endian = '<'; | |
209 | } elsif ($elf{endian} == ELF_ORDER_2MSB) { | |
210 | $elf_endian = '>'; | |
211 | } else { | |
212 | return; | |
213 | } | |
214 | ||
215 | # Unpack the endianness and size dependent fields. | |
216 | my $tmpl = "x16(S2Lx[${elf_word}3]L)${elf_endian}"; | |
217 | @elf{qw(type mach version flags)} = unpack $tmpl, $header; | |
218 | ||
219 | # Canonicalize the machine ID. | |
220 | $elf{mach} = $elf_mach_map{$elf{mach}} // $elf{mach}; | |
221 | ||
222 | # Mask any processor flags that might not change the architecture ABI. | |
223 | $elf{flags} &= $elf_flags_mask{$elf{mach}} // 0; | |
224 | ||
225 | # Repack for easy comparison, as a big-endian byte stream, so that | |
226 | # unpacking for output gives meaningful results. | |
227 | $format{$file} = pack 'C2(SL)>', @elf{qw(bits endian mach flags)}; | |
228 | ||
229 | return $format{$file}; | |
230 | } | |
231 | ||
232 | sub is_elf { | |
233 | my $file = shift; | |
234 | open(my $file_fh, '<', $file) or syserr(g_('cannot read %s'), $file); | |
235 | my ($header, $result) = ('', 0); | |
236 | if (read($file_fh, $header, 4) == 4) { | |
237 | $result = 1 if ($header =~ /^\177ELF$/); | |
238 | } | |
239 | close($file_fh); | |
240 | return $result; | |
241 | } | |
242 | ||
243 | package Dpkg::Shlibs::Objdump::Object; | |
244 | ||
245 | use strict; | |
246 | use warnings; | |
247 | ||
248 | use Dpkg::Gettext; | |
249 | use Dpkg::ErrorHandling; | |
250 | ||
251 | sub new { | |
252 | my $this = shift; | |
253 | my $file = shift // ''; | |
254 | my $class = ref($this) || $this; | |
255 | my $self = {}; | |
256 | bless $self, $class; | |
257 | ||
258 | $self->reset; | |
259 | if ($file) { | |
260 | $self->analyze($file); | |
261 | } | |
262 | ||
263 | return $self; | |
264 | } | |
265 | ||
266 | sub reset { | |
267 | my $self = shift; | |
268 | ||
269 | $self->{file} = ''; | |
270 | $self->{id} = ''; | |
271 | $self->{SONAME} = ''; | |
272 | $self->{HASH} = ''; | |
273 | $self->{GNU_HASH} = ''; | |
274 | $self->{SONAME} = ''; | |
275 | $self->{NEEDED} = []; | |
276 | $self->{RPATH} = []; | |
277 | $self->{dynsyms} = {}; | |
278 | $self->{flags} = {}; | |
279 | $self->{dynrelocs} = {}; | |
280 | ||
281 | return $self; | |
282 | } | |
283 | ||
284 | ||
285 | sub analyze { | |
286 | my ($self, $file) = @_; | |
287 | ||
288 | $file ||= $self->{file}; | |
289 | return unless $file; | |
290 | ||
291 | $self->reset; | |
292 | $self->{file} = $file; | |
293 | ||
294 | $self->{exec_abi} = Dpkg::Shlibs::Objdump::get_format($file); | |
295 | ||
296 | if (not defined $self->{exec_abi}) { | |
297 | warning(g_("unknown executable format in file '%s'"), $file); | |
298 | return; | |
299 | } | |
300 | ||
301 | local $ENV{LC_ALL} = 'C'; | |
302 | open(my $objdump, '-|', $OBJDUMP, '-w', '-f', '-p', '-T', '-R', $file) | |
303 | or syserr(g_('cannot fork for %s'), $OBJDUMP); | |
304 | my $ret = $self->parse_objdump_output($objdump); | |
305 | close($objdump); | |
306 | return $ret; | |
307 | } | |
308 | ||
309 | sub parse_objdump_output { | |
310 | my ($self, $fh) = @_; | |
311 | ||
312 | my $section = 'none'; | |
313 | while (<$fh>) { | |
314 | s/\s*$//; | |
315 | next if length == 0; | |
316 | ||
317 | if (/^DYNAMIC SYMBOL TABLE:/) { | |
318 | $section = 'dynsym'; | |
319 | next; | |
320 | } elsif (/^DYNAMIC RELOCATION RECORDS/) { | |
321 | $section = 'dynreloc'; | |
322 | $_ = <$fh>; # Skip header | |
323 | next; | |
324 | } elsif (/^Dynamic Section:/) { | |
325 | $section = 'dyninfo'; | |
326 | next; | |
327 | } elsif (/^Program Header:/) { | |
328 | $section = 'header'; | |
329 | next; | |
330 | } elsif (/^Version definitions:/) { | |
331 | $section = 'verdef'; | |
332 | next; | |
333 | } elsif (/^Version References:/) { | |
334 | $section = 'verref'; | |
335 | next; | |
336 | } | |
337 | ||
338 | if ($section eq 'dynsym') { | |
339 | $self->parse_dynamic_symbol($_); | |
340 | } elsif ($section eq 'dynreloc') { | |
341 | if (/^\S+\s+(\S+)\s+(.+)$/) { | |
342 | $self->{dynrelocs}{$2} = $1; | |
343 | } else { | |
344 | warning(g_("couldn't parse dynamic relocation record: %s"), $_); | |
345 | } | |
346 | } elsif ($section eq 'dyninfo') { | |
347 | if (/^\s*NEEDED\s+(\S+)/) { | |
348 | push @{$self->{NEEDED}}, $1; | |
349 | } elsif (/^\s*SONAME\s+(\S+)/) { | |
350 | $self->{SONAME} = $1; | |
351 | } elsif (/^\s*HASH\s+(\S+)/) { | |
352 | $self->{HASH} = $1; | |
353 | } elsif (/^\s*GNU_HASH\s+(\S+)/) { | |
354 | $self->{GNU_HASH} = $1; | |
355 | } elsif (/^\s*RUNPATH\s+(\S+)/) { | |
356 | # RUNPATH takes precedence over RPATH but is | |
357 | # considered after LD_LIBRARY_PATH while RPATH | |
358 | # is considered before (if RUNPATH is not set). | |
359 | my $runpath = $1; | |
360 | $self->{RPATH} = [ split /:/, $runpath ]; | |
361 | } elsif (/^\s*RPATH\s+(\S+)/) { | |
362 | my $rpath = $1; | |
363 | unless (scalar(@{$self->{RPATH}})) { | |
364 | $self->{RPATH} = [ split /:/, $rpath ]; | |
365 | } | |
366 | } | |
367 | } elsif ($section eq 'none') { | |
368 | if (/^\s*.+:\s*file\s+format\s+(\S+)$/) { | |
369 | $self->{format} = $1; | |
370 | } elsif (/^architecture:\s*\S+,\s*flags\s*\S+:$/) { | |
371 | # Parse 2 lines of "-f" | |
372 | # architecture: i386, flags 0x00000112: | |
373 | # EXEC_P, HAS_SYMS, D_PAGED | |
374 | # start address 0x08049b50 | |
375 | $_ = <$fh>; | |
376 | chomp; | |
377 | $self->{flags}{$_} = 1 foreach (split(/,\s*/)); | |
378 | } | |
379 | } | |
380 | } | |
381 | # Update status of dynamic symbols given the relocations that have | |
382 | # been parsed after the symbols... | |
383 | $self->apply_relocations(); | |
384 | ||
385 | return $section ne 'none'; | |
386 | } | |
387 | ||
388 | # Output format of objdump -w -T | |
389 | # | |
390 | # /lib/libc.so.6: file format elf32-i386 | |
391 | # | |
392 | # DYNAMIC SYMBOL TABLE: | |
393 | # 00056ef0 g DF .text 000000db GLIBC_2.2 getwchar | |
394 | # 00000000 g DO *ABS* 00000000 GCC_3.0 GCC_3.0 | |
395 | # 00069960 w DF .text 0000001e GLIBC_2.0 bcmp | |
396 | # 00000000 w D *UND* 00000000 _pthread_cleanup_pop_restore | |
397 | # 0000b788 g DF .text 0000008e Base .protected xine_close | |
398 | # 0000b788 g DF .text 0000008e .hidden IA__g_free | |
399 | # | ||||||| | | | | | |
400 | # | ||||||| | | Version str (.visibility) + Symbol name | |
401 | # | ||||||| | Alignment | |
402 | # | ||||||| Section name (or *UND* for an undefined symbol) | |
403 | # | ||||||F=Function,f=file,O=object | |
404 | # | |||||d=debugging,D=dynamic | |
405 | # | ||||I=Indirect | |
406 | # | |||W=warning | |
407 | # | ||C=constructor | |
408 | # | |w=weak | |
409 | # | g=global,l=local,!=both global/local | |
410 | # Size of the symbol | |
411 | # | |
412 | # GLIBC_2.2 is the version string associated to the symbol | |
413 | # (GLIBC_2.2) is the same but the symbol is hidden, a newer version of the | |
414 | # symbol exist | |
415 | ||
416 | my $vis_re = qr/(\.protected|\.hidden|\.internal|0x\S+)/; | |
417 | my $dynsym_re = qr< | |
418 | ^ | |
419 | [0-9a-f]+ # Symbol size | |
420 | \ (.{7}) # Flags | |
421 | \s+(\S+) # Section name | |
422 | \s+[0-9a-f]+ # Alignment | |
423 | (?:\s+(\S+))? # Version string | |
424 | (?:\s+$vis_re)? # Visibility | |
425 | \s+(.+) # Symbol name | |
426 | >x; | |
427 | ||
428 | sub parse_dynamic_symbol { | |
429 | my ($self, $line) = @_; | |
430 | if ($line =~ $dynsym_re) { | |
431 | ||
432 | my ($flags, $sect, $ver, $vis, $name) = ($1, $2, $3, $4, $5); | |
433 | ||
434 | # Special case if version is missing but extra visibility | |
435 | # attribute replaces it in the match | |
436 | if (defined($ver) and $ver =~ /^$vis_re$/) { | |
437 | $vis = $ver; | |
438 | $ver = ''; | |
439 | } | |
440 | ||
441 | # Cleanup visibility field | |
442 | $vis =~ s/^\.// if defined($vis); | |
443 | ||
444 | my $symbol = { | |
445 | name => $name, | |
446 | version => $ver // '', | |
447 | section => $sect, | |
448 | dynamic => substr($flags, 5, 1) eq 'D', | |
449 | debug => substr($flags, 5, 1) eq 'd', | |
450 | type => substr($flags, 6, 1), | |
451 | weak => substr($flags, 1, 1) eq 'w', | |
452 | local => substr($flags, 0, 1) eq 'l', | |
453 | global => substr($flags, 0, 1) eq 'g', | |
454 | visibility => $vis // '', | |
455 | hidden => '', | |
456 | defined => $sect ne '*UND*' | |
457 | }; | |
458 | ||
459 | # Handle hidden symbols | |
460 | if (defined($ver) and $ver =~ /^\((.*)\)$/) { | |
461 | $ver = $1; | |
462 | $symbol->{version} = $1; | |
463 | $symbol->{hidden} = 1; | |
464 | } | |
465 | ||
466 | # Register symbol | |
467 | $self->add_dynamic_symbol($symbol); | |
468 | } elsif ($line =~ /^[0-9a-f]+ (.{7})\s+(\S+)\s+[0-9a-f]+/) { | |
469 | # Same start but no version and no symbol ... just ignore | |
470 | } elsif ($line =~ /^REG_G\d+\s+/) { | |
471 | # Ignore some s390-specific output like | |
472 | # REG_G6 g R *UND* 0000000000000000 #scratch | |
473 | } else { | |
474 | warning(g_("couldn't parse dynamic symbol definition: %s"), $line); | |
475 | } | |
476 | } | |
477 | ||
478 | sub apply_relocations { | |
479 | my $self = shift; | |
480 | foreach my $sym (values %{$self->{dynsyms}}) { | |
481 | # We want to mark as undefined symbols those which are currently | |
482 | # defined but that depend on a copy relocation | |
483 | next if not $sym->{defined}; | |
484 | next if not exists $self->{dynrelocs}{$sym->{name}}; | |
485 | if ($self->{dynrelocs}{$sym->{name}} =~ /^R_.*_COPY$/) { | |
486 | $sym->{defined} = 0; | |
487 | } | |
488 | } | |
489 | } | |
490 | ||
491 | sub add_dynamic_symbol { | |
492 | my ($self, $symbol) = @_; | |
493 | $symbol->{objid} = $symbol->{soname} = $self->get_id(); | |
494 | $symbol->{soname} =~ s{^.*/}{} unless $self->{SONAME}; | |
495 | if ($symbol->{version}) { | |
496 | $self->{dynsyms}{$symbol->{name} . '@' . $symbol->{version}} = $symbol; | |
497 | } else { | |
498 | $self->{dynsyms}{$symbol->{name} . '@Base'} = $symbol; | |
499 | } | |
500 | } | |
501 | ||
502 | sub get_id { | |
503 | my $self = shift; | |
504 | return $self->{SONAME} || $self->{file}; | |
505 | } | |
506 | ||
507 | sub get_symbol { | |
508 | my ($self, $name) = @_; | |
509 | if (exists $self->{dynsyms}{$name}) { | |
510 | return $self->{dynsyms}{$name}; | |
511 | } | |
512 | if ($name !~ /@/) { | |
513 | if (exists $self->{dynsyms}{$name . '@Base'}) { | |
514 | return $self->{dynsyms}{$name . '@Base'}; | |
515 | } | |
516 | } | |
517 | return; | |
518 | } | |
519 | ||
520 | sub get_exported_dynamic_symbols { | |
521 | my $self = shift; | |
522 | return grep { $_->{defined} && $_->{dynamic} && !$_->{local} } | |
523 | values %{$self->{dynsyms}}; | |
524 | } | |
525 | ||
526 | sub get_undefined_dynamic_symbols { | |
527 | my $self = shift; | |
528 | return grep { (!$_->{defined}) && $_->{dynamic} } | |
529 | values %{$self->{dynsyms}}; | |
530 | } | |
531 | ||
532 | sub get_needed_libraries { | |
533 | my $self = shift; | |
534 | return @{$self->{NEEDED}}; | |
535 | } | |
536 | ||
537 | sub is_executable { | |
538 | my $self = shift; | |
539 | return exists $self->{flags}{EXEC_P} && $self->{flags}{EXEC_P}; | |
540 | } | |
541 | ||
542 | sub is_public_library { | |
543 | my $self = shift; | |
544 | return exists $self->{flags}{DYNAMIC} && $self->{flags}{DYNAMIC} | |
545 | && exists $self->{SONAME} && $self->{SONAME}; | |
546 | } | |
547 | ||
548 | 1; |