3 * Report MTU on path to specified host
5 * (c) 2008 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Trivial IP Encryption (TrIPE).
12 * TrIPE is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
17 * TrIPE is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 * You should have received a copy of the GNU General Public License
23 * along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
26 /*----- Header files ------------------------------------------------------*/
38 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <netinet/in.h>
44 #include <arpa/inet.h>
47 #include <netinet/in_systm.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip_icmp.h>
50 #include <netinet/ip6.h>
51 #include <netinet/icmp6.h>
52 #include <netinet/udp.h>
54 #ifdef HAVE_GETIFADDRS
57 # include <sys/ioctl.h>
60 #include <mLib/alloc.h>
61 #include <mLib/bits.h>
62 #include <mLib/dstr.h>
64 #include <mLib/mdwopt.h>
65 #include <mLib/quis.h>
66 #include <mLib/report.h>
69 /*----- Static variables --------------------------------------------------*/
71 static unsigned char buf
[65536];
75 /*----- Utility functions -------------------------------------------------*/
77 /* Step a value according to a simple LFSR. */
79 do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0)
81 /* Fill buffer with a constant but pseudorandom string. Uses a simple
84 static void fillbuffer(unsigned char *p
, size_t sz
)
86 unsigned int y
= 0xbc20;
87 const unsigned char *l
= p
+ sz
;
92 for (i
= 0; i
< 8; i
++) STEP(y
);
96 /* Convert a string to floating point. */
97 static double s2f(const char *s
, const char *what
)
104 if (errno
|| *q
) die(EXIT_FAILURE
, "bad %s", what
);
108 /* Convert a floating-point value into a struct timeval. */
109 static void f2tv(struct timeval
*tv
, double t
)
110 { tv
->tv_sec
= t
; tv
->tv_usec
= (t
- tv
->tv_sec
)*MILLION
; }
114 struct sockaddr_in sin
;
115 struct sockaddr_in6 sin6
;
118 /* Check whether an address family is even slightly supported. */
119 static int addrfamok(int af
)
122 case AF_INET
: case AF_INET6
: return (1);
127 /* Return the size of a socket address. */
128 static size_t addrsz(const union addr
*a
)
130 switch (a
->sa
.sa_family
) {
131 case AF_INET
: return (sizeof(a
->sin
));
132 case AF_INET6
: return (sizeof(a
->sin6
));
137 /*----- Main algorithm skeleton -------------------------------------------*/
140 unsigned f
; /* Various flags */
141 #define F_VERBOSE 1u /* Give a running commentary */
142 double retx
; /* Initial retransmit interval */
143 double regr
; /* Retransmit growth factor */
144 double timeout
; /* Retransmission timeout */
145 int seqoff
; /* Offset to write sequence number */
146 const struct probe_ops
*pops
; /* Probe algorithm description */
147 union addr a
; /* Destination address */
151 const struct param
*pp
;
157 const struct probe_ops
*next
;
159 int (*setup
)(void *, int, const struct param
*);
160 void (*finish
)(void *);
161 void (*selprep
)(void *, int *, fd_set
*);
162 int (*xmit
)(void *, int);
163 int (*selproc
)(void *, fd_set
*, struct probestate
*);
174 /* or a positive MTU upper-bound */
177 /* Add a file descriptor FD to the set `fd_in', updating `*maxfd'. */
179 do { FD_SET(fd, fd_in); if (*maxfd < fd) *maxfd = fd; } while (0)
181 /* Check whether a buffer contains a packet from our current probe. */
182 static int mypacketp(struct probestate
*ps
,
183 const unsigned char *p
, size_t sz
)
185 const struct param
*pp
= ps
->pp
;
187 return (sz
>= pp
->seqoff
+ 2 && LOAD16(p
+ pp
->seqoff
) == ps
->q
);
190 /* See whether MTU is an acceptable MTU value. Return an appropriate
191 * RC_... code or a new suggested MTU.
193 static int probe(struct probestate
*ps
, void *st
, int mtu
)
195 const struct param
*pp
= ps
->pp
;
197 struct timeval tv
, now
, when
, done
;
198 double timer
= pp
->retx
;
201 /* Set up the first retransmit and give-up timers. */
202 gettimeofday(&now
, 0);
203 f2tv(&tv
, pp
->timeout
); TV_ADD(&done
, &now
, &tv
);
204 f2tv(&tv
, timer
); TV_ADD(&when
, &now
, &tv
);
205 if (TV_CMP(&when
, >, &done
)) when
= done
;
207 /* Send the initial probe. */
208 if (pp
->f
& F_VERBOSE
)
209 moan("sending probe of size %d (seq = %04x)", mtu
, ps
->q
);
211 STORE16(buf
+ pp
->seqoff
, ps
->q
);
212 if ((rc
= pp
->pops
->xmit(st
, mtu
)) != RC_OK
) return (rc
);
216 /* Wait for something interesting to happen. */
217 maxfd
= 0; FD_ZERO(&fd_in
);
218 pp
->pops
->selprep(st
, &maxfd
, &fd_in
);
219 TV_SUB(&tv
, &when
, &now
);
220 if (select(maxfd
+ 1, &fd_in
, 0, 0, &tv
) < 0) return (RC_FAIL
);
221 gettimeofday(&now
, 0);
223 /* See whether the probe method has any answers for us. */
224 if ((rc
= pp
->pops
->selproc(st
, &fd_in
, ps
)) != RC_OK
) return (rc
);
226 /* If we've waited too long, give up. If we should retransmit, do
229 if (TV_CMP(&now
, >, &done
))
231 else if (TV_CMP(&now
, >, &when
)) {
232 if (pp
->f
& F_VERBOSE
) moan("re-sending probe of size %d", mtu
);
233 if ((rc
= pp
->pops
->xmit(st
, mtu
)) != RC_OK
) return (rc
);
235 timer
*= pp
->regr
; f2tv(&tv
, timer
); TV_ADD(&when
, &when
, &tv
);
236 } while (TV_CMP(&when
, <, &now
));
237 if (TV_CMP(&when
, >, &done
)) when
= done
;
242 /* Discover the path MTU to the destination address. */
243 static int pathmtu(const struct param
*pp
)
249 struct probestate ps
;
251 /* Build and connect a UDP socket. We'll need this to know the local port
252 * number to use if nothing else. Set other stuff up.
254 if ((sk
= socket(pp
->a
.sa
.sa_family
, SOCK_DGRAM
, IPPROTO_UDP
)) < 0)
256 if (connect(sk
, &pp
->a
.sa
, addrsz(&pp
->a
))) goto fail_1
;
257 st
= xmalloc(pp
->pops
->statesz
);
258 if ((mtu
= pp
->pops
->setup(st
, sk
, pp
)) < 0) goto fail_2
;
259 ps
.pp
= pp
; ps
.q
= rand() & 0xffff;
260 switch (pp
->a
.sa
.sa_family
) {
261 case AF_INET
: lo
= 576; break;
262 case AF_INET6
: lo
= 1280; break;
266 if (hi
< lo
) { errno
= EMSGSIZE
; return (-1); }
268 /* And now we do a thing which is sort of like a binary search, except that
269 * we also take explicit clues as establishing a new upper bound, and we
270 * try to hug that initially.
273 assert(lo
<= mtu
&& mtu
<= hi
);
274 if (pp
->f
& F_VERBOSE
) moan("probe: %d <= %d <= %d", lo
, mtu
, hi
);
275 rc
= probe(&ps
, st
, mtu
);
279 if (pp
->f
& F_VERBOSE
) moan("probe failed");
283 /* If we've not seen a dropped packet before then we don't know what
284 * this means yet -- in particular, we don't know which bit of the
285 * network is swallowing packets. Send a minimum-size probe. If
286 * that doesn't come back then assume that the remote host is
287 * swallowing our packets. If it does, then we assume that dropped
288 * packets are a result of ICMP fragmentation-needed reports being
289 * lost or suppressed.
291 if (pp
->f
& F_VERBOSE
) moan("gave up: black hole detected");
293 if (pp
->f
& F_VERBOSE
) moan("sending minimum-size probe");
294 switch (probe(&ps
, st
, lo
)) {
298 if (pp
->f
& F_VERBOSE
) {
299 moan("no reply from min-size probe: "
300 "assume black hole at target");
305 if (pp
->f
& F_VERBOSE
) {
306 moan("reply from min-size probe OK: "
307 "assume black hole in network");
312 if (pp
->f
& F_VERBOSE
)
313 moan("unexpected return code from probe");
319 if (droppy
) goto higher
; else goto lower
;
324 if (pp
->f
& F_VERBOSE
)
325 moan("probe returned: remote host is not a black hole");
329 if (pp
->f
& F_VERBOSE
) moan("probe returned: found correct MTU");
334 /* Now we must make a new guess, between lo and hi. We know that lo
335 * is good; but we're not so sure about hi here. We know that hi >
336 * lo, so this will find an approximate midpoint, greater than lo and
339 if (pp
->f
& F_VERBOSE
) moan("probe returned: guessing higher");
340 mtu
+= (hi
- lo
+ 1)/2;
345 /* If this didn't work, and we're already at the bottom of our
346 * possible range, then something has gone horribly wrong.
351 if (pp
->f
& F_VERBOSE
) moan("error returned: found correct MTU");
356 /* We must make a new guess, between lo and hi. We're probably
357 * fairly sure that lo will succeed, since either it's the minimum
358 * MTU or we've tested it already; but we're not quite sure about hi,
359 * so we want to aim high.
361 if (pp
->f
& F_VERBOSE
) moan("error returned: guessing lower");
362 mtu
-= (hi
- lo
+ 1)/2;
366 if (pp
->f
& F_VERBOSE
) moan("error returned with new MTU estimate");
373 /* Clean up and return our result. */
374 pp
->pops
->finish(st
);
380 pp
->pops
->finish(st
);
389 /*----- Doing it the hard way ---------------------------------------------*/
391 #ifdef HAVE_GETIFADDRS
393 #if defined(linux) || defined(__OpenBSD__)
398 # define sane_htons htons
399 # define sane_htonl htonl
405 static int rawicmp
= -1, rawudp
= -1, rawerr
= 0;
406 static int rawicmp6
= -1, rawudp6
= -1, rawerr6
= 0;
408 #define IPCK_INIT 0xffff
410 /* Compare two addresses. Maybe compare the port numbers too. */
412 static int addreq(const union addr
*a
, const union addr
*b
, unsigned f
)
414 switch (a
->sa
.sa_family
) {
416 return (a
->sin
.sin_addr
.s_addr
== b
->sin
.sin_addr
.s_addr
&&
417 (!(f
&AEF_PORT
) || a
->sin
.sin_port
== b
->sin
.sin_port
));
419 return (!memcmp(a
->sin6
.sin6_addr
.s6_addr
,
420 b
->sin6
.sin6_addr
.s6_addr
, 16) &&
421 (!(f
&AEF_PORT
) || a
->sin6
.sin6_port
== b
->sin6
.sin6_port
));
/* Compute an IP checksum over the N bytes at BUF.
 *
 * This is a restartable interface: initialize A to `IPCK_INIT' for the
 * first call, and pass the value returned by the previous call as A in
 * order to continue the checksum over a further chunk.  A trailing odd
 * byte is treated as the high half of a 16-bit word, so every chunk other
 * than the last should have even length.
 *
 * Returns the complement of the folded 16-bit sum, except that a sum of
 * 0xffff is returned as 0xffff rather than as zero.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;	/* Recover the running sum from A */
  const unsigned char *p = buf, *l = p + n;

  /* Accumulate whole 16-bit words.  Test the remaining length rather than
   * comparing against `l - 1': when N is zero that would form a pointer
   * before the start of the buffer, which is undefined behaviour.
   */
  while (l - p >= 2) { aa += LOAD16_B(p); p += 2; }

  /* A leftover odd byte counts as the high byte of a final word. */
  if (p < l) { aa += (unsigned)(*p) << 8; }

  /* Fold the carries back in until the sum fits in 16 bits. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);

  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
441 /* TCP/UDP pseudoheader structure. */
443 struct in_addr ph_src
, ph_dst
;
448 struct in6_addr ph6_src
, ph6_dst
;
450 uint8_t ph6_z0
, ph6_z1
, ph6_z2
, ph6_nxt
;
455 int sk
, rawicmp
, rawudp
;
456 uint16_t srcport
, dstport
;
460 static int raw_setup(void *stv
, int sk
, const struct param
*pp
)
462 struct raw_state
*st
= stv
;
465 struct ifaddrs
*ifa
, *ifaa
, *ifap
;
467 struct icmp6_filter f6
;
469 /* Check that the address is OK, and that we have the necessary raw
472 * For IPv6, also set the filter so we don't get too many useless wakeups.
474 switch (pp
->a
.sa
.sa_family
) {
476 if (rawerr
) { errno
= rawerr
; goto fail_0
; }
477 st
->rawicmp
= rawicmp
; st
->rawudp
= rawudp
; st
->sk
= sk
;
478 /* IPv4 filtering is available on Linux but isn't portable. */
481 if (rawerr6
) { errno
= rawerr6
; goto fail_0
; }
482 st
->rawicmp
= rawicmp6
; st
->rawudp
= rawudp6
; st
->sk
= sk
;
483 ICMP6_FILTER_SETBLOCKALL(&f6
);
484 ICMP6_FILTER_SETPASS(ICMP6_PACKET_TOO_BIG
, &f6
);
485 ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH
, &f6
);
486 if (setsockopt(st
->rawicmp
, IPPROTO_ICMPV6
, ICMP6_FILTER
,
488 die(EXIT_FAILURE
, "failed to set icmpv6 filter: %s",
493 errno
= EPFNOSUPPORT
; goto fail_0
;
496 /* Initialize the sequence number. */
497 st
->q
= rand() & 0xffff;
499 /* Snaffle the local and remote address and port number. */
502 if (getsockname(sk
, &st
->me
.sa
, &sz
))
505 /* Only now do some fiddling because Linux doesn't like port numbers in
506 * IPv6 raw destination addresses...
508 switch (pp
->a
.sa
.sa_family
) {
510 st
->srcport
= st
->me
.sin
.sin_port
; st
->me
.sin
.sin_port
= 0;
511 st
->dstport
= st
->a
.sin
.sin_port
; st
->a
.sin
.sin_port
= 0;
514 st
->srcport
= st
->me
.sin6
.sin6_port
; st
->me
.sin6
.sin6_port
= 0;
515 st
->dstport
= st
->a
.sin6
.sin6_port
; st
->a
.sin6
.sin6_port
= 0;
521 /* There isn't a portable way to force the DF flag onto a packet through
522 * UDP, or even through raw IP, unless we write the entire IP header
523 * ourselves. This is somewhat annoying, especially since we have an
524 * uphill struggle keeping track of which systems randomly expect which
525 * header fields to be presented in host byte order. Oh, well.
528 if (setsockopt(rawudp
, IPPROTO_IP
, IP_HDRINCL
, &i
, sizeof(i
))) goto fail_0
;
530 /* Find an upper bound on the MTU. Do two passes over the interface
531 * list. If we can find matches for our local address then use the
532 * highest one of those; otherwise do a second pass and simply take the
533 * highest MTU of any network interface.
535 if (getifaddrs(&ifaa
)) goto fail_0
;
536 for (i
= 0; i
< 2; i
++) {
537 for (ifap
= 0, ifa
= ifaa
; ifa
; ifa
= ifa
->ifa_next
) {
538 if (!(ifa
->ifa_flags
& IFF_UP
) || !ifa
->ifa_addr
||
539 ifa
->ifa_addr
->sa_family
!= st
->me
.sa
.sa_family
||
541 !addreq((union addr
*)ifa
->ifa_addr
, &st
->me
, 0)) ||
542 (i
== 1 && ifap
&& strcmp(ifap
->ifa_name
, ifa
->ifa_name
) == 0) ||
543 strlen(ifa
->ifa_name
) >= sizeof(ifr
.ifr_name
))
546 strcpy(ifr
.ifr_name
, ifa
->ifa_name
);
547 if (ioctl(sk
, SIOCGIFMTU
, &ifr
)) goto fail_1
;
548 if (mtu
< ifr
.ifr_mtu
) mtu
= ifr
.ifr_mtu
;
552 if (mtu
< 0) { errno
= ENOTCONN
; goto fail_1
; }
/* Finish hook for the raw probe method.  It does nothing: the function
 * exists only so that the `finish' slot in the ops table can be filled.
 */
static void raw_finish(void *stv)
{
  /* Intentionally empty. */
}
566 static void raw_selprep(void *stv
, int *maxfd
, fd_set
*fd_in
)
567 { struct raw_state
*st
= stv
; ADDFD(st
->sk
); ADDFD(st
->rawicmp
); }
569 static int raw_xmit(void *stv
, int mtu
)
571 struct raw_state
*st
= stv
;
572 unsigned char b
[65536], *p
;
580 switch (st
->a
.sa
.sa_family
) {
584 /* Build the IP header. */
587 ip
->ip_hl
= sizeof(*ip
)/4;
588 ip
->ip_tos
= IPTOS_RELIABILITY
;
589 ip
->ip_len
= sane_htons(mtu
);
590 STEP(st
->q
); ip
->ip_id
= htons(st
->q
);
591 ip
->ip_off
= sane_htons(0 | IP_DF
);
593 ip
->ip_p
= IPPROTO_UDP
;
595 ip
->ip_src
= st
->me
.sin
.sin_addr
;
596 ip
->ip_dst
= st
->a
.sin
.sin_addr
;
598 /* Build a UDP packet in the output buffer. */
599 udp
= (struct udphdr
*)(ip
+ 1);
600 udp
->uh_sport
= st
->srcport
;
601 udp
->uh_dport
= st
->dstport
;
602 udp
->uh_ulen
= htons(mtu
- sizeof(*ip
));
605 /* Copy the payload. */
606 p
= (unsigned char *)(udp
+ 1);
607 memcpy(p
, buf
, mtu
- (p
- b
));
609 /* Calculate the UDP checksum. */
610 ph
.ph_src
= ip
->ip_src
;
611 ph
.ph_dst
= ip
->ip_dst
;
613 ph
.ph_p
= IPPROTO_UDP
;
614 ph
.ph_len
= udp
->uh_ulen
;
616 ck
= ipcksum(&ph
, sizeof(ph
), ck
);
617 ck
= ipcksum(udp
, mtu
- sizeof(*ip
), ck
);
618 udp
->uh_sum
= htons(ck
);
624 /* Build the IP header. */
625 ip6
= (struct ip6_hdr
*)b
;
626 STEP(st
->q
); ip6
->ip6_flow
= htonl(0x60000000 | st
->q
);
627 ip6
->ip6_plen
= htons(mtu
- sizeof(*ip6
));
628 ip6
->ip6_nxt
= IPPROTO_UDP
;
630 ip6
->ip6_src
= st
->me
.sin6
.sin6_addr
;
631 ip6
->ip6_dst
= st
->a
.sin6
.sin6_addr
;
633 /* Build a UDP packet in the output buffer. */
634 udp
= (struct udphdr
*)(ip6
+ 1);
635 udp
->uh_sport
= st
->srcport
;
636 udp
->uh_dport
= st
->dstport
;
637 udp
->uh_ulen
= htons(mtu
- sizeof(*ip6
));
640 /* Copy the payload. */
641 p
= (unsigned char *)(udp
+ 1);
642 memcpy(p
, buf
, mtu
- (p
- b
));
644 /* Calculate the UDP checksum. */
645 ph6
.ph6_src
= ip6
->ip6_src
;
646 ph6
.ph6_dst
= ip6
->ip6_dst
;
647 ph6
.ph6_len
= udp
->uh_ulen
;
648 ph6
.ph6_z0
= ph6
.ph6_z1
= ph6
.ph6_z2
= 0;
649 ph6
.ph6_nxt
= IPPROTO_UDP
;
651 ck
= ipcksum(&ph6
, sizeof(ph6
), ck
);
652 ck
= ipcksum(udp
, mtu
- sizeof(*ip6
), ck
);
653 udp
->uh_sum
= htons(ck
);
661 /* Send the whole thing off. If we're too big for the interface then we
662 * might need to trim immediately.
664 if (sendto(st
->rawudp
, b
, mtu
, 0, &st
->a
.sa
, addrsz(&st
->a
)) < 0) {
665 if (errno
== EMSGSIZE
) return (RC_LOWER
);
676 static int raw_selproc(void *stv
, fd_set
*fd_in
, struct probestate
*ps
)
678 struct raw_state
*st
= stv
;
679 unsigned char b
[65536];
683 struct icmp6_hdr
*icmp6
;
685 const unsigned char *payload
;
688 /* An ICMP packet: see what's inside. */
689 if (FD_ISSET(st
->rawicmp
, fd_in
)) {
690 if ((n
= read(st
->rawicmp
, b
, sizeof(b
))) < 0) goto fail_0
;
692 switch (st
->me
.sa
.sa_family
) {
697 if (n
< sizeof(*ip
) || n
< sizeof(4*ip
->ip_hl
) ||
698 ip
->ip_v
!= 4 || ip
->ip_p
!= IPPROTO_ICMP
)
700 n
-= sizeof(4*ip
->ip_hl
);
702 icmp
= (struct icmp
*)(b
+ 4*ip
->ip_hl
);
703 if (n
< sizeof(*icmp
) || icmp
->icmp_type
!= ICMP_UNREACH
)
705 n
-= offsetof(struct icmp
, icmp_ip
);
708 if (n
< sizeof(*ip
) ||
709 ip
->ip_p
!= IPPROTO_UDP
|| ip
->ip_hl
!= sizeof(*ip
)/4 ||
710 ip
->ip_id
!= htons(st
->q
) ||
711 ip
->ip_src
.s_addr
!= st
->me
.sin
.sin_addr
.s_addr
||
712 ip
->ip_dst
.s_addr
!= st
->a
.sin
.sin_addr
.s_addr
)
716 udp
= (struct udphdr
*)(ip
+ 1);
717 if (n
< sizeof(*udp
) || udp
->uh_sport
!= st
->srcport
||
718 udp
->uh_dport
!= st
->dstport
)
722 payload
= (const unsigned char *)(udp
+ 1);
723 if (!mypacketp(ps
, payload
, n
)) goto skip_icmp
;
725 if (icmp
->icmp_code
== ICMP_UNREACH_PORT
) return (RC_HIGHER
);
726 else if (icmp
->icmp_code
!= ICMP_UNREACH_NEEDFRAG
) goto skip_icmp
;
727 else if (icmp
->icmp_nextmtu
) return (htons(icmp
->icmp_nextmtu
));
728 else return (RC_LOWER
);
733 icmp6
= (struct icmp6_hdr
*)b
;
734 if (n
< sizeof(*icmp6
) ||
735 (icmp6
->icmp6_type
!= ICMP6_PACKET_TOO_BIG
&&
736 icmp6
->icmp6_type
!= ICMP6_DST_UNREACH
))
740 ip6
= (struct ip6_hdr
*)(icmp6
+ 1);
741 if (n
< sizeof(*ip6
) || ip6
->ip6_nxt
!= IPPROTO_UDP
||
742 memcmp(ip6
->ip6_src
.s6_addr
,
743 st
->me
.sin6
.sin6_addr
.s6_addr
, 16) ||
744 memcmp(ip6
->ip6_dst
.s6_addr
,
745 st
->a
.sin6
.sin6_addr
.s6_addr
, 16) ||
746 (ntohl(ip6
->ip6_flow
)&0xffff) != st
->q
)
750 udp
= (struct udphdr
*)(ip6
+ 1);
751 if (n
< sizeof(*udp
) || udp
->uh_sport
!= st
->srcport
||
752 udp
->uh_dport
!= st
->dstport
)
756 payload
= (const unsigned char *)(udp
+ 1);
757 if (!mypacketp(ps
, payload
, n
)) goto skip_icmp
;
759 if (icmp6
->icmp6_type
== ICMP6_PACKET_TOO_BIG
)
760 return (ntohs(icmp6
->icmp6_mtu
));
761 else switch (icmp6
->icmp6_code
) {
762 case ICMP6_DST_UNREACH_ADMIN
:
763 case ICMP6_DST_UNREACH_NOPORT
:
777 /* If we got a reply to the current probe then we're good. If we got an
778 * error, or the packet's sequence number is wrong, then ignore it.
780 if (FD_ISSET(st
->sk
, fd_in
)) {
781 if ((n
= read(st
->sk
, b
, sizeof(b
))) < 0) return (RC_OK
);
782 else if (mypacketp(ps
, b
, n
)) return (RC_HIGHER
);
792 static const struct probe_ops raw_ops
= {
793 "raw", OPS_CHAIN
, sizeof(struct raw_state
),
794 raw_setup
, raw_finish
,
795 raw_selprep
, raw_xmit
, raw_selproc
799 #define OPS_CHAIN &raw_ops
803 /*----- Doing the job on Linux --------------------------------------------*/
808 # define IP_MTU 14 /* Blech! */
812 int sol
, so_mtu_discover
, so_mtu
;
817 static int linux_setup(void *stv
, int sk
, const struct param
*pp
)
819 struct linux_state
*st
= stv
;
823 /* Check that the address is OK. */
824 switch (pp
->a
.sa
.sa_family
) {
826 st
->sol
= IPPROTO_IP
;
827 st
->so_mtu_discover
= IP_MTU_DISCOVER
;
832 st
->sol
= IPPROTO_IPV6
;
833 st
->so_mtu_discover
= IPV6_MTU_DISCOVER
;
834 st
->so_mtu
= IPV6_MTU
;
838 errno
= EPFNOSUPPORT
;
842 /* Snaffle the UDP socket. */
845 /* Turn on kernel path-MTU discovery and force DF on. */
846 i
= IP_PMTUDISC_PROBE
;
847 if (setsockopt(st
->sk
, st
->sol
, st
->so_mtu_discover
, &i
, sizeof(i
)))
850 /* Read the initial MTU guess back and report it. */
852 if (getsockopt(st
->sk
, st
->sol
, st
->so_mtu
, &mtu
, &sz
))
/* Finish hook for the Linux probe method.  It does nothing: the function
 * exists only so that the `finish' slot in the ops table can be filled.
 */
static void linux_finish(void *stv)
{
  /* Intentionally empty. */
}
861 static void linux_selprep(void *stv
, int *maxfd
, fd_set
*fd_in
)
862 { struct linux_state
*st
= stv
; ADDFD(st
->sk
); }
864 static int linux_xmit(void *stv
, int mtu
)
866 struct linux_state
*st
= stv
;
868 /* Write the packet. */
869 if (write(st
->sk
, buf
, mtu
- st
->hdrlen
) >= 0) return (RC_OK
);
870 else if (errno
== EMSGSIZE
) return (RC_LOWER
);
871 else return (RC_FAIL
);
874 static int linux_selproc(void *stv
, fd_set
*fd_in
, struct probestate
*ps
)
876 struct linux_state
*st
= stv
;
880 unsigned char b
[65536];
882 /* Read an answer. If it looks like the right kind of error then report a
883 * success. This is potentially wrong, since we can't tell whether an
884 * error was delayed from an earlier probe. However, we never return
885 * RC_LOWER from this method, so the packet sizes ought to be monotonically
886 * decreasing and this won't cause trouble. Otherwise update from the
887 * kernel's idea of the right MTU.
889 if (FD_ISSET(st
->sk
, fd_in
)) {
890 n
= read(st
->sk
, &buf
, sizeof(buf
));
892 mypacketp(ps
, b
, n
) :
893 errno
== ECONNREFUSED
|| errno
== EHOSTUNREACH
)
896 if (getsockopt(st
->sk
, st
->sol
, st
->so_mtu
, &mtu
, &sz
))
903 static const struct probe_ops linux_ops
= {
904 "linux", OPS_CHAIN
, sizeof(struct linux_state
),
905 linux_setup
, linux_finish
,
906 linux_selprep
, linux_xmit
, linux_selproc
910 #define OPS_CHAIN &linux_ops
914 /*----- Help options ------------------------------------------------------*/
916 static const struct probe_ops
*probe_ops
= OPS_CHAIN
;
918 static void version(FILE *fp
)
919 { pquis(fp
, "$, TrIPE version " VERSION
"\n"); }
921 static void usage(FILE *fp
)
923 pquis(fp
, "Usage: $ [-46v] [-H HEADER] [-m METHOD]\n\
924 [-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
927 static void help(FILE *fp
)
929 const struct probe_ops
*ops
;
938 -h, --help Show this help text.\n\
939 -V, --version Show version number.\n\
940 -u, --usage Show brief usage message.\n\
942 -4, --ipv4 Restrict to IPv4 only.\n\
943 -6, --ipv6 Restrict to IPv6 only.\n\
944 -g, --growth=FACTOR Growth factor for retransmit interval.\n\
945 -m, --method=METHOD Use METHOD to probe for MTU.\n\
946 -r, --retransmit=SECS Retransmit if no reply after SEC.\n\
947 -t, --timeout=SECS Give up expecting a reply after SECS.\n\
948 -v, --verbose Write a running commentary to stderr.\n\
949 -H, --header=HEX Packet header, in hexadecimal.\n\
953 for (ops
= probe_ops
; ops
; ops
= ops
->next
)
954 printf("\t%s\n", ops
->name
);
957 /*----- Main code ---------------------------------------------------------*/
959 int main(int argc
, char *argv
[])
961 struct param pp
= { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN
};
966 struct addrinfo aihint
= { 0 }, *ailist
, *ai
;
967 const char *host
, *svc
= "7";
972 #ifdef HAVE_GETIFADDRS
973 if ((rawicmp
= socket(PF_INET
, SOCK_RAW
, IPPROTO_ICMP
)) < 0 ||
974 (rawudp
= socket(PF_INET
, SOCK_RAW
, IPPROTO_UDP
)) < 0)
976 if ((rawicmp6
= socket(PF_INET6
, SOCK_RAW
, IPPROTO_ICMPV6
)) < 0 ||
977 (rawudp6
= socket(PF_INET6
, SOCK_RAW
, IPPROTO_RAW
)) < 0)
980 if (setuid(getuid()))
984 fillbuffer(buf
, sizeof(buf
));
986 aihint
.ai_family
= AF_UNSPEC
;
987 aihint
.ai_protocol
= IPPROTO_UDP
;
988 aihint
.ai_socktype
= SOCK_DGRAM
;
989 aihint
.ai_flags
= AI_ADDRCONFIG
;
992 static const struct option opts
[] = {
993 { "help", 0, 0, 'h' },
994 { "version", 0, 0, 'V' },
995 { "usage", 0, 0, 'u' },
996 { "ipv4", 0, 0, '4' },
997 { "ipv6", 0, 0, '6' },
998 { "header", OPTF_ARGREQ
, 0, 'H' },
999 { "growth", OPTF_ARGREQ
, 0, 'g' },
1000 { "method", OPTF_ARGREQ
, 0, 'm' },
1001 { "retransmit", OPTF_ARGREQ
, 0, 'r' },
1002 { "timeout", OPTF_ARGREQ
, 0, 't' },
1003 { "verbose", 0, 0, 'v' },
1007 i
= mdwopt(argc
, argv
, "hVu" "46H:g:m:r:t:v", opts
, 0, 0, 0);
1010 case 'h': help(stdout
); exit(0);
1011 case 'V': version(stdout
); exit(0);
1012 case 'u': usage(stdout
); exit(0);
1017 hex_decode(&hc
, optarg
, strlen(optarg
), &d
);
1018 hex_decode(&hc
, 0, 0, &d
);
1019 sz
= d
.len
< 532 ? d
.len
: 532;
1020 memcpy(buf
, d
.buf
, sz
);
1024 case '4': aihint
.ai_family
= AF_INET
; break;
1025 case '6': aihint
.ai_family
= AF_INET6
; break;
1026 case 'g': pp
.regr
= s2f(optarg
, "retransmit growth factor"); break;
1027 case 'r': pp
.retx
= s2f(optarg
, "retransmit interval"); break;
1028 case 't': pp
.timeout
= s2f(optarg
, "timeout"); break;
1031 for (pp
.pops
= OPS_CHAIN
; pp
.pops
; pp
.pops
= pp
.pops
->next
)
1032 if (strcmp(pp
.pops
->name
, optarg
) == 0) goto found_alg
;
1033 die(EXIT_FAILURE
, "unknown probe algorithm `%s'", optarg
);
1037 case 'v': pp
.f
|= F_VERBOSE
; break;
1044 argv
+= optind
; argc
-= optind
;
1045 if ((f
& f_bogus
) || 1 > argc
|| argc
> 2) {
1051 if (argv
[1]) svc
= argv
[1];
1052 if ((err
= getaddrinfo(host
, svc
, &aihint
, &ailist
)) != 0) {
1053 die(EXIT_FAILURE
, "unknown host `%s' or service `%s': %s",
1054 host
, svc
, gai_strerror(err
));
1056 for (ai
= ailist
; ai
&& !addrfamok(ai
->ai_family
); ai
= ai
->ai_next
);
1057 if (!ai
) die(EXIT_FAILURE
, "no supported address families for `%s'", host
);
1058 assert(ai
->ai_addrlen
<= sizeof(pp
.a
));
1059 memcpy(&pp
.a
, ai
->ai_addr
, ai
->ai_addrlen
);
1063 die(EXIT_FAILURE
, "failed to discover MTU: %s", strerror(errno
));
1065 if (ferror(stdout
) || fflush(stdout
) || fclose(stdout
))
1066 die(EXIT_FAILURE
, "failed to write result: %s", strerror(errno
));
1070 /*----- That's all, folks -------------------------------------------------*/