3 * Report MTU on path to specified host
5 * (c) 2008 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Trivial IP Encryption (TrIPE).
12 * TrIPE is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
17 * TrIPE is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 * You should have received a copy of the GNU General Public License
23 * along with TrIPE. If not, see <https://www.gnu.org/licenses/>.
26 /*----- Header files ------------------------------------------------------*/
38 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <netinet/in.h>
44 #include <arpa/inet.h>
47 #include <netinet/in_systm.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip_icmp.h>
50 #include <netinet/ip6.h>
51 #include <netinet/icmp6.h>
52 #include <netinet/udp.h>
56 #include <sys/ioctl.h>
58 #include <mLib/alloc.h>
59 #include <mLib/bits.h>
60 #include <mLib/dstr.h>
62 #include <mLib/mdwopt.h>
63 #include <mLib/quis.h>
64 #include <mLib/report.h>
67 /*----- Static variables --------------------------------------------------*/
69 static unsigned char buf
[65536];
73 /*----- Utility functions -------------------------------------------------*/
75 /* Step a value according to a simple LFSR. */
77 do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0)
79 /* Fill buffer with a constant but pseudorandom string. Uses a simple
82 static void fillbuffer(unsigned char *p
, size_t sz
)
84 unsigned int y
= 0xbc20;
85 const unsigned char *l
= p
+ sz
;
90 for (i
= 0; i
< 8; i
++) STEP(y
);
94 /* Convert a string to floating point. */
95 static double s2f(const char *s
, const char *what
)
102 if (errno
|| *q
) die(EXIT_FAILURE
, "bad %s", what
);
106 /* Convert a floating-point value into a struct timeval. */
107 static void f2tv(struct timeval
*tv
, double t
)
108 { tv
->tv_sec
= t
; tv
->tv_usec
= (t
- tv
->tv_sec
)*MILLION
; }
112 struct sockaddr_in sin
;
113 struct sockaddr_in6 sin6
;
116 /* Check whether an address family is even slightly supported. */
117 static int addrfamok(int af
)
120 case AF_INET
: case AF_INET6
: return (1);
125 /* Return the size of a socket address. */
126 static size_t addrsz(const union addr
*a
)
128 switch (a
->sa
.sa_family
) {
129 case AF_INET
: return (sizeof(a
->sin
));
130 case AF_INET6
: return (sizeof(a
->sin6
));
135 /*----- Main algorithm skeleton -------------------------------------------*/
138 unsigned f
; /* Various flags */
139 #define F_VERBOSE 1u /* Give a running commentary */
140 double retx
; /* Initial retransmit interval */
141 double regr
; /* Retransmit growth factor */
142 double timeout
; /* Retransmission timeout */
143 int seqoff
; /* Offset to write sequence number */
144 const struct probe_ops
*pops
; /* Probe algorithm description */
145 union addr a
; /* Destination address */
149 const struct param
*pp
;
155 const struct probe_ops
*next
;
157 int (*setup
)(void *, int, const struct param
*);
158 void (*finish
)(void *);
159 void (*selprep
)(void *, int *, fd_set
*);
160 int (*xmit
)(void *, int);
161 int (*selproc
)(void *, fd_set
*, struct probestate
*);
172 /* or a positive MTU upper-bound */
175 /* Add a file descriptor FD to the set `fd_in', updating `*maxfd'. */
177 do { FD_SET(fd, fd_in); if (*maxfd < fd) *maxfd = fd; } while (0)
179 /* Check whether a buffer contains a packet from our current probe. */
180 static int mypacketp(struct probestate
*ps
,
181 const unsigned char *p
, size_t sz
)
183 const struct param
*pp
= ps
->pp
;
185 return (sz
>= pp
->seqoff
+ 2 && LOAD16(p
+ pp
->seqoff
) == ps
->q
);
188 /* See whether MTU is an acceptable MTU value. Return an appropriate
189 * RC_... code or a new suggested MTU.
191 static int probe(struct probestate
*ps
, void *st
, int mtu
)
193 const struct param
*pp
= ps
->pp
;
195 struct timeval tv
, now
, when
, done
;
196 double timer
= pp
->retx
;
199 /* Set up the first retransmit and give-up timers. */
200 gettimeofday(&now
, 0);
201 f2tv(&tv
, pp
->timeout
); TV_ADD(&done
, &now
, &tv
);
202 f2tv(&tv
, timer
); TV_ADD(&when
, &now
, &tv
);
203 if (TV_CMP(&when
, >, &done
)) when
= done
;
205 /* Send the initial probe. */
206 if (pp
->f
& F_VERBOSE
)
207 moan("sending probe of size %d (seq = %04x)", mtu
, ps
->q
);
209 STORE16(buf
+ pp
->seqoff
, ps
->q
);
210 if ((rc
= pp
->pops
->xmit(st
, mtu
)) != RC_OK
) return (rc
);
214 /* Wait for something interesting to happen. */
215 maxfd
= 0; FD_ZERO(&fd_in
);
216 pp
->pops
->selprep(st
, &maxfd
, &fd_in
);
217 TV_SUB(&tv
, &when
, &now
);
218 if (select(maxfd
+ 1, &fd_in
, 0, 0, &tv
) < 0) return (RC_FAIL
);
219 gettimeofday(&now
, 0);
221 /* See whether the probe method has any answers for us. */
222 if ((rc
= pp
->pops
->selproc(st
, &fd_in
, ps
)) != RC_OK
) return (rc
);
224 /* If we've waited too long, give up. If we should retransmit, do
227 if (TV_CMP(&now
, >, &done
))
229 else if (TV_CMP(&now
, >, &when
)) {
230 if (pp
->f
& F_VERBOSE
) moan("re-sending probe of size %d", mtu
);
231 if ((rc
= pp
->pops
->xmit(st
, mtu
)) != RC_OK
) return (rc
);
233 timer
*= pp
->regr
; f2tv(&tv
, timer
); TV_ADD(&when
, &when
, &tv
);
234 } while (TV_CMP(&when
, <, &now
));
235 if (TV_CMP(&when
, >, &done
)) when
= done
;
240 /* Discover the path MTU to the destination address. */
241 static int pathmtu(const struct param
*pp
)
247 struct probestate ps
;
249 /* Build and connect a UDP socket. We'll need this to know the local port
250 * number to use if nothing else. Set other stuff up.
252 if ((sk
= socket(pp
->a
.sa
.sa_family
, SOCK_DGRAM
, IPPROTO_UDP
)) < 0)
254 if (connect(sk
, &pp
->a
.sa
, addrsz(&pp
->a
))) goto fail_1
;
255 st
= xmalloc(pp
->pops
->statesz
);
256 if ((mtu
= pp
->pops
->setup(st
, sk
, pp
)) < 0) goto fail_2
;
257 ps
.pp
= pp
; ps
.q
= rand() & 0xffff;
258 switch (pp
->a
.sa
.sa_family
) {
259 case AF_INET
: lo
= 576; break;
260 case AF_INET6
: lo
= 1280; break;
264 if (hi
< lo
) { errno
= EMSGSIZE
; return (-1); }
266 /* And now we do a thing which is sort of like a binary search, except that
267 * we also take explicit clues as establishing a new upper bound, and we
268 * try to hug that initially.
271 assert(lo
<= mtu
&& mtu
<= hi
);
272 if (pp
->f
& F_VERBOSE
) moan("probe: %d <= %d <= %d", lo
, mtu
, hi
);
273 rc
= probe(&ps
, st
, mtu
);
277 if (pp
->f
& F_VERBOSE
) moan("probe failed");
281 /* If we've not seen a dropped packet before then we don't know what
282 * this means yet -- in particular, we don't know which bit of the
283 * network is swallowing packets. Send a minimum-size probe. If
284 * that doesn't come back then assume that the remote host is
285 * swallowing our packets. If it does, then we assume that dropped
286 * packets are a result of ICMP fragmentation-needed reports being
287 * lost or suppressed.
289 if (pp
->f
& F_VERBOSE
) moan("gave up: black hole detected");
291 if (pp
->f
& F_VERBOSE
) moan("sending minimum-size probe");
292 switch (probe(&ps
, st
, lo
)) {
296 if (pp
->f
& F_VERBOSE
) {
297 moan("no reply from min-size probe: "
298 "assume black hole at target");
303 if (pp
->f
& F_VERBOSE
) {
304 moan("reply from min-size probe OK: "
305 "assume black hole in network");
310 if (pp
->f
& F_VERBOSE
)
311 moan("unexpected return code from probe");
317 if (droppy
) goto higher
; else goto lower
;
322 if (pp
->f
& F_VERBOSE
)
323 moan("probe returned: remote host is not a black hole");
327 if (pp
->f
& F_VERBOSE
) moan("probe returned: found correct MTU");
332 /* Now we must make a new guess, between lo and hi. We know that lo
333 * is good; but we're not so sure about hi here. We know that hi >
334 * lo, so this will find an approximate midpoint, greater than lo and
337 if (pp
->f
& F_VERBOSE
) moan("probe returned: guessing higher");
338 mtu
+= (hi
- lo
+ 1)/2;
343 /* If this didn't work, and we're already at the bottom of our
344 * possible range, then something has gone horribly wrong.
349 if (pp
->f
& F_VERBOSE
) moan("error returned: found correct MTU");
354 /* We must make a new guess, between lo and hi. We're probably
355 * fairly sure that lo will succeed, since either it's the minimum
356 * MTU or we've tested it already; but we're not quite sure about hi,
357 * so we want to aim high.
359 if (pp
->f
& F_VERBOSE
) moan("error returned: guessing lower");
360 mtu
-= (hi
- lo
+ 1)/2;
364 if (pp
->f
& F_VERBOSE
) moan("error returned with new MTU estimate");
371 /* Clean up and return our result. */
372 pp
->pops
->finish(st
);
378 pp
->pops
->finish(st
);
387 /*----- Doing it the hard way ---------------------------------------------*/
389 #if defined(linux) || defined(__OpenBSD__)
394 # define sane_htons htons
395 # define sane_htonl htonl
/* Raw sockets for the IPv4 probe.  `main' assigns the descriptors from its
 * socket(2) calls; `raw_setup' copies a nonzero `rawerr' into `errno' and
 * fails.
 */
static int rawicmp = -1;
static int rawudp = -1;
static int rawerr = 0;

/* The same again for IPv6. */
static int rawicmp6 = -1;
static int rawudp6 = -1;
static int rawerr6 = 0;
404 #define IPCK_INIT 0xffff
406 /* Compare two addresses. Maybe compare the port numbers too. */
408 static int addreq(const union addr
*a
, const union addr
*b
, unsigned f
)
410 switch (a
->sa
.sa_family
) {
412 return (a
->sin
.sin_addr
.s_addr
== b
->sin
.sin_addr
.s_addr
&&
413 (!(f
&AEF_PORT
) || a
->sin
.sin_port
== b
->sin
.sin_port
));
415 return (!memcmp(a
->sin6
.sin6_addr
.s6_addr
,
416 b
->sin6
.sin6_addr
.s6_addr
, 16) &&
417 (!(f
&AEF_PORT
) || a
->sin6
.sin6_port
== b
->sin6
.sin6_port
));
/* Compute an IP checksum over some data.  This is a restartable interface:
 * initialize A to `IPCK_INIT' for the first call, and pass the value
 * returned by the previous call as A to continue over a further chunk.
 *
 * The N bytes at BUF are added up as big-endian 16-bit words (via
 * LOAD16_B); a trailing odd byte is treated as the high octet of one final
 * word.  Every call starts summing on a fresh word boundary, so only the
 * last chunk of a sequence should have odd length.
 *
 * N may be zero, in which case BUF is not examined at all.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;        /* Undo the complement on A */
  const unsigned char *p = buf;
  size_t left = n;

  /* Count the bytes remaining rather than comparing against `end - 1':
   * with an empty buffer that pointer would lie before the start of the
   * object, which is undefined.
   */
  for (; left >= 2; left -= 2, p += 2) aa += LOAD16_B(p);
  if (left) aa += (unsigned)(*p) << 8;

  /* Fold the carries back in until the sum fits in 16 bits. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);

  /* Return the complement of the sum, except that an all-ones sum is
   * returned as it stands.
   */
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
437 /* TCP/UDP pseudoheader structure. */
439 struct in_addr ph_src
, ph_dst
;
444 struct in6_addr ph6_src
, ph6_dst
;
446 uint8_t ph6_z0
, ph6_z1
, ph6_z2
, ph6_nxt
;
451 int sk
, rawicmp
, rawudp
;
452 uint16_t srcport
, dstport
;
456 static int raw_setup(void *stv
, int sk
, const struct param
*pp
)
458 struct raw_state
*st
= stv
;
461 struct ifaddrs
*ifa
, *ifaa
, *ifap
;
463 struct icmp6_filter f6
;
465 /* Check that the address is OK, and that we have the necessary raw
468 * For IPv6, also set the filter so we don't get too many useless wakeups.
470 switch (pp
->a
.sa
.sa_family
) {
472 if (rawerr
) { errno
= rawerr
; goto fail_0
; }
473 st
->rawicmp
= rawicmp
; st
->rawudp
= rawudp
; st
->sk
= sk
;
474 /* IPv4 filtering is available on Linux but isn't portable. */
477 if (rawerr6
) { errno
= rawerr6
; goto fail_0
; }
478 st
->rawicmp
= rawicmp6
; st
->rawudp
= rawudp6
; st
->sk
= sk
;
479 ICMP6_FILTER_SETBLOCKALL(&f6
);
480 ICMP6_FILTER_SETPASS(ICMP6_PACKET_TOO_BIG
, &f6
);
481 ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH
, &f6
);
482 if (setsockopt(st
->rawicmp
, IPPROTO_ICMPV6
, ICMP6_FILTER
,
484 die(EXIT_FAILURE
, "failed to set icmpv6 filter: %s",
489 errno
= EPFNOSUPPORT
; goto fail_0
;
492 /* Initialize the sequence number. */
493 st
->q
= rand() & 0xffff;
495 /* Snaffle the local and remote address and port number. */
498 if (getsockname(sk
, &st
->me
.sa
, &sz
))
501 /* Only now do some fiddling because Linux doesn't like port numbers in
502 * IPv6 raw destination addresses...
504 switch (pp
->a
.sa
.sa_family
) {
506 st
->srcport
= st
->me
.sin
.sin_port
; st
->me
.sin
.sin_port
= 0;
507 st
->dstport
= st
->a
.sin
.sin_port
; st
->a
.sin
.sin_port
= 0;
510 st
->srcport
= st
->me
.sin6
.sin6_port
; st
->me
.sin6
.sin6_port
= 0;
511 st
->dstport
= st
->a
.sin6
.sin6_port
; st
->a
.sin6
.sin6_port
= 0;
517 /* There isn't a portable way to force the DF flag onto a packet through
518 * UDP, or even through raw IP, unless we write the entire IP header
519 * ourselves. This is somewhat annoying, especially since we have an
520 * uphill struggle keeping track of which systems randomly expect which
521 * header fields to be presented in host byte order. Oh, well.
524 if (setsockopt(rawudp
, IPPROTO_IP
, IP_HDRINCL
, &i
, sizeof(i
))) goto fail_0
;
526 /* Find an upper bound on the MTU. Do two passes over the interface
527 * list. If we can find matches for our local address then use the
528 * highest one of those; otherwise do a second pass and simply take the
529 * highest MTU of any network interface.
531 if (getifaddrs(&ifaa
)) goto fail_0
;
532 for (i
= 0; i
< 2; i
++) {
533 for (ifap
= 0, ifa
= ifaa
; ifa
; ifa
= ifa
->ifa_next
) {
534 if (!(ifa
->ifa_flags
& IFF_UP
) || !ifa
->ifa_addr
||
535 ifa
->ifa_addr
->sa_family
!= st
->me
.sa
.sa_family
||
537 !addreq((union addr
*)ifa
->ifa_addr
, &st
->me
, 0)) ||
538 (i
== 1 && ifap
&& strcmp(ifap
->ifa_name
, ifa
->ifa_name
) == 0) ||
539 strlen(ifa
->ifa_name
) >= sizeof(ifr
.ifr_name
))
542 strcpy(ifr
.ifr_name
, ifa
->ifa_name
);
543 if (ioctl(sk
, SIOCGIFMTU
, &ifr
)) goto fail_1
;
544 if (mtu
< ifr
.ifr_mtu
) mtu
= ifr
.ifr_mtu
;
548 if (mtu
< 0) { errno
= ENOTCONN
; goto fail_1
; }
/* The `finish' operation of the raw probe method.  It has nothing to do:
 * STV is ignored.
 */
static void raw_finish(void *stv)
{
  (void)stv;
}
562 static void raw_selprep(void *stv
, int *maxfd
, fd_set
*fd_in
)
563 { struct raw_state
*st
= stv
; ADDFD(st
->sk
); ADDFD(st
->rawicmp
); }
565 static int raw_xmit(void *stv
, int mtu
)
567 struct raw_state
*st
= stv
;
568 unsigned char b
[65536], *p
;
576 switch (st
->a
.sa
.sa_family
) {
580 /* Build the IP header. */
583 ip
->ip_hl
= sizeof(*ip
)/4;
584 ip
->ip_tos
= IPTOS_RELIABILITY
;
585 ip
->ip_len
= sane_htons(mtu
);
586 STEP(st
->q
); ip
->ip_id
= htons(st
->q
);
587 ip
->ip_off
= sane_htons(0 | IP_DF
);
589 ip
->ip_p
= IPPROTO_UDP
;
591 ip
->ip_src
= st
->me
.sin
.sin_addr
;
592 ip
->ip_dst
= st
->a
.sin
.sin_addr
;
594 /* Build a UDP packet in the output buffer. */
595 udp
= (struct udphdr
*)(ip
+ 1);
596 udp
->uh_sport
= st
->srcport
;
597 udp
->uh_dport
= st
->dstport
;
598 udp
->uh_ulen
= htons(mtu
- sizeof(*ip
));
601 /* Copy the payload. */
602 p
= (unsigned char *)(udp
+ 1);
603 memcpy(p
, buf
, mtu
- (p
- b
));
605 /* Calculate the UDP checksum. */
606 ph
.ph_src
= ip
->ip_src
;
607 ph
.ph_dst
= ip
->ip_dst
;
609 ph
.ph_p
= IPPROTO_UDP
;
610 ph
.ph_len
= udp
->uh_ulen
;
612 ck
= ipcksum(&ph
, sizeof(ph
), ck
);
613 ck
= ipcksum(udp
, mtu
- sizeof(*ip
), ck
);
614 udp
->uh_sum
= htons(ck
);
620 /* Build the IP header. */
621 ip6
= (struct ip6_hdr
*)b
;
622 STEP(st
->q
); ip6
->ip6_flow
= htonl(0x60000000 | st
->q
);
623 ip6
->ip6_plen
= htons(mtu
- sizeof(*ip6
));
624 ip6
->ip6_nxt
= IPPROTO_UDP
;
626 ip6
->ip6_src
= st
->me
.sin6
.sin6_addr
;
627 ip6
->ip6_dst
= st
->a
.sin6
.sin6_addr
;
629 /* Build a UDP packet in the output buffer. */
630 udp
= (struct udphdr
*)(ip6
+ 1);
631 udp
->uh_sport
= st
->srcport
;
632 udp
->uh_dport
= st
->dstport
;
633 udp
->uh_ulen
= htons(mtu
- sizeof(*ip6
));
636 /* Copy the payload. */
637 p
= (unsigned char *)(udp
+ 1);
638 memcpy(p
, buf
, mtu
- (p
- b
));
640 /* Calculate the UDP checksum. */
641 ph6
.ph6_src
= ip6
->ip6_src
;
642 ph6
.ph6_dst
= ip6
->ip6_dst
;
643 ph6
.ph6_len
= udp
->uh_ulen
;
644 ph6
.ph6_z0
= ph6
.ph6_z1
= ph6
.ph6_z2
= 0;
645 ph6
.ph6_nxt
= IPPROTO_UDP
;
647 ck
= ipcksum(&ph6
, sizeof(ph6
), ck
);
648 ck
= ipcksum(udp
, mtu
- sizeof(*ip6
), ck
);
649 udp
->uh_sum
= htons(ck
);
657 /* Send the whole thing off. If we're too big for the interface then we
658 * might need to trim immediately.
660 if (sendto(st
->rawudp
, b
, mtu
, 0, &st
->a
.sa
, addrsz(&st
->a
)) < 0) {
661 if (errno
== EMSGSIZE
) return (RC_LOWER
);
672 static int raw_selproc(void *stv
, fd_set
*fd_in
, struct probestate
*ps
)
674 struct raw_state
*st
= stv
;
675 unsigned char b
[65536];
679 struct icmp6_hdr
*icmp6
;
681 const unsigned char *payload
;
684 /* An ICMP packet: see what's inside. */
685 if (FD_ISSET(st
->rawicmp
, fd_in
)) {
686 if ((n
= read(st
->rawicmp
, b
, sizeof(b
))) < 0) goto fail_0
;
688 switch (st
->me
.sa
.sa_family
) {
693 if (n
< sizeof(*ip
) || n
< sizeof(4*ip
->ip_hl
) ||
694 ip
->ip_v
!= 4 || ip
->ip_p
!= IPPROTO_ICMP
)
696 n
-= sizeof(4*ip
->ip_hl
);
698 icmp
= (struct icmp
*)(b
+ 4*ip
->ip_hl
);
699 if (n
< sizeof(*icmp
) || icmp
->icmp_type
!= ICMP_UNREACH
)
701 n
-= offsetof(struct icmp
, icmp_ip
);
704 if (n
< sizeof(*ip
) ||
705 ip
->ip_p
!= IPPROTO_UDP
|| ip
->ip_hl
!= sizeof(*ip
)/4 ||
706 ip
->ip_id
!= htons(st
->q
) ||
707 ip
->ip_src
.s_addr
!= st
->me
.sin
.sin_addr
.s_addr
||
708 ip
->ip_dst
.s_addr
!= st
->a
.sin
.sin_addr
.s_addr
)
712 udp
= (struct udphdr
*)(ip
+ 1);
713 if (n
< sizeof(*udp
) || udp
->uh_sport
!= st
->srcport
||
714 udp
->uh_dport
!= st
->dstport
)
718 payload
= (const unsigned char *)(udp
+ 1);
719 if (!mypacketp(ps
, payload
, n
)) goto skip_icmp
;
721 if (icmp
->icmp_code
== ICMP_UNREACH_PORT
) return (RC_HIGHER
);
722 else if (icmp
->icmp_code
!= ICMP_UNREACH_NEEDFRAG
) goto skip_icmp
;
723 else if (icmp
->icmp_nextmtu
) return (htons(icmp
->icmp_nextmtu
));
724 else return (RC_LOWER
);
729 icmp6
= (struct icmp6_hdr
*)b
;
730 if (n
< sizeof(*icmp6
) ||
731 (icmp6
->icmp6_type
!= ICMP6_PACKET_TOO_BIG
&&
732 icmp6
->icmp6_type
!= ICMP6_DST_UNREACH
))
736 ip6
= (struct ip6_hdr
*)(icmp6
+ 1);
737 if (n
< sizeof(*ip6
) || ip6
->ip6_nxt
!= IPPROTO_UDP
||
738 memcmp(ip6
->ip6_src
.s6_addr
,
739 st
->me
.sin6
.sin6_addr
.s6_addr
, 16) ||
740 memcmp(ip6
->ip6_dst
.s6_addr
,
741 st
->a
.sin6
.sin6_addr
.s6_addr
, 16) ||
742 (ntohl(ip6
->ip6_flow
)&0xffff) != st
->q
)
746 udp
= (struct udphdr
*)(ip6
+ 1);
747 if (n
< sizeof(*udp
) || udp
->uh_sport
!= st
->srcport
||
748 udp
->uh_dport
!= st
->dstport
)
752 payload
= (const unsigned char *)(udp
+ 1);
753 if (!mypacketp(ps
, payload
, n
)) goto skip_icmp
;
755 if (icmp6
->icmp6_type
== ICMP6_PACKET_TOO_BIG
)
756 return (ntohs(icmp6
->icmp6_mtu
));
757 else switch (icmp6
->icmp6_code
) {
758 case ICMP6_DST_UNREACH_ADMIN
:
759 case ICMP6_DST_UNREACH_NOPORT
:
773 /* If we got a reply to the current probe then we're good. If we got an
774 * error, or the packet's sequence number is wrong, then ignore it.
776 if (FD_ISSET(st
->sk
, fd_in
)) {
777 if ((n
= read(st
->sk
, b
, sizeof(b
))) < 0) return (RC_OK
);
778 else if (mypacketp(ps
, b
, n
)) return (RC_HIGHER
);
788 static const struct probe_ops raw_ops
= {
789 "raw", OPS_CHAIN
, sizeof(struct raw_state
),
790 raw_setup
, raw_finish
,
791 raw_selprep
, raw_xmit
, raw_selproc
795 #define OPS_CHAIN &raw_ops
797 /*----- Doing the job on Linux --------------------------------------------*/
802 # define IP_MTU 14 /* Blech! */
806 int sol
, so_mtu_discover
, so_mtu
;
811 static int linux_setup(void *stv
, int sk
, const struct param
*pp
)
813 struct linux_state
*st
= stv
;
817 /* Check that the address is OK. */
818 switch (pp
->a
.sa
.sa_family
) {
820 st
->sol
= IPPROTO_IP
;
821 st
->so_mtu_discover
= IP_MTU_DISCOVER
;
826 st
->sol
= IPPROTO_IPV6
;
827 st
->so_mtu_discover
= IPV6_MTU_DISCOVER
;
828 st
->so_mtu
= IPV6_MTU
;
832 errno
= EPFNOSUPPORT
;
836 /* Snaffle the UDP socket. */
839 /* Turn on kernel path-MTU discovery and force DF on. */
840 i
= IP_PMTUDISC_PROBE
;
841 if (setsockopt(st
->sk
, st
->sol
, st
->so_mtu_discover
, &i
, sizeof(i
)))
844 /* Read the initial MTU guess back and report it. */
846 if (getsockopt(st
->sk
, st
->sol
, st
->so_mtu
, &mtu
, &sz
))
/* The `finish' operation of the Linux probe method.  It has nothing to do:
 * STV is ignored.
 */
static void linux_finish(void *stv)
{
  (void)stv;
}
855 static void linux_selprep(void *stv
, int *maxfd
, fd_set
*fd_in
)
856 { struct linux_state
*st
= stv
; ADDFD(st
->sk
); }
858 static int linux_xmit(void *stv
, int mtu
)
860 struct linux_state
*st
= stv
;
862 /* Write the packet. */
863 if (write(st
->sk
, buf
, mtu
- st
->hdrlen
) >= 0) return (RC_OK
);
864 else if (errno
== EMSGSIZE
) return (RC_LOWER
);
865 else return (RC_FAIL
);
868 static int linux_selproc(void *stv
, fd_set
*fd_in
, struct probestate
*ps
)
870 struct linux_state
*st
= stv
;
874 unsigned char b
[65536];
876 /* Read an answer. If it looks like the right kind of error then report a
877 * success. This is potentially wrong, since we can't tell whether an
878 * error was delayed from an earlier probe. However, we never return
879 * RC_LOWER from this method, so the packet sizes ought to be monotonically
880 * decreasing and this won't cause trouble. Otherwise update from the
881 * kernel's idea of the right MTU.
883 if (FD_ISSET(st
->sk
, fd_in
)) {
884 n
= read(st
->sk
, &buf
, sizeof(buf
));
886 mypacketp(ps
, b
, n
) :
887 errno
== ECONNREFUSED
|| errno
== EHOSTUNREACH
)
890 if (getsockopt(st
->sk
, st
->sol
, st
->so_mtu
, &mtu
, &sz
))
897 static const struct probe_ops linux_ops
= {
898 "linux", OPS_CHAIN
, sizeof(struct linux_state
),
899 linux_setup
, linux_finish
,
900 linux_selprep
, linux_xmit
, linux_selproc
904 #define OPS_CHAIN &linux_ops
908 /*----- Help options ------------------------------------------------------*/
910 static const struct probe_ops
*probe_ops
= OPS_CHAIN
;
912 static void version(FILE *fp
)
913 { pquis(fp
, "$, TrIPE version " VERSION
"\n"); }
915 static void usage(FILE *fp
)
917 pquis(fp
, "Usage: $ [-46v] [-H HEADER] [-m METHOD]\n\
918 [-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
921 static void help(FILE *fp
)
923 const struct probe_ops
*ops
;
932 -h, --help Show this help text.\n\
933 -V, --version Show version number.\n\
934 -u, --usage Show brief usage message.\n\
936 -4, --ipv4 Restrict to IPv4 only.\n\
937 -6, --ipv6 Restrict to IPv6 only.\n\
938 -g, --growth=FACTOR Growth factor for retransmit interval.\n\
939 -m, --method=METHOD Use METHOD to probe for MTU.\n\
940 -r, --retransmit=SECS Retransmit if no reply after SEC.\n\
941 -t, --timeout=SECS Give up expecting a reply after SECS.\n\
942 -v, --verbose Write a running commentary to stderr.\n\
943 -H, --header=HEX Packet header, in hexadecimal.\n\
947 for (ops
= probe_ops
; ops
; ops
= ops
->next
)
948 printf("\t%s\n", ops
->name
);
951 /*----- Main code ---------------------------------------------------------*/
953 int main(int argc
, char *argv
[])
955 struct param pp
= { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN
};
960 struct addrinfo aihint
= { 0 }, *ailist
, *ai
;
961 const char *host
, *svc
= "7";
966 if ((rawicmp
= socket(PF_INET
, SOCK_RAW
, IPPROTO_ICMP
)) < 0 ||
967 (rawudp
= socket(PF_INET
, SOCK_RAW
, IPPROTO_UDP
)) < 0)
969 if ((rawicmp6
= socket(PF_INET6
, SOCK_RAW
, IPPROTO_ICMPV6
)) < 0 ||
970 (rawudp6
= socket(PF_INET6
, SOCK_RAW
, IPPROTO_RAW
)) < 0)
972 if (setuid(getuid()))
976 fillbuffer(buf
, sizeof(buf
));
978 aihint
.ai_family
= AF_UNSPEC
;
979 aihint
.ai_protocol
= IPPROTO_UDP
;
980 aihint
.ai_socktype
= SOCK_DGRAM
;
981 aihint
.ai_flags
= AI_ADDRCONFIG
;
984 static const struct option opts
[] = {
985 { "help", 0, 0, 'h' },
986 { "version", 0, 0, 'V' },
987 { "usage", 0, 0, 'u' },
988 { "ipv4", 0, 0, '4' },
989 { "ipv6", 0, 0, '6' },
990 { "header", OPTF_ARGREQ
, 0, 'H' },
991 { "growth", OPTF_ARGREQ
, 0, 'g' },
992 { "method", OPTF_ARGREQ
, 0, 'm' },
993 { "retransmit", OPTF_ARGREQ
, 0, 'r' },
994 { "timeout", OPTF_ARGREQ
, 0, 't' },
995 { "verbose", 0, 0, 'v' },
999 i
= mdwopt(argc
, argv
, "hVu" "46H:g:m:r:t:v", opts
, 0, 0, 0);
1002 case 'h': help(stdout
); exit(0);
1003 case 'V': version(stdout
); exit(0);
1004 case 'u': usage(stdout
); exit(0);
1009 hex_decode(&hc
, optarg
, strlen(optarg
), &d
);
1010 hex_decode(&hc
, 0, 0, &d
);
1011 sz
= d
.len
< 532 ? d
.len
: 532;
1012 memcpy(buf
, d
.buf
, sz
);
1016 case '4': aihint
.ai_family
= AF_INET
; break;
1017 case '6': aihint
.ai_family
= AF_INET6
; break;
1018 case 'g': pp
.regr
= s2f(optarg
, "retransmit growth factor"); break;
1019 case 'r': pp
.retx
= s2f(optarg
, "retransmit interval"); break;
1020 case 't': pp
.timeout
= s2f(optarg
, "timeout"); break;
1023 for (pp
.pops
= OPS_CHAIN
; pp
.pops
; pp
.pops
= pp
.pops
->next
)
1024 if (strcmp(pp
.pops
->name
, optarg
) == 0) goto found_alg
;
1025 die(EXIT_FAILURE
, "unknown probe algorithm `%s'", optarg
);
1029 case 'v': pp
.f
|= F_VERBOSE
; break;
1036 argv
+= optind
; argc
-= optind
;
1037 if ((f
& f_bogus
) || 1 > argc
|| argc
> 2) {
1043 if (argv
[1]) svc
= argv
[1];
1044 if ((err
= getaddrinfo(host
, svc
, &aihint
, &ailist
)) != 0) {
1045 die(EXIT_FAILURE
, "unknown host `%s' or service `%s': %s",
1046 host
, svc
, gai_strerror(err
));
1048 for (ai
= ailist
; ai
&& !addrfamok(ai
->ai_family
); ai
= ai
->ai_next
);
1049 if (!ai
) die(EXIT_FAILURE
, "no supported address families for `%s'", host
);
1050 assert(ai
->ai_addrlen
<= sizeof(pp
.a
));
1051 memcpy(&pp
.a
, ai
->ai_addr
, ai
->ai_addrlen
);
1055 die(EXIT_FAILURE
, "failed to discover MTU: %s", strerror(errno
));
1057 if (ferror(stdout
) || fflush(stdout
) || fclose(stdout
))
1058 die(EXIT_FAILURE
, "failed to write result: %s", strerror(errno
));
1062 /*----- That's all, folks -------------------------------------------------*/