Commit | Line | Data |
---|---|---|
c64d8cd5 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Report MTU on path to specified host | |
4 | * | |
5 | * (c) 2008 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Trivial IP Encryption (TrIPE). | |
11 | * | |
11ad66c2 MW |
12 | * TrIPE is free software: you can redistribute it and/or modify it under |
13 | * the terms of the GNU General Public License as published by the Free | |
14 | * Software Foundation; either version 3 of the License, or (at your | |
15 | * option) any later version. | |
c64d8cd5 | 16 | * |
11ad66c2 MW |
17 | * TrIPE is distributed in the hope that it will be useful, but WITHOUT |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
20 | * for more details. | |
c64d8cd5 MW |
21 | * |
22 | * You should have received a copy of the GNU General Public License | |
11ad66c2 | 23 | * along with TrIPE. If not, see <https://www.gnu.org/licenses/>. |
c64d8cd5 MW |
24 | */ |
25 | ||
26 | /*----- Header files ------------------------------------------------------*/ | |
27 | ||
28 | #include "config.h" | |
29 | ||
d245350a | 30 | #include <assert.h> |
c64d8cd5 | 31 | #include <errno.h> |
88510d86 | 32 | #include <stddef.h> |
c64d8cd5 MW |
33 | #include <stdio.h> |
34 | #include <stdlib.h> | |
35 | #include <string.h> | |
36 | #include <time.h> | |
37 | ||
38 | #include <sys/types.h> | |
39 | #include <sys/time.h> | |
40 | #include <unistd.h> | |
41 | ||
42 | #include <sys/socket.h> | |
43 | #include <netinet/in.h> | |
44 | #include <arpa/inet.h> | |
45 | #include <netdb.h> | |
46 | ||
88510d86 MW |
47 | #include <netinet/in_systm.h> |
48 | #include <netinet/ip.h> | |
49 | #include <netinet/ip_icmp.h> | |
50 | #include <netinet/udp.h> | |
51 | ||
52 | #include <net/if.h> | |
53 | #include <ifaddrs.h> | |
54 | #include <sys/ioctl.h> | |
55 | ||
56 | #include <mLib/alloc.h> | |
57 | #include <mLib/bits.h> | |
c64d8cd5 MW |
58 | #include <mLib/dstr.h> |
59 | #include <mLib/hex.h> | |
60 | #include <mLib/mdwopt.h> | |
61 | #include <mLib/quis.h> | |
62 | #include <mLib/report.h> | |
63 | #include <mLib/tv.h> | |
64 | ||
65 | /*----- Static variables --------------------------------------------------*/ | |
66 | ||
/* Outgoing probe payload.  `main' fills this with pseudorandom data and
 * copies any user-supplied header over the front; `probe' stores the
 * current sequence number at offset `seqoff' before each transmission.
 */
static unsigned char buf[65536];

/* Feedback polynomial applied by `STEP'. */
#define POLY 0x1002d

/*----- Utility functions -------------------------------------------------*/

/* Step a value according to a simple LFSR.  Q must be a modifiable lvalue
 * and is evaluated more than once.  When bit 15 is set, the XOR with `POLY'
 * also clears the bit which the shift moved into position 16, so a value
 * that starts below 0x10000 stays below 0x10000.
 */
#define STEP(q)                                                         \
  do (q) = ((q) & 0x8000) ? ((q) << 1) ^ POLY : ((q) << 1); while (0)
76 | ||
c64d8cd5 MW |
/* Write SZ bytes of a fixed pseudorandom sequence at P.  The generator is
 * the LFSR implemented by `STEP', started from a constant seed and advanced
 * eight steps per output byte, so every call produces the same bytes.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  unsigned int lfsr = 0xbc20;
  size_t off;
  int bits;

  for (off = 0; off < sz; off++) {
    p[off] = lfsr & 0xff;
    for (bits = 8; bits > 0; bits--) { STEP(lfsr); }
  }
}
91 | ||
88510d86 MW |
/* Convert a string to floating point.
 *
 * S is the text to parse and WHAT describes the quantity for the error
 * message.  The whole of S must be consumed by the conversion, and at least
 * one character must be converted: an empty string is rejected rather than
 * being read as zero.  On any failure the program exits through `die'.
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);
  /* `q == s' means that no conversion was performed at all. */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 | 103 | |
88510d86 MW |
104 | /* Convert a floating-point value into a struct timeval. */ |
105 | static void f2tv(struct timeval *tv, double t) | |
106 | { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; } | |
107 | ||
108 | /*----- Main algorithm skeleton -------------------------------------------*/ | |
109 | ||
/* Parameters for one MTU discovery run.  `main' fills this in from the
 * command line; `pathmtu' and the probe methods receive it as a pointer to
 * const and only read it.  The time values are in seconds.
 */
struct param {
  unsigned f;                           /* Various flags */
#define F_VERBOSE 1u                    /*   Give a running commentary */
  double retx;                          /* Initial retransmit interval */
  double regr;                          /* Retransmit growth factor */
  double timeout;                       /* Give up on a probe after this */
  int seqoff;                           /* Offset to write sequence number */
  const struct probe_ops *pops;         /* Probe algorithm description */
  struct sockaddr_in sin;               /* Destination address */
};
120 | ||
/* State shared between successive probes within one `pathmtu' call. */
struct probestate {
  const struct param *pp;               /* The run's parameters */
  unsigned q;                           /* Sequence number: stepped by
                                         * `probe' before each new probe and
                                         * compared by `mypacketp' */
};
125 | ||
/* Operations table describing one probing method.  Tables are linked
 * through `next' so that `main' and `help' can walk every method which was
 * compiled in.  Each hook receives, as its first argument, the method's
 * private state block of `statesz' bytes, which `pathmtu' allocates.
 */
struct probe_ops {
  const char *name;                     /* Name matched against `-m' */
  const struct probe_ops *next;         /* Next table in chain, or null */
  size_t statesz;                       /* Size of private state block */
  int (*setup)(void *, int, const struct param *);
                                        /* Initialize the state from the
                                         * connected UDP socket; return the
                                         * initial MTU upper bound, or a
                                         * negative value on failure */
  void (*finish)(void *);               /* Called once before the state
                                         * block is freed */
  void (*selprep)(void *, int *, fd_set *);
                                        /* Add descriptors to the read set
                                         * and update the maximum */
  int (*xmit)(void *, int);             /* Send a probe of the given size;
                                         * return an RC_... code */
  int (*selproc)(void *, fd_set *, struct probestate *);
                                        /* Examine the descriptors after
                                         * `select'; return RC_OK to keep
                                         * waiting, another RC_... code, or
                                         * a positive MTU estimate */
};

/* The head of the method chain defined so far.  Each method's table uses
 * the current value as its `next' link and the macro is then redefined to
 * point at that table.
 */
#define OPS_CHAIN 0
138 | ||
/* Result codes passed between the probe methods, `probe' and `pathmtu'.
 * All of the named codes are zero or negative, so that a positive value can
 * stand for a suggested MTU.
 */
enum {
  RC_FAIL = -99,                        /* Hard failure: abandon the run */
  RC_OK = 0,                            /* Nothing conclusive yet; `probe'
                                         * keeps waiting */
  RC_LOWER = -1,                        /* Probe was too big: guess lower */
  RC_HIGHER = -2,                       /* Probe got through: guess higher */
  RC_NOREPLY = -3                       /* Probe timed out with no answer */
  /* or a positive MTU upper-bound */
};
147 | ||
/* Add a file descriptor FD to the set `fd_in', updating `*maxfd'.  The
 * macro relies on variables named `fd_in' (a pointer to the set) and
 * `maxfd' (a pointer to the running maximum) being in scope at the point of
 * use, and it evaluates FD more than once.
 */
#define ADDFD(fd)                                                       \
  do { FD_SET(fd, fd_in); if (*maxfd < fd) *maxfd = fd; } while (0)
151 | ||
152 | /* Check whether a buffer contains a packet from our current probe. */ | |
153 | static int mypacketp(struct probestate *ps, | |
154 | const unsigned char *p, size_t sz) | |
155 | { | |
156 | const struct param *pp = ps->pp; | |
c64d8cd5 | 157 | |
88510d86 MW |
158 | return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q); |
159 | } | |
160 | ||
161 | /* See whether MTU is an acceptable MTU value. Return an appropriate | |
162 | * RC_... code or a new suggested MTU. | |
163 | */ | |
164 | static int probe(struct probestate *ps, void *st, int mtu) | |
c64d8cd5 | 165 | { |
88510d86 | 166 | const struct param *pp = ps->pp; |
c64d8cd5 | 167 | fd_set fd_in; |
88510d86 MW |
168 | struct timeval tv, now, when, done; |
169 | double timer = pp->retx; | |
170 | int rc, maxfd; | |
171 | ||
172 | /* Set up the first retransmit and give-up timers. */ | |
173 | gettimeofday(&now, 0); | |
174 | f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv); | |
175 | f2tv(&tv, timer); TV_ADD(&when, &now, &tv); | |
176 | if (TV_CMP(&when, >, &done)) when = done; | |
177 | ||
178 | /* Send the initial probe. */ | |
179 | if (pp->f & F_VERBOSE) | |
180 | moan("sending probe of size %d (seq = %04x)", mtu, ps->q); | |
181 | STEP(ps->q); | |
182 | STORE16(buf + pp->seqoff, ps->q); | |
183 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
184 | ||
185 | for (;;) { | |
186 | ||
187 | /* Wait for something interesting to happen. */ | |
188 | maxfd = 0; FD_ZERO(&fd_in); | |
189 | pp->pops->selprep(st, &maxfd, &fd_in); | |
190 | TV_SUB(&tv, &when, &now); | |
191 | if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL); | |
192 | gettimeofday(&now, 0); | |
193 | ||
194 | /* See whether the probe method has any answers for us. */ | |
195 | if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc); | |
196 | ||
197 | /* If we've waited too long, give up. If we should retransmit, do | |
198 | * that. | |
199 | */ | |
200 | if (TV_CMP(&now, >, &done)) | |
201 | return (RC_NOREPLY); | |
202 | else if (TV_CMP(&now, >, &when)) { | |
203 | if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu); | |
204 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
205 | do { | |
206 | timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv); | |
207 | } while (TV_CMP(&when, <, &now)); | |
208 | if (TV_CMP(&when, >, &done)) when = done; | |
209 | } | |
210 | } | |
211 | } | |
c64d8cd5 | 212 | |
88510d86 MW |
/* Discover the path MTU to the destination address.
 *
 * PP gives the destination, the timing parameters and the probe method to
 * use.  Returns the MTU found, or -1 on failure; the caller reports
 * failures using `errno'.
 */
static int pathmtu(const struct param *pp)
{
  int sk;
  int mtu, lo, hi;                      /* Current guess and its bounds */
  int rc, droppy = -1;                  /* `droppy' records what a missing
                                         * reply means: -1 not yet known;
                                         * 0 treat as `too big'; 1 treat
                                         * as `got through' */
  void *st;
  struct probestate ps;

  /* Build and connect a UDP socket.  We'll need this to know the local port
   * number to use if nothing else.  Set other stuff up.
   */
  if ((sk = socket(PF_INET, SOCK_DGRAM, 0)) < 0) goto fail_0;
  if (connect(sk, (struct sockaddr *)&pp->sin, sizeof(pp->sin))) goto fail_1;
  st = xmalloc(pp->pops->statesz);
  if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2;
  ps.pp = pp; ps.q = rand() & 0xffff;
  lo = 576; hi = mtu;

  /* And now we do a thing which is sort of like a binary search, except that
   * we also take explicit clues as establishing a new upper bound, and we
   * try to hug that initially.
   */
  for (;;) {
    assert(lo <= mtu && mtu <= hi);
    if (pp->f & F_VERBOSE) moan("probe: %d <= %d <= %d", lo, mtu, hi);
    rc = probe(&ps, st, mtu);
    switch (rc) {

      case RC_FAIL:
        if (pp->f & F_VERBOSE) moan("probe failed");
        goto fail_3;

      case RC_NOREPLY:
        /* If we've not seen a dropped packet before then we don't know what
         * this means yet -- in particular, we don't know which bit of the
         * network is swallowing packets.  Send a minimum-size probe.  If
         * that doesn't come back then assume that the remote host is
         * swallowing our packets.  If it does, then we assume that dropped
         * packets are a result of ICMP fragmentation-needed reports being
         * lost or suppressed.
         */
        if (pp->f & F_VERBOSE) moan("gave up: black hole detected");
        if (droppy == -1) {
          if (pp->f & F_VERBOSE) moan("sending minimum-size probe");
          switch (probe(&ps, st, lo)) {
            case RC_FAIL:
              goto fail_3;
            case RC_NOREPLY:
              if (pp->f & F_VERBOSE) {
                moan("no reply from min-size probe: "
                     "assume black hole at target");
              }
              droppy = 1;
              break;
            case RC_HIGHER:
              if (pp->f & F_VERBOSE) {
                moan("reply from min-size probe OK: "
                     "assume black hole in network");
              }
              droppy = 0;
              break;
            default:
              if (pp->f & F_VERBOSE)
                moan("unexpected return code from probe");
              errno = ENOTCONN;
              goto fail_3;
          }
        }

        /* Interpret the silence according to what we have learned: jump
         * into the matching case below.
         */
        if (droppy) goto higher; else goto lower;

      case RC_HIGHER:
      higher:
        if (droppy == -1) {
          if (pp->f & F_VERBOSE)
            moan("probe returned: remote host is not a black hole");
          droppy = 0;
        }
        if (mtu == hi) {
          if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU");
          goto done;
        }
        lo = mtu;

        /* Now we must make a new guess, between lo and hi.  We know that lo
         * is good; but we're not so sure about hi here.  We know that hi >
         * lo, so this will find an approximate midpoint, greater than lo and
         * no more than hi.
         */
        if (pp->f & F_VERBOSE) moan("probe returned: guessing higher");
        mtu += (hi - lo + 1)/2;
        break;

      case RC_LOWER:
      lower:
        /* If this didn't work, and we're already at the bottom of our
         * possible range, then something has gone horribly wrong.
         */
        assert(lo < mtu);
        hi = mtu - 1;
        if (lo == hi) {
          if (pp->f & F_VERBOSE) moan("error returned: found correct MTU");
          mtu = lo;
          goto done;
        }

        /* We must make a new guess, between lo and hi.  We're probably
         * fairly sure that lo will succeed, since either it's the minimum
         * MTU or we've tested it already; but we're not quite sure about hi,
         * so we want to aim high.
         */
        if (pp->f & F_VERBOSE) moan("error returned: guessing lower");
        mtu -= (hi - lo + 1)/2;
        break;

      default:
        /* Any other value is a suggested MTU: adopt it as both the new
         * guess and the new upper bound.
         */
        if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate");
        mtu = hi = rc;
        break;
    }
  }

done:
  /* Clean up and return our result. */
  pp->pops->finish(st);
  xfree(st);
  close(sk);
  return (mtu);

  /* Failure exits: each label releases what had been acquired by the time
   * of the corresponding failure and falls through to the next.
   */
fail_3:
  pp->pops->finish(st);
fail_2:
  xfree(st);
fail_1:
  close(sk);
fail_0:
  return (-1);
}
352 | ||
88510d86 MW |
353 | /*----- Doing it the hard way ---------------------------------------------*/ |
354 | ||
/* `IPHDR_SANE' is defined on the platforms listed here.  Where it is
 * defined, the `sane_...' macros convert to network byte order; elsewhere
 * they expand to nothing, so their argument is used unconverted.
 */
#if defined(linux) || defined(__OpenBSD__)
#define IPHDR_SANE
#endif

#ifdef IPHDR_SANE
#  define sane_htons htons
#  define sane_htonl htonl
#else
#  define sane_htons
#  define sane_htonl
#endif

/* Raw sockets opened at the start of `main'.  If either could not be
 * opened then `rawerr' holds the `errno' value from the failure, and
 * `raw_setup' reports it later.
 */
static int rawicmp = -1, rawudp = -1, rawerr = 0;
368 | ||
#define IPCK_INIT 0xffff

/* Compute an IP checksum over some data.  This is a restartable interface:
 * initialize A to `IPCK_INIT' for the first call, and pass the previous
 * result as A to continue over further data.
 *
 * BUF and N describe the data, which is summed as big-endian 16-bit words;
 * a trailing odd byte is treated as the high half of a final word.  Every
 * chunk except the last should therefore have even length.  The result is
 * the complemented, folded sum, except that 0xffff is returned rather than
 * zero.
 *
 * The loop counts the bytes remaining rather than comparing against a
 * pointer one before the end, so that N = 0 never forms a pointer which
 * precedes the start of the buffer.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;
  const unsigned char *p = buf;

  for (; n >= 2; p += 2, n -= 2)
    aa += ((unsigned)p[0] << 8) | p[1];
  if (n) aa += (unsigned)p[0] << 8;

  /* Fold the carries back in until the sum fits in sixteen bits. */
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
384 | ||
/* TCP/UDP pseudoheader structure.  `raw_xmit' fills one of these in and
 * feeds it to the checksum ahead of the UDP header and payload.
 */
struct phdr {
  struct in_addr ph_src, ph_dst;        /* Source and destination address */
  u_char ph_z, ph_p;                    /* Zero byte; protocol number */
  u_short ph_len;                       /* UDP length, as stored in the
                                         * UDP header */
};
391 | ||
/* Private state for the `raw' probe method. */
struct raw_state {
  struct sockaddr_in me, sin;           /* Local and remote addresses */
  int sk, rawicmp, rawudp;              /* Connected UDP socket; raw ICMP
                                         * and raw UDP sockets */
  unsigned q;                           /* IP identification value, stepped
                                         * for each packet sent and matched
                                         * against quoted headers in ICMP
                                         * errors */
};
397 | ||
/* Set up the `raw' probe method.
 *
 * STV is the state block to initialize, SK the connected UDP socket, and PP
 * the run parameters.  Returns the largest interface MTU found, as an
 * initial upper bound, or -1 on failure.
 */
static int raw_setup(void *stv, int sk, const struct param *pp)
{
  struct raw_state *st = stv;
  socklen_t sz;
  int i, mtu = -1;
  struct ifaddrs *ifa, *ifaa, *ifap;
  struct ifreq ifr;

  /* If we couldn't acquire raw sockets, we fail here. */
  if (rawerr) { errno = rawerr; goto fail_0; }
  st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk;

  /* Initialize the sequence number. */
  st->q = rand() & 0xffff;

  /* Snaffle the local and remote address and port number. */
  st->sin = pp->sin;
  sz = sizeof(st->me);
  if (getsockname(sk, (struct sockaddr *)&st->me, &sz))
    goto fail_0;

  /* There isn't a portable way to force the DF flag onto a packet through
   * UDP, or even through raw IP, unless we write the entire IP header
   * ourselves.  This is somewhat annoying, especially since we have an
   * uphill struggle keeping track of which systems randomly expect which
   * header fields to be presented in host byte order.  Oh, well.
   */
  i = 1;
  if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0;

  /* Find an upper bound on the MTU.  Do two passes over the interface
   * list.  If we can find matches for our local address then use the
   * highest one of those; otherwise do a second pass and simply take the
   * highest MTU of any network interface.
   */
  if (getifaddrs(&ifaa)) goto fail_0;
  for (i = 0; i < 2; i++) {
    for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) {

      /* Skip entries which are down, have no IPv4 address, don't match our
       * local address (first pass only), repeat the name of the entry we
       * queried last (second pass only), or have a name which is too long
       * for the request structure.
       */
      if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr ||
          ifa->ifa_addr->sa_family != AF_INET ||
          (i == 0 &&
           ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr !=
             st->me.sin_addr.s_addr) ||
          (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) ||
          strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name))
        continue;
      ifap = ifa;
      strcpy(ifr.ifr_name, ifa->ifa_name); /* length was checked above */
      if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1;
      if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu;
    }
    if (mtu > 0) break;
  }
  if (mtu < 0) { errno = ENOTCONN; goto fail_1; }
  freeifaddrs(ifaa);

  /* Done. */
  return (mtu);

fail_1:
  freeifaddrs(ifaa);
fail_0:
  return (-1);
}
462 | ||
/* Tear down the `raw' method's state: there is nothing to release. */
static void raw_finish(void *stv)
{
  (void)stv;
}
464 | ||
465 | static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
466 | { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); } | |
467 | ||
468 | static int raw_xmit(void *stv, int mtu) | |
469 | { | |
470 | struct raw_state *st = stv; | |
471 | unsigned char b[65536], *p; | |
472 | struct ip *ip; | |
473 | struct udphdr *udp; | |
474 | struct phdr ph; | |
475 | unsigned ck; | |
476 | ||
477 | /* Build the IP header. */ | |
478 | ip = (struct ip *)b; | |
479 | ip->ip_v = 4; | |
480 | ip->ip_hl = sizeof(*ip)/4; | |
481 | ip->ip_tos = IPTOS_RELIABILITY; | |
482 | ip->ip_len = sane_htons(mtu); | |
483 | STEP(st->q); ip->ip_id = htons(st->q); | |
484 | ip->ip_off = sane_htons(0 | IP_DF); | |
485 | ip->ip_ttl = 64; | |
486 | ip->ip_p = IPPROTO_UDP; | |
487 | ip->ip_sum = 0; | |
488 | ip->ip_src = st->me.sin_addr; | |
489 | ip->ip_dst = st->sin.sin_addr; | |
490 | ||
491 | /* Build a UDP packet in the output buffer. */ | |
492 | udp = (struct udphdr *)(ip + 1); | |
493 | udp->uh_sport = st->me.sin_port; | |
494 | udp->uh_dport = st->sin.sin_port; | |
495 | udp->uh_ulen = htons(mtu - sizeof(*ip)); | |
496 | udp->uh_sum = 0; | |
497 | ||
498 | /* Copy the payload. */ | |
499 | p = (unsigned char *)(udp + 1); | |
500 | memcpy(p, buf, mtu - (p - b)); | |
501 | ||
502 | /* Calculate the UDP checksum. */ | |
503 | ph.ph_src = ip->ip_src; | |
504 | ph.ph_dst = ip->ip_dst; | |
505 | ph.ph_z = 0; | |
506 | ph.ph_p = IPPROTO_UDP; | |
507 | ph.ph_len = udp->uh_ulen; | |
508 | ck = IPCK_INIT; | |
509 | ck = ipcksum(&ph, sizeof(ph), ck); | |
510 | ck = ipcksum(udp, mtu - sizeof(*ip), ck); | |
511 | udp->uh_sum = htons(ck); | |
512 | ||
513 | /* Send the whole thing off. If we're too big for the interface then we | |
514 | * might need to trim immediately. | |
515 | */ | |
516 | if (sendto(st->rawudp, b, mtu, 0, | |
517 | (struct sockaddr *)&st->sin, sizeof(st->sin)) < 0) { | |
518 | if (errno == EMSGSIZE) return (RC_LOWER); | |
519 | else goto fail_0; | |
520 | } | |
521 | ||
522 | /* Done. */ | |
523 | return (RC_OK); | |
524 | ||
525 | fail_0: | |
526 | return (RC_FAIL); | |
527 | } | |
528 | ||
529 | static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
530 | { | |
531 | struct raw_state *st = stv; | |
532 | unsigned char b[65536]; | |
533 | struct ip *ip; | |
534 | struct icmp *icmp; | |
535 | struct udphdr *udp; | |
536 | ssize_t n; | |
537 | ||
538 | /* An ICMP packet: see what's inside. */ | |
539 | if (FD_ISSET(st->rawicmp, fd_in)) { | |
540 | if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0; | |
541 | ||
542 | ip = (struct ip *)b; | |
543 | if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) || | |
544 | ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP) | |
545 | goto skip_icmp; | |
546 | n -= sizeof(4*ip->ip_hl); | |
547 | ||
548 | icmp = (struct icmp *)(b + 4*ip->ip_hl); | |
549 | if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH) | |
550 | goto skip_icmp; | |
551 | n -= offsetof(struct icmp, icmp_ip); | |
552 | ||
553 | ip = &icmp->icmp_ip; | |
554 | if (n < sizeof(*ip) || | |
555 | ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 || | |
556 | ip->ip_id != htons(st->q) || | |
557 | ip->ip_src.s_addr != st->me.sin_addr.s_addr || | |
558 | ip->ip_dst.s_addr != st->sin.sin_addr.s_addr) | |
559 | goto skip_icmp; | |
560 | n -= sizeof(*ip); | |
561 | ||
562 | udp = (struct udphdr *)(ip + 1); | |
563 | if (n < sizeof(udp) || udp->uh_sport != st->me.sin_port || | |
564 | udp->uh_dport != st->sin.sin_port) | |
565 | goto skip_icmp; | |
566 | n -= sizeof(*udp); | |
567 | ||
568 | if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER); | |
569 | else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp; | |
570 | else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu)); | |
571 | else return (RC_LOWER); | |
572 | } | |
573 | skip_icmp:; | |
574 | ||
575 | /* If we got a reply to the current probe then we're good. If we got an | |
576 | * error, or the packet's sequence number is wrong, then ignore it. | |
577 | */ | |
578 | if (FD_ISSET(st->sk, fd_in)) { | |
579 | if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK); | |
580 | else if (mypacketp(ps, b, n)) return (RC_HIGHER); | |
581 | else return (RC_OK); | |
582 | } | |
583 | ||
584 | return (RC_OK); | |
585 | ||
586 | fail_0: | |
587 | return (RC_FAIL); | |
588 | } | |
589 | ||
/* Operations table for the `raw' method, linked onto the chain so far. */
static const struct probe_ops raw_ops = {
  "raw", OPS_CHAIN, sizeof(struct raw_state),
  raw_setup, raw_finish,
  raw_selprep, raw_xmit, raw_selproc
};

/* Make this table the new head of the method chain. */
#undef OPS_CHAIN
#define OPS_CHAIN &raw_ops
598 | ||
599 | /*----- Doing the job on Linux --------------------------------------------*/ | |
600 | ||
601 | #if defined(linux) | |
602 | ||
/* Fallback definition for when the system headers don't provide `IP_MTU'. */
#ifndef IP_MTU
#  define IP_MTU 14                     /* Blech! */
#endif

/* Private state for the `linux' probe method. */
struct linux_state {
  int sk;                               /* The connected UDP socket */
};
610 | ||
611 | static int linux_setup(void *stv, int sk, const struct param *pp) | |
612 | { | |
613 | struct linux_state *st = stv; | |
614 | int i, mtu; | |
cb160b86 | 615 | socklen_t sz; |
88510d86 MW |
616 | |
617 | /* Snaffle the UDP socket. */ | |
618 | st->sk = sk; | |
619 | ||
620 | /* Turn on kernel path-MTU discovery and force DF on. */ | |
18d5f6eb | 621 | i = IP_PMTUDISC_PROBE; |
88510d86 MW |
622 | if (setsockopt(st->sk, IPPROTO_IP, IP_MTU_DISCOVER, &i, sizeof(i))) |
623 | return (-1); | |
624 | ||
625 | /* Read the initial MTU guess back and report it. */ | |
626 | sz = sizeof(mtu); | |
627 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
628 | return (-1); | |
629 | ||
630 | /* Done. */ | |
631 | return (mtu); | |
632 | } | |
633 | ||
/* Tear down the `linux' method's state: there is nothing to release. */
static void linux_finish(void *stv)
{
  (void)stv;
}
635 | ||
636 | static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
637 | { struct linux_state *st = stv; ADDFD(st->sk); } | |
638 | ||
639 | static int linux_xmit(void *stv, int mtu) | |
640 | { | |
641 | struct linux_state *st = stv; | |
642 | ||
643 | /* Write the packet. */ | |
644 | if (write(st->sk, buf, mtu - 28) >= 0) return (RC_OK); | |
645 | else if (errno == EMSGSIZE) return (RC_LOWER); | |
646 | else return (RC_FAIL); | |
647 | } | |
648 | ||
649 | static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
650 | { | |
651 | struct linux_state *st = stv; | |
652 | int mtu; | |
cb160b86 | 653 | socklen_t sz; |
88510d86 MW |
654 | ssize_t n; |
655 | unsigned char b[65536]; | |
656 | ||
657 | /* Read an answer. If it looks like the right kind of error then report a | |
658 | * success. This is potentially wrong, since we can't tell whether an | |
659 | * error was delayed from an earlier probe. However, we never return | |
660 | * RC_LOWER from this method, so the packet sizes ought to be monotonically | |
661 | * decreasing and this won't cause trouble. Otherwise update from the | |
662 | * kernel's idea of the right MTU. | |
663 | */ | |
664 | if (FD_ISSET(st->sk, fd_in)) { | |
665 | n = read(st->sk, &buf, sizeof(buf)); | |
666 | if (n >= 0 ? | |
667 | mypacketp(ps, b, n) : | |
668 | errno == ECONNREFUSED || errno == EHOSTUNREACH) | |
669 | return (RC_HIGHER); | |
670 | sz = sizeof(mtu); | |
671 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
672 | return (RC_FAIL); | |
673 | return (mtu); | |
674 | } | |
675 | return (RC_OK); | |
676 | } | |
677 | ||
/* Operations table for the `linux' method, linked onto the chain so far. */
static const struct probe_ops linux_ops = {
  "linux", OPS_CHAIN, sizeof(struct linux_state),
  linux_setup, linux_finish,
  linux_selprep, linux_xmit, linux_selproc
};

/* Make this table the new head of the method chain. */
#undef OPS_CHAIN
#define OPS_CHAIN &linux_ops
c64d8cd5 MW |
686 | |
687 | #endif | |
688 | ||
689 | /*----- Help options ------------------------------------------------------*/ | |
690 | ||
88510d86 MW |
/* Head of the chain of available probe methods, walked by `help'. */
static const struct probe_ops *probe_ops = OPS_CHAIN;

/* Write the program's version banner to FP. */
static void version(FILE *fp)
  { pquis(fp, "$, TrIPE version " VERSION "\n"); }
695 | ||
/* Write a brief usage summary to FP.  The summary lists every option which
 * takes part in a probe run, including `-v'.
 */
static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-v] [-H HEADER] [-m METHOD]\n"
            "\t[-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
}
c64d8cd5 MW |
701 | |
702 | static void help(FILE *fp) | |
703 | { | |
88510d86 MW |
704 | const struct probe_ops *ops; |
705 | ||
c64d8cd5 MW |
706 | version(fp); |
707 | fputc('\n', fp); | |
708 | usage(fp); | |
709 | fputs("\ | |
710 | \n\ | |
711 | Options in full:\n\ | |
712 | \n\ | |
713 | -h, --help Show this help text.\n\ | |
714 | -v, --version Show version number.\n\ | |
715 | -u, --usage Show brief usage message.\n\ | |
716 | \n\ | |
88510d86 MW |
717 | -g, --growth=FACTOR Growth factor for retransmit interval.\n\ |
718 | -m, --method=METHOD Use METHOD to probe for MTU.\n\ | |
719 | -r, --retransmit=SECS Retransmit if no reply after SEC.\n\ | |
720 | -t, --timeout=SECS Give up expecting a reply after SECS.\n\ | |
c64d8cd5 | 721 | -H, --header=HEX Packet header, in hexadecimal.\n\ |
88510d86 MW |
722 | \n\ |
723 | Probe methods:\n\ | |
c64d8cd5 | 724 | ", fp); |
88510d86 MW |
725 | for (ops = probe_ops; ops; ops = ops->next) |
726 | printf("\t%s\n", ops->name); | |
c64d8cd5 MW |
727 | } |
728 | ||
729 | /*----- Main code ---------------------------------------------------------*/ | |
730 | ||
731 | int main(int argc, char *argv[]) | |
732 | { | |
88510d86 | 733 | struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN }; |
c64d8cd5 MW |
734 | hex_ctx hc; |
735 | dstr d = DSTR_INIT; | |
736 | size_t sz; | |
737 | int i; | |
738 | unsigned long u; | |
739 | char *q; | |
740 | struct hostent *h; | |
741 | struct servent *s; | |
c64d8cd5 MW |
742 | unsigned f = 0; |
743 | ||
744 | #define f_bogus 1u | |
745 | ||
88510d86 MW |
746 | if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 || |
747 | (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0) | |
748 | rawerr = errno; | |
749 | if (setuid(getuid())) | |
750 | abort(); | |
751 | ||
c64d8cd5 MW |
752 | ego(argv[0]); |
753 | fillbuffer(buf, sizeof(buf)); | |
88510d86 | 754 | pp.sin.sin_port = htons(7); |
c64d8cd5 MW |
755 | |
756 | for (;;) { | |
757 | static const struct option opts[] = { | |
758 | { "help", 0, 0, 'h' }, | |
88510d86 | 759 | { "version", 0, 0, 'V' }, |
c64d8cd5 MW |
760 | { "usage", 0, 0, 'u' }, |
761 | { "header", OPTF_ARGREQ, 0, 'H' }, | |
88510d86 MW |
762 | { "growth", OPTF_ARGREQ, 0, 'g' }, |
763 | { "method", OPTF_ARGREQ, 0, 'm' }, | |
764 | { "retransmit", OPTF_ARGREQ, 0, 'r' }, | |
c64d8cd5 | 765 | { "timeout", OPTF_ARGREQ, 0, 't' }, |
88510d86 | 766 | { "verbose", 0, 0, 'v' }, |
c64d8cd5 MW |
767 | { 0, 0, 0, 0 } |
768 | }; | |
769 | ||
88510d86 | 770 | i = mdwopt(argc, argv, "hVu" "H:g:m:r:t:v", opts, 0, 0, 0); |
c64d8cd5 MW |
771 | if (i < 0) break; |
772 | switch (i) { | |
773 | case 'h': help(stdout); exit(0); | |
88510d86 | 774 | case 'V': version(stdout); exit(0); |
c64d8cd5 MW |
775 | case 'u': usage(stdout); exit(0); |
776 | ||
777 | case 'H': | |
778 | DRESET(&d); | |
779 | hex_init(&hc); | |
780 | hex_decode(&hc, optarg, strlen(optarg), &d); | |
781 | hex_decode(&hc, 0, 0, &d); | |
88510d86 | 782 | sz = d.len < 532 ? d.len : 532; |
c64d8cd5 | 783 | memcpy(buf, d.buf, sz); |
88510d86 | 784 | pp.seqoff = sz; |
c64d8cd5 MW |
785 | break; |
786 | ||
88510d86 MW |
787 | case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break; |
788 | case 'r': pp.retx = s2f(optarg, "retransmit interval"); break; | |
789 | case 't': pp.timeout = s2f(optarg, "timeout"); break; | |
790 | ||
791 | case 'm': | |
792 | for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next) | |
793 | if (strcmp(pp.pops->name, optarg) == 0) goto found_alg; | |
794 | die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg); | |
795 | found_alg: | |
c64d8cd5 MW |
796 | break; |
797 | ||
88510d86 MW |
798 | case 'v': pp.f |= F_VERBOSE; break; |
799 | ||
c64d8cd5 MW |
800 | default: |
801 | f |= f_bogus; | |
802 | break; | |
803 | } | |
804 | } | |
805 | argv += optind; argc -= optind; | |
806 | if ((f & f_bogus) || 1 > argc || argc > 2) { | |
807 | usage(stderr); | |
808 | exit(EXIT_FAILURE); | |
809 | } | |
810 | ||
811 | if ((h = gethostbyname(*argv)) == 0) | |
812 | die(EXIT_FAILURE, "unknown host `%s': %s", *argv, hstrerror(h_errno)); | |
813 | if (h->h_addrtype != AF_INET) | |
814 | die(EXIT_FAILURE, "unsupported address family for host `%s'", *argv); | |
88510d86 | 815 | memcpy(&pp.sin.sin_addr, h->h_addr, sizeof(struct in_addr)); |
c64d8cd5 MW |
816 | argv++; argc--; |
817 | ||
818 | if (*argv) { | |
819 | errno = 0; | |
820 | u = strtoul(*argv, &q, 0); | |
821 | if (!errno && !*q) | |
88510d86 | 822 | pp.sin.sin_port = htons(u); |
c64d8cd5 MW |
823 | else if ((s = getservbyname(*argv, "udp")) == 0) |
824 | die(EXIT_FAILURE, "unknown UDP service `%s'", *argv); | |
825 | else | |
88510d86 | 826 | pp.sin.sin_port = s->s_port; |
c64d8cd5 MW |
827 | } |
828 | ||
88510d86 MW |
829 | pp.sin.sin_family = AF_INET; |
830 | i = pathmtu(&pp); | |
c64d8cd5 MW |
831 | if (i < 0) |
832 | die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno)); | |
833 | printf("%d\n", i); | |
834 | if (ferror(stdout) || fflush(stdout) || fclose(stdout)) | |
835 | die(EXIT_FAILURE, "failed to write result: %s", strerror(errno)); | |
836 | return (0); | |
837 | } | |
838 | ||
839 | /*----- That's all, folks -------------------------------------------------*/ |