Commit | Line | Data |
---|---|---|
c64d8cd5 MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Report MTU on path to specified host | |
4 | * | |
5 | * (c) 2008 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of Trivial IP Encryption (TrIPE). | |
11 | * | |
12 | * TrIPE is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or | |
15 | * (at your option) any later version. | |
16 | * | |
17 | * TrIPE is distributed in the hope that it will be useful, | |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU General Public License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU General Public License | |
23 | * along with TrIPE; if not, write to the Free Software Foundation, | |
24 | * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
25 | */ | |
26 | ||
27 | /*----- Header files ------------------------------------------------------*/ | |
28 | ||
88510d86 MW |
29 | #if defined(linux) |
30 | # define _BSD_SOURCE | |
31 | #endif | |
32 | ||
c64d8cd5 MW |
33 | #include "config.h" |
34 | ||
d245350a | 35 | #include <assert.h> |
c64d8cd5 | 36 | #include <errno.h> |
88510d86 | 37 | #include <stddef.h> |
c64d8cd5 MW |
38 | #include <stdio.h> |
39 | #include <stdlib.h> | |
40 | #include <string.h> | |
41 | #include <time.h> | |
42 | ||
43 | #include <sys/types.h> | |
44 | #include <sys/time.h> | |
45 | #include <unistd.h> | |
46 | ||
47 | #include <sys/socket.h> | |
48 | #include <netinet/in.h> | |
49 | #include <arpa/inet.h> | |
50 | #include <netdb.h> | |
51 | ||
88510d86 MW |
52 | #include <netinet/in_systm.h> |
53 | #include <netinet/ip.h> | |
54 | #include <netinet/ip_icmp.h> | |
55 | #include <netinet/udp.h> | |
56 | ||
57 | #include <net/if.h> | |
58 | #include <ifaddrs.h> | |
59 | #include <sys/ioctl.h> | |
60 | ||
61 | #include <mLib/alloc.h> | |
62 | #include <mLib/bits.h> | |
c64d8cd5 MW |
63 | #include <mLib/dstr.h> |
64 | #include <mLib/hex.h> | |
65 | #include <mLib/mdwopt.h> | |
66 | #include <mLib/quis.h> | |
67 | #include <mLib/report.h> | |
68 | #include <mLib/tv.h> | |
69 | ||
70 | /*----- Static variables --------------------------------------------------*/ | |
71 | ||
72 | static unsigned char buf[65536]; | |
73 | ||
88510d86 MW |
74 | #define POLY 0x1002d |
75 | ||
c64d8cd5 MW |
76 | /*----- Utility functions -------------------------------------------------*/ |
77 | ||
88510d86 MW |
/* Clock the 16-bit LFSR held in Q by one bit: shift left, and fold POLY
 * back in whenever the bit shifted out of position 15 was set.
 */
#define STEP(q)								\
  do {									\
    if ((q) & 0x8000) (q) = ((q) << 1) ^ POLY;				\
    else (q) = ((q) << 1);						\
  } while (0)
81 | ||
c64d8cd5 MW |
/* Write SZ bytes of a fixed pseudorandom pattern starting at P.  Each
 * output byte is the low byte of the LFSR state, after which the register
 * is clocked eight times.  The seed is constant, so the pattern is the same
 * on every run.
 */
static void fillbuffer(unsigned char *p, size_t sz)
{
  unsigned int lfsr = 0xbc20;
  size_t k;
  int bit;

  for (k = 0; k < sz; k++) {
    p[k] = lfsr & 0xff;
    for (bit = 8; bit > 0; bit--) STEP(lfsr);
  }
}
96 | ||
88510d86 MW |
/* Convert the string S to a floating-point number.  WHAT names the
 * quantity being parsed and is used only in the error message.  The whole
 * string must be consumed, and it must contain at least one character of
 * number: an empty string is rejected rather than silently read as zero.
 * On any error the program exits through `die'.
 */
static double s2f(const char *s, const char *what)
{
  double f;
  char *q;

  errno = 0;
  f = strtod(s, &q);
  /* `q == s' means strtod found nothing it could convert. */
  if (errno || q == s || *q) die(EXIT_FAILURE, "bad %s", what);
  return (f);
}
c64d8cd5 | 108 | |
88510d86 MW |
109 | /* Convert a floating-point value into a struct timeval. */ |
110 | static void f2tv(struct timeval *tv, double t) | |
111 | { tv->tv_sec = t; tv->tv_usec = (t - tv->tv_sec)*MILLION; } | |
112 | ||
113 | /*----- Main algorithm skeleton -------------------------------------------*/ | |
114 | ||
/* Settings for one path-MTU discovery run.  The field order matters: `main'
 * initializes this structure positionally.
 */
struct param {
  unsigned f;				/* Bitmask of F_... flags */
#define F_VERBOSE 1u			/*   Report progress as we go */
  double retx;				/* Delay before first retransmit */
  double regr;				/* Multiplier applied to the delay
					 * after each retransmit */
  double timeout;			/* Total time to wait for a reply */
  int seqoff;				/* Where in the payload the sequence
					 * number is stored */
  const struct probe_ops *pops;		/* Probe method to use */
  struct sockaddr_in sin;		/* Address we are probing */
};
125 | ||
/* State shared between the search loop and the probe methods. */
struct probestate {
  const struct param *pp;		/* The run's parameters */
  unsigned q;				/* Sequence number of the current
					 * probe (advanced with STEP) */
};
130 | ||
/* Description of a probe method.  The methods are linked through `next'
 * so that they can be listed and looked up by name.
 */
struct probe_ops {
  const char *name;			/* Name, as given to `-m' */
  const struct probe_ops *next;		/* Next method in the chain, or null */
  size_t statesz;			/* Size of the private state block */

  /* Initialize the state block given the connected UDP socket; return an
   * initial MTU upper bound, or a negative value on failure.
   */
  int (*setup)(void *, int, const struct param *);

  /* Release anything `setup' acquired. */
  void (*finish)(void *);

  /* Add the descriptors of interest to the read set, updating the maximum
   * descriptor number.
   */
  void (*selprep)(void *, int *, fd_set *);

  /* Transmit a probe of the given size; return an RC_... code. */
  int (*xmit)(void *, int);

  /* Examine the descriptors reported ready by `select'; return RC_OK to
   * keep waiting, some other RC_... code, or a positive MTU bound.
   */
  int (*selproc)(void *, fd_set *, struct probestate *);
};
141 | ||
142 | #define OPS_CHAIN 0 | |
143 | ||
/* Result codes passed between the probe methods, `probe' and `pathmtu'.
 * Any positive value is instead a new upper bound on the MTU.
 */
enum {
  RC_OK = 0,				/* Nothing decisive yet / sent OK */
  RC_LOWER = -1,			/* Probe was too big: try smaller */
  RC_HIGHER = -2,			/* Probe got through: try bigger */
  RC_NOREPLY = -3,			/* Timed out with no answer */
  RC_FAIL = -99				/* Something went wrong */
  /* or a positive MTU upper-bound */
};
152 | ||
/* Add descriptor FD to the set `fd_in' and raise `*maxfd' if necessary.
 * Both names are taken from the scope in which the macro is used.
 */
#define ADDFD(fd)							\
  do {									\
    FD_SET(fd, fd_in);							\
    if (*maxfd < fd) *maxfd = fd;					\
  } while (0)
156 | ||
157 | /* Check whether a buffer contains a packet from our current probe. */ | |
158 | static int mypacketp(struct probestate *ps, | |
159 | const unsigned char *p, size_t sz) | |
160 | { | |
161 | const struct param *pp = ps->pp; | |
c64d8cd5 | 162 | |
88510d86 MW |
163 | return (sz >= pp->seqoff + 2 && LOAD16(p + pp->seqoff) == ps->q); |
164 | } | |
165 | ||
166 | /* See whether MTU is an acceptable MTU value. Return an appropriate | |
167 | * RC_... code or a new suggested MTU. | |
168 | */ | |
169 | static int probe(struct probestate *ps, void *st, int mtu) | |
c64d8cd5 | 170 | { |
88510d86 | 171 | const struct param *pp = ps->pp; |
c64d8cd5 | 172 | fd_set fd_in; |
88510d86 MW |
173 | struct timeval tv, now, when, done; |
174 | double timer = pp->retx; | |
175 | int rc, maxfd; | |
176 | ||
177 | /* Set up the first retransmit and give-up timers. */ | |
178 | gettimeofday(&now, 0); | |
179 | f2tv(&tv, pp->timeout); TV_ADD(&done, &now, &tv); | |
180 | f2tv(&tv, timer); TV_ADD(&when, &now, &tv); | |
181 | if (TV_CMP(&when, >, &done)) when = done; | |
182 | ||
183 | /* Send the initial probe. */ | |
184 | if (pp->f & F_VERBOSE) | |
185 | moan("sending probe of size %d (seq = %04x)", mtu, ps->q); | |
186 | STEP(ps->q); | |
187 | STORE16(buf + pp->seqoff, ps->q); | |
188 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
189 | ||
190 | for (;;) { | |
191 | ||
192 | /* Wait for something interesting to happen. */ | |
193 | maxfd = 0; FD_ZERO(&fd_in); | |
194 | pp->pops->selprep(st, &maxfd, &fd_in); | |
195 | TV_SUB(&tv, &when, &now); | |
196 | if (select(maxfd + 1, &fd_in, 0, 0, &tv) < 0) return (RC_FAIL); | |
197 | gettimeofday(&now, 0); | |
198 | ||
199 | /* See whether the probe method has any answers for us. */ | |
200 | if ((rc = pp->pops->selproc(st, &fd_in, ps)) != RC_OK) return (rc); | |
201 | ||
202 | /* If we've waited too long, give up. If we should retransmit, do | |
203 | * that. | |
204 | */ | |
205 | if (TV_CMP(&now, >, &done)) | |
206 | return (RC_NOREPLY); | |
207 | else if (TV_CMP(&now, >, &when)) { | |
208 | if (pp->f & F_VERBOSE) moan("re-sending probe of size %d", mtu); | |
209 | if ((rc = pp->pops->xmit(st, mtu)) != RC_OK) return (rc); | |
210 | do { | |
211 | timer *= pp->regr; f2tv(&tv, timer); TV_ADD(&when, &when, &tv); | |
212 | } while (TV_CMP(&when, <, &now)); | |
213 | if (TV_CMP(&when, >, &done)) when = done; | |
214 | } | |
215 | } | |
216 | } | |
c64d8cd5 | 217 | |
88510d86 MW |
218 | /* Discover the path MTU to the destination address. */ |
219 | static int pathmtu(const struct param *pp) | |
220 | { | |
221 | int sk; | |
222 | int mtu, lo, hi; | |
223 | int rc, droppy = -1; | |
224 | void *st; | |
225 | struct probestate ps; | |
226 | ||
227 | /* Build and connect a UDP socket. We'll need this to know the local port | |
228 | * number to use if nothing else. Set other stuff up. | |
229 | */ | |
c64d8cd5 | 230 | if ((sk = socket(PF_INET, SOCK_DGRAM, 0)) < 0) goto fail_0; |
88510d86 MW |
231 | if (connect(sk, (struct sockaddr *)&pp->sin, sizeof(pp->sin))) goto fail_1; |
232 | st = xmalloc(pp->pops->statesz); | |
233 | if ((mtu = pp->pops->setup(st, sk, pp)) < 0) goto fail_2; | |
234 | ps.pp = pp; ps.q = rand() & 0xffff; | |
235 | lo = 576; hi = mtu; | |
236 | ||
237 | /* And now we do a thing which is sort of like a binary search, except that | |
238 | * we also take explicit clues as establishing a new upper bound, and we | |
239 | * try to hug that initially. | |
240 | */ | |
c64d8cd5 | 241 | for (;;) { |
d245350a MW |
242 | assert(lo <= mtu && mtu <= hi); |
243 | if (pp->f & F_VERBOSE) moan("probe: %d <= %d <= %d", lo, mtu, hi); | |
88510d86 MW |
244 | rc = probe(&ps, st, mtu); |
245 | switch (rc) { | |
246 | ||
247 | case RC_FAIL: | |
248 | if (pp->f & F_VERBOSE) moan("probe failed"); | |
249 | goto fail_3; | |
250 | ||
251 | case RC_NOREPLY: | |
252 | /* If we've not seen a dropped packet before then we don't know what | |
253 | * this means yet -- in particular, we don't know which bit of the | |
254 | * network is swallowing packets. Send a minimum-size probe. If | |
255 | * that doesn't come back then assume that the remote host is | |
256 | * swallowing our packets. If it does, then we assume that dropped | |
257 | * packets are a result of ICMP fragmentation-needed reports being | |
258 | * lost or suppressed. | |
259 | */ | |
260 | if (pp->f & F_VERBOSE) moan("gave up: black hole detected"); | |
261 | if (droppy == -1) { | |
262 | if (pp->f & F_VERBOSE) moan("sending minimum-size probe"); | |
263 | switch (probe(&ps, st, lo)) { | |
264 | case RC_FAIL: | |
265 | goto fail_3; | |
266 | case RC_NOREPLY: | |
267 | if (pp->f & F_VERBOSE) { | |
268 | moan("no reply from min-size probe: " | |
269 | "assume black hole at target"); | |
270 | } | |
271 | droppy = 1; | |
272 | break; | |
273 | case RC_HIGHER: | |
274 | if (pp->f & F_VERBOSE) { | |
275 | moan("reply from min-size probe OK: " | |
276 | "assume black hole in network"); | |
277 | } | |
278 | droppy = 0; | |
279 | break; | |
280 | default: | |
281 | if (pp->f & F_VERBOSE) | |
282 | moan("unexpected return code from probe"); | |
283 | errno = ENOTCONN; | |
284 | goto fail_3; | |
285 | } | |
286 | } | |
287 | ||
288 | if (droppy) goto higher; else goto lower; | |
289 | ||
290 | case RC_HIGHER: | |
291 | higher: | |
292 | if (droppy == -1) { | |
293 | if (pp->f & F_VERBOSE) | |
294 | moan("probe returned: remote host is not a black hole"); | |
295 | droppy = 0; | |
296 | } | |
297 | if (mtu == hi) { | |
298 | if (pp->f & F_VERBOSE) moan("probe returned: found correct MTU"); | |
299 | goto done; | |
300 | } | |
88510d86 | 301 | lo = mtu; |
d245350a MW |
302 | |
303 | /* Now we must make a new guess, between lo and hi. We know that lo | |
304 | * is good; but we're not so sure about hi here. We know that hi > | |
305 | * lo, so this will find an approximate midpoint, greater than lo and | |
306 | * no more than hi. | |
307 | */ | |
308 | if (pp->f & F_VERBOSE) moan("probe returned: guessing higher"); | |
88510d86 MW |
309 | mtu += (hi - lo + 1)/2; |
310 | break; | |
311 | ||
312 | case RC_LOWER: | |
313 | lower: | |
d245350a MW |
314 | /* If this didn't work, and we're already at the bottom of our |
315 | * possible range, then something has gone horribly wrong. | |
316 | */ | |
317 | assert(lo < mtu); | |
318 | hi = mtu - 1; | |
319 | if (lo == hi) { | |
88510d86 | 320 | if (pp->f & F_VERBOSE) moan("error returned: found correct MTU"); |
d245350a | 321 | mtu = lo; |
88510d86 MW |
322 | goto done; |
323 | } | |
d245350a MW |
324 | |
325 | /* We must make a new guess, between lo and hi. We're probably | |
326 | * fairly sure that lo will succeed, since either it's the minimum | |
327 | * MTU or we've tested it already; but we're not quite sure about hi, | |
328 | * so we want to aim high. | |
329 | */ | |
88510d86 | 330 | if (pp->f & F_VERBOSE) moan("error returned: guessing lower"); |
88510d86 MW |
331 | mtu -= (hi - lo + 1)/2; |
332 | break; | |
333 | ||
334 | default: | |
335 | if (pp->f & F_VERBOSE) moan("error returned with new MTU estimate"); | |
336 | mtu = hi = rc; | |
337 | break; | |
338 | } | |
c64d8cd5 | 339 | } |
88510d86 MW |
340 | |
341 | done: | |
342 | /* Clean up and return our result. */ | |
343 | pp->pops->finish(st); | |
344 | xfree(st); | |
c64d8cd5 MW |
345 | close(sk); |
346 | return (mtu); | |
347 | ||
88510d86 MW |
348 | fail_3: |
349 | pp->pops->finish(st); | |
350 | fail_2: | |
351 | xfree(st); | |
c64d8cd5 MW |
352 | fail_1: |
353 | close(sk); | |
354 | fail_0: | |
355 | return (-1); | |
356 | } | |
357 | ||
88510d86 MW |
358 | /*----- Doing it the hard way ---------------------------------------------*/ |
359 | ||
/* On some systems the `ip_len' and `ip_off' fields of a hand-built header
 * are given in network byte order (IPHDR_SANE); elsewhere they are passed
 * through unchanged.  The `sane_hton...' macros convert only in the former
 * case.
 */
#if defined(linux) || defined(__OpenBSD__)
#  define IPHDR_SANE
#  define sane_htons htons
#  define sane_htonl htonl
#else
#  define sane_htons
#  define sane_htonl
#endif
371 | ||
372 | static int rawicmp = -1, rawudp = -1, rawerr = 0; | |
373 | ||
374 | #define IPCK_INIT 0xffff | |
375 | ||
/* Compute an IP checksum over the N bytes at BUF.  This is a restartable
 * interface: initialize A to `IPCK_INIT' for the first call, and pass the
 * previous result for later calls.  The data is summed as big-endian 16-bit
 * words, with a trailing odd byte taken as the high half of a final word.
 * The result is the complement of the folded sum, except that a folded sum
 * of 0xffff is returned as 0xffff rather than as zero.
 *
 * The loop counts bytes remaining rather than comparing against `l - 1',
 * so no pointer before the start of the buffer is formed when N is zero.
 */
static unsigned ipcksum(const void *buf, size_t n, unsigned a)
{
  unsigned long aa = a ^ 0xffff;
  const unsigned char *p = buf;

  for (; n >= 2; p += 2, n -= 2) aa += ((unsigned long)p[0] << 8) | p[1];
  if (n) aa += (unsigned long)p[0] << 8;
  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
  return (aa == 0xffff ? aa : aa ^ 0xffff);
}
389 | ||
390 | /* TCP/UDP pseudoheader structure. */ | |
391 | struct phdr { | |
392 | struct in_addr ph_src, ph_dst; | |
393 | u_char ph_z, ph_p; | |
394 | u_short ph_len; | |
395 | }; | |
396 | ||
/* Private state for the raw-socket probe method. */
struct raw_state {
  struct sockaddr_in me, sin;		/* Local and remote addresses */
  int sk, rawicmp, rawudp;		/* UDP socket and the raw sockets */
  unsigned q;				/* IP identification of the latest
					 * packet sent */
};
402 | ||
403 | static int raw_setup(void *stv, int sk, const struct param *pp) | |
404 | { | |
405 | struct raw_state *st = stv; | |
406 | size_t sz; | |
407 | int i, mtu = -1; | |
408 | struct ifaddrs *ifa, *ifaa, *ifap; | |
409 | struct ifreq ifr; | |
410 | ||
411 | /* If we couldn't acquire raw sockets, we fail here. */ | |
412 | if (rawerr) { errno = rawerr; goto fail_0; } | |
413 | st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk; | |
414 | ||
415 | /* Initialize the sequence number. */ | |
416 | st->q = rand() & 0xffff; | |
417 | ||
418 | /* Snaffle the local and remote address and port number. */ | |
419 | st->sin = pp->sin; | |
420 | sz = sizeof(st->me); | |
421 | if (getsockname(sk, (struct sockaddr *)&st->me, &sz)) | |
422 | goto fail_0; | |
423 | ||
424 | /* There isn't a portable way to force the DF flag onto a packet through | |
425 | * UDP, or even through raw IP, unless we write the entire IP header | |
426 | * ourselves. This is somewhat annoying, especially since we have an | |
427 | * uphill struggle keeping track of which systems randomly expect which | |
428 | * header fields to be presented in host byte order. Oh, well. | |
429 | */ | |
430 | i = 1; | |
431 | if (setsockopt(rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i))) goto fail_0; | |
432 | ||
433 | /* Find an upper bound on the MTU. Do two passes over the interface | |
434 | * list. If we can find matches for our local address then use the | |
435 | * highest one of those; otherwise do a second pass and simply take the | |
436 | * highest MTU of any network interface. | |
437 | */ | |
438 | if (getifaddrs(&ifaa)) goto fail_0; | |
439 | for (i = 0; i < 2; i++) { | |
440 | for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) { | |
441 | if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr || | |
442 | ifa->ifa_addr->sa_family != AF_INET || | |
443 | (i == 0 && | |
444 | ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr != | |
445 | st->me.sin_addr.s_addr) || | |
446 | (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) || | |
447 | strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name)) | |
448 | continue; | |
449 | ifap = ifa; | |
450 | strcpy(ifr.ifr_name, ifa->ifa_name); | |
451 | if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1; | |
452 | if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu; | |
453 | } | |
454 | if (mtu > 0) break; | |
455 | } | |
456 | if (mtu < 0) { errno = ENOTCONN; goto fail_1; } | |
457 | freeifaddrs(ifaa); | |
458 | ||
459 | /* Done. */ | |
460 | return (mtu); | |
461 | ||
462 | fail_1: | |
463 | freeifaddrs(ifaa); | |
464 | fail_0: | |
465 | return (-1); | |
466 | } | |
467 | ||
/* The raw-socket method holds nothing which needs releasing. */
static void raw_finish(void *stv)
{
  ;
}
469 | ||
470 | static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
471 | { struct raw_state *st = stv; ADDFD(st->sk); ADDFD(st->rawicmp); } | |
472 | ||
473 | static int raw_xmit(void *stv, int mtu) | |
474 | { | |
475 | struct raw_state *st = stv; | |
476 | unsigned char b[65536], *p; | |
477 | struct ip *ip; | |
478 | struct udphdr *udp; | |
479 | struct phdr ph; | |
480 | unsigned ck; | |
481 | ||
482 | /* Build the IP header. */ | |
483 | ip = (struct ip *)b; | |
484 | ip->ip_v = 4; | |
485 | ip->ip_hl = sizeof(*ip)/4; | |
486 | ip->ip_tos = IPTOS_RELIABILITY; | |
487 | ip->ip_len = sane_htons(mtu); | |
488 | STEP(st->q); ip->ip_id = htons(st->q); | |
489 | ip->ip_off = sane_htons(0 | IP_DF); | |
490 | ip->ip_ttl = 64; | |
491 | ip->ip_p = IPPROTO_UDP; | |
492 | ip->ip_sum = 0; | |
493 | ip->ip_src = st->me.sin_addr; | |
494 | ip->ip_dst = st->sin.sin_addr; | |
495 | ||
496 | /* Build a UDP packet in the output buffer. */ | |
497 | udp = (struct udphdr *)(ip + 1); | |
498 | udp->uh_sport = st->me.sin_port; | |
499 | udp->uh_dport = st->sin.sin_port; | |
500 | udp->uh_ulen = htons(mtu - sizeof(*ip)); | |
501 | udp->uh_sum = 0; | |
502 | ||
503 | /* Copy the payload. */ | |
504 | p = (unsigned char *)(udp + 1); | |
505 | memcpy(p, buf, mtu - (p - b)); | |
506 | ||
507 | /* Calculate the UDP checksum. */ | |
508 | ph.ph_src = ip->ip_src; | |
509 | ph.ph_dst = ip->ip_dst; | |
510 | ph.ph_z = 0; | |
511 | ph.ph_p = IPPROTO_UDP; | |
512 | ph.ph_len = udp->uh_ulen; | |
513 | ck = IPCK_INIT; | |
514 | ck = ipcksum(&ph, sizeof(ph), ck); | |
515 | ck = ipcksum(udp, mtu - sizeof(*ip), ck); | |
516 | udp->uh_sum = htons(ck); | |
517 | ||
518 | /* Send the whole thing off. If we're too big for the interface then we | |
519 | * might need to trim immediately. | |
520 | */ | |
521 | if (sendto(st->rawudp, b, mtu, 0, | |
522 | (struct sockaddr *)&st->sin, sizeof(st->sin)) < 0) { | |
523 | if (errno == EMSGSIZE) return (RC_LOWER); | |
524 | else goto fail_0; | |
525 | } | |
526 | ||
527 | /* Done. */ | |
528 | return (RC_OK); | |
529 | ||
530 | fail_0: | |
531 | return (RC_FAIL); | |
532 | } | |
533 | ||
534 | static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
535 | { | |
536 | struct raw_state *st = stv; | |
537 | unsigned char b[65536]; | |
538 | struct ip *ip; | |
539 | struct icmp *icmp; | |
540 | struct udphdr *udp; | |
541 | ssize_t n; | |
542 | ||
543 | /* An ICMP packet: see what's inside. */ | |
544 | if (FD_ISSET(st->rawicmp, fd_in)) { | |
545 | if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0; | |
546 | ||
547 | ip = (struct ip *)b; | |
548 | if (n < sizeof(*ip) || n < sizeof(4*ip->ip_hl) || | |
549 | ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP) | |
550 | goto skip_icmp; | |
551 | n -= sizeof(4*ip->ip_hl); | |
552 | ||
553 | icmp = (struct icmp *)(b + 4*ip->ip_hl); | |
554 | if (n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH) | |
555 | goto skip_icmp; | |
556 | n -= offsetof(struct icmp, icmp_ip); | |
557 | ||
558 | ip = &icmp->icmp_ip; | |
559 | if (n < sizeof(*ip) || | |
560 | ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 || | |
561 | ip->ip_id != htons(st->q) || | |
562 | ip->ip_src.s_addr != st->me.sin_addr.s_addr || | |
563 | ip->ip_dst.s_addr != st->sin.sin_addr.s_addr) | |
564 | goto skip_icmp; | |
565 | n -= sizeof(*ip); | |
566 | ||
567 | udp = (struct udphdr *)(ip + 1); | |
568 | if (n < sizeof(udp) || udp->uh_sport != st->me.sin_port || | |
569 | udp->uh_dport != st->sin.sin_port) | |
570 | goto skip_icmp; | |
571 | n -= sizeof(*udp); | |
572 | ||
573 | if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER); | |
574 | else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp; | |
575 | else if (icmp->icmp_nextmtu) return (htons(icmp->icmp_nextmtu)); | |
576 | else return (RC_LOWER); | |
577 | } | |
578 | skip_icmp:; | |
579 | ||
580 | /* If we got a reply to the current probe then we're good. If we got an | |
581 | * error, or the packet's sequence number is wrong, then ignore it. | |
582 | */ | |
583 | if (FD_ISSET(st->sk, fd_in)) { | |
584 | if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK); | |
585 | else if (mypacketp(ps, b, n)) return (RC_HIGHER); | |
586 | else return (RC_OK); | |
587 | } | |
588 | ||
589 | return (RC_OK); | |
590 | ||
591 | fail_0: | |
592 | return (RC_FAIL); | |
593 | } | |
594 | ||
595 | static const struct probe_ops raw_ops = { | |
596 | "raw", OPS_CHAIN, sizeof(struct raw_state), | |
597 | raw_setup, raw_finish, | |
598 | raw_selprep, raw_xmit, raw_selproc | |
599 | }; | |
600 | ||
601 | #undef OPS_CHAIN | |
602 | #define OPS_CHAIN &raw_ops | |
603 | ||
604 | /*----- Doing the job on Linux --------------------------------------------*/ | |
605 | ||
606 | #if defined(linux) | |
607 | ||
608 | #ifndef IP_MTU | |
609 | # define IP_MTU 14 /* Blech! */ | |
610 | #endif | |
611 | ||
/* Private state for the Linux probe method. */
struct linux_state {
  int sk;				/* The connected UDP socket */
};
615 | ||
616 | static int linux_setup(void *stv, int sk, const struct param *pp) | |
617 | { | |
618 | struct linux_state *st = stv; | |
619 | int i, mtu; | |
620 | size_t sz; | |
621 | ||
622 | /* Snaffle the UDP socket. */ | |
623 | st->sk = sk; | |
624 | ||
625 | /* Turn on kernel path-MTU discovery and force DF on. */ | |
18d5f6eb | 626 | i = IP_PMTUDISC_PROBE; |
88510d86 MW |
627 | if (setsockopt(st->sk, IPPROTO_IP, IP_MTU_DISCOVER, &i, sizeof(i))) |
628 | return (-1); | |
629 | ||
630 | /* Read the initial MTU guess back and report it. */ | |
631 | sz = sizeof(mtu); | |
632 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
633 | return (-1); | |
634 | ||
635 | /* Done. */ | |
636 | return (mtu); | |
637 | } | |
638 | ||
/* The Linux method holds nothing which needs releasing. */
static void linux_finish(void *stv)
{
  ;
}
640 | ||
641 | static void linux_selprep(void *stv, int *maxfd, fd_set *fd_in) | |
642 | { struct linux_state *st = stv; ADDFD(st->sk); } | |
643 | ||
644 | static int linux_xmit(void *stv, int mtu) | |
645 | { | |
646 | struct linux_state *st = stv; | |
647 | ||
648 | /* Write the packet. */ | |
649 | if (write(st->sk, buf, mtu - 28) >= 0) return (RC_OK); | |
650 | else if (errno == EMSGSIZE) return (RC_LOWER); | |
651 | else return (RC_FAIL); | |
652 | } | |
653 | ||
654 | static int linux_selproc(void *stv, fd_set *fd_in, struct probestate *ps) | |
655 | { | |
656 | struct linux_state *st = stv; | |
657 | int mtu; | |
658 | size_t sz; | |
659 | ssize_t n; | |
660 | unsigned char b[65536]; | |
661 | ||
662 | /* Read an answer. If it looks like the right kind of error then report a | |
663 | * success. This is potentially wrong, since we can't tell whether an | |
664 | * error was delayed from an earlier probe. However, we never return | |
665 | * RC_LOWER from this method, so the packet sizes ought to be monotonically | |
666 | * decreasing and this won't cause trouble. Otherwise update from the | |
667 | * kernel's idea of the right MTU. | |
668 | */ | |
669 | if (FD_ISSET(st->sk, fd_in)) { | |
670 | n = read(st->sk, &buf, sizeof(buf)); | |
671 | if (n >= 0 ? | |
672 | mypacketp(ps, b, n) : | |
673 | errno == ECONNREFUSED || errno == EHOSTUNREACH) | |
674 | return (RC_HIGHER); | |
675 | sz = sizeof(mtu); | |
676 | if (getsockopt(st->sk, IPPROTO_IP, IP_MTU, &mtu, &sz)) | |
677 | return (RC_FAIL); | |
678 | return (mtu); | |
679 | } | |
680 | return (RC_OK); | |
681 | } | |
682 | ||
683 | static const struct probe_ops linux_ops = { | |
684 | "linux", OPS_CHAIN, sizeof(struct linux_state), | |
685 | linux_setup, linux_finish, | |
686 | linux_selprep, linux_xmit, linux_selproc | |
687 | }; | |
c64d8cd5 | 688 | |
88510d86 MW |
689 | #undef OPS_CHAIN |
690 | #define OPS_CHAIN &linux_ops | |
c64d8cd5 MW |
691 | |
692 | #endif | |
693 | ||
694 | /*----- Help options ------------------------------------------------------*/ | |
695 | ||
88510d86 MW |
696 | static const struct probe_ops *probe_ops = OPS_CHAIN; |
697 | ||
c64d8cd5 MW |
698 | static void version(FILE *fp) |
699 | { pquis(fp, "$, TrIPE version " VERSION "\n"); } | |
700 | ||
/* Write a brief usage synopsis to FP.  The synopsis lists `-v', which the
 * option parser accepts.
 */
static void usage(FILE *fp)
{
  pquis(fp, "Usage: $ [-v] [-H HEADER] [-m METHOD]\n"
	"\t[-r SECS] [-g FACTOR] [-t SECS] HOST [PORT]\n");
}
c64d8cd5 MW |
706 | |
707 | static void help(FILE *fp) | |
708 | { | |
88510d86 MW |
709 | const struct probe_ops *ops; |
710 | ||
c64d8cd5 MW |
711 | version(fp); |
712 | fputc('\n', fp); | |
713 | usage(fp); | |
714 | fputs("\ | |
715 | \n\ | |
716 | Options in full:\n\ | |
717 | \n\ | |
718 | -h, --help Show this help text.\n\ | |
719 | -v, --version Show version number.\n\ | |
720 | -u, --usage Show brief usage message.\n\ | |
721 | \n\ | |
88510d86 MW |
722 | -g, --growth=FACTOR Growth factor for retransmit interval.\n\ |
723 | -m, --method=METHOD Use METHOD to probe for MTU.\n\ | |
724 | -r, --retransmit=SECS Retransmit if no reply after SEC.\n\ | |
725 | -t, --timeout=SECS Give up expecting a reply after SECS.\n\ | |
c64d8cd5 | 726 | -H, --header=HEX Packet header, in hexadecimal.\n\ |
88510d86 MW |
727 | \n\ |
728 | Probe methods:\n\ | |
c64d8cd5 | 729 | ", fp); |
88510d86 MW |
730 | for (ops = probe_ops; ops; ops = ops->next) |
731 | printf("\t%s\n", ops->name); | |
c64d8cd5 MW |
732 | } |
733 | ||
734 | /*----- Main code ---------------------------------------------------------*/ | |
735 | ||
736 | int main(int argc, char *argv[]) | |
737 | { | |
88510d86 | 738 | struct param pp = { 0, 0.333, 3.0, 8.0, 0, OPS_CHAIN }; |
c64d8cd5 MW |
739 | hex_ctx hc; |
740 | dstr d = DSTR_INIT; | |
741 | size_t sz; | |
742 | int i; | |
743 | unsigned long u; | |
744 | char *q; | |
745 | struct hostent *h; | |
746 | struct servent *s; | |
c64d8cd5 MW |
747 | unsigned f = 0; |
748 | ||
749 | #define f_bogus 1u | |
750 | ||
88510d86 MW |
751 | if ((rawicmp = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0 || |
752 | (rawudp = socket(PF_INET, SOCK_RAW, IPPROTO_UDP)) < 0) | |
753 | rawerr = errno; | |
754 | if (setuid(getuid())) | |
755 | abort(); | |
756 | ||
c64d8cd5 MW |
757 | ego(argv[0]); |
758 | fillbuffer(buf, sizeof(buf)); | |
88510d86 | 759 | pp.sin.sin_port = htons(7); |
c64d8cd5 MW |
760 | |
761 | for (;;) { | |
762 | static const struct option opts[] = { | |
763 | { "help", 0, 0, 'h' }, | |
88510d86 | 764 | { "version", 0, 0, 'V' }, |
c64d8cd5 MW |
765 | { "usage", 0, 0, 'u' }, |
766 | { "header", OPTF_ARGREQ, 0, 'H' }, | |
88510d86 MW |
767 | { "growth", OPTF_ARGREQ, 0, 'g' }, |
768 | { "method", OPTF_ARGREQ, 0, 'm' }, | |
769 | { "retransmit", OPTF_ARGREQ, 0, 'r' }, | |
c64d8cd5 | 770 | { "timeout", OPTF_ARGREQ, 0, 't' }, |
88510d86 | 771 | { "verbose", 0, 0, 'v' }, |
c64d8cd5 MW |
772 | { 0, 0, 0, 0 } |
773 | }; | |
774 | ||
88510d86 | 775 | i = mdwopt(argc, argv, "hVu" "H:g:m:r:t:v", opts, 0, 0, 0); |
c64d8cd5 MW |
776 | if (i < 0) break; |
777 | switch (i) { | |
778 | case 'h': help(stdout); exit(0); | |
88510d86 | 779 | case 'V': version(stdout); exit(0); |
c64d8cd5 MW |
780 | case 'u': usage(stdout); exit(0); |
781 | ||
782 | case 'H': | |
783 | DRESET(&d); | |
784 | hex_init(&hc); | |
785 | hex_decode(&hc, optarg, strlen(optarg), &d); | |
786 | hex_decode(&hc, 0, 0, &d); | |
88510d86 | 787 | sz = d.len < 532 ? d.len : 532; |
c64d8cd5 | 788 | memcpy(buf, d.buf, sz); |
88510d86 | 789 | pp.seqoff = sz; |
c64d8cd5 MW |
790 | break; |
791 | ||
88510d86 MW |
792 | case 'g': pp.regr = s2f(optarg, "retransmit growth factor"); break; |
793 | case 'r': pp.retx = s2f(optarg, "retransmit interval"); break; | |
794 | case 't': pp.timeout = s2f(optarg, "timeout"); break; | |
795 | ||
796 | case 'm': | |
797 | for (pp.pops = OPS_CHAIN; pp.pops; pp.pops = pp.pops->next) | |
798 | if (strcmp(pp.pops->name, optarg) == 0) goto found_alg; | |
799 | die(EXIT_FAILURE, "unknown probe algorithm `%s'", optarg); | |
800 | found_alg: | |
c64d8cd5 MW |
801 | break; |
802 | ||
88510d86 MW |
803 | case 'v': pp.f |= F_VERBOSE; break; |
804 | ||
c64d8cd5 MW |
805 | default: |
806 | f |= f_bogus; | |
807 | break; | |
808 | } | |
809 | } | |
810 | argv += optind; argc -= optind; | |
811 | if ((f & f_bogus) || 1 > argc || argc > 2) { | |
812 | usage(stderr); | |
813 | exit(EXIT_FAILURE); | |
814 | } | |
815 | ||
816 | if ((h = gethostbyname(*argv)) == 0) | |
817 | die(EXIT_FAILURE, "unknown host `%s': %s", *argv, hstrerror(h_errno)); | |
818 | if (h->h_addrtype != AF_INET) | |
819 | die(EXIT_FAILURE, "unsupported address family for host `%s'", *argv); | |
88510d86 | 820 | memcpy(&pp.sin.sin_addr, h->h_addr, sizeof(struct in_addr)); |
c64d8cd5 MW |
821 | argv++; argc--; |
822 | ||
823 | if (*argv) { | |
824 | errno = 0; | |
825 | u = strtoul(*argv, &q, 0); | |
826 | if (!errno && !*q) | |
88510d86 | 827 | pp.sin.sin_port = htons(u); |
c64d8cd5 MW |
828 | else if ((s = getservbyname(*argv, "udp")) == 0) |
829 | die(EXIT_FAILURE, "unknown UDP service `%s'", *argv); | |
830 | else | |
88510d86 | 831 | pp.sin.sin_port = s->s_port; |
c64d8cd5 MW |
832 | } |
833 | ||
88510d86 MW |
834 | pp.sin.sin_family = AF_INET; |
835 | i = pathmtu(&pp); | |
c64d8cd5 MW |
836 | if (i < 0) |
837 | die(EXIT_FAILURE, "failed to discover MTU: %s", strerror(errno)); | |
838 | printf("%d\n", i); | |
839 | if (ferror(stdout) || fflush(stdout) || fclose(stdout)) | |
840 | die(EXIT_FAILURE, "failed to write result: %s", strerror(errno)); | |
841 | return (0); | |
842 | } | |
843 | ||
844 | /*----- That's all, folks -------------------------------------------------*/ |