+# define sane_htons
+# define sane_htonl
+#endif
+
+/* Raw-socket handles shared by every probe that uses the raw method. Both
+ * descriptors start out as -1. When `rawerr' is nonzero, raw_setup reports
+ * it through `errno' and fails instead of probing. (Whatever opens the
+ * sockets and records the error lies outside this section.)
+ */
+static int rawicmp = -1, rawudp = -1, rawerr = 0;
+
+/* Accumulator value to pass as A on the first call to ipcksum. */
+#define IPCK_INIT 0xffff
+
+/* Compute an IP checksum over the N bytes at BUF.
+ *
+ * This is a restartable interface: pass `IPCK_INIT' as A for the first
+ * chunk, and the value returned for the previous chunk as A thereafter.
+ * A trailing odd byte is summed as the high octet of one more 16-bit word,
+ * so an odd-length chunk followed by further data is summed as though a
+ * zero octet had been inserted between them.
+ *
+ * Returns the folded 16-bit sum, complemented -- except that a sum of
+ * 0xffff is returned unchanged rather than as 0, so the result is never
+ * zero. Words are fetched with LOAD16_B (defined elsewhere; presumably a
+ * big-endian 16-bit load -- confirm); raw_xmit passes the result through
+ * `htons' before storing it.
+ */
+static unsigned ipcksum(const void *buf, size_t n, unsigned a)
+{
+  unsigned long aa = a ^ 0xffff;   /* undo the complement from last time */
+  const unsigned char *p = buf;
+
+  /* Count the remaining length down instead of comparing against a
+   * `limit - 1' pointer: with N == 0 that pointer would lie before the
+   * start of the buffer, which isn't a valid thing to compute.
+   */
+  for (; n >= 2; n -= 2, p += 2) aa += LOAD16_B(p);
+  if (n) aa += (unsigned)(*p) << 8;
+
+  /* Fold the carries back into the bottom 16 bits. */
+  do aa = (aa & 0xffff) + (aa >> 16); while (aa >= 0x10000);
+  return (aa == 0xffff ? aa : aa ^ 0xffff);
+}
+
+/* TCP/UDP pseudoheader structure. raw_xmit fills one of these in and feeds
+ * it to ipcksum, ahead of the UDP header and payload, when computing the
+ * UDP checksum.
+ */
+struct phdr {
+ struct in_addr ph_src, ph_dst; /* source and destination addresses */
+ u_char ph_z, ph_p; /* zero octet; protocol number */
+ u_short ph_len; /* UDP length, copied from `uh_ulen' */
+};
+
+/* Per-probe state for the raw-socket method; filled in by raw_setup. */
+struct raw_state {
+ struct sockaddr_in me, sin; /* local address of `sk'; peer address */
+ int sk, rawicmp, rawudp; /* probe socket; raw ICMP and UDP sockets */
+ unsigned q; /* sequence number, sent as the IP id */
+};
+
+/* Prepare the raw-socket prober state STV, given the probe socket SK and
+ * the parameters PP.
+ *
+ * Records SK and the shared raw sockets in the state, picks a random
+ * 16-bit starting sequence number, captures the peer address from PP and
+ * the local address of SK, and switches on `IP_HDRINCL' for the raw UDP
+ * socket.
+ *
+ * Returns an upper bound on the MTU taken from the local network
+ * interfaces, or -1 with `errno' set on failure (`rawerr' if the raw
+ * sockets were never acquired; ENOTCONN if no interface supplied an MTU).
+ */
+static int raw_setup(void *stv, int sk, const struct param *pp)
+{
+  struct raw_state *st = stv;
+  socklen_t sz;
+  int i, mtu = -1, err;
+  struct ifaddrs *ifa, *ifaa, *ifap;
+  struct ifreq ifr;
+
+  /* If we couldn't acquire raw sockets, we fail here. */
+  if (rawerr) { errno = rawerr; goto fail_0; }
+  st->rawicmp = rawicmp; st->rawudp = rawudp; st->sk = sk;
+
+  /* Initialize the sequence number. */
+  st->q = rand() & 0xffff;
+
+  /* Snaffle the local and remote address and port number. */
+  st->sin = pp->sin;
+  sz = sizeof(st->me);
+  if (getsockname(sk, (struct sockaddr *)&st->me, &sz))
+    goto fail_0;
+
+  /* There isn't a portable way to force the DF flag onto a packet through
+   * UDP, or even through raw IP, unless we write the entire IP header
+   * ourselves. This is somewhat annoying, especially since we have an
+   * uphill struggle keeping track of which systems randomly expect which
+   * header fields to be presented in host byte order. Oh, well.
+   */
+  i = 1;
+  if (setsockopt(st->rawudp, IPPROTO_IP, IP_HDRINCL, &i, sizeof(i)))
+    goto fail_0;
+
+  /* Find an upper bound on the MTU. Do two passes over the interface
+   * list. If we can find matches for our local address then use the
+   * highest one of those; otherwise do a second pass and simply take the
+   * highest MTU of any network interface.
+   */
+  if (getifaddrs(&ifaa)) goto fail_0;
+  for (i = 0; i < 2; i++) {
+    for (ifap = 0, ifa = ifaa; ifa; ifa = ifa->ifa_next) {
+
+      /* Skip entries which are down, carry no IPv4 address, (first pass)
+       * don't hold our local address, (second pass) repeat the name of
+       * the interface we queried last, or whose name won't fit in the
+       * request structure.
+       */
+      if (!(ifa->ifa_flags & IFF_UP) || !ifa->ifa_addr ||
+          ifa->ifa_addr->sa_family != AF_INET ||
+          (i == 0 &&
+           ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr !=
+             st->me.sin_addr.s_addr) ||
+          (i == 1 && ifap && strcmp(ifap->ifa_name, ifa->ifa_name) == 0) ||
+          strlen(ifa->ifa_name) >= sizeof(ifr.ifr_name))
+        continue;
+      ifap = ifa;
+      strcpy(ifr.ifr_name, ifa->ifa_name);   /* length checked above */
+      if (ioctl(sk, SIOCGIFMTU, &ifr)) goto fail_1;
+      if (mtu < ifr.ifr_mtu) mtu = ifr.ifr_mtu;
+    }
+    if (mtu > 0) break;
+  }
+  if (mtu < 0) { errno = ENOTCONN; goto fail_1; }
+  freeifaddrs(ifaa);
+
+  /* Done. */
+  return (mtu);
+
+fail_1:
+  /* Don't let the cleanup disturb the error code we're about to report. */
+  err = errno; freeifaddrs(ifaa); errno = err;
+fail_0:
+  return (-1);
+}
+
+/* Finish with the raw prober state STV. There is nothing to do here. */
+static void raw_finish(void *stv)
+{
+  (void)stv;
+}
+
+/* Nominate the descriptors this prober wants watched: the probe socket
+ * and the raw ICMP socket are each handed to ADDFD (a macro defined
+ * elsewhere, which presumably updates MAXFD and FD_IN -- confirm).
+ */
+static void raw_selprep(void *stv, int *maxfd, fd_set *fd_in)
+{
+  struct raw_state *state = stv;
+
+  ADDFD(state->sk);
+  ADDFD(state->rawicmp);
+}
+
+/* Transmit one probe: a UDP datagram inside a hand-built IP packet whose
+ * total length is MTU octets, with the don't-fragment flag set.
+ *
+ * The payload is copied from `buf' (defined elsewhere in the file, and
+ * presumably at least MTU octets long -- confirm). The sequence number in
+ * the state STV is advanced with STEP and sent as the IP identification
+ * field; raw_selproc compares it against the header quoted in ICMP errors.
+ *
+ * Returns RC_OK if `sendto' accepted the packet, RC_LOWER if it failed
+ * with EMSGSIZE, and RC_FAIL with `errno' set on any other failure. An MTU
+ * too small to hold the IP and UDP headers, or too large for the 16-bit IP
+ * length field, is rejected with EINVAL.
+ */
+static int raw_xmit(void *stv, int mtu)
+{
+  struct raw_state *st = stv;
+  unsigned char b[65536], *p;
+  struct ip *ip;
+  struct udphdr *udp;
+  struct phdr ph;
+  unsigned ck;
+
+  /* Refuse sizes we can't build. Anything smaller than the two headers
+   * would make the payload length below negative (and so enormous once
+   * converted for `memcpy'); anything above 65535 would overrun the
+   * buffer and wrap the length field.
+   */
+  if (mtu < (int)(sizeof(*ip) + sizeof(*udp)) || mtu > 0xffff)
+    { errno = EINVAL; goto fail_0; }
+
+  /* Build the IP header. The fields written through `sane_htons' are the
+   * ones for which the byte-order convention is chosen elsewhere in the
+   * file.
+   */
+  ip = (struct ip *)b;
+  ip->ip_v = 4;
+  ip->ip_hl = sizeof(*ip)/4;
+  ip->ip_tos = IPTOS_RELIABILITY;
+  ip->ip_len = sane_htons(mtu);
+  STEP(st->q); ip->ip_id = htons(st->q);
+  ip->ip_off = sane_htons(0 | IP_DF);
+  ip->ip_ttl = 64;
+  ip->ip_p = IPPROTO_UDP;
+  ip->ip_sum = 0;
+  ip->ip_src = st->me.sin_addr;
+  ip->ip_dst = st->sin.sin_addr;
+
+  /* Build a UDP packet in the output buffer. */
+  udp = (struct udphdr *)(ip + 1);
+  udp->uh_sport = st->me.sin_port;
+  udp->uh_dport = st->sin.sin_port;
+  udp->uh_ulen = htons(mtu - sizeof(*ip));
+  udp->uh_sum = 0;
+
+  /* Copy the payload. */
+  p = (unsigned char *)(udp + 1);
+  memcpy(p, buf, mtu - (p - b));
+
+  /* Calculate the UDP checksum: pseudoheader first, then the UDP header
+   * (with its checksum field still zero) and the payload.
+   */
+  ph.ph_src = ip->ip_src;
+  ph.ph_dst = ip->ip_dst;
+  ph.ph_z = 0;
+  ph.ph_p = IPPROTO_UDP;
+  ph.ph_len = udp->uh_ulen;
+  ck = IPCK_INIT;
+  ck = ipcksum(&ph, sizeof(ph), ck);
+  ck = ipcksum(udp, mtu - sizeof(*ip), ck);
+  udp->uh_sum = htons(ck);
+
+  /* Send the whole thing off. If we're too big for the interface then we
+   * might need to trim immediately.
+   */
+  if (sendto(st->rawudp, b, mtu, 0,
+             (struct sockaddr *)&st->sin, sizeof(st->sin)) < 0) {
+    if (errno == EMSGSIZE) return (RC_LOWER);
+    else goto fail_0;
+  }
+
+  /* Done. */
+  return (RC_OK);
+
+fail_0:
+  return (RC_FAIL);
+}
+
+/* Deal with whichever of the prober's descriptors are marked in FD_IN.
+ *
+ * A packet on the raw ICMP socket is examined first. It counts only if it
+ * is a destination-unreachable message quoting our current probe: a UDP
+ * packet with an option-free IP header, the current sequence number as
+ * its IP id, and our addresses and ports. For such a message:
+ *
+ *   * port unreachable returns RC_HIGHER;
+ *   * fragmentation needed returns the advertised next-hop MTU if there is
+ *     one, and RC_LOWER otherwise.
+ *
+ * Anything else on the ICMP socket is ignored. A packet on the probe
+ * socket returns RC_HIGHER if `mypacketp' accepts it. In every remaining
+ * case the result is RC_OK, except that a failed read from the ICMP
+ * socket returns RC_FAIL.
+ */
+static int raw_selproc(void *stv, fd_set *fd_in, struct probestate *ps)
+{
+  struct raw_state *st = stv;
+  unsigned char b[65536];
+  struct ip *ip;
+  struct icmp *icmp;
+  struct udphdr *udp;
+  size_t hlen;
+  ssize_t n;
+
+  /* An ICMP packet: see what's inside. */
+  if (FD_ISSET(st->rawicmp, fd_in)) {
+    if ((n = read(st->rawicmp, b, sizeof(b))) < 0) goto fail_0;
+
+    /* The outer IP header. Its length comes from the header-length
+     * field, counted in 32-bit words; it must cover at least the fixed
+     * header and not exceed what we actually received.
+     */
+    ip = (struct ip *)b;
+    if ((size_t)n < sizeof(*ip)) goto skip_icmp;
+    hlen = 4*(size_t)ip->ip_hl;
+    if (hlen < sizeof(*ip) || (size_t)n < hlen ||
+        ip->ip_v != 4 || ip->ip_p != IPPROTO_ICMP)
+      goto skip_icmp;
+    n -= (ssize_t)hlen;
+
+    /* The ICMP header proper. */
+    icmp = (struct icmp *)(b + hlen);
+    if ((size_t)n < sizeof(*icmp) || icmp->icmp_type != ICMP_UNREACH)
+      goto skip_icmp;
+    n -= offsetof(struct icmp, icmp_ip);
+
+    /* The quoted IP header: it must be from our current probe. */
+    ip = &icmp->icmp_ip;
+    if ((size_t)n < sizeof(*ip) ||
+        ip->ip_p != IPPROTO_UDP || ip->ip_hl != sizeof(*ip)/4 ||
+        ip->ip_id != htons(st->q) ||
+        ip->ip_src.s_addr != st->me.sin_addr.s_addr ||
+        ip->ip_dst.s_addr != st->sin.sin_addr.s_addr)
+      goto skip_icmp;
+    n -= sizeof(*ip);
+
+    /* The quoted UDP header: check that the ports are ours too. */
+    udp = (struct udphdr *)(ip + 1);
+    if ((size_t)n < sizeof(*udp) || udp->uh_sport != st->me.sin_port ||
+        udp->uh_dport != st->sin.sin_port)
+      goto skip_icmp;
+
+    if (icmp->icmp_code == ICMP_UNREACH_PORT) return (RC_HIGHER);
+    else if (icmp->icmp_code != ICMP_UNREACH_NEEDFRAG) goto skip_icmp;
+    else if (icmp->icmp_nextmtu) return (ntohs(icmp->icmp_nextmtu));
+    else return (RC_LOWER);
+  }
+skip_icmp:;
+
+  /* If we got a reply to the current probe then we're good. If we got an
+   * error, or the packet's sequence number is wrong, then ignore it.
+   */
+  if (FD_ISSET(st->sk, fd_in)) {
+    if ((n = read(st->sk, b, sizeof(b))) < 0) return (RC_OK);
+    else if (mypacketp(ps, b, n)) return (RC_HIGHER);
+    else return (RC_OK);
+  }
+
+  return (RC_OK);
+
+fail_0:
+  return (RC_FAIL);
+}
+
+/* Operations table for the raw-socket prober. Its second member is
+ * whatever OPS_CHAIN expands to at this point; OPS_CHAIN is then redefined
+ * to the address of this table, so a later table initialized the same way
+ * will refer back to this one. (The layout of `struct probe_ops' is
+ * declared elsewhere.)
+ */
+static const struct probe_ops raw_ops = {
+ "raw", OPS_CHAIN, sizeof(struct raw_state),
+ raw_setup, raw_finish,
+ raw_selprep, raw_xmit, raw_selproc
+};
+
+#undef OPS_CHAIN
+#define OPS_CHAIN &raw_ops
+
+/*----- Doing the job on Linux --------------------------------------------*/
+
+#if defined(linux)
+
+/* Fallback for system headers which don't define IP_MTU.
+ * NOTE(review): 14 is presumably the value from Linux's <linux/in.h> --
+ * confirm.
+ */
+#ifndef IP_MTU
+# define IP_MTU 14 /* Blech! */
+#endif
+
+/* State for the Linux-specific prober; the code that uses it is not
+ * visible in this section.
+ */
+struct linux_state {
+ int sk; /* presumably the probe socket descriptor -- TODO confirm */
+};