| 1 | /* User-kernel network link */ |
| 2 | |
| 3 | /* See RFCs 791, 792, 1123 and 1812 */ |
| 4 | |
| 5 | /* The netlink device is actually a router. Tunnels are unnumbered |
| 6 | point-to-point lines (RFC1812 section 2.2.7); the router has a |
| 7 | single address (the 'router-id'). */ |
| 8 | |
| 9 | /* This is where we currently have the anti-spoofing paranoia - before |
| 10 | sending a packet to the kernel we check that the tunnel it came |
| 11 | over could reasonably have produced it. */ |
| 12 | |
| 13 | |
| 14 | /* Points to note from RFC1812 (which may require changes in this |
| 15 | file): |
| 16 | |
| 17 | 3.3.4 Maximum Transmission Unit - MTU |
| 18 | |
| 19 | The MTU of each logical interface MUST be configurable within the |
| 20 | range of legal MTUs for the interface. |
| 21 | |
| 22 | Many Link Layer protocols define a maximum frame size that may be |
| 23 | sent. In such cases, a router MUST NOT allow an MTU to be set which |
| 24 | would allow sending of frames larger than those allowed by the Link |
| 25 | Layer protocol. However, a router SHOULD be willing to receive a |
| 26 | packet as large as the maximum frame size even if that is larger than |
| 27 | the MTU. |
| 28 | |
| 29 | 4.2.1 A router SHOULD count datagrams discarded. |
| 30 | |
| 31 | 4.2.2.1 Source route options - we probably should implement processing |
| 32 | of source routes, even though mostly the security policy will prevent |
| 33 | their use. |
| 34 | |
| 35 | 5.3.13.4 Source Route Options |
| 36 | |
| 37 | A router MUST implement support for source route options in forwarded |
| 38 | packets. A router MAY implement a configuration option that, when |
| 39 | enabled, causes all source-routed packets to be discarded. However, |
| 40 | such an option MUST NOT be enabled by default. |
| 41 | |
| 42 | 5.3.13.5 Record Route Option |
| 43 | |
| 44 | Routers MUST support the Record Route option in forwarded packets. |
| 45 | |
| 46 | A router MAY provide a configuration option that, if enabled, will |
| 47 | cause the router to ignore (i.e., pass through unchanged) Record |
| 48 | Route options in forwarded packets. If provided, such an option MUST |
| 49 | default to enabling the record-route. This option should not affect |
| 50 | the processing of Record Route options in datagrams received by the |
| 51 | router itself (in particular, Record Route options in ICMP echo |
| 52 | requests will still be processed according to Section [4.3.3.6]). |
| 53 | |
| 54 | 5.3.13.6 Timestamp Option |
| 55 | |
| 56 | Routers MUST support the timestamp option in forwarded packets. A |
| 57 | timestamp value MUST follow the rules given [INTRO:2]. |
| 58 | |
| 59 | If the flags field = 3 (timestamp and prespecified address), the |
| 60 | router MUST add its timestamp if the next prespecified address |
| 61 | matches any of the router's IP addresses. It is not necessary that |
| 62 | the prespecified address be either the address of the interface on |
| 63 | which the packet arrived or the address of the interface over which |
| 64 | it will be sent. |
| 65 | |
| 66 | |
| 67 | 4.2.2.7 Fragmentation: RFC 791 Section 3.2 |
| 68 | |
| 69 | Fragmentation, as described in [INTERNET:1], MUST be supported by a |
| 70 | router. |
| 71 | |
| 72 | 4.2.2.8 Reassembly: RFC 791 Section 3.2 |
| 73 | |
| 74 | As specified in the corresponding section of [INTRO:2], a router MUST |
| 75 | support reassembly of datagrams that it delivers to itself. |
| 76 | |
| 77 | 4.2.2.9 Time to Live: RFC 791 Section 3.2 |
| 78 | |
| 79 | Note in particular that a router MUST NOT check the TTL of a packet |
| 80 | except when forwarding it. |
| 81 | |
| 82 | A router MUST NOT discard a datagram just because it was received |
| 83 | with TTL equal to zero or one; if it is to the router and otherwise |
| 84 | valid, the router MUST attempt to receive it. |
| 85 | |
| 86 | On messages the router originates, the IP layer MUST provide a means |
| 87 | for the transport layer to set the TTL field of every datagram that |
| 88 | is sent. When a fixed TTL value is used, it MUST be configurable. |
| 89 | |
| 90 | |
| 91 | 8.1 The Simple Network Management Protocol - SNMP |
| 92 | 8.1.1 SNMP Protocol Elements |
| 93 | |
| 94 | Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate |
| 95 | using UDP/IP as its transport and network protocols. |
| 96 | |
| 97 | |
| 98 | */ |
| 99 | |
| 100 | #include <string.h> |
| 101 | #include <assert.h> |
| 102 | #include <limits.h> |
| 103 | #include "secnet.h" |
| 104 | #include "util.h" |
| 105 | #include "ipaddr.h" |
| 106 | #include "netlink.h" |
| 107 | #include "process.h" |
| 108 | |
/* MDEBUG(fmt, ...) emits a Message() at M_DEBUG level when this file is
 * built with NETLINK_DEBUG defined; otherwise it expands to a no-op
 * expression and its arguments are not evaluated. */
#ifndef NETLINK_DEBUG
#define MDEBUG(...) ((void)0)
#else /* NETLINK_DEBUG */
#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
#endif /* NETLINK_DEBUG */
| 114 | |
/* ICMP message types, and the codes used with each type, as they
 * appear in the `type' and `code' bytes of the ICMP header. */
enum {
    ICMP_TYPE_ECHO_REPLY = 0,

    ICMP_TYPE_UNREACHABLE = 3,
    ICMP_CODE_NET_UNREACHABLE = 0,
    ICMP_CODE_PROTOCOL_UNREACHABLE = 2,
    ICMP_CODE_FRAGMENTATION_REQUIRED = 4,
    ICMP_CODE_NET_PROHIBITED = 13,

    ICMP_TYPE_ECHO_REQUEST = 8,

    ICMP_TYPE_TIME_EXCEEDED = 11,
    ICMP_CODE_TTL_EXCEEDED = 0
};
| 127 | |
/* Generic IP checksum routine (the Internet checksum of RFC 1071).
 *
 *   iph    start of the bytes to checksum; no alignment is required
 *   count  number of bytes to checksum; a count <= 0 sums nothing
 *
 * The data is summed as big-endian 16-bit words using one's-complement
 * arithmetic.  The complement of that sum is returned in network byte
 * order, ready to be stored directly into a header checksum field.
 * Running it over data whose checksum field is already correct
 * therefore yields 0.
 */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    /* Build each word from its two bytes rather than loading a
       uint16_t through a cast pointer: the data may be unaligned. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* RFC 1071: a trailing odd byte is padded on the right with a
       zero byte, so it is the HIGH-order byte of the last word. */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back in (end-around carry). */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
| 144 | |
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph points at the start of the IP header and ihl is the header
 * length in 32-bit words (callers in this file pass the header's
 * ihl field).  This variant is only compiled when the `i386' macro
 * is defined; all other builds use the portable fallback below.
 *
 * NOTE(review): after `subl $4' the `jbe 2f' skips the summing and
 * the final `notl' when ihl <= 4; callers should only pass ihl >= 5
 * - confirm that every call site guarantees this.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
        "movl (%1), %0 ;\n"
        "subl $4, %2 ;\n"
        "jbe 2f ;\n"
        "addl 4(%1), %0 ;\n"
        "adcl 8(%1), %0 ;\n"
        "adcl 12(%1), %0 ;\n"
        "1: adcl 16(%1), %0 ;\n"
        "lea 4(%1), %1 ;\n"
        "decl %2 ;\n"
        "jne 1b ;\n"
        "adcl $0, %0 ;\n"
        "movl %0, %2 ;\n"
        "shrl $16, %0 ;\n"
        "addw %w2, %w0 ;\n"
        "adcl $0, %0 ;\n"
        "notl %0 ;\n"
        "2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    /* The 32-bit result is narrowed to uint16_t by the return. */
    return sum;
}
#else
/* Portable fallback: checksum the ihl 32-bit words (ihl*4 bytes)
 * starting at iph using the generic ip_csum().  The assert rejects
 * values of ihl for which ihl*4 could overflow an int. */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
| 189 | |
/* Masks for the `frag' field of struct iphdr.  They are applied to
 * the field's value after conversion to host byte order. */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff) /* fragment offset, 8-byte units */
#define IPHDR_FRAG_MORE ((uint16_t)0x2000) /* more fragments follow */
#define IPHDR_FRAG_DONT ((uint16_t)0x4000) /* don't fragment */
/* reserved 0x8000 */

/* Fixed part of an IPv4 header as it appears at the start of a packet
 * buffer.  Multi-byte fields hold network byte order; the code in this
 * file converts them with ntohs/ntohl/htons/htonl.  The order of the
 * two 4-bit fields depends on WORDS_BIGENDIAN so that `version' lands
 * in the high nibble of the first byte. */
struct iphdr {
#ifdef WORDS_BIGENDIAN
    uint8_t version:4;
    uint8_t ihl:4;          /* header length in 32-bit words */
#else
    uint8_t ihl:4;          /* header length in 32-bit words */
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;       /* length of the whole datagram in bytes */
    uint16_t id;
    uint16_t frag;          /* flags and offset, see IPHDR_FRAG_* */
    uint8_t ttl;
    uint8_t protocol;       /* 1 is ICMP */
    uint16_t check;         /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
| 213 | |
| 214 | struct icmphdr { |
| 215 | struct iphdr iph; |
| 216 | uint8_t type; |
| 217 | uint8_t code; |
| 218 | uint16_t check; |
| 219 | union icmpinfofield { |
| 220 | uint32_t unused; |
| 221 | struct { |
| 222 | uint8_t pointer; |
| 223 | uint8_t unused1; |
| 224 | uint16_t unused2; |
| 225 | } pprob; |
| 226 | uint32_t gwaddr; |
| 227 | struct { |
| 228 | uint16_t id; |
| 229 | uint16_t seq; |
| 230 | } echo; |
| 231 | struct { |
| 232 | uint16_t unused; |
| 233 | uint16_t mtu; |
| 234 | } fragneeded; |
| 235 | } d; |
| 236 | }; |
| 237 | |
| 238 | static const union icmpinfofield icmp_noinfo; |
| 239 | |
| 240 | static void netlink_client_deliver(struct netlink *st, |
| 241 | struct netlink_client *client, |
| 242 | uint32_t source, uint32_t dest, |
| 243 | struct buffer_if *buf); |
| 244 | static void netlink_host_deliver(struct netlink *st, |
| 245 | struct netlink_client *sender, |
| 246 | uint32_t source, uint32_t dest, |
| 247 | struct buffer_if *buf); |
| 248 | |
| 249 | static const char *sender_name(struct netlink_client *sender /* or NULL */) |
| 250 | { |
| 251 | return sender?sender->name:"(local)"; |
| 252 | } |
| 253 | |
| 254 | static void netlink_packet_deliver(struct netlink *st, |
| 255 | struct netlink_client *client, |
| 256 | struct buffer_if *buf); |
| 257 | |
| 258 | /* XXX RFC1812 4.3.2.5: |
| 259 | All other ICMP error messages (Destination Unreachable, |
| 260 | Redirect, Time Exceeded, and Parameter Problem) SHOULD have their |
| 261 | precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK |
| 262 | CONTROL). The IP Precedence value for these error messages MAY be |
| 263 | settable. |
| 264 | */ |
| 265 | static struct icmphdr *netlink_icmp_tmpl(struct netlink *st, |
| 266 | uint32_t source, uint32_t dest, |
| 267 | uint16_t len) |
| 268 | { |
| 269 | struct icmphdr *h; |
| 270 | |
| 271 | BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl"); |
| 272 | buffer_init(&st->icmp,calculate_max_start_pad()); |
| 273 | h=buf_append(&st->icmp,sizeof(*h)); |
| 274 | |
| 275 | h->iph.version=4; |
| 276 | h->iph.ihl=5; |
| 277 | h->iph.tos=0; |
| 278 | h->iph.tot_len=htons(len+(h->iph.ihl*4)+8); |
| 279 | h->iph.id=0; |
| 280 | h->iph.frag=0; |
| 281 | h->iph.ttl=255; /* XXX should be configurable */ |
| 282 | h->iph.protocol=1; |
| 283 | h->iph.saddr=htonl(source); |
| 284 | h->iph.daddr=htonl(dest); |
| 285 | h->iph.check=0; |
| 286 | h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl); |
| 287 | h->check=0; |
| 288 | h->d.unused=0; |
| 289 | |
| 290 | return h; |
| 291 | } |
| 292 | |
| 293 | /* Fill in the ICMP checksum field correctly */ |
| 294 | static void netlink_icmp_csum(struct icmphdr *h) |
| 295 | { |
| 296 | int32_t len; |
| 297 | |
| 298 | len=ntohs(h->iph.tot_len)-(4*h->iph.ihl); |
| 299 | h->check=0; |
| 300 | h->check=ip_csum(&h->type,len); |
| 301 | } |
| 302 | |
| 303 | /* RFC1122: |
| 304 | * An ICMP error message MUST NOT be sent as the result of |
| 305 | * receiving: |
| 306 | * |
| 307 | * * an ICMP error message, or |
| 308 | * |
| 309 | * * a datagram destined to an IP broadcast or IP multicast |
| 310 | * address, or |
| 311 | * |
| 312 | * * a datagram sent as a link-layer broadcast, or |
| 313 | * |
| 314 | * * a non-initial fragment, or |
| 315 | * |
| 316 | * * a datagram whose source address does not define a single |
| 317 | * host -- e.g., a zero address, a loopback address, a |
| 318 | * broadcast address, a multicast address, or a Class E |
| 319 | * address. |
| 320 | */ |
| 321 | static bool_t netlink_icmp_may_reply(struct buffer_if *buf) |
| 322 | { |
| 323 | struct iphdr *iph; |
| 324 | struct icmphdr *icmph; |
| 325 | uint32_t source; |
| 326 | |
| 327 | if (buf->size < (int)sizeof(struct icmphdr)) return False; |
| 328 | iph=(struct iphdr *)buf->start; |
| 329 | icmph=(struct icmphdr *)buf->start; |
| 330 | if (iph->protocol==1) { |
| 331 | switch(icmph->type) { |
| 332 | /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types |
| 333 | * as retrieved Thu, 20 Mar 2014 00:16:44 +0000. |
| 334 | * Deprecated, reserved, unassigned and experimental |
| 335 | * options are treated as not safe to reply to. |
| 336 | */ |
| 337 | case 0: /* Echo Reply */ |
| 338 | case 8: /* Echo */ |
| 339 | case 13: /* Timestamp */ |
| 340 | case 14: /* Timestamp Reply */ |
| 341 | return True; |
| 342 | default: |
| 343 | return False; |
| 344 | } |
| 345 | } |
| 346 | /* How do we spot broadcast destination addresses? */ |
| 347 | if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False; |
| 348 | source=ntohl(iph->saddr); |
| 349 | if (source==0) return False; |
| 350 | if ((source&0xff000000)==0x7f000000) return False; |
| 351 | /* How do we spot broadcast source addresses? */ |
| 352 | if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */ |
| 353 | if ((source&0xf0000000)==0xf0000000) return False; /* Class E */ |
| 354 | return True; |
| 355 | } |
| 356 | |
| 357 | /* How much of the original IP packet do we include in its ICMP |
| 358 | response? The header plus up to 64 bits. */ |
| 359 | |
| 360 | /* XXX TODO RFC1812: |
| 361 | 4.3.2.3 Original Message Header |
| 362 | |
| 363 | Historically, every ICMP error message has included the Internet |
| 364 | header and at least the first 8 data bytes of the datagram that |
| 365 | triggered the error. This is no longer adequate, due to the use of |
| 366 | IP-in-IP tunneling and other technologies. Therefore, the ICMP |
| 367 | datagram SHOULD contain as much of the original datagram as possible |
| 368 | without the length of the ICMP datagram exceeding 576 bytes. The |
| 369 | returned IP header (and user data) MUST be identical to that which |
| 370 | was received, except that the router is not required to undo any |
| 371 | modifications to the IP header that are normally performed in |
| 372 | forwarding that were performed before the error was detected (e.g., |
| 373 | decrementing the TTL, or updating options). Note that the |
| 374 | requirements of Section [4.3.3.5] supersede this requirement in some |
| 375 | cases (i.e., for a Parameter Problem message, if the problem is in a |
| 376 | modified field, the router must undo the modification). See Section |
| 377 | [4.3.3.5]). |
| 378 | */ |
| 379 | static uint16_t netlink_icmp_reply_len(struct buffer_if *buf) |
| 380 | { |
| 381 | if (buf->size < (int)sizeof(struct iphdr)) return 0; |
| 382 | struct iphdr *iph=(struct iphdr *)buf->start; |
| 383 | uint16_t hlen,plen; |
| 384 | |
| 385 | hlen=iph->ihl*4; |
| 386 | /* We include the first 8 bytes of the packet data, provided they exist */ |
| 387 | hlen+=8; |
| 388 | plen=ntohs(iph->tot_len); |
| 389 | return (hlen>plen?plen:hlen); |
| 390 | } |
| 391 | |
| 392 | /* client indicates where the packet we're constructing a response to |
| 393 | comes from. NULL indicates the host. */ |
| 394 | static void netlink_icmp_simple(struct netlink *st, |
| 395 | struct netlink_client *origsender, |
| 396 | struct buffer_if *buf, |
| 397 | uint8_t type, uint8_t code, |
| 398 | union icmpinfofield info) |
| 399 | { |
| 400 | struct icmphdr *h; |
| 401 | uint16_t len; |
| 402 | |
| 403 | if (netlink_icmp_may_reply(buf)) { |
| 404 | struct iphdr *iph=(struct iphdr *)buf->start; |
| 405 | |
| 406 | uint32_t icmpdest = ntohl(iph->saddr); |
| 407 | uint32_t icmpsource; |
| 408 | const char *icmpsourcedebugprefix; |
| 409 | if (!st->ptp) { |
| 410 | icmpsource=st->secnet_address; |
| 411 | icmpsourcedebugprefix=""; |
| 412 | } else if (origsender) { |
| 413 | /* was from peer, send reply as if from host */ |
| 414 | icmpsource=st->local_address; |
| 415 | icmpsourcedebugprefix="L!"; |
| 416 | } else { |
| 417 | /* was from host, send reply as if from peer */ |
| 418 | icmpsource=st->secnet_address; /* actually, peer address */ |
| 419 | icmpsourcedebugprefix="P!"; |
| 420 | } |
| 421 | MDEBUG("%s: generating ICMP re %s[%s]->[%s]:" |
| 422 | " from %s%s type=%u code=%u\n", |
| 423 | st->name, sender_name(origsender), |
| 424 | ipaddr_to_string(ntohl(iph->saddr)), |
| 425 | ipaddr_to_string(ntohl(iph->daddr)), |
| 426 | icmpsourcedebugprefix, |
| 427 | ipaddr_to_string(icmpsource), |
| 428 | type, code); |
| 429 | |
| 430 | len=netlink_icmp_reply_len(buf); |
| 431 | h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len); |
| 432 | h->type=type; h->code=code; h->d=info; |
| 433 | BUF_ADD_BYTES(append,&st->icmp,buf->start,len); |
| 434 | netlink_icmp_csum(h); |
| 435 | |
| 436 | if (!st->ptp) { |
| 437 | netlink_packet_deliver(st,NULL,&st->icmp); |
| 438 | } else if (origsender) { |
| 439 | netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp); |
| 440 | } else { |
| 441 | netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp); |
| 442 | } |
| 443 | BUF_ASSERT_FREE(&st->icmp); |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | /* |
| 448 | * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the |
| 449 | * checksum. |
| 450 | * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums. |
| 451 | * |
| 452 | * Is the datagram acceptable? |
| 453 | * |
| 454 | * 1. Length at least the size of an ip header |
| 455 | * 2. Version of 4 |
| 456 | * 3. Checksums correctly. |
| 457 | * 4. Doesn't have a bogus length |
| 458 | */ |
| 459 | static bool_t netlink_check(struct netlink *st, struct buffer_if *buf, |
| 460 | char *errmsgbuf, int errmsgbuflen) |
| 461 | { |
| 462 | #define BAD(...) do{ \ |
| 463 | snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \ |
| 464 | return False; \ |
| 465 | }while(0) |
| 466 | |
| 467 | if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size); |
| 468 | struct iphdr *iph=(struct iphdr *)buf->start; |
| 469 | int32_t len; |
| 470 | |
| 471 | if (iph->ihl < 5) BAD("ihl %u",iph->ihl); |
| 472 | if (iph->version != 4) BAD("version %u",iph->version); |
| 473 | if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl); |
| 474 | if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum"); |
| 475 | len=ntohs(iph->tot_len); |
| 476 | /* There should be no padding */ |
| 477 | if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len); |
| 478 | if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl); |
| 479 | /* XXX check that there's no source route specified */ |
| 480 | return True; |
| 481 | |
| 482 | #undef BAD |
| 483 | } |
| 484 | |
/* Rewrite an IPv4 header in place so that it is suitable for the
 * second and later fragments of a datagram: only options with the
 * "copied" flag (0x80) set are kept (RFC 791 section 3.1).
 *
 *   base  start of the IP header; the options area is modified
 *   hlp   in: current header length in bytes
 *         out: new header length in bytes, a multiple of 4
 *
 * The kept options are packed directly after the 20-byte fixed
 * header and padded with End of Option List bytes up to a 4-byte
 * boundary; the header's ihl field is updated to match.  The new
 * header is never longer than the old one.  The checksum and length
 * fields are NOT updated; that is left to the caller.
 *
 * Returns 0 on success, or a static string describing why the options
 * are malformed.  In that case *hlp is unchanged and the header may
 * be partly rewritten, so it must not be used.
 */
static const char *fragment_filter_header(uint8_t *base, long *hlp)
{
    enum { fixedhl = 20 }; /* IPv4 header without options */
    long hl = *hlp;
    const uint8_t *ipend = base + hl;
    uint8_t *op = base + fixedhl;   /* where the next kept option goes */
    const uint8_t *ip = op;         /* next option to examine */

    while (ip < ipend) {
        uint8_t opt = ip[0];
        long remain = ipend - ip;
        if (opt == 0x00) /* End of Options List */ break;
        if (opt == 0x01) /* No Operation */ {
            /* One byte with no length field.  Step over it; failing
               to advance here would loop forever. */
            ip++;
            continue;
        }
        if (remain < 2) return "IPv4 options truncated at length";
        int optlen = ip[1];
        /* The length covers the type and length bytes themselves, so
           anything below 2 is malformed (and 0 would never advance). */
        if (optlen < 2) return "IPv4 option length too small";
        if (remain < optlen) return "IPv4 options truncated in option";
        if (opt & 0x80) /* copy */ {
            memmove(op, ip, optlen);
            op += optlen;
        }
        ip += optlen;
    }
    while ((hl = (op - base)) & 0x3)
        *op++ = 0x00 /* End of Option List */;
    /* ihl is the low nibble of the first header byte; writing it
       bytewise is independent of bit-field layout. */
    base[0] = (uint8_t)((base[0] & 0xf0) | (hl >> 2));
    *hlp = hl;

    return 0;
}
| 514 | |
/* Fragment or send ICMP Fragmentation Needed.
 *
 * Hands the packet in buf to deliver(deliver_dst, ...), splitting it
 * into fragments first if it is larger than mtu.
 *
 *   st             the netlink; st->icmp is borrowed as scratch space
 *   sender         passed to netlink_icmp_simple() as the originator
 *                  if an ICMP error has to be generated
 *   deliver        function that is given each outgoing packet
 *   deliver_dst    first argument for deliver
 *   delivery_name  name of the destination, for log messages only
 *   mtu            largest packet size that may be passed to deliver
 *   source         packet source address, for log messages only
 *   dest           not used in this function
 *   buf            the packet; it is passed to deliver or freed on
 *                  every path
 *
 * Packets that fit are delivered unchanged.  Oversized packets with
 * Don't Fragment set produce an ICMP "fragmentation required" error
 * and are dropped.  Other oversized packets are split; the first
 * fragment keeps the full header and later fragments get a header
 * with the non-copied options filtered out.
 *
 * NOTE(review): presumably deliver leaves buf free again, since buf
 * is re-allocated with BUF_ALLOC on each iteration - confirm against
 * the deliver implementations.
 */
static void netlink_maybe_fragment(struct netlink *st,
                                   struct netlink_client *sender,
                                   netlink_deliver_fn *deliver,
                                   void *deliver_dst,
                                   const char *delivery_name,
                                   int32_t mtu,
                                   uint32_t source, uint32_t dest,
                                   struct buffer_if *buf)
{
    struct iphdr *iph=(struct iphdr*)buf->start;
    long hl = iph->ihl*4;
    const char *ssource = ipaddr_to_string(source);

    /* Common case: the packet fits, pass it on untouched. */
    if (buf->size <= mtu) {
        deliver(deliver_dst, buf);
        return;
    }

    MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
           st->name, ssource, delivery_name, buf->size);

/* Log a warning about a packet that could not be (fully) fragmented.
   The expansion already ends in a semicolon. */
#define BADFRAG(m, ...)                                 \
        Message(M_WARNING,                              \
                "%s: fragmenting packet from source %s" \
                " for transmission via %s: " m "\n",    \
                st->name, ssource, delivery_name,       \
                ## __VA_ARGS__);

    unsigned orig_frag = ntohs(iph->frag);

    if (orig_frag&IPHDR_FRAG_DONT) {
        /* Not allowed to fragment: report the usable MTU instead. */
        union icmpinfofield info =
            { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
        netlink_icmp_simple(st,sender,buf,
                            ICMP_TYPE_UNREACHABLE,
                            ICMP_CODE_FRAGMENTATION_REQUIRED,
                            info);
        BUF_FREE(buf);
        return;
    }
    /* Every fragment but the last carries a multiple of 8 data bytes,
       so there must be room for the header plus at least 8 bytes. */
    if (mtu < hl + 8) {
        BADFRAG("mtu %"PRId32" too small", mtu);
        BUF_FREE(buf);
        return;
    }

    /* we (ab)use the icmp buffer to stash the original packet */
    struct buffer_if *orig = &st->icmp;
    BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
    buffer_copy(orig,buf);
    BUF_FREE(buf);

    /* These point into the stashed copy.  startindata keeps using the
       original header length even after the header has been filtered
       (and hl reduced) further down. */
    const uint8_t *startindata = orig->start + hl;
    const uint8_t *indata = startindata;
    const uint8_t *endindata = orig->start + orig->size;
    _Bool filtered = 0;

    for (;;) {
        /* compute our fragment offset */
        /* (in bytes, relative to the start of the original unfragmented
           datagram: the packet may itself already be a fragment) */
        long dataoffset = indata - startindata
            + (orig_frag & IPHDR_FRAG_OFF)*8;
        assert(!(dataoffset & 7));
        if (dataoffset > IPHDR_FRAG_OFF*8) {
            BADFRAG("ultimate fragment offset out of range");
            break;
        }

        BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
        buffer_init(buf,calculate_max_start_pad());

        /* copy header (possibly filtered); will adjust in a bit */
        struct iphdr *fragh = buf_append(buf, hl);
        memcpy(fragh, orig->start, hl);

        /* decide how much payload to copy and copy it */
        /* (a fragment that is not the last is rounded down to a
           multiple of 8 bytes; the last one takes whatever is left) */
        long avail = mtu - hl;
        long remain = endindata - indata;
        long use = avail < remain ? (avail & ~(long)7) : remain;
        BUF_ADD_BYTES(append, buf, indata, use);
        indata += use;

        _Bool last_frag = indata >= endindata;

        /* adjust the header */
        /* (keep the original flag bits, so a More Fragments flag that
           was already set stays set on our last fragment too) */
        fragh->tot_len = htons(buf->size);
        fragh->frag =
            htons((orig_frag & ~IPHDR_FRAG_OFF) |
                  (last_frag ? 0 : IPHDR_FRAG_MORE) |
                  (dataoffset >> 3));
        fragh->check = 0;
        fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);

        /* actually send it */
        deliver(deliver_dst, buf);
        if (last_frag)
            break;

        /* after copying the header for the first frag,
         * we filter the header for the remaining frags */
        if (!filtered++) {
            const char *bad = fragment_filter_header(orig->start, &hl);
            if (bad) { BADFRAG("%s", bad); break; }
        }
    }

    BUF_FREE(orig);

#undef BADFRAG
}
| 625 | |
| 626 | /* Deliver a packet _to_ client; used after we have decided |
| 627 | * what to do with it (and just to check that the client has |
| 628 | * actually registered a delivery function with us). */ |
| 629 | static void netlink_client_deliver(struct netlink *st, |
| 630 | struct netlink_client *client, |
| 631 | uint32_t source, uint32_t dest, |
| 632 | struct buffer_if *buf) |
| 633 | { |
| 634 | if (!client->deliver) { |
| 635 | string_t s,d; |
| 636 | s=ipaddr_to_string(source); |
| 637 | d=ipaddr_to_string(dest); |
| 638 | Message(M_ERR,"%s: dropping %s->%s, client not registered\n", |
| 639 | st->name,s,d); |
| 640 | BUF_FREE(buf); |
| 641 | return; |
| 642 | } |
| 643 | netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name, |
| 644 | client->mtu, source,dest,buf); |
| 645 | client->outcount++; |
| 646 | } |
| 647 | |
| 648 | /* Deliver a packet to the host; used after we have decided that that |
| 649 | * is what to do with it. */ |
| 650 | static void netlink_host_deliver(struct netlink *st, |
| 651 | struct netlink_client *sender, |
| 652 | uint32_t source, uint32_t dest, |
| 653 | struct buffer_if *buf) |
| 654 | { |
| 655 | netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)", |
| 656 | st->mtu, source,dest,buf); |
| 657 | st->outcount++; |
| 658 | } |
| 659 | |
| 660 | /* Deliver a packet. "sender"==NULL for packets from the host and packets |
| 661 | generated internally in secnet. */ |
| 662 | static void netlink_packet_deliver(struct netlink *st, |
| 663 | struct netlink_client *sender, |
| 664 | struct buffer_if *buf) |
| 665 | { |
| 666 | if (buf->size < (int)sizeof(struct iphdr)) { |
| 667 | Message(M_ERR,"%s: trying to deliver a too-short packet" |
| 668 | " from %s!\n",st->name, sender_name(sender)); |
| 669 | BUF_FREE(buf); |
| 670 | return; |
| 671 | } |
| 672 | |
| 673 | struct iphdr *iph=(struct iphdr *)buf->start; |
| 674 | uint32_t dest=ntohl(iph->daddr); |
| 675 | uint32_t source=ntohl(iph->saddr); |
| 676 | uint32_t best_quality; |
| 677 | bool_t allow_route=False; |
| 678 | bool_t found_allowed=False; |
| 679 | int best_match; |
| 680 | int i; |
| 681 | |
| 682 | BUF_ASSERT_USED(buf); |
| 683 | |
| 684 | if (dest==st->secnet_address) { |
| 685 | Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name); |
| 686 | BUF_FREE(buf); |
| 687 | return; |
| 688 | } |
| 689 | |
| 690 | /* Packets from the host (sender==NULL) may always be routed. Packets |
| 691 | from clients with the allow_route option will also be routed. */ |
| 692 | if (!sender || (sender && (sender->options & OPT_ALLOWROUTE))) |
| 693 | allow_route=True; |
| 694 | |
| 695 | /* If !allow_route, we check the routing table anyway, and if |
| 696 | there's a suitable route with OPT_ALLOWROUTE set we use it. If |
| 697 | there's a suitable route, but none with OPT_ALLOWROUTE set then |
| 698 | we generate ICMP 'communication with destination network |
| 699 | administratively prohibited'. */ |
| 700 | |
| 701 | best_quality=0; |
| 702 | best_match=-1; |
| 703 | for (i=0; i<st->n_clients; i++) { |
| 704 | if (st->routes[i]->up && |
| 705 | ipset_contains_addr(st->routes[i]->networks,dest)) { |
| 706 | /* It's an available route to the correct destination. But is |
| 707 | it better than the one we already have? */ |
| 708 | |
| 709 | /* If we have already found an allowed route then we don't |
| 710 | bother looking at routes we're not allowed to use. If |
| 711 | we don't yet have an allowed route we'll consider any. */ |
| 712 | if (!allow_route && found_allowed) { |
| 713 | if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue; |
| 714 | } |
| 715 | |
| 716 | if (st->routes[i]->link_quality>best_quality |
| 717 | || best_quality==0) { |
| 718 | best_quality=st->routes[i]->link_quality; |
| 719 | best_match=i; |
| 720 | if (st->routes[i]->options&OPT_ALLOWROUTE) |
| 721 | found_allowed=True; |
| 722 | /* If quality isn't perfect we may wish to |
| 723 | consider kicking the tunnel with a 0-length |
| 724 | packet to prompt it to perform a key setup. |
| 725 | Then it'll eventually decide it's up or |
| 726 | down. */ |
| 727 | /* If quality is perfect and we're allowed to use the |
| 728 | route we don't need to search any more. */ |
| 729 | if (best_quality>=MAXIMUM_LINK_QUALITY && |
| 730 | (allow_route || found_allowed)) break; |
| 731 | } |
| 732 | } |
| 733 | } |
| 734 | if (best_match==-1) { |
| 735 | /* The packet's not going down a tunnel. It might (ought to) |
| 736 | be for the host. */ |
| 737 | if (ipset_contains_addr(st->networks,dest)) { |
| 738 | netlink_host_deliver(st,sender,source,dest,buf); |
| 739 | BUF_ASSERT_FREE(buf); |
| 740 | } else { |
| 741 | string_t s,d; |
| 742 | s=ipaddr_to_string(source); |
| 743 | d=ipaddr_to_string(dest); |
| 744 | Message(M_DEBUG,"%s: don't know where to deliver packet " |
| 745 | "(s=%s, d=%s)\n", st->name, s, d); |
| 746 | netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE, |
| 747 | ICMP_CODE_NET_UNREACHABLE, icmp_noinfo); |
| 748 | BUF_FREE(buf); |
| 749 | } |
| 750 | } else { |
| 751 | if (!allow_route && |
| 752 | !(st->routes[best_match]->options&OPT_ALLOWROUTE)) { |
| 753 | string_t s,d; |
| 754 | s=ipaddr_to_string(source); |
| 755 | d=ipaddr_to_string(dest); |
| 756 | /* We have a usable route but aren't allowed to use it. |
| 757 | Generate ICMP destination unreachable: communication |
| 758 | with destination network administratively prohibited */ |
| 759 | Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n", |
| 760 | st->name,s,d); |
| 761 | |
| 762 | netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE, |
| 763 | ICMP_CODE_NET_PROHIBITED, icmp_noinfo); |
| 764 | BUF_FREE(buf); |
| 765 | } else { |
| 766 | if (best_quality>0) { |
| 767 | netlink_client_deliver(st,st->routes[best_match], |
| 768 | source,dest,buf); |
| 769 | BUF_ASSERT_FREE(buf); |
| 770 | } else { |
| 771 | /* Generate ICMP destination unreachable */ |
| 772 | netlink_icmp_simple(st,sender,buf, |
| 773 | ICMP_TYPE_UNREACHABLE, |
| 774 | ICMP_CODE_NET_UNREACHABLE, |
| 775 | icmp_noinfo); |
| 776 | BUF_FREE(buf); |
| 777 | } |
| 778 | } |
| 779 | } |
| 780 | BUF_ASSERT_FREE(buf); |
| 781 | } |
| 782 | |
| 783 | static void netlink_packet_forward(struct netlink *st, |
| 784 | struct netlink_client *sender, |
| 785 | struct buffer_if *buf) |
| 786 | { |
| 787 | if (buf->size < (int)sizeof(struct iphdr)) return; |
| 788 | struct iphdr *iph=(struct iphdr *)buf->start; |
| 789 | |
| 790 | BUF_ASSERT_USED(buf); |
| 791 | |
| 792 | /* Packet has already been checked */ |
| 793 | if (iph->ttl<=1) { |
| 794 | /* Generate ICMP time exceeded */ |
| 795 | netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED, |
| 796 | ICMP_CODE_TTL_EXCEEDED,icmp_noinfo); |
| 797 | BUF_FREE(buf); |
| 798 | return; |
| 799 | } |
| 800 | iph->ttl--; |
| 801 | iph->check=0; |
| 802 | iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl); |
| 803 | |
| 804 | netlink_packet_deliver(st,sender,buf); |
| 805 | BUF_ASSERT_FREE(buf); |
| 806 | } |
| 807 | |
| 808 | /* Deal with packets addressed explicitly to us */ |
| 809 | static void netlink_packet_local(struct netlink *st, |
| 810 | struct netlink_client *sender, |
| 811 | struct buffer_if *buf) |
| 812 | { |
| 813 | struct icmphdr *h; |
| 814 | |
| 815 | st->localcount++; |
| 816 | |
| 817 | if (buf->size < (int)sizeof(struct icmphdr)) { |
| 818 | Message(M_WARNING,"%s: short packet addressed to secnet; " |
| 819 | "ignoring it\n",st->name); |
| 820 | BUF_FREE(buf); |
| 821 | return; |
| 822 | } |
| 823 | h=(struct icmphdr *)buf->start; |
| 824 | |
| 825 | unsigned fraginfo = ntohs(h->iph.frag); |
| 826 | if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) { |
| 827 | if (!(fraginfo & IPHDR_FRAG_OFF)) |
| 828 | /* report only for first fragment */ |
| 829 | Message(M_WARNING,"%s: fragmented packet addressed to secnet; " |
| 830 | "ignoring it\n",st->name); |
| 831 | BUF_FREE(buf); |
| 832 | return; |
| 833 | } |
| 834 | |
| 835 | if (h->iph.protocol==1) { |
| 836 | /* It's ICMP */ |
| 837 | if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) { |
| 838 | /* ICMP echo-request. Special case: we re-use the buffer |
| 839 | to construct the reply. */ |
| 840 | h->type=ICMP_TYPE_ECHO_REPLY; |
| 841 | h->iph.daddr=h->iph.saddr; |
| 842 | h->iph.saddr=htonl(st->secnet_address); |
| 843 | h->iph.ttl=255; |
| 844 | h->iph.check=0; |
| 845 | h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl); |
| 846 | netlink_icmp_csum(h); |
| 847 | netlink_packet_deliver(st,NULL,buf); |
| 848 | return; |
| 849 | } |
| 850 | Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name); |
| 851 | } else { |
| 852 | /* Send ICMP protocol unreachable */ |
| 853 | netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE, |
| 854 | ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo); |
| 855 | BUF_FREE(buf); |
| 856 | return; |
| 857 | } |
| 858 | |
| 859 | BUF_FREE(buf); |
| 860 | } |
| 861 | |
| 862 | /* If cid==NULL packet is from host, otherwise cid specifies which tunnel |
| 863 | it came from. */ |
| 864 | static void netlink_incoming(struct netlink *st, struct netlink_client *sender, |
| 865 | struct buffer_if *buf) |
| 866 | { |
| 867 | uint32_t source,dest; |
| 868 | struct iphdr *iph; |
| 869 | char errmsgbuf[50]; |
| 870 | const char *sourcedesc=sender?sender->name:"host"; |
| 871 | |
| 872 | BUF_ASSERT_USED(buf); |
| 873 | |
| 874 | if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) { |
| 875 | Message(M_WARNING,"%s: bad IP packet from %s: %s\n", |
| 876 | st->name,sourcedesc, |
| 877 | errmsgbuf); |
| 878 | BUF_FREE(buf); |
| 879 | return; |
| 880 | } |
| 881 | assert(buf->size >= (int)sizeof(struct iphdr)); |
| 882 | iph=(struct iphdr *)buf->start; |
| 883 | |
| 884 | source=ntohl(iph->saddr); |
| 885 | dest=ntohl(iph->daddr); |
| 886 | |
| 887 | /* Check source. If we don't like the source, there's no point |
| 888 | generating ICMP because we won't know how to get it to the |
| 889 | source of the packet. */ |
| 890 | if (sender) { |
| 891 | /* Check that the packet source is appropriate for the tunnel |
| 892 | it came down */ |
| 893 | if (!ipset_contains_addr(sender->networks,source)) { |
| 894 | string_t s,d; |
| 895 | s=ipaddr_to_string(source); |
| 896 | d=ipaddr_to_string(dest); |
| 897 | Message(M_WARNING,"%s: packet from tunnel %s with bad " |
| 898 | "source address (s=%s,d=%s)\n",st->name,sender->name,s,d); |
| 899 | BUF_FREE(buf); |
| 900 | return; |
| 901 | } |
| 902 | } else { |
| 903 | /* Check that the packet originates in our configured local |
| 904 | network, and hasn't been forwarded from elsewhere or |
| 905 | generated with the wrong source address */ |
| 906 | if (!ipset_contains_addr(st->networks,source)) { |
| 907 | string_t s,d; |
| 908 | s=ipaddr_to_string(source); |
| 909 | d=ipaddr_to_string(dest); |
| 910 | Message(M_WARNING,"%s: outgoing packet with bad source address " |
| 911 | "(s=%s,d=%s)\n",st->name,s,d); |
| 912 | BUF_FREE(buf); |
| 913 | return; |
| 914 | } |
| 915 | } |
| 916 | |
| 917 | /* If this is a point-to-point device we don't examine the |
| 918 | destination address at all; we blindly send it down our |
| 919 | one-and-only registered tunnel, or to the host, depending on |
| 920 | where it came from. It's up to external software to check |
| 921 | address validity and generate ICMP, etc. */ |
| 922 | if (st->ptp) { |
| 923 | if (sender) { |
| 924 | netlink_host_deliver(st,sender,source,dest,buf); |
| 925 | } else { |
| 926 | netlink_client_deliver(st,st->clients,source,dest,buf); |
| 927 | } |
| 928 | BUF_ASSERT_FREE(buf); |
| 929 | return; |
| 930 | } |
| 931 | |
| 932 | /* st->secnet_address needs checking before matching destination |
| 933 | addresses */ |
| 934 | if (dest==st->secnet_address) { |
| 935 | netlink_packet_local(st,sender,buf); |
| 936 | BUF_ASSERT_FREE(buf); |
| 937 | return; |
| 938 | } |
| 939 | netlink_packet_forward(st,sender,buf); |
| 940 | BUF_ASSERT_FREE(buf); |
| 941 | } |
| 942 | |
| 943 | static void netlink_inst_incoming(void *sst, struct buffer_if *buf) |
| 944 | { |
| 945 | struct netlink_client *c=sst; |
| 946 | struct netlink *st=c->nst; |
| 947 | |
| 948 | netlink_incoming(st,c,buf); |
| 949 | } |
| 950 | |
| 951 | static void netlink_dev_incoming(void *sst, struct buffer_if *buf) |
| 952 | { |
| 953 | struct netlink *st=sst; |
| 954 | |
| 955 | netlink_incoming(st,NULL,buf); |
| 956 | } |
| 957 | |
| 958 | static void netlink_set_quality(void *sst, uint32_t quality) |
| 959 | { |
| 960 | struct netlink_client *c=sst; |
| 961 | struct netlink *st=c->nst; |
| 962 | |
| 963 | c->link_quality=quality; |
| 964 | c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True; |
| 965 | if (c->options&OPT_SOFTROUTE) { |
| 966 | st->set_routes(st->dst,c); |
| 967 | } |
| 968 | } |
| 969 | |
| 970 | static void netlink_output_subnets(struct netlink *st, uint32_t loglevel, |
| 971 | struct subnet_list *snets) |
| 972 | { |
| 973 | int32_t i; |
| 974 | string_t net; |
| 975 | |
| 976 | for (i=0; i<snets->entries; i++) { |
| 977 | net=subnet_to_string(snets->list[i]); |
| 978 | Message(loglevel,"%s ",net); |
| 979 | } |
| 980 | } |
| 981 | |
| 982 | static void netlink_dump_routes(struct netlink *st, bool_t requested) |
| 983 | { |
| 984 | int i; |
| 985 | string_t net; |
| 986 | uint32_t c=M_INFO; |
| 987 | |
| 988 | if (requested) c=M_WARNING; |
| 989 | if (st->ptp) { |
| 990 | net=ipaddr_to_string(st->secnet_address); |
| 991 | Message(c,"%s: point-to-point (remote end is %s); routes: ", |
| 992 | st->name, net); |
| 993 | netlink_output_subnets(st,c,st->clients->subnets); |
| 994 | Message(c,"\n"); |
| 995 | } else { |
| 996 | Message(c,"%s: routing table:\n",st->name); |
| 997 | for (i=0; i<st->n_clients; i++) { |
| 998 | netlink_output_subnets(st,c,st->routes[i]->subnets); |
| 999 | Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s," |
| 1000 | "quality %d,use %d,pri %lu)\n", |
| 1001 | st->routes[i]->name, |
| 1002 | st->routes[i]->up?"up":"down", |
| 1003 | st->routes[i]->mtu, |
| 1004 | st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard", |
| 1005 | st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted", |
| 1006 | st->routes[i]->link_quality, |
| 1007 | st->routes[i]->outcount, |
| 1008 | (unsigned long)st->routes[i]->priority); |
| 1009 | } |
| 1010 | net=ipaddr_to_string(st->secnet_address); |
| 1011 | Message(c,"%s/32 -> netlink \"%s\" (use %d)\n", |
| 1012 | net,st->name,st->localcount); |
| 1013 | for (i=0; i<st->subnets->entries; i++) { |
| 1014 | net=subnet_to_string(st->subnets->list[i]); |
| 1015 | Message(c,"%s ",net); |
| 1016 | } |
| 1017 | if (i>0) |
| 1018 | Message(c,"-> host (use %d)\n",st->outcount); |
| 1019 | } |
| 1020 | } |
| 1021 | |
| 1022 | /* ap is a pointer to a member of the routes array */ |
| 1023 | static int netlink_compare_client_priority(const void *ap, const void *bp) |
| 1024 | { |
| 1025 | const struct netlink_client *const*a=ap; |
| 1026 | const struct netlink_client *const*b=bp; |
| 1027 | |
| 1028 | if ((*a)->priority==(*b)->priority) return 0; |
| 1029 | if ((*a)->priority<(*b)->priority) return 1; |
| 1030 | return -1; |
| 1031 | } |
| 1032 | |
| 1033 | static void netlink_phase_hook(void *sst, uint32_t new_phase) |
| 1034 | { |
| 1035 | struct netlink *st=sst; |
| 1036 | struct netlink_client *c; |
| 1037 | int32_t i; |
| 1038 | |
| 1039 | /* All the networks serviced by the various tunnels should now |
| 1040 | * have been registered. We build a routing table by sorting the |
| 1041 | * clients by priority. */ |
| 1042 | st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients, |
| 1043 | "netlink_phase_hook"); |
| 1044 | /* Fill the table */ |
| 1045 | i=0; |
| 1046 | for (c=st->clients; c; c=c->next) { |
| 1047 | assert(i<INT_MAX); |
| 1048 | st->routes[i++]=c; |
| 1049 | } |
| 1050 | /* Sort the table in descending order of priority */ |
| 1051 | qsort(st->routes,st->n_clients,sizeof(*st->routes), |
| 1052 | netlink_compare_client_priority); |
| 1053 | |
| 1054 | netlink_dump_routes(st,False); |
| 1055 | } |
| 1056 | |
| 1057 | static void netlink_signal_handler(void *sst, int signum) |
| 1058 | { |
| 1059 | struct netlink *st=sst; |
| 1060 | Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name); |
| 1061 | netlink_dump_routes(st,True); |
| 1062 | } |
| 1063 | |
| 1064 | static void netlink_inst_set_mtu(void *sst, int32_t new_mtu) |
| 1065 | { |
| 1066 | struct netlink_client *c=sst; |
| 1067 | |
| 1068 | c->mtu=new_mtu; |
| 1069 | } |
| 1070 | |
| 1071 | static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver, |
| 1072 | void *dst, uint32_t *localmtu_r) |
| 1073 | { |
| 1074 | struct netlink_client *c=sst; |
| 1075 | struct netlink *st=c->nst; |
| 1076 | |
| 1077 | c->deliver=deliver; |
| 1078 | c->dst=dst; |
| 1079 | |
| 1080 | if (localmtu_r) |
| 1081 | *localmtu_r=st->mtu; |
| 1082 | } |
| 1083 | |
/* Maps the strings accepted in a tunnel's "options" list to the
   corresponding OPT_* flag bits.  The list ends with a NULL entry.
   netlink_inst_create passes this table to string_list_to_word. */
static struct flagstr netlink_option_table[]={
    { "soft", OPT_SOFTROUTE },
    { "allow-route", OPT_ALLOWROUTE },
    { NULL, 0}
};
| 1089 | /* This is the routine that gets called when the closure that's |
| 1090 | returned by an invocation of a netlink device closure (eg. tun, |
| 1091 | userv-ipif) is invoked. It's used to create routes and pass in |
| 1092 | information about them; the closure it returns is used by site |
| 1093 | code. */ |
| 1094 | static closure_t *netlink_inst_create(struct netlink *st, |
| 1095 | struct cloc loc, dict_t *dict) |
| 1096 | { |
| 1097 | struct netlink_client *c; |
| 1098 | string_t name; |
| 1099 | struct ipset *networks; |
| 1100 | uint32_t options,priority; |
| 1101 | int32_t mtu; |
| 1102 | list_t *l; |
| 1103 | |
| 1104 | name=dict_read_string(dict, "name", True, st->name, loc); |
| 1105 | |
| 1106 | l=dict_lookup(dict,"routes"); |
| 1107 | if (!l) |
| 1108 | cfgfatal(loc,st->name,"required parameter \"routes\" not found\n"); |
| 1109 | networks=string_list_to_ipset(l,loc,st->name,"routes"); |
| 1110 | options=string_list_to_word(dict_lookup(dict,"options"), |
| 1111 | netlink_option_table,st->name); |
| 1112 | |
| 1113 | priority=dict_read_number(dict,"priority",False,st->name,loc,0); |
| 1114 | mtu=dict_read_number(dict,"mtu",False,st->name,loc,0); |
| 1115 | |
| 1116 | if ((options&OPT_SOFTROUTE) && !st->set_routes) { |
| 1117 | cfgfatal(loc,st->name,"this netlink device does not support " |
| 1118 | "soft routes.\n"); |
| 1119 | return NULL; |
| 1120 | } |
| 1121 | |
| 1122 | if (options&OPT_SOFTROUTE) { |
| 1123 | /* XXX for now we assume that soft routes require root privilege; |
| 1124 | this may not always be true. The device driver can tell us. */ |
| 1125 | require_root_privileges=True; |
| 1126 | require_root_privileges_explanation="netlink: soft routes"; |
| 1127 | if (st->ptp) { |
| 1128 | cfgfatal(loc,st->name,"point-to-point netlinks do not support " |
| 1129 | "soft routes.\n"); |
| 1130 | return NULL; |
| 1131 | } |
| 1132 | } |
| 1133 | |
| 1134 | /* Check that nets are a subset of st->remote_networks; |
| 1135 | refuse to register if they are not. */ |
| 1136 | if (!ipset_is_subset(st->remote_networks,networks)) { |
| 1137 | cfgfatal(loc,st->name,"routes are not allowed\n"); |
| 1138 | return NULL; |
| 1139 | } |
| 1140 | |
| 1141 | c=safe_malloc(sizeof(*c),"netlink_inst_create"); |
| 1142 | c->cl.description=name; |
| 1143 | c->cl.type=CL_NETLINK; |
| 1144 | c->cl.apply=NULL; |
| 1145 | c->cl.interface=&c->ops; |
| 1146 | c->ops.st=c; |
| 1147 | c->ops.reg=netlink_inst_reg; |
| 1148 | c->ops.deliver=netlink_inst_incoming; |
| 1149 | c->ops.set_quality=netlink_set_quality; |
| 1150 | c->ops.set_mtu=netlink_inst_set_mtu; |
| 1151 | c->nst=st; |
| 1152 | |
| 1153 | c->networks=networks; |
| 1154 | c->subnets=ipset_to_subnet_list(networks); |
| 1155 | c->priority=priority; |
| 1156 | c->deliver=NULL; |
| 1157 | c->dst=NULL; |
| 1158 | c->name=name; |
| 1159 | c->link_quality=LINK_QUALITY_UNUSED; |
| 1160 | c->mtu=mtu?mtu:st->mtu; |
| 1161 | c->options=options; |
| 1162 | c->outcount=0; |
| 1163 | c->up=False; |
| 1164 | c->kup=False; |
| 1165 | c->next=st->clients; |
| 1166 | st->clients=c; |
| 1167 | assert(st->n_clients < INT_MAX); |
| 1168 | st->n_clients++; |
| 1169 | |
| 1170 | return &c->cl; |
| 1171 | } |
| 1172 | |
| 1173 | static list_t *netlink_inst_apply(closure_t *self, struct cloc loc, |
| 1174 | dict_t *context, list_t *args) |
| 1175 | { |
| 1176 | struct netlink *st=self->interface; |
| 1177 | |
| 1178 | dict_t *dict; |
| 1179 | item_t *item; |
| 1180 | closure_t *cl; |
| 1181 | |
| 1182 | item=list_elem(args,0); |
| 1183 | if (!item || item->type!=t_dict) { |
| 1184 | cfgfatal(loc,st->name,"must have a dictionary argument\n"); |
| 1185 | } |
| 1186 | dict=item->data.dict; |
| 1187 | |
| 1188 | cl=netlink_inst_create(st,loc,dict); |
| 1189 | |
| 1190 | return new_closure(cl); |
| 1191 | } |
| 1192 | |
| 1193 | netlink_deliver_fn *netlink_init(struct netlink *st, |
| 1194 | void *dst, struct cloc loc, |
| 1195 | dict_t *dict, cstring_t description, |
| 1196 | netlink_route_fn *set_routes, |
| 1197 | netlink_deliver_fn *to_host) |
| 1198 | { |
| 1199 | item_t *sa, *ptpa; |
| 1200 | list_t *l; |
| 1201 | |
| 1202 | st->dst=dst; |
| 1203 | st->cl.description=description; |
| 1204 | st->cl.type=CL_PURE; |
| 1205 | st->cl.apply=netlink_inst_apply; |
| 1206 | st->cl.interface=st; |
| 1207 | st->clients=NULL; |
| 1208 | st->routes=NULL; |
| 1209 | st->n_clients=0; |
| 1210 | st->set_routes=set_routes; |
| 1211 | st->deliver_to_host=to_host; |
| 1212 | |
| 1213 | st->name=dict_read_string(dict,"name",False,description,loc); |
| 1214 | if (!st->name) st->name=description; |
| 1215 | l=dict_lookup(dict,"networks"); |
| 1216 | if (l) |
| 1217 | st->networks=string_list_to_ipset(l,loc,st->name,"networks"); |
| 1218 | else { |
| 1219 | struct ipset *empty; |
| 1220 | empty=ipset_new(); |
| 1221 | st->networks=ipset_complement(empty); |
| 1222 | ipset_free(empty); |
| 1223 | } |
| 1224 | l=dict_lookup(dict,"remote-networks"); |
| 1225 | if (l) { |
| 1226 | st->remote_networks=string_list_to_ipset(l,loc,st->name, |
| 1227 | "remote-networks"); |
| 1228 | } else { |
| 1229 | struct ipset *empty; |
| 1230 | empty=ipset_new(); |
| 1231 | st->remote_networks=ipset_complement(empty); |
| 1232 | ipset_free(empty); |
| 1233 | } |
| 1234 | st->local_address=string_item_to_ipaddr( |
| 1235 | dict_find_item(dict,"local-address", True, "netlink", loc),"netlink"); |
| 1236 | |
| 1237 | sa=dict_find_item(dict,"secnet-address",False,"netlink",loc); |
| 1238 | ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc); |
| 1239 | if (sa && ptpa) { |
| 1240 | cfgfatal(loc,st->name,"you may not specify secnet-address and " |
| 1241 | "ptp-address in the same netlink device\n"); |
| 1242 | } |
| 1243 | if (!(sa || ptpa)) { |
| 1244 | cfgfatal(loc,st->name,"you must specify secnet-address or " |
| 1245 | "ptp-address for this netlink device\n"); |
| 1246 | } |
| 1247 | if (sa) { |
| 1248 | st->secnet_address=string_item_to_ipaddr(sa,"netlink"); |
| 1249 | st->ptp=False; |
| 1250 | } else { |
| 1251 | st->secnet_address=string_item_to_ipaddr(ptpa,"netlink"); |
| 1252 | st->ptp=True; |
| 1253 | } |
| 1254 | /* To be strictly correct we could subtract secnet_address from |
| 1255 | networks here. It shouldn't make any practical difference, |
| 1256 | though, and will make the route dump look complicated... */ |
| 1257 | st->subnets=ipset_to_subnet_list(st->networks); |
| 1258 | st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU); |
| 1259 | buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu)); |
| 1260 | st->outcount=0; |
| 1261 | st->localcount=0; |
| 1262 | |
| 1263 | add_hook(PHASE_SETUP,netlink_phase_hook,st); |
| 1264 | request_signal_notification(SIGUSR1, netlink_signal_handler, st); |
| 1265 | |
| 1266 | /* If we're point-to-point then we return a CL_NETLINK directly, |
| 1267 | rather than a CL_NETLINK_OLD or pure closure (depending on |
| 1268 | compatibility). This CL_NETLINK is for our one and only |
| 1269 | client. Our cl.apply function is NULL. */ |
| 1270 | if (st->ptp) { |
| 1271 | closure_t *cl; |
| 1272 | cl=netlink_inst_create(st,loc,dict); |
| 1273 | st->cl=*cl; |
| 1274 | } |
| 1275 | return netlink_dev_incoming; |
| 1276 | } |
| 1277 | |
| 1278 | /* No connection to the kernel at all... */ |
| 1279 | |
/* State for the "null-netlink" device: just the generic netlink
   state, with no device-specific fields. */
struct null {
    struct netlink nl;
};
| 1283 | |
| 1284 | static bool_t null_set_route(void *sst, struct netlink_client *routes) |
| 1285 | { |
| 1286 | struct null *st=sst; |
| 1287 | |
| 1288 | if (routes->up!=routes->kup) { |
| 1289 | Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n", |
| 1290 | st->nl.name,routes->name, |
| 1291 | routes->up?"up":"down"); |
| 1292 | routes->kup=routes->up; |
| 1293 | return True; |
| 1294 | } |
| 1295 | return False; |
| 1296 | } |
| 1297 | |
/* deliver-to-host callback for the null device: does nothing with the
   packet; both arguments are unused. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    /* NOTE(review): buf is not freed here, yet netlink_incoming
       asserts the buffer is free after host delivery - confirm
       whether the caller of this function releases it. */
}
| 1302 | |
| 1303 | static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context, |
| 1304 | list_t *args) |
| 1305 | { |
| 1306 | struct null *st; |
| 1307 | item_t *item; |
| 1308 | dict_t *dict; |
| 1309 | |
| 1310 | st=safe_malloc(sizeof(*st),"null_apply"); |
| 1311 | |
| 1312 | item=list_elem(args,0); |
| 1313 | if (!item || item->type!=t_dict) |
| 1314 | cfgfatal(loc,"null-netlink","parameter must be a dictionary\n"); |
| 1315 | |
| 1316 | dict=item->data.dict; |
| 1317 | |
| 1318 | netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route, |
| 1319 | null_deliver); |
| 1320 | |
| 1321 | return new_closure(&st->nl.cl); |
| 1322 | } |
| 1323 | |
/* Module entry point: registers the "null-netlink" closure (backed by
   null_apply) in the given dictionary. */
void netlink_module(dict_t *dict)
{
    add_closure(dict,"null-netlink",null_apply);
}