netlink: Move local_address into struct netlink
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* See RFCs 791, 792, 1123 and 1812 */
4
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14 /* Points to note from RFC1812 (which may require changes in this
15 file):
16
17 3.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
29 4.2.1 A router SHOULD count datagrams discarded.
30
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
33 their use.
34
35 5.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
42 5.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
54 5.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98 */
99
100 #include <string.h>
101 #include <assert.h>
102 #include <limits.h>
103 #include "secnet.h"
104 #include "util.h"
105 #include "ipaddr.h"
106 #include "netlink.h"
107 #include "process.h"
108
/* Debug tracing for this file.  When NETLINK_DEBUG is defined MDEBUG
   forwards its arguments to Message(M_DEBUG, ...); otherwise it expands
   to an expression with no effect, so its arguments are not evaluated. */
#ifdef NETLINK_DEBUG
#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
#else /* !NETLINK_DEBUG */
#define MDEBUG(...) ((void)0)
#endif /* !NETLINK_DEBUG */

/* ICMP message type numbers, and the code numbers used with them, that
   this file generates or recognises. */
#define ICMP_TYPE_ECHO_REPLY 0

#define ICMP_TYPE_UNREACHABLE 3
#define ICMP_CODE_NET_UNREACHABLE 0
#define ICMP_CODE_PROTOCOL_UNREACHABLE 2
#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
#define ICMP_CODE_NET_PROHIBITED 13

#define ICMP_TYPE_ECHO_REQUEST 8

#define ICMP_TYPE_TIME_EXCEEDED 11
#define ICMP_CODE_TTL_EXCEEDED 0
127
/* Generic IP checksum routine.
 *
 * Computes the Internet checksum over `count' bytes starting at `iph':
 * the one's complement of the one's complement sum of the data taken as
 * big-endian 16-bit words.  The result is returned in network byte
 * order, ready to be stored directly into a checksum field.
 *
 * If `count' is odd, the final byte is the high-order byte of a last
 * word whose low-order byte is zero.  The data is read one byte at a
 * time, so `iph' does not need to be 16-bit aligned.
 */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8; /* odd byte: pad with a zero low byte */
    /* Fold the carries back in (end-around carry) */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
144
/* ip_fast_csum: checksum an IP header of `ihl' 32-bit words starting at
 * `iph'.  Two implementations with the same signature are provided: an
 * assembler version for i386, and a portable one built on ip_csum().
 * Neither writes through `iph'.
 */
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
            "movl (%1), %0 ;\n"
            "subl $4, %2 ;\n"
            "jbe 2f ;\n"
            "addl 4(%1), %0 ;\n"
            "adcl 8(%1), %0 ;\n"
            "adcl 12(%1), %0 ;\n"
"1: adcl 16(%1), %0 ;\n"
            "lea 4(%1), %1 ;\n"
            "decl %2 ;\n"
            "jne 1b ;\n"
            "adcl $0, %0 ;\n"
            "movl %0, %2 ;\n"
            "shrl $16, %0 ;\n"
            "addw %w2, %w0 ;\n"
            "adcl $0, %0 ;\n"
            "notl %0 ;\n"
"2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* ihl counts 32-bit words; make sure the byte count cannot overflow */
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
189
/* Fixed part of an IPv4 header, overlaid directly on packet data.  The
   16- and 32-bit fields are converted with ntohs/ntohl/htons/htonl
   wherever this file reads or writes them. */
struct iphdr {
    /* First octet: IP version and header length.  ihl counts 32-bit
       words.  The declaration order of the two bitfields depends on
       WORDS_BIGENDIAN. */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;
#else
    uint8_t ihl:4;
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;   /* length of the whole datagram, header included */
    uint16_t id;
    uint16_t frag;      /* flags and fragment offset, see masks below */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff)  /* offset, in units of 8 bytes */
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)  /* more fragments follow */
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)  /* don't fragment */
                     /* reserved 0x8000 */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;     /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
213
/* An ICMP message as overlaid on packet data: the fixed IP header
   followed immediately by the 8-byte ICMP header.  Because the IP
   header is embedded as a fixed-size struct iphdr, the ICMP fields are
   only at the right offsets when the packet carries no IP options. */
struct icmphdr {
    struct iphdr iph;
    uint8_t type;
    uint8_t code;
    uint16_t check;
    /* Second 32-bit word of the ICMP header; which member applies
       depends on the message type and code. */
    union icmpinfofield {
        uint32_t unused;
        struct {
            uint8_t pointer;
            uint8_t unused1;
            uint16_t unused2;
        } pprob;
        uint32_t gwaddr;
        struct {
            uint16_t id;
            uint16_t seq;
        } echo;
        struct {
            uint16_t unused;
            uint16_t mtu;
        } fragneeded;
    } d;
};

/* All-zero info field (static storage is zero-initialised), for
   messages that carry nothing in that word. */
static const union icmpinfofield icmp_noinfo;
239
240 static const char *sender_name(struct netlink_client *sender /* or NULL */)
241 {
242 return sender?sender->name:"(local)";
243 }
244
245 static void netlink_packet_deliver(struct netlink *st,
246 struct netlink_client *client,
247 struct buffer_if *buf);
248
249 /* XXX RFC1812 4.3.2.5:
250 All other ICMP error messages (Destination Unreachable,
251 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
252 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
253 CONTROL). The IP Precedence value for these error messages MAY be
254 settable.
255 */
256 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
257 uint32_t dest,uint16_t len)
258 {
259 struct icmphdr *h;
260
261 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
262 buffer_init(&st->icmp,calculate_max_start_pad());
263 h=buf_append(&st->icmp,sizeof(*h));
264
265 h->iph.version=4;
266 h->iph.ihl=5;
267 h->iph.tos=0;
268 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
269 h->iph.id=0;
270 h->iph.frag=0;
271 h->iph.ttl=255; /* XXX should be configurable */
272 h->iph.protocol=1;
273 h->iph.saddr=htonl(st->secnet_address);
274 h->iph.daddr=htonl(dest);
275 h->iph.check=0;
276 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
277 h->check=0;
278 h->d.unused=0;
279
280 return h;
281 }
282
283 /* Fill in the ICMP checksum field correctly */
284 static void netlink_icmp_csum(struct icmphdr *h)
285 {
286 int32_t len;
287
288 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
289 h->check=0;
290 h->check=ip_csum(&h->type,len);
291 }
292
293 /* RFC1122:
294 * An ICMP error message MUST NOT be sent as the result of
295 * receiving:
296 *
297 * * an ICMP error message, or
298 *
299 * * a datagram destined to an IP broadcast or IP multicast
300 * address, or
301 *
302 * * a datagram sent as a link-layer broadcast, or
303 *
304 * * a non-initial fragment, or
305 *
306 * * a datagram whose source address does not define a single
307 * host -- e.g., a zero address, a loopback address, a
308 * broadcast address, a multicast address, or a Class E
309 * address.
310 */
311 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
312 {
313 struct iphdr *iph;
314 struct icmphdr *icmph;
315 uint32_t source;
316
317 if (buf->size < (int)sizeof(struct icmphdr)) return False;
318 iph=(struct iphdr *)buf->start;
319 icmph=(struct icmphdr *)buf->start;
320 if (iph->protocol==1) {
321 switch(icmph->type) {
322 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
323 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
324 * Deprecated, reserved, unassigned and experimental
325 * options are treated as not safe to reply to.
326 */
327 case 0: /* Echo Reply */
328 case 8: /* Echo */
329 case 13: /* Timestamp */
330 case 14: /* Timestamp Reply */
331 return True;
332 default:
333 return False;
334 }
335 }
336 /* How do we spot broadcast destination addresses? */
337 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
338 source=ntohl(iph->saddr);
339 if (source==0) return False;
340 if ((source&0xff000000)==0x7f000000) return False;
341 /* How do we spot broadcast source addresses? */
342 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
343 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
344 return True;
345 }
346
347 /* How much of the original IP packet do we include in its ICMP
348 response? The header plus up to 64 bits. */
349
350 /* XXX TODO RFC1812:
351 4.3.2.3 Original Message Header
352
353 Historically, every ICMP error message has included the Internet
354 header and at least the first 8 data bytes of the datagram that
355 triggered the error. This is no longer adequate, due to the use of
356 IP-in-IP tunneling and other technologies. Therefore, the ICMP
357 datagram SHOULD contain as much of the original datagram as possible
358 without the length of the ICMP datagram exceeding 576 bytes. The
359 returned IP header (and user data) MUST be identical to that which
360 was received, except that the router is not required to undo any
361 modifications to the IP header that are normally performed in
362 forwarding that were performed before the error was detected (e.g.,
363 decrementing the TTL, or updating options). Note that the
364 requirements of Section [4.3.3.5] supersede this requirement in some
365 cases (i.e., for a Parameter Problem message, if the problem is in a
366 modified field, the router must undo the modification). See Section
367 [4.3.3.5]).
368 */
369 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
370 {
371 if (buf->size < (int)sizeof(struct iphdr)) return 0;
372 struct iphdr *iph=(struct iphdr *)buf->start;
373 uint16_t hlen,plen;
374
375 hlen=iph->ihl*4;
376 /* We include the first 8 bytes of the packet data, provided they exist */
377 hlen+=8;
378 plen=ntohs(iph->tot_len);
379 return (hlen>plen?plen:hlen);
380 }
381
382 /* client indicates where the packet we're constructing a response to
383 comes from. NULL indicates the host. */
384 static void netlink_icmp_simple(struct netlink *st,
385 struct netlink_client *origsender,
386 struct buffer_if *buf,
387 uint8_t type, uint8_t code,
388 union icmpinfofield info)
389 {
390 struct icmphdr *h;
391 uint16_t len;
392
393 if (netlink_icmp_may_reply(buf)) {
394 struct iphdr *iph=(struct iphdr *)buf->start;
395 len=netlink_icmp_reply_len(buf);
396 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
397 h->type=type; h->code=code; h->d=info;
398 memcpy(buf_append(&st->icmp,len),buf->start,len);
399 netlink_icmp_csum(h);
400 netlink_packet_deliver(st,NULL,&st->icmp);
401 BUF_ASSERT_FREE(&st->icmp);
402 }
403 }
404
405 /*
406 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
407 * checksum.
408 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
409 *
410 * Is the datagram acceptable?
411 *
412 * 1. Length at least the size of an ip header
413 * 2. Version of 4
414 * 3. Checksums correctly.
415 * 4. Doesn't have a bogus length
416 */
417 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
418 char *errmsgbuf, int errmsgbuflen)
419 {
420 #define BAD(...) do{ \
421 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
422 return False; \
423 }while(0)
424
425 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
426 struct iphdr *iph=(struct iphdr *)buf->start;
427 int32_t len;
428
429 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
430 if (iph->version != 4) BAD("version %u",iph->version);
431 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
432 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
433 len=ntohs(iph->tot_len);
434 /* There should be no padding */
435 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
436 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
437 /* XXX check that there's no source route specified */
438 return True;
439
440 #undef BAD
441 }
442
443 static const char *fragment_filter_header(uint8_t *base, long *hlp)
444 {
445 const int fixedhl = sizeof(struct iphdr);
446 long hl = *hlp;
447 const uint8_t *ipend = base + hl;
448 uint8_t *op = base + fixedhl;
449 const uint8_t *ip = op;
450
451 while (ip < ipend) {
452 uint8_t opt = ip[0];
453 int remain = ipend - ip;
454 if (opt == 0x00) /* End of Options List */ break;
455 if (opt == 0x01) /* No Operation */ continue;
456 if (remain < 2) return "IPv4 options truncated at length";
457 int optlen = ip[1];
458 if (remain < optlen) return "IPv4 options truncated in option";
459 if (opt & 0x80) /* copy */ {
460 memmove(op, ip, optlen);
461 op += optlen;
462 }
463 ip += optlen;
464 }
465 while ((hl = (op - base)) & 0x3)
466 *op++ = 0x00 /* End of Option List */;
467 ((struct iphdr*)base)->ihl = hl >> 2;
468 *hlp = hl;
469
470 return 0;
471 }
472
473 /* Fragment or send ICMP Fragmentation Needed */
474 static void netlink_maybe_fragment(struct netlink *st,
475 struct netlink_client *sender,
476 netlink_deliver_fn *deliver,
477 void *deliver_dst,
478 const char *delivery_name,
479 int32_t mtu,
480 uint32_t source, uint32_t dest,
481 struct buffer_if *buf)
482 {
483 struct iphdr *iph=(struct iphdr*)buf->start;
484 long hl = iph->ihl*4;
485 const char *ssource = ipaddr_to_string(source);
486
487 if (buf->size <= mtu) {
488 deliver(deliver_dst, buf);
489 return;
490 }
491
492 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
493 st->name, ssource, delivery_name, buf->size);
494
495 #define BADFRAG(m, ...) \
496 Message(M_WARNING, \
497 "%s: fragmenting packet from source %s" \
498 " for transmission via %s: " m "\n", \
499 st->name, ssource, delivery_name, \
500 ## __VA_ARGS__);
501
502 unsigned orig_frag = ntohs(iph->frag);
503
504 if (orig_frag&IPHDR_FRAG_DONT) {
505 union icmpinfofield info =
506 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
507 netlink_icmp_simple(st,sender,buf,
508 ICMP_TYPE_UNREACHABLE,
509 ICMP_CODE_FRAGMENTATION_REQUIRED,
510 info);
511 BUF_FREE(buf);
512 return;
513 }
514 if (mtu < hl + 8) {
515 BADFRAG("mtu %"PRId32" too small", mtu);
516 BUF_FREE(buf);
517 return;
518 }
519
520 /* we (ab)use the icmp buffer to stash the original packet */
521 struct buffer_if *orig = &st->icmp;
522 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
523 buffer_copy(orig,buf);
524 BUF_FREE(buf);
525
526 const uint8_t *startindata = orig->start + hl;
527 const uint8_t *indata = startindata;
528 const uint8_t *endindata = orig->start + orig->size;
529 _Bool filtered = 0;
530
531 for (;;) {
532 /* compute our fragment offset */
533 long dataoffset = indata - startindata
534 + (orig_frag & IPHDR_FRAG_OFF)*8;
535 assert(!(dataoffset & 7));
536 if (dataoffset > IPHDR_FRAG_OFF*8) {
537 BADFRAG("ultimate fragment offset out of range");
538 break;
539 }
540
541 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
542 buffer_init(buf,calculate_max_start_pad());
543
544 /* copy header (possibly filtered); will adjust in a bit */
545 struct iphdr *fragh = buf_append(buf, hl);
546 memcpy(fragh, orig->start, hl);
547
548 /* decide how much payload to copy and copy it */
549 long avail = mtu - hl;
550 long remain = endindata - indata;
551 long use = avail < remain ? (avail & ~(long)7) : remain;
552 memcpy(buf_append(buf, use), indata, use);
553 indata += use;
554
555 _Bool last_frag = indata >= endindata;
556
557 /* adjust the header */
558 fragh->tot_len = htons(buf->size);
559 fragh->frag =
560 htons((orig_frag & ~IPHDR_FRAG_OFF) |
561 (last_frag ? 0 : IPHDR_FRAG_MORE) |
562 (dataoffset >> 3));
563 fragh->check = 0;
564 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
565
566 /* actually send it */
567 deliver(deliver_dst, buf);
568 if (last_frag)
569 break;
570
571 /* after copying the header for the first frag,
572 * we filter the header for the remaining frags */
573 if (!filtered++) {
574 const char *bad = fragment_filter_header(orig->start, &hl);
575 if (bad) { BADFRAG("%s", bad); break; }
576 }
577 }
578
579 BUF_FREE(orig);
580
581 #undef BADFRAG
582 }
583
584 /* Deliver a packet _to_ client; used after we have decided
585 * what to do with it (and just to check that the client has
586 * actually registered a delivery function with us). */
587 static void netlink_client_deliver(struct netlink *st,
588 struct netlink_client *client,
589 uint32_t source, uint32_t dest,
590 struct buffer_if *buf)
591 {
592 if (!client->deliver) {
593 string_t s,d;
594 s=ipaddr_to_string(source);
595 d=ipaddr_to_string(dest);
596 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
597 st->name,s,d);
598 free(s); free(d);
599 BUF_FREE(buf);
600 return;
601 }
602 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
603 client->mtu, source,dest,buf);
604 client->outcount++;
605 }
606
607 /* Deliver a packet to the host; used after we have decided that that
608 * is what to do with it. */
609 static void netlink_host_deliver(struct netlink *st,
610 struct netlink_client *sender,
611 uint32_t source, uint32_t dest,
612 struct buffer_if *buf)
613 {
614 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
615 st->mtu, source,dest,buf);
616 st->outcount++;
617 }
618
619 /* Deliver a packet. "sender"==NULL for packets from the host and packets
620 generated internally in secnet. */
621 static void netlink_packet_deliver(struct netlink *st,
622 struct netlink_client *sender,
623 struct buffer_if *buf)
624 {
625 if (buf->size < (int)sizeof(struct iphdr)) {
626 Message(M_ERR,"%s: trying to deliver a too-short packet"
627 " from %s!\n",st->name, sender_name(sender));
628 BUF_FREE(buf);
629 return;
630 }
631
632 struct iphdr *iph=(struct iphdr *)buf->start;
633 uint32_t dest=ntohl(iph->daddr);
634 uint32_t source=ntohl(iph->saddr);
635 uint32_t best_quality;
636 bool_t allow_route=False;
637 bool_t found_allowed=False;
638 int best_match;
639 int i;
640
641 BUF_ASSERT_USED(buf);
642
643 if (dest==st->secnet_address) {
644 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
645 BUF_FREE(buf);
646 return;
647 }
648
649 /* Packets from the host (sender==NULL) may always be routed. Packets
650 from clients with the allow_route option will also be routed. */
651 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
652 allow_route=True;
653
654 /* If !allow_route, we check the routing table anyway, and if
655 there's a suitable route with OPT_ALLOWROUTE set we use it. If
656 there's a suitable route, but none with OPT_ALLOWROUTE set then
657 we generate ICMP 'communication with destination network
658 administratively prohibited'. */
659
660 best_quality=0;
661 best_match=-1;
662 for (i=0; i<st->n_clients; i++) {
663 if (st->routes[i]->up &&
664 ipset_contains_addr(st->routes[i]->networks,dest)) {
665 /* It's an available route to the correct destination. But is
666 it better than the one we already have? */
667
668 /* If we have already found an allowed route then we don't
669 bother looking at routes we're not allowed to use. If
670 we don't yet have an allowed route we'll consider any. */
671 if (!allow_route && found_allowed) {
672 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
673 }
674
675 if (st->routes[i]->link_quality>best_quality
676 || best_quality==0) {
677 best_quality=st->routes[i]->link_quality;
678 best_match=i;
679 if (st->routes[i]->options&OPT_ALLOWROUTE)
680 found_allowed=True;
681 /* If quality isn't perfect we may wish to
682 consider kicking the tunnel with a 0-length
683 packet to prompt it to perform a key setup.
684 Then it'll eventually decide it's up or
685 down. */
686 /* If quality is perfect and we're allowed to use the
687 route we don't need to search any more. */
688 if (best_quality>=MAXIMUM_LINK_QUALITY &&
689 (allow_route || found_allowed)) break;
690 }
691 }
692 }
693 if (best_match==-1) {
694 /* The packet's not going down a tunnel. It might (ought to)
695 be for the host. */
696 if (ipset_contains_addr(st->networks,dest)) {
697 netlink_host_deliver(st,sender,source,dest,buf);
698 BUF_ASSERT_FREE(buf);
699 } else {
700 string_t s,d;
701 s=ipaddr_to_string(source);
702 d=ipaddr_to_string(dest);
703 Message(M_DEBUG,"%s: don't know where to deliver packet "
704 "(s=%s, d=%s)\n", st->name, s, d);
705 free(s); free(d);
706 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
707 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
708 BUF_FREE(buf);
709 }
710 } else {
711 if (!allow_route &&
712 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
713 string_t s,d;
714 s=ipaddr_to_string(source);
715 d=ipaddr_to_string(dest);
716 /* We have a usable route but aren't allowed to use it.
717 Generate ICMP destination unreachable: communication
718 with destination network administratively prohibited */
719 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
720 st->name,s,d);
721 free(s); free(d);
722
723 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
724 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
725 BUF_FREE(buf);
726 } else {
727 if (best_quality>0) {
728 netlink_client_deliver(st,st->routes[best_match],
729 source,dest,buf);
730 BUF_ASSERT_FREE(buf);
731 } else {
732 /* Generate ICMP destination unreachable */
733 netlink_icmp_simple(st,sender,buf,
734 ICMP_TYPE_UNREACHABLE,
735 ICMP_CODE_NET_UNREACHABLE,
736 icmp_noinfo);
737 BUF_FREE(buf);
738 }
739 }
740 }
741 BUF_ASSERT_FREE(buf);
742 }
743
744 static void netlink_packet_forward(struct netlink *st,
745 struct netlink_client *sender,
746 struct buffer_if *buf)
747 {
748 if (buf->size < (int)sizeof(struct iphdr)) return;
749 struct iphdr *iph=(struct iphdr *)buf->start;
750
751 BUF_ASSERT_USED(buf);
752
753 /* Packet has already been checked */
754 if (iph->ttl<=1) {
755 /* Generate ICMP time exceeded */
756 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
757 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
758 BUF_FREE(buf);
759 return;
760 }
761 iph->ttl--;
762 iph->check=0;
763 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
764
765 netlink_packet_deliver(st,sender,buf);
766 BUF_ASSERT_FREE(buf);
767 }
768
769 /* Deal with packets addressed explicitly to us */
770 static void netlink_packet_local(struct netlink *st,
771 struct netlink_client *sender,
772 struct buffer_if *buf)
773 {
774 struct icmphdr *h;
775
776 st->localcount++;
777
778 if (buf->size < (int)sizeof(struct icmphdr)) {
779 Message(M_WARNING,"%s: short packet addressed to secnet; "
780 "ignoring it\n",st->name);
781 BUF_FREE(buf);
782 return;
783 }
784 h=(struct icmphdr *)buf->start;
785
786 unsigned fraginfo = ntohs(h->iph.frag);
787 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
788 if (!(fraginfo & IPHDR_FRAG_OFF))
789 /* report only for first fragment */
790 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
791 "ignoring it\n",st->name);
792 BUF_FREE(buf);
793 return;
794 }
795
796 if (h->iph.protocol==1) {
797 /* It's ICMP */
798 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
799 /* ICMP echo-request. Special case: we re-use the buffer
800 to construct the reply. */
801 h->type=ICMP_TYPE_ECHO_REPLY;
802 h->iph.daddr=h->iph.saddr;
803 h->iph.saddr=htonl(st->secnet_address);
804 h->iph.ttl=255;
805 h->iph.check=0;
806 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
807 netlink_icmp_csum(h);
808 netlink_packet_deliver(st,NULL,buf);
809 return;
810 }
811 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
812 } else {
813 /* Send ICMP protocol unreachable */
814 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
815 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
816 BUF_FREE(buf);
817 return;
818 }
819
820 BUF_FREE(buf);
821 }
822
823 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
824 it came from. */
825 static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
826 struct buffer_if *buf)
827 {
828 uint32_t source,dest;
829 struct iphdr *iph;
830 char errmsgbuf[50];
831 const char *sourcedesc=sender?sender->name:"host";
832
833 BUF_ASSERT_USED(buf);
834
835 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
836 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
837 st->name,sourcedesc,
838 errmsgbuf);
839 BUF_FREE(buf);
840 return;
841 }
842 assert(buf->size >= (int)sizeof(struct iphdr));
843 iph=(struct iphdr *)buf->start;
844
845 source=ntohl(iph->saddr);
846 dest=ntohl(iph->daddr);
847
848 /* Check source. If we don't like the source, there's no point
849 generating ICMP because we won't know how to get it to the
850 source of the packet. */
851 if (sender) {
852 /* Check that the packet source is appropriate for the tunnel
853 it came down */
854 if (!ipset_contains_addr(sender->networks,source)) {
855 string_t s,d;
856 s=ipaddr_to_string(source);
857 d=ipaddr_to_string(dest);
858 Message(M_WARNING,"%s: packet from tunnel %s with bad "
859 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
860 free(s); free(d);
861 BUF_FREE(buf);
862 return;
863 }
864 } else {
865 /* Check that the packet originates in our configured local
866 network, and hasn't been forwarded from elsewhere or
867 generated with the wrong source address */
868 if (!ipset_contains_addr(st->networks,source)) {
869 string_t s,d;
870 s=ipaddr_to_string(source);
871 d=ipaddr_to_string(dest);
872 Message(M_WARNING,"%s: outgoing packet with bad source address "
873 "(s=%s,d=%s)\n",st->name,s,d);
874 free(s); free(d);
875 BUF_FREE(buf);
876 return;
877 }
878 }
879
880 /* If this is a point-to-point device we don't examine the
881 destination address at all; we blindly send it down our
882 one-and-only registered tunnel, or to the host, depending on
883 where it came from. It's up to external software to check
884 address validity and generate ICMP, etc. */
885 if (st->ptp) {
886 if (sender) {
887 netlink_host_deliver(st,sender,source,dest,buf);
888 } else {
889 netlink_client_deliver(st,st->clients,source,dest,buf);
890 }
891 BUF_ASSERT_FREE(buf);
892 return;
893 }
894
895 /* st->secnet_address needs checking before matching destination
896 addresses */
897 if (dest==st->secnet_address) {
898 netlink_packet_local(st,sender,buf);
899 BUF_ASSERT_FREE(buf);
900 return;
901 }
902 netlink_packet_forward(st,sender,buf);
903 BUF_ASSERT_FREE(buf);
904 }
905
906 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
907 {
908 struct netlink_client *c=sst;
909 struct netlink *st=c->nst;
910
911 netlink_incoming(st,c,buf);
912 }
913
914 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
915 {
916 struct netlink *st=sst;
917
918 netlink_incoming(st,NULL,buf);
919 }
920
921 static void netlink_set_quality(void *sst, uint32_t quality)
922 {
923 struct netlink_client *c=sst;
924 struct netlink *st=c->nst;
925
926 c->link_quality=quality;
927 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
928 if (c->options&OPT_SOFTROUTE) {
929 st->set_routes(st->dst,c);
930 }
931 }
932
933 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
934 struct subnet_list *snets)
935 {
936 int32_t i;
937 string_t net;
938
939 for (i=0; i<snets->entries; i++) {
940 net=subnet_to_string(snets->list[i]);
941 Message(loglevel,"%s ",net);
942 free(net);
943 }
944 }
945
946 static void netlink_dump_routes(struct netlink *st, bool_t requested)
947 {
948 int i;
949 string_t net;
950 uint32_t c=M_INFO;
951
952 if (requested) c=M_WARNING;
953 if (st->ptp) {
954 net=ipaddr_to_string(st->secnet_address);
955 Message(c,"%s: point-to-point (remote end is %s); routes: ",
956 st->name, net);
957 free(net);
958 netlink_output_subnets(st,c,st->clients->subnets);
959 Message(c,"\n");
960 } else {
961 Message(c,"%s: routing table:\n",st->name);
962 for (i=0; i<st->n_clients; i++) {
963 netlink_output_subnets(st,c,st->routes[i]->subnets);
964 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
965 "quality %d,use %d,pri %lu)\n",
966 st->routes[i]->name,
967 st->routes[i]->up?"up":"down",
968 st->routes[i]->mtu,
969 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
970 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
971 st->routes[i]->link_quality,
972 st->routes[i]->outcount,
973 (unsigned long)st->routes[i]->priority);
974 }
975 net=ipaddr_to_string(st->secnet_address);
976 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
977 net,st->name,st->localcount);
978 free(net);
979 for (i=0; i<st->subnets->entries; i++) {
980 net=subnet_to_string(st->subnets->list[i]);
981 Message(c,"%s ",net);
982 free(net);
983 }
984 if (i>0)
985 Message(c,"-> host (use %d)\n",st->outcount);
986 }
987 }
988
989 /* ap is a pointer to a member of the routes array */
990 static int netlink_compare_client_priority(const void *ap, const void *bp)
991 {
992 const struct netlink_client *const*a=ap;
993 const struct netlink_client *const*b=bp;
994
995 if ((*a)->priority==(*b)->priority) return 0;
996 if ((*a)->priority<(*b)->priority) return 1;
997 return -1;
998 }
999
1000 static void netlink_phase_hook(void *sst, uint32_t new_phase)
1001 {
1002 struct netlink *st=sst;
1003 struct netlink_client *c;
1004 int32_t i;
1005
1006 /* All the networks serviced by the various tunnels should now
1007 * have been registered. We build a routing table by sorting the
1008 * clients by priority. */
1009 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1010 "netlink_phase_hook");
1011 /* Fill the table */
1012 i=0;
1013 for (c=st->clients; c; c=c->next) {
1014 assert(i<INT_MAX);
1015 st->routes[i++]=c;
1016 }
1017 /* Sort the table in descending order of priority */
1018 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1019 netlink_compare_client_priority);
1020
1021 netlink_dump_routes(st,False);
1022 }
1023
1024 static void netlink_signal_handler(void *sst, int signum)
1025 {
1026 struct netlink *st=sst;
1027 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1028 netlink_dump_routes(st,True);
1029 }
1030
1031 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
1032 {
1033 struct netlink_client *c=sst;
1034
1035 c->mtu=new_mtu;
1036 }
1037
1038 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1039 void *dst, uint32_t *localmtu_r)
1040 {
1041 struct netlink_client *c=sst;
1042 struct netlink *st=c->nst;
1043
1044 c->deliver=deliver;
1045 c->dst=dst;
1046
1047 if (localmtu_r)
1048 *localmtu_r=st->mtu;
1049 }
1050
1051 static struct flagstr netlink_option_table[]={
1052 { "soft", OPT_SOFTROUTE },
1053 { "allow-route", OPT_ALLOWROUTE },
1054 { NULL, 0}
1055 };
1056 /* This is the routine that gets called when the closure that's
1057 returned by an invocation of a netlink device closure (eg. tun,
1058 userv-ipif) is invoked. It's used to create routes and pass in
1059 information about them; the closure it returns is used by site
1060 code. */
1061 static closure_t *netlink_inst_create(struct netlink *st,
1062 struct cloc loc, dict_t *dict)
1063 {
1064 struct netlink_client *c;
1065 string_t name;
1066 struct ipset *networks;
1067 uint32_t options,priority;
1068 int32_t mtu;
1069 list_t *l;
1070
1071 name=dict_read_string(dict, "name", True, st->name, loc);
1072
1073 l=dict_lookup(dict,"routes");
1074 if (!l)
1075 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1076 networks=string_list_to_ipset(l,loc,st->name,"routes");
1077 options=string_list_to_word(dict_lookup(dict,"options"),
1078 netlink_option_table,st->name);
1079
1080 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1081 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1082
1083 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
1084 cfgfatal(loc,st->name,"this netlink device does not support "
1085 "soft routes.\n");
1086 return NULL;
1087 }
1088
1089 if (options&OPT_SOFTROUTE) {
1090 /* XXX for now we assume that soft routes require root privilege;
1091 this may not always be true. The device driver can tell us. */
1092 require_root_privileges=True;
1093 require_root_privileges_explanation="netlink: soft routes";
1094 if (st->ptp) {
1095 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1096 "soft routes.\n");
1097 return NULL;
1098 }
1099 }
1100
1101 /* Check that nets are a subset of st->remote_networks;
1102 refuse to register if they are not. */
1103 if (!ipset_is_subset(st->remote_networks,networks)) {
1104 cfgfatal(loc,st->name,"routes are not allowed\n");
1105 return NULL;
1106 }
1107
1108 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1109 c->cl.description=name;
1110 c->cl.type=CL_NETLINK;
1111 c->cl.apply=NULL;
1112 c->cl.interface=&c->ops;
1113 c->ops.st=c;
1114 c->ops.reg=netlink_inst_reg;
1115 c->ops.deliver=netlink_inst_incoming;
1116 c->ops.set_quality=netlink_set_quality;
1117 c->ops.set_mtu=netlink_inst_set_mtu;
1118 c->nst=st;
1119
1120 c->networks=networks;
1121 c->subnets=ipset_to_subnet_list(networks);
1122 c->priority=priority;
1123 c->deliver=NULL;
1124 c->dst=NULL;
1125 c->name=name;
1126 c->link_quality=LINK_QUALITY_UNUSED;
1127 c->mtu=mtu?mtu:st->mtu;
1128 c->options=options;
1129 c->outcount=0;
1130 c->up=False;
1131 c->kup=False;
1132 c->next=st->clients;
1133 st->clients=c;
1134 assert(st->n_clients < INT_MAX);
1135 st->n_clients++;
1136
1137 return &c->cl;
1138 }
1139
1140 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1141 dict_t *context, list_t *args)
1142 {
1143 struct netlink *st=self->interface;
1144
1145 dict_t *dict;
1146 item_t *item;
1147 closure_t *cl;
1148
1149 item=list_elem(args,0);
1150 if (!item || item->type!=t_dict) {
1151 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1152 }
1153 dict=item->data.dict;
1154
1155 cl=netlink_inst_create(st,loc,dict);
1156
1157 return new_closure(cl);
1158 }
1159
1160 netlink_deliver_fn *netlink_init(struct netlink *st,
1161 void *dst, struct cloc loc,
1162 dict_t *dict, cstring_t description,
1163 netlink_route_fn *set_routes,
1164 netlink_deliver_fn *to_host)
1165 {
1166 item_t *sa, *ptpa;
1167 list_t *l;
1168
1169 st->dst=dst;
1170 st->cl.description=description;
1171 st->cl.type=CL_PURE;
1172 st->cl.apply=netlink_inst_apply;
1173 st->cl.interface=st;
1174 st->clients=NULL;
1175 st->routes=NULL;
1176 st->n_clients=0;
1177 st->set_routes=set_routes;
1178 st->deliver_to_host=to_host;
1179
1180 st->name=dict_read_string(dict,"name",False,description,loc);
1181 if (!st->name) st->name=description;
1182 l=dict_lookup(dict,"networks");
1183 if (l)
1184 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1185 else {
1186 struct ipset *empty;
1187 empty=ipset_new();
1188 st->networks=ipset_complement(empty);
1189 ipset_free(empty);
1190 }
1191 l=dict_lookup(dict,"remote-networks");
1192 if (l) {
1193 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1194 "remote-networks");
1195 } else {
1196 struct ipset *empty;
1197 empty=ipset_new();
1198 st->remote_networks=ipset_complement(empty);
1199 ipset_free(empty);
1200 }
1201 st->local_address=string_item_to_ipaddr(
1202 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
1203
1204 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1205 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1206 if (sa && ptpa) {
1207 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1208 "ptp-address in the same netlink device\n");
1209 }
1210 if (!(sa || ptpa)) {
1211 cfgfatal(loc,st->name,"you must specify secnet-address or "
1212 "ptp-address for this netlink device\n");
1213 }
1214 if (sa) {
1215 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1216 st->ptp=False;
1217 } else {
1218 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1219 st->ptp=True;
1220 }
1221 /* To be strictly correct we could subtract secnet_address from
1222 networks here. It shouldn't make any practical difference,
1223 though, and will make the route dump look complicated... */
1224 st->subnets=ipset_to_subnet_list(st->networks);
1225 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1226 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
1227 st->outcount=0;
1228 st->localcount=0;
1229
1230 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1231 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1232
1233 /* If we're point-to-point then we return a CL_NETLINK directly,
1234 rather than a CL_NETLINK_OLD or pure closure (depending on
1235 compatibility). This CL_NETLINK is for our one and only
1236 client. Our cl.apply function is NULL. */
1237 if (st->ptp) {
1238 closure_t *cl;
1239 cl=netlink_inst_create(st,loc,dict);
1240 st->cl=*cl;
1241 }
1242 return netlink_dev_incoming;
1243 }
1244
1245 /* No connection to the kernel at all... */
1246
1247 struct null {
1248 struct netlink nl;
1249 };
1250
1251 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1252 {
1253 struct null *st=sst;
1254
1255 if (routes->up!=routes->kup) {
1256 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1257 st->nl.name,routes->name,
1258 routes->up?"up":"down");
1259 routes->kup=routes->up;
1260 return True;
1261 }
1262 return False;
1263 }
1264
/* Host-delivery callback of the null device: does nothing with either
   argument. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    /* Deliberately empty */
}
1269
1270 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1271 list_t *args)
1272 {
1273 struct null *st;
1274 item_t *item;
1275 dict_t *dict;
1276
1277 st=safe_malloc(sizeof(*st),"null_apply");
1278
1279 item=list_elem(args,0);
1280 if (!item || item->type!=t_dict)
1281 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1282
1283 dict=item->data.dict;
1284
1285 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1286 null_deliver);
1287
1288 return new_closure(&st->nl.cl);
1289 }
1290
1291 void netlink_module(dict_t *dict)
1292 {
1293 add_closure(dict,"null-netlink",null_apply);
1294 }