Import release 0.1.14
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* See RFCs 791, 792, 1123 and 1812 */
4
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14 /* Points to note from RFC1812 (which may require changes in this
15 file):
16
17 3.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
29 4.2.1 A router SHOULD count datagrams discarded.
30
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
33 their use.
34
35 5.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
42 5.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
54 5.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98 */
99
100 #include "secnet.h"
101 #include "util.h"
102 #include "ipaddr.h"
103 #include "netlink.h"
104 #include "process.h"
105
/* Per-client option bits, tested against netlink_client.options */
#define OPT_SOFTROUTE                    1
#define OPT_ALLOWROUTE                   2

/* ICMP message types used in this file */
#define ICMP_TYPE_ECHO_REPLY             0
#define ICMP_TYPE_UNREACHABLE            3
#define ICMP_TYPE_ECHO_REQUEST           8
#define ICMP_TYPE_TIME_EXCEEDED          11

/* Codes that accompany ICMP_TYPE_UNREACHABLE */
#define ICMP_CODE_NET_UNREACHABLE        0
#define ICMP_CODE_PROTOCOL_UNREACHABLE   2
#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
#define ICMP_CODE_NET_PROHIBITED         13

/* Codes that accompany ICMP_TYPE_TIME_EXCEEDED */
#define ICMP_CODE_TTL_EXCEEDED           0
121
/* Generic IP checksum routine.
 *
 * iph:   first byte of the region to checksum
 * count: number of bytes in the region
 *
 * The data is summed as a sequence of big-endian 16-bit words using
 * one's-complement arithmetic; the complement of that sum is returned,
 * passed through htons() so the result can be stored directly in a
 * header checksum field.  Running this over a region that already
 * contains its correct checksum therefore yields 0.
 *
 * If count is odd the final byte is treated as the high-order octet of
 * a word whose low-order octet is zero (RFC 1071 padding).
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    /* Build each word from individual bytes: this needs no particular
       alignment of iph and does not depend on host byte order. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8; /* odd trailing byte, zero-padded */
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
138
#if defined(__i386__) || defined(i386)
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph points at the IP header and ihl is the header length in 32-bit
 * words.  The low 16 bits of the folded, complemented sum are returned.
 *
 * Each assembler line is a separate string literal terminated with
 * "\n": a string literal spanning several source lines is not valid C
 * and is rejected by current compilers.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
        "    movl (%1), %0\n"
        "    subl $4, %2\n"
        "    jbe 2f\n"
        "    addl 4(%1), %0\n"
        "    adcl 8(%1), %0\n"
        "    adcl 12(%1), %0\n"
        "1:  adcl 16(%1), %0\n"
        "    lea 4(%1), %1\n"
        "    decl %2\n"
        "    jne 1b\n"
        "    adcl $0, %0\n"
        "    movl %0, %2\n"
        "    shrl $16, %0\n"
        "    addw %w2, %w0\n"
        "    adcl $0, %0\n"
        "    notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        /* The asm reads the header through iph, so the compiler must
           not keep pending stores to it in registers. */
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum ihl 32-bit words with the generic routine */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
182
/* Fixed 20-byte part of an IPv4 header (RFC 791).  The code in this file
   stores multi-byte fields in network byte order (htons/htonl on write,
   ntohs/ntohl on read). */
struct iphdr {
#if !defined (WORDS_BIGENDIAN)
    uint8_t ihl:4;        /* header length in 32-bit words */
    uint8_t version:4;    /* IP version; 4 for the packets handled here */
#else
    uint8_t version:4;    /* IP version; 4 for the packets handled here */
    uint8_t ihl:4;        /* header length in 32-bit words */
#endif
    uint8_t tos;          /* type of service */
    uint16_t tot_len;     /* total datagram length in bytes */
    uint16_t id;          /* identification */
    uint16_t frag_off;    /* flags and fragment offset */
    uint8_t ttl;          /* time to live */
    uint8_t protocol;     /* payload protocol; 1 is ICMP */
    uint16_t check;       /* header checksum */
    uint32_t saddr;       /* source address */
    uint32_t daddr;       /* destination address */
    /* The options start here. */
};
202
203 struct icmphdr {
204 struct iphdr iph;
205 uint8_t type;
206 uint8_t code;
207 uint16_t check;
208 union {
209 uint32_t unused;
210 struct {
211 uint8_t pointer;
212 uint8_t unused1;
213 uint16_t unused2;
214 } pprob;
215 uint32_t gwaddr;
216 struct {
217 uint16_t id;
218 uint16_t seq;
219 } echo;
220 } d;
221 };
222
/* Forward declaration: the ICMP helpers below send their replies with it */
static void netlink_packet_deliver(struct netlink *st, struct netlink_client *client, struct buffer_if *buf);
226
227 /* XXX RFC1812 4.3.2.5:
228 All other ICMP error messages (Destination Unreachable,
229 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
230 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
231 CONTROL). The IP Precedence value for these error messages MAY be
232 settable.
233 */
234 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
235 uint32_t dest,uint16_t len)
236 {
237 struct icmphdr *h;
238
239 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
240 buffer_init(&st->icmp,st->max_start_pad);
241 h=buf_append(&st->icmp,sizeof(*h));
242
243 h->iph.version=4;
244 h->iph.ihl=5;
245 h->iph.tos=0;
246 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
247 h->iph.id=0;
248 h->iph.frag_off=0;
249 h->iph.ttl=255; /* XXX should be configurable */
250 h->iph.protocol=1;
251 h->iph.saddr=htonl(st->secnet_address);
252 h->iph.daddr=htonl(dest);
253 h->iph.check=0;
254 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
255 h->check=0;
256 h->d.unused=0;
257
258 return h;
259 }
260
261 /* Fill in the ICMP checksum field correctly */
262 static void netlink_icmp_csum(struct icmphdr *h)
263 {
264 uint32_t len;
265
266 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
267 h->check=0;
268 h->check=ip_csum(&h->type,len);
269 }
270
271 /* RFC1122:
272 * An ICMP error message MUST NOT be sent as the result of
273 * receiving:
274 *
275 * * an ICMP error message, or
276 *
277 * * a datagram destined to an IP broadcast or IP multicast
278 * address, or
279 *
280 * * a datagram sent as a link-layer broadcast, or
281 *
282 * * a non-initial fragment, or
283 *
284 * * a datagram whose source address does not define a single
285 * host -- e.g., a zero address, a loopback address, a
286 * broadcast address, a multicast address, or a Class E
287 * address.
288 */
289 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
290 {
291 struct iphdr *iph;
292 struct icmphdr *icmph;
293 uint32_t source;
294
295 iph=(struct iphdr *)buf->start;
296 icmph=(struct icmphdr *)buf->start;
297 if (iph->protocol==1) {
298 switch(icmph->type) {
299 case 3: /* Destination unreachable */
300 case 11: /* Time Exceeded */
301 case 12: /* Parameter Problem */
302 return False;
303 }
304 }
305 /* How do we spot broadcast destination addresses? */
306 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
307 source=ntohl(iph->saddr);
308 if (source==0) return False;
309 if ((source&0xff000000)==0x7f000000) return False;
310 /* How do we spot broadcast source addresses? */
311 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
312 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
313 return True;
314 }
315
316 /* How much of the original IP packet do we include in its ICMP
317 response? The header plus up to 64 bits. */
318
319 /* XXX TODO RFC1812:
320 4.3.2.3 Original Message Header
321
322 Historically, every ICMP error message has included the Internet
323 header and at least the first 8 data bytes of the datagram that
324 triggered the error. This is no longer adequate, due to the use of
325 IP-in-IP tunneling and other technologies. Therefore, the ICMP
326 datagram SHOULD contain as much of the original datagram as possible
327 without the length of the ICMP datagram exceeding 576 bytes. The
328 returned IP header (and user data) MUST be identical to that which
329 was received, except that the router is not required to undo any
330 modifications to the IP header that are normally performed in
331 forwarding that were performed before the error was detected (e.g.,
332 decrementing the TTL, or updating options). Note that the
333 requirements of Section [4.3.3.5] supersede this requirement in some
334 cases (i.e., for a Parameter Problem message, if the problem is in a
335 modified field, the router must undo the modification). See Section
336 [4.3.3.5]).
337 */
338 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
339 {
340 struct iphdr *iph=(struct iphdr *)buf->start;
341 uint16_t hlen,plen;
342
343 hlen=iph->ihl*4;
344 /* We include the first 8 bytes of the packet data, provided they exist */
345 hlen+=8;
346 plen=ntohs(iph->tot_len);
347 return (hlen>plen?plen:hlen);
348 }
349
350 /* client indicates where the packet we're constructing a response to
351 comes from. NULL indicates the host. */
352 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
353 struct netlink_client *client,
354 uint8_t type, uint8_t code)
355 {
356 struct iphdr *iph=(struct iphdr *)buf->start;
357 struct icmphdr *h;
358 uint16_t len;
359
360 if (netlink_icmp_may_reply(buf)) {
361 len=netlink_icmp_reply_len(buf);
362 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
363 h->type=type; h->code=code;
364 memcpy(buf_append(&st->icmp,len),buf->start,len);
365 netlink_icmp_csum(h);
366 netlink_packet_deliver(st,NULL,&st->icmp);
367 BUF_ASSERT_FREE(&st->icmp);
368 }
369 }
370
371 /*
372 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
373 * checksum.
374 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
375 *
376 * Is the datagram acceptable?
377 *
378 * 1. Length at least the size of an ip header
379 * 2. Version of 4
380 * 3. Checksums correctly.
381 * 4. Doesn't have a bogus length
382 */
383 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
384 {
385 struct iphdr *iph=(struct iphdr *)buf->start;
386 uint32_t len;
387
388 if (iph->ihl < 5 || iph->version != 4) return False;
389 if (buf->size < iph->ihl*4) return False;
390 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
391 len=ntohs(iph->tot_len);
392 /* There should be no padding */
393 if (buf->size!=len || len<(iph->ihl<<2)) return False;
394 /* XXX check that there's no source route specified */
395 return True;
396 }
397
398 /* Deliver a packet. "client" is the _origin_ of the packet, not its
399 destination, and is NULL for packets from the host and packets
400 generated internally in secnet. */
401 static void netlink_packet_deliver(struct netlink *st,
402 struct netlink_client *client,
403 struct buffer_if *buf)
404 {
405 struct iphdr *iph=(struct iphdr *)buf->start;
406 uint32_t dest=ntohl(iph->daddr);
407 uint32_t source=ntohl(iph->saddr);
408 uint32_t best_quality;
409 bool_t allow_route=False;
410 bool_t found_allowed=False;
411 int best_match;
412 int i;
413
414 BUF_ASSERT_USED(buf);
415
416 if (dest==st->secnet_address) {
417 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
418 BUF_FREE(buf);
419 return;
420 }
421
422 /* Packets from the host (client==NULL) may always be routed. Packets
423 from clients with the allow_route option will also be routed. */
424 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
425 allow_route=True;
426
427 /* If !allow_route, we check the routing table anyway, and if
428 there's a suitable route with OPT_ALLOWROUTE set we use it. If
429 there's a suitable route, but none with OPT_ALLOWROUTE set then
430 we generate ICMP 'communication with destination network
431 administratively prohibited'. */
432
433 best_quality=0;
434 best_match=-1;
435 for (i=0; i<st->n_clients; i++) {
436 if (st->routes[i]->up &&
437 ipset_contains_addr(st->routes[i]->networks,dest)) {
438 /* It's an available route to the correct destination. But is
439 it better than the one we already have? */
440
441 /* If we have already found an allowed route then we don't
442 bother looking at routes we're not allowed to use. If
443 we don't yet have an allowed route we'll consider any. */
444 if (!allow_route && found_allowed) {
445 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
446 }
447
448 if (st->routes[i]->link_quality>best_quality
449 || best_quality==0) {
450 best_quality=st->routes[i]->link_quality;
451 best_match=i;
452 if (st->routes[i]->options&OPT_ALLOWROUTE)
453 found_allowed=True;
454 /* If quality isn't perfect we may wish to
455 consider kicking the tunnel with a 0-length
456 packet to prompt it to perform a key setup.
457 Then it'll eventually decide it's up or
458 down. */
459 /* If quality is perfect and we're allowed to use the
460 route we don't need to search any more. */
461 if (best_quality>=MAXIMUM_LINK_QUALITY &&
462 (allow_route || found_allowed)) break;
463 }
464 }
465 }
466 if (best_match==-1) {
467 /* The packet's not going down a tunnel. It might (ought to)
468 be for the host. */
469 if (ipset_contains_addr(st->networks,dest)) {
470 st->deliver_to_host(st->dst,buf);
471 st->outcount++;
472 BUF_ASSERT_FREE(buf);
473 } else {
474 string_t s,d;
475 s=ipaddr_to_string(source);
476 d=ipaddr_to_string(dest);
477 Message(M_DEBUG,"%s: don't know where to deliver packet "
478 "(s=%s, d=%s)\n", st->name, s, d);
479 free(s); free(d);
480 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
481 ICMP_CODE_NET_UNREACHABLE);
482 BUF_FREE(buf);
483 }
484 } else {
485 if (!allow_route &&
486 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
487 string_t s,d;
488 s=ipaddr_to_string(source);
489 d=ipaddr_to_string(dest);
490 /* We have a usable route but aren't allowed to use it.
491 Generate ICMP destination unreachable: communication
492 with destination network administratively prohibited */
493 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
494 st->name,s,d);
495 free(s); free(d);
496
497 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
498 ICMP_CODE_NET_PROHIBITED);
499 BUF_FREE(buf);
500 }
501 if (best_quality>0) {
502 /* XXX Fragment if required */
503 st->routes[best_match]->deliver(
504 st->routes[best_match]->dst, buf);
505 st->routes[best_match]->outcount++;
506 BUF_ASSERT_FREE(buf);
507 } else {
508 /* Generate ICMP destination unreachable */
509 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
510 ICMP_CODE_NET_UNREACHABLE); /* client==NULL */
511 BUF_FREE(buf);
512 }
513 }
514 BUF_ASSERT_FREE(buf);
515 }
516
517 static void netlink_packet_forward(struct netlink *st,
518 struct netlink_client *client,
519 struct buffer_if *buf)
520 {
521 struct iphdr *iph=(struct iphdr *)buf->start;
522
523 BUF_ASSERT_USED(buf);
524
525 /* Packet has already been checked */
526 if (iph->ttl<=1) {
527 /* Generate ICMP time exceeded */
528 netlink_icmp_simple(st,buf,client,ICMP_TYPE_TIME_EXCEEDED,
529 ICMP_CODE_TTL_EXCEEDED);
530 BUF_FREE(buf);
531 return;
532 }
533 iph->ttl--;
534 iph->check=0;
535 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
536
537 netlink_packet_deliver(st,client,buf);
538 BUF_ASSERT_FREE(buf);
539 }
540
541 /* Deal with packets addressed explicitly to us */
542 static void netlink_packet_local(struct netlink *st,
543 struct netlink_client *client,
544 struct buffer_if *buf)
545 {
546 struct icmphdr *h;
547
548 st->localcount++;
549
550 h=(struct icmphdr *)buf->start;
551
552 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
553 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
554 "ignoring it\n",st->name);
555 BUF_FREE(buf);
556 return;
557 }
558
559 if (h->iph.protocol==1) {
560 /* It's ICMP */
561 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
562 /* ICMP echo-request. Special case: we re-use the buffer
563 to construct the reply. */
564 h->type=ICMP_TYPE_ECHO_REPLY;
565 h->iph.daddr=h->iph.saddr;
566 h->iph.saddr=htonl(st->secnet_address);
567 h->iph.ttl=255;
568 h->iph.check=0;
569 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
570 netlink_icmp_csum(h);
571 netlink_packet_deliver(st,NULL,buf);
572 return;
573 }
574 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
575 } else {
576 /* Send ICMP protocol unreachable */
577 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
578 ICMP_CODE_PROTOCOL_UNREACHABLE);
579 BUF_FREE(buf);
580 return;
581 }
582
583 BUF_FREE(buf);
584 }
585
/* If client==NULL the packet is from the host; otherwise client
   specifies which tunnel it came from. */
588 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
589 struct buffer_if *buf)
590 {
591 uint32_t source,dest;
592 struct iphdr *iph;
593
594 BUF_ASSERT_USED(buf);
595 if (!netlink_check(st,buf)) {
596 Message(M_WARNING,"%s: bad IP packet from %s\n",
597 st->name,client?client->name:"host");
598 BUF_FREE(buf);
599 return;
600 }
601 iph=(struct iphdr *)buf->start;
602
603 source=ntohl(iph->saddr);
604 dest=ntohl(iph->daddr);
605
606 /* Check source. If we don't like the source, there's no point
607 generating ICMP because we won't know how to get it to the
608 source of the packet. */
609 if (client) {
610 /* Check that the packet source is appropriate for the tunnel
611 it came down */
612 if (!ipset_contains_addr(client->networks,source)) {
613 string_t s,d;
614 s=ipaddr_to_string(source);
615 d=ipaddr_to_string(dest);
616 Message(M_WARNING,"%s: packet from tunnel %s with bad "
617 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
618 free(s); free(d);
619 BUF_FREE(buf);
620 return;
621 }
622 } else {
623 /* Check that the packet originates in our configured local
624 network, and hasn't been forwarded from elsewhere or
625 generated with the wrong source address */
626 if (!ipset_contains_addr(st->networks,source)) {
627 string_t s,d;
628 s=ipaddr_to_string(source);
629 d=ipaddr_to_string(dest);
630 Message(M_WARNING,"%s: outgoing packet with bad source address "
631 "(s=%s,d=%s)\n",st->name,s,d);
632 free(s); free(d);
633 BUF_FREE(buf);
634 return;
635 }
636 }
637
638 /* If this is a point-to-point device we don't examine the
639 destination address at all; we blindly send it down our
640 one-and-only registered tunnel, or to the host, depending on
641 where it came from. It's up to external software to check
642 address validity and generate ICMP, etc. */
643 if (st->ptp) {
644 if (client) {
645 st->deliver_to_host(st->dst,buf);
646 } else {
647 st->clients->deliver(st->clients->dst,buf);
648 }
649 BUF_ASSERT_FREE(buf);
650 return;
651 }
652
653 /* st->secnet_address needs checking before matching destination
654 addresses */
655 if (dest==st->secnet_address) {
656 netlink_packet_local(st,client,buf);
657 BUF_ASSERT_FREE(buf);
658 return;
659 }
660 netlink_packet_forward(st,client,buf);
661 BUF_ASSERT_FREE(buf);
662 }
663
664 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
665 {
666 struct netlink_client *c=sst;
667 struct netlink *st=c->nst;
668
669 netlink_incoming(st,c,buf);
670 }
671
672 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
673 {
674 struct netlink *st=sst;
675
676 netlink_incoming(st,NULL,buf);
677 }
678
679 static void netlink_set_quality(void *sst, uint32_t quality)
680 {
681 struct netlink_client *c=sst;
682 struct netlink *st=c->nst;
683
684 c->link_quality=quality;
685 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
686 if (c->options&OPT_SOFTROUTE) {
687 st->set_routes(st->dst,c);
688 }
689 }
690
691 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
692 struct subnet_list *snets)
693 {
694 uint32_t i;
695 string_t net;
696
697 for (i=0; i<snets->entries; i++) {
698 net=subnet_to_string(snets->list[i]);
699 Message(loglevel,"%s ",net);
700 free(net);
701 }
702 }
703
704 static void netlink_dump_routes(struct netlink *st, bool_t requested)
705 {
706 int i;
707 string_t net;
708 uint32_t c=M_INFO;
709
710 if (requested) c=M_WARNING;
711 if (st->ptp) {
712 net=ipaddr_to_string(st->secnet_address);
713 Message(c,"%s: point-to-point (remote end is %s); routes:\n",
714 st->name, net);
715 free(net);
716 netlink_output_subnets(st,c,st->clients->subnets);
717 Message(c,"\n");
718 } else {
719 Message(c,"%s: routing table:\n",st->name);
720 for (i=0; i<st->n_clients; i++) {
721 netlink_output_subnets(st,c,st->routes[i]->subnets);
722 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
723 "quality %d,use %d)\n",
724 st->routes[i]->name,
725 st->routes[i]->up?"up":"down",
726 st->routes[i]->mtu,
727 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
728 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
729 st->routes[i]->link_quality,
730 st->routes[i]->outcount);
731 }
732 net=ipaddr_to_string(st->secnet_address);
733 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
734 net,st->name,st->localcount);
735 free(net);
736 for (i=0; i<st->subnets->entries; i++) {
737 net=subnet_to_string(st->subnets->list[i]);
738 Message(c,"%s ",net);
739 free(net);
740 }
741 if (i>0)
742 Message(c,"-> host (use %d)\n",st->outcount);
743 }
744 }
745
746 /* ap is a pointer to a member of the routes array */
747 static int netlink_compare_client_priority(const void *ap, const void *bp)
748 {
749 const struct netlink_client *const*a=ap;
750 const struct netlink_client *const*b=bp;
751
752 if ((*a)->priority==(*b)->priority) return 0;
753 if ((*a)->priority<(*b)->priority) return 1;
754 return -1;
755 }
756
757 static void netlink_phase_hook(void *sst, uint32_t new_phase)
758 {
759 struct netlink *st=sst;
760 struct netlink_client *c;
761 uint32_t i;
762
763 /* All the networks serviced by the various tunnels should now
764 * have been registered. We build a routing table by sorting the
765 * clients by priority. */
766 st->routes=safe_malloc(st->n_clients*sizeof(*st->routes),
767 "netlink_phase_hook");
768 /* Fill the table */
769 i=0;
770 for (c=st->clients; c; c=c->next)
771 st->routes[i++]=c;
772 /* Sort the table in descending order of priority */
773 qsort(st->routes,st->n_clients,sizeof(*st->routes),
774 netlink_compare_client_priority);
775
776 netlink_dump_routes(st,False);
777 }
778
779 static void netlink_signal_handler(void *sst, int signum)
780 {
781 struct netlink *st=sst;
782 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
783 netlink_dump_routes(st,True);
784 }
785
/* output_config operation of a client.  Nothing is written to buf at
   present; the function only asserts that the buffer is in use. */
static void netlink_inst_output_config(void *sst, struct buffer_if *buf)
{
    /* For now we don't output anything */
    BUF_ASSERT_USED(buf);
}
794
795 static bool_t netlink_inst_check_config(void *sst, struct buffer_if *buf)
796 {
797 /* struct netlink_client *c=sst; */
798 /* struct netlink *st=c->nst; */
799
800 BUF_ASSERT_USED(buf);
801 /* We need to eat all of the configuration information from the buffer
802 for backward compatibility. */
803 buf->size=0;
804 return True;
805 }
806
807 static void netlink_inst_set_mtu(void *sst, uint32_t new_mtu)
808 {
809 struct netlink_client *c=sst;
810
811 c->mtu=new_mtu;
812 }
813
814 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
815 void *dst, uint32_t max_start_pad,
816 uint32_t max_end_pad)
817 {
818 struct netlink_client *c=sst;
819 struct netlink *st=c->nst;
820
821 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
822 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
823 c->deliver=deliver;
824 c->dst=dst;
825 }
826
827 static struct flagstr netlink_option_table[]={
828 { "soft", OPT_SOFTROUTE },
829 { "allow-route", OPT_ALLOWROUTE },
830 { NULL, 0}
831 };
832 /* This is the routine that gets called when the closure that's
833 returned by an invocation of a netlink device closure (eg. tun,
834 userv-ipif) is invoked. It's used to create routes and pass in
835 information about them; the closure it returns is used by site
836 code. */
837 static closure_t *netlink_inst_create(struct netlink *st,
838 struct cloc loc, dict_t *dict)
839 {
840 struct netlink_client *c;
841 string_t name;
842 struct ipset *networks;
843 uint32_t options,priority,mtu;
844 list_t *l;
845
846 name=dict_read_string(dict, "name", True, st->name, loc);
847
848 l=dict_lookup(dict,"routes");
849 if (!l)
850 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
851 networks=string_list_to_ipset(l,loc,st->name,"routes");
852 options=string_list_to_word(dict_lookup(dict,"options"),
853 netlink_option_table,st->name);
854
855 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
856 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
857
858 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
859 cfgfatal(loc,st->name,"this netlink device does not support "
860 "soft routes.\n");
861 return NULL;
862 }
863
864 if (options&OPT_SOFTROUTE) {
865 /* XXX for now we assume that soft routes require root privilege;
866 this may not always be true. The device driver can tell us. */
867 require_root_privileges=True;
868 require_root_privileges_explanation="netlink: soft routes";
869 if (st->ptp) {
870 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
871 "soft routes.\n");
872 return NULL;
873 }
874 }
875
876 /* Check that nets are a subset of st->remote_networks;
877 refuse to register if they are not. */
878 if (!ipset_is_subset(st->remote_networks,networks)) {
879 cfgfatal(loc,st->name,"routes are not allowed\n");
880 return NULL;
881 }
882
883 c=safe_malloc(sizeof(*c),"netlink_inst_create");
884 c->cl.description=name;
885 c->cl.type=CL_NETLINK;
886 c->cl.apply=NULL;
887 c->cl.interface=&c->ops;
888 c->ops.st=c;
889 c->ops.reg=netlink_inst_reg;
890 c->ops.deliver=netlink_inst_incoming;
891 c->ops.set_quality=netlink_set_quality;
892 c->ops.output_config=netlink_inst_output_config;
893 c->ops.check_config=netlink_inst_check_config;
894 c->ops.set_mtu=netlink_inst_set_mtu;
895 c->nst=st;
896
897 c->networks=networks;
898 c->subnets=ipset_to_subnet_list(networks);
899 c->priority=priority;
900 c->deliver=NULL;
901 c->dst=NULL;
902 c->name=name;
903 c->link_quality=LINK_QUALITY_DOWN;
904 c->mtu=mtu?mtu:st->mtu;
905 c->options=options;
906 c->outcount=0;
907 c->up=False;
908 c->kup=False;
909 c->next=st->clients;
910 st->clients=c;
911 st->n_clients++;
912
913 return &c->cl;
914 }
915
916 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
917 dict_t *context, list_t *args)
918 {
919 struct netlink *st=self->interface;
920
921 dict_t *dict;
922 item_t *item;
923 closure_t *cl;
924
925 item=list_elem(args,0);
926 if (!item || item->type!=t_dict) {
927 cfgfatal(loc,st->name,"must have a dictionary argument\n");
928 }
929 dict=item->data.dict;
930
931 cl=netlink_inst_create(st,loc,dict);
932
933 return new_closure(cl);
934 }
935
936 netlink_deliver_fn *netlink_init(struct netlink *st,
937 void *dst, struct cloc loc,
938 dict_t *dict, string_t description,
939 netlink_route_fn *set_routes,
940 netlink_deliver_fn *to_host)
941 {
942 item_t *sa, *ptpa;
943 list_t *l;
944
945 st->dst=dst;
946 st->cl.description=description;
947 st->cl.type=CL_PURE;
948 st->cl.apply=netlink_inst_apply;
949 st->cl.interface=st;
950 st->max_start_pad=0;
951 st->max_end_pad=0;
952 st->clients=NULL;
953 st->routes=NULL;
954 st->n_clients=0;
955 st->set_routes=set_routes;
956 st->deliver_to_host=to_host;
957
958 st->name=dict_read_string(dict,"name",False,description,loc);
959 if (!st->name) st->name=description;
960 l=dict_lookup(dict,"networks");
961 if (l)
962 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
963 else {
964 struct ipset *empty;
965 empty=ipset_new();
966 st->networks=ipset_complement(empty);
967 ipset_free(empty);
968 }
969 l=dict_lookup(dict,"remote-networks");
970 if (l) {
971 st->remote_networks=string_list_to_ipset(l,loc,st->name,
972 "remote-networks");
973 } else {
974 struct ipset *empty;
975 empty=ipset_new();
976 st->remote_networks=ipset_complement(empty);
977 ipset_free(empty);
978 }
979
980 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
981 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
982 if (sa && ptpa) {
983 cfgfatal(loc,st->name,"you may not specify secnet-address and "
984 "ptp-address in the same netlink device\n");
985 }
986 if (!(sa || ptpa)) {
987 cfgfatal(loc,st->name,"you must specify secnet-address or "
988 "ptp-address for this netlink device\n");
989 }
990 if (sa) {
991 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
992 st->ptp=False;
993 } else {
994 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
995 st->ptp=True;
996 }
997 /* To be strictly correct we could subtract secnet_address from
998 networks here. It shouldn't make any practical difference,
999 though, and will make the route dump look complicated... */
1000 st->subnets=ipset_to_subnet_list(st->networks);
1001 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1002 buffer_new(&st->icmp,ICMP_BUFSIZE);
1003 st->outcount=0;
1004 st->localcount=0;
1005
1006 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1007 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1008
1009 /* If we're point-to-point then we return a CL_NETLINK directly,
1010 rather than a CL_NETLINK_OLD or pure closure (depending on
1011 compatibility). This CL_NETLINK is for our one and only
1012 client. Our cl.apply function is NULL. */
1013 if (st->ptp) {
1014 closure_t *cl;
1015 cl=netlink_inst_create(st,loc,dict);
1016 st->cl=*cl;
1017 }
1018 return netlink_dev_incoming;
1019 }
1020
1021 /* No connection to the kernel at all... */
1022
1023 struct null {
1024 struct netlink nl;
1025 };
1026
1027 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1028 {
1029 struct null *st=sst;
1030
1031 if (routes->up!=routes->kup) {
1032 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1033 st->nl.name,routes->name,
1034 routes->up?"up":"down");
1035 routes->kup=routes->up;
1036 return True;
1037 }
1038 return False;
1039 }
1040
/* Deliver-to-host function of the null device: the packet is simply
   discarded.  The buffer has to be freed here because the netlink code
   asserts that a buffer is free once a deliver function returns. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1045
1046 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1047 list_t *args)
1048 {
1049 struct null *st;
1050 item_t *item;
1051 dict_t *dict;
1052
1053 st=safe_malloc(sizeof(*st),"null_apply");
1054
1055 item=list_elem(args,0);
1056 if (!item || item->type!=t_dict)
1057 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1058
1059 dict=item->data.dict;
1060
1061 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1062 null_deliver);
1063
1064 return new_closure(&st->nl.cl);
1065 }
1066
1067 init_module netlink_module;
1068 void netlink_module(dict_t *dict)
1069 {
1070 add_closure(dict,"null-netlink",null_apply);
1071 }