87b6671f37e3be92f3c310b5fac43aeb3aeda2f2
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* See RFCs 791, 792, 1123 and 1812 */
4
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14 /* Points to note from RFC1812 (which may require changes in this
15 file):
16
17 3.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
29 4.2.1 A router SHOULD count datagrams discarded.
30
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
33 their use.
34
35 5.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
42 5.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
54 5.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98 */
99
100 #include <string.h>
101 #include <assert.h>
102 #include <limits.h>
103 #include "secnet.h"
104 #include "util.h"
105 #include "ipaddr.h"
106 #include "netlink.h"
107 #include "process.h"
108
109 #define ICMP_TYPE_ECHO_REPLY 0
110
111 #define ICMP_TYPE_UNREACHABLE 3
112 #define ICMP_CODE_NET_UNREACHABLE 0
113 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
114 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
115 #define ICMP_CODE_NET_PROHIBITED 13
116
117 #define ICMP_TYPE_ECHO_REQUEST 8
118
119 #define ICMP_TYPE_TIME_EXCEEDED 11
120 #define ICMP_CODE_TTL_EXCEEDED 0
121
/* Generic Internet checksum routine (RFC 1071).
 *
 * Sums the `count' bytes starting at `iph' as big-endian 16-bit words,
 * folds the carries back in and returns the one's complement of the
 * result in network byte order, ready to be stored into a header.
 *
 * If `count' is odd the final byte is treated as the high-order octet
 * of a word whose low-order octet is zero (i.e. the data is padded on
 * the right), as RFC 1071 requires.  The words are assembled from
 * individual bytes so the data need not be 16-bit aligned.
 */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* Odd trailing byte: pad with a zero octet on the right */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries into the low 16 bits (end-around carry) */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
138
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph points at the start of the IP header and ihl is the header
 * length in 32-bit words (callers in this file pass iph->ihl).
 *
 * NOTE(review): when ihl<=4 the "jbe 2f" branch skips the fold and
 * complement steps; the callers visible in this file appear to pass
 * ihl>=5 only -- confirm before using this elsewhere.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
        "movl (%1), %0 ;\n"
        "subl $4, %2 ;\n"
        "jbe 2f ;\n"
        "addl 4(%1), %0 ;\n"
        "adcl 8(%1), %0 ;\n"
        "adcl 12(%1), %0 ;\n"
        "1: adcl 16(%1), %0 ;\n"
        "lea 4(%1), %1 ;\n"
        "decl %2 ;\n"
        "jne 1b ;\n"
        "adcl $0, %0 ;\n"
        "movl %0, %2 ;\n"
        "shrl $16, %0 ;\n"
        "addw %w2, %w0 ;\n"
        "adcl $0, %0 ;\n"
        "notl %0 ;\n"
        "2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum the ihl*4 header bytes with the generic
 * routine.  The assert guards the multiplication by 4 against signed
 * overflow. */
static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl)
{
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
183
/* IPv4 header as it appears at the start of a packet buffer (see
 * RFC 791).  The multi-byte fields are accessed through ntohs()/ntohl()
 * and htons()/htonl() throughout this file, i.e. they are held in
 * network byte order. */
struct iphdr {
    /* The order of the two 4-bit fields depends on the host's
       bit-field layout, selected via WORDS_BIGENDIAN */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4,
            ihl:4;
#else
    uint8_t ihl:4,            /* header length in 32-bit words */
            version:4;        /* IP version; netlink_check requires 4 */
#endif
    uint8_t tos;
    uint16_t tot_len;         /* total datagram length in bytes */
    uint16_t id;
    uint16_t frag_off;        /* flag bits plus fragment offset, see masks */
#define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/* reserved 0x8000 */
    uint8_t ttl;              /* decremented by netlink_packet_forward */
    uint8_t protocol;         /* 1 is treated as ICMP in this file */
    uint16_t check;           /* header checksum (ip_fast_csum) */
    uint32_t saddr;           /* source address */
    uint32_t daddr;           /* destination address */
    /* The options start here. */
};
207
/* An ICMP message including its enclosing IP header.  Because the IP
 * header is embedded as a fixed-size struct iphdr, the ICMP fields
 * below only line up with a real packet when that packet's IP header
 * carries no options (ihl==5). */
struct icmphdr {
    struct iphdr iph;
    uint8_t type;
    uint8_t code;
    uint16_t check;           /* filled in by netlink_icmp_csum */
    /* Type-specific second word of the ICMP header */
    union icmpinfofield {
        uint32_t unused;
        struct {
            uint8_t pointer;
            uint8_t unused1;
            uint16_t unused2;
        } pprob;              /* presumably for Parameter Problem -- confirm */
        uint32_t gwaddr;      /* presumably for Redirect -- confirm */
        struct {
            uint16_t id;
            uint16_t seq;
        } echo;
    } d;
};
227
/* Info field passed to netlink_icmp_simple when the ICMP message has
 * nothing to put there; as a static object without an initialiser it
 * is zero-initialised. */
static const union icmpinfofield icmp_noinfo;

/* Forward declaration: netlink_icmp_simple (below) sends the ICMP
 * messages it builds through netlink_packet_deliver. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
233
234 /* XXX RFC1812 4.3.2.5:
235 All other ICMP error messages (Destination Unreachable,
236 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
237 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
238 CONTROL). The IP Precedence value for these error messages MAY be
239 settable.
240 */
241 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
242 uint32_t dest,uint16_t len)
243 {
244 struct icmphdr *h;
245
246 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
247 buffer_init(&st->icmp,calculate_max_start_pad());
248 h=buf_append(&st->icmp,sizeof(*h));
249
250 h->iph.version=4;
251 h->iph.ihl=5;
252 h->iph.tos=0;
253 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
254 h->iph.id=0;
255 h->iph.frag_off=0;
256 h->iph.ttl=255; /* XXX should be configurable */
257 h->iph.protocol=1;
258 h->iph.saddr=htonl(st->secnet_address);
259 h->iph.daddr=htonl(dest);
260 h->iph.check=0;
261 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
262 h->check=0;
263 h->d.unused=0;
264
265 return h;
266 }
267
268 /* Fill in the ICMP checksum field correctly */
269 static void netlink_icmp_csum(struct icmphdr *h)
270 {
271 int32_t len;
272
273 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
274 h->check=0;
275 h->check=ip_csum(&h->type,len);
276 }
277
278 /* RFC1122:
279 * An ICMP error message MUST NOT be sent as the result of
280 * receiving:
281 *
282 * * an ICMP error message, or
283 *
284 * * a datagram destined to an IP broadcast or IP multicast
285 * address, or
286 *
287 * * a datagram sent as a link-layer broadcast, or
288 *
289 * * a non-initial fragment, or
290 *
291 * * a datagram whose source address does not define a single
292 * host -- e.g., a zero address, a loopback address, a
293 * broadcast address, a multicast address, or a Class E
294 * address.
295 */
296 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
297 {
298 struct iphdr *iph;
299 struct icmphdr *icmph;
300 uint32_t source;
301
302 if (buf->size < (int)sizeof(struct icmphdr)) return False;
303 iph=(struct iphdr *)buf->start;
304 icmph=(struct icmphdr *)buf->start;
305 if (iph->protocol==1) {
306 switch(icmph->type) {
307 case 3: /* Destination unreachable */
308 case 11: /* Time Exceeded */
309 case 12: /* Parameter Problem */
310 return False;
311 }
312 }
313 /* How do we spot broadcast destination addresses? */
314 if (ntohs(iph->frag_off)&IPHDR_FRAG_OFF) return False;
315 source=ntohl(iph->saddr);
316 if (source==0) return False;
317 if ((source&0xff000000)==0x7f000000) return False;
318 /* How do we spot broadcast source addresses? */
319 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
320 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
321 return True;
322 }
323
324 /* How much of the original IP packet do we include in its ICMP
325 response? The header plus up to 64 bits. */
326
327 /* XXX TODO RFC1812:
328 4.3.2.3 Original Message Header
329
330 Historically, every ICMP error message has included the Internet
331 header and at least the first 8 data bytes of the datagram that
332 triggered the error. This is no longer adequate, due to the use of
333 IP-in-IP tunneling and other technologies. Therefore, the ICMP
334 datagram SHOULD contain as much of the original datagram as possible
335 without the length of the ICMP datagram exceeding 576 bytes. The
336 returned IP header (and user data) MUST be identical to that which
337 was received, except that the router is not required to undo any
338 modifications to the IP header that are normally performed in
339 forwarding that were performed before the error was detected (e.g.,
340 decrementing the TTL, or updating options). Note that the
341 requirements of Section [4.3.3.5] supersede this requirement in some
342 cases (i.e., for a Parameter Problem message, if the problem is in a
343 modified field, the router must undo the modification). See Section
344 [4.3.3.5]).
345 */
346 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
347 {
348 if (buf->size < (int)sizeof(struct iphdr)) return 0;
349 struct iphdr *iph=(struct iphdr *)buf->start;
350 uint16_t hlen,plen;
351
352 hlen=iph->ihl*4;
353 /* We include the first 8 bytes of the packet data, provided they exist */
354 hlen+=8;
355 plen=ntohs(iph->tot_len);
356 return (hlen>plen?plen:hlen);
357 }
358
359 /* client indicates where the packet we're constructing a response to
360 comes from. NULL indicates the host. */
361 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
362 struct netlink_client *client,
363 uint8_t type, uint8_t code,
364 union icmpinfofield info)
365 {
366 struct icmphdr *h;
367 uint16_t len;
368
369 if (netlink_icmp_may_reply(buf)) {
370 struct iphdr *iph=(struct iphdr *)buf->start;
371 len=netlink_icmp_reply_len(buf);
372 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
373 h->type=type; h->code=code; h->d=info;
374 memcpy(buf_append(&st->icmp,len),buf->start,len);
375 netlink_icmp_csum(h);
376 netlink_packet_deliver(st,NULL,&st->icmp);
377 BUF_ASSERT_FREE(&st->icmp);
378 }
379 }
380
381 /*
382 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
383 * checksum.
384 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
385 *
386 * Is the datagram acceptable?
387 *
388 * 1. Length at least the size of an ip header
389 * 2. Version of 4
390 * 3. Checksums correctly.
391 * 4. Doesn't have a bogus length
392 */
393 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
394 char *errmsgbuf, int errmsgbuflen)
395 {
396 #define BAD(...) do{ \
397 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
398 return False; \
399 }while(0)
400
401 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
402 struct iphdr *iph=(struct iphdr *)buf->start;
403 int32_t len;
404
405 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
406 if (iph->version != 4) BAD("version %u",iph->version);
407 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
408 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
409 len=ntohs(iph->tot_len);
410 /* There should be no padding */
411 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
412 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
413 /* XXX check that there's no source route specified */
414 return True;
415
416 #undef BAD
417 }
418
419 /* Deliver a packet _to_ client; used after we have decided
420 * what to do with it (and just to check that the client has
421 * actually registered a delivery function with us). */
422 static void netlink_client_deliver(struct netlink *st,
423 struct netlink_client *client,
424 uint32_t source, uint32_t dest,
425 struct buffer_if *buf)
426 {
427 if (!client->deliver) {
428 string_t s,d;
429 s=ipaddr_to_string(source);
430 d=ipaddr_to_string(dest);
431 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
432 st->name,s,d);
433 free(s); free(d);
434 BUF_FREE(buf);
435 return;
436 }
437 client->deliver(client->dst, buf);
438 client->outcount++;
439 }
440
441 /* Deliver a packet. "client" is the _origin_ of the packet, not its
442 destination, and is NULL for packets from the host and packets
443 generated internally in secnet. */
444 static void netlink_packet_deliver(struct netlink *st,
445 struct netlink_client *client,
446 struct buffer_if *buf)
447 {
448 if (buf->size < (int)sizeof(struct iphdr)) {
449 Message(M_ERR,"%s: trying to deliver a too-short packet"
450 " from %s!\n",st->name, client?client->name:"(local)");
451 BUF_FREE(buf);
452 return;
453 }
454
455 struct iphdr *iph=(struct iphdr *)buf->start;
456 uint32_t dest=ntohl(iph->daddr);
457 uint32_t source=ntohl(iph->saddr);
458 uint32_t best_quality;
459 bool_t allow_route=False;
460 bool_t found_allowed=False;
461 int best_match;
462 int i;
463
464 BUF_ASSERT_USED(buf);
465
466 if (dest==st->secnet_address) {
467 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
468 BUF_FREE(buf);
469 return;
470 }
471
472 /* Packets from the host (client==NULL) may always be routed. Packets
473 from clients with the allow_route option will also be routed. */
474 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
475 allow_route=True;
476
477 /* If !allow_route, we check the routing table anyway, and if
478 there's a suitable route with OPT_ALLOWROUTE set we use it. If
479 there's a suitable route, but none with OPT_ALLOWROUTE set then
480 we generate ICMP 'communication with destination network
481 administratively prohibited'. */
482
483 best_quality=0;
484 best_match=-1;
485 for (i=0; i<st->n_clients; i++) {
486 if (st->routes[i]->up &&
487 ipset_contains_addr(st->routes[i]->networks,dest)) {
488 /* It's an available route to the correct destination. But is
489 it better than the one we already have? */
490
491 /* If we have already found an allowed route then we don't
492 bother looking at routes we're not allowed to use. If
493 we don't yet have an allowed route we'll consider any. */
494 if (!allow_route && found_allowed) {
495 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
496 }
497
498 if (st->routes[i]->link_quality>best_quality
499 || best_quality==0) {
500 best_quality=st->routes[i]->link_quality;
501 best_match=i;
502 if (st->routes[i]->options&OPT_ALLOWROUTE)
503 found_allowed=True;
504 /* If quality isn't perfect we may wish to
505 consider kicking the tunnel with a 0-length
506 packet to prompt it to perform a key setup.
507 Then it'll eventually decide it's up or
508 down. */
509 /* If quality is perfect and we're allowed to use the
510 route we don't need to search any more. */
511 if (best_quality>=MAXIMUM_LINK_QUALITY &&
512 (allow_route || found_allowed)) break;
513 }
514 }
515 }
516 if (best_match==-1) {
517 /* The packet's not going down a tunnel. It might (ought to)
518 be for the host. */
519 if (ipset_contains_addr(st->networks,dest)) {
520 st->deliver_to_host(st->dst,buf);
521 st->outcount++;
522 BUF_ASSERT_FREE(buf);
523 } else {
524 string_t s,d;
525 s=ipaddr_to_string(source);
526 d=ipaddr_to_string(dest);
527 Message(M_DEBUG,"%s: don't know where to deliver packet "
528 "(s=%s, d=%s)\n", st->name, s, d);
529 free(s); free(d);
530 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
531 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
532 BUF_FREE(buf);
533 }
534 } else {
535 if (!allow_route &&
536 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
537 string_t s,d;
538 s=ipaddr_to_string(source);
539 d=ipaddr_to_string(dest);
540 /* We have a usable route but aren't allowed to use it.
541 Generate ICMP destination unreachable: communication
542 with destination network administratively prohibited */
543 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
544 st->name,s,d);
545 free(s); free(d);
546
547 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
548 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
549 BUF_FREE(buf);
550 } else {
551 if (best_quality>0) {
552 /* XXX Fragment if required */
553 netlink_client_deliver(st,st->routes[best_match],
554 source,dest,buf);
555 BUF_ASSERT_FREE(buf);
556 } else {
557 /* Generate ICMP destination unreachable */
558 netlink_icmp_simple(st,buf,client,/* client==NULL */
559 ICMP_TYPE_UNREACHABLE,
560 ICMP_CODE_NET_UNREACHABLE,
561 icmp_noinfo);
562 BUF_FREE(buf);
563 }
564 }
565 }
566 BUF_ASSERT_FREE(buf);
567 }
568
569 static void netlink_packet_forward(struct netlink *st,
570 struct netlink_client *client,
571 struct buffer_if *buf)
572 {
573 if (buf->size < (int)sizeof(struct iphdr)) return;
574 struct iphdr *iph=(struct iphdr *)buf->start;
575
576 BUF_ASSERT_USED(buf);
577
578 /* Packet has already been checked */
579 if (iph->ttl<=1) {
580 /* Generate ICMP time exceeded */
581 netlink_icmp_simple(st,buf,client,ICMP_TYPE_TIME_EXCEEDED,
582 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
583 BUF_FREE(buf);
584 return;
585 }
586 iph->ttl--;
587 iph->check=0;
588 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
589
590 netlink_packet_deliver(st,client,buf);
591 BUF_ASSERT_FREE(buf);
592 }
593
594 /* Deal with packets addressed explicitly to us */
595 static void netlink_packet_local(struct netlink *st,
596 struct netlink_client *client,
597 struct buffer_if *buf)
598 {
599 struct icmphdr *h;
600
601 st->localcount++;
602
603 if (buf->size < (int)sizeof(struct icmphdr)) {
604 Message(M_WARNING,"%s: short packet addressed to secnet; "
605 "ignoring it\n",st->name);
606 BUF_FREE(buf);
607 return;
608 }
609 h=(struct icmphdr *)buf->start;
610
611 if ((ntohs(h->iph.frag_off)&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
612 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
613 "ignoring it\n",st->name);
614 BUF_FREE(buf);
615 return;
616 }
617
618 if (h->iph.protocol==1) {
619 /* It's ICMP */
620 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
621 /* ICMP echo-request. Special case: we re-use the buffer
622 to construct the reply. */
623 h->type=ICMP_TYPE_ECHO_REPLY;
624 h->iph.daddr=h->iph.saddr;
625 h->iph.saddr=htonl(st->secnet_address);
626 h->iph.ttl=255;
627 h->iph.check=0;
628 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
629 netlink_icmp_csum(h);
630 netlink_packet_deliver(st,NULL,buf);
631 return;
632 }
633 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
634 } else {
635 /* Send ICMP protocol unreachable */
636 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
637 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
638 BUF_FREE(buf);
639 return;
640 }
641
642 BUF_FREE(buf);
643 }
644
645 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
646 it came from. */
647 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
648 struct buffer_if *buf)
649 {
650 uint32_t source,dest;
651 struct iphdr *iph;
652 char errmsgbuf[50];
653 const char *sourcedesc=client?client->name:"host";
654
655 BUF_ASSERT_USED(buf);
656
657 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
658 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
659 st->name,sourcedesc,
660 errmsgbuf);
661 BUF_FREE(buf);
662 return;
663 }
664 assert(buf->size >= (int)sizeof(struct icmphdr));
665 iph=(struct iphdr *)buf->start;
666
667 source=ntohl(iph->saddr);
668 dest=ntohl(iph->daddr);
669
670 /* Check source. If we don't like the source, there's no point
671 generating ICMP because we won't know how to get it to the
672 source of the packet. */
673 if (client) {
674 /* Check that the packet source is appropriate for the tunnel
675 it came down */
676 if (!ipset_contains_addr(client->networks,source)) {
677 string_t s,d;
678 s=ipaddr_to_string(source);
679 d=ipaddr_to_string(dest);
680 Message(M_WARNING,"%s: packet from tunnel %s with bad "
681 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
682 free(s); free(d);
683 BUF_FREE(buf);
684 return;
685 }
686 } else {
687 /* Check that the packet originates in our configured local
688 network, and hasn't been forwarded from elsewhere or
689 generated with the wrong source address */
690 if (!ipset_contains_addr(st->networks,source)) {
691 string_t s,d;
692 s=ipaddr_to_string(source);
693 d=ipaddr_to_string(dest);
694 Message(M_WARNING,"%s: outgoing packet with bad source address "
695 "(s=%s,d=%s)\n",st->name,s,d);
696 free(s); free(d);
697 BUF_FREE(buf);
698 return;
699 }
700 }
701
702 /* If this is a point-to-point device we don't examine the
703 destination address at all; we blindly send it down our
704 one-and-only registered tunnel, or to the host, depending on
705 where it came from. It's up to external software to check
706 address validity and generate ICMP, etc. */
707 if (st->ptp) {
708 if (client) {
709 st->deliver_to_host(st->dst,buf);
710 } else {
711 netlink_client_deliver(st,st->clients,source,dest,buf);
712 }
713 BUF_ASSERT_FREE(buf);
714 return;
715 }
716
717 /* st->secnet_address needs checking before matching destination
718 addresses */
719 if (dest==st->secnet_address) {
720 netlink_packet_local(st,client,buf);
721 BUF_ASSERT_FREE(buf);
722 return;
723 }
724 netlink_packet_forward(st,client,buf);
725 BUF_ASSERT_FREE(buf);
726 }
727
728 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
729 {
730 struct netlink_client *c=sst;
731 struct netlink *st=c->nst;
732
733 netlink_incoming(st,c,buf);
734 }
735
736 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
737 {
738 struct netlink *st=sst;
739
740 netlink_incoming(st,NULL,buf);
741 }
742
743 static void netlink_set_quality(void *sst, uint32_t quality)
744 {
745 struct netlink_client *c=sst;
746 struct netlink *st=c->nst;
747
748 c->link_quality=quality;
749 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
750 if (c->options&OPT_SOFTROUTE) {
751 st->set_routes(st->dst,c);
752 }
753 }
754
755 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
756 struct subnet_list *snets)
757 {
758 int32_t i;
759 string_t net;
760
761 for (i=0; i<snets->entries; i++) {
762 net=subnet_to_string(snets->list[i]);
763 Message(loglevel,"%s ",net);
764 free(net);
765 }
766 }
767
768 static void netlink_dump_routes(struct netlink *st, bool_t requested)
769 {
770 int i;
771 string_t net;
772 uint32_t c=M_INFO;
773
774 if (requested) c=M_WARNING;
775 if (st->ptp) {
776 net=ipaddr_to_string(st->secnet_address);
777 Message(c,"%s: point-to-point (remote end is %s); routes: ",
778 st->name, net);
779 free(net);
780 netlink_output_subnets(st,c,st->clients->subnets);
781 Message(c,"\n");
782 } else {
783 Message(c,"%s: routing table:\n",st->name);
784 for (i=0; i<st->n_clients; i++) {
785 netlink_output_subnets(st,c,st->routes[i]->subnets);
786 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
787 "quality %d,use %d,pri %lu)\n",
788 st->routes[i]->name,
789 st->routes[i]->up?"up":"down",
790 st->routes[i]->mtu,
791 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
792 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
793 st->routes[i]->link_quality,
794 st->routes[i]->outcount,
795 (unsigned long)st->routes[i]->priority);
796 }
797 net=ipaddr_to_string(st->secnet_address);
798 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
799 net,st->name,st->localcount);
800 free(net);
801 for (i=0; i<st->subnets->entries; i++) {
802 net=subnet_to_string(st->subnets->list[i]);
803 Message(c,"%s ",net);
804 free(net);
805 }
806 if (i>0)
807 Message(c,"-> host (use %d)\n",st->outcount);
808 }
809 }
810
811 /* ap is a pointer to a member of the routes array */
812 static int netlink_compare_client_priority(const void *ap, const void *bp)
813 {
814 const struct netlink_client *const*a=ap;
815 const struct netlink_client *const*b=bp;
816
817 if ((*a)->priority==(*b)->priority) return 0;
818 if ((*a)->priority<(*b)->priority) return 1;
819 return -1;
820 }
821
822 static void netlink_phase_hook(void *sst, uint32_t new_phase)
823 {
824 struct netlink *st=sst;
825 struct netlink_client *c;
826 int32_t i;
827
828 /* All the networks serviced by the various tunnels should now
829 * have been registered. We build a routing table by sorting the
830 * clients by priority. */
831 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
832 "netlink_phase_hook");
833 /* Fill the table */
834 i=0;
835 for (c=st->clients; c; c=c->next) {
836 assert(i<INT_MAX);
837 st->routes[i++]=c;
838 }
839 /* Sort the table in descending order of priority */
840 qsort(st->routes,st->n_clients,sizeof(*st->routes),
841 netlink_compare_client_priority);
842
843 netlink_dump_routes(st,False);
844 }
845
846 static void netlink_signal_handler(void *sst, int signum)
847 {
848 struct netlink *st=sst;
849 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
850 netlink_dump_routes(st,True);
851 }
852
853 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
854 {
855 struct netlink_client *c=sst;
856
857 c->mtu=new_mtu;
858 }
859
860 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
861 void *dst)
862 {
863 struct netlink_client *c=sst;
864
865 c->deliver=deliver;
866 c->dst=dst;
867 }
868
/* Maps the strings accepted in a client's "options" configuration list
 * to option flag bits; passed to string_list_to_word in
 * netlink_inst_create.  The table ends with a NULL-name entry. */
static struct flagstr netlink_option_table[]={
    { "soft", OPT_SOFTROUTE },
    { "allow-route", OPT_ALLOWROUTE },
    { NULL, 0}
};
874 /* This is the routine that gets called when the closure that's
875 returned by an invocation of a netlink device closure (eg. tun,
876 userv-ipif) is invoked. It's used to create routes and pass in
877 information about them; the closure it returns is used by site
878 code. */
879 static closure_t *netlink_inst_create(struct netlink *st,
880 struct cloc loc, dict_t *dict)
881 {
882 struct netlink_client *c;
883 string_t name;
884 struct ipset *networks;
885 uint32_t options,priority;
886 int32_t mtu;
887 list_t *l;
888
889 name=dict_read_string(dict, "name", True, st->name, loc);
890
891 l=dict_lookup(dict,"routes");
892 if (!l)
893 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
894 networks=string_list_to_ipset(l,loc,st->name,"routes");
895 options=string_list_to_word(dict_lookup(dict,"options"),
896 netlink_option_table,st->name);
897
898 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
899 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
900
901 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
902 cfgfatal(loc,st->name,"this netlink device does not support "
903 "soft routes.\n");
904 return NULL;
905 }
906
907 if (options&OPT_SOFTROUTE) {
908 /* XXX for now we assume that soft routes require root privilege;
909 this may not always be true. The device driver can tell us. */
910 require_root_privileges=True;
911 require_root_privileges_explanation="netlink: soft routes";
912 if (st->ptp) {
913 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
914 "soft routes.\n");
915 return NULL;
916 }
917 }
918
919 /* Check that nets are a subset of st->remote_networks;
920 refuse to register if they are not. */
921 if (!ipset_is_subset(st->remote_networks,networks)) {
922 cfgfatal(loc,st->name,"routes are not allowed\n");
923 return NULL;
924 }
925
926 c=safe_malloc(sizeof(*c),"netlink_inst_create");
927 c->cl.description=name;
928 c->cl.type=CL_NETLINK;
929 c->cl.apply=NULL;
930 c->cl.interface=&c->ops;
931 c->ops.st=c;
932 c->ops.reg=netlink_inst_reg;
933 c->ops.deliver=netlink_inst_incoming;
934 c->ops.set_quality=netlink_set_quality;
935 c->ops.set_mtu=netlink_inst_set_mtu;
936 c->nst=st;
937
938 c->networks=networks;
939 c->subnets=ipset_to_subnet_list(networks);
940 c->priority=priority;
941 c->deliver=NULL;
942 c->dst=NULL;
943 c->name=name;
944 c->link_quality=LINK_QUALITY_UNUSED;
945 c->mtu=mtu?mtu:st->mtu;
946 c->options=options;
947 c->outcount=0;
948 c->up=False;
949 c->kup=False;
950 c->next=st->clients;
951 st->clients=c;
952 assert(st->n_clients < INT_MAX);
953 st->n_clients++;
954
955 return &c->cl;
956 }
957
958 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
959 dict_t *context, list_t *args)
960 {
961 struct netlink *st=self->interface;
962
963 dict_t *dict;
964 item_t *item;
965 closure_t *cl;
966
967 item=list_elem(args,0);
968 if (!item || item->type!=t_dict) {
969 cfgfatal(loc,st->name,"must have a dictionary argument\n");
970 }
971 dict=item->data.dict;
972
973 cl=netlink_inst_create(st,loc,dict);
974
975 return new_closure(cl);
976 }
977
978 netlink_deliver_fn *netlink_init(struct netlink *st,
979 void *dst, struct cloc loc,
980 dict_t *dict, cstring_t description,
981 netlink_route_fn *set_routes,
982 netlink_deliver_fn *to_host)
983 {
984 item_t *sa, *ptpa;
985 list_t *l;
986
987 st->dst=dst;
988 st->cl.description=description;
989 st->cl.type=CL_PURE;
990 st->cl.apply=netlink_inst_apply;
991 st->cl.interface=st;
992 st->clients=NULL;
993 st->routes=NULL;
994 st->n_clients=0;
995 st->set_routes=set_routes;
996 st->deliver_to_host=to_host;
997
998 st->name=dict_read_string(dict,"name",False,description,loc);
999 if (!st->name) st->name=description;
1000 l=dict_lookup(dict,"networks");
1001 if (l)
1002 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1003 else {
1004 struct ipset *empty;
1005 empty=ipset_new();
1006 st->networks=ipset_complement(empty);
1007 ipset_free(empty);
1008 }
1009 l=dict_lookup(dict,"remote-networks");
1010 if (l) {
1011 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1012 "remote-networks");
1013 } else {
1014 struct ipset *empty;
1015 empty=ipset_new();
1016 st->remote_networks=ipset_complement(empty);
1017 ipset_free(empty);
1018 }
1019
1020 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1021 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1022 if (sa && ptpa) {
1023 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1024 "ptp-address in the same netlink device\n");
1025 }
1026 if (!(sa || ptpa)) {
1027 cfgfatal(loc,st->name,"you must specify secnet-address or "
1028 "ptp-address for this netlink device\n");
1029 }
1030 if (sa) {
1031 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1032 st->ptp=False;
1033 } else {
1034 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1035 st->ptp=True;
1036 }
1037 /* To be strictly correct we could subtract secnet_address from
1038 networks here. It shouldn't make any practical difference,
1039 though, and will make the route dump look complicated... */
1040 st->subnets=ipset_to_subnet_list(st->networks);
1041 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1042 buffer_new(&st->icmp,ICMP_BUFSIZE);
1043 st->outcount=0;
1044 st->localcount=0;
1045
1046 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1047 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1048
1049 /* If we're point-to-point then we return a CL_NETLINK directly,
1050 rather than a CL_NETLINK_OLD or pure closure (depending on
1051 compatibility). This CL_NETLINK is for our one and only
1052 client. Our cl.apply function is NULL. */
1053 if (st->ptp) {
1054 closure_t *cl;
1055 cl=netlink_inst_create(st,loc,dict);
1056 st->cl=*cl;
1057 }
1058 return netlink_dev_incoming;
1059 }
1060
/* No connection to the kernel at all... */

/* State of a "null-netlink" device: just the generic netlink state,
 * with no device-specific members. */
struct null {
    struct netlink nl;
};
1066
1067 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1068 {
1069 struct null *st=sst;
1070
1071 if (routes->up!=routes->kup) {
1072 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1073 st->nl.name,routes->name,
1074 routes->up?"up":"down");
1075 routes->kup=routes->up;
1076 return True;
1077 }
1078 return False;
1079 }
1080
/* deliver_to_host callback of the null netlink: there is no host
 * interface, so the packet is simply discarded.  The buffer must still
 * be freed, because the callers of deliver_to_host in this file assert
 * that it is free when the call returns. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1085
1086 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1087 list_t *args)
1088 {
1089 struct null *st;
1090 item_t *item;
1091 dict_t *dict;
1092
1093 st=safe_malloc(sizeof(*st),"null_apply");
1094
1095 item=list_elem(args,0);
1096 if (!item || item->type!=t_dict)
1097 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1098
1099 dict=item->data.dict;
1100
1101 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1102 null_deliver);
1103
1104 return new_closure(&st->nl.cl);
1105 }
1106
1107 void netlink_module(dict_t *dict)
1108 {
1109 add_closure(dict,"null-netlink",null_apply);
1110 }