Import release 0.1.13
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
1/* User-kernel network link */
2
ff05a229 3/* See RFCs 791, 792, 1123 and 1812 */
2fe58dfd 4
ff05a229
SE
5/* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9/* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14/* Points to note from RFC1812 (which may require changes in this
15 file):
16
173.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
294.2.1 A router SHOULD count datagrams discarded.
30
314.2.2.1 Source route options - we probably should implement processing
32of source routes, even though mostly the security policy will prevent
33their use.
34
355.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
425.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
545.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
674.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
724.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
774.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
918.1 The Simple Network Management Protocol - SNMP
928.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98*/
2fe58dfd 99
8689b3a9 100#include "secnet.h"
2fe58dfd 101#include "util.h"
7138d0c5 102#include "ipaddr.h"
9d3a4132 103#include "netlink.h"
042a8da9 104#include "process.h"
2fe58dfd 105
469fd1d9
SE
106#define OPT_SOFTROUTE 1
107#define OPT_ALLOWROUTE 2
108
ff05a229
SE
109#define ICMP_TYPE_ECHO_REPLY 0
110
111#define ICMP_TYPE_UNREACHABLE 3
112#define ICMP_CODE_NET_UNREACHABLE 0
113#define ICMP_CODE_PROTOCOL_UNREACHABLE 2
114#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
115#define ICMP_CODE_NET_PROHIBITED 13
116
117#define ICMP_TYPE_ECHO_REQUEST 8
118
119#define ICMP_TYPE_TIME_EXCEEDED 11
120#define ICMP_CODE_TTL_EXCEEDED 0
121
4efd681a
SE
/* Generic IP checksum routine (RFC 1071).
 *
 * Computes the 16-bit one's-complement checksum over 'count' bytes
 * starting at 'iph'.  The data is treated as a sequence of big-endian
 * 16-bit words; if 'count' is odd the final byte is padded on the right
 * with a zero byte, i.e. it forms the HIGH octet of the last word.
 *
 * The result is returned in network byte order, ready to be stored
 * directly into a header field.  Running the routine over data that
 * already contains a correct checksum yields 0.
 *
 * The buffer is read a byte at a time so that it need not be 16-bit
 * aligned.
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* Odd trailing byte: it is the high octet of a zero-padded word */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back in until the sum fits in 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)(~sum&0xffff));
}
138
4efd681a
SE
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.  'ihl' is the header
 * length in 32-bit words; the callers in this file only pass values
 * of 5 or more.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    /* The assembler text is assembled from adjacent string literals,
       one instruction per literal: a single literal that spans several
       source lines is not valid C. */
    __asm__ __volatile__(
        "movl (%1), %0\n\t"
        "subl $4, %2\n\t"
        "jbe 2f\n\t"
        "addl 4(%1), %0\n\t"
        "adcl 8(%1), %0\n\t"
        "adcl 12(%1), %0\n"
        "1:\n\t"
        "adcl 16(%1), %0\n\t"
        "lea 4(%1), %1\n\t"
        "decl %2\n\t"
        "jne 1b\n\t"
        "adcl $0, %0\n\t"
        "movl %0, %2\n\t"
        "shrl $16, %0\n\t"
        "addw %w2, %w0\n\t"
        "adcl $0, %0\n\t"
        "notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        /* The header is read through the pointer, which the compiler
           cannot see; the clobber stops it reordering stores to the
           header (such as zeroing the checksum field) past the asm. */
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum an IP header of 'ihl' 32-bit words using
   the generic routine. */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
182
/* IPv4 header without options.  The code in this file converts the
   multi-byte fields with htons()/htonl() on the way in and
   ntohs()/ntohl() on the way out, i.e. they are kept in network byte
   order. */
struct iphdr {
    /* First octet: version and header length (in 32-bit words).  The
       declaration order of the two bit-fields depends on the host. */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;
#else
    uint8_t ihl:4;
    uint8_t version:4;
#endif
    uint8_t tos;         /* type of service */
    uint16_t tot_len;    /* total length: header plus data */
    uint16_t id;
    uint16_t frag_off;   /* flags and fragment offset */
    uint8_t ttl;
    uint8_t protocol;    /* 1 is ICMP */
    uint16_t check;      /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
202
203struct icmphdr {
204 struct iphdr iph;
205 uint8_t type;
206 uint8_t code;
207 uint16_t check;
208 union {
209 uint32_t unused;
210 struct {
211 uint8_t pointer;
212 uint8_t unused1;
213 uint16_t unused2;
214 } pprob;
215 uint32_t gwaddr;
216 struct {
217 uint16_t id;
218 uint16_t seq;
219 } echo;
220 } d;
221};
222
70dc107b
SE
/* Forward declaration: netlink_icmp_simple() hands the ICMP packets it
   builds to netlink_packet_deliver(), which is defined further down. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
4efd681a 226
ff05a229
SE
227/* XXX RFC1812 4.3.2.5:
228 All other ICMP error messages (Destination Unreachable,
229 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
230 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
231 CONTROL). The IP Precedence value for these error messages MAY be
232 settable.
233 */
4efd681a
SE
234static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
235 uint32_t dest,uint16_t len)
236{
237 struct icmphdr *h;
238
239 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
240 buffer_init(&st->icmp,st->max_start_pad);
241 h=buf_append(&st->icmp,sizeof(*h));
242
243 h->iph.version=4;
244 h->iph.ihl=5;
245 h->iph.tos=0;
246 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
247 h->iph.id=0;
248 h->iph.frag_off=0;
ff05a229 249 h->iph.ttl=255; /* XXX should be configurable */
4efd681a
SE
250 h->iph.protocol=1;
251 h->iph.saddr=htonl(st->secnet_address);
252 h->iph.daddr=htonl(dest);
253 h->iph.check=0;
254 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
255 h->check=0;
256 h->d.unused=0;
257
258 return h;
259}
260
261/* Fill in the ICMP checksum field correctly */
262static void netlink_icmp_csum(struct icmphdr *h)
263{
264 uint32_t len;
265
266 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
267 h->check=0;
268 h->check=ip_csum(&h->type,len);
269}
270
271/* RFC1122:
272 * An ICMP error message MUST NOT be sent as the result of
273 * receiving:
274 *
275 * * an ICMP error message, or
276 *
277 * * a datagram destined to an IP broadcast or IP multicast
278 * address, or
279 *
280 * * a datagram sent as a link-layer broadcast, or
281 *
282 * * a non-initial fragment, or
283 *
284 * * a datagram whose source address does not define a single
285 * host -- e.g., a zero address, a loopback address, a
286 * broadcast address, a multicast address, or a Class E
287 * address.
288 */
289static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
290{
291 struct iphdr *iph;
8dea8d37 292 struct icmphdr *icmph;
4efd681a
SE
293 uint32_t source;
294
295 iph=(struct iphdr *)buf->start;
8dea8d37
SE
296 icmph=(struct icmphdr *)buf->start;
297 if (iph->protocol==1) {
298 switch(icmph->type) {
299 case 3: /* Destination unreachable */
300 case 11: /* Time Exceeded */
301 case 12: /* Parameter Problem */
302 return False;
303 }
304 }
4efd681a
SE
305 /* How do we spot broadcast destination addresses? */
306 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
307 source=ntohl(iph->saddr);
308 if (source==0) return False;
309 if ((source&0xff000000)==0x7f000000) return False;
310 /* How do we spot broadcast source addresses? */
311 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
312 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
313 return True;
314}
315
316/* How much of the original IP packet do we include in its ICMP
317 response? The header plus up to 64 bits. */
ff05a229
SE
318
319/* XXX TODO RFC1812:
3204.3.2.3 Original Message Header
321
322 Historically, every ICMP error message has included the Internet
323 header and at least the first 8 data bytes of the datagram that
324 triggered the error. This is no longer adequate, due to the use of
325 IP-in-IP tunneling and other technologies. Therefore, the ICMP
326 datagram SHOULD contain as much of the original datagram as possible
327 without the length of the ICMP datagram exceeding 576 bytes. The
328 returned IP header (and user data) MUST be identical to that which
329 was received, except that the router is not required to undo any
330 modifications to the IP header that are normally performed in
331 forwarding that were performed before the error was detected (e.g.,
332 decrementing the TTL, or updating options). Note that the
333 requirements of Section [4.3.3.5] supersede this requirement in some
334 cases (i.e., for a Parameter Problem message, if the problem is in a
335 modified field, the router must undo the modification). See Section
336 [4.3.3.5]).
337 */
4efd681a
SE
338static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
339{
340 struct iphdr *iph=(struct iphdr *)buf->start;
341 uint16_t hlen,plen;
342
343 hlen=iph->ihl*4;
344 /* We include the first 8 bytes of the packet data, provided they exist */
345 hlen+=8;
346 plen=ntohs(iph->tot_len);
347 return (hlen>plen?plen:hlen);
348}
349
70dc107b
SE
350/* client indicates where the packet we're constructing a response to
351 comes from. NULL indicates the host. */
4efd681a 352static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
70dc107b 353 struct netlink_client *client,
4efd681a
SE
354 uint8_t type, uint8_t code)
355{
356 struct iphdr *iph=(struct iphdr *)buf->start;
357 struct icmphdr *h;
358 uint16_t len;
359
360 if (netlink_icmp_may_reply(buf)) {
361 len=netlink_icmp_reply_len(buf);
362 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
363 h->type=type; h->code=code;
364 memcpy(buf_append(&st->icmp,len),buf->start,len);
365 netlink_icmp_csum(h);
70dc107b 366 netlink_packet_deliver(st,NULL,&st->icmp);
4efd681a
SE
367 BUF_ASSERT_FREE(&st->icmp);
368 }
369}
370
371/*
372 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
373 * checksum.
ff05a229 374 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
4efd681a
SE
375 *
376 * Is the datagram acceptable?
377 *
378 * 1. Length at least the size of an ip header
379 * 2. Version of 4
380 * 3. Checksums correctly.
381 * 4. Doesn't have a bogus length
382 */
383static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
384{
385 struct iphdr *iph=(struct iphdr *)buf->start;
386 uint32_t len;
387
9d3a4132
SE
388 if (iph->ihl < 5 || iph->version != 4) return False;
389 if (buf->size < iph->ihl*4) return False;
390 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
4efd681a
SE
391 len=ntohs(iph->tot_len);
392 /* There should be no padding */
9d3a4132 393 if (buf->size!=len || len<(iph->ihl<<2)) return False;
4efd681a
SE
394 /* XXX check that there's no source route specified */
395 return True;
396}
397
469fd1d9 398/* Deliver a packet. "client" is the _origin_ of the packet, not its
d3fe100d
SE
399 destination, and is NULL for packets from the host and packets
400 generated internally in secnet. */
70dc107b
SE
401static void netlink_packet_deliver(struct netlink *st,
402 struct netlink_client *client,
403 struct buffer_if *buf)
4efd681a
SE
404{
405 struct iphdr *iph=(struct iphdr *)buf->start;
406 uint32_t dest=ntohl(iph->daddr);
70dc107b
SE
407 uint32_t source=ntohl(iph->saddr);
408 uint32_t best_quality;
469fd1d9
SE
409 bool_t allow_route=False;
410 bool_t found_allowed=False;
70dc107b
SE
411 int best_match;
412 int i;
2fe58dfd 413
4efd681a 414 BUF_ASSERT_USED(buf);
2fe58dfd 415
4efd681a 416 if (dest==st->secnet_address) {
469fd1d9 417 Message(M_ERR,"%s: trying to deliver a packet to myself!\n");
4efd681a 418 BUF_FREE(buf);
2fe58dfd
SE
419 return;
420 }
4efd681a 421
d3fe100d 422 /* Packets from the host (client==NULL) may always be routed. Packets
469fd1d9
SE
423 from clients with the allow_route option will also be routed. */
424 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
425 allow_route=True;
426
427 /* If !allow_route, we check the routing table anyway, and if
428 there's a suitable route with OPT_ALLOWROUTE set we use it. If
429 there's a suitable route, but none with OPT_ALLOWROUTE set then
430 we generate ICMP 'communication with destination network
431 administratively prohibited'. */
432
433 best_quality=0;
434 best_match=-1;
d3fe100d
SE
435 for (i=0; i<st->n_clients; i++) {
436 if (st->routes[i]->up &&
437 ipset_contains_addr(st->routes[i]->networks,dest)) {
469fd1d9
SE
438 /* It's an available route to the correct destination. But is
439 it better than the one we already have? */
440
441 /* If we have already found an allowed route then we don't
442 bother looking at routes we're not allowed to use. If
443 we don't yet have an allowed route we'll consider any. */
444 if (!allow_route && found_allowed) {
d3fe100d 445 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
70dc107b 446 }
469fd1d9 447
d3fe100d 448 if (st->routes[i]->link_quality>best_quality
469fd1d9 449 || best_quality==0) {
d3fe100d 450 best_quality=st->routes[i]->link_quality;
469fd1d9 451 best_match=i;
d3fe100d 452 if (st->routes[i]->options&OPT_ALLOWROUTE)
469fd1d9
SE
453 found_allowed=True;
454 /* If quality isn't perfect we may wish to
455 consider kicking the tunnel with a 0-length
456 packet to prompt it to perform a key setup.
457 Then it'll eventually decide it's up or
458 down. */
459 /* If quality is perfect and we're allowed to use the
460 route we don't need to search any more. */
461 if (best_quality>=MAXIMUM_LINK_QUALITY &&
462 (allow_route || found_allowed)) break;
4efd681a 463 }
70dc107b 464 }
469fd1d9
SE
465 }
466 if (best_match==-1) {
467 /* The packet's not going down a tunnel. It might (ought to)
468 be for the host. */
794f2398 469 if (ipset_contains_addr(st->networks,dest)) {
469fd1d9
SE
470 st->deliver_to_host(st->dst,buf);
471 st->outcount++;
70dc107b
SE
472 BUF_ASSERT_FREE(buf);
473 } else {
469fd1d9
SE
474 string_t s,d;
475 s=ipaddr_to_string(source);
476 d=ipaddr_to_string(dest);
ff05a229 477 Message(M_DEBUG,"%s: don't know where to deliver packet "
469fd1d9
SE
478 "(s=%s, d=%s)\n", st->name, s, d);
479 free(s); free(d);
ff05a229
SE
480 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
481 ICMP_CODE_NET_UNREACHABLE);
70dc107b 482 BUF_FREE(buf);
2fe58dfd 483 }
469fd1d9
SE
484 } else {
485 if (!allow_route &&
d3fe100d 486 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
469fd1d9
SE
487 string_t s,d;
488 s=ipaddr_to_string(source);
489 d=ipaddr_to_string(dest);
490 /* We have a usable route but aren't allowed to use it.
491 Generate ICMP destination unreachable: communication
492 with destination network administratively prohibited */
493 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
494 st->name,s,d);
495 free(s); free(d);
496
ff05a229
SE
497 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
498 ICMP_CODE_NET_PROHIBITED);
469fd1d9
SE
499 BUF_FREE(buf);
500 }
501 if (best_quality>0) {
d3fe100d
SE
502 /* XXX Fragment if required */
503 st->routes[best_match]->deliver(
504 st->routes[best_match]->dst, buf);
505 st->routes[best_match]->outcount++;
469fd1d9
SE
506 BUF_ASSERT_FREE(buf);
507 } else {
508 /* Generate ICMP destination unreachable */
ff05a229
SE
509 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
510 ICMP_CODE_NET_UNREACHABLE); /* client==NULL */
469fd1d9
SE
511 BUF_FREE(buf);
512 }
2fe58dfd 513 }
70dc107b 514 BUF_ASSERT_FREE(buf);
4efd681a
SE
515}
516
70dc107b
SE
517static void netlink_packet_forward(struct netlink *st,
518 struct netlink_client *client,
519 struct buffer_if *buf)
4efd681a
SE
520{
521 struct iphdr *iph=(struct iphdr *)buf->start;
522
523 BUF_ASSERT_USED(buf);
524
525 /* Packet has already been checked */
526 if (iph->ttl<=1) {
527 /* Generate ICMP time exceeded */
ff05a229
SE
528 netlink_icmp_simple(st,buf,client,ICMP_TYPE_TIME_EXCEEDED,
529 ICMP_CODE_TTL_EXCEEDED);
4efd681a
SE
530 BUF_FREE(buf);
531 return;
532 }
533 iph->ttl--;
534 iph->check=0;
535 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
536
70dc107b 537 netlink_packet_deliver(st,client,buf);
4efd681a
SE
538 BUF_ASSERT_FREE(buf);
539}
540
9d3a4132 541/* Deal with packets addressed explicitly to us */
70dc107b
SE
542static void netlink_packet_local(struct netlink *st,
543 struct netlink_client *client,
544 struct buffer_if *buf)
4efd681a
SE
545{
546 struct icmphdr *h;
547
469fd1d9
SE
548 st->localcount++;
549
4efd681a
SE
550 h=(struct icmphdr *)buf->start;
551
552 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
9d3a4132
SE
553 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
554 "ignoring it\n",st->name);
4efd681a
SE
555 BUF_FREE(buf);
556 return;
557 }
558
559 if (h->iph.protocol==1) {
560 /* It's ICMP */
ff05a229 561 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
4efd681a
SE
562 /* ICMP echo-request. Special case: we re-use the buffer
563 to construct the reply. */
ff05a229 564 h->type=ICMP_TYPE_ECHO_REPLY;
4efd681a
SE
565 h->iph.daddr=h->iph.saddr;
566 h->iph.saddr=htonl(st->secnet_address);
ff05a229 567 h->iph.ttl=255;
4efd681a
SE
568 h->iph.check=0;
569 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
570 netlink_icmp_csum(h);
70dc107b 571 netlink_packet_deliver(st,NULL,buf);
4efd681a
SE
572 return;
573 }
574 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
575 } else {
576 /* Send ICMP protocol unreachable */
ff05a229
SE
577 netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
578 ICMP_CODE_PROTOCOL_UNREACHABLE);
4efd681a
SE
579 BUF_FREE(buf);
580 return;
581 }
582
583 BUF_FREE(buf);
584}
585
9d3a4132
SE
586/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
587 it came from. */
469fd1d9
SE
588static void netlink_incoming(struct netlink *st, struct netlink_client *client,
589 struct buffer_if *buf)
4efd681a 590{
4efd681a
SE
591 uint32_t source,dest;
592 struct iphdr *iph;
593
594 BUF_ASSERT_USED(buf);
595 if (!netlink_check(st,buf)) {
9d3a4132
SE
596 Message(M_WARNING,"%s: bad IP packet from %s\n",
597 st->name,client?client->name:"host");
4efd681a
SE
598 BUF_FREE(buf);
599 return;
600 }
601 iph=(struct iphdr *)buf->start;
602
603 source=ntohl(iph->saddr);
604 dest=ntohl(iph->daddr);
605
d3fe100d
SE
606 /* Check source. If we don't like the source, there's no point
607 generating ICMP because we won't know how to get it to the
608 source of the packet. */
9d3a4132 609 if (client) {
c6f79b17
SE
610 /* Check that the packet source is appropriate for the tunnel
611 it came down */
794f2398 612 if (!ipset_contains_addr(client->networks,source)) {
9d3a4132
SE
613 string_t s,d;
614 s=ipaddr_to_string(source);
615 d=ipaddr_to_string(dest);
616 Message(M_WARNING,"%s: packet from tunnel %s with bad "
617 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
618 free(s); free(d);
619 BUF_FREE(buf);
620 return;
621 }
622 } else {
c6f79b17
SE
623 /* Check that the packet originates in our configured local
624 network, and hasn't been forwarded from elsewhere or
625 generated with the wrong source address */
794f2398 626 if (!ipset_contains_addr(st->networks,source)) {
9d3a4132
SE
627 string_t s,d;
628 s=ipaddr_to_string(source);
629 d=ipaddr_to_string(dest);
630 Message(M_WARNING,"%s: outgoing packet with bad source address "
631 "(s=%s,d=%s)\n",st->name,s,d);
632 free(s); free(d);
633 BUF_FREE(buf);
634 return;
635 }
4efd681a 636 }
c6f79b17 637
794f2398
SE
638 /* If this is a point-to-point device we don't examine the
639 destination address at all; we blindly send it down our
640 one-and-only registered tunnel, or to the host, depending on
d3fe100d
SE
641 where it came from. It's up to external software to check
642 address validity and generate ICMP, etc. */
c6f79b17
SE
643 if (st->ptp) {
644 if (client) {
469fd1d9 645 st->deliver_to_host(st->dst,buf);
c6f79b17 646 } else {
469fd1d9 647 st->clients->deliver(st->clients->dst,buf);
c6f79b17
SE
648 }
649 BUF_ASSERT_FREE(buf);
650 return;
651 }
652
d3fe100d
SE
653 /* st->secnet_address needs checking before matching destination
654 addresses */
2fe58dfd 655 if (dest==st->secnet_address) {
9d3a4132 656 netlink_packet_local(st,client,buf);
4efd681a 657 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
658 return;
659 }
70dc107b 660 netlink_packet_forward(st,client,buf);
4efd681a
SE
661 BUF_ASSERT_FREE(buf);
662}
663
469fd1d9
SE
664static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
665{
666 struct netlink_client *c=sst;
667 struct netlink *st=c->nst;
668
669 netlink_incoming(st,c,buf);
670}
671
672static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
673{
674 struct netlink *st=sst;
675
676 netlink_incoming(st,NULL,buf);
677}
678
d3fe100d 679static void netlink_set_quality(void *sst, uint32_t quality)
4efd681a 680{
d3fe100d
SE
681 struct netlink_client *c=sst;
682 struct netlink *st=c->nst;
4efd681a 683
d3fe100d
SE
684 c->link_quality=quality;
685 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
686 if (c->options&OPT_SOFTROUTE) {
687 st->set_routes(st->dst,c);
4efd681a 688 }
4efd681a
SE
689}
690
d3fe100d
SE
691static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
692 struct subnet_list *snets)
4efd681a 693{
d3fe100d
SE
694 uint32_t i;
695 string_t net;
4efd681a 696
d3fe100d
SE
697 for (i=0; i<snets->entries; i++) {
698 net=subnet_to_string(snets->list[i]);
699 Message(loglevel,"%s ",net);
700 free(net);
9d3a4132 701 }
4efd681a
SE
702}
703
042a8da9 704static void netlink_dump_routes(struct netlink *st, bool_t requested)
9d3a4132
SE
705{
706 int i;
707 string_t net;
042a8da9 708 uint32_t c=M_INFO;
9d3a4132 709
042a8da9 710 if (requested) c=M_WARNING;
469fd1d9
SE
711 if (st->ptp) {
712 net=ipaddr_to_string(st->secnet_address);
713 Message(c,"%s: point-to-point (remote end is %s); routes:\n",
714 st->name, net);
9d3a4132 715 free(net);
d3fe100d 716 netlink_output_subnets(st,c,st->clients->subnets);
469fd1d9
SE
717 Message(c,"\n");
718 } else {
719 Message(c,"%s: routing table:\n",st->name);
d3fe100d
SE
720 for (i=0; i<st->n_clients; i++) {
721 netlink_output_subnets(st,c,st->routes[i]->subnets);
ff05a229
SE
722 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
723 "quality %d,use %d)\n",
d3fe100d 724 st->routes[i]->name,
ff05a229
SE
725 st->routes[i]->up?"up":"down",
726 st->routes[i]->mtu,
d3fe100d
SE
727 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
728 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
d3fe100d
SE
729 st->routes[i]->link_quality,
730 st->routes[i]->outcount);
469fd1d9
SE
731 }
732 net=ipaddr_to_string(st->secnet_address);
733 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
734 net,st->name,st->localcount);
9d3a4132 735 free(net);
794f2398
SE
736 for (i=0; i<st->subnets->entries; i++) {
737 net=subnet_to_string(st->subnets->list[i]);
738 Message(c,"%s ",net);
469fd1d9
SE
739 free(net);
740 }
794f2398
SE
741 if (i>0)
742 Message(c,"-> host (use %d)\n",st->outcount);
9d3a4132
SE
743 }
744}
745
d3fe100d
SE
746/* ap is a pointer to a member of the routes array */
747static int netlink_compare_client_priority(const void *ap, const void *bp)
70dc107b 748{
d3fe100d
SE
749 const struct netlink_client *const*a=ap;
750 const struct netlink_client *const*b=bp;
70dc107b 751
d3fe100d
SE
752 if ((*a)->priority==(*b)->priority) return 0;
753 if ((*a)->priority<(*b)->priority) return 1;
70dc107b
SE
754 return -1;
755}
756
757static void netlink_phase_hook(void *sst, uint32_t new_phase)
758{
759 struct netlink *st=sst;
760 struct netlink_client *c;
d3fe100d 761 uint32_t i;
70dc107b
SE
762
763 /* All the networks serviced by the various tunnels should now
764 * have been registered. We build a routing table by sorting the
d3fe100d
SE
765 * clients by priority. */
766 st->routes=safe_malloc(st->n_clients*sizeof(*st->routes),
70dc107b
SE
767 "netlink_phase_hook");
768 /* Fill the table */
769 i=0;
d3fe100d
SE
770 for (c=st->clients; c; c=c->next)
771 st->routes[i++]=c;
772 /* Sort the table in descending order of priority */
773 qsort(st->routes,st->n_clients,sizeof(*st->routes),
774 netlink_compare_client_priority);
9d3a4132 775
042a8da9
SE
776 netlink_dump_routes(st,False);
777}
778
779static void netlink_signal_handler(void *sst, int signum)
780{
781 struct netlink *st=sst;
782 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
783 netlink_dump_routes(st,True);
70dc107b
SE
784}
785
794f2398
SE
/* output_config operation of a route.  For now we don't output
   anything: the function only asserts that the buffer it was handed
   is in use. */
static void netlink_inst_output_config(void *sst, struct buffer_if *buf)
{
    BUF_ASSERT_USED(buf);
}
794
795static bool_t netlink_inst_check_config(void *sst, struct buffer_if *buf)
796{
797/* struct netlink_client *c=sst; */
798/* struct netlink *st=c->nst; */
799
800 BUF_ASSERT_USED(buf);
801 /* We need to eat all of the configuration information from the buffer
802 for backward compatibility. */
803 buf->size=0;
804 return True;
805}
806
d3fe100d
SE
807static void netlink_inst_set_mtu(void *sst, uint32_t new_mtu)
808{
809 struct netlink_client *c=sst;
810
811 c->mtu=new_mtu;
812}
813
469fd1d9
SE
814static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
815 void *dst, uint32_t max_start_pad,
816 uint32_t max_end_pad)
817{
818 struct netlink_client *c=sst;
819 struct netlink *st=c->nst;
820
821 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
822 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
823 c->deliver=deliver;
824 c->dst=dst;
825}
826
827static struct flagstr netlink_option_table[]={
828 { "soft", OPT_SOFTROUTE },
829 { "allow-route", OPT_ALLOWROUTE },
830 { NULL, 0}
831};
832/* This is the routine that gets called when the closure that's
833 returned by an invocation of a netlink device closure (eg. tun,
834 userv-ipif) is invoked. It's used to create routes and pass in
835 information about them; the closure it returns is used by site
836 code. */
837static closure_t *netlink_inst_create(struct netlink *st,
838 struct cloc loc, dict_t *dict)
839{
840 struct netlink_client *c;
841 string_t name;
794f2398 842 struct ipset *networks;
d3fe100d 843 uint32_t options,priority,mtu;
794f2398 844 list_t *l;
469fd1d9
SE
845
846 name=dict_read_string(dict, "name", True, st->name, loc);
847
794f2398
SE
848 l=dict_lookup(dict,"routes");
849 if (!l)
850 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
851 networks=string_list_to_ipset(l,loc,st->name,"routes");
469fd1d9
SE
852 options=string_list_to_word(dict_lookup(dict,"options"),
853 netlink_option_table,st->name);
854
d3fe100d
SE
855 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
856 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
857
858 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
469fd1d9
SE
859 cfgfatal(loc,st->name,"this netlink device does not support "
860 "soft routes.\n");
861 return NULL;
862 }
863
864 if (options&OPT_SOFTROUTE) {
865 /* XXX for now we assume that soft routes require root privilege;
866 this may not always be true. The device driver can tell us. */
867 require_root_privileges=True;
868 require_root_privileges_explanation="netlink: soft routes";
869 if (st->ptp) {
870 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
871 "soft routes.\n");
872 return NULL;
873 }
874 }
875
794f2398
SE
876 /* Check that nets are a subset of st->remote_networks;
877 refuse to register if they are not. */
878 if (!ipset_is_subset(st->remote_networks,networks)) {
879 cfgfatal(loc,st->name,"routes are not allowed\n");
469fd1d9
SE
880 return NULL;
881 }
882
883 c=safe_malloc(sizeof(*c),"netlink_inst_create");
884 c->cl.description=name;
885 c->cl.type=CL_NETLINK;
886 c->cl.apply=NULL;
887 c->cl.interface=&c->ops;
888 c->ops.st=c;
889 c->ops.reg=netlink_inst_reg;
890 c->ops.deliver=netlink_inst_incoming;
891 c->ops.set_quality=netlink_set_quality;
794f2398
SE
892 c->ops.output_config=netlink_inst_output_config;
893 c->ops.check_config=netlink_inst_check_config;
d3fe100d 894 c->ops.set_mtu=netlink_inst_set_mtu;
469fd1d9
SE
895 c->nst=st;
896
897 c->networks=networks;
794f2398 898 c->subnets=ipset_to_subnet_list(networks);
d3fe100d 899 c->priority=priority;
469fd1d9
SE
900 c->deliver=NULL;
901 c->dst=NULL;
902 c->name=name;
469fd1d9 903 c->link_quality=LINK_QUALITY_DOWN;
d3fe100d
SE
904 c->mtu=mtu?mtu:st->mtu;
905 c->options=options;
906 c->outcount=0;
907 c->up=False;
908 c->kup=False;
469fd1d9
SE
909 c->next=st->clients;
910 st->clients=c;
d3fe100d 911 st->n_clients++;
469fd1d9
SE
912
913 return &c->cl;
914}
915
916static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
917 dict_t *context, list_t *args)
918{
919 struct netlink *st=self->interface;
920
921 dict_t *dict;
922 item_t *item;
923 closure_t *cl;
924
469fd1d9
SE
925 item=list_elem(args,0);
926 if (!item || item->type!=t_dict) {
927 cfgfatal(loc,st->name,"must have a dictionary argument\n");
928 }
929 dict=item->data.dict;
930
931 cl=netlink_inst_create(st,loc,dict);
932
933 return new_closure(cl);
934}
935
9d3a4132
SE
936netlink_deliver_fn *netlink_init(struct netlink *st,
937 void *dst, struct cloc loc,
938 dict_t *dict, string_t description,
d3fe100d 939 netlink_route_fn *set_routes,
9d3a4132 940 netlink_deliver_fn *to_host)
4efd681a 941{
c6f79b17 942 item_t *sa, *ptpa;
794f2398 943 list_t *l;
c6f79b17 944
4efd681a
SE
945 st->dst=dst;
946 st->cl.description=description;
469fd1d9
SE
947 st->cl.type=CL_PURE;
948 st->cl.apply=netlink_inst_apply;
949 st->cl.interface=st;
4efd681a
SE
950 st->max_start_pad=0;
951 st->max_end_pad=0;
952 st->clients=NULL;
d3fe100d
SE
953 st->routes=NULL;
954 st->n_clients=0;
955 st->set_routes=set_routes;
4efd681a
SE
956 st->deliver_to_host=to_host;
957
794f2398 958 st->name=dict_read_string(dict,"name",False,description,loc);
4efd681a 959 if (!st->name) st->name=description;
794f2398
SE
960 l=dict_lookup(dict,"networks");
961 if (l)
962 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
963 else {
964 Message(M_WARNING,"%s: no local networks (parameter \"networks\") "
965 "defined\n",st->name);
966 st->networks=ipset_new();
967 }
968 l=dict_lookup(dict,"remote-networks");
969 if (l) {
970 st->remote_networks=string_list_to_ipset(l,loc,st->name,
971 "remote-networks");
972 } else {
973 struct ipset *empty;
974 empty=ipset_new();
975 st->remote_networks=ipset_complement(empty);
976 ipset_free(empty);
977 }
978
c6f79b17 979 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
469fd1d9 980 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
c6f79b17
SE
981 if (sa && ptpa) {
982 cfgfatal(loc,st->name,"you may not specify secnet-address and "
983 "ptp-address in the same netlink device\n");
984 }
985 if (!(sa || ptpa)) {
986 cfgfatal(loc,st->name,"you must specify secnet-address or "
987 "ptp-address for this netlink device\n");
988 }
989 if (sa) {
794f2398 990 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
c6f79b17
SE
991 st->ptp=False;
992 } else {
794f2398 993 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
c6f79b17
SE
994 st->ptp=True;
995 }
d3fe100d
SE
996 /* To be strictly correct we could subtract secnet_address from
997 networks here. It shouldn't make any practical difference,
794f2398
SE
998 though, and will make the route dump look complicated... */
999 st->subnets=ipset_to_subnet_list(st->networks);
4efd681a
SE
1000 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1001 buffer_new(&st->icmp,ICMP_BUFSIZE);
469fd1d9
SE
1002 st->outcount=0;
1003 st->localcount=0;
70dc107b
SE
1004
1005 add_hook(PHASE_SETUP,netlink_phase_hook,st);
042a8da9 1006 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
4efd681a 1007
469fd1d9
SE
1008 /* If we're point-to-point then we return a CL_NETLINK directly,
1009 rather than a CL_NETLINK_OLD or pure closure (depending on
1010 compatibility). This CL_NETLINK is for our one and only
1011 client. Our cl.apply function is NULL. */
1012 if (st->ptp) {
1013 closure_t *cl;
1014 cl=netlink_inst_create(st,loc,dict);
1015 st->cl=*cl;
1016 }
1017 return netlink_dev_incoming;
2fe58dfd
SE
1018}
1019
9d3a4132 1020/* No connection to the kernel at all... */
2fe58dfd 1021
9d3a4132 1022struct null {
4efd681a 1023 struct netlink nl;
4efd681a 1024};
2fe58dfd 1025
d3fe100d 1026static bool_t null_set_route(void *sst, struct netlink_client *routes)
4efd681a 1027{
9d3a4132 1028 struct null *st=sst;
d3fe100d
SE
1029
1030 if (routes->up!=routes->kup) {
1031 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1032 st->nl.name,routes->name,
1033 routes->up?"up":"down");
1034 routes->kup=routes->up;
9d3a4132 1035 return True;
2fe58dfd 1036 }
9d3a4132 1037 return False;
2fe58dfd 1038}
9d3a4132 1039
/* Deliver-to-host function of the null netlink: there is no host
   interface, so the packet is simply discarded.

   The buffer must be released here: netlink_packet_deliver() asserts
   that the buffer is free as soon as its deliver_to_host call
   returns. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1044
1045static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1046 list_t *args)
1047{
1048 struct null *st;
4efd681a
SE
1049 item_t *item;
1050 dict_t *dict;
2fe58dfd 1051
4efd681a 1052 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1053
4efd681a
SE
1054 item=list_elem(args,0);
1055 if (!item || item->type!=t_dict)
1056 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1057
1058 dict=item->data.dict;
1059
9d3a4132
SE
1060 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1061 null_deliver);
4efd681a
SE
1062
1063 return new_closure(&st->nl.cl);
2fe58dfd
SE
1064}
1065
1066init_module netlink_module;
1067void netlink_module(dict_t *dict)
1068{
4efd681a 1069 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1070}