netlink: Provide MDEBUG macro
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* See RFCs 791, 792, 1123 and 1812 */
4
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14 /* Points to note from RFC1812 (which may require changes in this
15 file):
16
17 3.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
29 4.2.1 A router SHOULD count datagrams discarded.
30
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
33 their use.
34
35 5.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
42 5.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
54 5.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98 */
99
100 #include <string.h>
101 #include <assert.h>
102 #include <limits.h>
103 #include "secnet.h"
104 #include "util.h"
105 #include "ipaddr.h"
106 #include "netlink.h"
107 #include "process.h"
108
/* MDEBUG(...) forwards its arguments to Message() at level M_DEBUG when
   this file is compiled with NETLINK_DEBUG defined.  Otherwise it
   expands to a no-op expression, so its arguments are not evaluated. */
#ifdef NETLINK_DEBUG
#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
#else /* !NETLINK_DEBUG */
#define MDEBUG(...) ((void)0)
#endif /* !NETLINK_DEBUG */
114
/* ICMP message types used in this file, each followed by the codes
   that this file sends with that type. */
#define ICMP_TYPE_ECHO_REPLY 0

#define ICMP_TYPE_UNREACHABLE 3
#define ICMP_CODE_NET_UNREACHABLE 0
#define ICMP_CODE_PROTOCOL_UNREACHABLE 2
#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
#define ICMP_CODE_NET_PROHIBITED 13

#define ICMP_TYPE_ECHO_REQUEST 8

#define ICMP_TYPE_TIME_EXCEEDED 11
#define ICMP_CODE_TTL_EXCEEDED 0
127
/* Generic IP checksum routine (the Internet checksum of RFC 1071).
 *
 * Sums the count bytes starting at iph as big-endian 16-bit words in
 * one's-complement arithmetic and returns the complement of that sum
 * in network byte order, ready to be stored in a checksum field.
 *
 * If count is odd, the final byte is treated as the high-order byte of
 * a last word whose low-order byte is zero.  The data is read one byte
 * at a time, so iph does not need to be 16-bit aligned.  A count of
 * zero or less sums nothing. */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8; /* odd byte is padded on the right */
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
144
/* ip_fast_csum: checksum of an IP header.  iph points at the header and
   ihl is its length in 32-bit words.  Two variants are provided: an
   assembler one selected when i386 is defined, and a portable one that
   defers to ip_csum.  Both take a pointer to const so callers can pass
   either const or non-const data. */
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
    "movl (%1), %0 ;\n"
    "subl $4, %2 ;\n"
    "jbe 2f ;\n"
    "addl 4(%1), %0 ;\n"
    "adcl 8(%1), %0 ;\n"
    "adcl 12(%1), %0 ;\n"
    "1: adcl 16(%1), %0 ;\n"
    "lea 4(%1), %1 ;\n"
    "decl %2 ;\n"
    "jne 1b ;\n"
    "adcl $0, %0 ;\n"
    "movl %0, %2 ;\n"
    "shrl $16, %0 ;\n"
    "addw %w2, %w0 ;\n"
    "adcl $0, %0 ;\n"
    "notl %0 ;\n"
    "2: ;\n"
    /* Since the input registers which are loaded with iph and ihl
       are modified, we must also specify them as outputs, or gcc
       will assume they contain their original values. */
    : "=r" (sum), "=r" (iph), "=r" (ihl)
    : "1" (iph), "2" (ihl)
    : "memory");
    return sum;
}
#else
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* Guard the multiplication below against signed overflow */
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
189
/* Masks for the frag field of struct iphdr, applied after converting
   the field to host byte order. */
#define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/* reserved 0x8000 */

/* Fixed part of an IPv4 header, overlaid directly on packet data.
   The order of the two 4-bit fields in the first byte depends on
   WORDS_BIGENDIAN.  The code in this file converts the multi-byte
   fields with ntohs/ntohl/htons/htonl when reading or writing them. */
struct iphdr {
#ifdef WORDS_BIGENDIAN
    uint8_t version:4;
    uint8_t ihl:4;
#else
    uint8_t ihl:4;
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;
    uint16_t id;
    uint16_t frag;      /* see IPHDR_FRAG_* */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
213
/* An ICMP message as this file overlays it on a packet buffer: a
   fixed-size IP header (no room for options) immediately followed by
   the ICMP header.  Because iph is a plain struct iphdr, the ICMP
   fields are only at the right offset when the packet's ihl is 5. */
struct icmphdr {
    struct iphdr iph;
    uint8_t type;       /* one of the ICMP_TYPE_* values */
    uint8_t code;       /* one of the ICMP_CODE_* values for that type */
    uint16_t check;     /* ICMP checksum; see netlink_icmp_csum */
    /* Four bytes whose interpretation depends on the message type */
    union icmpinfofield {
        uint32_t unused;
        struct {
            uint8_t pointer;
            uint8_t unused1;
            uint16_t unused2;
        } pprob;        /* presumably for Parameter Problem; not used in this file */
        uint32_t gwaddr; /* presumably a Redirect gateway; not used in this file */
        struct {
            uint16_t id;
            uint16_t seq;
        } echo;
    } d;
};
233
/* Info field for ICMP messages that carry no extra information.  It has
   static storage duration and no initialiser, so it is all zeroes. */
static const union icmpinfofield icmp_noinfo;

/* Forward declaration: the ICMP generation code below needs to hand
   its replies to the delivery routine defined later in the file. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
239
240 /* XXX RFC1812 4.3.2.5:
241 All other ICMP error messages (Destination Unreachable,
242 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
243 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
244 CONTROL). The IP Precedence value for these error messages MAY be
245 settable.
246 */
247 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
248 uint32_t dest,uint16_t len)
249 {
250 struct icmphdr *h;
251
252 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
253 buffer_init(&st->icmp,calculate_max_start_pad());
254 h=buf_append(&st->icmp,sizeof(*h));
255
256 h->iph.version=4;
257 h->iph.ihl=5;
258 h->iph.tos=0;
259 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
260 h->iph.id=0;
261 h->iph.frag=0;
262 h->iph.ttl=255; /* XXX should be configurable */
263 h->iph.protocol=1;
264 h->iph.saddr=htonl(st->secnet_address);
265 h->iph.daddr=htonl(dest);
266 h->iph.check=0;
267 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
268 h->check=0;
269 h->d.unused=0;
270
271 return h;
272 }
273
274 /* Fill in the ICMP checksum field correctly */
275 static void netlink_icmp_csum(struct icmphdr *h)
276 {
277 int32_t len;
278
279 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
280 h->check=0;
281 h->check=ip_csum(&h->type,len);
282 }
283
284 /* RFC1122:
285 * An ICMP error message MUST NOT be sent as the result of
286 * receiving:
287 *
288 * * an ICMP error message, or
289 *
290 * * a datagram destined to an IP broadcast or IP multicast
291 * address, or
292 *
293 * * a datagram sent as a link-layer broadcast, or
294 *
295 * * a non-initial fragment, or
296 *
297 * * a datagram whose source address does not define a single
298 * host -- e.g., a zero address, a loopback address, a
299 * broadcast address, a multicast address, or a Class E
300 * address.
301 */
302 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
303 {
304 struct iphdr *iph;
305 struct icmphdr *icmph;
306 uint32_t source;
307
308 if (buf->size < (int)sizeof(struct icmphdr)) return False;
309 iph=(struct iphdr *)buf->start;
310 icmph=(struct icmphdr *)buf->start;
311 if (iph->protocol==1) {
312 switch(icmph->type) {
313 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
314 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
315 * Deprecated, reserved, unassigned and experimental
316 * options are treated as not safe to reply to.
317 */
318 case 0: /* Echo Reply */
319 case 8: /* Echo */
320 case 13: /* Timestamp */
321 case 14: /* Timestamp Reply */
322 return True;
323 default:
324 return False;
325 }
326 }
327 /* How do we spot broadcast destination addresses? */
328 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
329 source=ntohl(iph->saddr);
330 if (source==0) return False;
331 if ((source&0xff000000)==0x7f000000) return False;
332 /* How do we spot broadcast source addresses? */
333 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
334 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
335 return True;
336 }
337
338 /* How much of the original IP packet do we include in its ICMP
339 response? The header plus up to 64 bits. */
340
341 /* XXX TODO RFC1812:
342 4.3.2.3 Original Message Header
343
344 Historically, every ICMP error message has included the Internet
345 header and at least the first 8 data bytes of the datagram that
346 triggered the error. This is no longer adequate, due to the use of
347 IP-in-IP tunneling and other technologies. Therefore, the ICMP
348 datagram SHOULD contain as much of the original datagram as possible
349 without the length of the ICMP datagram exceeding 576 bytes. The
350 returned IP header (and user data) MUST be identical to that which
351 was received, except that the router is not required to undo any
352 modifications to the IP header that are normally performed in
353 forwarding that were performed before the error was detected (e.g.,
354 decrementing the TTL, or updating options). Note that the
355 requirements of Section [4.3.3.5] supersede this requirement in some
356 cases (i.e., for a Parameter Problem message, if the problem is in a
357 modified field, the router must undo the modification). See Section
358 [4.3.3.5]).
359 */
360 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
361 {
362 if (buf->size < (int)sizeof(struct iphdr)) return 0;
363 struct iphdr *iph=(struct iphdr *)buf->start;
364 uint16_t hlen,plen;
365
366 hlen=iph->ihl*4;
367 /* We include the first 8 bytes of the packet data, provided they exist */
368 hlen+=8;
369 plen=ntohs(iph->tot_len);
370 return (hlen>plen?plen:hlen);
371 }
372
373 /* client indicates where the packet we're constructing a response to
374 comes from. NULL indicates the host. */
375 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
376 uint8_t type, uint8_t code,
377 union icmpinfofield info)
378 {
379 struct icmphdr *h;
380 uint16_t len;
381
382 if (netlink_icmp_may_reply(buf)) {
383 struct iphdr *iph=(struct iphdr *)buf->start;
384 len=netlink_icmp_reply_len(buf);
385 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
386 h->type=type; h->code=code; h->d=info;
387 memcpy(buf_append(&st->icmp,len),buf->start,len);
388 netlink_icmp_csum(h);
389 netlink_packet_deliver(st,NULL,&st->icmp);
390 BUF_ASSERT_FREE(&st->icmp);
391 }
392 }
393
394 /*
395 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
396 * checksum.
397 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
398 *
399 * Is the datagram acceptable?
400 *
401 * 1. Length at least the size of an ip header
402 * 2. Version of 4
403 * 3. Checksums correctly.
404 * 4. Doesn't have a bogus length
405 */
406 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
407 char *errmsgbuf, int errmsgbuflen)
408 {
409 #define BAD(...) do{ \
410 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
411 return False; \
412 }while(0)
413
414 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
415 struct iphdr *iph=(struct iphdr *)buf->start;
416 int32_t len;
417
418 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
419 if (iph->version != 4) BAD("version %u",iph->version);
420 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
421 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
422 len=ntohs(iph->tot_len);
423 /* There should be no padding */
424 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
425 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
426 /* XXX check that there's no source route specified */
427 return True;
428
429 #undef BAD
430 }
431
432 /* Deliver a packet _to_ client; used after we have decided
433 * what to do with it (and just to check that the client has
434 * actually registered a delivery function with us). */
435 static void netlink_client_deliver(struct netlink *st,
436 struct netlink_client *client,
437 uint32_t source, uint32_t dest,
438 struct buffer_if *buf)
439 {
440 if (!client->deliver) {
441 string_t s,d;
442 s=ipaddr_to_string(source);
443 d=ipaddr_to_string(dest);
444 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
445 st->name,s,d);
446 free(s); free(d);
447 BUF_FREE(buf);
448 return;
449 }
450 client->deliver(client->dst, buf);
451 client->outcount++;
452 }
453
454 /* Deliver a packet to the host; used after we have decided that that
455 * is what to do with it. */
456 static void netlink_host_deliver(struct netlink *st,
457 uint32_t source, uint32_t dest,
458 struct buffer_if *buf)
459 {
460 st->deliver_to_host(st->dst,buf);
461 st->outcount++;
462 }
463
464 /* Deliver a packet. "client" is the _origin_ of the packet, not its
465 destination, and is NULL for packets from the host and packets
466 generated internally in secnet. */
467 static void netlink_packet_deliver(struct netlink *st,
468 struct netlink_client *client,
469 struct buffer_if *buf)
470 {
471 if (buf->size < (int)sizeof(struct iphdr)) {
472 Message(M_ERR,"%s: trying to deliver a too-short packet"
473 " from %s!\n",st->name, client?client->name:"(local)");
474 BUF_FREE(buf);
475 return;
476 }
477
478 struct iphdr *iph=(struct iphdr *)buf->start;
479 uint32_t dest=ntohl(iph->daddr);
480 uint32_t source=ntohl(iph->saddr);
481 uint32_t best_quality;
482 bool_t allow_route=False;
483 bool_t found_allowed=False;
484 int best_match;
485 int i;
486
487 BUF_ASSERT_USED(buf);
488
489 if (dest==st->secnet_address) {
490 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
491 BUF_FREE(buf);
492 return;
493 }
494
495 /* Packets from the host (client==NULL) may always be routed. Packets
496 from clients with the allow_route option will also be routed. */
497 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
498 allow_route=True;
499
500 /* If !allow_route, we check the routing table anyway, and if
501 there's a suitable route with OPT_ALLOWROUTE set we use it. If
502 there's a suitable route, but none with OPT_ALLOWROUTE set then
503 we generate ICMP 'communication with destination network
504 administratively prohibited'. */
505
506 best_quality=0;
507 best_match=-1;
508 for (i=0; i<st->n_clients; i++) {
509 if (st->routes[i]->up &&
510 ipset_contains_addr(st->routes[i]->networks,dest)) {
511 /* It's an available route to the correct destination. But is
512 it better than the one we already have? */
513
514 /* If we have already found an allowed route then we don't
515 bother looking at routes we're not allowed to use. If
516 we don't yet have an allowed route we'll consider any. */
517 if (!allow_route && found_allowed) {
518 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
519 }
520
521 if (st->routes[i]->link_quality>best_quality
522 || best_quality==0) {
523 best_quality=st->routes[i]->link_quality;
524 best_match=i;
525 if (st->routes[i]->options&OPT_ALLOWROUTE)
526 found_allowed=True;
527 /* If quality isn't perfect we may wish to
528 consider kicking the tunnel with a 0-length
529 packet to prompt it to perform a key setup.
530 Then it'll eventually decide it's up or
531 down. */
532 /* If quality is perfect and we're allowed to use the
533 route we don't need to search any more. */
534 if (best_quality>=MAXIMUM_LINK_QUALITY &&
535 (allow_route || found_allowed)) break;
536 }
537 }
538 }
539 if (best_match==-1) {
540 /* The packet's not going down a tunnel. It might (ought to)
541 be for the host. */
542 if (ipset_contains_addr(st->networks,dest)) {
543 netlink_host_deliver(st,source,dest,buf);
544 BUF_ASSERT_FREE(buf);
545 } else {
546 string_t s,d;
547 s=ipaddr_to_string(source);
548 d=ipaddr_to_string(dest);
549 Message(M_DEBUG,"%s: don't know where to deliver packet "
550 "(s=%s, d=%s)\n", st->name, s, d);
551 free(s); free(d);
552 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
553 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
554 BUF_FREE(buf);
555 }
556 } else {
557 if (!allow_route &&
558 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
559 string_t s,d;
560 s=ipaddr_to_string(source);
561 d=ipaddr_to_string(dest);
562 /* We have a usable route but aren't allowed to use it.
563 Generate ICMP destination unreachable: communication
564 with destination network administratively prohibited */
565 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
566 st->name,s,d);
567 free(s); free(d);
568
569 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
570 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
571 BUF_FREE(buf);
572 } else {
573 if (best_quality>0) {
574 /* XXX Fragment if required */
575 netlink_client_deliver(st,st->routes[best_match],
576 source,dest,buf);
577 BUF_ASSERT_FREE(buf);
578 } else {
579 /* Generate ICMP destination unreachable */
580 netlink_icmp_simple(st,buf,
581 ICMP_TYPE_UNREACHABLE,
582 ICMP_CODE_NET_UNREACHABLE,
583 icmp_noinfo);
584 BUF_FREE(buf);
585 }
586 }
587 }
588 BUF_ASSERT_FREE(buf);
589 }
590
591 static void netlink_packet_forward(struct netlink *st,
592 struct netlink_client *client,
593 struct buffer_if *buf)
594 {
595 if (buf->size < (int)sizeof(struct iphdr)) return;
596 struct iphdr *iph=(struct iphdr *)buf->start;
597
598 BUF_ASSERT_USED(buf);
599
600 /* Packet has already been checked */
601 if (iph->ttl<=1) {
602 /* Generate ICMP time exceeded */
603 netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
604 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
605 BUF_FREE(buf);
606 return;
607 }
608 iph->ttl--;
609 iph->check=0;
610 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
611
612 netlink_packet_deliver(st,client,buf);
613 BUF_ASSERT_FREE(buf);
614 }
615
616 /* Deal with packets addressed explicitly to us */
617 static void netlink_packet_local(struct netlink *st,
618 struct netlink_client *client,
619 struct buffer_if *buf)
620 {
621 struct icmphdr *h;
622
623 st->localcount++;
624
625 if (buf->size < (int)sizeof(struct icmphdr)) {
626 Message(M_WARNING,"%s: short packet addressed to secnet; "
627 "ignoring it\n",st->name);
628 BUF_FREE(buf);
629 return;
630 }
631 h=(struct icmphdr *)buf->start;
632
633 if ((ntohs(h->iph.frag)&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
634 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
635 "ignoring it\n",st->name);
636 BUF_FREE(buf);
637 return;
638 }
639
640 if (h->iph.protocol==1) {
641 /* It's ICMP */
642 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
643 /* ICMP echo-request. Special case: we re-use the buffer
644 to construct the reply. */
645 h->type=ICMP_TYPE_ECHO_REPLY;
646 h->iph.daddr=h->iph.saddr;
647 h->iph.saddr=htonl(st->secnet_address);
648 h->iph.ttl=255;
649 h->iph.check=0;
650 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
651 netlink_icmp_csum(h);
652 netlink_packet_deliver(st,NULL,buf);
653 return;
654 }
655 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
656 } else {
657 /* Send ICMP protocol unreachable */
658 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
659 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
660 BUF_FREE(buf);
661 return;
662 }
663
664 BUF_FREE(buf);
665 }
666
667 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
668 it came from. */
669 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
670 struct buffer_if *buf)
671 {
672 uint32_t source,dest;
673 struct iphdr *iph;
674 char errmsgbuf[50];
675 const char *sourcedesc=client?client->name:"host";
676
677 BUF_ASSERT_USED(buf);
678
679 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
680 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
681 st->name,sourcedesc,
682 errmsgbuf);
683 BUF_FREE(buf);
684 return;
685 }
686 assert(buf->size >= (int)sizeof(struct icmphdr));
687 iph=(struct iphdr *)buf->start;
688
689 source=ntohl(iph->saddr);
690 dest=ntohl(iph->daddr);
691
692 /* Check source. If we don't like the source, there's no point
693 generating ICMP because we won't know how to get it to the
694 source of the packet. */
695 if (client) {
696 /* Check that the packet source is appropriate for the tunnel
697 it came down */
698 if (!ipset_contains_addr(client->networks,source)) {
699 string_t s,d;
700 s=ipaddr_to_string(source);
701 d=ipaddr_to_string(dest);
702 Message(M_WARNING,"%s: packet from tunnel %s with bad "
703 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
704 free(s); free(d);
705 BUF_FREE(buf);
706 return;
707 }
708 } else {
709 /* Check that the packet originates in our configured local
710 network, and hasn't been forwarded from elsewhere or
711 generated with the wrong source address */
712 if (!ipset_contains_addr(st->networks,source)) {
713 string_t s,d;
714 s=ipaddr_to_string(source);
715 d=ipaddr_to_string(dest);
716 Message(M_WARNING,"%s: outgoing packet with bad source address "
717 "(s=%s,d=%s)\n",st->name,s,d);
718 free(s); free(d);
719 BUF_FREE(buf);
720 return;
721 }
722 }
723
724 /* If this is a point-to-point device we don't examine the
725 destination address at all; we blindly send it down our
726 one-and-only registered tunnel, or to the host, depending on
727 where it came from. It's up to external software to check
728 address validity and generate ICMP, etc. */
729 if (st->ptp) {
730 if (client) {
731 netlink_host_deliver(st,source,dest,buf);
732 } else {
733 netlink_client_deliver(st,st->clients,source,dest,buf);
734 }
735 BUF_ASSERT_FREE(buf);
736 return;
737 }
738
739 /* st->secnet_address needs checking before matching destination
740 addresses */
741 if (dest==st->secnet_address) {
742 netlink_packet_local(st,client,buf);
743 BUF_ASSERT_FREE(buf);
744 return;
745 }
746 netlink_packet_forward(st,client,buf);
747 BUF_ASSERT_FREE(buf);
748 }
749
750 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
751 {
752 struct netlink_client *c=sst;
753 struct netlink *st=c->nst;
754
755 netlink_incoming(st,c,buf);
756 }
757
758 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
759 {
760 struct netlink *st=sst;
761
762 netlink_incoming(st,NULL,buf);
763 }
764
765 static void netlink_set_quality(void *sst, uint32_t quality)
766 {
767 struct netlink_client *c=sst;
768 struct netlink *st=c->nst;
769
770 c->link_quality=quality;
771 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
772 if (c->options&OPT_SOFTROUTE) {
773 st->set_routes(st->dst,c);
774 }
775 }
776
777 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
778 struct subnet_list *snets)
779 {
780 int32_t i;
781 string_t net;
782
783 for (i=0; i<snets->entries; i++) {
784 net=subnet_to_string(snets->list[i]);
785 Message(loglevel,"%s ",net);
786 free(net);
787 }
788 }
789
790 static void netlink_dump_routes(struct netlink *st, bool_t requested)
791 {
792 int i;
793 string_t net;
794 uint32_t c=M_INFO;
795
796 if (requested) c=M_WARNING;
797 if (st->ptp) {
798 net=ipaddr_to_string(st->secnet_address);
799 Message(c,"%s: point-to-point (remote end is %s); routes: ",
800 st->name, net);
801 free(net);
802 netlink_output_subnets(st,c,st->clients->subnets);
803 Message(c,"\n");
804 } else {
805 Message(c,"%s: routing table:\n",st->name);
806 for (i=0; i<st->n_clients; i++) {
807 netlink_output_subnets(st,c,st->routes[i]->subnets);
808 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
809 "quality %d,use %d,pri %lu)\n",
810 st->routes[i]->name,
811 st->routes[i]->up?"up":"down",
812 st->routes[i]->mtu,
813 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
814 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
815 st->routes[i]->link_quality,
816 st->routes[i]->outcount,
817 (unsigned long)st->routes[i]->priority);
818 }
819 net=ipaddr_to_string(st->secnet_address);
820 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
821 net,st->name,st->localcount);
822 free(net);
823 for (i=0; i<st->subnets->entries; i++) {
824 net=subnet_to_string(st->subnets->list[i]);
825 Message(c,"%s ",net);
826 free(net);
827 }
828 if (i>0)
829 Message(c,"-> host (use %d)\n",st->outcount);
830 }
831 }
832
833 /* ap is a pointer to a member of the routes array */
834 static int netlink_compare_client_priority(const void *ap, const void *bp)
835 {
836 const struct netlink_client *const*a=ap;
837 const struct netlink_client *const*b=bp;
838
839 if ((*a)->priority==(*b)->priority) return 0;
840 if ((*a)->priority<(*b)->priority) return 1;
841 return -1;
842 }
843
844 static void netlink_phase_hook(void *sst, uint32_t new_phase)
845 {
846 struct netlink *st=sst;
847 struct netlink_client *c;
848 int32_t i;
849
850 /* All the networks serviced by the various tunnels should now
851 * have been registered. We build a routing table by sorting the
852 * clients by priority. */
853 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
854 "netlink_phase_hook");
855 /* Fill the table */
856 i=0;
857 for (c=st->clients; c; c=c->next) {
858 assert(i<INT_MAX);
859 st->routes[i++]=c;
860 }
861 /* Sort the table in descending order of priority */
862 qsort(st->routes,st->n_clients,sizeof(*st->routes),
863 netlink_compare_client_priority);
864
865 netlink_dump_routes(st,False);
866 }
867
868 static void netlink_signal_handler(void *sst, int signum)
869 {
870 struct netlink *st=sst;
871 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
872 netlink_dump_routes(st,True);
873 }
874
875 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
876 {
877 struct netlink_client *c=sst;
878
879 c->mtu=new_mtu;
880 }
881
882 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
883 void *dst)
884 {
885 struct netlink_client *c=sst;
886
887 c->deliver=deliver;
888 c->dst=dst;
889 }
890
/* Maps the strings accepted in a client's "options" configuration list
   to their OPT_* flag bits.  The table ends with a NULL-named entry. */
static struct flagstr netlink_option_table[]={
    { "soft", OPT_SOFTROUTE },
    { "allow-route", OPT_ALLOWROUTE },
    { NULL, 0}
};
896 /* This is the routine that gets called when the closure that's
897 returned by an invocation of a netlink device closure (eg. tun,
898 userv-ipif) is invoked. It's used to create routes and pass in
899 information about them; the closure it returns is used by site
900 code. */
901 static closure_t *netlink_inst_create(struct netlink *st,
902 struct cloc loc, dict_t *dict)
903 {
904 struct netlink_client *c;
905 string_t name;
906 struct ipset *networks;
907 uint32_t options,priority;
908 int32_t mtu;
909 list_t *l;
910
911 name=dict_read_string(dict, "name", True, st->name, loc);
912
913 l=dict_lookup(dict,"routes");
914 if (!l)
915 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
916 networks=string_list_to_ipset(l,loc,st->name,"routes");
917 options=string_list_to_word(dict_lookup(dict,"options"),
918 netlink_option_table,st->name);
919
920 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
921 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
922
923 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
924 cfgfatal(loc,st->name,"this netlink device does not support "
925 "soft routes.\n");
926 return NULL;
927 }
928
929 if (options&OPT_SOFTROUTE) {
930 /* XXX for now we assume that soft routes require root privilege;
931 this may not always be true. The device driver can tell us. */
932 require_root_privileges=True;
933 require_root_privileges_explanation="netlink: soft routes";
934 if (st->ptp) {
935 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
936 "soft routes.\n");
937 return NULL;
938 }
939 }
940
941 /* Check that nets are a subset of st->remote_networks;
942 refuse to register if they are not. */
943 if (!ipset_is_subset(st->remote_networks,networks)) {
944 cfgfatal(loc,st->name,"routes are not allowed\n");
945 return NULL;
946 }
947
948 c=safe_malloc(sizeof(*c),"netlink_inst_create");
949 c->cl.description=name;
950 c->cl.type=CL_NETLINK;
951 c->cl.apply=NULL;
952 c->cl.interface=&c->ops;
953 c->ops.st=c;
954 c->ops.reg=netlink_inst_reg;
955 c->ops.deliver=netlink_inst_incoming;
956 c->ops.set_quality=netlink_set_quality;
957 c->ops.set_mtu=netlink_inst_set_mtu;
958 c->nst=st;
959
960 c->networks=networks;
961 c->subnets=ipset_to_subnet_list(networks);
962 c->priority=priority;
963 c->deliver=NULL;
964 c->dst=NULL;
965 c->name=name;
966 c->link_quality=LINK_QUALITY_UNUSED;
967 c->mtu=mtu?mtu:st->mtu;
968 c->options=options;
969 c->outcount=0;
970 c->up=False;
971 c->kup=False;
972 c->next=st->clients;
973 st->clients=c;
974 assert(st->n_clients < INT_MAX);
975 st->n_clients++;
976
977 return &c->cl;
978 }
979
980 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
981 dict_t *context, list_t *args)
982 {
983 struct netlink *st=self->interface;
984
985 dict_t *dict;
986 item_t *item;
987 closure_t *cl;
988
989 item=list_elem(args,0);
990 if (!item || item->type!=t_dict) {
991 cfgfatal(loc,st->name,"must have a dictionary argument\n");
992 }
993 dict=item->data.dict;
994
995 cl=netlink_inst_create(st,loc,dict);
996
997 return new_closure(cl);
998 }
999
1000 netlink_deliver_fn *netlink_init(struct netlink *st,
1001 void *dst, struct cloc loc,
1002 dict_t *dict, cstring_t description,
1003 netlink_route_fn *set_routes,
1004 netlink_deliver_fn *to_host)
1005 {
1006 item_t *sa, *ptpa;
1007 list_t *l;
1008
1009 st->dst=dst;
1010 st->cl.description=description;
1011 st->cl.type=CL_PURE;
1012 st->cl.apply=netlink_inst_apply;
1013 st->cl.interface=st;
1014 st->clients=NULL;
1015 st->routes=NULL;
1016 st->n_clients=0;
1017 st->set_routes=set_routes;
1018 st->deliver_to_host=to_host;
1019
1020 st->name=dict_read_string(dict,"name",False,description,loc);
1021 if (!st->name) st->name=description;
1022 l=dict_lookup(dict,"networks");
1023 if (l)
1024 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1025 else {
1026 struct ipset *empty;
1027 empty=ipset_new();
1028 st->networks=ipset_complement(empty);
1029 ipset_free(empty);
1030 }
1031 l=dict_lookup(dict,"remote-networks");
1032 if (l) {
1033 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1034 "remote-networks");
1035 } else {
1036 struct ipset *empty;
1037 empty=ipset_new();
1038 st->remote_networks=ipset_complement(empty);
1039 ipset_free(empty);
1040 }
1041
1042 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1043 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1044 if (sa && ptpa) {
1045 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1046 "ptp-address in the same netlink device\n");
1047 }
1048 if (!(sa || ptpa)) {
1049 cfgfatal(loc,st->name,"you must specify secnet-address or "
1050 "ptp-address for this netlink device\n");
1051 }
1052 if (sa) {
1053 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1054 st->ptp=False;
1055 } else {
1056 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1057 st->ptp=True;
1058 }
1059 /* To be strictly correct we could subtract secnet_address from
1060 networks here. It shouldn't make any practical difference,
1061 though, and will make the route dump look complicated... */
1062 st->subnets=ipset_to_subnet_list(st->networks);
1063 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1064 buffer_new(&st->icmp,ICMP_BUFSIZE);
1065 st->outcount=0;
1066 st->localcount=0;
1067
1068 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1069 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1070
1071 /* If we're point-to-point then we return a CL_NETLINK directly,
1072 rather than a CL_NETLINK_OLD or pure closure (depending on
1073 compatibility). This CL_NETLINK is for our one and only
1074 client. Our cl.apply function is NULL. */
1075 if (st->ptp) {
1076 closure_t *cl;
1077 cl=netlink_inst_create(st,loc,dict);
1078 st->cl=*cl;
1079 }
1080 return netlink_dev_incoming;
1081 }
1082
/* No connection to the kernel at all... */

/* State of a "null-netlink" device: just the generic netlink state,
   with nothing device-specific added. */
struct null {
    struct netlink nl;
};
1088
1089 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1090 {
1091 struct null *st=sst;
1092
1093 if (routes->up!=routes->kup) {
1094 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1095 st->nl.name,routes->name,
1096 routes->up?"up":"down");
1097 routes->kup=routes->up;
1098 return True;
1099 }
1100 return False;
1101 }
1102
/* deliver_to_host function of the null netlink: there is no host
   interface, so the packet is discarded.  The buffer has to be
   released here because the callers of deliver_to_host assert that it
   is free as soon as delivery returns.  sst is not used. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1107
1108 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1109 list_t *args)
1110 {
1111 struct null *st;
1112 item_t *item;
1113 dict_t *dict;
1114
1115 st=safe_malloc(sizeof(*st),"null_apply");
1116
1117 item=list_elem(args,0);
1118 if (!item || item->type!=t_dict)
1119 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1120
1121 dict=item->data.dict;
1122
1123 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1124 null_deliver);
1125
1126 return new_closure(&st->nl.cl);
1127 }
1128
/* Module entry point: registers the "null-netlink" closure, implemented
   by null_apply, in the given dictionary. */
void netlink_module(dict_t *dict)
{
    add_closure(dict,"null-netlink",null_apply);
}