netlink: Generate ICMP correctly if point-to-point
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
1/* User-kernel network link */
2
ff05a229 3/* See RFCs 791, 792, 1123 and 1812 */
2fe58dfd 4
ff05a229
SE
5/* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9/* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14/* Points to note from RFC1812 (which may require changes in this
15 file):
16
173.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
294.2.1 A router SHOULD count datagrams discarded.
30
314.2.2.1 Source route options - we probably should implement processing
32of source routes, even though mostly the security policy will prevent
33their use.
34
355.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
425.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
545.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
674.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
724.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
774.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
918.1 The Simple Network Management Protocol - SNMP
928.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98*/
2fe58dfd 99
3b83c932 100#include <string.h>
59230b9b
IJ
101#include <assert.h>
102#include <limits.h>
8689b3a9 103#include "secnet.h"
2fe58dfd 104#include "util.h"
7138d0c5 105#include "ipaddr.h"
9d3a4132 106#include "netlink.h"
042a8da9 107#include "process.h"
2fe58dfd 108
a0b107b8
IJ
109#ifdef NETLINK_DEBUG
110#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111#else /* !NETLINK_DEBUG */
112#define MDEBUG(...) ((void)0)
113#endif /* !NETLINK_DEBUG */
114
ff05a229
SE
115#define ICMP_TYPE_ECHO_REPLY 0
116
117#define ICMP_TYPE_UNREACHABLE 3
118#define ICMP_CODE_NET_UNREACHABLE 0
119#define ICMP_CODE_PROTOCOL_UNREACHABLE 2
120#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
121#define ICMP_CODE_NET_PROHIBITED 13
122
123#define ICMP_TYPE_ECHO_REQUEST 8
124
125#define ICMP_TYPE_TIME_EXCEEDED 11
126#define ICMP_CODE_TTL_EXCEEDED 0
127
/* Generic IP checksum routine (one's-complement sum, RFC 1071).
 *
 * iph:   first byte of the data to sum; no alignment is required.
 * count: number of bytes to sum; nothing is summed if it is <= 0.
 *
 * Returns the complemented 16-bit sum in network byte order, so it can
 * be stored straight into a checksum field.  Summing data that already
 * carries a correct checksum gives 0.
 */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    /* Build each big-endian word from its bytes.  This is the value
       ntohs() would give for the word, without an unaligned or
       type-punned 16-bit load. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* A trailing odd byte is the high-order half of a final word whose
       low-order half is zero padding. */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back in until the sum fits in 16 bits. */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
144
4efd681a
SE
145#ifdef i386
146/*
147 * This is a version of ip_compute_csum() optimized for IP headers,
148 * which always checksum on 4 octet boundaries.
149 *
150 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
151 * Arnt Gulbrandsen.
152 */
/* i386 variant: checksum an IP header of ihl 32-bit words at iph with
 * add-with-carry on 32-bit loads.  The result is narrowed to uint16_t
 * on return.
 * NOTE(review): when ihl<=4 the "jbe 2f" leaves with only the first
 * word loaded and no fold/complement, so this presumably relies on
 * callers passing ihl>=5 -- confirm. */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    uint32_t acc;

    __asm__ __volatile__(
        "movl (%1), %0 ;\n"
        "subl $4, %2 ;\n"
        "jbe 2f ;\n"
        "addl 4(%1), %0 ;\n"
        "adcl 8(%1), %0 ;\n"
        "adcl 12(%1), %0 ;\n"
"1: adcl 16(%1), %0 ;\n"
        "lea 4(%1), %1 ;\n"
        "decl %2 ;\n"
        "jne 1b ;\n"
        "adcl $0, %0 ;\n"
        "movl %0, %2 ;\n"
        "shrl $16, %0 ;\n"
        "addw %w2, %w0 ;\n"
        "adcl $0, %0 ;\n"
        "notl %0 ;\n"
"2: ;\n"
        /* The registers loaded with iph and ihl are modified by the
           template, so they are listed as outputs too; otherwise gcc
           would assume they still hold their original values. */
        : "=r" (acc), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return acc;
}
182#else
/* Portable variant: checksum an IP header of ihl 32-bit words by
 * handing ihl*4 bytes to ip_csum().  iph is const, matching the i386
 * variant, so callers holding a const pointer need no cast.  The
 * assert guards the multiplication against int overflow. */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
188#endif
189
/* Fixed part of an IPv4 header, overlaid directly on packet bytes.
 * The 16- and 32-bit members therefore hold network-byte-order values;
 * the code converts them with ntohs/htons/ntohl/htonl. */
struct iphdr {
    /* version and ihl share the first octet; the declaration order of
       the two bit-fields is switched on WORDS_BIGENDIAN. */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;
#else
    uint8_t ihl:4;
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;   /* length of the whole datagram in bytes */
    uint16_t id;
    uint16_t frag;      /* flags and fragment offset, masks below */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/* reserved             0x8000 */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
213
214struct icmphdr {
215 struct iphdr iph;
216 uint8_t type;
217 uint8_t code;
218 uint16_t check;
cfd79482 219 union icmpinfofield {
4efd681a
SE
220 uint32_t unused;
221 struct {
222 uint8_t pointer;
223 uint8_t unused1;
224 uint16_t unused2;
225 } pprob;
226 uint32_t gwaddr;
227 struct {
228 uint16_t id;
229 uint16_t seq;
230 } echo;
f3d69e41
IJ
231 struct {
232 uint16_t unused;
233 uint16_t mtu;
234 } fragneeded;
4efd681a
SE
235 } d;
236};
cfd79482
IJ
237
238static const union icmpinfofield icmp_noinfo;
4efd681a 239
826b47e9
IJ
240static void netlink_client_deliver(struct netlink *st,
241 struct netlink_client *client,
242 uint32_t source, uint32_t dest,
243 struct buffer_if *buf);
244static void netlink_host_deliver(struct netlink *st,
245 struct netlink_client *sender,
246 uint32_t source, uint32_t dest,
247 struct buffer_if *buf);
248
dbe11c20
IJ
249static const char *sender_name(struct netlink_client *sender /* or NULL */)
250{
251 return sender?sender->name:"(local)";
252}
253
70dc107b
SE
254static void netlink_packet_deliver(struct netlink *st,
255 struct netlink_client *client,
256 struct buffer_if *buf);
4efd681a 257
ff05a229
SE
258/* XXX RFC1812 4.3.2.5:
259 All other ICMP error messages (Destination Unreachable,
260 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
261 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
262 CONTROL). The IP Precedence value for these error messages MAY be
263 settable.
264 */
4efd681a 265static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
826b47e9
IJ
266 uint32_t source, uint32_t dest,
267 uint16_t len)
4efd681a
SE
268{
269 struct icmphdr *h;
270
271 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
3abd18e8 272 buffer_init(&st->icmp,calculate_max_start_pad());
4efd681a
SE
273 h=buf_append(&st->icmp,sizeof(*h));
274
275 h->iph.version=4;
276 h->iph.ihl=5;
277 h->iph.tos=0;
278 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
279 h->iph.id=0;
a6768d7c 280 h->iph.frag=0;
ff05a229 281 h->iph.ttl=255; /* XXX should be configurable */
4efd681a 282 h->iph.protocol=1;
826b47e9 283 h->iph.saddr=htonl(source);
4efd681a
SE
284 h->iph.daddr=htonl(dest);
285 h->iph.check=0;
286 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
287 h->check=0;
288 h->d.unused=0;
289
290 return h;
291}
292
293/* Fill in the ICMP checksum field correctly */
294static void netlink_icmp_csum(struct icmphdr *h)
295{
1caa23ff 296 int32_t len;
4efd681a
SE
297
298 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
299 h->check=0;
300 h->check=ip_csum(&h->type,len);
301}
302
303/* RFC1122:
304 * An ICMP error message MUST NOT be sent as the result of
305 * receiving:
306 *
307 * * an ICMP error message, or
308 *
309 * * a datagram destined to an IP broadcast or IP multicast
310 * address, or
311 *
312 * * a datagram sent as a link-layer broadcast, or
313 *
314 * * a non-initial fragment, or
315 *
316 * * a datagram whose source address does not define a single
317 * host -- e.g., a zero address, a loopback address, a
318 * broadcast address, a multicast address, or a Class E
319 * address.
320 */
321static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
322{
323 struct iphdr *iph;
8dea8d37 324 struct icmphdr *icmph;
4efd681a
SE
325 uint32_t source;
326
975820aa 327 if (buf->size < (int)sizeof(struct icmphdr)) return False;
4efd681a 328 iph=(struct iphdr *)buf->start;
8dea8d37
SE
329 icmph=(struct icmphdr *)buf->start;
330 if (iph->protocol==1) {
331 switch(icmph->type) {
686b7f1d
IJ
332 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
333 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
334 * Deprecated, reserved, unassigned and experimental
335 * options are treated as not safe to reply to.
336 */
337 case 0: /* Echo Reply */
338 case 8: /* Echo */
339 case 13: /* Timestamp */
340 case 14: /* Timestamp Reply */
341 return True;
342 default:
8dea8d37
SE
343 return False;
344 }
345 }
4efd681a 346 /* How do we spot broadcast destination addresses? */
a6768d7c 347 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
4efd681a
SE
348 source=ntohl(iph->saddr);
349 if (source==0) return False;
350 if ((source&0xff000000)==0x7f000000) return False;
351 /* How do we spot broadcast source addresses? */
352 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
353 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
354 return True;
355}
356
357/* How much of the original IP packet do we include in its ICMP
358 response? The header plus up to 64 bits. */
ff05a229
SE
359
360/* XXX TODO RFC1812:
3614.3.2.3 Original Message Header
362
363 Historically, every ICMP error message has included the Internet
364 header and at least the first 8 data bytes of the datagram that
365 triggered the error. This is no longer adequate, due to the use of
366 IP-in-IP tunneling and other technologies. Therefore, the ICMP
367 datagram SHOULD contain as much of the original datagram as possible
368 without the length of the ICMP datagram exceeding 576 bytes. The
369 returned IP header (and user data) MUST be identical to that which
370 was received, except that the router is not required to undo any
371 modifications to the IP header that are normally performed in
372 forwarding that were performed before the error was detected (e.g.,
373 decrementing the TTL, or updating options). Note that the
374 requirements of Section [4.3.3.5] supersede this requirement in some
375 cases (i.e., for a Parameter Problem message, if the problem is in a
376 modified field, the router must undo the modification). See Section
377 [4.3.3.5]).
378 */
4efd681a
SE
379static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
380{
975820aa 381 if (buf->size < (int)sizeof(struct iphdr)) return 0;
4efd681a
SE
382 struct iphdr *iph=(struct iphdr *)buf->start;
383 uint16_t hlen,plen;
384
385 hlen=iph->ihl*4;
386 /* We include the first 8 bytes of the packet data, provided they exist */
387 hlen+=8;
388 plen=ntohs(iph->tot_len);
389 return (hlen>plen?plen:hlen);
390}
391
70dc107b
SE
392/* client indicates where the packet we're constructing a response to
393 comes from. NULL indicates the host. */
ab62c3ed
IJ
394static void netlink_icmp_simple(struct netlink *st,
395 struct netlink_client *origsender,
396 struct buffer_if *buf,
cfd79482
IJ
397 uint8_t type, uint8_t code,
398 union icmpinfofield info)
4efd681a 399{
4efd681a
SE
400 struct icmphdr *h;
401 uint16_t len;
402
403 if (netlink_icmp_may_reply(buf)) {
975820aa 404 struct iphdr *iph=(struct iphdr *)buf->start;
826b47e9
IJ
405
406 uint32_t icmpdest = ntohl(iph->saddr);
407 uint32_t icmpsource;
408 const char *icmpsourcedebugprefix;
409 if (!st->ptp) {
410 icmpsource=st->secnet_address;
411 icmpsourcedebugprefix="";
412 } else if (origsender) {
413 /* was from peer, send reply as if from host */
414 icmpsource=st->local_address;
415 icmpsourcedebugprefix="L!";
416 } else {
417 /* was from host, send reply as if from peer */
418 icmpsource=st->secnet_address; /* actually, peer address */
419 icmpsourcedebugprefix="P!";
420 }
421 MDEBUG("%s: generating ICMP re %s[%s]->[%s]:"
422 " from %s%s type=%u code=%u\n",
423 st->name, sender_name(origsender),
424 ipaddr_to_string(ntohl(iph->saddr)),
425 ipaddr_to_string(ntohl(iph->daddr)),
426 icmpsourcedebugprefix,
427 ipaddr_to_string(icmpsource),
428 type, code);
429
4efd681a 430 len=netlink_icmp_reply_len(buf);
826b47e9 431 h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len);
cfd79482 432 h->type=type; h->code=code; h->d=info;
4efd681a
SE
433 memcpy(buf_append(&st->icmp,len),buf->start,len);
434 netlink_icmp_csum(h);
826b47e9
IJ
435
436 if (!st->ptp) {
437 netlink_packet_deliver(st,NULL,&st->icmp);
438 } else if (origsender) {
439 netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp);
440 } else {
441 netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp);
442 }
4efd681a
SE
443 BUF_ASSERT_FREE(&st->icmp);
444 }
445}
446
447/*
448 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
449 * checksum.
ff05a229 450 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
4efd681a
SE
451 *
452 * Is the datagram acceptable?
453 *
454 * 1. Length at least the size of an ip header
455 * 2. Version of 4
456 * 3. Checksums correctly.
457 * 4. Doesn't have a bogus length
458 */
d714da29
IJ
459static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
460 char *errmsgbuf, int errmsgbuflen)
4efd681a 461{
d714da29
IJ
462#define BAD(...) do{ \
463 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
464 return False; \
465 }while(0)
466
975820aa 467 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
4efd681a 468 struct iphdr *iph=(struct iphdr *)buf->start;
1caa23ff 469 int32_t len;
4efd681a 470
d714da29
IJ
471 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
472 if (iph->version != 4) BAD("version %u",iph->version);
473 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
474 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
4efd681a
SE
475 len=ntohs(iph->tot_len);
476 /* There should be no padding */
d714da29
IJ
477 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
478 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
4efd681a
SE
479 /* XXX check that there's no source route specified */
480 return True;
d714da29
IJ
481
482#undef BAD
4efd681a
SE
483}
484
f3d69e41
IJ
485static const char *fragment_filter_header(uint8_t *base, long *hlp)
486{
487 const int fixedhl = sizeof(struct iphdr);
488 long hl = *hlp;
489 const uint8_t *ipend = base + hl;
490 uint8_t *op = base + fixedhl;
491 const uint8_t *ip = op;
492
493 while (ip < ipend) {
494 uint8_t opt = ip[0];
495 int remain = ipend - ip;
496 if (opt == 0x00) /* End of Options List */ break;
497 if (opt == 0x01) /* No Operation */ continue;
498 if (remain < 2) return "IPv4 options truncated at length";
499 int optlen = ip[1];
500 if (remain < optlen) return "IPv4 options truncated in option";
501 if (opt & 0x80) /* copy */ {
502 memmove(op, ip, optlen);
503 op += optlen;
504 }
505 ip += optlen;
506 }
507 while ((hl = (op - base)) & 0x3)
508 *op++ = 0x00 /* End of Option List */;
509 ((struct iphdr*)base)->ihl = hl >> 2;
510 *hlp = hl;
511
512 return 0;
513}
514
515/* Fragment or send ICMP Fragmentation Needed */
516static void netlink_maybe_fragment(struct netlink *st,
ab62c3ed 517 struct netlink_client *sender,
f3d69e41
IJ
518 netlink_deliver_fn *deliver,
519 void *deliver_dst,
520 const char *delivery_name,
521 int32_t mtu,
522 uint32_t source, uint32_t dest,
523 struct buffer_if *buf)
524{
525 struct iphdr *iph=(struct iphdr*)buf->start;
526 long hl = iph->ihl*4;
527 const char *ssource = ipaddr_to_string(source);
528
529 if (buf->size <= mtu) {
530 deliver(deliver_dst, buf);
531 return;
532 }
533
534 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
535 st->name, ssource, delivery_name, buf->size);
536
537#define BADFRAG(m, ...) \
538 Message(M_WARNING, \
539 "%s: fragmenting packet from source %s" \
540 " for transmission via %s: " m "\n", \
541 st->name, ssource, delivery_name, \
542 ## __VA_ARGS__);
543
544 unsigned orig_frag = ntohs(iph->frag);
545
546 if (orig_frag&IPHDR_FRAG_DONT) {
547 union icmpinfofield info =
548 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
ab62c3ed 549 netlink_icmp_simple(st,sender,buf,
f3d69e41
IJ
550 ICMP_TYPE_UNREACHABLE,
551 ICMP_CODE_FRAGMENTATION_REQUIRED,
552 info);
553 BUF_FREE(buf);
554 return;
555 }
556 if (mtu < hl + 8) {
557 BADFRAG("mtu %"PRId32" too small", mtu);
558 BUF_FREE(buf);
559 return;
560 }
561
562 /* we (ab)use the icmp buffer to stash the original packet */
563 struct buffer_if *orig = &st->icmp;
564 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
565 buffer_copy(orig,buf);
566 BUF_FREE(buf);
567
568 const uint8_t *startindata = orig->start + hl;
569 const uint8_t *indata = startindata;
570 const uint8_t *endindata = orig->start + orig->size;
571 _Bool filtered = 0;
572
573 for (;;) {
574 /* compute our fragment offset */
575 long dataoffset = indata - startindata
576 + (orig_frag & IPHDR_FRAG_OFF)*8;
577 assert(!(dataoffset & 7));
578 if (dataoffset > IPHDR_FRAG_OFF*8) {
579 BADFRAG("ultimate fragment offset out of range");
580 break;
581 }
582
583 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
584 buffer_init(buf,calculate_max_start_pad());
585
586 /* copy header (possibly filtered); will adjust in a bit */
587 struct iphdr *fragh = buf_append(buf, hl);
588 memcpy(fragh, orig->start, hl);
589
590 /* decide how much payload to copy and copy it */
591 long avail = mtu - hl;
592 long remain = endindata - indata;
593 long use = avail < remain ? (avail & ~(long)7) : remain;
594 memcpy(buf_append(buf, use), indata, use);
595 indata += use;
596
597 _Bool last_frag = indata >= endindata;
598
599 /* adjust the header */
600 fragh->tot_len = htons(buf->size);
601 fragh->frag =
602 htons((orig_frag & ~IPHDR_FRAG_OFF) |
603 (last_frag ? 0 : IPHDR_FRAG_MORE) |
604 (dataoffset >> 3));
605 fragh->check = 0;
606 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
607
608 /* actually send it */
609 deliver(deliver_dst, buf);
610 if (last_frag)
611 break;
612
613 /* after copying the header for the first frag,
614 * we filter the header for the remaining frags */
615 if (!filtered++) {
616 const char *bad = fragment_filter_header(orig->start, &hl);
617 if (bad) { BADFRAG("%s", bad); break; }
618 }
619 }
620
621 BUF_FREE(orig);
622
623#undef BADFRAG
624}
625
7b6abafa 626/* Deliver a packet _to_ client; used after we have decided
55bc97e6
IJ
627 * what to do with it (and just to check that the client has
628 * actually registered a delivery function with us). */
7b6abafa
IJ
629static void netlink_client_deliver(struct netlink *st,
630 struct netlink_client *client,
631 uint32_t source, uint32_t dest,
632 struct buffer_if *buf)
633{
55bc97e6
IJ
634 if (!client->deliver) {
635 string_t s,d;
636 s=ipaddr_to_string(source);
637 d=ipaddr_to_string(dest);
638 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
639 st->name,s,d);
640 free(s); free(d);
641 BUF_FREE(buf);
642 return;
643 }
ab62c3ed 644 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
f3d69e41 645 client->mtu, source,dest,buf);
7b6abafa
IJ
646 client->outcount++;
647}
648
f928f069
IJ
649/* Deliver a packet to the host; used after we have decided that that
650 * is what to do with it. */
651static void netlink_host_deliver(struct netlink *st,
ab62c3ed 652 struct netlink_client *sender,
f928f069
IJ
653 uint32_t source, uint32_t dest,
654 struct buffer_if *buf)
655{
ab62c3ed 656 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
f3d69e41 657 st->mtu, source,dest,buf);
f928f069
IJ
658 st->outcount++;
659}
660
f2b711bd 661/* Deliver a packet. "sender"==NULL for packets from the host and packets
d3fe100d 662 generated internally in secnet. */
70dc107b 663static void netlink_packet_deliver(struct netlink *st,
f2b711bd 664 struct netlink_client *sender,
70dc107b 665 struct buffer_if *buf)
4efd681a 666{
975820aa
IJ
667 if (buf->size < (int)sizeof(struct iphdr)) {
668 Message(M_ERR,"%s: trying to deliver a too-short packet"
dbe11c20 669 " from %s!\n",st->name, sender_name(sender));
975820aa
IJ
670 BUF_FREE(buf);
671 return;
672 }
673
4efd681a
SE
674 struct iphdr *iph=(struct iphdr *)buf->start;
675 uint32_t dest=ntohl(iph->daddr);
70dc107b
SE
676 uint32_t source=ntohl(iph->saddr);
677 uint32_t best_quality;
469fd1d9
SE
678 bool_t allow_route=False;
679 bool_t found_allowed=False;
70dc107b
SE
680 int best_match;
681 int i;
2fe58dfd 682
4efd681a 683 BUF_ASSERT_USED(buf);
2fe58dfd 684
4efd681a 685 if (dest==st->secnet_address) {
4f5e39ec 686 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
4efd681a 687 BUF_FREE(buf);
2fe58dfd
SE
688 return;
689 }
4efd681a 690
f2b711bd 691 /* Packets from the host (sender==NULL) may always be routed. Packets
469fd1d9 692 from clients with the allow_route option will also be routed. */
f2b711bd 693 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
469fd1d9
SE
694 allow_route=True;
695
696 /* If !allow_route, we check the routing table anyway, and if
697 there's a suitable route with OPT_ALLOWROUTE set we use it. If
698 there's a suitable route, but none with OPT_ALLOWROUTE set then
699 we generate ICMP 'communication with destination network
700 administratively prohibited'. */
701
702 best_quality=0;
703 best_match=-1;
d3fe100d
SE
704 for (i=0; i<st->n_clients; i++) {
705 if (st->routes[i]->up &&
706 ipset_contains_addr(st->routes[i]->networks,dest)) {
469fd1d9
SE
707 /* It's an available route to the correct destination. But is
708 it better than the one we already have? */
709
710 /* If we have already found an allowed route then we don't
711 bother looking at routes we're not allowed to use. If
712 we don't yet have an allowed route we'll consider any. */
713 if (!allow_route && found_allowed) {
d3fe100d 714 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
70dc107b 715 }
469fd1d9 716
d3fe100d 717 if (st->routes[i]->link_quality>best_quality
469fd1d9 718 || best_quality==0) {
d3fe100d 719 best_quality=st->routes[i]->link_quality;
469fd1d9 720 best_match=i;
d3fe100d 721 if (st->routes[i]->options&OPT_ALLOWROUTE)
469fd1d9
SE
722 found_allowed=True;
723 /* If quality isn't perfect we may wish to
724 consider kicking the tunnel with a 0-length
725 packet to prompt it to perform a key setup.
726 Then it'll eventually decide it's up or
727 down. */
728 /* If quality is perfect and we're allowed to use the
729 route we don't need to search any more. */
730 if (best_quality>=MAXIMUM_LINK_QUALITY &&
731 (allow_route || found_allowed)) break;
4efd681a 732 }
70dc107b 733 }
469fd1d9
SE
734 }
735 if (best_match==-1) {
736 /* The packet's not going down a tunnel. It might (ought to)
737 be for the host. */
794f2398 738 if (ipset_contains_addr(st->networks,dest)) {
ab62c3ed 739 netlink_host_deliver(st,sender,source,dest,buf);
70dc107b
SE
740 BUF_ASSERT_FREE(buf);
741 } else {
469fd1d9
SE
742 string_t s,d;
743 s=ipaddr_to_string(source);
744 d=ipaddr_to_string(dest);
ff05a229 745 Message(M_DEBUG,"%s: don't know where to deliver packet "
469fd1d9
SE
746 "(s=%s, d=%s)\n", st->name, s, d);
747 free(s); free(d);
ab62c3ed 748 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 749 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
70dc107b 750 BUF_FREE(buf);
2fe58dfd 751 }
469fd1d9
SE
752 } else {
753 if (!allow_route &&
d3fe100d 754 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
469fd1d9
SE
755 string_t s,d;
756 s=ipaddr_to_string(source);
757 d=ipaddr_to_string(dest);
758 /* We have a usable route but aren't allowed to use it.
759 Generate ICMP destination unreachable: communication
760 with destination network administratively prohibited */
761 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
762 st->name,s,d);
763 free(s); free(d);
764
ab62c3ed 765 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 766 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
469fd1d9 767 BUF_FREE(buf);
469fd1d9 768 } else {
ea7ec970 769 if (best_quality>0) {
7b6abafa
IJ
770 netlink_client_deliver(st,st->routes[best_match],
771 source,dest,buf);
ea7ec970
SE
772 BUF_ASSERT_FREE(buf);
773 } else {
774 /* Generate ICMP destination unreachable */
ab62c3ed 775 netlink_icmp_simple(st,sender,buf,
cfd79482
IJ
776 ICMP_TYPE_UNREACHABLE,
777 ICMP_CODE_NET_UNREACHABLE,
778 icmp_noinfo);
ea7ec970
SE
779 BUF_FREE(buf);
780 }
469fd1d9 781 }
2fe58dfd 782 }
70dc107b 783 BUF_ASSERT_FREE(buf);
4efd681a
SE
784}
785
70dc107b 786static void netlink_packet_forward(struct netlink *st,
f2b711bd 787 struct netlink_client *sender,
70dc107b 788 struct buffer_if *buf)
4efd681a 789{
975820aa 790 if (buf->size < (int)sizeof(struct iphdr)) return;
4efd681a
SE
791 struct iphdr *iph=(struct iphdr *)buf->start;
792
793 BUF_ASSERT_USED(buf);
794
795 /* Packet has already been checked */
796 if (iph->ttl<=1) {
797 /* Generate ICMP time exceeded */
ab62c3ed 798 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
cfd79482 799 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
4efd681a
SE
800 BUF_FREE(buf);
801 return;
802 }
803 iph->ttl--;
804 iph->check=0;
805 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
806
f2b711bd 807 netlink_packet_deliver(st,sender,buf);
4efd681a
SE
808 BUF_ASSERT_FREE(buf);
809}
810
9d3a4132 811/* Deal with packets addressed explicitly to us */
70dc107b 812static void netlink_packet_local(struct netlink *st,
f2b711bd 813 struct netlink_client *sender,
70dc107b 814 struct buffer_if *buf)
4efd681a
SE
815{
816 struct icmphdr *h;
817
469fd1d9
SE
818 st->localcount++;
819
975820aa
IJ
820 if (buf->size < (int)sizeof(struct icmphdr)) {
821 Message(M_WARNING,"%s: short packet addressed to secnet; "
822 "ignoring it\n",st->name);
823 BUF_FREE(buf);
824 return;
825 }
4efd681a
SE
826 h=(struct icmphdr *)buf->start;
827
6e3fd952
IJ
828 unsigned fraginfo = ntohs(h->iph.frag);
829 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
830 if (!(fraginfo & IPHDR_FRAG_OFF))
831 /* report only for first fragment */
832 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
833 "ignoring it\n",st->name);
4efd681a
SE
834 BUF_FREE(buf);
835 return;
836 }
837
838 if (h->iph.protocol==1) {
839 /* It's ICMP */
ff05a229 840 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
4efd681a
SE
841 /* ICMP echo-request. Special case: we re-use the buffer
842 to construct the reply. */
ff05a229 843 h->type=ICMP_TYPE_ECHO_REPLY;
4efd681a
SE
844 h->iph.daddr=h->iph.saddr;
845 h->iph.saddr=htonl(st->secnet_address);
ff05a229 846 h->iph.ttl=255;
4efd681a
SE
847 h->iph.check=0;
848 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
849 netlink_icmp_csum(h);
70dc107b 850 netlink_packet_deliver(st,NULL,buf);
4efd681a
SE
851 return;
852 }
853 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
854 } else {
855 /* Send ICMP protocol unreachable */
ab62c3ed 856 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 857 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
4efd681a
SE
858 BUF_FREE(buf);
859 return;
860 }
861
862 BUF_FREE(buf);
863}
864
9d3a4132
SE
865/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
866 it came from. */
f2b711bd 867static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
469fd1d9 868 struct buffer_if *buf)
4efd681a 869{
4efd681a
SE
870 uint32_t source,dest;
871 struct iphdr *iph;
d714da29 872 char errmsgbuf[50];
f2b711bd 873 const char *sourcedesc=sender?sender->name:"host";
4efd681a
SE
874
875 BUF_ASSERT_USED(buf);
a28d65a5 876
d714da29
IJ
877 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
878 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
a28d65a5 879 st->name,sourcedesc,
d714da29 880 errmsgbuf);
4efd681a
SE
881 BUF_FREE(buf);
882 return;
883 }
e8b1adac 884 assert(buf->size >= (int)sizeof(struct iphdr));
4efd681a
SE
885 iph=(struct iphdr *)buf->start;
886
887 source=ntohl(iph->saddr);
888 dest=ntohl(iph->daddr);
889
d3fe100d
SE
890 /* Check source. If we don't like the source, there's no point
891 generating ICMP because we won't know how to get it to the
892 source of the packet. */
f2b711bd 893 if (sender) {
c6f79b17
SE
894 /* Check that the packet source is appropriate for the tunnel
895 it came down */
f2b711bd 896 if (!ipset_contains_addr(sender->networks,source)) {
9d3a4132
SE
897 string_t s,d;
898 s=ipaddr_to_string(source);
899 d=ipaddr_to_string(dest);
900 Message(M_WARNING,"%s: packet from tunnel %s with bad "
f2b711bd 901 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
9d3a4132
SE
902 free(s); free(d);
903 BUF_FREE(buf);
904 return;
905 }
906 } else {
c6f79b17
SE
907 /* Check that the packet originates in our configured local
908 network, and hasn't been forwarded from elsewhere or
909 generated with the wrong source address */
794f2398 910 if (!ipset_contains_addr(st->networks,source)) {
9d3a4132
SE
911 string_t s,d;
912 s=ipaddr_to_string(source);
913 d=ipaddr_to_string(dest);
914 Message(M_WARNING,"%s: outgoing packet with bad source address "
915 "(s=%s,d=%s)\n",st->name,s,d);
916 free(s); free(d);
917 BUF_FREE(buf);
918 return;
919 }
4efd681a 920 }
c6f79b17 921
794f2398
SE
922 /* If this is a point-to-point device we don't examine the
923 destination address at all; we blindly send it down our
924 one-and-only registered tunnel, or to the host, depending on
d3fe100d
SE
925 where it came from. It's up to external software to check
926 address validity and generate ICMP, etc. */
c6f79b17 927 if (st->ptp) {
f2b711bd 928 if (sender) {
ab62c3ed 929 netlink_host_deliver(st,sender,source,dest,buf);
c6f79b17 930 } else {
7b6abafa 931 netlink_client_deliver(st,st->clients,source,dest,buf);
c6f79b17
SE
932 }
933 BUF_ASSERT_FREE(buf);
934 return;
935 }
936
d3fe100d
SE
937 /* st->secnet_address needs checking before matching destination
938 addresses */
2fe58dfd 939 if (dest==st->secnet_address) {
f2b711bd 940 netlink_packet_local(st,sender,buf);
4efd681a 941 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
942 return;
943 }
f2b711bd 944 netlink_packet_forward(st,sender,buf);
4efd681a
SE
945 BUF_ASSERT_FREE(buf);
946}
947
469fd1d9
SE
948static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
949{
950 struct netlink_client *c=sst;
951 struct netlink *st=c->nst;
952
953 netlink_incoming(st,c,buf);
954}
955
/* Delivery hook returned by netlink_init() to the device code: sst is
   the netlink itself.  The packet is handed to netlink_incoming()
   with a NULL sender, i.e. it did not come from any client. */
static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
{
    struct netlink *nl=sst;

    netlink_incoming(nl,NULL,buf);
}
962
d3fe100d 963static void netlink_set_quality(void *sst, uint32_t quality)
4efd681a 964{
d3fe100d
SE
965 struct netlink_client *c=sst;
966 struct netlink *st=c->nst;
4efd681a 967
d3fe100d
SE
968 c->link_quality=quality;
969 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
970 if (c->options&OPT_SOFTROUTE) {
971 st->set_routes(st->dst,c);
4efd681a 972 }
4efd681a
SE
973}
974
d3fe100d
SE
975static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
976 struct subnet_list *snets)
4efd681a 977{
1caa23ff 978 int32_t i;
d3fe100d 979 string_t net;
4efd681a 980
d3fe100d
SE
981 for (i=0; i<snets->entries; i++) {
982 net=subnet_to_string(snets->list[i]);
983 Message(loglevel,"%s ",net);
984 free(net);
9d3a4132 985 }
4efd681a
SE
986}
987
042a8da9 988static void netlink_dump_routes(struct netlink *st, bool_t requested)
9d3a4132
SE
989{
990 int i;
991 string_t net;
042a8da9 992 uint32_t c=M_INFO;
9d3a4132 993
042a8da9 994 if (requested) c=M_WARNING;
469fd1d9
SE
995 if (st->ptp) {
996 net=ipaddr_to_string(st->secnet_address);
34d3bf4c 997 Message(c,"%s: point-to-point (remote end is %s); routes: ",
469fd1d9 998 st->name, net);
9d3a4132 999 free(net);
d3fe100d 1000 netlink_output_subnets(st,c,st->clients->subnets);
469fd1d9
SE
1001 Message(c,"\n");
1002 } else {
1003 Message(c,"%s: routing table:\n",st->name);
d3fe100d
SE
1004 for (i=0; i<st->n_clients; i++) {
1005 netlink_output_subnets(st,c,st->routes[i]->subnets);
ff05a229 1006 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
ea7ec970 1007 "quality %d,use %d,pri %lu)\n",
d3fe100d 1008 st->routes[i]->name,
ff05a229
SE
1009 st->routes[i]->up?"up":"down",
1010 st->routes[i]->mtu,
d3fe100d
SE
1011 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
1012 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
d3fe100d 1013 st->routes[i]->link_quality,
ea7ec970
SE
1014 st->routes[i]->outcount,
1015 (unsigned long)st->routes[i]->priority);
469fd1d9
SE
1016 }
1017 net=ipaddr_to_string(st->secnet_address);
1018 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
1019 net,st->name,st->localcount);
9d3a4132 1020 free(net);
794f2398
SE
1021 for (i=0; i<st->subnets->entries; i++) {
1022 net=subnet_to_string(st->subnets->list[i]);
1023 Message(c,"%s ",net);
469fd1d9
SE
1024 free(net);
1025 }
794f2398
SE
1026 if (i>0)
1027 Message(c,"-> host (use %d)\n",st->outcount);
9d3a4132
SE
1028 }
1029}
1030
d3fe100d
SE
1031/* ap is a pointer to a member of the routes array */
1032static int netlink_compare_client_priority(const void *ap, const void *bp)
70dc107b 1033{
d3fe100d
SE
1034 const struct netlink_client *const*a=ap;
1035 const struct netlink_client *const*b=bp;
70dc107b 1036
d3fe100d
SE
1037 if ((*a)->priority==(*b)->priority) return 0;
1038 if ((*a)->priority<(*b)->priority) return 1;
70dc107b
SE
1039 return -1;
1040}
1041
1042static void netlink_phase_hook(void *sst, uint32_t new_phase)
1043{
1044 struct netlink *st=sst;
1045 struct netlink_client *c;
1caa23ff 1046 int32_t i;
70dc107b
SE
1047
1048 /* All the networks serviced by the various tunnels should now
1049 * have been registered. We build a routing table by sorting the
d3fe100d 1050 * clients by priority. */
bb9d0561
IJ
1051 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1052 "netlink_phase_hook");
70dc107b
SE
1053 /* Fill the table */
1054 i=0;
59230b9b
IJ
1055 for (c=st->clients; c; c=c->next) {
1056 assert(i<INT_MAX);
d3fe100d 1057 st->routes[i++]=c;
59230b9b 1058 }
d3fe100d
SE
1059 /* Sort the table in descending order of priority */
1060 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1061 netlink_compare_client_priority);
9d3a4132 1062
042a8da9
SE
1063 netlink_dump_routes(st,False);
1064}
1065
1066static void netlink_signal_handler(void *sst, int signum)
1067{
1068 struct netlink *st=sst;
1069 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1070 netlink_dump_routes(st,True);
70dc107b
SE
1071}
1072
1caa23ff 1073static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
d3fe100d
SE
1074{
1075 struct netlink_client *c=sst;
1076
1077 c->mtu=new_mtu;
1078}
1079
469fd1d9 1080static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1c085348 1081 void *dst, uint32_t *localmtu_r)
469fd1d9
SE
1082{
1083 struct netlink_client *c=sst;
1c085348 1084 struct netlink *st=c->nst;
469fd1d9 1085
469fd1d9
SE
1086 c->deliver=deliver;
1087 c->dst=dst;
1c085348
IJ
1088
1089 if (localmtu_r)
1090 *localmtu_r=st->mtu;
469fd1d9
SE
1091}
1092
1093static struct flagstr netlink_option_table[]={
1094 { "soft", OPT_SOFTROUTE },
1095 { "allow-route", OPT_ALLOWROUTE },
1096 { NULL, 0}
1097};
1098/* This is the routine that gets called when the closure that's
1099 returned by an invocation of a netlink device closure (eg. tun,
1100 userv-ipif) is invoked. It's used to create routes and pass in
1101 information about them; the closure it returns is used by site
1102 code. */
1103static closure_t *netlink_inst_create(struct netlink *st,
1104 struct cloc loc, dict_t *dict)
1105{
1106 struct netlink_client *c;
1107 string_t name;
794f2398 1108 struct ipset *networks;
1caa23ff
IJ
1109 uint32_t options,priority;
1110 int32_t mtu;
794f2398 1111 list_t *l;
469fd1d9
SE
1112
1113 name=dict_read_string(dict, "name", True, st->name, loc);
1114
794f2398
SE
1115 l=dict_lookup(dict,"routes");
1116 if (!l)
1117 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1118 networks=string_list_to_ipset(l,loc,st->name,"routes");
469fd1d9
SE
1119 options=string_list_to_word(dict_lookup(dict,"options"),
1120 netlink_option_table,st->name);
1121
d3fe100d
SE
1122 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1123 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1124
1125 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
469fd1d9
SE
1126 cfgfatal(loc,st->name,"this netlink device does not support "
1127 "soft routes.\n");
1128 return NULL;
1129 }
1130
1131 if (options&OPT_SOFTROUTE) {
1132 /* XXX for now we assume that soft routes require root privilege;
1133 this may not always be true. The device driver can tell us. */
1134 require_root_privileges=True;
1135 require_root_privileges_explanation="netlink: soft routes";
1136 if (st->ptp) {
1137 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1138 "soft routes.\n");
1139 return NULL;
1140 }
1141 }
1142
794f2398
SE
1143 /* Check that nets are a subset of st->remote_networks;
1144 refuse to register if they are not. */
1145 if (!ipset_is_subset(st->remote_networks,networks)) {
1146 cfgfatal(loc,st->name,"routes are not allowed\n");
469fd1d9
SE
1147 return NULL;
1148 }
1149
1150 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1151 c->cl.description=name;
1152 c->cl.type=CL_NETLINK;
1153 c->cl.apply=NULL;
1154 c->cl.interface=&c->ops;
1155 c->ops.st=c;
1156 c->ops.reg=netlink_inst_reg;
1157 c->ops.deliver=netlink_inst_incoming;
1158 c->ops.set_quality=netlink_set_quality;
d3fe100d 1159 c->ops.set_mtu=netlink_inst_set_mtu;
469fd1d9
SE
1160 c->nst=st;
1161
1162 c->networks=networks;
794f2398 1163 c->subnets=ipset_to_subnet_list(networks);
d3fe100d 1164 c->priority=priority;
469fd1d9
SE
1165 c->deliver=NULL;
1166 c->dst=NULL;
1167 c->name=name;
f208b9a9 1168 c->link_quality=LINK_QUALITY_UNUSED;
d3fe100d
SE
1169 c->mtu=mtu?mtu:st->mtu;
1170 c->options=options;
1171 c->outcount=0;
1172 c->up=False;
1173 c->kup=False;
469fd1d9
SE
1174 c->next=st->clients;
1175 st->clients=c;
59230b9b 1176 assert(st->n_clients < INT_MAX);
d3fe100d 1177 st->n_clients++;
469fd1d9
SE
1178
1179 return &c->cl;
1180}
1181
1182static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1183 dict_t *context, list_t *args)
1184{
1185 struct netlink *st=self->interface;
1186
1187 dict_t *dict;
1188 item_t *item;
1189 closure_t *cl;
1190
469fd1d9
SE
1191 item=list_elem(args,0);
1192 if (!item || item->type!=t_dict) {
1193 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1194 }
1195 dict=item->data.dict;
1196
1197 cl=netlink_inst_create(st,loc,dict);
1198
1199 return new_closure(cl);
1200}
1201
9d3a4132
SE
1202netlink_deliver_fn *netlink_init(struct netlink *st,
1203 void *dst, struct cloc loc,
fe5e9cc4 1204 dict_t *dict, cstring_t description,
d3fe100d 1205 netlink_route_fn *set_routes,
9d3a4132 1206 netlink_deliver_fn *to_host)
4efd681a 1207{
c6f79b17 1208 item_t *sa, *ptpa;
794f2398 1209 list_t *l;
c6f79b17 1210
4efd681a
SE
1211 st->dst=dst;
1212 st->cl.description=description;
469fd1d9
SE
1213 st->cl.type=CL_PURE;
1214 st->cl.apply=netlink_inst_apply;
1215 st->cl.interface=st;
4efd681a 1216 st->clients=NULL;
d3fe100d
SE
1217 st->routes=NULL;
1218 st->n_clients=0;
1219 st->set_routes=set_routes;
4efd681a
SE
1220 st->deliver_to_host=to_host;
1221
794f2398 1222 st->name=dict_read_string(dict,"name",False,description,loc);
4efd681a 1223 if (!st->name) st->name=description;
794f2398
SE
1224 l=dict_lookup(dict,"networks");
1225 if (l)
1226 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1227 else {
4f5e39ec
SE
1228 struct ipset *empty;
1229 empty=ipset_new();
1230 st->networks=ipset_complement(empty);
1231 ipset_free(empty);
794f2398
SE
1232 }
1233 l=dict_lookup(dict,"remote-networks");
1234 if (l) {
1235 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1236 "remote-networks");
1237 } else {
1238 struct ipset *empty;
1239 empty=ipset_new();
1240 st->remote_networks=ipset_complement(empty);
1241 ipset_free(empty);
1242 }
091433c6
IJ
1243 st->local_address=string_item_to_ipaddr(
1244 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
794f2398 1245
c6f79b17 1246 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
469fd1d9 1247 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
c6f79b17
SE
1248 if (sa && ptpa) {
1249 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1250 "ptp-address in the same netlink device\n");
1251 }
1252 if (!(sa || ptpa)) {
1253 cfgfatal(loc,st->name,"you must specify secnet-address or "
1254 "ptp-address for this netlink device\n");
1255 }
1256 if (sa) {
794f2398 1257 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
c6f79b17
SE
1258 st->ptp=False;
1259 } else {
794f2398 1260 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
c6f79b17
SE
1261 st->ptp=True;
1262 }
d3fe100d
SE
1263 /* To be strictly correct we could subtract secnet_address from
1264 networks here. It shouldn't make any practical difference,
794f2398
SE
1265 though, and will make the route dump look complicated... */
1266 st->subnets=ipset_to_subnet_list(st->networks);
4efd681a 1267 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
f3d69e41 1268 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
469fd1d9
SE
1269 st->outcount=0;
1270 st->localcount=0;
70dc107b
SE
1271
1272 add_hook(PHASE_SETUP,netlink_phase_hook,st);
042a8da9 1273 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
4efd681a 1274
469fd1d9
SE
1275 /* If we're point-to-point then we return a CL_NETLINK directly,
1276 rather than a CL_NETLINK_OLD or pure closure (depending on
1277 compatibility). This CL_NETLINK is for our one and only
1278 client. Our cl.apply function is NULL. */
1279 if (st->ptp) {
1280 closure_t *cl;
1281 cl=netlink_inst_create(st,loc,dict);
1282 st->cl=*cl;
1283 }
1284 return netlink_dev_incoming;
2fe58dfd
SE
1285}
1286
9d3a4132 1287/* No connection to the kernel at all... */
2fe58dfd 1288
9d3a4132 1289struct null {
4efd681a 1290 struct netlink nl;
4efd681a 1291};
2fe58dfd 1292
d3fe100d 1293static bool_t null_set_route(void *sst, struct netlink_client *routes)
4efd681a 1294{
9d3a4132 1295 struct null *st=sst;
d3fe100d
SE
1296
1297 if (routes->up!=routes->kup) {
1298 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1299 st->nl.name,routes->name,
1300 routes->up?"up":"down");
1301 routes->kup=routes->up;
9d3a4132 1302 return True;
2fe58dfd 1303 }
9d3a4132 1304 return False;
2fe58dfd 1305}
9d3a4132 1306
/* Host-delivery function for the null device: does nothing at all
   with the packet it is given. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
}
1311
1312static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1313 list_t *args)
1314{
1315 struct null *st;
4efd681a
SE
1316 item_t *item;
1317 dict_t *dict;
2fe58dfd 1318
4efd681a 1319 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1320
4efd681a
SE
1321 item=list_elem(args,0);
1322 if (!item || item->type!=t_dict)
1323 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1324
1325 dict=item->data.dict;
1326
9d3a4132
SE
1327 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1328 null_deliver);
4efd681a
SE
1329
1330 return new_closure(&st->nl.cl);
2fe58dfd
SE
1331}
1332
2fe58dfd
SE
1333void netlink_module(dict_t *dict)
1334{
4efd681a 1335 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1336}