changelog: mention hippotat
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
/* User-kernel network link */

/*
 * This file is part of secnet.
 * See README for full list of copyright holders.
 *
 * secnet is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * secnet is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * version 3 along with secnet; if not, see
 * https://www.gnu.org/licenses/gpl.html.
 */

/* See RFCs 791, 792, 1123 and 1812 */
2fe58dfd 23
ff05a229
SE
/* The netlink device is actually a router.  Tunnels are unnumbered
   point-to-point lines (RFC1812 section 2.2.7); the router has a
   single address (the 'router-id'). */

/* This is where we currently have the anti-spoofing paranoia - before
   sending a packet to the kernel we check that the tunnel it came
   over could reasonably have produced it. */
31
32
/* Points to note from RFC1812 (which may require changes in this
   file):

3.3.4 Maximum Transmission Unit - MTU

   The MTU of each logical interface MUST be configurable within the
   range of legal MTUs for the interface.

   Many Link Layer protocols define a maximum frame size that may be
   sent.  In such cases, a router MUST NOT allow an MTU to be set which
   would allow sending of frames larger than those allowed by the Link
   Layer protocol.  However, a router SHOULD be willing to receive a
   packet as large as the maximum frame size even if that is larger than
   the MTU.

4.2.1  A router SHOULD count datagrams discarded.

4.2.2.1 Source route options - we probably should implement processing
of source routes, even though mostly the security policy will prevent
their use.

5.3.13.4 Source Route Options

   A router MUST implement support for source route options in forwarded
   packets.  A router MAY implement a configuration option that, when
   enabled, causes all source-routed packets to be discarded.  However,
   such an option MUST NOT be enabled by default.

5.3.13.5 Record Route Option

   Routers MUST support the Record Route option in forwarded packets.

   A router MAY provide a configuration option that, if enabled, will
   cause the router to ignore (i.e., pass through unchanged) Record
   Route options in forwarded packets.  If provided, such an option MUST
   default to enabling the record-route.  This option should not affect
   the processing of Record Route options in datagrams received by the
   router itself (in particular, Record Route options in ICMP echo
   requests will still be processed according to Section [4.3.3.6]).

5.3.13.6 Timestamp Option

   Routers MUST support the timestamp option in forwarded packets.  A
   timestamp value MUST follow the rules given [INTRO:2].

   If the flags field = 3 (timestamp and prespecified address), the
   router MUST add its timestamp if the next prespecified address
   matches any of the router's IP addresses.  It is not necessary that
   the prespecified address be either the address of the interface on
   which the packet arrived or the address of the interface over which
   it will be sent.


4.2.2.7 Fragmentation: RFC 791 Section 3.2

   Fragmentation, as described in [INTERNET:1], MUST be supported by a
   router.

4.2.2.8 Reassembly: RFC 791 Section 3.2

   As specified in the corresponding section of [INTRO:2], a router MUST
   support reassembly of datagrams that it delivers to itself.

4.2.2.9 Time to Live: RFC 791 Section 3.2

   Note in particular that a router MUST NOT check the TTL of a packet
   except when forwarding it.

   A router MUST NOT discard a datagram just because it was received
   with TTL equal to zero or one; if it is to the router and otherwise
   valid, the router MUST attempt to receive it.

   On messages the router originates, the IP layer MUST provide a means
   for the transport layer to set the TTL field of every datagram that
   is sent.  When a fixed TTL value is used, it MUST be configurable.


8.1 The Simple Network Management Protocol - SNMP
8.1.1 SNMP Protocol Elements

   Routers MUST be manageable by SNMP [MGT:3].  The SNMP MUST operate
   using UDP/IP as its transport and network protocols.


*/
2fe58dfd 118
3b83c932 119#include <string.h>
59230b9b
IJ
120#include <assert.h>
121#include <limits.h>
8689b3a9 122#include "secnet.h"
2fe58dfd 123#include "util.h"
7138d0c5 124#include "ipaddr.h"
9d3a4132 125#include "netlink.h"
042a8da9 126#include "process.h"
2fe58dfd 127
a0b107b8
IJ
/* Debug tracing for this file.  When NETLINK_DEBUG is defined,
 * MDEBUG(...) forwards its arguments to Message() at M_DEBUG level;
 * otherwise it expands to a no-op expression and its arguments are
 * not evaluated. */
#ifdef NETLINK_DEBUG
#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
#else /* !NETLINK_DEBUG */
#define MDEBUG(...) ((void)0)
#endif /* !NETLINK_DEBUG */
133
ff05a229
SE
/* ICMP message types, and the codes used with them in this file. */
#define ICMP_TYPE_ECHO_REPLY             0

#define ICMP_TYPE_UNREACHABLE            3
#define ICMP_CODE_NET_UNREACHABLE        0
#define ICMP_CODE_PROTOCOL_UNREACHABLE   2
#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
#define ICMP_CODE_NET_PROHIBITED        13

#define ICMP_TYPE_ECHO_REQUEST           8

#define ICMP_TYPE_TIME_EXCEEDED         11
#define ICMP_CODE_TTL_EXCEEDED           0
146
/* Generic IP checksum routine (the Internet checksum of RFC 1071).
 *
 *   iph    start of the bytes to sum; no particular alignment is needed
 *          because the data is read one byte at a time.
 *   count  number of bytes to sum; if it is <= 0 nothing is summed.
 *
 * The data is treated as a sequence of big-endian 16-bit words.  If
 * count is odd, the final byte is the HIGH byte of a last word whose
 * low byte is zero (i.e. the data is padded with a zero byte).
 *
 * Returns the one's complement of the one's-complement sum, in network
 * byte order, so it can be stored directly into a header checksum
 * field.  Summing a region that already contains a correct checksum
 * yields 0. */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8; /* odd trailing byte, zero-padded */
    /* Fold the carries back in until the sum fits in 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
163
4efd681a
SE
/* ip_fast_csum: checksum of an IP header.
 *
 *   iph  start of the IP header
 *   ihl  header length in 32-bit words
 *
 * Two implementations are provided: hand-written assembler when the
 * i386 macro is defined, and otherwise a wrapper around ip_csum(). */
#ifdef i386
/*
 *      This is a version of ip_compute_csum() optimized for IP headers,
 *      which always checksum on 4 octet boundaries.
 *
 *      By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 *      Arnt Gulbrandsen.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
            "movl (%1), %0 ;\n"
            "subl $4, %2 ;\n"
            "jbe 2f ;\n"
            "addl 4(%1), %0 ;\n"
            "adcl 8(%1), %0 ;\n"
            "adcl 12(%1), %0 ;\n"
"1: adcl 16(%1), %0 ;\n"
            "lea 4(%1), %1 ;\n"
            "decl %2 ;\n"
            "jne 1b ;\n"
            "adcl $0, %0 ;\n"
            "movl %0, %2 ;\n"
            "shrl $16, %0 ;\n"
            "addw %w2, %w0 ;\n"
            "adcl $0, %0 ;\n"
            "notl %0 ;\n"
"2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* Guard the ihl*4 multiplication below against overflow */
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
208
/* IPv4 header without options, laid out as it appears on the wire.
 * Multi-byte fields hold network-byte-order values; the code in this
 * file converts them with ntohs/ntohl/htons/htonl. */
struct iphdr {
    /* version and ihl share the first byte; the bit-field order is
     * chosen so that ihl occupies the low four bits */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4,
            ihl:4;
#else
    uint8_t ihl:4,
            version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;   /* total datagram length in bytes */
    uint16_t id;
    uint16_t frag;      /* flags and fragment offset; masks below */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/*                 reserved        0x8000 */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;     /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
232
233struct icmphdr {
234 struct iphdr iph;
235 uint8_t type;
236 uint8_t code;
237 uint16_t check;
cfd79482 238 union icmpinfofield {
4efd681a
SE
239 uint32_t unused;
240 struct {
241 uint8_t pointer;
242 uint8_t unused1;
243 uint16_t unused2;
244 } pprob;
245 uint32_t gwaddr;
246 struct {
247 uint16_t id;
248 uint16_t seq;
249 } echo;
f3d69e41
IJ
250 struct {
251 uint16_t unused;
252 uint16_t mtu;
253 } fragneeded;
4efd681a
SE
254 } d;
255};
cfd79482
IJ
256
257static const union icmpinfofield icmp_noinfo;
4efd681a 258
826b47e9
IJ
259static void netlink_client_deliver(struct netlink *st,
260 struct netlink_client *client,
261 uint32_t source, uint32_t dest,
262 struct buffer_if *buf);
263static void netlink_host_deliver(struct netlink *st,
264 struct netlink_client *sender,
265 uint32_t source, uint32_t dest,
266 struct buffer_if *buf);
267
dbe11c20
IJ
268static const char *sender_name(struct netlink_client *sender /* or NULL */)
269{
270 return sender?sender->name:"(local)";
271}
272
70dc107b
SE
/* Forward declaration; defined later in this file. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
4efd681a 276
ff05a229
SE
277/* XXX RFC1812 4.3.2.5:
278 All other ICMP error messages (Destination Unreachable,
279 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
280 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
281 CONTROL). The IP Precedence value for these error messages MAY be
282 settable.
283 */
4efd681a 284static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
826b47e9
IJ
285 uint32_t source, uint32_t dest,
286 uint16_t len)
4efd681a
SE
287{
288 struct icmphdr *h;
289
290 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
3abd18e8 291 buffer_init(&st->icmp,calculate_max_start_pad());
4efd681a
SE
292 h=buf_append(&st->icmp,sizeof(*h));
293
294 h->iph.version=4;
295 h->iph.ihl=5;
296 h->iph.tos=0;
297 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
298 h->iph.id=0;
a6768d7c 299 h->iph.frag=0;
ff05a229 300 h->iph.ttl=255; /* XXX should be configurable */
4efd681a 301 h->iph.protocol=1;
826b47e9 302 h->iph.saddr=htonl(source);
4efd681a
SE
303 h->iph.daddr=htonl(dest);
304 h->iph.check=0;
305 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
306 h->check=0;
307 h->d.unused=0;
308
309 return h;
310}
311
312/* Fill in the ICMP checksum field correctly */
313static void netlink_icmp_csum(struct icmphdr *h)
314{
1caa23ff 315 int32_t len;
4efd681a
SE
316
317 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
318 h->check=0;
319 h->check=ip_csum(&h->type,len);
320}
321
322/* RFC1122:
323 * An ICMP error message MUST NOT be sent as the result of
324 * receiving:
325 *
326 * * an ICMP error message, or
327 *
328 * * a datagram destined to an IP broadcast or IP multicast
329 * address, or
330 *
331 * * a datagram sent as a link-layer broadcast, or
332 *
333 * * a non-initial fragment, or
334 *
335 * * a datagram whose source address does not define a single
336 * host -- e.g., a zero address, a loopback address, a
337 * broadcast address, a multicast address, or a Class E
338 * address.
339 */
340static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
341{
342 struct iphdr *iph;
8dea8d37 343 struct icmphdr *icmph;
4efd681a
SE
344 uint32_t source;
345
975820aa 346 if (buf->size < (int)sizeof(struct icmphdr)) return False;
4efd681a 347 iph=(struct iphdr *)buf->start;
8dea8d37
SE
348 icmph=(struct icmphdr *)buf->start;
349 if (iph->protocol==1) {
350 switch(icmph->type) {
686b7f1d
IJ
351 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
352 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
353 * Deprecated, reserved, unassigned and experimental
354 * options are treated as not safe to reply to.
355 */
356 case 0: /* Echo Reply */
357 case 8: /* Echo */
358 case 13: /* Timestamp */
359 case 14: /* Timestamp Reply */
360 return True;
361 default:
8dea8d37
SE
362 return False;
363 }
364 }
4efd681a 365 /* How do we spot broadcast destination addresses? */
a6768d7c 366 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
4efd681a
SE
367 source=ntohl(iph->saddr);
368 if (source==0) return False;
369 if ((source&0xff000000)==0x7f000000) return False;
370 /* How do we spot broadcast source addresses? */
371 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
372 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
373 return True;
374}
375
376/* How much of the original IP packet do we include in its ICMP
377 response? The header plus up to 64 bits. */
ff05a229
SE
378
379/* XXX TODO RFC1812:
3804.3.2.3 Original Message Header
381
382 Historically, every ICMP error message has included the Internet
383 header and at least the first 8 data bytes of the datagram that
384 triggered the error. This is no longer adequate, due to the use of
385 IP-in-IP tunneling and other technologies. Therefore, the ICMP
386 datagram SHOULD contain as much of the original datagram as possible
387 without the length of the ICMP datagram exceeding 576 bytes. The
388 returned IP header (and user data) MUST be identical to that which
389 was received, except that the router is not required to undo any
390 modifications to the IP header that are normally performed in
391 forwarding that were performed before the error was detected (e.g.,
392 decrementing the TTL, or updating options). Note that the
393 requirements of Section [4.3.3.5] supersede this requirement in some
394 cases (i.e., for a Parameter Problem message, if the problem is in a
395 modified field, the router must undo the modification). See Section
396 [4.3.3.5]).
397 */
4efd681a
SE
398static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
399{
975820aa 400 if (buf->size < (int)sizeof(struct iphdr)) return 0;
4efd681a
SE
401 struct iphdr *iph=(struct iphdr *)buf->start;
402 uint16_t hlen,plen;
403
404 hlen=iph->ihl*4;
405 /* We include the first 8 bytes of the packet data, provided they exist */
406 hlen+=8;
407 plen=ntohs(iph->tot_len);
4ea3eeb8 408 return MIN(hlen,plen);
4efd681a
SE
409}
410
70dc107b
SE
411/* client indicates where the packet we're constructing a response to
412 comes from. NULL indicates the host. */
ab62c3ed
IJ
413static void netlink_icmp_simple(struct netlink *st,
414 struct netlink_client *origsender,
415 struct buffer_if *buf,
cfd79482
IJ
416 uint8_t type, uint8_t code,
417 union icmpinfofield info)
4efd681a 418{
4efd681a
SE
419 struct icmphdr *h;
420 uint16_t len;
421
422 if (netlink_icmp_may_reply(buf)) {
975820aa 423 struct iphdr *iph=(struct iphdr *)buf->start;
826b47e9
IJ
424
425 uint32_t icmpdest = ntohl(iph->saddr);
426 uint32_t icmpsource;
427 const char *icmpsourcedebugprefix;
428 if (!st->ptp) {
429 icmpsource=st->secnet_address;
430 icmpsourcedebugprefix="";
431 } else if (origsender) {
432 /* was from peer, send reply as if from host */
433 icmpsource=st->local_address;
434 icmpsourcedebugprefix="L!";
435 } else {
436 /* was from host, send reply as if from peer */
437 icmpsource=st->secnet_address; /* actually, peer address */
438 icmpsourcedebugprefix="P!";
439 }
440 MDEBUG("%s: generating ICMP re %s[%s]->[%s]:"
441 " from %s%s type=%u code=%u\n",
442 st->name, sender_name(origsender),
443 ipaddr_to_string(ntohl(iph->saddr)),
444 ipaddr_to_string(ntohl(iph->daddr)),
445 icmpsourcedebugprefix,
446 ipaddr_to_string(icmpsource),
447 type, code);
448
4efd681a 449 len=netlink_icmp_reply_len(buf);
826b47e9 450 h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len);
cfd79482 451 h->type=type; h->code=code; h->d=info;
4f28e77e 452 BUF_ADD_BYTES(append,&st->icmp,buf->start,len);
4efd681a 453 netlink_icmp_csum(h);
826b47e9
IJ
454
455 if (!st->ptp) {
456 netlink_packet_deliver(st,NULL,&st->icmp);
457 } else if (origsender) {
458 netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp);
459 } else {
460 netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp);
461 }
4efd681a
SE
462 BUF_ASSERT_FREE(&st->icmp);
463 }
464}
465
466/*
467 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
468 * checksum.
ff05a229 469 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
4efd681a
SE
470 *
471 * Is the datagram acceptable?
472 *
473 * 1. Length at least the size of an ip header
474 * 2. Version of 4
475 * 3. Checksums correctly.
476 * 4. Doesn't have a bogus length
477 */
d714da29
IJ
478static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
479 char *errmsgbuf, int errmsgbuflen)
4efd681a 480{
d714da29
IJ
481#define BAD(...) do{ \
482 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
483 return False; \
484 }while(0)
485
975820aa 486 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
4efd681a 487 struct iphdr *iph=(struct iphdr *)buf->start;
1caa23ff 488 int32_t len;
4efd681a 489
d714da29
IJ
490 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
491 if (iph->version != 4) BAD("version %u",iph->version);
492 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
493 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
4efd681a
SE
494 len=ntohs(iph->tot_len);
495 /* There should be no padding */
d714da29
IJ
496 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
497 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
4efd681a
SE
498 /* XXX check that there's no source route specified */
499 return True;
d714da29
IJ
500
501#undef BAD
4efd681a
SE
502}
503
f3d69e41
IJ
504static const char *fragment_filter_header(uint8_t *base, long *hlp)
505{
506 const int fixedhl = sizeof(struct iphdr);
507 long hl = *hlp;
508 const uint8_t *ipend = base + hl;
509 uint8_t *op = base + fixedhl;
510 const uint8_t *ip = op;
511
512 while (ip < ipend) {
513 uint8_t opt = ip[0];
514 int remain = ipend - ip;
515 if (opt == 0x00) /* End of Options List */ break;
516 if (opt == 0x01) /* No Operation */ continue;
517 if (remain < 2) return "IPv4 options truncated at length";
518 int optlen = ip[1];
519 if (remain < optlen) return "IPv4 options truncated in option";
520 if (opt & 0x80) /* copy */ {
521 memmove(op, ip, optlen);
522 op += optlen;
523 }
524 ip += optlen;
525 }
526 while ((hl = (op - base)) & 0x3)
527 *op++ = 0x00 /* End of Option List */;
528 ((struct iphdr*)base)->ihl = hl >> 2;
529 *hlp = hl;
530
531 return 0;
532}
533
534/* Fragment or send ICMP Fragmentation Needed */
535static void netlink_maybe_fragment(struct netlink *st,
ab62c3ed 536 struct netlink_client *sender,
f3d69e41
IJ
537 netlink_deliver_fn *deliver,
538 void *deliver_dst,
539 const char *delivery_name,
540 int32_t mtu,
541 uint32_t source, uint32_t dest,
542 struct buffer_if *buf)
543{
544 struct iphdr *iph=(struct iphdr*)buf->start;
545 long hl = iph->ihl*4;
546 const char *ssource = ipaddr_to_string(source);
547
548 if (buf->size <= mtu) {
549 deliver(deliver_dst, buf);
550 return;
551 }
552
553 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
554 st->name, ssource, delivery_name, buf->size);
555
556#define BADFRAG(m, ...) \
557 Message(M_WARNING, \
558 "%s: fragmenting packet from source %s" \
559 " for transmission via %s: " m "\n", \
560 st->name, ssource, delivery_name, \
561 ## __VA_ARGS__);
562
563 unsigned orig_frag = ntohs(iph->frag);
564
565 if (orig_frag&IPHDR_FRAG_DONT) {
566 union icmpinfofield info =
567 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
ab62c3ed 568 netlink_icmp_simple(st,sender,buf,
f3d69e41
IJ
569 ICMP_TYPE_UNREACHABLE,
570 ICMP_CODE_FRAGMENTATION_REQUIRED,
571 info);
572 BUF_FREE(buf);
573 return;
574 }
575 if (mtu < hl + 8) {
576 BADFRAG("mtu %"PRId32" too small", mtu);
577 BUF_FREE(buf);
578 return;
579 }
580
581 /* we (ab)use the icmp buffer to stash the original packet */
582 struct buffer_if *orig = &st->icmp;
583 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
584 buffer_copy(orig,buf);
585 BUF_FREE(buf);
586
587 const uint8_t *startindata = orig->start + hl;
588 const uint8_t *indata = startindata;
589 const uint8_t *endindata = orig->start + orig->size;
590 _Bool filtered = 0;
591
592 for (;;) {
593 /* compute our fragment offset */
594 long dataoffset = indata - startindata
595 + (orig_frag & IPHDR_FRAG_OFF)*8;
596 assert(!(dataoffset & 7));
597 if (dataoffset > IPHDR_FRAG_OFF*8) {
598 BADFRAG("ultimate fragment offset out of range");
599 break;
600 }
601
602 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
603 buffer_init(buf,calculate_max_start_pad());
604
605 /* copy header (possibly filtered); will adjust in a bit */
606 struct iphdr *fragh = buf_append(buf, hl);
607 memcpy(fragh, orig->start, hl);
608
609 /* decide how much payload to copy and copy it */
610 long avail = mtu - hl;
611 long remain = endindata - indata;
612 long use = avail < remain ? (avail & ~(long)7) : remain;
4f28e77e 613 BUF_ADD_BYTES(append, buf, indata, use);
f3d69e41
IJ
614 indata += use;
615
616 _Bool last_frag = indata >= endindata;
617
618 /* adjust the header */
619 fragh->tot_len = htons(buf->size);
620 fragh->frag =
621 htons((orig_frag & ~IPHDR_FRAG_OFF) |
622 (last_frag ? 0 : IPHDR_FRAG_MORE) |
623 (dataoffset >> 3));
624 fragh->check = 0;
625 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
626
627 /* actually send it */
628 deliver(deliver_dst, buf);
629 if (last_frag)
630 break;
631
632 /* after copying the header for the first frag,
633 * we filter the header for the remaining frags */
634 if (!filtered++) {
635 const char *bad = fragment_filter_header(orig->start, &hl);
636 if (bad) { BADFRAG("%s", bad); break; }
637 }
638 }
639
640 BUF_FREE(orig);
641
642#undef BADFRAG
643}
644
7b6abafa 645/* Deliver a packet _to_ client; used after we have decided
55bc97e6
IJ
646 * what to do with it (and just to check that the client has
647 * actually registered a delivery function with us). */
7b6abafa
IJ
648static void netlink_client_deliver(struct netlink *st,
649 struct netlink_client *client,
650 uint32_t source, uint32_t dest,
651 struct buffer_if *buf)
652{
55bc97e6
IJ
653 if (!client->deliver) {
654 string_t s,d;
655 s=ipaddr_to_string(source);
656 d=ipaddr_to_string(dest);
657 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
658 st->name,s,d);
55bc97e6
IJ
659 BUF_FREE(buf);
660 return;
661 }
ab62c3ed 662 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
f3d69e41 663 client->mtu, source,dest,buf);
7b6abafa
IJ
664 client->outcount++;
665}
666
f928f069
IJ
667/* Deliver a packet to the host; used after we have decided that that
668 * is what to do with it. */
669static void netlink_host_deliver(struct netlink *st,
ab62c3ed 670 struct netlink_client *sender,
f928f069
IJ
671 uint32_t source, uint32_t dest,
672 struct buffer_if *buf)
673{
ab62c3ed 674 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
f3d69e41 675 st->mtu, source,dest,buf);
f928f069
IJ
676 st->outcount++;
677}
678
f2b711bd 679/* Deliver a packet. "sender"==NULL for packets from the host and packets
d3fe100d 680 generated internally in secnet. */
70dc107b 681static void netlink_packet_deliver(struct netlink *st,
f2b711bd 682 struct netlink_client *sender,
70dc107b 683 struct buffer_if *buf)
4efd681a 684{
975820aa
IJ
685 if (buf->size < (int)sizeof(struct iphdr)) {
686 Message(M_ERR,"%s: trying to deliver a too-short packet"
dbe11c20 687 " from %s!\n",st->name, sender_name(sender));
975820aa
IJ
688 BUF_FREE(buf);
689 return;
690 }
691
4efd681a
SE
692 struct iphdr *iph=(struct iphdr *)buf->start;
693 uint32_t dest=ntohl(iph->daddr);
70dc107b
SE
694 uint32_t source=ntohl(iph->saddr);
695 uint32_t best_quality;
469fd1d9
SE
696 bool_t allow_route=False;
697 bool_t found_allowed=False;
70dc107b
SE
698 int best_match;
699 int i;
2fe58dfd 700
4efd681a 701 BUF_ASSERT_USED(buf);
2fe58dfd 702
4efd681a 703 if (dest==st->secnet_address) {
4f5e39ec 704 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
4efd681a 705 BUF_FREE(buf);
2fe58dfd
SE
706 return;
707 }
4efd681a 708
f2b711bd 709 /* Packets from the host (sender==NULL) may always be routed. Packets
469fd1d9 710 from clients with the allow_route option will also be routed. */
f2b711bd 711 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
469fd1d9
SE
712 allow_route=True;
713
714 /* If !allow_route, we check the routing table anyway, and if
715 there's a suitable route with OPT_ALLOWROUTE set we use it. If
716 there's a suitable route, but none with OPT_ALLOWROUTE set then
717 we generate ICMP 'communication with destination network
718 administratively prohibited'. */
719
720 best_quality=0;
721 best_match=-1;
d3fe100d
SE
722 for (i=0; i<st->n_clients; i++) {
723 if (st->routes[i]->up &&
724 ipset_contains_addr(st->routes[i]->networks,dest)) {
469fd1d9
SE
725 /* It's an available route to the correct destination. But is
726 it better than the one we already have? */
727
728 /* If we have already found an allowed route then we don't
729 bother looking at routes we're not allowed to use. If
730 we don't yet have an allowed route we'll consider any. */
731 if (!allow_route && found_allowed) {
d3fe100d 732 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
70dc107b 733 }
469fd1d9 734
d3fe100d 735 if (st->routes[i]->link_quality>best_quality
469fd1d9 736 || best_quality==0) {
d3fe100d 737 best_quality=st->routes[i]->link_quality;
469fd1d9 738 best_match=i;
d3fe100d 739 if (st->routes[i]->options&OPT_ALLOWROUTE)
469fd1d9
SE
740 found_allowed=True;
741 /* If quality isn't perfect we may wish to
742 consider kicking the tunnel with a 0-length
743 packet to prompt it to perform a key setup.
744 Then it'll eventually decide it's up or
745 down. */
746 /* If quality is perfect and we're allowed to use the
747 route we don't need to search any more. */
748 if (best_quality>=MAXIMUM_LINK_QUALITY &&
749 (allow_route || found_allowed)) break;
4efd681a 750 }
70dc107b 751 }
469fd1d9
SE
752 }
753 if (best_match==-1) {
754 /* The packet's not going down a tunnel. It might (ought to)
755 be for the host. */
794f2398 756 if (ipset_contains_addr(st->networks,dest)) {
ab62c3ed 757 netlink_host_deliver(st,sender,source,dest,buf);
70dc107b
SE
758 BUF_ASSERT_FREE(buf);
759 } else {
469fd1d9
SE
760 string_t s,d;
761 s=ipaddr_to_string(source);
762 d=ipaddr_to_string(dest);
ff05a229 763 Message(M_DEBUG,"%s: don't know where to deliver packet "
469fd1d9 764 "(s=%s, d=%s)\n", st->name, s, d);
ab62c3ed 765 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 766 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
70dc107b 767 BUF_FREE(buf);
2fe58dfd 768 }
469fd1d9
SE
769 } else {
770 if (!allow_route &&
d3fe100d 771 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
469fd1d9
SE
772 string_t s,d;
773 s=ipaddr_to_string(source);
774 d=ipaddr_to_string(dest);
775 /* We have a usable route but aren't allowed to use it.
776 Generate ICMP destination unreachable: communication
777 with destination network administratively prohibited */
778 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
779 st->name,s,d);
469fd1d9 780
ab62c3ed 781 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 782 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
469fd1d9 783 BUF_FREE(buf);
469fd1d9 784 } else {
ea7ec970 785 if (best_quality>0) {
7b6abafa
IJ
786 netlink_client_deliver(st,st->routes[best_match],
787 source,dest,buf);
ea7ec970
SE
788 BUF_ASSERT_FREE(buf);
789 } else {
790 /* Generate ICMP destination unreachable */
ab62c3ed 791 netlink_icmp_simple(st,sender,buf,
cfd79482
IJ
792 ICMP_TYPE_UNREACHABLE,
793 ICMP_CODE_NET_UNREACHABLE,
794 icmp_noinfo);
ea7ec970
SE
795 BUF_FREE(buf);
796 }
469fd1d9 797 }
2fe58dfd 798 }
70dc107b 799 BUF_ASSERT_FREE(buf);
4efd681a
SE
800}
801
70dc107b 802static void netlink_packet_forward(struct netlink *st,
f2b711bd 803 struct netlink_client *sender,
70dc107b 804 struct buffer_if *buf)
4efd681a 805{
975820aa 806 if (buf->size < (int)sizeof(struct iphdr)) return;
4efd681a
SE
807 struct iphdr *iph=(struct iphdr *)buf->start;
808
809 BUF_ASSERT_USED(buf);
810
811 /* Packet has already been checked */
812 if (iph->ttl<=1) {
813 /* Generate ICMP time exceeded */
ab62c3ed 814 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
cfd79482 815 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
4efd681a
SE
816 BUF_FREE(buf);
817 return;
818 }
819 iph->ttl--;
820 iph->check=0;
821 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
822
f2b711bd 823 netlink_packet_deliver(st,sender,buf);
4efd681a
SE
824 BUF_ASSERT_FREE(buf);
825}
826
9d3a4132 827/* Deal with packets addressed explicitly to us */
70dc107b 828static void netlink_packet_local(struct netlink *st,
f2b711bd 829 struct netlink_client *sender,
70dc107b 830 struct buffer_if *buf)
4efd681a
SE
831{
832 struct icmphdr *h;
833
469fd1d9
SE
834 st->localcount++;
835
975820aa
IJ
836 if (buf->size < (int)sizeof(struct icmphdr)) {
837 Message(M_WARNING,"%s: short packet addressed to secnet; "
838 "ignoring it\n",st->name);
839 BUF_FREE(buf);
840 return;
841 }
4efd681a
SE
842 h=(struct icmphdr *)buf->start;
843
6e3fd952
IJ
844 unsigned fraginfo = ntohs(h->iph.frag);
845 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
846 if (!(fraginfo & IPHDR_FRAG_OFF))
847 /* report only for first fragment */
848 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
849 "ignoring it\n",st->name);
4efd681a
SE
850 BUF_FREE(buf);
851 return;
852 }
853
854 if (h->iph.protocol==1) {
855 /* It's ICMP */
ff05a229 856 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
4efd681a
SE
857 /* ICMP echo-request. Special case: we re-use the buffer
858 to construct the reply. */
ff05a229 859 h->type=ICMP_TYPE_ECHO_REPLY;
4efd681a
SE
860 h->iph.daddr=h->iph.saddr;
861 h->iph.saddr=htonl(st->secnet_address);
ff05a229 862 h->iph.ttl=255;
4efd681a
SE
863 h->iph.check=0;
864 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
865 netlink_icmp_csum(h);
70dc107b 866 netlink_packet_deliver(st,NULL,buf);
4efd681a
SE
867 return;
868 }
869 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
870 } else {
871 /* Send ICMP protocol unreachable */
ab62c3ed 872 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 873 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
4efd681a
SE
874 BUF_FREE(buf);
875 return;
876 }
877
878 BUF_FREE(buf);
879}
880
9d3a4132
SE
881/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
882 it came from. */
f2b711bd 883static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
469fd1d9 884 struct buffer_if *buf)
4efd681a 885{
4efd681a
SE
886 uint32_t source,dest;
887 struct iphdr *iph;
d714da29 888 char errmsgbuf[50];
f2b711bd 889 const char *sourcedesc=sender?sender->name:"host";
4efd681a
SE
890
891 BUF_ASSERT_USED(buf);
a28d65a5 892
d714da29
IJ
893 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
894 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
a28d65a5 895 st->name,sourcedesc,
d714da29 896 errmsgbuf);
4efd681a
SE
897 BUF_FREE(buf);
898 return;
899 }
e8b1adac 900 assert(buf->size >= (int)sizeof(struct iphdr));
4efd681a
SE
901 iph=(struct iphdr *)buf->start;
902
903 source=ntohl(iph->saddr);
904 dest=ntohl(iph->daddr);
905
d3fe100d
SE
906 /* Check source. If we don't like the source, there's no point
907 generating ICMP because we won't know how to get it to the
908 source of the packet. */
f2b711bd 909 if (sender) {
c6f79b17
SE
910 /* Check that the packet source is appropriate for the tunnel
911 it came down */
f2b711bd 912 if (!ipset_contains_addr(sender->networks,source)) {
9d3a4132
SE
913 string_t s,d;
914 s=ipaddr_to_string(source);
915 d=ipaddr_to_string(dest);
916 Message(M_WARNING,"%s: packet from tunnel %s with bad "
f2b711bd 917 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
9d3a4132
SE
918 BUF_FREE(buf);
919 return;
920 }
921 } else {
c6f79b17
SE
922 /* Check that the packet originates in our configured local
923 network, and hasn't been forwarded from elsewhere or
924 generated with the wrong source address */
794f2398 925 if (!ipset_contains_addr(st->networks,source)) {
9d3a4132
SE
926 string_t s,d;
927 s=ipaddr_to_string(source);
928 d=ipaddr_to_string(dest);
929 Message(M_WARNING,"%s: outgoing packet with bad source address "
930 "(s=%s,d=%s)\n",st->name,s,d);
9d3a4132
SE
931 BUF_FREE(buf);
932 return;
933 }
4efd681a 934 }
c6f79b17 935
794f2398
SE
936 /* If this is a point-to-point device we don't examine the
937 destination address at all; we blindly send it down our
938 one-and-only registered tunnel, or to the host, depending on
d3fe100d
SE
939 where it came from. It's up to external software to check
940 address validity and generate ICMP, etc. */
c6f79b17 941 if (st->ptp) {
f2b711bd 942 if (sender) {
ab62c3ed 943 netlink_host_deliver(st,sender,source,dest,buf);
c6f79b17 944 } else {
7b6abafa 945 netlink_client_deliver(st,st->clients,source,dest,buf);
c6f79b17
SE
946 }
947 BUF_ASSERT_FREE(buf);
948 return;
949 }
950
d3fe100d
SE
951 /* st->secnet_address needs checking before matching destination
952 addresses */
2fe58dfd 953 if (dest==st->secnet_address) {
f2b711bd 954 netlink_packet_local(st,sender,buf);
4efd681a 955 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
956 return;
957 }
f2b711bd 958 netlink_packet_forward(st,sender,buf);
4efd681a
SE
959 BUF_ASSERT_FREE(buf);
960}
961
469fd1d9
SE
962static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
963{
964 struct netlink_client *c=sst;
965 struct netlink *st=c->nst;
966
967 netlink_incoming(st,c,buf);
968}
969
970static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
971{
972 struct netlink *st=sst;
973
974 netlink_incoming(st,NULL,buf);
975}
976
d3fe100d 977static void netlink_set_quality(void *sst, uint32_t quality)
4efd681a 978{
d3fe100d
SE
979 struct netlink_client *c=sst;
980 struct netlink *st=c->nst;
4efd681a 981
d3fe100d
SE
982 c->link_quality=quality;
983 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
984 if (c->options&OPT_SOFTROUTE) {
985 st->set_routes(st->dst,c);
4efd681a 986 }
4efd681a
SE
987}
988
d3fe100d
SE
989static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
990 struct subnet_list *snets)
4efd681a 991{
1caa23ff 992 int32_t i;
d3fe100d 993 string_t net;
4efd681a 994
d3fe100d
SE
995 for (i=0; i<snets->entries; i++) {
996 net=subnet_to_string(snets->list[i]);
997 Message(loglevel,"%s ",net);
9d3a4132 998 }
4efd681a
SE
999}
1000
042a8da9 1001static void netlink_dump_routes(struct netlink *st, bool_t requested)
9d3a4132
SE
1002{
1003 int i;
1004 string_t net;
042a8da9 1005 uint32_t c=M_INFO;
9d3a4132 1006
042a8da9 1007 if (requested) c=M_WARNING;
469fd1d9
SE
1008 if (st->ptp) {
1009 net=ipaddr_to_string(st->secnet_address);
34d3bf4c 1010 Message(c,"%s: point-to-point (remote end is %s); routes: ",
469fd1d9 1011 st->name, net);
d3fe100d 1012 netlink_output_subnets(st,c,st->clients->subnets);
469fd1d9
SE
1013 Message(c,"\n");
1014 } else {
1015 Message(c,"%s: routing table:\n",st->name);
d3fe100d
SE
1016 for (i=0; i<st->n_clients; i++) {
1017 netlink_output_subnets(st,c,st->routes[i]->subnets);
ff05a229 1018 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
ea7ec970 1019 "quality %d,use %d,pri %lu)\n",
d3fe100d 1020 st->routes[i]->name,
ff05a229
SE
1021 st->routes[i]->up?"up":"down",
1022 st->routes[i]->mtu,
d3fe100d
SE
1023 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
1024 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
d3fe100d 1025 st->routes[i]->link_quality,
ea7ec970
SE
1026 st->routes[i]->outcount,
1027 (unsigned long)st->routes[i]->priority);
469fd1d9
SE
1028 }
1029 net=ipaddr_to_string(st->secnet_address);
1030 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
1031 net,st->name,st->localcount);
794f2398
SE
1032 for (i=0; i<st->subnets->entries; i++) {
1033 net=subnet_to_string(st->subnets->list[i]);
1034 Message(c,"%s ",net);
469fd1d9 1035 }
794f2398
SE
1036 if (i>0)
1037 Message(c,"-> host (use %d)\n",st->outcount);
9d3a4132
SE
1038 }
1039}
1040
d3fe100d
SE
1041/* ap is a pointer to a member of the routes array */
1042static int netlink_compare_client_priority(const void *ap, const void *bp)
70dc107b 1043{
d3fe100d
SE
1044 const struct netlink_client *const*a=ap;
1045 const struct netlink_client *const*b=bp;
70dc107b 1046
d3fe100d
SE
1047 if ((*a)->priority==(*b)->priority) return 0;
1048 if ((*a)->priority<(*b)->priority) return 1;
70dc107b
SE
1049 return -1;
1050}
1051
1052static void netlink_phase_hook(void *sst, uint32_t new_phase)
1053{
1054 struct netlink *st=sst;
1055 struct netlink_client *c;
1caa23ff 1056 int32_t i;
70dc107b
SE
1057
1058 /* All the networks serviced by the various tunnels should now
1059 * have been registered. We build a routing table by sorting the
d3fe100d 1060 * clients by priority. */
b4ececfc 1061 NEW_ARY(st->routes,st->n_clients);
70dc107b
SE
1062 /* Fill the table */
1063 i=0;
59230b9b
IJ
1064 for (c=st->clients; c; c=c->next) {
1065 assert(i<INT_MAX);
d3fe100d 1066 st->routes[i++]=c;
59230b9b 1067 }
d3fe100d
SE
1068 /* Sort the table in descending order of priority */
1069 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1070 netlink_compare_client_priority);
9d3a4132 1071
042a8da9
SE
1072 netlink_dump_routes(st,False);
1073}
1074
1075static void netlink_signal_handler(void *sst, int signum)
1076{
1077 struct netlink *st=sst;
1078 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1079 netlink_dump_routes(st,True);
70dc107b
SE
1080}
1081
1caa23ff 1082static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
d3fe100d
SE
1083{
1084 struct netlink_client *c=sst;
1085
1086 c->mtu=new_mtu;
1087}
1088
469fd1d9 1089static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1c085348 1090 void *dst, uint32_t *localmtu_r)
469fd1d9
SE
1091{
1092 struct netlink_client *c=sst;
1c085348 1093 struct netlink *st=c->nst;
469fd1d9 1094
469fd1d9
SE
1095 c->deliver=deliver;
1096 c->dst=dst;
1c085348
IJ
1097
1098 if (localmtu_r)
1099 *localmtu_r=st->mtu;
469fd1d9
SE
1100}
1101
1102static struct flagstr netlink_option_table[]={
1103 { "soft", OPT_SOFTROUTE },
1104 { "allow-route", OPT_ALLOWROUTE },
1105 { NULL, 0}
1106};
1107/* This is the routine that gets called when the closure that's
1108 returned by an invocation of a netlink device closure (eg. tun,
1109 userv-ipif) is invoked. It's used to create routes and pass in
1110 information about them; the closure it returns is used by site
1111 code. */
1112static closure_t *netlink_inst_create(struct netlink *st,
1113 struct cloc loc, dict_t *dict)
1114{
1115 struct netlink_client *c;
1116 string_t name;
794f2398 1117 struct ipset *networks;
1caa23ff
IJ
1118 uint32_t options,priority;
1119 int32_t mtu;
794f2398 1120 list_t *l;
469fd1d9
SE
1121
1122 name=dict_read_string(dict, "name", True, st->name, loc);
1123
794f2398
SE
1124 l=dict_lookup(dict,"routes");
1125 if (!l)
1126 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1127 networks=string_list_to_ipset(l,loc,st->name,"routes");
469fd1d9
SE
1128 options=string_list_to_word(dict_lookup(dict,"options"),
1129 netlink_option_table,st->name);
1130
d3fe100d
SE
1131 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1132 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1133
1134 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
469fd1d9
SE
1135 cfgfatal(loc,st->name,"this netlink device does not support "
1136 "soft routes.\n");
1137 return NULL;
1138 }
1139
1140 if (options&OPT_SOFTROUTE) {
1141 /* XXX for now we assume that soft routes require root privilege;
1142 this may not always be true. The device driver can tell us. */
1143 require_root_privileges=True;
1144 require_root_privileges_explanation="netlink: soft routes";
1145 if (st->ptp) {
1146 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1147 "soft routes.\n");
1148 return NULL;
1149 }
1150 }
1151
794f2398
SE
1152 /* Check that nets are a subset of st->remote_networks;
1153 refuse to register if they are not. */
1154 if (!ipset_is_subset(st->remote_networks,networks)) {
1155 cfgfatal(loc,st->name,"routes are not allowed\n");
469fd1d9
SE
1156 return NULL;
1157 }
1158
b7886fd4 1159 NEW(c);
469fd1d9
SE
1160 c->cl.description=name;
1161 c->cl.type=CL_NETLINK;
1162 c->cl.apply=NULL;
1163 c->cl.interface=&c->ops;
1164 c->ops.st=c;
1165 c->ops.reg=netlink_inst_reg;
1166 c->ops.deliver=netlink_inst_incoming;
1167 c->ops.set_quality=netlink_set_quality;
d3fe100d 1168 c->ops.set_mtu=netlink_inst_set_mtu;
469fd1d9
SE
1169 c->nst=st;
1170
1171 c->networks=networks;
794f2398 1172 c->subnets=ipset_to_subnet_list(networks);
d3fe100d 1173 c->priority=priority;
469fd1d9
SE
1174 c->deliver=NULL;
1175 c->dst=NULL;
1176 c->name=name;
f208b9a9 1177 c->link_quality=LINK_QUALITY_UNUSED;
d3fe100d
SE
1178 c->mtu=mtu?mtu:st->mtu;
1179 c->options=options;
1180 c->outcount=0;
1181 c->up=False;
1182 c->kup=False;
469fd1d9
SE
1183 c->next=st->clients;
1184 st->clients=c;
59230b9b 1185 assert(st->n_clients < INT_MAX);
d3fe100d 1186 st->n_clients++;
469fd1d9
SE
1187
1188 return &c->cl;
1189}
1190
1191static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1192 dict_t *context, list_t *args)
1193{
1194 struct netlink *st=self->interface;
1195
1196 dict_t *dict;
1197 item_t *item;
1198 closure_t *cl;
1199
469fd1d9
SE
1200 item=list_elem(args,0);
1201 if (!item || item->type!=t_dict) {
1202 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1203 }
1204 dict=item->data.dict;
1205
1206 cl=netlink_inst_create(st,loc,dict);
1207
1208 return new_closure(cl);
1209}
1210
9d3a4132
SE
1211netlink_deliver_fn *netlink_init(struct netlink *st,
1212 void *dst, struct cloc loc,
fe5e9cc4 1213 dict_t *dict, cstring_t description,
d3fe100d 1214 netlink_route_fn *set_routes,
9d3a4132 1215 netlink_deliver_fn *to_host)
4efd681a 1216{
c6f79b17 1217 item_t *sa, *ptpa;
794f2398 1218 list_t *l;
c6f79b17 1219
4efd681a
SE
1220 st->dst=dst;
1221 st->cl.description=description;
469fd1d9
SE
1222 st->cl.type=CL_PURE;
1223 st->cl.apply=netlink_inst_apply;
1224 st->cl.interface=st;
4efd681a 1225 st->clients=NULL;
d3fe100d
SE
1226 st->routes=NULL;
1227 st->n_clients=0;
1228 st->set_routes=set_routes;
4efd681a
SE
1229 st->deliver_to_host=to_host;
1230
794f2398 1231 st->name=dict_read_string(dict,"name",False,description,loc);
4efd681a 1232 if (!st->name) st->name=description;
794f2398
SE
1233 l=dict_lookup(dict,"networks");
1234 if (l)
1235 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1236 else {
4f5e39ec
SE
1237 struct ipset *empty;
1238 empty=ipset_new();
1239 st->networks=ipset_complement(empty);
1240 ipset_free(empty);
794f2398
SE
1241 }
1242 l=dict_lookup(dict,"remote-networks");
1243 if (l) {
1244 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1245 "remote-networks");
1246 } else {
1247 struct ipset *empty;
1248 empty=ipset_new();
1249 st->remote_networks=ipset_complement(empty);
1250 ipset_free(empty);
1251 }
091433c6
IJ
1252 st->local_address=string_item_to_ipaddr(
1253 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
794f2398 1254
c6f79b17 1255 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
469fd1d9 1256 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
c6f79b17
SE
1257 if (sa && ptpa) {
1258 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1259 "ptp-address in the same netlink device\n");
1260 }
1261 if (!(sa || ptpa)) {
1262 cfgfatal(loc,st->name,"you must specify secnet-address or "
1263 "ptp-address for this netlink device\n");
1264 }
1265 if (sa) {
794f2398 1266 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
c6f79b17
SE
1267 st->ptp=False;
1268 } else {
794f2398 1269 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
c6f79b17
SE
1270 st->ptp=True;
1271 }
d3fe100d
SE
1272 /* To be strictly correct we could subtract secnet_address from
1273 networks here. It shouldn't make any practical difference,
794f2398
SE
1274 though, and will make the route dump look complicated... */
1275 st->subnets=ipset_to_subnet_list(st->networks);
4efd681a 1276 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
f3d69e41 1277 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
469fd1d9
SE
1278 st->outcount=0;
1279 st->localcount=0;
70dc107b
SE
1280
1281 add_hook(PHASE_SETUP,netlink_phase_hook,st);
042a8da9 1282 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
4efd681a 1283
469fd1d9
SE
1284 /* If we're point-to-point then we return a CL_NETLINK directly,
1285 rather than a CL_NETLINK_OLD or pure closure (depending on
1286 compatibility). This CL_NETLINK is for our one and only
1287 client. Our cl.apply function is NULL. */
1288 if (st->ptp) {
1289 closure_t *cl;
1290 cl=netlink_inst_create(st,loc,dict);
1291 st->cl=*cl;
1292 }
1293 return netlink_dev_incoming;
2fe58dfd
SE
1294}
1295
9d3a4132 1296/* No connection to the kernel at all... */
2fe58dfd 1297
9d3a4132 1298struct null {
4efd681a 1299 struct netlink nl;
4efd681a 1300};
2fe58dfd 1301
d3fe100d 1302static bool_t null_set_route(void *sst, struct netlink_client *routes)
4efd681a 1303{
9d3a4132 1304 struct null *st=sst;
d3fe100d
SE
1305
1306 if (routes->up!=routes->kup) {
1307 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1308 st->nl.name,routes->name,
1309 routes->up?"up":"down");
1310 routes->kup=routes->up;
9d3a4132 1311 return True;
2fe58dfd 1312 }
9d3a4132 1313 return False;
2fe58dfd 1314}
9d3a4132 1315
/* Deliver-to-host routine for the null netlink: there is no kernel
   interface, so the packet is simply discarded.  sst is not used.

   The buffer is released before returning because netlink_incoming()
   checks BUF_ASSERT_FREE(buf) after it has handed a packet on towards
   the host; a sink that returned with the buffer still marked in use
   would trip that assertion.
   NOTE(review): this assumes the host delivery path passes the buffer
   to this routine without freeing it itself - confirm against
   netlink_host_deliver(). */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1320
1321static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1322 list_t *args)
1323{
1324 struct null *st;
4efd681a
SE
1325 item_t *item;
1326 dict_t *dict;
2fe58dfd 1327
b7886fd4 1328 NEW(st);
2fe58dfd 1329
4efd681a
SE
1330 item=list_elem(args,0);
1331 if (!item || item->type!=t_dict)
1332 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1333
1334 dict=item->data.dict;
1335
9d3a4132
SE
1336 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1337 null_deliver);
4efd681a
SE
1338
1339 return new_closure(&st->nl.cl);
2fe58dfd
SE
1340}
1341
2fe58dfd
SE
1342void netlink_module(dict_t *dict)
1343{
4efd681a 1344 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1345}