netlink: Plumb "sender" through to ICMP generation
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
1/* User-kernel network link */
2
ff05a229 3/* See RFCs 791, 792, 1123 and 1812 */
2fe58dfd 4
ff05a229
SE
5/* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
8
9/* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
12
13
14/* Points to note from RFC1812 (which may require changes in this
15 file):
16
173.3.4 Maximum Transmission Unit - MTU
18
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
21
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
27 the MTU.
28
294.2.1 A router SHOULD count datagrams discarded.
30
314.2.2.1 Source route options - we probably should implement processing
32of source routes, even though mostly the security policy will prevent
33their use.
34
355.3.13.4 Source Route Options
36
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
41
425.3.13.5 Record Route Option
43
44 Routers MUST support the Record Route option in forwarded packets.
45
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
53
545.3.13.6 Timestamp Option
55
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
58
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
64 it will be sent.
65
66
674.2.2.7 Fragmentation: RFC 791 Section 3.2
68
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
70 router.
71
724.2.2.8 Reassembly: RFC 791 Section 3.2
73
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
76
774.2.2.9 Time to Live: RFC 791 Section 3.2
78
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
81
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
85
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
89
90
918.1 The Simple Network Management Protocol - SNMP
928.1.1 SNMP Protocol Elements
93
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
96
97
98*/
2fe58dfd 99
3b83c932 100#include <string.h>
59230b9b
IJ
101#include <assert.h>
102#include <limits.h>
8689b3a9 103#include "secnet.h"
2fe58dfd 104#include "util.h"
7138d0c5 105#include "ipaddr.h"
9d3a4132 106#include "netlink.h"
042a8da9 107#include "process.h"
2fe58dfd 108
a0b107b8
IJ
109#ifdef NETLINK_DEBUG
110#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111#else /* !NETLINK_DEBUG */
112#define MDEBUG(...) ((void)0)
113#endif /* !NETLINK_DEBUG */
114
ff05a229
SE
115#define ICMP_TYPE_ECHO_REPLY 0
116
117#define ICMP_TYPE_UNREACHABLE 3
118#define ICMP_CODE_NET_UNREACHABLE 0
119#define ICMP_CODE_PROTOCOL_UNREACHABLE 2
120#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
121#define ICMP_CODE_NET_PROHIBITED 13
122
123#define ICMP_TYPE_ECHO_REQUEST 8
124
125#define ICMP_TYPE_TIME_EXCEEDED 11
126#define ICMP_CODE_TTL_EXCEEDED 0
127
/* Generic IP checksum routine (RFC 1071 ones-complement sum).
 *
 * iph:   first byte of the data to be summed
 * count: number of bytes to cover; a value <= 0 covers nothing
 *
 * Returns the complemented 16-bit sum in network byte order, ready to
 * be stored directly in a header checksum field.  Summing data whose
 * checksum field already holds the correct value yields 0.
 */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    /* Build each big-endian 16-bit word from its two bytes instead of
       dereferencing a uint16_t pointer: no misaligned loads and no
       need to cast away const. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* A trailing odd byte is the high-order half of a final word that
       is padded with a zero byte. */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back into the low 16 bits. */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
144
4efd681a
SE
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph: start of the IP header
 * ihl: header length in 32-bit words
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
            "movl (%1), %0 ;\n"
            "subl $4, %2 ;\n"
            "jbe 2f ;\n"
            "addl 4(%1), %0 ;\n"
            "adcl 8(%1), %0 ;\n"
            "adcl 12(%1), %0 ;\n"
"1:         adcl 16(%1), %0 ;\n"
            "lea 4(%1), %1 ;\n"
            "decl %2 ;\n"
            "jne 1b ;\n"
            "adcl $0, %0 ;\n"
            "movl %0, %2 ;\n"
            "shrl $16, %0 ;\n"
            "addw %w2, %w0 ;\n"
            "adcl $0, %0 ;\n"
            "notl %0 ;\n"
"2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
/*
 * Portable fallback: checksum an IP header of ihl 32-bit words by
 * handing ihl*4 bytes to ip_csum().
 *
 * The pointer is const-qualified, matching the i386 variant above, so
 * that callers holding a const pointer to the header compile cleanly
 * whichever variant is selected.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* guard the multiplication below against signed overflow */
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
189
/* Fixed part of an IPv4 header as it appears on the wire.  The code in
   this file converts the 16- and 32-bit fields with ntohs/ntohl when
   reading and htons/htonl when writing, and treats ihl as a count of
   32-bit words (it is multiplied by 4 to get a byte length). */
struct iphdr {
    /* version and ihl share the first octet; which bit-field comes
       first is selected by WORDS_BIGENDIAN */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;
#else
    uint8_t ihl:4;
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;
    uint16_t id;
    uint16_t frag;              /* flags and fragment offset, see masks */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/*  reserved                       0x8000 */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;             /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
213
214struct icmphdr {
215 struct iphdr iph;
216 uint8_t type;
217 uint8_t code;
218 uint16_t check;
cfd79482 219 union icmpinfofield {
4efd681a
SE
220 uint32_t unused;
221 struct {
222 uint8_t pointer;
223 uint8_t unused1;
224 uint16_t unused2;
225 } pprob;
226 uint32_t gwaddr;
227 struct {
228 uint16_t id;
229 uint16_t seq;
230 } echo;
f3d69e41
IJ
231 struct {
232 uint16_t unused;
233 uint16_t mtu;
234 } fragneeded;
4efd681a
SE
235 } d;
236};
cfd79482
IJ
237
238static const union icmpinfofield icmp_noinfo;
4efd681a 239
dbe11c20
IJ
240static const char *sender_name(struct netlink_client *sender /* or NULL */)
241{
242 return sender?sender->name:"(local)";
243}
244
70dc107b
SE
245static void netlink_packet_deliver(struct netlink *st,
246 struct netlink_client *client,
247 struct buffer_if *buf);
4efd681a 248
ff05a229
SE
249/* XXX RFC1812 4.3.2.5:
250 All other ICMP error messages (Destination Unreachable,
251 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
252 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
253 CONTROL). The IP Precedence value for these error messages MAY be
254 settable.
255 */
4efd681a
SE
256static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
257 uint32_t dest,uint16_t len)
258{
259 struct icmphdr *h;
260
261 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
3abd18e8 262 buffer_init(&st->icmp,calculate_max_start_pad());
4efd681a
SE
263 h=buf_append(&st->icmp,sizeof(*h));
264
265 h->iph.version=4;
266 h->iph.ihl=5;
267 h->iph.tos=0;
268 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
269 h->iph.id=0;
a6768d7c 270 h->iph.frag=0;
ff05a229 271 h->iph.ttl=255; /* XXX should be configurable */
4efd681a
SE
272 h->iph.protocol=1;
273 h->iph.saddr=htonl(st->secnet_address);
274 h->iph.daddr=htonl(dest);
275 h->iph.check=0;
276 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
277 h->check=0;
278 h->d.unused=0;
279
280 return h;
281}
282
283/* Fill in the ICMP checksum field correctly */
284static void netlink_icmp_csum(struct icmphdr *h)
285{
1caa23ff 286 int32_t len;
4efd681a
SE
287
288 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
289 h->check=0;
290 h->check=ip_csum(&h->type,len);
291}
292
293/* RFC1122:
294 * An ICMP error message MUST NOT be sent as the result of
295 * receiving:
296 *
297 * * an ICMP error message, or
298 *
299 * * a datagram destined to an IP broadcast or IP multicast
300 * address, or
301 *
302 * * a datagram sent as a link-layer broadcast, or
303 *
304 * * a non-initial fragment, or
305 *
306 * * a datagram whose source address does not define a single
307 * host -- e.g., a zero address, a loopback address, a
308 * broadcast address, a multicast address, or a Class E
309 * address.
310 */
311static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
312{
313 struct iphdr *iph;
8dea8d37 314 struct icmphdr *icmph;
4efd681a
SE
315 uint32_t source;
316
975820aa 317 if (buf->size < (int)sizeof(struct icmphdr)) return False;
4efd681a 318 iph=(struct iphdr *)buf->start;
8dea8d37
SE
319 icmph=(struct icmphdr *)buf->start;
320 if (iph->protocol==1) {
321 switch(icmph->type) {
686b7f1d
IJ
322 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
323 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
324 * Deprecated, reserved, unassigned and experimental
325 * options are treated as not safe to reply to.
326 */
327 case 0: /* Echo Reply */
328 case 8: /* Echo */
329 case 13: /* Timestamp */
330 case 14: /* Timestamp Reply */
331 return True;
332 default:
8dea8d37
SE
333 return False;
334 }
335 }
4efd681a 336 /* How do we spot broadcast destination addresses? */
a6768d7c 337 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
4efd681a
SE
338 source=ntohl(iph->saddr);
339 if (source==0) return False;
340 if ((source&0xff000000)==0x7f000000) return False;
341 /* How do we spot broadcast source addresses? */
342 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
343 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
344 return True;
345}
346
347/* How much of the original IP packet do we include in its ICMP
348 response? The header plus up to 64 bits. */
ff05a229
SE
349
350/* XXX TODO RFC1812:
3514.3.2.3 Original Message Header
352
353 Historically, every ICMP error message has included the Internet
354 header and at least the first 8 data bytes of the datagram that
355 triggered the error. This is no longer adequate, due to the use of
356 IP-in-IP tunneling and other technologies. Therefore, the ICMP
357 datagram SHOULD contain as much of the original datagram as possible
358 without the length of the ICMP datagram exceeding 576 bytes. The
359 returned IP header (and user data) MUST be identical to that which
360 was received, except that the router is not required to undo any
361 modifications to the IP header that are normally performed in
362 forwarding that were performed before the error was detected (e.g.,
363 decrementing the TTL, or updating options). Note that the
364 requirements of Section [4.3.3.5] supersede this requirement in some
365 cases (i.e., for a Parameter Problem message, if the problem is in a
366 modified field, the router must undo the modification). See Section
367 [4.3.3.5]).
368 */
4efd681a
SE
369static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
370{
975820aa 371 if (buf->size < (int)sizeof(struct iphdr)) return 0;
4efd681a
SE
372 struct iphdr *iph=(struct iphdr *)buf->start;
373 uint16_t hlen,plen;
374
375 hlen=iph->ihl*4;
376 /* We include the first 8 bytes of the packet data, provided they exist */
377 hlen+=8;
378 plen=ntohs(iph->tot_len);
379 return (hlen>plen?plen:hlen);
380}
381
70dc107b
SE
382/* client indicates where the packet we're constructing a response to
383 comes from. NULL indicates the host. */
ab62c3ed
IJ
384static void netlink_icmp_simple(struct netlink *st,
385 struct netlink_client *origsender,
386 struct buffer_if *buf,
cfd79482
IJ
387 uint8_t type, uint8_t code,
388 union icmpinfofield info)
4efd681a 389{
4efd681a
SE
390 struct icmphdr *h;
391 uint16_t len;
392
393 if (netlink_icmp_may_reply(buf)) {
975820aa 394 struct iphdr *iph=(struct iphdr *)buf->start;
4efd681a
SE
395 len=netlink_icmp_reply_len(buf);
396 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
cfd79482 397 h->type=type; h->code=code; h->d=info;
4efd681a
SE
398 memcpy(buf_append(&st->icmp,len),buf->start,len);
399 netlink_icmp_csum(h);
70dc107b 400 netlink_packet_deliver(st,NULL,&st->icmp);
4efd681a
SE
401 BUF_ASSERT_FREE(&st->icmp);
402 }
403}
404
405/*
406 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
407 * checksum.
ff05a229 408 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
4efd681a
SE
409 *
410 * Is the datagram acceptable?
411 *
412 * 1. Length at least the size of an ip header
413 * 2. Version of 4
414 * 3. Checksums correctly.
415 * 4. Doesn't have a bogus length
416 */
d714da29
IJ
417static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
418 char *errmsgbuf, int errmsgbuflen)
4efd681a 419{
d714da29
IJ
420#define BAD(...) do{ \
421 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
422 return False; \
423 }while(0)
424
975820aa 425 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
4efd681a 426 struct iphdr *iph=(struct iphdr *)buf->start;
1caa23ff 427 int32_t len;
4efd681a 428
d714da29
IJ
429 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
430 if (iph->version != 4) BAD("version %u",iph->version);
431 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
432 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
4efd681a
SE
433 len=ntohs(iph->tot_len);
434 /* There should be no padding */
d714da29
IJ
435 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
436 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
4efd681a
SE
437 /* XXX check that there's no source route specified */
438 return True;
d714da29
IJ
439
440#undef BAD
4efd681a
SE
441}
442
f3d69e41
IJ
/* Rewrite, in place, the IP header at base so that it is suitable for
 * the second and later fragments of a datagram: only options with the
 * "copy" bit (0x80) set are kept; all others are dropped.
 *
 * base: start of the IP header
 * hlp:  in: current header length in bytes; out: new header length
 *
 * The surviving options are packed directly after the fixed header,
 * padded with End-of-Options bytes to a multiple of 4, and the ihl
 * field is updated to match.
 *
 * Returns 0 on success, or a static string describing why the options
 * could not be parsed (in which case *hlp is left untouched).
 */
static const char *fragment_filter_header(uint8_t *base, long *hlp)
{
    const int fixedhl = sizeof(struct iphdr);
    long hl = *hlp;
    const uint8_t *ipend = base + hl;
    uint8_t *op = base + fixedhl;       /* where the next kept option goes */
    const uint8_t *ip = op;             /* option currently being examined */

    while (ip < ipend) {
        uint8_t opt = ip[0];
        int remain = ipend - ip;
        if (opt == 0x00) /* End of Options List */ break;
        if (opt == 0x01) /* No Operation */ {
            /* single-byte option: step over it, or we would spin on
               the same byte forever */
            ip++;
            continue;
        }
        if (remain < 2) return "IPv4 options truncated at length";
        int optlen = ip[1];
        /* The length covers the type and length bytes themselves, so
           anything below 2 is malformed; 0 would also stall the loop. */
        if (optlen < 2) return "IPv4 option length too small";
        if (remain < optlen) return "IPv4 options truncated in option";
        if (opt & 0x80) /* copy */ {
            /* op never runs ahead of ip, but the ranges can overlap */
            memmove(op, ip, optlen);
            op += optlen;
        }
        ip += optlen;
    }
    /* pad to a whole number of 32-bit words */
    while ((hl = (op - base)) & 0x3)
        *op++ = 0x00 /* End of Option List */;
    ((struct iphdr*)base)->ihl = hl >> 2;
    *hlp = hl;

    return 0;
}
472
473/* Fragment or send ICMP Fragmentation Needed */
474static void netlink_maybe_fragment(struct netlink *st,
ab62c3ed 475 struct netlink_client *sender,
f3d69e41
IJ
476 netlink_deliver_fn *deliver,
477 void *deliver_dst,
478 const char *delivery_name,
479 int32_t mtu,
480 uint32_t source, uint32_t dest,
481 struct buffer_if *buf)
482{
483 struct iphdr *iph=(struct iphdr*)buf->start;
484 long hl = iph->ihl*4;
485 const char *ssource = ipaddr_to_string(source);
486
487 if (buf->size <= mtu) {
488 deliver(deliver_dst, buf);
489 return;
490 }
491
492 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
493 st->name, ssource, delivery_name, buf->size);
494
495#define BADFRAG(m, ...) \
496 Message(M_WARNING, \
497 "%s: fragmenting packet from source %s" \
498 " for transmission via %s: " m "\n", \
499 st->name, ssource, delivery_name, \
500 ## __VA_ARGS__);
501
502 unsigned orig_frag = ntohs(iph->frag);
503
504 if (orig_frag&IPHDR_FRAG_DONT) {
505 union icmpinfofield info =
506 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
ab62c3ed 507 netlink_icmp_simple(st,sender,buf,
f3d69e41
IJ
508 ICMP_TYPE_UNREACHABLE,
509 ICMP_CODE_FRAGMENTATION_REQUIRED,
510 info);
511 BUF_FREE(buf);
512 return;
513 }
514 if (mtu < hl + 8) {
515 BADFRAG("mtu %"PRId32" too small", mtu);
516 BUF_FREE(buf);
517 return;
518 }
519
520 /* we (ab)use the icmp buffer to stash the original packet */
521 struct buffer_if *orig = &st->icmp;
522 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
523 buffer_copy(orig,buf);
524 BUF_FREE(buf);
525
526 const uint8_t *startindata = orig->start + hl;
527 const uint8_t *indata = startindata;
528 const uint8_t *endindata = orig->start + orig->size;
529 _Bool filtered = 0;
530
531 for (;;) {
532 /* compute our fragment offset */
533 long dataoffset = indata - startindata
534 + (orig_frag & IPHDR_FRAG_OFF)*8;
535 assert(!(dataoffset & 7));
536 if (dataoffset > IPHDR_FRAG_OFF*8) {
537 BADFRAG("ultimate fragment offset out of range");
538 break;
539 }
540
541 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
542 buffer_init(buf,calculate_max_start_pad());
543
544 /* copy header (possibly filtered); will adjust in a bit */
545 struct iphdr *fragh = buf_append(buf, hl);
546 memcpy(fragh, orig->start, hl);
547
548 /* decide how much payload to copy and copy it */
549 long avail = mtu - hl;
550 long remain = endindata - indata;
551 long use = avail < remain ? (avail & ~(long)7) : remain;
552 memcpy(buf_append(buf, use), indata, use);
553 indata += use;
554
555 _Bool last_frag = indata >= endindata;
556
557 /* adjust the header */
558 fragh->tot_len = htons(buf->size);
559 fragh->frag =
560 htons((orig_frag & ~IPHDR_FRAG_OFF) |
561 (last_frag ? 0 : IPHDR_FRAG_MORE) |
562 (dataoffset >> 3));
563 fragh->check = 0;
564 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
565
566 /* actually send it */
567 deliver(deliver_dst, buf);
568 if (last_frag)
569 break;
570
571 /* after copying the header for the first frag,
572 * we filter the header for the remaining frags */
573 if (!filtered++) {
574 const char *bad = fragment_filter_header(orig->start, &hl);
575 if (bad) { BADFRAG("%s", bad); break; }
576 }
577 }
578
579 BUF_FREE(orig);
580
581#undef BADFRAG
582}
583
7b6abafa 584/* Deliver a packet _to_ client; used after we have decided
55bc97e6
IJ
585 * what to do with it (and just to check that the client has
586 * actually registered a delivery function with us). */
7b6abafa
IJ
587static void netlink_client_deliver(struct netlink *st,
588 struct netlink_client *client,
589 uint32_t source, uint32_t dest,
590 struct buffer_if *buf)
591{
55bc97e6
IJ
592 if (!client->deliver) {
593 string_t s,d;
594 s=ipaddr_to_string(source);
595 d=ipaddr_to_string(dest);
596 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
597 st->name,s,d);
598 free(s); free(d);
599 BUF_FREE(buf);
600 return;
601 }
ab62c3ed 602 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
f3d69e41 603 client->mtu, source,dest,buf);
7b6abafa
IJ
604 client->outcount++;
605}
606
f928f069
IJ
607/* Deliver a packet to the host; used after we have decided that that
608 * is what to do with it. */
609static void netlink_host_deliver(struct netlink *st,
ab62c3ed 610 struct netlink_client *sender,
f928f069
IJ
611 uint32_t source, uint32_t dest,
612 struct buffer_if *buf)
613{
ab62c3ed 614 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
f3d69e41 615 st->mtu, source,dest,buf);
f928f069
IJ
616 st->outcount++;
617}
618
f2b711bd 619/* Deliver a packet. "sender"==NULL for packets from the host and packets
d3fe100d 620 generated internally in secnet. */
70dc107b 621static void netlink_packet_deliver(struct netlink *st,
f2b711bd 622 struct netlink_client *sender,
70dc107b 623 struct buffer_if *buf)
4efd681a 624{
975820aa
IJ
625 if (buf->size < (int)sizeof(struct iphdr)) {
626 Message(M_ERR,"%s: trying to deliver a too-short packet"
dbe11c20 627 " from %s!\n",st->name, sender_name(sender));
975820aa
IJ
628 BUF_FREE(buf);
629 return;
630 }
631
4efd681a
SE
632 struct iphdr *iph=(struct iphdr *)buf->start;
633 uint32_t dest=ntohl(iph->daddr);
70dc107b
SE
634 uint32_t source=ntohl(iph->saddr);
635 uint32_t best_quality;
469fd1d9
SE
636 bool_t allow_route=False;
637 bool_t found_allowed=False;
70dc107b
SE
638 int best_match;
639 int i;
2fe58dfd 640
4efd681a 641 BUF_ASSERT_USED(buf);
2fe58dfd 642
4efd681a 643 if (dest==st->secnet_address) {
4f5e39ec 644 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
4efd681a 645 BUF_FREE(buf);
2fe58dfd
SE
646 return;
647 }
4efd681a 648
f2b711bd 649 /* Packets from the host (sender==NULL) may always be routed. Packets
469fd1d9 650 from clients with the allow_route option will also be routed. */
f2b711bd 651 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
469fd1d9
SE
652 allow_route=True;
653
654 /* If !allow_route, we check the routing table anyway, and if
655 there's a suitable route with OPT_ALLOWROUTE set we use it. If
656 there's a suitable route, but none with OPT_ALLOWROUTE set then
657 we generate ICMP 'communication with destination network
658 administratively prohibited'. */
659
660 best_quality=0;
661 best_match=-1;
d3fe100d
SE
662 for (i=0; i<st->n_clients; i++) {
663 if (st->routes[i]->up &&
664 ipset_contains_addr(st->routes[i]->networks,dest)) {
469fd1d9
SE
665 /* It's an available route to the correct destination. But is
666 it better than the one we already have? */
667
668 /* If we have already found an allowed route then we don't
669 bother looking at routes we're not allowed to use. If
670 we don't yet have an allowed route we'll consider any. */
671 if (!allow_route && found_allowed) {
d3fe100d 672 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
70dc107b 673 }
469fd1d9 674
d3fe100d 675 if (st->routes[i]->link_quality>best_quality
469fd1d9 676 || best_quality==0) {
d3fe100d 677 best_quality=st->routes[i]->link_quality;
469fd1d9 678 best_match=i;
d3fe100d 679 if (st->routes[i]->options&OPT_ALLOWROUTE)
469fd1d9
SE
680 found_allowed=True;
681 /* If quality isn't perfect we may wish to
682 consider kicking the tunnel with a 0-length
683 packet to prompt it to perform a key setup.
684 Then it'll eventually decide it's up or
685 down. */
686 /* If quality is perfect and we're allowed to use the
687 route we don't need to search any more. */
688 if (best_quality>=MAXIMUM_LINK_QUALITY &&
689 (allow_route || found_allowed)) break;
4efd681a 690 }
70dc107b 691 }
469fd1d9
SE
692 }
693 if (best_match==-1) {
694 /* The packet's not going down a tunnel. It might (ought to)
695 be for the host. */
794f2398 696 if (ipset_contains_addr(st->networks,dest)) {
ab62c3ed 697 netlink_host_deliver(st,sender,source,dest,buf);
70dc107b
SE
698 BUF_ASSERT_FREE(buf);
699 } else {
469fd1d9
SE
700 string_t s,d;
701 s=ipaddr_to_string(source);
702 d=ipaddr_to_string(dest);
ff05a229 703 Message(M_DEBUG,"%s: don't know where to deliver packet "
469fd1d9
SE
704 "(s=%s, d=%s)\n", st->name, s, d);
705 free(s); free(d);
ab62c3ed 706 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 707 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
70dc107b 708 BUF_FREE(buf);
2fe58dfd 709 }
469fd1d9
SE
710 } else {
711 if (!allow_route &&
d3fe100d 712 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
469fd1d9
SE
713 string_t s,d;
714 s=ipaddr_to_string(source);
715 d=ipaddr_to_string(dest);
716 /* We have a usable route but aren't allowed to use it.
717 Generate ICMP destination unreachable: communication
718 with destination network administratively prohibited */
719 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
720 st->name,s,d);
721 free(s); free(d);
722
ab62c3ed 723 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 724 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
469fd1d9 725 BUF_FREE(buf);
469fd1d9 726 } else {
ea7ec970 727 if (best_quality>0) {
7b6abafa
IJ
728 netlink_client_deliver(st,st->routes[best_match],
729 source,dest,buf);
ea7ec970
SE
730 BUF_ASSERT_FREE(buf);
731 } else {
732 /* Generate ICMP destination unreachable */
ab62c3ed 733 netlink_icmp_simple(st,sender,buf,
cfd79482
IJ
734 ICMP_TYPE_UNREACHABLE,
735 ICMP_CODE_NET_UNREACHABLE,
736 icmp_noinfo);
ea7ec970
SE
737 BUF_FREE(buf);
738 }
469fd1d9 739 }
2fe58dfd 740 }
70dc107b 741 BUF_ASSERT_FREE(buf);
4efd681a
SE
742}
743
70dc107b 744static void netlink_packet_forward(struct netlink *st,
f2b711bd 745 struct netlink_client *sender,
70dc107b 746 struct buffer_if *buf)
4efd681a 747{
975820aa 748 if (buf->size < (int)sizeof(struct iphdr)) return;
4efd681a
SE
749 struct iphdr *iph=(struct iphdr *)buf->start;
750
751 BUF_ASSERT_USED(buf);
752
753 /* Packet has already been checked */
754 if (iph->ttl<=1) {
755 /* Generate ICMP time exceeded */
ab62c3ed 756 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
cfd79482 757 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
4efd681a
SE
758 BUF_FREE(buf);
759 return;
760 }
761 iph->ttl--;
762 iph->check=0;
763 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
764
f2b711bd 765 netlink_packet_deliver(st,sender,buf);
4efd681a
SE
766 BUF_ASSERT_FREE(buf);
767}
768
9d3a4132 769/* Deal with packets addressed explicitly to us */
70dc107b 770static void netlink_packet_local(struct netlink *st,
f2b711bd 771 struct netlink_client *sender,
70dc107b 772 struct buffer_if *buf)
4efd681a
SE
773{
774 struct icmphdr *h;
775
469fd1d9
SE
776 st->localcount++;
777
975820aa
IJ
778 if (buf->size < (int)sizeof(struct icmphdr)) {
779 Message(M_WARNING,"%s: short packet addressed to secnet; "
780 "ignoring it\n",st->name);
781 BUF_FREE(buf);
782 return;
783 }
4efd681a
SE
784 h=(struct icmphdr *)buf->start;
785
6e3fd952
IJ
786 unsigned fraginfo = ntohs(h->iph.frag);
787 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
788 if (!(fraginfo & IPHDR_FRAG_OFF))
789 /* report only for first fragment */
790 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
791 "ignoring it\n",st->name);
4efd681a
SE
792 BUF_FREE(buf);
793 return;
794 }
795
796 if (h->iph.protocol==1) {
797 /* It's ICMP */
ff05a229 798 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
4efd681a
SE
799 /* ICMP echo-request. Special case: we re-use the buffer
800 to construct the reply. */
ff05a229 801 h->type=ICMP_TYPE_ECHO_REPLY;
4efd681a
SE
802 h->iph.daddr=h->iph.saddr;
803 h->iph.saddr=htonl(st->secnet_address);
ff05a229 804 h->iph.ttl=255;
4efd681a
SE
805 h->iph.check=0;
806 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
807 netlink_icmp_csum(h);
70dc107b 808 netlink_packet_deliver(st,NULL,buf);
4efd681a
SE
809 return;
810 }
811 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
812 } else {
813 /* Send ICMP protocol unreachable */
ab62c3ed 814 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 815 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
4efd681a
SE
816 BUF_FREE(buf);
817 return;
818 }
819
820 BUF_FREE(buf);
821}
822
9d3a4132
SE
823/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
824 it came from. */
f2b711bd 825static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
469fd1d9 826 struct buffer_if *buf)
4efd681a 827{
4efd681a
SE
828 uint32_t source,dest;
829 struct iphdr *iph;
d714da29 830 char errmsgbuf[50];
f2b711bd 831 const char *sourcedesc=sender?sender->name:"host";
4efd681a
SE
832
833 BUF_ASSERT_USED(buf);
a28d65a5 834
d714da29
IJ
835 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
836 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
a28d65a5 837 st->name,sourcedesc,
d714da29 838 errmsgbuf);
4efd681a
SE
839 BUF_FREE(buf);
840 return;
841 }
e8b1adac 842 assert(buf->size >= (int)sizeof(struct iphdr));
4efd681a
SE
843 iph=(struct iphdr *)buf->start;
844
845 source=ntohl(iph->saddr);
846 dest=ntohl(iph->daddr);
847
d3fe100d
SE
848 /* Check source. If we don't like the source, there's no point
849 generating ICMP because we won't know how to get it to the
850 source of the packet. */
f2b711bd 851 if (sender) {
c6f79b17
SE
852 /* Check that the packet source is appropriate for the tunnel
853 it came down */
f2b711bd 854 if (!ipset_contains_addr(sender->networks,source)) {
9d3a4132
SE
855 string_t s,d;
856 s=ipaddr_to_string(source);
857 d=ipaddr_to_string(dest);
858 Message(M_WARNING,"%s: packet from tunnel %s with bad "
f2b711bd 859 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
9d3a4132
SE
860 free(s); free(d);
861 BUF_FREE(buf);
862 return;
863 }
864 } else {
c6f79b17
SE
865 /* Check that the packet originates in our configured local
866 network, and hasn't been forwarded from elsewhere or
867 generated with the wrong source address */
794f2398 868 if (!ipset_contains_addr(st->networks,source)) {
9d3a4132
SE
869 string_t s,d;
870 s=ipaddr_to_string(source);
871 d=ipaddr_to_string(dest);
872 Message(M_WARNING,"%s: outgoing packet with bad source address "
873 "(s=%s,d=%s)\n",st->name,s,d);
874 free(s); free(d);
875 BUF_FREE(buf);
876 return;
877 }
4efd681a 878 }
c6f79b17 879
794f2398
SE
880 /* If this is a point-to-point device we don't examine the
881 destination address at all; we blindly send it down our
882 one-and-only registered tunnel, or to the host, depending on
d3fe100d
SE
883 where it came from. It's up to external software to check
884 address validity and generate ICMP, etc. */
c6f79b17 885 if (st->ptp) {
f2b711bd 886 if (sender) {
ab62c3ed 887 netlink_host_deliver(st,sender,source,dest,buf);
c6f79b17 888 } else {
7b6abafa 889 netlink_client_deliver(st,st->clients,source,dest,buf);
c6f79b17
SE
890 }
891 BUF_ASSERT_FREE(buf);
892 return;
893 }
894
d3fe100d
SE
895 /* st->secnet_address needs checking before matching destination
896 addresses */
2fe58dfd 897 if (dest==st->secnet_address) {
f2b711bd 898 netlink_packet_local(st,sender,buf);
4efd681a 899 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
900 return;
901 }
f2b711bd 902 netlink_packet_forward(st,sender,buf);
4efd681a
SE
903 BUF_ASSERT_FREE(buf);
904}
905
469fd1d9
SE
906static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
907{
908 struct netlink_client *c=sst;
909 struct netlink *st=c->nst;
910
911 netlink_incoming(st,c,buf);
912}
913
914static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
915{
916 struct netlink *st=sst;
917
918 netlink_incoming(st,NULL,buf);
919}
920
d3fe100d 921static void netlink_set_quality(void *sst, uint32_t quality)
4efd681a 922{
d3fe100d
SE
923 struct netlink_client *c=sst;
924 struct netlink *st=c->nst;
4efd681a 925
d3fe100d
SE
926 c->link_quality=quality;
927 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
928 if (c->options&OPT_SOFTROUTE) {
929 st->set_routes(st->dst,c);
4efd681a 930 }
4efd681a
SE
931}
932
d3fe100d
SE
933static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
934 struct subnet_list *snets)
4efd681a 935{
1caa23ff 936 int32_t i;
d3fe100d 937 string_t net;
4efd681a 938
d3fe100d
SE
939 for (i=0; i<snets->entries; i++) {
940 net=subnet_to_string(snets->list[i]);
941 Message(loglevel,"%s ",net);
942 free(net);
9d3a4132 943 }
4efd681a
SE
944}
945
042a8da9 946static void netlink_dump_routes(struct netlink *st, bool_t requested)
9d3a4132
SE
947{
948 int i;
949 string_t net;
042a8da9 950 uint32_t c=M_INFO;
9d3a4132 951
042a8da9 952 if (requested) c=M_WARNING;
469fd1d9
SE
953 if (st->ptp) {
954 net=ipaddr_to_string(st->secnet_address);
34d3bf4c 955 Message(c,"%s: point-to-point (remote end is %s); routes: ",
469fd1d9 956 st->name, net);
9d3a4132 957 free(net);
d3fe100d 958 netlink_output_subnets(st,c,st->clients->subnets);
469fd1d9
SE
959 Message(c,"\n");
960 } else {
961 Message(c,"%s: routing table:\n",st->name);
d3fe100d
SE
962 for (i=0; i<st->n_clients; i++) {
963 netlink_output_subnets(st,c,st->routes[i]->subnets);
ff05a229 964 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
ea7ec970 965 "quality %d,use %d,pri %lu)\n",
d3fe100d 966 st->routes[i]->name,
ff05a229
SE
967 st->routes[i]->up?"up":"down",
968 st->routes[i]->mtu,
d3fe100d
SE
969 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
970 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
d3fe100d 971 st->routes[i]->link_quality,
ea7ec970
SE
972 st->routes[i]->outcount,
973 (unsigned long)st->routes[i]->priority);
469fd1d9
SE
974 }
975 net=ipaddr_to_string(st->secnet_address);
976 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
977 net,st->name,st->localcount);
9d3a4132 978 free(net);
794f2398
SE
979 for (i=0; i<st->subnets->entries; i++) {
980 net=subnet_to_string(st->subnets->list[i]);
981 Message(c,"%s ",net);
469fd1d9
SE
982 free(net);
983 }
794f2398
SE
984 if (i>0)
985 Message(c,"-> host (use %d)\n",st->outcount);
9d3a4132
SE
986 }
987}
988
d3fe100d
SE
989/* ap is a pointer to a member of the routes array */
990static int netlink_compare_client_priority(const void *ap, const void *bp)
70dc107b 991{
d3fe100d
SE
992 const struct netlink_client *const*a=ap;
993 const struct netlink_client *const*b=bp;
70dc107b 994
d3fe100d
SE
995 if ((*a)->priority==(*b)->priority) return 0;
996 if ((*a)->priority<(*b)->priority) return 1;
70dc107b
SE
997 return -1;
998}
999
1000static void netlink_phase_hook(void *sst, uint32_t new_phase)
1001{
1002 struct netlink *st=sst;
1003 struct netlink_client *c;
1caa23ff 1004 int32_t i;
70dc107b
SE
1005
1006 /* All the networks serviced by the various tunnels should now
1007 * have been registered. We build a routing table by sorting the
d3fe100d 1008 * clients by priority. */
bb9d0561
IJ
1009 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1010 "netlink_phase_hook");
70dc107b
SE
1011 /* Fill the table */
1012 i=0;
59230b9b
IJ
1013 for (c=st->clients; c; c=c->next) {
1014 assert(i<INT_MAX);
d3fe100d 1015 st->routes[i++]=c;
59230b9b 1016 }
d3fe100d
SE
1017 /* Sort the table in descending order of priority */
1018 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1019 netlink_compare_client_priority);
9d3a4132 1020
042a8da9
SE
1021 netlink_dump_routes(st,False);
1022}
1023
1024static void netlink_signal_handler(void *sst, int signum)
1025{
1026 struct netlink *st=sst;
1027 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1028 netlink_dump_routes(st,True);
70dc107b
SE
1029}
1030
1caa23ff 1031static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
d3fe100d
SE
1032{
1033 struct netlink_client *c=sst;
1034
1035 c->mtu=new_mtu;
1036}
1037
469fd1d9 1038static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1c085348 1039 void *dst, uint32_t *localmtu_r)
469fd1d9
SE
1040{
1041 struct netlink_client *c=sst;
1c085348 1042 struct netlink *st=c->nst;
469fd1d9 1043
469fd1d9
SE
1044 c->deliver=deliver;
1045 c->dst=dst;
1c085348
IJ
1046
1047 if (localmtu_r)
1048 *localmtu_r=st->mtu;
469fd1d9
SE
1049}
1050
1051static struct flagstr netlink_option_table[]={
1052 { "soft", OPT_SOFTROUTE },
1053 { "allow-route", OPT_ALLOWROUTE },
1054 { NULL, 0}
1055};
1056/* This is the routine that gets called when the closure that's
1057 returned by an invocation of a netlink device closure (eg. tun,
1058 userv-ipif) is invoked. It's used to create routes and pass in
1059 information about them; the closure it returns is used by site
1060 code. */
1061static closure_t *netlink_inst_create(struct netlink *st,
1062 struct cloc loc, dict_t *dict)
1063{
1064 struct netlink_client *c;
1065 string_t name;
794f2398 1066 struct ipset *networks;
1caa23ff
IJ
1067 uint32_t options,priority;
1068 int32_t mtu;
794f2398 1069 list_t *l;
469fd1d9
SE
1070
1071 name=dict_read_string(dict, "name", True, st->name, loc);
1072
794f2398
SE
1073 l=dict_lookup(dict,"routes");
1074 if (!l)
1075 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1076 networks=string_list_to_ipset(l,loc,st->name,"routes");
469fd1d9
SE
1077 options=string_list_to_word(dict_lookup(dict,"options"),
1078 netlink_option_table,st->name);
1079
d3fe100d
SE
1080 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1081 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1082
1083 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
469fd1d9
SE
1084 cfgfatal(loc,st->name,"this netlink device does not support "
1085 "soft routes.\n");
1086 return NULL;
1087 }
1088
1089 if (options&OPT_SOFTROUTE) {
1090 /* XXX for now we assume that soft routes require root privilege;
1091 this may not always be true. The device driver can tell us. */
1092 require_root_privileges=True;
1093 require_root_privileges_explanation="netlink: soft routes";
1094 if (st->ptp) {
1095 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1096 "soft routes.\n");
1097 return NULL;
1098 }
1099 }
1100
794f2398
SE
1101 /* Check that nets are a subset of st->remote_networks;
1102 refuse to register if they are not. */
1103 if (!ipset_is_subset(st->remote_networks,networks)) {
1104 cfgfatal(loc,st->name,"routes are not allowed\n");
469fd1d9
SE
1105 return NULL;
1106 }
1107
1108 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1109 c->cl.description=name;
1110 c->cl.type=CL_NETLINK;
1111 c->cl.apply=NULL;
1112 c->cl.interface=&c->ops;
1113 c->ops.st=c;
1114 c->ops.reg=netlink_inst_reg;
1115 c->ops.deliver=netlink_inst_incoming;
1116 c->ops.set_quality=netlink_set_quality;
d3fe100d 1117 c->ops.set_mtu=netlink_inst_set_mtu;
469fd1d9
SE
1118 c->nst=st;
1119
1120 c->networks=networks;
794f2398 1121 c->subnets=ipset_to_subnet_list(networks);
d3fe100d 1122 c->priority=priority;
469fd1d9
SE
1123 c->deliver=NULL;
1124 c->dst=NULL;
1125 c->name=name;
f208b9a9 1126 c->link_quality=LINK_QUALITY_UNUSED;
d3fe100d
SE
1127 c->mtu=mtu?mtu:st->mtu;
1128 c->options=options;
1129 c->outcount=0;
1130 c->up=False;
1131 c->kup=False;
469fd1d9
SE
1132 c->next=st->clients;
1133 st->clients=c;
59230b9b 1134 assert(st->n_clients < INT_MAX);
d3fe100d 1135 st->n_clients++;
469fd1d9
SE
1136
1137 return &c->cl;
1138}
1139
1140static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1141 dict_t *context, list_t *args)
1142{
1143 struct netlink *st=self->interface;
1144
1145 dict_t *dict;
1146 item_t *item;
1147 closure_t *cl;
1148
469fd1d9
SE
1149 item=list_elem(args,0);
1150 if (!item || item->type!=t_dict) {
1151 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1152 }
1153 dict=item->data.dict;
1154
1155 cl=netlink_inst_create(st,loc,dict);
1156
1157 return new_closure(cl);
1158}
1159
9d3a4132
SE
1160netlink_deliver_fn *netlink_init(struct netlink *st,
1161 void *dst, struct cloc loc,
fe5e9cc4 1162 dict_t *dict, cstring_t description,
d3fe100d 1163 netlink_route_fn *set_routes,
9d3a4132 1164 netlink_deliver_fn *to_host)
4efd681a 1165{
c6f79b17 1166 item_t *sa, *ptpa;
794f2398 1167 list_t *l;
c6f79b17 1168
4efd681a
SE
1169 st->dst=dst;
1170 st->cl.description=description;
469fd1d9
SE
1171 st->cl.type=CL_PURE;
1172 st->cl.apply=netlink_inst_apply;
1173 st->cl.interface=st;
4efd681a 1174 st->clients=NULL;
d3fe100d
SE
1175 st->routes=NULL;
1176 st->n_clients=0;
1177 st->set_routes=set_routes;
4efd681a
SE
1178 st->deliver_to_host=to_host;
1179
794f2398 1180 st->name=dict_read_string(dict,"name",False,description,loc);
4efd681a 1181 if (!st->name) st->name=description;
794f2398
SE
1182 l=dict_lookup(dict,"networks");
1183 if (l)
1184 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1185 else {
4f5e39ec
SE
1186 struct ipset *empty;
1187 empty=ipset_new();
1188 st->networks=ipset_complement(empty);
1189 ipset_free(empty);
794f2398
SE
1190 }
1191 l=dict_lookup(dict,"remote-networks");
1192 if (l) {
1193 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1194 "remote-networks");
1195 } else {
1196 struct ipset *empty;
1197 empty=ipset_new();
1198 st->remote_networks=ipset_complement(empty);
1199 ipset_free(empty);
1200 }
1201
c6f79b17 1202 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
469fd1d9 1203 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
c6f79b17
SE
1204 if (sa && ptpa) {
1205 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1206 "ptp-address in the same netlink device\n");
1207 }
1208 if (!(sa || ptpa)) {
1209 cfgfatal(loc,st->name,"you must specify secnet-address or "
1210 "ptp-address for this netlink device\n");
1211 }
1212 if (sa) {
794f2398 1213 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
c6f79b17
SE
1214 st->ptp=False;
1215 } else {
794f2398 1216 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
c6f79b17
SE
1217 st->ptp=True;
1218 }
d3fe100d
SE
1219 /* To be strictly correct we could subtract secnet_address from
1220 networks here. It shouldn't make any practical difference,
794f2398
SE
1221 though, and will make the route dump look complicated... */
1222 st->subnets=ipset_to_subnet_list(st->networks);
4efd681a 1223 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
f3d69e41 1224 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
469fd1d9
SE
1225 st->outcount=0;
1226 st->localcount=0;
70dc107b
SE
1227
1228 add_hook(PHASE_SETUP,netlink_phase_hook,st);
042a8da9 1229 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
4efd681a 1230
469fd1d9
SE
1231 /* If we're point-to-point then we return a CL_NETLINK directly,
1232 rather than a CL_NETLINK_OLD or pure closure (depending on
1233 compatibility). This CL_NETLINK is for our one and only
1234 client. Our cl.apply function is NULL. */
1235 if (st->ptp) {
1236 closure_t *cl;
1237 cl=netlink_inst_create(st,loc,dict);
1238 st->cl=*cl;
1239 }
1240 return netlink_dev_incoming;
2fe58dfd
SE
1241}
1242
9d3a4132 1243/* No connection to the kernel at all... */
2fe58dfd 1244
9d3a4132 1245struct null {
4efd681a 1246 struct netlink nl;
4efd681a 1247};
2fe58dfd 1248
d3fe100d 1249static bool_t null_set_route(void *sst, struct netlink_client *routes)
4efd681a 1250{
9d3a4132 1251 struct null *st=sst;
d3fe100d
SE
1252
1253 if (routes->up!=routes->kup) {
1254 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1255 st->nl.name,routes->name,
1256 routes->up?"up":"down");
1257 routes->kup=routes->up;
9d3a4132 1258 return True;
2fe58dfd 1259 }
9d3a4132 1260 return False;
2fe58dfd 1261}
9d3a4132 1262
/* Host delivery function for the null netlink: there is nowhere to send
   the packet, so this does nothing.
   NOTE(review): buf is left untouched here (not freed) - confirm that
   this matches what callers of deliver_to_host expect. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
}
1267
1268static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1269 list_t *args)
1270{
1271 struct null *st;
4efd681a
SE
1272 item_t *item;
1273 dict_t *dict;
2fe58dfd 1274
4efd681a 1275 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1276
4efd681a
SE
1277 item=list_elem(args,0);
1278 if (!item || item->type!=t_dict)
1279 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1280
1281 dict=item->data.dict;
1282
9d3a4132
SE
1283 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1284 null_deliver);
4efd681a
SE
1285
1286 return new_closure(&st->nl.cl);
2fe58dfd
SE
1287}
1288
2fe58dfd
SE
1289void netlink_module(dict_t *dict)
1290{
4efd681a 1291 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1292}