util.h: Provide MIN and MAX macros
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
/* User-kernel network link */

/* See RFCs 791, 792, 1123 and 1812 */

/* The netlink device is actually a router.  Tunnels are unnumbered
   point-to-point lines (RFC1812 section 2.2.7); the router has a
   single address (the 'router-id'). */

/* This is where we currently have the anti-spoofing paranoia - before
   sending a packet to the kernel we check that the tunnel it came
   over could reasonably have produced it. */
12
13
/* Points to note from RFC1812 (which may require changes in this
   file):

3.3.4 Maximum Transmission Unit - MTU

   The MTU of each logical interface MUST be configurable within the
   range of legal MTUs for the interface.

   Many Link Layer protocols define a maximum frame size that may be
   sent.  In such cases, a router MUST NOT allow an MTU to be set which
   would allow sending of frames larger than those allowed by the Link
   Layer protocol.  However, a router SHOULD be willing to receive a
   packet as large as the maximum frame size even if that is larger than
   the MTU.

4.2.1  A router SHOULD count datagrams discarded.

4.2.2.1  Source route options - we probably should implement processing
of source routes, even though mostly the security policy will prevent
their use.

5.3.13.4 Source Route Options

   A router MUST implement support for source route options in forwarded
   packets.  A router MAY implement a configuration option that, when
   enabled, causes all source-routed packets to be discarded.  However,
   such an option MUST NOT be enabled by default.

5.3.13.5 Record Route Option

   Routers MUST support the Record Route option in forwarded packets.

   A router MAY provide a configuration option that, if enabled, will
   cause the router to ignore (i.e., pass through unchanged) Record
   Route options in forwarded packets.  If provided, such an option MUST
   default to enabling the record-route.  This option should not affect
   the processing of Record Route options in datagrams received by the
   router itself (in particular, Record Route options in ICMP echo
   requests will still be processed according to Section [4.3.3.6]).

5.3.13.6 Timestamp Option

   Routers MUST support the timestamp option in forwarded packets.  A
   timestamp value MUST follow the rules given [INTRO:2].

   If the flags field = 3 (timestamp and prespecified address), the
   router MUST add its timestamp if the next prespecified address
   matches any of the router's IP addresses.  It is not necessary that
   the prespecified address be either the address of the interface on
   which the packet arrived or the address of the interface over which
   it will be sent.


4.2.2.7 Fragmentation: RFC 791 Section 3.2

   Fragmentation, as described in [INTERNET:1], MUST be supported by a
   router.

4.2.2.8 Reassembly: RFC 791 Section 3.2

   As specified in the corresponding section of [INTRO:2], a router MUST
   support reassembly of datagrams that it delivers to itself.

4.2.2.9 Time to Live: RFC 791 Section 3.2

   Note in particular that a router MUST NOT check the TTL of a packet
   except when forwarding it.

   A router MUST NOT discard a datagram just because it was received
   with TTL equal to zero or one; if it is to the router and otherwise
   valid, the router MUST attempt to receive it.

   On messages the router originates, the IP layer MUST provide a means
   for the transport layer to set the TTL field of every datagram that
   is sent.  When a fixed TTL value is used, it MUST be configurable.


8.1 The Simple Network Management Protocol - SNMP
8.1.1 SNMP Protocol Elements

   Routers MUST be manageable by SNMP [MGT:3].  The SNMP MUST operate
   using UDP/IP as its transport and network protocols.


*/
2fe58dfd 99
3b83c932 100#include <string.h>
59230b9b
IJ
101#include <assert.h>
102#include <limits.h>
8689b3a9 103#include "secnet.h"
2fe58dfd 104#include "util.h"
7138d0c5 105#include "ipaddr.h"
9d3a4132 106#include "netlink.h"
042a8da9 107#include "process.h"
2fe58dfd 108
a0b107b8
IJ
/* Debug tracing.  MDEBUG(fmt, ...) logs at M_DEBUG when this file is
 * built with NETLINK_DEBUG defined; otherwise it expands to an empty
 * expression and its arguments are discarded. */
#ifndef NETLINK_DEBUG
#define MDEBUG(...) ((void)0)
#else /* NETLINK_DEBUG */
#define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
#endif /* NETLINK_DEBUG */
114
ff05a229
SE
/* ICMP message types used in this file, each followed by the codes we
 * generate for that type. */
#define ICMP_TYPE_ECHO_REPLY               0

#define ICMP_TYPE_UNREACHABLE              3
#define ICMP_CODE_NET_UNREACHABLE          0
#define ICMP_CODE_PROTOCOL_UNREACHABLE     2
#define ICMP_CODE_FRAGMENTATION_REQUIRED   4
#define ICMP_CODE_NET_PROHIBITED          13

#define ICMP_TYPE_ECHO_REQUEST             8

#define ICMP_TYPE_TIME_EXCEEDED           11
#define ICMP_CODE_TTL_EXCEEDED             0
127
/* Generic IP checksum routine (RFC 1071).
 *
 * Sums `count' bytes starting at iph as big-endian 16-bit words.  If
 * count is odd, the final byte is the high-order byte of a last word
 * padded on the right with zero.  The folded sum is complemented and
 * returned in network byte order, ready to be stored in a checksum
 * field.  A count of zero or less sums nothing.
 *
 * Running this over data that already contains a correct checksum
 * yields 0. */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    /* Build each word from its two bytes so that iph need not be
       16-bit aligned and no type-punned load is performed. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8; /* odd trailing byte, zero-padded */
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
144
4efd681a
SE
/* Checksum of an IP header that is `ihl' 32-bit words long.
 *
 * Two implementations are provided: hand-written assembler when the
 * `i386' macro is defined, and otherwise a wrapper round ip_csum().
 * Both take a pointer to const data, so callers may pass either const
 * or non-const header pointers. */
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
            "movl (%1), %0 ;\n"
            "subl $4, %2 ;\n"
            "jbe 2f ;\n"
            "addl 4(%1), %0 ;\n"
            "adcl 8(%1), %0 ;\n"
            "adcl 12(%1), %0 ;\n"
"1: adcl 16(%1), %0 ;\n"
            "lea 4(%1), %1 ;\n"
            "decl %2 ;\n"
            "jne 1b ;\n"
            "adcl $0, %0 ;\n"
            "movl %0, %2 ;\n"
            "shrl $16, %0 ;\n"
            "addw %w2, %w0 ;\n"
            "adcl $0, %0 ;\n"
            "notl %0 ;\n"
"2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* ihl counts 32-bit words; make sure the byte count cannot overflow */
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
#endif
189
/* IPv4 header without options (RFC 791).  The code in this file
 * converts the multi-byte fields with ntohs()/ntohl() on read and
 * htons()/htonl() on write. */
struct iphdr {
    /* First octet: version and header length (ihl, in 32-bit words).
       Which bit-field comes first depends on WORDS_BIGENDIAN. */
#if !defined (WORDS_BIGENDIAN)
    uint8_t ihl:4;
    uint8_t version:4;
#else
    uint8_t version:4;
    uint8_t ihl:4;
#endif
    uint8_t tos;
    uint16_t tot_len;
    uint16_t id;
    uint16_t frag;
    /* Masks for `frag' once converted with ntohs() */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/*                 reserved        0x8000 */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
213
214struct icmphdr {
215 struct iphdr iph;
216 uint8_t type;
217 uint8_t code;
218 uint16_t check;
cfd79482 219 union icmpinfofield {
4efd681a
SE
220 uint32_t unused;
221 struct {
222 uint8_t pointer;
223 uint8_t unused1;
224 uint16_t unused2;
225 } pprob;
226 uint32_t gwaddr;
227 struct {
228 uint16_t id;
229 uint16_t seq;
230 } echo;
231 } d;
232};
cfd79482
IJ
233
234static const union icmpinfofield icmp_noinfo;
4efd681a 235
70dc107b
SE
/* Forward declaration: netlink_icmp_simple() sends the ICMP messages it
 * builds through netlink_packet_deliver(), which is defined later in
 * this file. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
4efd681a 239
ff05a229
SE
/* XXX RFC1812 4.3.2.5:
   All other ICMP error messages (Destination Unreachable,
   Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
   precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
   CONTROL).  The IP Precedence value for these error messages MAY be
   settable.
 */
4efd681a
SE
247static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
248 uint32_t dest,uint16_t len)
249{
250 struct icmphdr *h;
251
252 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
3abd18e8 253 buffer_init(&st->icmp,calculate_max_start_pad());
4efd681a
SE
254 h=buf_append(&st->icmp,sizeof(*h));
255
256 h->iph.version=4;
257 h->iph.ihl=5;
258 h->iph.tos=0;
259 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
260 h->iph.id=0;
a6768d7c 261 h->iph.frag=0;
ff05a229 262 h->iph.ttl=255; /* XXX should be configurable */
4efd681a
SE
263 h->iph.protocol=1;
264 h->iph.saddr=htonl(st->secnet_address);
265 h->iph.daddr=htonl(dest);
266 h->iph.check=0;
267 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
268 h->check=0;
269 h->d.unused=0;
270
271 return h;
272}
273
274/* Fill in the ICMP checksum field correctly */
275static void netlink_icmp_csum(struct icmphdr *h)
276{
1caa23ff 277 int32_t len;
4efd681a
SE
278
279 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
280 h->check=0;
281 h->check=ip_csum(&h->type,len);
282}
283
284/* RFC1122:
285 * An ICMP error message MUST NOT be sent as the result of
286 * receiving:
287 *
288 * * an ICMP error message, or
289 *
290 * * a datagram destined to an IP broadcast or IP multicast
291 * address, or
292 *
293 * * a datagram sent as a link-layer broadcast, or
294 *
295 * * a non-initial fragment, or
296 *
297 * * a datagram whose source address does not define a single
298 * host -- e.g., a zero address, a loopback address, a
299 * broadcast address, a multicast address, or a Class E
300 * address.
301 */
302static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
303{
304 struct iphdr *iph;
8dea8d37 305 struct icmphdr *icmph;
4efd681a
SE
306 uint32_t source;
307
975820aa 308 if (buf->size < (int)sizeof(struct icmphdr)) return False;
4efd681a 309 iph=(struct iphdr *)buf->start;
8dea8d37
SE
310 icmph=(struct icmphdr *)buf->start;
311 if (iph->protocol==1) {
312 switch(icmph->type) {
686b7f1d
IJ
313 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
314 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
315 * Deprecated, reserved, unassigned and experimental
316 * options are treated as not safe to reply to.
317 */
318 case 0: /* Echo Reply */
319 case 8: /* Echo */
320 case 13: /* Timestamp */
321 case 14: /* Timestamp Reply */
322 return True;
323 default:
8dea8d37
SE
324 return False;
325 }
326 }
4efd681a 327 /* How do we spot broadcast destination addresses? */
a6768d7c 328 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
4efd681a
SE
329 source=ntohl(iph->saddr);
330 if (source==0) return False;
331 if ((source&0xff000000)==0x7f000000) return False;
332 /* How do we spot broadcast source addresses? */
333 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
334 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
335 return True;
336}
337
/* How much of the original IP packet do we include in its ICMP
   response? The header plus up to 64 bits. */

/* XXX TODO RFC1812:
4.3.2.3 Original Message Header

   Historically, every ICMP error message has included the Internet
   header and at least the first 8 data bytes of the datagram that
   triggered the error.  This is no longer adequate, due to the use of
   IP-in-IP tunneling and other technologies.  Therefore, the ICMP
   datagram SHOULD contain as much of the original datagram as possible
   without the length of the ICMP datagram exceeding 576 bytes.  The
   returned IP header (and user data) MUST be identical to that which
   was received, except that the router is not required to undo any
   modifications to the IP header that are normally performed in
   forwarding that were performed before the error was detected (e.g.,
   decrementing the TTL, or updating options).  Note that the
   requirements of Section [4.3.3.5] supersede this requirement in some
   cases (i.e., for a Parameter Problem message, if the problem is in a
   modified field, the router must undo the modification).  See Section
   [4.3.3.5]).
 */
4efd681a
SE
360static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
361{
975820aa 362 if (buf->size < (int)sizeof(struct iphdr)) return 0;
4efd681a
SE
363 struct iphdr *iph=(struct iphdr *)buf->start;
364 uint16_t hlen,plen;
365
366 hlen=iph->ihl*4;
367 /* We include the first 8 bytes of the packet data, provided they exist */
368 hlen+=8;
369 plen=ntohs(iph->tot_len);
370 return (hlen>plen?plen:hlen);
371}
372
70dc107b
SE
373/* client indicates where the packet we're constructing a response to
374 comes from. NULL indicates the host. */
4efd681a 375static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
cfd79482
IJ
376 uint8_t type, uint8_t code,
377 union icmpinfofield info)
4efd681a 378{
4efd681a
SE
379 struct icmphdr *h;
380 uint16_t len;
381
382 if (netlink_icmp_may_reply(buf)) {
975820aa 383 struct iphdr *iph=(struct iphdr *)buf->start;
4efd681a
SE
384 len=netlink_icmp_reply_len(buf);
385 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
cfd79482 386 h->type=type; h->code=code; h->d=info;
4efd681a
SE
387 memcpy(buf_append(&st->icmp,len),buf->start,len);
388 netlink_icmp_csum(h);
70dc107b 389 netlink_packet_deliver(st,NULL,&st->icmp);
4efd681a
SE
390 BUF_ASSERT_FREE(&st->icmp);
391 }
392}
393
394/*
395 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
396 * checksum.
ff05a229 397 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
4efd681a
SE
398 *
399 * Is the datagram acceptable?
400 *
401 * 1. Length at least the size of an ip header
402 * 2. Version of 4
403 * 3. Checksums correctly.
404 * 4. Doesn't have a bogus length
405 */
d714da29
IJ
406static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
407 char *errmsgbuf, int errmsgbuflen)
4efd681a 408{
d714da29
IJ
409#define BAD(...) do{ \
410 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
411 return False; \
412 }while(0)
413
975820aa 414 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
4efd681a 415 struct iphdr *iph=(struct iphdr *)buf->start;
1caa23ff 416 int32_t len;
4efd681a 417
d714da29
IJ
418 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
419 if (iph->version != 4) BAD("version %u",iph->version);
420 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
421 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
4efd681a
SE
422 len=ntohs(iph->tot_len);
423 /* There should be no padding */
d714da29
IJ
424 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
425 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
4efd681a
SE
426 /* XXX check that there's no source route specified */
427 return True;
d714da29
IJ
428
429#undef BAD
4efd681a
SE
430}
431
7b6abafa 432/* Deliver a packet _to_ client; used after we have decided
55bc97e6
IJ
433 * what to do with it (and just to check that the client has
434 * actually registered a delivery function with us). */
7b6abafa
IJ
435static void netlink_client_deliver(struct netlink *st,
436 struct netlink_client *client,
437 uint32_t source, uint32_t dest,
438 struct buffer_if *buf)
439{
55bc97e6
IJ
440 if (!client->deliver) {
441 string_t s,d;
442 s=ipaddr_to_string(source);
443 d=ipaddr_to_string(dest);
444 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
445 st->name,s,d);
446 free(s); free(d);
447 BUF_FREE(buf);
448 return;
449 }
7b6abafa
IJ
450 client->deliver(client->dst, buf);
451 client->outcount++;
452}
453
f928f069
IJ
454/* Deliver a packet to the host; used after we have decided that that
455 * is what to do with it. */
456static void netlink_host_deliver(struct netlink *st,
457 uint32_t source, uint32_t dest,
458 struct buffer_if *buf)
459{
460 st->deliver_to_host(st->dst,buf);
461 st->outcount++;
462}
463
469fd1d9 464/* Deliver a packet. "client" is the _origin_ of the packet, not its
d3fe100d
SE
465 destination, and is NULL for packets from the host and packets
466 generated internally in secnet. */
70dc107b
SE
467static void netlink_packet_deliver(struct netlink *st,
468 struct netlink_client *client,
469 struct buffer_if *buf)
4efd681a 470{
975820aa
IJ
471 if (buf->size < (int)sizeof(struct iphdr)) {
472 Message(M_ERR,"%s: trying to deliver a too-short packet"
473 " from %s!\n",st->name, client?client->name:"(local)");
474 BUF_FREE(buf);
475 return;
476 }
477
4efd681a
SE
478 struct iphdr *iph=(struct iphdr *)buf->start;
479 uint32_t dest=ntohl(iph->daddr);
70dc107b
SE
480 uint32_t source=ntohl(iph->saddr);
481 uint32_t best_quality;
469fd1d9
SE
482 bool_t allow_route=False;
483 bool_t found_allowed=False;
70dc107b
SE
484 int best_match;
485 int i;
2fe58dfd 486
4efd681a 487 BUF_ASSERT_USED(buf);
2fe58dfd 488
4efd681a 489 if (dest==st->secnet_address) {
4f5e39ec 490 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
4efd681a 491 BUF_FREE(buf);
2fe58dfd
SE
492 return;
493 }
4efd681a 494
d3fe100d 495 /* Packets from the host (client==NULL) may always be routed. Packets
469fd1d9
SE
496 from clients with the allow_route option will also be routed. */
497 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
498 allow_route=True;
499
500 /* If !allow_route, we check the routing table anyway, and if
501 there's a suitable route with OPT_ALLOWROUTE set we use it. If
502 there's a suitable route, but none with OPT_ALLOWROUTE set then
503 we generate ICMP 'communication with destination network
504 administratively prohibited'. */
505
506 best_quality=0;
507 best_match=-1;
d3fe100d
SE
508 for (i=0; i<st->n_clients; i++) {
509 if (st->routes[i]->up &&
510 ipset_contains_addr(st->routes[i]->networks,dest)) {
469fd1d9
SE
511 /* It's an available route to the correct destination. But is
512 it better than the one we already have? */
513
514 /* If we have already found an allowed route then we don't
515 bother looking at routes we're not allowed to use. If
516 we don't yet have an allowed route we'll consider any. */
517 if (!allow_route && found_allowed) {
d3fe100d 518 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
70dc107b 519 }
469fd1d9 520
d3fe100d 521 if (st->routes[i]->link_quality>best_quality
469fd1d9 522 || best_quality==0) {
d3fe100d 523 best_quality=st->routes[i]->link_quality;
469fd1d9 524 best_match=i;
d3fe100d 525 if (st->routes[i]->options&OPT_ALLOWROUTE)
469fd1d9
SE
526 found_allowed=True;
527 /* If quality isn't perfect we may wish to
528 consider kicking the tunnel with a 0-length
529 packet to prompt it to perform a key setup.
530 Then it'll eventually decide it's up or
531 down. */
532 /* If quality is perfect and we're allowed to use the
533 route we don't need to search any more. */
534 if (best_quality>=MAXIMUM_LINK_QUALITY &&
535 (allow_route || found_allowed)) break;
4efd681a 536 }
70dc107b 537 }
469fd1d9
SE
538 }
539 if (best_match==-1) {
540 /* The packet's not going down a tunnel. It might (ought to)
541 be for the host. */
794f2398 542 if (ipset_contains_addr(st->networks,dest)) {
f928f069 543 netlink_host_deliver(st,source,dest,buf);
70dc107b
SE
544 BUF_ASSERT_FREE(buf);
545 } else {
469fd1d9
SE
546 string_t s,d;
547 s=ipaddr_to_string(source);
548 d=ipaddr_to_string(dest);
ff05a229 549 Message(M_DEBUG,"%s: don't know where to deliver packet "
469fd1d9
SE
550 "(s=%s, d=%s)\n", st->name, s, d);
551 free(s); free(d);
311e11e4 552 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 553 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
70dc107b 554 BUF_FREE(buf);
2fe58dfd 555 }
469fd1d9
SE
556 } else {
557 if (!allow_route &&
d3fe100d 558 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
469fd1d9
SE
559 string_t s,d;
560 s=ipaddr_to_string(source);
561 d=ipaddr_to_string(dest);
562 /* We have a usable route but aren't allowed to use it.
563 Generate ICMP destination unreachable: communication
564 with destination network administratively prohibited */
565 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
566 st->name,s,d);
567 free(s); free(d);
568
311e11e4 569 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 570 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
469fd1d9 571 BUF_FREE(buf);
469fd1d9 572 } else {
ea7ec970
SE
573 if (best_quality>0) {
574 /* XXX Fragment if required */
7b6abafa
IJ
575 netlink_client_deliver(st,st->routes[best_match],
576 source,dest,buf);
ea7ec970
SE
577 BUF_ASSERT_FREE(buf);
578 } else {
579 /* Generate ICMP destination unreachable */
311e11e4 580 netlink_icmp_simple(st,buf,
cfd79482
IJ
581 ICMP_TYPE_UNREACHABLE,
582 ICMP_CODE_NET_UNREACHABLE,
583 icmp_noinfo);
ea7ec970
SE
584 BUF_FREE(buf);
585 }
469fd1d9 586 }
2fe58dfd 587 }
70dc107b 588 BUF_ASSERT_FREE(buf);
4efd681a
SE
589}
590
70dc107b
SE
591static void netlink_packet_forward(struct netlink *st,
592 struct netlink_client *client,
593 struct buffer_if *buf)
4efd681a 594{
975820aa 595 if (buf->size < (int)sizeof(struct iphdr)) return;
4efd681a
SE
596 struct iphdr *iph=(struct iphdr *)buf->start;
597
598 BUF_ASSERT_USED(buf);
599
600 /* Packet has already been checked */
601 if (iph->ttl<=1) {
602 /* Generate ICMP time exceeded */
311e11e4 603 netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
cfd79482 604 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
4efd681a
SE
605 BUF_FREE(buf);
606 return;
607 }
608 iph->ttl--;
609 iph->check=0;
610 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
611
70dc107b 612 netlink_packet_deliver(st,client,buf);
4efd681a
SE
613 BUF_ASSERT_FREE(buf);
614}
615
9d3a4132 616/* Deal with packets addressed explicitly to us */
70dc107b
SE
617static void netlink_packet_local(struct netlink *st,
618 struct netlink_client *client,
619 struct buffer_if *buf)
4efd681a
SE
620{
621 struct icmphdr *h;
622
469fd1d9
SE
623 st->localcount++;
624
975820aa
IJ
625 if (buf->size < (int)sizeof(struct icmphdr)) {
626 Message(M_WARNING,"%s: short packet addressed to secnet; "
627 "ignoring it\n",st->name);
628 BUF_FREE(buf);
629 return;
630 }
4efd681a
SE
631 h=(struct icmphdr *)buf->start;
632
a6768d7c 633 if ((ntohs(h->iph.frag)&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
9d3a4132
SE
634 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
635 "ignoring it\n",st->name);
4efd681a
SE
636 BUF_FREE(buf);
637 return;
638 }
639
640 if (h->iph.protocol==1) {
641 /* It's ICMP */
ff05a229 642 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
4efd681a
SE
643 /* ICMP echo-request. Special case: we re-use the buffer
644 to construct the reply. */
ff05a229 645 h->type=ICMP_TYPE_ECHO_REPLY;
4efd681a
SE
646 h->iph.daddr=h->iph.saddr;
647 h->iph.saddr=htonl(st->secnet_address);
ff05a229 648 h->iph.ttl=255;
4efd681a
SE
649 h->iph.check=0;
650 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
651 netlink_icmp_csum(h);
70dc107b 652 netlink_packet_deliver(st,NULL,buf);
4efd681a
SE
653 return;
654 }
655 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
656 } else {
657 /* Send ICMP protocol unreachable */
311e11e4 658 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
cfd79482 659 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
4efd681a
SE
660 BUF_FREE(buf);
661 return;
662 }
663
664 BUF_FREE(buf);
665}
666
9d3a4132
SE
667/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
668 it came from. */
469fd1d9
SE
669static void netlink_incoming(struct netlink *st, struct netlink_client *client,
670 struct buffer_if *buf)
4efd681a 671{
4efd681a
SE
672 uint32_t source,dest;
673 struct iphdr *iph;
d714da29 674 char errmsgbuf[50];
a28d65a5 675 const char *sourcedesc=client?client->name:"host";
4efd681a
SE
676
677 BUF_ASSERT_USED(buf);
a28d65a5 678
d714da29
IJ
679 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
680 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
a28d65a5 681 st->name,sourcedesc,
d714da29 682 errmsgbuf);
4efd681a
SE
683 BUF_FREE(buf);
684 return;
685 }
975820aa 686 assert(buf->size >= (int)sizeof(struct icmphdr));
4efd681a
SE
687 iph=(struct iphdr *)buf->start;
688
689 source=ntohl(iph->saddr);
690 dest=ntohl(iph->daddr);
691
d3fe100d
SE
692 /* Check source. If we don't like the source, there's no point
693 generating ICMP because we won't know how to get it to the
694 source of the packet. */
9d3a4132 695 if (client) {
c6f79b17
SE
696 /* Check that the packet source is appropriate for the tunnel
697 it came down */
794f2398 698 if (!ipset_contains_addr(client->networks,source)) {
9d3a4132
SE
699 string_t s,d;
700 s=ipaddr_to_string(source);
701 d=ipaddr_to_string(dest);
702 Message(M_WARNING,"%s: packet from tunnel %s with bad "
703 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
704 free(s); free(d);
705 BUF_FREE(buf);
706 return;
707 }
708 } else {
c6f79b17
SE
709 /* Check that the packet originates in our configured local
710 network, and hasn't been forwarded from elsewhere or
711 generated with the wrong source address */
794f2398 712 if (!ipset_contains_addr(st->networks,source)) {
9d3a4132
SE
713 string_t s,d;
714 s=ipaddr_to_string(source);
715 d=ipaddr_to_string(dest);
716 Message(M_WARNING,"%s: outgoing packet with bad source address "
717 "(s=%s,d=%s)\n",st->name,s,d);
718 free(s); free(d);
719 BUF_FREE(buf);
720 return;
721 }
4efd681a 722 }
c6f79b17 723
794f2398
SE
724 /* If this is a point-to-point device we don't examine the
725 destination address at all; we blindly send it down our
726 one-and-only registered tunnel, or to the host, depending on
d3fe100d
SE
727 where it came from. It's up to external software to check
728 address validity and generate ICMP, etc. */
c6f79b17
SE
729 if (st->ptp) {
730 if (client) {
f928f069 731 netlink_host_deliver(st,source,dest,buf);
c6f79b17 732 } else {
7b6abafa 733 netlink_client_deliver(st,st->clients,source,dest,buf);
c6f79b17
SE
734 }
735 BUF_ASSERT_FREE(buf);
736 return;
737 }
738
d3fe100d
SE
739 /* st->secnet_address needs checking before matching destination
740 addresses */
2fe58dfd 741 if (dest==st->secnet_address) {
9d3a4132 742 netlink_packet_local(st,client,buf);
4efd681a 743 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
744 return;
745 }
70dc107b 746 netlink_packet_forward(st,client,buf);
4efd681a
SE
747 BUF_ASSERT_FREE(buf);
748}
749
469fd1d9
SE
750static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
751{
752 struct netlink_client *c=sst;
753 struct netlink *st=c->nst;
754
755 netlink_incoming(st,c,buf);
756}
757
758static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
759{
760 struct netlink *st=sst;
761
762 netlink_incoming(st,NULL,buf);
763}
764
d3fe100d 765static void netlink_set_quality(void *sst, uint32_t quality)
4efd681a 766{
d3fe100d
SE
767 struct netlink_client *c=sst;
768 struct netlink *st=c->nst;
4efd681a 769
d3fe100d
SE
770 c->link_quality=quality;
771 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
772 if (c->options&OPT_SOFTROUTE) {
773 st->set_routes(st->dst,c);
4efd681a 774 }
4efd681a
SE
775}
776
d3fe100d
SE
777static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
778 struct subnet_list *snets)
4efd681a 779{
1caa23ff 780 int32_t i;
d3fe100d 781 string_t net;
4efd681a 782
d3fe100d
SE
783 for (i=0; i<snets->entries; i++) {
784 net=subnet_to_string(snets->list[i]);
785 Message(loglevel,"%s ",net);
786 free(net);
9d3a4132 787 }
4efd681a
SE
788}
789
042a8da9 790static void netlink_dump_routes(struct netlink *st, bool_t requested)
9d3a4132
SE
791{
792 int i;
793 string_t net;
042a8da9 794 uint32_t c=M_INFO;
9d3a4132 795
042a8da9 796 if (requested) c=M_WARNING;
469fd1d9
SE
797 if (st->ptp) {
798 net=ipaddr_to_string(st->secnet_address);
34d3bf4c 799 Message(c,"%s: point-to-point (remote end is %s); routes: ",
469fd1d9 800 st->name, net);
9d3a4132 801 free(net);
d3fe100d 802 netlink_output_subnets(st,c,st->clients->subnets);
469fd1d9
SE
803 Message(c,"\n");
804 } else {
805 Message(c,"%s: routing table:\n",st->name);
d3fe100d
SE
806 for (i=0; i<st->n_clients; i++) {
807 netlink_output_subnets(st,c,st->routes[i]->subnets);
ff05a229 808 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
ea7ec970 809 "quality %d,use %d,pri %lu)\n",
d3fe100d 810 st->routes[i]->name,
ff05a229
SE
811 st->routes[i]->up?"up":"down",
812 st->routes[i]->mtu,
d3fe100d
SE
813 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
814 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
d3fe100d 815 st->routes[i]->link_quality,
ea7ec970
SE
816 st->routes[i]->outcount,
817 (unsigned long)st->routes[i]->priority);
469fd1d9
SE
818 }
819 net=ipaddr_to_string(st->secnet_address);
820 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
821 net,st->name,st->localcount);
9d3a4132 822 free(net);
794f2398
SE
823 for (i=0; i<st->subnets->entries; i++) {
824 net=subnet_to_string(st->subnets->list[i]);
825 Message(c,"%s ",net);
469fd1d9
SE
826 free(net);
827 }
794f2398
SE
828 if (i>0)
829 Message(c,"-> host (use %d)\n",st->outcount);
9d3a4132
SE
830 }
831}
832
d3fe100d
SE
833/* ap is a pointer to a member of the routes array */
834static int netlink_compare_client_priority(const void *ap, const void *bp)
70dc107b 835{
d3fe100d
SE
836 const struct netlink_client *const*a=ap;
837 const struct netlink_client *const*b=bp;
70dc107b 838
d3fe100d
SE
839 if ((*a)->priority==(*b)->priority) return 0;
840 if ((*a)->priority<(*b)->priority) return 1;
70dc107b
SE
841 return -1;
842}
843
844static void netlink_phase_hook(void *sst, uint32_t new_phase)
845{
846 struct netlink *st=sst;
847 struct netlink_client *c;
1caa23ff 848 int32_t i;
70dc107b
SE
849
850 /* All the networks serviced by the various tunnels should now
851 * have been registered. We build a routing table by sorting the
d3fe100d 852 * clients by priority. */
bb9d0561
IJ
853 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
854 "netlink_phase_hook");
70dc107b
SE
855 /* Fill the table */
856 i=0;
59230b9b
IJ
857 for (c=st->clients; c; c=c->next) {
858 assert(i<INT_MAX);
d3fe100d 859 st->routes[i++]=c;
59230b9b 860 }
d3fe100d
SE
861 /* Sort the table in descending order of priority */
862 qsort(st->routes,st->n_clients,sizeof(*st->routes),
863 netlink_compare_client_priority);
9d3a4132 864
042a8da9
SE
865 netlink_dump_routes(st,False);
866}
867
868static void netlink_signal_handler(void *sst, int signum)
869{
870 struct netlink *st=sst;
871 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
872 netlink_dump_routes(st,True);
70dc107b
SE
873}
874
1caa23ff 875static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
d3fe100d
SE
876{
877 struct netlink_client *c=sst;
878
879 c->mtu=new_mtu;
880}
881
469fd1d9 882static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
3abd18e8 883 void *dst)
469fd1d9
SE
884{
885 struct netlink_client *c=sst;
469fd1d9 886
469fd1d9
SE
887 c->deliver=deliver;
888 c->dst=dst;
889}
890
891static struct flagstr netlink_option_table[]={
892 { "soft", OPT_SOFTROUTE },
893 { "allow-route", OPT_ALLOWROUTE },
894 { NULL, 0}
895};
896/* This is the routine that gets called when the closure that's
897 returned by an invocation of a netlink device closure (eg. tun,
898 userv-ipif) is invoked. It's used to create routes and pass in
899 information about them; the closure it returns is used by site
900 code. */
901static closure_t *netlink_inst_create(struct netlink *st,
902 struct cloc loc, dict_t *dict)
903{
904 struct netlink_client *c;
905 string_t name;
794f2398 906 struct ipset *networks;
1caa23ff
IJ
907 uint32_t options,priority;
908 int32_t mtu;
794f2398 909 list_t *l;
469fd1d9
SE
910
911 name=dict_read_string(dict, "name", True, st->name, loc);
912
794f2398
SE
913 l=dict_lookup(dict,"routes");
914 if (!l)
915 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
916 networks=string_list_to_ipset(l,loc,st->name,"routes");
469fd1d9
SE
917 options=string_list_to_word(dict_lookup(dict,"options"),
918 netlink_option_table,st->name);
919
d3fe100d
SE
920 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
921 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
922
923 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
469fd1d9
SE
924 cfgfatal(loc,st->name,"this netlink device does not support "
925 "soft routes.\n");
926 return NULL;
927 }
928
929 if (options&OPT_SOFTROUTE) {
930 /* XXX for now we assume that soft routes require root privilege;
931 this may not always be true. The device driver can tell us. */
932 require_root_privileges=True;
933 require_root_privileges_explanation="netlink: soft routes";
934 if (st->ptp) {
935 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
936 "soft routes.\n");
937 return NULL;
938 }
939 }
940
794f2398
SE
941 /* Check that nets are a subset of st->remote_networks;
942 refuse to register if they are not. */
943 if (!ipset_is_subset(st->remote_networks,networks)) {
944 cfgfatal(loc,st->name,"routes are not allowed\n");
469fd1d9
SE
945 return NULL;
946 }
947
948 c=safe_malloc(sizeof(*c),"netlink_inst_create");
949 c->cl.description=name;
950 c->cl.type=CL_NETLINK;
951 c->cl.apply=NULL;
952 c->cl.interface=&c->ops;
953 c->ops.st=c;
954 c->ops.reg=netlink_inst_reg;
955 c->ops.deliver=netlink_inst_incoming;
956 c->ops.set_quality=netlink_set_quality;
d3fe100d 957 c->ops.set_mtu=netlink_inst_set_mtu;
469fd1d9
SE
958 c->nst=st;
959
960 c->networks=networks;
794f2398 961 c->subnets=ipset_to_subnet_list(networks);
d3fe100d 962 c->priority=priority;
469fd1d9
SE
963 c->deliver=NULL;
964 c->dst=NULL;
965 c->name=name;
f208b9a9 966 c->link_quality=LINK_QUALITY_UNUSED;
d3fe100d
SE
967 c->mtu=mtu?mtu:st->mtu;
968 c->options=options;
969 c->outcount=0;
970 c->up=False;
971 c->kup=False;
469fd1d9
SE
972 c->next=st->clients;
973 st->clients=c;
59230b9b 974 assert(st->n_clients < INT_MAX);
d3fe100d 975 st->n_clients++;
469fd1d9
SE
976
977 return &c->cl;
978}
979
980static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
981 dict_t *context, list_t *args)
982{
983 struct netlink *st=self->interface;
984
985 dict_t *dict;
986 item_t *item;
987 closure_t *cl;
988
469fd1d9
SE
989 item=list_elem(args,0);
990 if (!item || item->type!=t_dict) {
991 cfgfatal(loc,st->name,"must have a dictionary argument\n");
992 }
993 dict=item->data.dict;
994
995 cl=netlink_inst_create(st,loc,dict);
996
997 return new_closure(cl);
998}
999
9d3a4132
SE
1000netlink_deliver_fn *netlink_init(struct netlink *st,
1001 void *dst, struct cloc loc,
fe5e9cc4 1002 dict_t *dict, cstring_t description,
d3fe100d 1003 netlink_route_fn *set_routes,
9d3a4132 1004 netlink_deliver_fn *to_host)
4efd681a 1005{
c6f79b17 1006 item_t *sa, *ptpa;
794f2398 1007 list_t *l;
c6f79b17 1008
4efd681a
SE
1009 st->dst=dst;
1010 st->cl.description=description;
469fd1d9
SE
1011 st->cl.type=CL_PURE;
1012 st->cl.apply=netlink_inst_apply;
1013 st->cl.interface=st;
4efd681a 1014 st->clients=NULL;
d3fe100d
SE
1015 st->routes=NULL;
1016 st->n_clients=0;
1017 st->set_routes=set_routes;
4efd681a
SE
1018 st->deliver_to_host=to_host;
1019
794f2398 1020 st->name=dict_read_string(dict,"name",False,description,loc);
4efd681a 1021 if (!st->name) st->name=description;
794f2398
SE
1022 l=dict_lookup(dict,"networks");
1023 if (l)
1024 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1025 else {
4f5e39ec
SE
1026 struct ipset *empty;
1027 empty=ipset_new();
1028 st->networks=ipset_complement(empty);
1029 ipset_free(empty);
794f2398
SE
1030 }
1031 l=dict_lookup(dict,"remote-networks");
1032 if (l) {
1033 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1034 "remote-networks");
1035 } else {
1036 struct ipset *empty;
1037 empty=ipset_new();
1038 st->remote_networks=ipset_complement(empty);
1039 ipset_free(empty);
1040 }
1041
c6f79b17 1042 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
469fd1d9 1043 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
c6f79b17
SE
1044 if (sa && ptpa) {
1045 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1046 "ptp-address in the same netlink device\n");
1047 }
1048 if (!(sa || ptpa)) {
1049 cfgfatal(loc,st->name,"you must specify secnet-address or "
1050 "ptp-address for this netlink device\n");
1051 }
1052 if (sa) {
794f2398 1053 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
c6f79b17
SE
1054 st->ptp=False;
1055 } else {
794f2398 1056 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
c6f79b17
SE
1057 st->ptp=True;
1058 }
d3fe100d
SE
1059 /* To be strictly correct we could subtract secnet_address from
1060 networks here. It shouldn't make any practical difference,
794f2398
SE
1061 though, and will make the route dump look complicated... */
1062 st->subnets=ipset_to_subnet_list(st->networks);
4efd681a
SE
1063 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1064 buffer_new(&st->icmp,ICMP_BUFSIZE);
469fd1d9
SE
1065 st->outcount=0;
1066 st->localcount=0;
70dc107b
SE
1067
1068 add_hook(PHASE_SETUP,netlink_phase_hook,st);
042a8da9 1069 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
4efd681a 1070
469fd1d9
SE
1071 /* If we're point-to-point then we return a CL_NETLINK directly,
1072 rather than a CL_NETLINK_OLD or pure closure (depending on
1073 compatibility). This CL_NETLINK is for our one and only
1074 client. Our cl.apply function is NULL. */
1075 if (st->ptp) {
1076 closure_t *cl;
1077 cl=netlink_inst_create(st,loc,dict);
1078 st->cl=*cl;
1079 }
1080 return netlink_dev_incoming;
2fe58dfd
SE
1081}
1082
9d3a4132 1083/* No connection to the kernel at all... */
2fe58dfd 1084
9d3a4132 1085struct null {
4efd681a 1086 struct netlink nl;
4efd681a 1087};
2fe58dfd 1088
d3fe100d 1089static bool_t null_set_route(void *sst, struct netlink_client *routes)
4efd681a 1090{
9d3a4132 1091 struct null *st=sst;
d3fe100d
SE
1092
1093 if (routes->up!=routes->kup) {
1094 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1095 st->nl.name,routes->name,
1096 routes->up?"up":"down");
1097 routes->kup=routes->up;
9d3a4132 1098 return True;
2fe58dfd 1099 }
9d3a4132 1100 return False;
2fe58dfd 1101}
9d3a4132 1102
/* deliver-to-host callback of the null netlink: does nothing with the
   buffer it is given. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    /* Intentionally empty. */
}
1107
1108static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1109 list_t *args)
1110{
1111 struct null *st;
4efd681a
SE
1112 item_t *item;
1113 dict_t *dict;
2fe58dfd 1114
4efd681a 1115 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1116
4efd681a
SE
1117 item=list_elem(args,0);
1118 if (!item || item->type!=t_dict)
1119 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1120
1121 dict=item->data.dict;
1122
9d3a4132
SE
1123 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1124 null_deliver);
4efd681a
SE
1125
1126 return new_closure(&st->nl.cl);
2fe58dfd
SE
1127}
1128
2fe58dfd
SE
1129void netlink_module(dict_t *dict)
1130{
4efd681a 1131 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1132}