/* User-kernel network link */

/* Each netlink device is actually a router, with its own IP address.
   We do things like decreasing the TTL and recalculating the header
   checksum, generating ICMP, responding to pings, etc. */

/* This is where we have the anti-spoofing paranoia - before sending a
   packet to the kernel we check that the tunnel it came over could
   reasonably have produced it. */
/* Generic IP checksum routine (one's-complement sum of 16-bit words).
 *
 * iph:   first byte of the region to checksum.
 * count: number of bytes in that region.
 *
 * Returns the complemented sum in network byte order, ready to be stored
 * directly in a header checksum field.  Summing a region that already
 * holds a correct checksum therefore yields 0.
 *
 * The data is read one byte at a time so that iph may have any
 * alignment (callers pass pointers into the middle of packet buffers).
 * A trailing odd byte is treated as the high-order byte of a final
 * zero-padded word. */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
34 * This is a version of ip_compute_csum() optimized for IP headers,
35 * which always checksum on 4 octet boundaries.
37 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
40 static inline uint16_t ip_fast_csum(uint8_t *iph
, uint32_t ihl
) {
43 __asm__
__volatile__("
62 /* Since the input registers which are loaded with iph and ipl
63 are modified, we must also specify them as outputs, or gcc
64 will assume they contain their original values. */
65 : "=r" (sum
), "=r" (iph
), "=r" (ihl
)
66 : "1" (iph
), "2" (ihl
));
/* Portable fallback: checksum an IP header whose length is given as
   ihl 4-octet words by passing its size in bytes to the generic
   routine. */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    uint32_t header_bytes=ihl*4;

    return ip_csum(iph,header_bytes);
}
77 #if defined (WORDS_BIGENDIAN)
93 /* The options start here. */
116 static void netlink_packet_deliver(struct netlink
*st
,
117 struct netlink_client
*client
,
118 struct buffer_if
*buf
);
120 static struct icmphdr
*netlink_icmp_tmpl(struct netlink
*st
,
121 uint32_t dest
,uint16_t len
)
125 BUF_ALLOC(&st
->icmp
,"netlink_icmp_tmpl");
126 buffer_init(&st
->icmp
,st
->max_start_pad
);
127 h
=buf_append(&st
->icmp
,sizeof(*h
));
132 h
->iph
.tot_len
=htons(len
+(h
->iph
.ihl
*4)+8);
137 h
->iph
.saddr
=htonl(st
->secnet_address
);
138 h
->iph
.daddr
=htonl(dest
);
140 h
->iph
.check
=ip_fast_csum((uint8_t *)&h
->iph
,h
->iph
.ihl
);
147 /* Fill in the ICMP checksum field correctly */
148 static void netlink_icmp_csum(struct icmphdr
*h
)
152 len
=ntohs(h
->iph
.tot_len
)-(4*h
->iph
.ihl
);
154 h
->check
=ip_csum(&h
->type
,len
);
158 * An ICMP error message MUST NOT be sent as the result of
161 * * an ICMP error message, or
163 * * a datagram destined to an IP broadcast or IP multicast
166 * * a datagram sent as a link-layer broadcast, or
168 * * a non-initial fragment, or
170 * * a datagram whose source address does not define a single
171 * host -- e.g., a zero address, a loopback address, a
172 * broadcast address, a multicast address, or a Class E
175 static bool_t
netlink_icmp_may_reply(struct buffer_if
*buf
)
180 iph
=(struct iphdr
*)buf
->start
;
181 if (iph
->protocol
==1) return False
; /* Overly-broad; we may reply to
182 eg. icmp echo-request */
183 /* How do we spot broadcast destination addresses? */
184 if (ntohs(iph
->frag_off
)&0x1fff) return False
; /* Non-initial fragment */
185 source
=ntohl(iph
->saddr
);
186 if (source
==0) return False
;
187 if ((source
&0xff000000)==0x7f000000) return False
;
188 /* How do we spot broadcast source addresses? */
189 if ((source
&0xf0000000)==0xe0000000) return False
; /* Multicast */
190 if ((source
&0xf0000000)==0xf0000000) return False
; /* Class E */
194 /* How much of the original IP packet do we include in its ICMP
195 response? The header plus up to 64 bits. */
196 static uint16_t netlink_icmp_reply_len(struct buffer_if
*buf
)
198 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
202 /* We include the first 8 bytes of the packet data, provided they exist */
204 plen
=ntohs(iph
->tot_len
);
205 return (hlen
>plen?plen
:hlen
);
208 /* client indicates where the packet we're constructing a response to
209 comes from. NULL indicates the host. */
210 static void netlink_icmp_simple(struct netlink
*st
, struct buffer_if
*buf
,
211 struct netlink_client
*client
,
212 uint8_t type
, uint8_t code
)
214 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
218 if (netlink_icmp_may_reply(buf
)) {
219 len
=netlink_icmp_reply_len(buf
);
220 h
=netlink_icmp_tmpl(st
,ntohl(iph
->saddr
),len
);
221 h
->type
=type
; h
->code
=code
;
222 memcpy(buf_append(&st
->icmp
,len
),buf
->start
,len
);
223 netlink_icmp_csum(h
);
224 netlink_packet_deliver(st
,NULL
,&st
->icmp
);
225 BUF_ASSERT_FREE(&st
->icmp
);
230 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
233 * Is the datagram acceptable?
235 * 1. Length at least the size of an ip header
237 * 3. Checksums correctly.
238 * 4. Doesn't have a bogus length
240 static bool_t
netlink_check(struct netlink
*st
, struct buffer_if
*buf
)
242 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
245 if (iph
->ihl
< 5 || iph
->version
!= 4) return False
;
246 if (buf
->size
< iph
->ihl
*4) return False
;
247 if (ip_fast_csum((uint8_t *)iph
, iph
->ihl
)!=0) return False
;
248 len
=ntohs(iph
->tot_len
);
249 /* There should be no padding */
250 if (buf
->size
!=len
|| len
<(iph
->ihl
<<2)) return False
;
251 /* XXX check that there's no source route specified */
255 /* Deliver a packet. "client" points to the _origin_ of the packet, not
256 its destination. (May be used when sending ICMP response - avoid
257 asymmetric routing.) */
258 static void netlink_packet_deliver(struct netlink
*st
,
259 struct netlink_client
*client
,
260 struct buffer_if
*buf
)
262 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
263 uint32_t dest
=ntohl(iph
->daddr
);
264 uint32_t source
=ntohl(iph
->saddr
);
265 uint32_t best_quality
;
269 BUF_ASSERT_USED(buf
);
271 if (dest
==st
->secnet_address
) {
272 Message(M_ERROR
,"%s: trying to deliver a packet to myself!\n");
277 /* XXX we're going to need an extra value 'allow_route' for the
278 source of the packet. It's always True for packets from the
279 host. For packets from tunnels, we consult the client
280 options. If !allow_route and the destination is a tunnel that
281 also doesn't allow routing, we must reject the packet with an
282 'administratively prohibited' or something similar ICMP. */
284 /* Origin of packet is host or secnet. Might be for a tunnel. */
287 for (i
=0; i
<st
->n_routes
; i
++) {
288 if (st
->routes
[i
].up
&& subnet_match(&st
->routes
[i
].net
,dest
)) {
289 if (st
->routes
[i
].c
->link_quality
>best_quality
290 || best_quality
==0) {
291 best_quality
=st
->routes
[i
].c
->link_quality
;
293 /* If quality isn't perfect we may wish to
294 consider kicking the tunnel with a 0-length
295 packet to prompt it to perform a key setup.
296 Then it'll eventually decide it's up or
298 /* If quality is perfect we don't need to search
300 if (best_quality
>=MAXIMUM_LINK_QUALITY
) break;
304 if (best_match
==-1) {
305 /* Not going down a tunnel. Might be for the host.
306 XXX think about this - only situation should be if we're
308 if (source
!=st
->secnet_address
) {
309 Message(M_ERROR
,"netlink_packet_deliver: outgoing packet "
310 "from host that won't fit down any of our tunnels!\n");
311 /* XXX I think this could also occur if a soft tunnel just
312 went down, but still had packets queued in the kernel. */
315 st
->deliver_to_host(st
->dst
,NULL
,buf
);
316 BUF_ASSERT_FREE(buf
);
319 if (best_quality
>0) {
320 st
->routes
[best_match
].c
->deliver(
321 st
->routes
[best_match
].c
->dst
,
322 st
->routes
[best_match
].c
, buf
);
323 BUF_ASSERT_FREE(buf
);
325 /* Generate ICMP destination unreachable */
326 netlink_icmp_simple(st
,buf
,client
,3,0); /* client==NULL */
330 } else { /* client is set */
331 /* We know the origin is a tunnel - packet must be for the host */
332 /* XXX THIS IS NOT NECESSARILY TRUE, AND NEEDS FIXING */
333 /* THIS FUNCTION MUST JUST DELIVER THE PACKET: IT MUST ASSUME
334 THE PACKET HAS ALREADY BEEN CHECKED */
335 if (subnet_matches_list(&st
->networks
,dest
)) {
336 st
->deliver_to_host(st
->dst
,NULL
,buf
);
337 BUF_ASSERT_FREE(buf
);
339 Message(M_ERROR
,"%s: packet from tunnel %s can't be delivered "
340 "to the host\n",st
->name
,client
->name
);
341 netlink_icmp_simple(st
,buf
,client
,3,0);
345 BUF_ASSERT_FREE(buf
);
348 static void netlink_packet_forward(struct netlink
*st
,
349 struct netlink_client
*client
,
350 struct buffer_if
*buf
)
352 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
354 BUF_ASSERT_USED(buf
);
356 /* Packet has already been checked */
358 /* Generate ICMP time exceeded */
359 netlink_icmp_simple(st
,buf
,client
,11,0);
365 iph
->check
=ip_fast_csum((uint8_t *)iph
,iph
->ihl
);
367 netlink_packet_deliver(st
,client
,buf
);
368 BUF_ASSERT_FREE(buf
);
371 /* Deal with packets addressed explicitly to us */
372 static void netlink_packet_local(struct netlink
*st
,
373 struct netlink_client
*client
,
374 struct buffer_if
*buf
)
378 h
=(struct icmphdr
*)buf
->start
;
380 if ((ntohs(h
->iph
.frag_off
)&0xbfff)!=0) {
381 Message(M_WARNING
,"%s: fragmented packet addressed to secnet; "
382 "ignoring it\n",st
->name
);
387 if (h
->iph
.protocol
==1) {
389 if (h
->type
==8 && h
->code
==0) {
390 /* ICMP echo-request. Special case: we re-use the buffer
391 to construct the reply. */
393 h
->iph
.daddr
=h
->iph
.saddr
;
394 h
->iph
.saddr
=htonl(st
->secnet_address
);
395 h
->iph
.ttl
=255; /* Be nice and bump it up again... */
397 h
->iph
.check
=ip_fast_csum((uint8_t *)h
,h
->iph
.ihl
);
398 netlink_icmp_csum(h
);
399 netlink_packet_deliver(st
,NULL
,buf
);
402 Message(M_WARNING
,"%s: unknown incoming ICMP\n",st
->name
);
404 /* Send ICMP protocol unreachable */
405 netlink_icmp_simple(st
,buf
,client
,3,2);
413 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
415 static void netlink_incoming(void *sst
, void *cid
, struct buffer_if
*buf
)
417 struct netlink
*st
=sst
;
418 struct netlink_client
*client
=cid
;
419 uint32_t source
,dest
;
422 BUF_ASSERT_USED(buf
);
423 if (!netlink_check(st
,buf
)) {
424 Message(M_WARNING
,"%s: bad IP packet from %s\n",
425 st
->name
,client?client
->name
:"host");
429 iph
=(struct iphdr
*)buf
->start
;
431 source
=ntohl(iph
->saddr
);
432 dest
=ntohl(iph
->daddr
);
436 /* Check that the packet source is in 'nets' and its destination is
438 if (!subnet_matches_list(client
->networks
,source
)) {
440 s
=ipaddr_to_string(source
);
441 d
=ipaddr_to_string(dest
);
442 Message(M_WARNING
,"%s: packet from tunnel %s with bad "
443 "source address (s=%s,d=%s)\n",st
->name
,client
->name
,s
,d
);
449 if (!subnet_matches_list(&st
->networks
,source
)) {
451 s
=ipaddr_to_string(source
);
452 d
=ipaddr_to_string(dest
);
453 Message(M_WARNING
,"%s: outgoing packet with bad source address "
454 "(s=%s,d=%s)\n",st
->name
,s
,d
);
460 /* (st->secnet_address needs checking before matching destination
462 if (dest
==st
->secnet_address
) {
463 netlink_packet_local(st
,client
,buf
);
464 BUF_ASSERT_FREE(buf
);
468 /* Check for free routing */
469 if (!subnet_matches_list(&st
->networks
,dest
)) {
471 s
=ipaddr_to_string(source
);
472 d
=ipaddr_to_string(dest
);
473 Message(M_WARNING
,"%s: incoming packet from tunnel %s "
474 "with bad destination address "
475 "(s=%s,d=%s)\n",st
->name
,client
->name
,s
,d
);
481 netlink_packet_forward(st
,client
,buf
);
482 BUF_ASSERT_FREE(buf
);
485 static void netlink_set_softlinks(struct netlink
*st
, struct netlink_client
*c
,
490 if (!st
->routes
) return; /* Table has not yet been created */
491 for (i
=0; i
<st
->n_routes
; i
++) {
492 if (!st
->routes
[i
].hard
&& st
->routes
[i
].c
==c
) {
494 st
->set_route(st
->dst
,&st
->routes
[i
]);
499 static void netlink_set_quality(void *sst
, void *cid
, uint32_t quality
)
501 struct netlink
*st
=sst
;
502 struct netlink_client
*c
=cid
;
504 c
->link_quality
=quality
;
505 if (c
->link_quality
==LINK_QUALITY_DOWN
) {
506 netlink_set_softlinks(st
,c
,False
);
508 netlink_set_softlinks(st
,c
,True
);
512 static void *netlink_regnets(void *sst
, struct subnet_list
*nets
,
513 netlink_deliver_fn
*deliver
, void *dst
,
514 uint32_t max_start_pad
, uint32_t max_end_pad
,
515 uint32_t options
, string_t client_name
)
517 struct netlink
*st
=sst
;
518 struct netlink_client
*c
;
520 Message(M_DEBUG_CONFIG
,"netlink_regnets: request for %d networks, "
521 "max_start_pad=%d, max_end_pad=%d\n",
522 nets
->entries
,max_start_pad
,max_end_pad
);
524 if ((options
&NETLINK_OPTION_SOFTROUTE
) && !st
->set_route
) {
525 Message(M_ERROR
,"%s: this netlink device does not support "
530 if (options
&NETLINK_OPTION_SOFTROUTE
) {
531 /* XXX for now we assume that soft routes require root privilege;
532 this may not always be true. The device driver can tell us. */
533 require_root_privileges
=True
;
534 require_root_privileges_explanation
="netlink: soft routes";
537 /* Check that nets do not intersect st->exclude_remote_networks;
538 refuse to register if they do. */
539 if (subnet_lists_intersect(&st
->exclude_remote_networks
,nets
)) {
540 Message(M_ERROR
,"%s: site %s specifies networks that "
541 "intersect with the explicitly excluded remote networks\n",
542 st
->name
,client_name
);
546 c
=safe_malloc(sizeof(*c
),"netlink_regnets");
550 c
->name
=client_name
; /* XXX copy it? */
552 c
->link_quality
=LINK_QUALITY_DOWN
;
555 if (max_start_pad
> st
->max_start_pad
) st
->max_start_pad
=max_start_pad
;
556 if (max_end_pad
> st
->max_end_pad
) st
->max_end_pad
=max_end_pad
;
557 st
->n_routes
+=nets
->entries
;
562 static void netlink_dump_routes(struct netlink
*st
)
567 Message(M_INFO
,"%s: routing table:\n",st
->name
);
568 for (i
=0; i
<st
->n_routes
; i
++) {
569 net
=subnet_to_string(&st
->routes
[i
].net
);
570 Message(M_INFO
,"%s -> tunnel %s (%s,%s route,%s)\n",net
,
571 st
->routes
[i
].c
->name
,
572 st
->routes
[i
].hard?
"hard":"soft",
573 st
->routes
[i
].allow_route?
"free":"restricted",
574 st
->routes
[i
].up?
"up":"down");
577 Message(M_INFO
,"%s/32 -> netlink \"%s\"\n",
578 ipaddr_to_string(st
->secnet_address
),st
->name
);
579 for (i
=0; i
<st
->networks
.entries
; i
++) {
580 net
=subnet_to_string(&st
->networks
.list
[i
]);
581 Message(M_INFO
,"%s -> host\n",net
);
586 static int netlink_compare_route_specificity(const void *ap
, const void *bp
)
588 const struct netlink_route
*a
=ap
;
589 const struct netlink_route
*b
=bp
;
591 if (a
->net
.len
==b
->net
.len
) return 0;
592 if (a
->net
.len
<b
->net
.len
) return 1;
596 static void netlink_phase_hook(void *sst
, uint32_t new_phase
)
598 struct netlink
*st
=sst
;
599 struct netlink_client
*c
;
602 /* All the networks serviced by the various tunnels should now
603 * have been registered. We build a routing table by sorting the
604 * routes into most-specific-first order. */
605 st
->routes
=safe_malloc(st
->n_routes
*sizeof(*st
->routes
),
606 "netlink_phase_hook");
609 for (c
=st
->clients
; c
; c
=c
->next
) {
610 for (j
=0; j
<c
->networks
->entries
; j
++) {
611 st
->routes
[i
].net
=c
->networks
->list
[j
];
613 /* Hard routes are always up;
614 soft routes default to down */
615 st
->routes
[i
].up
=c
->options
&NETLINK_OPTION_SOFTROUTE?False
:True
;
616 st
->routes
[i
].kup
=False
;
617 st
->routes
[i
].hard
=c
->options
&NETLINK_OPTION_SOFTROUTE?False
:True
;
618 st
->routes
[i
].allow_route
=c
->options
&NETLINK_OPTION_ALLOW_ROUTE?
623 /* ASSERT i==st->n_routes */
624 if (i
!=st
->n_routes
) {
625 fatal("netlink: route count error: expected %d got %d\n",
628 /* Sort the table in descending order of specificity */
629 qsort(st
->routes
,st
->n_routes
,sizeof(*st
->routes
),
630 netlink_compare_route_specificity
);
632 netlink_dump_routes(st
);
635 netlink_deliver_fn
*netlink_init(struct netlink
*st
,
636 void *dst
, struct cloc loc
,
637 dict_t
*dict
, string_t description
,
638 netlink_route_fn
*set_route
,
639 netlink_deliver_fn
*to_host
)
642 st
->cl
.description
=description
;
643 st
->cl
.type
=CL_NETLINK
;
645 st
->cl
.interface
=&st
->ops
;
647 st
->ops
.regnets
=netlink_regnets
;
648 st
->ops
.deliver
=netlink_incoming
;
649 st
->ops
.set_quality
=netlink_set_quality
;
653 st
->set_route
=set_route
;
654 st
->deliver_to_host
=to_host
;
656 st
->name
=dict_read_string(dict
,"name",False
,"netlink",loc
);
657 if (!st
->name
) st
->name
=description
;
658 dict_read_subnet_list(dict
, "networks", True
, "netlink", loc
,
660 dict_read_subnet_list(dict
, "exclude-remote-networks", False
, "netlink",
661 loc
, &st
->exclude_remote_networks
);
662 /* secnet-address does not have to be in local-networks;
663 however, it should be advertised in the 'sites' file for the
665 st
->secnet_address
=string_to_ipaddr(
666 dict_find_item(dict
,"secnet-address", True
, "netlink", loc
),"netlink");
667 st
->mtu
=dict_read_number(dict
, "mtu", False
, "netlink", loc
, DEFAULT_MTU
);
668 buffer_new(&st
->icmp
,ICMP_BUFSIZE
);
672 add_hook(PHASE_SETUP
,netlink_phase_hook
,st
);
674 return netlink_incoming
;
677 /* No connection to the kernel at all... */
683 static bool_t
null_set_route(void *sst
, struct netlink_route
*route
)
688 if (route
->up
!=route
->kup
) {
689 t
=subnet_to_string(&route
->net
);
690 Message(M_INFO
,"%s: setting route %s to state %s\n",st
->nl
.name
,
691 t
, route
->up?
"up":"down");
693 route
->kup
=route
->up
;
699 static void null_deliver(void *sst
, void *cid
, struct buffer_if
*buf
)
704 static list_t
*null_apply(closure_t
*self
, struct cloc loc
, dict_t
*context
,
711 st
=safe_malloc(sizeof(*st
),"null_apply");
713 item
=list_elem(args
,0);
714 if (!item
|| item
->type
!=t_dict
)
715 cfgfatal(loc
,"null-netlink","parameter must be a dictionary\n");
717 dict
=item
->data
.dict
;
719 netlink_init(&st
->nl
,st
,loc
,dict
,"null-netlink",null_set_route
,
722 return new_closure(&st
->nl
.cl
);
725 init_module netlink_module
;
726 void netlink_module(dict_t
*dict
)
728 add_closure(dict
,"null-netlink",null_apply
);