1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
17 /* Generic IP checksum routine */
/*
 * ip_csum: checksum helper taking a data pointer and a length.
 *
 * iph   - start of the data to sum.
 * count - length of the data; presumably a byte count, since callers in
 *         this file pass ihl*4 and (tot_len - 4*ihl) -- confirm.
 *
 * Only part of the body appears in this listing.  The visible statements
 * start a 32-bit accumulator at zero, add a 16-bit word read from iph after
 * converting it with ntohs(), and fold the upper 16 bits of the accumulator
 * back into the lower 16 bits.  The loop control, any handling of a
 * trailing odd byte and the return statement are not visible here.
 *
 * NOTE(review): iph is a uint8_t pointer that is dereferenced through a
 * uint16_t cast.  Confirm that every caller passes suitably aligned data,
 * or replace the cast with memcpy, for targets that fault on misaligned
 * loads.
 */
18 static inline uint16_t ip_csum(uint8_t *iph
,uint32_t count
)
20 register uint32_t sum
=0;
23 sum
+=ntohs(*(uint16_t *)iph
);
30 sum
=(sum
&0xffff)+(sum
>>16);
36 * This is a version of ip_compute_csum() optimized for IP headers,
37 * which always checksum on 4 octet boundaries.
39 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
42 static inline uint16_t ip_fast_csum(uint8_t *iph
, uint32_t ihl
) {
45 __asm__
__volatile__("
64 /* Since the input registers which are loaded with iph and ihl
65 are modified, we must also specify them as outputs, or gcc
66 will assume they contain their original values. */
67 : "=r" (sum
), "=r" (iph
), "=r" (ihl
)
68 : "1" (iph
), "2" (ihl
));
/*
 * ip_fast_csum (plain C version): same signature as the inline-assembly
 * definition of the same name earlier in the file; presumably the two are
 * selected by a preprocessor conditional that is not visible in this
 * listing.
 *
 * iph - start of the IP header.
 * ihl - header length; it is multiplied by 4 before being handed to
 *       ip_csum(), matching the ihl*4 size comparisons made elsewhere in
 *       this file.
 *
 * Returns whatever ip_csum() returns for that range.
 */
72 static inline uint16_t ip_fast_csum(uint8_t *iph
, uint32_t ihl
)
74 return ip_csum(iph
,ihl
*4);
79 #if defined (WORDS_BIGENDIAN)
95 /* The options start here. */
118 static void netlink_packet_deliver(struct netlink
*st
,
119 struct netlink_client
*client
,
120 struct buffer_if
*buf
);
/*
 * netlink_icmp_tmpl: begin building an ICMP message in st->icmp.
 *
 * st   - netlink instance; supplies the icmp buffer, max_start_pad and
 *        secnet_address.
 * dest - destination address; it is passed through htonl() before being
 *        stored.
 * len  - amount of data the caller will append afterwards; the stored
 *        total length is len + ihl*4 + 8 (the 8 is presumably the ICMP
 *        header size -- confirm).
 *
 * Visible steps: the buffer is claimed with BUF_ALLOC, initialised with
 * st->max_start_pad, and sizeof(*h) bytes are appended for the header
 * structure.  tot_len, saddr (st->secnet_address) and daddr are filled in,
 * and the IP header checksum is computed with ip_fast_csum().  The
 * assignments to the remaining header fields and the return statement are
 * not part of this listing.
 */
122 static struct icmphdr
*netlink_icmp_tmpl(struct netlink
*st
,
123 uint32_t dest
,uint16_t len
)
127 BUF_ALLOC(&st
->icmp
,"netlink_icmp_tmpl");
128 buffer_init(&st
->icmp
,st
->max_start_pad
);
129 h
=buf_append(&st
->icmp
,sizeof(*h
));
134 h
->iph
.tot_len
=htons(len
+(h
->iph
.ihl
*4)+8);
139 h
->iph
.saddr
=htonl(st
->secnet_address
);
140 h
->iph
.daddr
=htonl(dest
);
142 h
->iph
.check
=ip_fast_csum((uint8_t *)&h
->iph
,h
->iph
.ihl
);
149 /* Fill in the ICMP checksum field correctly */
/*
 * netlink_icmp_csum: store the ICMP checksum in h->check.
 *
 * The length summed is the IP total length minus the IP header length
 * (4*ihl), and the sum starts at the address of h->type.
 *
 * The declaration of len, and any clearing of h->check before the sum is
 * taken, are not visible in this listing.
 */
150 static void netlink_icmp_csum(struct icmphdr
*h
)
154 len
=ntohs(h
->iph
.tot_len
)-(4*h
->iph
.ihl
);
156 h
->check
=ip_csum(&h
->type
,len
);
160 * An ICMP error message MUST NOT be sent as the result of
163 * * an ICMP error message, or
165 * * a datagram destined to an IP broadcast or IP multicast
168 * * a datagram sent as a link-layer broadcast, or
170 * * a non-initial fragment, or
172 * * a datagram whose source address does not define a single
173 * host -- e.g., a zero address, a loopback address, a
174 * broadcast address, a multicast address, or a Class E
/*
 * netlink_icmp_may_reply: decide whether an ICMP error may be generated in
 * response to the IP packet found at buf->start.
 *
 * The visible tests return False when:
 *   - the protocol field is 1 (the inline comment admits this is broader
 *     than necessary, since it also rules out ICMP echo-request);
 *   - the low 13 bits of frag_off are non-zero (non-initial fragment);
 *   - the source address is 0;
 *   - the source address lies in 127.0.0.0/8;
 *   - the top four bits of the source address are 0xe (multicast) or
 *     0xf (Class E).
 *
 * Broadcast source and destination addresses are not detected; the two
 * "How do we spot broadcast..." comments record that as an open question.
 * The local declarations and the final return statement are not part of
 * this listing.
 */
177 static bool_t
netlink_icmp_may_reply(struct buffer_if
*buf
)
182 iph
=(struct iphdr
*)buf
->start
;
183 if (iph
->protocol
==1) return False
; /* Overly-broad; we may reply to
184 eg. icmp echo-request */
185 /* How do we spot broadcast destination addresses? */
186 if (ntohs(iph
->frag_off
)&0x1fff) return False
; /* Non-initial fragment */
187 source
=ntohl(iph
->saddr
);
188 if (source
==0) return False
;
189 if ((source
&0xff000000)==0x7f000000) return False
;
190 /* How do we spot broadcast source addresses? */
191 if ((source
&0xf0000000)==0xe0000000) return False
; /* Multicast */
192 if ((source
&0xf0000000)==0xf0000000) return False
; /* Class E */
196 /* How much of the original IP packet do we include in its ICMP
197 response? The header plus up to 64 bits. */
/*
 * netlink_icmp_reply_len: number of bytes of the packet at buf->start to
 * quote inside an ICMP response.
 *
 * Returns the smaller of hlen and the packet's total length (tot_len
 * converted with ntohs).  The statement that computes hlen is not part of
 * this listing; going by the comments it is the IP header length plus 8
 * bytes of data.
 */
198 static uint16_t netlink_icmp_reply_len(struct buffer_if
*buf
)
200 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
204 /* We include the first 8 bytes of the packet data, provided they exist */
206 plen
=ntohs(iph
->tot_len
);
207 return (hlen
>plen?plen
:hlen
);
210 /* client indicates where the packet we're constructing a response to
211 comes from. NULL indicates the host. */
/*
 * netlink_icmp_simple: send an ICMP message of the given type and code in
 * response to the packet in buf.
 *
 * st     - netlink instance whose icmp buffer is used for the reply.
 * buf    - the packet being responded to; it is read, and the visible
 *          lines do not free it.
 * client - origin of that packet; the visible lines never use it, and the
 *          reply is handed to netlink_packet_deliver() with a NULL origin.
 * type, code - values stored in the ICMP header.
 *
 * Nothing is sent unless netlink_icmp_may_reply(buf) allows it.  When it
 * does: the quoted length comes from netlink_icmp_reply_len(), the header
 * is built by netlink_icmp_tmpl() addressed to the packet's source, len
 * bytes from buf->start are appended after the header, the ICMP checksum
 * is filled in, and the message is delivered.  BUF_ASSERT_FREE then
 * asserts that delivery released st->icmp.
 */
212 static void netlink_icmp_simple(struct netlink
*st
, struct buffer_if
*buf
,
213 struct netlink_client
*client
,
214 uint8_t type
, uint8_t code
)
216 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
220 if (netlink_icmp_may_reply(buf
)) {
221 len
=netlink_icmp_reply_len(buf
);
222 h
=netlink_icmp_tmpl(st
,ntohl(iph
->saddr
),len
);
223 h
->type
=type
; h
->code
=code
;
224 memcpy(buf_append(&st
->icmp
,len
),buf
->start
,len
);
225 netlink_icmp_csum(h
);
226 netlink_packet_deliver(st
,NULL
,&st
->icmp
);
227 BUF_ASSERT_FREE(&st
->icmp
);
232 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
235 * Is the datagram acceptable?
237 * 1. Length at least the size of an ip header
239 * 3. Checksums correctly.
240 * 4. Doesn't have a bogus length
/*
 * netlink_check: sanity-check the IP packet at buf->start.
 *
 * The visible tests return False when:
 *   - ihl is below 5 or the version field is not 4;
 *   - buf->size is smaller than the header length (ihl*4);
 *   - ip_fast_csum() over the header is non-zero;
 *   - buf->size differs from tot_len (so no padding is tolerated), or
 *     tot_len is smaller than the header length.
 *
 * st is not used by any visible line.  The declaration of len and the
 * success return are not part of this listing.
 *
 * NOTE(review): ihl and version are read from buf->start before any
 * visible comparison against buf->size.  Confirm that a minimum-size test
 * (item 1 of the list in the comment above this function) precedes those
 * reads on a line missing from this listing.
 */
242 static bool_t
netlink_check(struct netlink
*st
, struct buffer_if
*buf
)
244 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
247 if (iph
->ihl
< 5 || iph
->version
!= 4) return False
;
248 if (buf
->size
< iph
->ihl
*4) return False
;
249 if (ip_fast_csum((uint8_t *)iph
, iph
->ihl
)!=0) return False
;
250 len
=ntohs(iph
->tot_len
);
251 /* There should be no padding */
252 if (buf
->size
!=len
|| len
<(iph
->ihl
<<2)) return False
;
253 /* XXX check that there's no source route specified */
257 /* Deliver a packet. "client" points to the _origin_ of the packet, not
258 its destination. (May be used when sending ICMP response - avoid
259 asymmetric routing.) */
/*
 * netlink_packet_deliver: hand the IP packet in buf to a tunnel or to the
 * host.
 *
 * st     - netlink instance (route table, networks, delivery callbacks).
 * client - origin of the packet; per the comments, NULL means the packet
 *          came from the host or from secnet itself.
 * buf    - the packet; asserted in use on entry and asserted free after
 *          each visible delivery.
 *
 * Visible behaviour:
 *   - A packet whose destination equals st->secnet_address is reported
 *     with an M_ERROR message.
 *   - In the branch the comments label "origin is host or secnet", the
 *     route table is scanned for entries that are up and whose subnet
 *     matches the destination.  The entry whose client has the highest
 *     link_quality is preferred, and the scan stops early once
 *     MAXIMUM_LINK_QUALITY is reached.
 *   - If best_match is -1 the packet goes to st->deliver_to_host; a source
 *     other than st->secnet_address is logged as an error first.
 *   - Otherwise, with best_quality above 0, the packet is passed to the
 *     chosen route's client deliver callback.  The ICMP destination
 *     unreachable (type 3, code 0) call follows; presumably it is the
 *     alternative when best_quality is 0 -- the branch line is missing.
 *   - In the "client is set" branch the packet goes to the host when the
 *     destination matches st->networks.  The log message and the ICMP
 *     type 3, code 0 call follow; presumably they are the alternative
 *     when it does not match -- the else line is missing.
 *
 * NOTE(review): the Message call that reports "trying to deliver a packet
 * to myself!" has a %s conversion in its format string but passes no
 * argument for it, which is undefined behaviour.  Other messages in this
 * file pass st->name for their leading %s, so that is presumably what was
 * intended here.
 *
 * NOTE(review): best_quality is declared without an initialiser and is
 * compared inside the route loop.  Any initialisation of best_quality and
 * best_match is on lines missing from this listing -- confirm it exists.
 */
260 static void netlink_packet_deliver(struct netlink
*st
,
261 struct netlink_client
*client
,
262 struct buffer_if
*buf
)
264 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
265 uint32_t dest
=ntohl(iph
->daddr
);
266 uint32_t source
=ntohl(iph
->saddr
);
267 uint32_t best_quality
;
271 BUF_ASSERT_USED(buf
);
273 if (dest
==st
->secnet_address
) {
274 Message(M_ERROR
,"%s: trying to deliver a packet to myself!\n");
279 /* XXX we're going to need an extra value 'allow_route' for the
280 source of the packet. It's always True for packets from the
281 host. For packets from tunnels, we consult the client
282 options. If !allow_route and the destination is a tunnel that
283 also doesn't allow routing, we must reject the packet with an
284 'administratively prohibited' or something similar ICMP. */
286 /* Origin of packet is host or secnet. Might be for a tunnel. */
289 for (i
=0; i
<st
->n_routes
; i
++) {
290 if (st
->routes
[i
].up
&& subnet_match(&st
->routes
[i
].net
,dest
)) {
291 if (st
->routes
[i
].c
->link_quality
>best_quality
292 || best_quality
==0) {
293 best_quality
=st
->routes
[i
].c
->link_quality
;
295 /* If quality isn't perfect we may wish to
296 consider kicking the tunnel with a 0-length
297 packet to prompt it to perform a key setup.
298 Then it'll eventually decide it's up or
300 /* If quality is perfect we don't need to search
302 if (best_quality
>=MAXIMUM_LINK_QUALITY
) break;
306 if (best_match
==-1) {
307 /* Not going down a tunnel. Might be for the host.
308 XXX think about this - only situation should be if we're
310 if (source
!=st
->secnet_address
) {
311 Message(M_ERROR
,"netlink_packet_deliver: outgoing packet "
312 "from host that won't fit down any of our tunnels!\n");
313 /* XXX I think this could also occur if a soft tunnel just
314 went down, but still had packets queued in the kernel. */
317 st
->deliver_to_host(st
->dst
,NULL
,buf
);
318 BUF_ASSERT_FREE(buf
);
321 if (best_quality
>0) {
322 st
->routes
[best_match
].c
->deliver(
323 st
->routes
[best_match
].c
->dst
,
324 st
->routes
[best_match
].c
, buf
);
325 BUF_ASSERT_FREE(buf
);
327 /* Generate ICMP destination unreachable */
328 netlink_icmp_simple(st
,buf
,client
,3,0); /* client==NULL */
332 } else { /* client is set */
333 /* We know the origin is a tunnel - packet must be for the host */
334 /* XXX THIS IS NOT NECESSARILY TRUE, AND NEEDS FIXING */
335 /* THIS FUNCTION MUST JUST DELIVER THE PACKET: IT MUST ASSUME
336 THE PACKET HAS ALREADY BEEN CHECKED */
337 if (subnet_matches_list(&st
->networks
,dest
)) {
338 st
->deliver_to_host(st
->dst
,NULL
,buf
);
339 BUF_ASSERT_FREE(buf
);
341 Message(M_ERROR
,"%s: packet from tunnel %s can't be delivered "
342 "to the host\n",st
->name
,client
->name
);
343 netlink_icmp_simple(st
,buf
,client
,3,0);
347 BUF_ASSERT_FREE(buf
);
/*
 * netlink_packet_forward: forward an already-validated packet that is not
 * addressed to this netlink device.
 *
 * st     - netlink instance.
 * client - origin of the packet; passed on unchanged to
 *          netlink_icmp_simple() and netlink_packet_deliver().
 * buf    - the packet; asserted in use on entry and free on exit.
 *
 * Visible steps: an ICMP type 11, code 0 message ("time exceeded", per the
 * comment) can be generated, the IP header checksum is recomputed with
 * ip_fast_csum(), and the packet is passed to netlink_packet_deliver().
 * The condition guarding the ICMP call and any change to the TTL field are
 * on lines missing from this listing.
 */
350 static void netlink_packet_forward(struct netlink
*st
,
351 struct netlink_client
*client
,
352 struct buffer_if
*buf
)
354 struct iphdr
*iph
=(struct iphdr
*)buf
->start
;
356 BUF_ASSERT_USED(buf
);
358 /* Packet has already been checked */
360 /* Generate ICMP time exceeded */
361 netlink_icmp_simple(st
,buf
,client
,11,0);
367 iph
->check
=ip_fast_csum((uint8_t *)iph
,iph
->ihl
);
369 netlink_packet_deliver(st
,client
,buf
);
370 BUF_ASSERT_FREE(buf
);
373 /* Deal with packets addressed explicitly to us */
/*
 * netlink_packet_local: handle a packet whose destination is this netlink
 * device's own address.
 *
 * st     - netlink instance.
 * client - origin of the packet; used only when an ICMP error is sent.
 * buf    - the packet, viewed through struct icmphdr.
 *
 * Visible behaviour:
 *   - A fragment test masks frag_off with 0xbfff, i.e. every bit except
 *     0x4000 (the Don't Fragment flag in RFC 791).  If any remaining bit
 *     is set, a warning is logged that the fragmented packet is ignored.
 *   - For protocol 1 with type 8 and code 0 (echo-request) the same buffer
 *     is turned into the reply: the old source becomes the destination,
 *     st->secnet_address becomes the source, the TTL is set to 255, both
 *     checksums are recomputed, and the packet is delivered with a NULL
 *     origin.  The line that changes the ICMP type for the reply is not
 *     part of this listing.
 *   - A warning about unknown incoming ICMP is logged; presumably for
 *     other ICMP types -- the branch line is missing.
 *   - An ICMP protocol unreachable (type 3, code 2) call follows;
 *     presumably for protocols other than 1 -- the branch line is missing.
 */
374 static void netlink_packet_local(struct netlink
*st
,
375 struct netlink_client
*client
,
376 struct buffer_if
*buf
)
380 h
=(struct icmphdr
*)buf
->start
;
382 if ((ntohs(h
->iph
.frag_off
)&0xbfff)!=0) {
383 Message(M_WARNING
,"%s: fragmented packet addressed to secnet; "
384 "ignoring it\n",st
->name
);
389 if (h
->iph
.protocol
==1) {
391 if (h
->type
==8 && h
->code
==0) {
392 /* ICMP echo-request. Special case: we re-use the buffer
393 to construct the reply. */
395 h
->iph
.daddr
=h
->iph
.saddr
;
396 h
->iph
.saddr
=htonl(st
->secnet_address
);
397 h
->iph
.ttl
=255; /* Be nice and bump it up again... */
399 h
->iph
.check
=ip_fast_csum((uint8_t *)h
,h
->iph
.ihl
);
400 netlink_icmp_csum(h
);
401 netlink_packet_deliver(st
,NULL
,buf
);
404 Message(M_WARNING
,"%s: unknown incoming ICMP\n",st
->name
);
406 /* Send ICMP protocol unreachable */
407 netlink_icmp_simple(st
,buf
,client
,3,2);
415 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
/*
 * netlink_incoming: entry point for every packet reaching the netlink
 * device; netlink_init() installs it as st->ops.deliver and also returns
 * it.
 *
 * sst - the struct netlink instance, passed as void *.
 * cid - the originating struct netlink_client, or NULL when the packet
 *       comes from the host.
 * buf - the packet; asserted in use on entry and free after each visible
 *       hand-off.
 *
 * Visible behaviour, in order:
 *   1. The packet is rejected with a warning if netlink_check() fails.
 *   2. Source check: a tunnel packet must have a source within
 *      client->networks; a host packet must have a source within
 *      st->networks.  Failures are logged with both addresses.
 *   3. Point-to-point devices skip further inspection and pass the packet
 *      straight to the host or to the single registered client.
 *   4. A packet addressed to st->secnet_address goes to
 *      netlink_packet_local().
 *   5. A destination outside st->networks is logged as a bad destination
 *      ("check for free routing").
 *   6. Anything else goes to netlink_packet_forward().
 *
 * The conditions that choose between these branches, the early returns,
 * and whatever is done with the strings s and d after logging are on
 * lines missing from this listing.
 */
417 static void netlink_incoming(void *sst
, void *cid
, struct buffer_if
*buf
)
419 struct netlink
*st
=sst
;
420 struct netlink_client
*client
=cid
;
421 uint32_t source
,dest
;
424 BUF_ASSERT_USED(buf
);
425 if (!netlink_check(st
,buf
)) {
426 Message(M_WARNING
,"%s: bad IP packet from %s\n",
427 st
->name
,client?client
->name
:"host");
431 iph
=(struct iphdr
*)buf
->start
;
433 source
=ntohl(iph
->saddr
);
434 dest
=ntohl(iph
->daddr
);
438 /* Check that the packet source is appropriate for the tunnel
440 if (!subnet_matches_list(client
->networks
,source
)) {
442 s
=ipaddr_to_string(source
);
443 d
=ipaddr_to_string(dest
);
444 Message(M_WARNING
,"%s: packet from tunnel %s with bad "
445 "source address (s=%s,d=%s)\n",st
->name
,client
->name
,s
,d
);
451 /* Check that the packet originates in our configured local
452 network, and hasn't been forwarded from elsewhere or
453 generated with the wrong source address */
454 if (!subnet_matches_list(&st
->networks
,source
)) {
456 s
=ipaddr_to_string(source
);
457 d
=ipaddr_to_string(dest
);
458 Message(M_WARNING
,"%s: outgoing packet with bad source address "
459 "(s=%s,d=%s)\n",st
->name
,s
,d
);
466 /* If this is a point-to-point device we don't examine the packet at
467 all; we blindly send it down our one-and-only registered tunnel,
468 or to the host, depending on where it came from. */
471 st
->deliver_to_host(st
->dst
,NULL
,buf
);
473 st
->clients
->deliver(st
->clients
->dst
,NULL
,buf
);
475 BUF_ASSERT_FREE(buf
);
479 /* (st->secnet_address needs checking before matching destination
481 if (dest
==st
->secnet_address
) {
482 netlink_packet_local(st
,client
,buf
);
483 BUF_ASSERT_FREE(buf
);
487 /* Check for free routing */
488 if (!subnet_matches_list(&st
->networks
,dest
)) {
490 s
=ipaddr_to_string(source
);
491 d
=ipaddr_to_string(dest
);
492 Message(M_WARNING
,"%s: incoming packet from tunnel %s "
493 "with bad destination address "
494 "(s=%s,d=%s)\n",st
->name
,client
->name
,s
,d
);
500 netlink_packet_forward(st
,client
,buf
);
501 BUF_ASSERT_FREE(buf
);
/*
 * netlink_set_softlinks: update the route-table entries that belong to
 * client c.
 *
 * st      - netlink instance; nothing happens while st->routes is still
 *           NULL (the table has not been built yet).
 * c       - client whose routes are updated.
 * up      - requested state; no visible line uses it, so presumably it is
 *           stored on a line missing from this listing.
 * quality - copied into the quality field of every matching route.
 *
 * For matching routes that are not hard, st->set_route is called with
 * st->dst and the route entry.
 */
504 static void netlink_set_softlinks(struct netlink
*st
, struct netlink_client
*c
,
505 bool_t up
, uint32_t quality
)
509 if (!st
->routes
) return; /* Table has not yet been created */
510 for (i
=0; i
<st
->n_routes
; i
++) {
511 if (st
->routes
[i
].c
==c
) {
512 st
->routes
[i
].quality
=quality
;
513 if (!st
->routes
[i
].hard
) {
515 st
->set_route(st
->dst
,&st
->routes
[i
]);
/*
 * netlink_set_quality: record a new link quality for a client and
 * propagate it to that client's routes; installed as st->ops.set_quality
 * by netlink_init().
 *
 * sst     - the struct netlink instance, passed as void *.
 * cid     - the struct netlink_client, passed as void *.
 * quality - new value for c->link_quality.
 *
 * netlink_set_softlinks() is called with up=False when the new quality
 * equals LINK_QUALITY_DOWN.  The second visible call uses up=True and is
 * presumably the alternative branch; the line separating the two calls is
 * missing from this listing.
 */
521 static void netlink_set_quality(void *sst
, void *cid
, uint32_t quality
)
523 struct netlink
*st
=sst
;
524 struct netlink_client
*c
=cid
;
526 c
->link_quality
=quality
;
527 if (c
->link_quality
==LINK_QUALITY_DOWN
) {
528 netlink_set_softlinks(st
,c
,False
,c
->link_quality
);
530 netlink_set_softlinks(st
,c
,True
,c
->link_quality
);
/*
 * netlink_regnets: register a client and the networks reachable through
 * it; installed as st->ops.regnets by netlink_init().
 *
 * sst           - the struct netlink instance, passed as void *.
 * nets          - subnets claimed by the client.
 * deliver, dst  - delivery callback and its context for the client.
 * max_start_pad, max_end_pad - padding the client needs; the instance
 *                 keeps the largest values seen.
 * options       - NETLINK_OPTION_* flags.
 * client_name   - name used in diagnostics.
 *
 * Visible behaviour:
 *   - Logs the request at M_DEBUG_CONFIG.
 *   - Reports an error if soft routes are requested but the device has no
 *     set_route callback.
 *   - Requesting soft routes sets require_root_privileges and its
 *     explanation string.
 *   - Reports an error if nets intersects st->exclude_remote_networks.
 *   - Calls fatal() if a point-to-point device already has a client.
 *   - Allocates the client with safe_malloc, sets its link_quality to
 *     LINK_QUALITY_DOWN, raises the instance's padding maxima and adds
 *     nets->entries to st->n_routes.
 *
 * The remaining client field assignments, list linkage and return values
 * are on lines missing from this listing.
 *
 * NOTE(review): max_start_pad and max_end_pad are uint32_t but are printed
 * with %d in the debug message.
 */
534 static void *netlink_regnets(void *sst
, struct subnet_list
*nets
,
535 netlink_deliver_fn
*deliver
, void *dst
,
536 uint32_t max_start_pad
, uint32_t max_end_pad
,
537 uint32_t options
, string_t client_name
)
539 struct netlink
*st
=sst
;
540 struct netlink_client
*c
;
542 Message(M_DEBUG_CONFIG
,"netlink_regnets: request for %d networks, "
543 "max_start_pad=%d, max_end_pad=%d\n",
544 nets
->entries
,max_start_pad
,max_end_pad
);
546 if ((options
&NETLINK_OPTION_SOFTROUTE
) && !st
->set_route
) {
547 Message(M_ERROR
,"%s: this netlink device does not support "
552 if (options
&NETLINK_OPTION_SOFTROUTE
) {
553 /* XXX for now we assume that soft routes require root privilege;
554 this may not always be true. The device driver can tell us. */
555 require_root_privileges
=True
;
556 require_root_privileges_explanation
="netlink: soft routes";
559 /* Check that nets do not intersect st->exclude_remote_networks;
560 refuse to register if they do. */
561 if (subnet_lists_intersect(&st
->exclude_remote_networks
,nets
)) {
562 Message(M_ERROR
,"%s: site %s specifies networks that "
563 "intersect with the explicitly excluded remote networks\n",
564 st
->name
,client_name
);
568 if (st
->clients
&& st
->ptp
) {
569 fatal("%s: only one site may use a point-to-point netlink device\n",
574 c
=safe_malloc(sizeof(*c
),"netlink_regnets");
580 c
->link_quality
=LINK_QUALITY_DOWN
;
583 if (max_start_pad
> st
->max_start_pad
) st
->max_start_pad
=max_start_pad
;
584 if (max_end_pad
> st
->max_end_pad
) st
->max_end_pad
=max_end_pad
;
585 st
->n_routes
+=nets
->entries
;
/*
 * netlink_dump_routes: write the routing table to the log.
 *
 * st        - netlink instance whose routes are listed.
 * requested - when true the message class c is set to M_WARNING; the
 *             class used otherwise is set on a line missing from this
 *             listing.
 *
 * Output: a header line, then one line per route entry giving the subnet,
 * the client name, hard/soft, free/restricted, up/down and the quality.
 * After that comes a /32 line for st->secnet_address and one line for
 * each entry in st->networks, shown as routed to the host.
 *
 * NOTE(review): the result of ipaddr_to_string(st->secnet_address) is
 * passed straight to Message() and not kept, whereas other call sites in
 * this file store that result in a variable.  If the helper returns
 * allocated storage, this call leaks it -- confirm against the helper's
 * contract.
 */
590 static void netlink_dump_routes(struct netlink
*st
, bool_t requested
)
596 if (requested
) c
=M_WARNING
;
597 Message(c
,"%s: routing table:\n",st
->name
);
598 for (i
=0; i
<st
->n_routes
; i
++) {
599 net
=subnet_to_string(&st
->routes
[i
].net
);
600 Message(c
,"%s -> tunnel %s (%s,%s route,%s,quality %d)\n",net
,
601 st
->routes
[i
].c
->name
,
602 st
->routes
[i
].hard?
"hard":"soft",
603 st
->routes
[i
].allow_route?
"free":"restricted",
604 st
->routes
[i
].up?
"up":"down",
605 st
->routes
[i
].quality
);
608 Message(c
,"%s/32 -> netlink \"%s\"\n",
609 ipaddr_to_string(st
->secnet_address
),st
->name
);
610 for (i
=0; i
<st
->networks
.entries
; i
++) {
611 net
=subnet_to_string(&st
->networks
.list
[i
]);
612 Message(c
,"%s -> host\n",net
);
/*
 * netlink_compare_route_specificity: qsort() comparison callback for
 * struct netlink_route entries, keyed on net.len.
 *
 * Returns 0 when both entries have the same net.len and 1 when a's
 * net.len is smaller than b's, so entries with a larger net.len sort
 * first.  The return statement for the remaining case is not part of this
 * listing.
 */
617 static int netlink_compare_route_specificity(const void *ap
, const void *bp
)
619 const struct netlink_route
*a
=ap
;
620 const struct netlink_route
*b
=bp
;
622 if (a
->net
.len
==b
->net
.len
) return 0;
623 if (a
->net
.len
<b
->net
.len
) return 1;
/*
 * netlink_phase_hook: build the routing table once all clients have
 * registered; netlink_init() registers it with add_hook(PHASE_SETUP, ...).
 *
 * sst       - the struct netlink instance, passed as void *.
 * new_phase - not used by any visible line.
 *
 * Visible behaviour:
 *   - Calls fatal() if a point-to-point device has no client.
 *   - Allocates st->n_routes route entries with safe_malloc.
 *   - For every network of every client fills in one entry: the subnet;
 *     up and hard, both True unless NETLINK_OPTION_SOFTROUTE is set;
 *     kup, False; allow_route, derived from NETLINK_OPTION_ALLOW_ROUTE;
 *     and quality, taken from the client's link_quality.
 *   - Calls fatal() if the number of entries written differs from
 *     st->n_routes.
 *   - Sorts the table with netlink_compare_route_specificity and dumps it
 *     with requested=False.
 *
 * The assignment of each entry's client pointer and the counter updates
 * are on lines missing from this listing.
 */
627 static void netlink_phase_hook(void *sst
, uint32_t new_phase
)
629 struct netlink
*st
=sst
;
630 struct netlink_client
*c
;
633 if (!st
->clients
&& st
->ptp
) {
634 /* Point-to-point netlink devices must have precisely one
635 client. If none has registered by now, complain. */
636 fatal("%s: point-to-point netlink devices must have precisely "
637 "one client. This one doesn't have any.\n",st
->name
);
640 /* All the networks serviced by the various tunnels should now
641 * have been registered. We build a routing table by sorting the
642 * routes into most-specific-first order. */
643 st
->routes
=safe_malloc(st
->n_routes
*sizeof(*st
->routes
),
644 "netlink_phase_hook");
647 for (c
=st
->clients
; c
; c
=c
->next
) {
648 for (j
=0; j
<c
->networks
->entries
; j
++) {
649 st
->routes
[i
].net
=c
->networks
->list
[j
];
651 /* Hard routes are always up;
652 soft routes default to down */
653 st
->routes
[i
].up
=c
->options
&NETLINK_OPTION_SOFTROUTE?False
:True
;
654 st
->routes
[i
].kup
=False
;
655 st
->routes
[i
].hard
=c
->options
&NETLINK_OPTION_SOFTROUTE?False
:True
;
656 st
->routes
[i
].allow_route
=c
->options
&NETLINK_OPTION_ALLOW_ROUTE?
658 st
->routes
[i
].quality
=c
->link_quality
;
662 /* ASSERT i==st->n_routes */
663 if (i
!=st
->n_routes
) {
664 fatal("netlink: route count error: expected %d got %d\n",
667 /* Sort the table in descending order of specificity */
668 qsort(st
->routes
,st
->n_routes
,sizeof(*st
->routes
),
669 netlink_compare_route_specificity
);
671 netlink_dump_routes(st
,False
);
/*
 * netlink_signal_handler: callback registered for SIGUSR1 by
 * netlink_init().  Logs at M_INFO that a route dump was requested, then
 * calls netlink_dump_routes() with requested=True.
 *
 * sst    - the struct netlink instance, passed as void *.
 * signum - not used by any visible line.
 */
674 static void netlink_signal_handler(void *sst
, int signum
)
676 struct netlink
*st
=sst
;
677 Message(M_INFO
,"%s: route dump requested by SIGUSR1\n",st
->name
);
678 netlink_dump_routes(st
,True
);
/*
 * netlink_init: common initialisation for a netlink device; returns
 * netlink_incoming, the function through which packets are fed to it.
 *
 * st          - instance to initialise.
 * dst         - context pointer for the driver callbacks; the line that
 *               stores it is not part of this listing.
 * loc         - configuration location, used for diagnostics.
 * dict        - configuration dictionary for this device.
 * description - stored in st->cl.description, and used as the name when
 *               the dictionary has no "name" entry.
 * set_route   - driver callback stored in st->set_route.
 * to_host     - driver callback stored in st->deliver_to_host.
 *
 * Visible behaviour:
 *   - Fills in the closure (type CL_NETLINK, interface &st->ops) and the
 *     ops table (regnets, deliver, set_quality).
 *   - Reads "name", "networks" (required), "exclude-remote-networks"
 *     (optional) and "mtu" (default DEFAULT_MTU).
 *   - Looks up "secnet-address" and "ptp-address".  Two cfgfatal calls
 *     reject giving both and giving neither; the conditions guarding them
 *     are on lines missing from this listing.  st->secnet_address is set
 *     from whichever entry was supplied.
 *   - Creates the ICMP buffer with ICMP_BUFSIZE.
 *   - Registers netlink_phase_hook for PHASE_SETUP and
 *     netlink_signal_handler for SIGUSR1.
 */
681 netlink_deliver_fn
*netlink_init(struct netlink
*st
,
682 void *dst
, struct cloc loc
,
683 dict_t
*dict
, string_t description
,
684 netlink_route_fn
*set_route
,
685 netlink_deliver_fn
*to_host
)
690 st
->cl
.description
=description
;
691 st
->cl
.type
=CL_NETLINK
;
693 st
->cl
.interface
=&st
->ops
;
695 st
->ops
.regnets
=netlink_regnets
;
696 st
->ops
.deliver
=netlink_incoming
;
697 st
->ops
.set_quality
=netlink_set_quality
;
701 st
->set_route
=set_route
;
702 st
->deliver_to_host
=to_host
;
704 st
->name
=dict_read_string(dict
,"name",False
,"netlink",loc
);
705 if (!st
->name
) st
->name
=description
;
706 dict_read_subnet_list(dict
, "networks", True
, "netlink", loc
,
708 dict_read_subnet_list(dict
, "exclude-remote-networks", False
, "netlink",
709 loc
, &st
->exclude_remote_networks
);
710 /* secnet-address does not have to be in local-networks;
711 however, it should be advertised in the 'sites' file for the
713 sa
=dict_find_item(dict
,"secnet-address",False
,"netlink",loc
);
714 ptpa
=dict_find_item(dict
,"ptp-address", False
, "netlink", loc
);
716 cfgfatal(loc
,st
->name
,"you may not specify secnet-address and "
717 "ptp-address in the same netlink device\n");
720 cfgfatal(loc
,st
->name
,"you must specify secnet-address or "
721 "ptp-address for this netlink device\n");
724 st
->secnet_address
=string_to_ipaddr(sa
,"netlink");
727 st
->secnet_address
=string_to_ipaddr(ptpa
,"netlink");
730 st
->mtu
=dict_read_number(dict
, "mtu", False
, "netlink", loc
, DEFAULT_MTU
);
731 buffer_new(&st
->icmp
,ICMP_BUFSIZE
);
735 add_hook(PHASE_SETUP
,netlink_phase_hook
,st
);
736 request_signal_notification(SIGUSR1
, netlink_signal_handler
, st
);
738 return netlink_incoming
;
741 /* No connection to the kernel at all... */
/*
 * null_set_route: route callback of the null netlink device; null_apply()
 * passes it to netlink_init() as set_route.
 *
 * sst   - driver state, passed as void *; the line converting it to the
 *         local st is not part of this listing.
 * route - route entry whose requested state (up) is compared with the
 *         recorded state (kup).
 *
 * When the two differ, the change is logged at M_INFO and kup is set to
 * up; no other action on the route is visible.  The return statements are
 * on lines missing from this listing.
 */
747 static bool_t
null_set_route(void *sst
, struct netlink_route
*route
)
752 if (route
->up
!=route
->kup
) {
753 t
=subnet_to_string(&route
->net
);
754 Message(M_INFO
,"%s: setting route %s to state %s\n",st
->nl
.name
,
755 t
, route
->up?
"up":"down");
757 route
->kup
=route
->up
;
/*
 * null_deliver: takes the same parameter list as netlink_incoming (context
 * pointer, client pointer, packet buffer).  Presumably the host-delivery
 * callback of the null netlink device -- confirm; only the signature is
 * present in this listing.
 */
763 static void null_deliver(void *sst
, void *cid
, struct buffer_if
*buf
)
/*
 * null_apply: closure body registered under the name "null-netlink" by
 * netlink_module().
 *
 * Visible behaviour: allocates the driver state with safe_malloc, takes
 * element 0 of args and calls cfgfatal unless it is a dictionary, passes
 * that dictionary to netlink_init() together with null_set_route, and
 * returns a new closure wrapping st->nl.cl.
 *
 * The end of the parameter list, the local declarations and the final
 * argument of the netlink_init() call are on lines missing from this
 * listing.
 */
768 static list_t
*null_apply(closure_t
*self
, struct cloc loc
, dict_t
*context
,
775 st
=safe_malloc(sizeof(*st
),"null_apply");
777 item
=list_elem(args
,0);
778 if (!item
|| item
->type
!=t_dict
)
779 cfgfatal(loc
,"null-netlink","parameter must be a dictionary\n");
781 dict
=item
->data
.dict
;
783 netlink_init(&st
->nl
,st
,loc
,dict
,"null-netlink",null_set_route
,
786 return new_closure(&st
->nl
.cl
);
/*
 * netlink_module: module initialiser, declared with the init_module type
 * on the line before its definition.  Registers null_apply in dict under
 * the name "null-netlink".
 */
789 init_module netlink_module
;
790 void netlink_module(dict_t
*dict
)
792 add_closure(dict
,"null-netlink",null_apply
);