Import release 0.1.1
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* We support a variety of methods for extracting packets from the
4 kernel: userv-ipif, ipif on its own (when we run as root), the
5 kernel TUN driver. Possible future methods: SLIP to a pty, an
6 external netlink daemon. There is a performance/security
7 tradeoff. */
8
9 /* When dealing with SLIP (to a pty, or ipif) we have separate rx, tx
10 and client buffers. When receiving we may read() any amount, not
11 just whole packets. When transmitting we need to bytestuff anyway,
12 and may be part-way through receiving. */
13
14 /* Each netlink device is actually a router, with its own IP address.
15 We do things like decreasing the TTL and recalculating the header
16 checksum, generating ICMP, responding to pings, etc. */
17
18 /* This is where we have the anti-spoofing paranoia - before sending a
19 packet to the kernel we check that the tunnel it came over could
20 reasonably have produced it. */
21
#include "secnet.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include "util.h"
29
30 #ifdef HAVE_LINUX_IF_H
31 #include <linux/if.h>
32 #include <linux/if_tun.h>
33 #endif
34
35 /* XXX where do we find if_tun on other platforms? */
36
/* Size in bytes of the on-stack scratch buffers used for SLIP traffic
   to and from userv */
#define DEFAULT_BUFSIZE 2048
/* MTU used when the configuration dictionary has no "mtu" entry */
#define DEFAULT_MTU 1000
/* Size passed to buffer_new() for the ICMP assembly buffer */
#define ICMP_BUFSIZE 1024

/* SLIP framing bytes.  END delimits packets; inside a packet the pair
   ESC,ESCEND stands for a literal END byte and ESC,ESCESC stands for a
   literal ESC byte. */
#define SLIP_END 192
#define SLIP_ESC 219
#define SLIP_ESCEND 220
#define SLIP_ESCESC 221
45
/* One registered user of a netlink (created by netlink_regnets()) */
struct netlink_client {
    struct subnet_list *networks; /* Subnets supplied at registration;
                                     packet sources are checked
                                     against this list */
    netlink_deliver_fn *deliver;  /* Called to hand a packet to the client */
    void *dst;                    /* First argument passed to deliver() */
    string_t name;                /* Used in log messages (not copied) */
    uint32_t link_quality;        /* Starts at LINK_QUALITY_DOWN; updated
                                     by netlink_set_quality() */
    struct netlink_client *next;  /* Next entry in the netlink's list */
};
54
/* One routing table entry: packets whose destination matches "net"
   are handed to client "c" */
struct netlink_route {
    struct subnet net;
    struct netlink_client *c;
};
59
60 /* Netlink provides one function to the device driver, to call to deliver
61 a packet from the device. The device driver provides one function to
62 netlink, for it to call to deliver a packet to the device. */
63
/* State shared by every kind of netlink device; the driver-specific
   structures below embed it as their first member. */
struct netlink {
    closure_t cl;          /* Closure describing this netlink */
    struct netlink_if ops; /* Interface offered to clients: regnets,
                              deliver and set_quality */
    void *dst; /* Pointer to host interface state */
    string_t name;         /* Name used in log messages */
    uint32_t max_start_pad; /* Largest start padding any client asked for */
    uint32_t max_end_pad;   /* Largest end padding any client asked for */
    struct subnet_list networks; /* "networks" configuration entry:
                                    addresses reachable via the host */
    struct subnet_list exclude_remote_networks; /* Clients may not
                                    register subnets intersecting these */
    uint32_t local_address; /* host interface address */
    uint32_t secnet_address; /* our own address */
    uint32_t mtu;           /* MTU configured on the host interface */
    struct netlink_client *clients; /* Linked list of registered clients */
    netlink_deliver_fn *deliver_to_host; /* Provided by driver */
    struct buffer_if icmp; /* Buffer for assembly of outgoing ICMP */
    uint32_t n_routes; /* How many routes do we know about? */
    struct netlink_route *routes; /* Table built by netlink_phase_hook(),
                                     most specific route first */
};
82
/* Generic IP checksum routine.
 *
 * Computes the Internet checksum (the 16-bit one's-complement of the
 * one's-complement sum) over "count" bytes starting at "iph".  The
 * data is summed as big-endian 16-bit words; when "count" is odd the
 * final byte is treated as the high-order byte of a word padded with
 * zero.  The result is returned in network byte order, so it can be
 * stored directly into a header checksum field.
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    /* Build each word from individual bytes so that the data does not
       have to be 16-bit aligned. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* A trailing odd byte occupies the high half of the last word */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
99
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph points at the IP header and ihl is its length in 32-bit words.
 * NOTE(review): for ihl<=4 the code branches straight to label 2 and
 * returns the first header word without complementing it; callers are
 * expected to pass ihl>=5.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    /* The template is written as adjacent string literals, one
       instruction per line, because a literal may not contain a raw
       newline. */
    __asm__ __volatile__(
        "movl (%1), %0\n\t"
        "subl $4, %2\n\t"
        "jbe 2f\n\t"
        "addl 4(%1), %0\n\t"
        "adcl 8(%1), %0\n\t"
        "adcl 12(%1), %0\n"
        "1:\tadcl 16(%1), %0\n\t"
        "lea 4(%1), %1\n\t"
        "decl %2\n\t"
        "jne 1b\n\t"
        "adcl $0, %0\n\t"
        "movl %0, %2\n\t"
        "shrl $16, %0\n\t"
        "addw %w2, %w0\n\t"
        "adcl $0, %0\n\t"
        "notl %0\n"
        "2:"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        /* The header is read through the pointer, so earlier stores to
           it (such as zeroing the checksum field) must not be moved
           past this statement. */
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum ihl 32-bit words using ip_csum() */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
143
/* IPv4 header as it appears in a packet buffer.  The code in this file
   converts the multi-byte fields with ntohs()/ntohl()/htons()/htonl()
   whenever it reads or writes them. */
struct iphdr {
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4,
            ihl:4;
#else
    uint8_t ihl:4,          /* Header length in 32-bit words */
            version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;       /* Length of header plus payload, in bytes */
    uint16_t id;
    uint16_t frag_off;      /* Flags and fragment offset (low 13 bits) */
    uint8_t ttl;
    uint8_t protocol;       /* 1 is ICMP */
    uint16_t check;         /* Header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
163
/* An option-less IP header immediately followed by the ICMP header.
   Which member of "d" applies depends on the ICMP type. */
struct icmphdr {
    struct iphdr iph;
    uint8_t type;
    uint8_t code;
    uint16_t check;         /* Checksum, see netlink_icmp_csum() */
    union {
        uint32_t unused;
        struct {
            uint8_t pointer;
            uint8_t unused1;
            uint16_t unused2;
        } pprob;
        uint32_t gwaddr;
        struct {
            uint16_t id;
            uint16_t seq;
        } echo;
    } d;
};
183
/* Forward declaration: the ICMP helpers below call this before it is
   defined. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
187
188 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
189 uint32_t dest,uint16_t len)
190 {
191 struct icmphdr *h;
192
193 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
194 buffer_init(&st->icmp,st->max_start_pad);
195 h=buf_append(&st->icmp,sizeof(*h));
196
197 h->iph.version=4;
198 h->iph.ihl=5;
199 h->iph.tos=0;
200 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
201 h->iph.id=0;
202 h->iph.frag_off=0;
203 h->iph.ttl=255;
204 h->iph.protocol=1;
205 h->iph.saddr=htonl(st->secnet_address);
206 h->iph.daddr=htonl(dest);
207 h->iph.check=0;
208 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
209 h->check=0;
210 h->d.unused=0;
211
212 return h;
213 }
214
215 /* Fill in the ICMP checksum field correctly */
216 static void netlink_icmp_csum(struct icmphdr *h)
217 {
218 uint32_t len;
219
220 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
221 h->check=0;
222 h->check=ip_csum(&h->type,len);
223 }
224
225 /* RFC1122:
226 * An ICMP error message MUST NOT be sent as the result of
227 * receiving:
228 *
229 * * an ICMP error message, or
230 *
231 * * a datagram destined to an IP broadcast or IP multicast
232 * address, or
233 *
234 * * a datagram sent as a link-layer broadcast, or
235 *
236 * * a non-initial fragment, or
237 *
238 * * a datagram whose source address does not define a single
239 * host -- e.g., a zero address, a loopback address, a
240 * broadcast address, a multicast address, or a Class E
241 * address.
242 */
243 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
244 {
245 struct iphdr *iph;
246 uint32_t source;
247
248 iph=(struct iphdr *)buf->start;
249 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
250 eg. icmp echo-request */
251 /* How do we spot broadcast destination addresses? */
252 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
253 source=ntohl(iph->saddr);
254 if (source==0) return False;
255 if ((source&0xff000000)==0x7f000000) return False;
256 /* How do we spot broadcast source addresses? */
257 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
258 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
259 return True;
260 }
261
262 /* How much of the original IP packet do we include in its ICMP
263 response? The header plus up to 64 bits. */
264 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
265 {
266 struct iphdr *iph=(struct iphdr *)buf->start;
267 uint16_t hlen,plen;
268
269 hlen=iph->ihl*4;
270 /* We include the first 8 bytes of the packet data, provided they exist */
271 hlen+=8;
272 plen=ntohs(iph->tot_len);
273 return (hlen>plen?plen:hlen);
274 }
275
276 /* client indicates where the packet we're constructing a response to
277 comes from. NULL indicates the host. */
278 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
279 struct netlink_client *client,
280 uint8_t type, uint8_t code)
281 {
282 struct iphdr *iph=(struct iphdr *)buf->start;
283 struct icmphdr *h;
284 uint16_t len;
285
286 if (netlink_icmp_may_reply(buf)) {
287 len=netlink_icmp_reply_len(buf);
288 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
289 h->type=type; h->code=code;
290 memcpy(buf_append(&st->icmp,len),buf->start,len);
291 netlink_icmp_csum(h);
292 netlink_packet_deliver(st,NULL,&st->icmp);
293 BUF_ASSERT_FREE(&st->icmp);
294 }
295 }
296
297 /*
298 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
299 * checksum.
300 *
301 * Is the datagram acceptable?
302 *
303 * 1. Length at least the size of an ip header
304 * 2. Version of 4
305 * 3. Checksums correctly.
306 * 4. Doesn't have a bogus length
307 */
308 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
309 {
310 struct iphdr *iph=(struct iphdr *)buf->start;
311 uint32_t len;
312
313 if (iph->ihl < 5 || iph->version != 4) {
314 printf("ihl/version check failed\n");
315 return False;
316 }
317 if (buf->size < iph->ihl*4) {
318 printf("buffer size check failed\n");
319 return False;
320 }
321 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) {
322 printf("checksum failed\n");
323 return False;
324 }
325 len=ntohs(iph->tot_len);
326 /* There should be no padding */
327 if (buf->size!=len || len<(iph->ihl<<2)) {
328 printf("length check failed buf->size=%d len=%d\n",buf->size,len);
329 return False;
330 }
331
332 /* XXX check that there's no source route specified */
333 return True;
334 }
335
336 /* Deliver a packet. "client" points to the _origin_ of the packet, not
337 its destination. (May be used when sending ICMP response - avoid
338 asymmetric routing.) */
339 static void netlink_packet_deliver(struct netlink *st,
340 struct netlink_client *client,
341 struct buffer_if *buf)
342 {
343 struct iphdr *iph=(struct iphdr *)buf->start;
344 uint32_t dest=ntohl(iph->daddr);
345 uint32_t source=ntohl(iph->saddr);
346 uint32_t best_quality;
347 int best_match;
348 int i;
349
350 BUF_ASSERT_USED(buf);
351
352 if (dest==st->secnet_address) {
353 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
354 BUF_FREE(buf);
355 return;
356 }
357
358 if (!client) {
359 /* Origin of packet is host or secnet. Might be for a tunnel. */
360 best_quality=0;
361 best_match=-1;
362 for (i=0; i<st->n_routes; i++) {
363 if (subnet_match(&st->routes[i].net,dest)) {
364 if (st->routes[i].c->link_quality>best_quality
365 || best_quality==0) {
366 best_quality=st->routes[i].c->link_quality;
367 best_match=i;
368 /* If quality isn't perfect we may wish to
369 consider kicking the tunnel with a 0-length
370 packet to prompt it to perform a key setup.
371 Then it'll eventually decide it's up or
372 down. */
373 /* If quality is perfect we don't need to search
374 any more. */
375 if (best_quality>=MAXIMUM_LINK_QUALITY) break;
376 }
377 }
378 }
379 if (best_match==-1) {
380 /* Not going down a tunnel. Might be for the host.
381 XXX think about this - only situation should be if we're
382 sending ICMP. */
383 if (source!=st->secnet_address) {
384 Message(M_ERROR,"netlink_packet_deliver: outgoing packet "
385 "from host that won't fit down any of our tunnels!\n");
386 BUF_FREE(buf);
387 } else {
388 st->deliver_to_host(st->dst,NULL,buf);
389 BUF_ASSERT_FREE(buf);
390 }
391 } else {
392 if (best_quality>0) {
393 st->routes[best_match].c->deliver(
394 st->routes[best_match].c->dst,
395 st->routes[best_match].c, buf);
396 BUF_ASSERT_FREE(buf);
397 } else {
398 /* Generate ICMP destination unreachable */
399 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
400 BUF_FREE(buf);
401 }
402 }
403 } else { /* client is set */
404 /* We know the origin is a tunnel - packet must be for the host */
405 if (subnet_matches_list(&st->networks,dest)) {
406 st->deliver_to_host(st->dst,NULL,buf);
407 BUF_ASSERT_FREE(buf);
408 } else {
409 Message(M_ERROR,"%s: packet from tunnel %s can't be delivered "
410 "to the host\n",st->name,client->name);
411 netlink_icmp_simple(st,buf,client,3,0);
412 BUF_FREE(buf);
413 }
414 }
415 BUF_ASSERT_FREE(buf);
416 }
417
418 static void netlink_packet_forward(struct netlink *st,
419 struct netlink_client *client,
420 struct buffer_if *buf)
421 {
422 struct iphdr *iph=(struct iphdr *)buf->start;
423
424 BUF_ASSERT_USED(buf);
425
426 /* Packet has already been checked */
427 if (iph->ttl<=1) {
428 /* Generate ICMP time exceeded */
429 netlink_icmp_simple(st,buf,client,11,0);
430 BUF_FREE(buf);
431 return;
432 }
433 iph->ttl--;
434 iph->check=0;
435 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
436
437 netlink_packet_deliver(st,client,buf);
438 BUF_ASSERT_FREE(buf);
439 }
440
441 /* Someone has been foolish enough to address a packet to us. I
442 suppose we should reply to it, just to be polite. */
443 static void netlink_packet_local(struct netlink *st,
444 struct netlink_client *client,
445 struct buffer_if *buf)
446 {
447 struct icmphdr *h;
448
449 h=(struct icmphdr *)buf->start;
450
451 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
452 Message(M_WARNING,"%s: fragmented packet addressed to us\n",st->name);
453 BUF_FREE(buf);
454 return;
455 }
456
457 if (h->iph.protocol==1) {
458 /* It's ICMP */
459 if (h->type==8 && h->code==0) {
460 /* ICMP echo-request. Special case: we re-use the buffer
461 to construct the reply. */
462 h->type=0;
463 h->iph.daddr=h->iph.saddr;
464 h->iph.saddr=htonl(st->secnet_address);
465 h->iph.ttl=255; /* Be nice and bump it up again... */
466 h->iph.check=0;
467 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
468 netlink_icmp_csum(h);
469 netlink_packet_deliver(st,NULL,buf);
470 return;
471 }
472 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
473 } else {
474 /* Send ICMP protocol unreachable */
475 netlink_icmp_simple(st,buf,client,3,2);
476 BUF_FREE(buf);
477 return;
478 }
479
480 BUF_FREE(buf);
481 }
482
483 /* Called by site code when remote packet is available */
484 /* buf is allocated on entry and free on return */
485 static void netlink_from_tunnel(void *sst, void *cst, struct buffer_if *buf)
486 {
487 struct netlink *st=sst;
488 struct netlink_client *client=cst;
489 uint32_t source,dest;
490 struct iphdr *iph;
491
492 BUF_ASSERT_USED(buf);
493 if (!netlink_check(st,buf)) {
494 Message(M_WARNING,"%s: bad IP packet from tunnel %s\n",
495 st->name,client->name);
496 BUF_FREE(buf);
497 return;
498 }
499 iph=(struct iphdr *)buf->start;
500
501 source=ntohl(iph->saddr);
502 dest=ntohl(iph->daddr);
503
504 /* Check that the packet source is in 'nets' and its destination is
505 in st->networks */
506 if (!subnet_matches_list(client->networks,source)) {
507 string_t s,d;
508 s=ipaddr_to_string(source);
509 d=ipaddr_to_string(dest);
510 Message(M_WARNING,"%s: packet from tunnel %s with bad source address "
511 "(s=%s,d=%s)\n",st->name,client->name,s,d);
512 free(s); free(d);
513 BUF_FREE(buf);
514 return;
515 }
516 /* (st->secnet_address needs checking before matching against
517 st->networks because secnet's IP address may not be in the
518 range the host is willing to deal with) */
519 if (dest==st->secnet_address) {
520 netlink_packet_local(st,client,buf);
521 BUF_ASSERT_FREE(buf);
522 return;
523 }
524 if (!subnet_matches_list(&st->networks,dest)) {
525 string_t s,d;
526 s=ipaddr_to_string(source);
527 d=ipaddr_to_string(dest);
528 Message(M_WARNING,"%s: incoming packet from tunnel %s "
529 "with bad destination address "
530 "(s=%s,d=%s)\n",st->name,client->name,s,d);
531 free(s); free(d);
532 BUF_FREE(buf);
533 return;
534 }
535
536 netlink_packet_forward(st,client,buf);
537
538 BUF_ASSERT_FREE(buf);
539 }
540
541 /* Called by driver code when packet is received from kernel */
542 /* cid should be NULL */
543 /* buf should be allocated on entry, and is free on return */
544 static void netlink_from_host(void *sst, void *cid, struct buffer_if *buf)
545 {
546 struct netlink *st=sst;
547 uint32_t source,dest;
548 struct iphdr *iph;
549
550 BUF_ASSERT_USED(buf);
551 if (!netlink_check(st,buf)) {
552 Message(M_WARNING,"%s: bad IP packet from host\n",
553 st->name);
554 BUF_FREE(buf);
555 return;
556 }
557 iph=(struct iphdr *)buf->start;
558
559 source=ntohl(iph->saddr);
560 dest=ntohl(iph->daddr);
561
562 if (!subnet_matches_list(&st->networks,source)) {
563 string_t s,d;
564 s=ipaddr_to_string(source);
565 d=ipaddr_to_string(dest);
566 Message(M_WARNING,"%s: outgoing packet with bad source address "
567 "(s=%s,d=%s)\n",st->name,s,d);
568 free(s); free(d);
569 BUF_FREE(buf);
570 return;
571 }
572 if (dest==st->secnet_address) {
573 netlink_packet_local(st,NULL,buf);
574 BUF_ASSERT_FREE(buf);
575 return;
576 }
577 netlink_packet_forward(st,NULL,buf);
578 BUF_ASSERT_FREE(buf);
579 }
580
581 static void netlink_set_quality(void *sst, void *cid, uint32_t quality)
582 {
583 struct netlink_client *c=cid;
584
585 c->link_quality=quality;
586 }
587
588 static void *netlink_regnets(void *sst, struct subnet_list *nets,
589 netlink_deliver_fn *deliver, void *dst,
590 uint32_t max_start_pad, uint32_t max_end_pad,
591 string_t client_name)
592 {
593 struct netlink *st=sst;
594 struct netlink_client *c;
595
596 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
597 "max_start_pad=%d, max_end_pad=%d\n",
598 nets->entries,max_start_pad,max_end_pad);
599
600 /* Check that nets does not intersect with st->networks or
601 st->exclude_remote_networks; refuse to register if it does. */
602 if (subnet_lists_intersect(&st->networks,nets)) {
603 Message(M_ERROR,"%s: site %s specifies networks that "
604 "intersect with our local networks\n",st->name,client_name);
605 return False;
606 }
607 if (subnet_lists_intersect(&st->exclude_remote_networks,nets)) {
608 Message(M_ERROR,"%s: site %s specifies networks that "
609 "intersect with the explicitly excluded remote networks\n",
610 st->name,client_name);
611 return False;
612 }
613
614 c=safe_malloc(sizeof(*c),"netlink_regnets");
615 c->networks=nets;
616 c->deliver=deliver;
617 c->dst=dst;
618 c->name=client_name; /* XXX copy it? */
619 c->link_quality=LINK_QUALITY_DOWN;
620 c->next=st->clients;
621 st->clients=c;
622 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
623 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
624 st->n_routes+=nets->entries;
625
626 return c;
627 }
628
629 static int netlink_compare_route_specificity(const void *ap, const void *bp)
630 {
631 const struct netlink_route *a=ap;
632 const struct netlink_route *b=bp;
633
634 if (a->net.len==b->net.len) return 0;
635 if (a->net.len<b->net.len) return 1;
636 return -1;
637 }
638
639 static void netlink_phase_hook(void *sst, uint32_t new_phase)
640 {
641 struct netlink *st=sst;
642 struct netlink_client *c;
643 uint32_t i,j;
644
645 /* All the networks serviced by the various tunnels should now
646 * have been registered. We build a routing table by sorting the
647 * routes into most-specific-first order. */
648 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
649 "netlink_phase_hook");
650 /* Fill the table */
651 i=0;
652 for (c=st->clients; c; c=c->next) {
653 for (j=0; j<c->networks->entries; j++) {
654 st->routes[i].net=c->networks->list[j];
655 st->routes[i].c=c;
656 i++;
657 }
658 }
659 /* ASSERT i==st->n_routes */
660 if (i!=st->n_routes) {
661 fatal("netlink: route count error: expected %d got %d\n",
662 st->n_routes,i);
663 }
664 /* Sort the table in descending order of specificity */
665 qsort(st->routes,st->n_routes,sizeof(*st->routes),
666 netlink_compare_route_specificity);
667 Message(M_INFO,"%s: routing table:\n",st->name);
668 for (i=0; i<st->n_routes; i++) {
669 string_t net;
670 net=subnet_to_string(&st->routes[i].net);
671 Message(M_INFO,"%s -> tunnel %s\n",net,st->routes[i].c->name);
672 free(net);
673 }
674 Message(M_INFO,"%s/32 -> netlink \"%s\"\n",
675 ipaddr_to_string(st->secnet_address),st->name);
676 for (i=0; i<st->networks.entries; i++) {
677 string_t net;
678 net=subnet_to_string(&st->networks.list[i]);
679 Message(M_INFO,"%s -> host\n",net);
680 free(net);
681 }
682 }
683
684 static netlink_deliver_fn *netlink_init(struct netlink *st,
685 void *dst, struct cloc loc,
686 dict_t *dict, string_t description,
687 netlink_deliver_fn *to_host)
688 {
689 st->dst=dst;
690 st->cl.description=description;
691 st->cl.type=CL_NETLINK;
692 st->cl.apply=NULL;
693 st->cl.interface=&st->ops;
694 st->ops.st=st;
695 st->ops.regnets=netlink_regnets;
696 st->ops.deliver=netlink_from_tunnel;
697 st->ops.set_quality=netlink_set_quality;
698 st->max_start_pad=0;
699 st->max_end_pad=0;
700 st->clients=NULL;
701 st->deliver_to_host=to_host;
702
703 st->name=dict_read_string(dict,"name",False,"netlink",loc);
704 if (!st->name) st->name=description;
705 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
706 &st->networks);
707 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
708 loc, &st->exclude_remote_networks);
709 /* local-address and secnet-address do not have to be in local-networks;
710 however, they should be advertised in the 'sites' file for the
711 local site. */
712 st->local_address=string_to_ipaddr(
713 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
714 st->secnet_address=string_to_ipaddr(
715 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
716 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
717 buffer_new(&st->icmp,ICMP_BUFSIZE);
718 st->n_routes=0;
719 st->routes=NULL;
720
721 add_hook(PHASE_SETUP,netlink_phase_hook,st);
722
723 return netlink_from_host;
724 }
725
726 /* Connection to the kernel through userv-ipif */
727
/* State for a netlink that talks SLIP to a userv child process */
struct userv {
    struct netlink nl;     /* Generic netlink state */
    int txfd; /* We transmit to userv */
    int rxfd; /* We receive from userv */
    string_t userv_path;   /* Program to execute; defaults to "userv" */
    string_t service_user; /* First userv argument; defaults to "root" */
    string_t service_name; /* Second userv argument; defaults to "ipif" */
    uint32_t txbuflen;
    struct buffer_if *buff; /* We unstuff received packets into here
                               and send them to the site code. */
    bool_t pending_esc;    /* True when the last byte read was SLIP_ESC */
    netlink_deliver_fn *netlink_to_tunnel; /* Returned by netlink_init();
                                              called with each packet
                                              received from userv */
};
741
742 static int userv_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
743 int *timeout_io, const struct timeval *tv_now,
744 uint64_t *now)
745 {
746 struct userv *st=sst;
747 *nfds_io=2;
748 fds[0].fd=st->txfd;
749 fds[0].events=POLLERR; /* Might want to pick up POLLOUT sometime */
750 fds[1].fd=st->rxfd;
751 fds[1].events=POLLIN|POLLERR|POLLHUP;
752 return 0;
753 }
754
755 static void userv_afterpoll(void *sst, struct pollfd *fds, int nfds,
756 const struct timeval *tv_now, uint64_t *now)
757 {
758 struct userv *st=sst;
759 uint8_t rxbuf[DEFAULT_BUFSIZE];
760 int l,i;
761
762 if (fds[1].revents&POLLERR) {
763 printf("userv_afterpoll: hup!\n");
764 }
765 if (fds[1].revents&POLLIN) {
766 l=read(st->rxfd,rxbuf,DEFAULT_BUFSIZE);
767 if (l<0) {
768 fatal_perror("userv_afterpoll: read(rxfd)");
769 }
770 if (l==0) {
771 fatal("userv_afterpoll: read(rxfd)=0; userv gone away?\n");
772 }
773 /* XXX really crude unstuff code */
774 /* XXX check for buffer overflow */
775 BUF_ASSERT_USED(st->buff);
776 for (i=0; i<l; i++) {
777 if (st->pending_esc) {
778 st->pending_esc=False;
779 switch(rxbuf[i]) {
780 case SLIP_ESCEND:
781 *(uint8_t *)buf_append(st->buff,1)=SLIP_END;
782 break;
783 case SLIP_ESCESC:
784 *(uint8_t *)buf_append(st->buff,1)=SLIP_ESC;
785 break;
786 default:
787 fatal("userv_afterpoll: bad SLIP escape character\n");
788 }
789 } else {
790 switch (rxbuf[i]) {
791 case SLIP_END:
792 if (st->buff->size>0) {
793 st->netlink_to_tunnel(&st->nl,NULL,
794 st->buff);
795 BUF_ALLOC(st->buff,"userv_afterpoll");
796 }
797 buffer_init(st->buff,st->nl.max_start_pad);
798 break;
799 case SLIP_ESC:
800 st->pending_esc=True;
801 break;
802 default:
803 *(uint8_t *)buf_append(st->buff,1)=rxbuf[i];
804 break;
805 }
806 }
807 }
808 }
809 }
810
811 /* Send buf to the kernel. Free buf before returning. */
812 static void userv_deliver_to_kernel(void *sst, void *cid,
813 struct buffer_if *buf)
814 {
815 struct userv *st=sst;
816 uint8_t txbuf[DEFAULT_BUFSIZE];
817 uint8_t *i;
818 uint32_t j;
819
820 BUF_ASSERT_USED(buf);
821
822 /* Spit the packet at userv-ipif: SLIP start marker, then
823 bytestuff the packet, then SLIP end marker */
824 /* XXX crunchy bytestuff code */
825 j=0;
826 txbuf[j++]=SLIP_END;
827 for (i=buf->start; i<(buf->start+buf->size); i++) {
828 switch (*i) {
829 case SLIP_END:
830 txbuf[j++]=SLIP_ESC;
831 txbuf[j++]=SLIP_ESCEND;
832 break;
833 case SLIP_ESC:
834 txbuf[j++]=SLIP_ESC;
835 txbuf[j++]=SLIP_ESCESC;
836 break;
837 default:
838 txbuf[j++]=*i;
839 break;
840 }
841 }
842 txbuf[j++]=SLIP_END;
843 if (write(st->txfd,txbuf,j)<0) {
844 fatal_perror("userv_deliver_to_kernel: write()");
845 }
846 BUF_FREE(buf);
847 }
848
849 static void userv_phase_hook(void *sst, uint32_t newphase)
850 {
851 struct userv *st=sst;
852 pid_t child;
853 int c_stdin[2];
854 int c_stdout[2];
855 string_t addrs;
856 string_t nets;
857 string_t s;
858 struct netlink_client *c;
859 int i;
860
861 /* This is where we actually invoke userv - all the networks we'll
862 be using should already have been registered. */
863
864 addrs=safe_malloc(512,"userv_phase_hook:addrs");
865 snprintf(addrs,512,"%s,%s,%d,slip",ipaddr_to_string(st->nl.local_address),
866 ipaddr_to_string(st->nl.secnet_address),st->nl.mtu);
867
868 nets=safe_malloc(1024,"userv_phase_hook:nets");
869 *nets=0;
870 for (c=st->nl.clients; c; c=c->next) {
871 for (i=0; i<c->networks->entries; i++) {
872 s=subnet_to_string(&c->networks->list[i]);
873 strcat(nets,s);
874 strcat(nets,",");
875 free(s);
876 }
877 }
878 nets[strlen(nets)-1]=0;
879
880 Message(M_INFO,"\nuserv_phase_hook: %s %s %s %s %s\n",st->userv_path,
881 st->service_user,st->service_name,addrs,nets);
882
883 /* Allocate buffer, plus space for padding. Make sure we end up
884 with the start of the packet well-aligned. */
885 /* ALIGN(st->max_start_pad,16); */
886 /* ALIGN(st->max_end_pad,16); */
887
888 st->pending_esc=False;
889
890 /* Invoke userv */
891 if (pipe(c_stdin)!=0) {
892 fatal_perror("userv_phase_hook: pipe(c_stdin)");
893 }
894 if (pipe(c_stdout)!=0) {
895 fatal_perror("userv_phase_hook: pipe(c_stdout)");
896 }
897 st->txfd=c_stdin[1];
898 st->rxfd=c_stdout[0];
899
900 child=fork();
901 if (child==-1) {
902 fatal_perror("userv_phase_hook: fork()");
903 }
904 if (child==0) {
905 char **argv;
906
907 /* We are the child. Modify our stdin and stdout, then exec userv */
908 dup2(c_stdin[0],0);
909 dup2(c_stdout[1],1);
910 close(c_stdin[1]);
911 close(c_stdout[0]);
912
913 /* The arguments are:
914 userv
915 service-user
916 service-name
917 local-addr,secnet-addr,mtu,protocol
918 route1,route2,... */
919 argv=malloc(sizeof(*argv)*6);
920 argv[0]=st->userv_path;
921 argv[1]=st->service_user;
922 argv[2]=st->service_name;
923 argv[3]=addrs;
924 argv[4]=nets;
925 argv[5]=NULL;
926 execvp(st->userv_path,argv);
927 perror("netlink-userv-ipif: execvp");
928
929 exit(1);
930 }
931 /* We are the parent... */
932
933 /* Register for poll() */
934 register_for_poll(st, userv_beforepoll, userv_afterpoll, 2, st->nl.name);
935 }
936
937 static list_t *userv_apply(closure_t *self, struct cloc loc, dict_t *context,
938 list_t *args)
939 {
940 struct userv *st;
941 item_t *item;
942 dict_t *dict;
943
944 st=safe_malloc(sizeof(*st),"userv_apply");
945
946 /* First parameter must be a dict */
947 item=list_elem(args,0);
948 if (!item || item->type!=t_dict)
949 cfgfatal(loc,"userv-ipif","parameter must be a dictionary\n");
950
951 dict=item->data.dict;
952
953 st->netlink_to_tunnel=
954 netlink_init(&st->nl,st,loc,dict,
955 "netlink-userv-ipif",userv_deliver_to_kernel);
956
957 st->userv_path=dict_read_string(dict,"userv-path",False,"userv-netlink",
958 loc);
959 st->service_user=dict_read_string(dict,"service-user",False,
960 "userv-netlink",loc);
961 st->service_name=dict_read_string(dict,"service-name",False,
962 "userv-netlink",loc);
963 if (!st->userv_path) st->userv_path="userv";
964 if (!st->service_user) st->service_user="root";
965 if (!st->service_name) st->service_name="ipif";
966 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"userv-netlink",loc);
967 BUF_ALLOC(st->buff,"netlink:userv_apply");
968
969 st->rxfd=-1; st->txfd=-1;
970 add_hook(PHASE_DROPPRIV,userv_phase_hook,st);
971
972 return new_closure(&st->nl.cl);
973 }
974
975 /* Connection to the kernel through the universal TUN/TAP driver */
976
/* State for a netlink attached to a kernel TUN device */
struct tun {
    struct netlink nl;       /* Generic netlink state */
    int fd;                  /* Open TUN device */
    string_t device_path;    /* Device file to open (or its prefix when
                                searching for a free tun-old device) */
    string_t interface_name; /* Network interface name given to ifconfig */
    string_t ifconfig_path;  /* Passed to sys_cmd() to run ifconfig */
    string_t route_path;     /* Passed to sys_cmd() to run route */
    bool_t tun_old;          /* Use the old-style interface (no TUNSETIFF) */
    bool_t search_for_if; /* Applies to tun-old only */
    struct buffer_if *buff; /* We receive packets into here
                               and send them to the netlink code. */
    netlink_deliver_fn *netlink_to_tunnel; /* Called with each packet
                                              read from the device */
};
990
991 static int tun_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
992 int *timeout_io, const struct timeval *tv_now,
993 uint64_t *now)
994 {
995 struct tun *st=sst;
996 *nfds_io=1;
997 fds[0].fd=st->fd;
998 fds[0].events=POLLIN|POLLERR|POLLHUP;
999 return 0;
1000 }
1001
1002 static void tun_afterpoll(void *sst, struct pollfd *fds, int nfds,
1003 const struct timeval *tv_now, uint64_t *now)
1004 {
1005 struct tun *st=sst;
1006 int l;
1007
1008 if (fds[0].revents&POLLERR) {
1009 printf("tun_afterpoll: hup!\n");
1010 }
1011 if (fds[0].revents&POLLIN) {
1012 BUF_ALLOC(st->buff,"tun_afterpoll");
1013 buffer_init(st->buff,st->nl.max_start_pad);
1014 l=read(st->fd,st->buff->start,st->buff->len-st->nl.max_start_pad);
1015 if (l<0) {
1016 fatal_perror("tun_afterpoll: read()");
1017 }
1018 if (l==0) {
1019 fatal("tun_afterpoll: read()=0; device gone away?\n");
1020 }
1021 if (l>0) {
1022 st->buff->size=l;
1023 st->netlink_to_tunnel(&st->nl,NULL,st->buff);
1024 BUF_ASSERT_FREE(st->buff);
1025 }
1026 }
1027 }
1028
1029 static void tun_deliver_to_kernel(void *sst, void *cid,
1030 struct buffer_if *buf)
1031 {
1032 struct tun *st=sst;
1033
1034 BUF_ASSERT_USED(buf);
1035
1036 /* No error checking, because we'd just throw the packet away anyway */
1037 write(st->fd,buf->start,buf->size);
1038 BUF_FREE(buf);
1039 }
1040
1041 static void tun_phase_hook(void *sst, uint32_t newphase)
1042 {
1043 struct tun *st=sst;
1044 string_t hostaddr,secnetaddr;
1045 uint8_t mtu[6];
1046 string_t network,mask;
1047 struct netlink_client *c;
1048 int i;
1049
1050 if (st->tun_old) {
1051 if (st->search_for_if) {
1052 string_t dname;
1053 int i;
1054
1055 /* ASSERT st->interface_name */
1056 dname=safe_malloc(strlen(st->device_path)+4,"tun_old_apply");
1057 st->interface_name=safe_malloc(8,"tun_phase_hook");
1058
1059 for (i=0; i<255; i++) {
1060 sprintf(dname,"%s%d",st->device_path,i);
1061 if ((st->fd=open(dname,O_RDWR))>0) {
1062 sprintf(st->interface_name,"tun%d",i);
1063 Message(M_INFO,"%s: allocated network interface %s "
1064 "through %s\n",st->nl.name,st->interface_name,
1065 dname);
1066 break;
1067 }
1068 }
1069 if (st->fd==-1) {
1070 fatal("%s: unable to open any TUN device (%s...)\n",
1071 st->nl.name,st->device_path);
1072 }
1073 } else {
1074 st->fd=open(st->device_path,O_RDWR);
1075 if (st->fd==-1) {
1076 fatal_perror("%s: unable to open TUN device file %s",
1077 st->nl.name,st->device_path);
1078 }
1079 }
1080 } else {
1081 #ifdef HAVE_LINUX_IF_H
1082 struct ifreq ifr;
1083
1084 /* New TUN interface: open the device, then do ioctl TUNSETIFF
1085 to set or find out the network interface name. */
1086 st->fd=open(st->device_path,O_RDWR);
1087 if (st->fd==-1) {
1088 fatal_perror("%s: can't open device file %s",st->nl.name,
1089 st->device_path);
1090 }
1091 memset(&ifr,0,sizeof(ifr));
1092 ifr.ifr_flags = IFF_TUN | IFF_NO_PI; /* Just send/receive IP packets,
1093 no extra headers */
1094 if (st->interface_name)
1095 strncpy(ifr.ifr_name,st->interface_name,IFNAMSIZ);
1096 Message(M_INFO,"%s: about to ioctl(TUNSETIFF)...\n",st->nl.name);
1097 if (ioctl(st->fd,TUNSETIFF,&ifr)<0) {
1098 fatal_perror("%s: ioctl(TUNSETIFF)",st->nl.name);
1099 }
1100 if (!st->interface_name) {
1101 st->interface_name=safe_malloc(strlen(ifr.ifr_name)+1,"tun_apply");
1102 strcpy(st->interface_name,ifr.ifr_name);
1103 Message(M_INFO,"%s: allocated network interface %s\n",st->nl.name,
1104 st->interface_name);
1105 }
1106 #else
1107 fatal("netlink.c:tun_phase_hook:!tun_old unexpected\n");
1108 #endif /* HAVE_LINUX_IF_H */
1109 }
1110 /* All the networks we'll be using have been registered. Invoke ifconfig
1111 to set the TUN device's address, and route to add routes to all
1112 our networks. */
1113
1114 hostaddr=ipaddr_to_string(st->nl.local_address);
1115 secnetaddr=ipaddr_to_string(st->nl.secnet_address);
1116 snprintf(mtu,6,"%d",st->nl.mtu);
1117 mtu[5]=0;
1118
1119 sys_cmd(st->ifconfig_path,"ifconfig",st->interface_name,
1120 hostaddr,"netmask","255.255.255.255","-broadcast",
1121 "pointopoint",secnetaddr,"mtu",mtu,"up",(char *)0);
1122
1123 for (c=st->nl.clients; c; c=c->next) {
1124 for (i=0; i<c->networks->entries; i++) {
1125 network=ipaddr_to_string(c->networks->list[i].prefix);
1126 mask=ipaddr_to_string(c->networks->list[i].mask);
1127 sys_cmd(st->route_path,"route","add","-net",network,
1128 "netmask",mask,"gw",secnetaddr,(char *)0);
1129 }
1130 }
1131
1132 /* Register for poll() */
1133 register_for_poll(st, tun_beforepoll, tun_afterpoll, 1, st->nl.name);
1134 }
1135
1136 #ifdef HAVE_LINUX_IF_H
1137 static list_t *tun_apply(closure_t *self, struct cloc loc, dict_t *context,
1138 list_t *args)
1139 {
1140 struct tun *st;
1141 item_t *item;
1142 dict_t *dict;
1143
1144 st=safe_malloc(sizeof(*st),"tun_apply");
1145
1146 /* First parameter must be a dict */
1147 item=list_elem(args,0);
1148 if (!item || item->type!=t_dict)
1149 cfgfatal(loc,"tun","parameter must be a dictionary\n");
1150
1151 dict=item->data.dict;
1152
1153 st->netlink_to_tunnel=
1154 netlink_init(&st->nl,st,loc,dict,
1155 "netlink-tun",tun_deliver_to_kernel);
1156
1157 st->tun_old=False;
1158 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1159 st->interface_name=dict_read_string(dict,"interface",False,
1160 "tun-netlink",loc);
1161 st->ifconfig_path=dict_read_string(dict,"ifconfig-path",
1162 False,"tun-netlink",loc);
1163 st->route_path=dict_read_string(dict,"route-path",
1164 False,"tun-netlink",loc);
1165
1166 if (!st->device_path) st->device_path="/dev/net/tun";
1167 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1168 if (!st->route_path) st->route_path="route";
1169 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1170
1171 add_hook(PHASE_GETRESOURCES,tun_phase_hook,st);
1172
1173 return new_closure(&st->nl.cl);
1174 }
1175 #endif /* HAVE_LINUX_IF_H */
1176
1177 static list_t *tun_old_apply(closure_t *self, struct cloc loc, dict_t *context,
1178 list_t *args)
1179 {
1180 struct tun *st;
1181 item_t *item;
1182 dict_t *dict;
1183
1184 st=safe_malloc(sizeof(*st),"tun_old_apply");
1185
1186 Message(M_WARNING,"the tun-old code has never been tested. Please report "
1187 "success or failure to steve@greenend.org.uk\n");
1188
1189 /* First parameter must be a dict */
1190 item=list_elem(args,0);
1191 if (!item || item->type!=t_dict)
1192 cfgfatal(loc,"tun","parameter must be a dictionary\n");
1193
1194 dict=item->data.dict;
1195
1196 st->netlink_to_tunnel=
1197 netlink_init(&st->nl,st,loc,dict,
1198 "netlink-tun",tun_deliver_to_kernel);
1199
1200 st->tun_old=True;
1201 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1202 st->interface_name=dict_read_string(dict,"interface",False,
1203 "tun-netlink",loc);
1204 st->search_for_if=dict_read_bool(dict,"interface-search",False,
1205 "tun-netlink",loc,st->device_path==NULL);
1206 st->ifconfig_path=dict_read_string(dict,"ifconfig-path",False,
1207 "tun-netlink",loc);
1208 st->route_path=dict_read_string(dict,"route-path",False,"tun-netlink",loc);
1209
1210 if (!st->device_path) st->device_path="/dev/tun";
1211 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1212 if (!st->route_path) st->route_path="route";
1213 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1214
1215 /* Old TUN interface: the network interface name depends on which
1216 /dev/tunX file we open. If 'interface-search' is set to true, treat
1217 'device' as the prefix and try numbers from 0--255. If it's set
1218 to false, treat 'device' as the whole name, and require than an
1219 appropriate interface name be specified. */
1220 if (st->search_for_if && st->interface_name) {
1221 cfgfatal(loc,"tun-old","you may not specify an interface name "
1222 "in interface-search mode\n");
1223 }
1224 if (!st->search_for_if && !st->interface_name) {
1225 cfgfatal(loc,"tun-old","you must specify an interface name "
1226 "when you explicitly specify a TUN device file\n");
1227 }
1228
1229
1230 add_hook(PHASE_GETRESOURCES,tun_phase_hook,st);
1231
1232 return new_closure(&st->nl.cl);
1233 }
1234
1235 /* No connection to the kernel at all... */
1236
1237 struct null {
1238 struct netlink nl;
1239 };
1240
/* Delivery function handed to netlink_init by null_apply.  It does
   nothing at all with the packet it is given.

   sst, cid and buf are all ignored. */
static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
{
    /* Deliberately empty: mark the arguments as intentionally unused */
    (void)sst;
    (void)cid;
    (void)buf;
}
1245
1246 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1247 list_t *args)
1248 {
1249 struct null *st;
1250 item_t *item;
1251 dict_t *dict;
1252
1253 st=safe_malloc(sizeof(*st),"null_apply");
1254
1255 item=list_elem(args,0);
1256 if (!item || item->type!=t_dict)
1257 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1258
1259 dict=item->data.dict;
1260
1261 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_deliver);
1262
1263 return new_closure(&st->nl.cl);
1264 }
1265
1266 init_module netlink_module;
1267 void netlink_module(dict_t *dict)
1268 {
1269 add_closure(dict,"userv-ipif",userv_apply);
1270 #ifdef HAVE_LINUX_IF_H
1271 add_closure(dict,"tun",tun_apply);
1272 #endif
1273 add_closure(dict,"tun-old",tun_old_apply);
1274 add_closure(dict,"null-netlink",null_apply);
1275 #if 0
1276 /* TODO */
1277 add_closure(dict,"pty-slip",ptyslip_apply);
1278 add_closure(dict,"slipd",slipd_apply);
1279 #endif /* 0 */
1280 }