Import release 0.04
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* We will eventually support a variety of methods for extracting
4 packets from the kernel: userv-ipif, ipif on its own (when we run
5 as root), the kernel TUN driver, SLIP to a pty, an external netlink
6 daemon. There is a performance/security tradeoff. */
7
8 /* When dealing with SLIP (to a pty, or ipif) we have separate rx, tx
9 and client buffers. When receiving we may read() any amount, not
10 just whole packets. When transmitting we need to bytestuff anyway,
11 and may be part-way through receiving. */
12
13 /* Each netlink device is actually a router, with its own IP
14 address. We do things like decreasing the TTL and recalculating the
15 header checksum, generating ICMP, responding to pings, etc. */
16
17 /* This is where we have the anti-spoofing paranoia - before sending a
18 packet to the kernel we check that the tunnel it came over could
19 reasonably have produced it. */
20
21 /* XXX now implement TUN. Kernel needs recompiling. */
22
23 #include <stdio.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <fcntl.h>
27 #include <sys/ioctl.h>
28
29 #include "config.h"
30 #include "secnet.h"
31 #include "util.h"
32
33 #ifdef HAVE_LINUX_IF_H
34 #include <linux/if.h>
35 #include <linux/if_tun.h>
36 #endif
37
38 /* XXX where do we find if_tun on other architectures? */
39
/* Size of the on-stack SLIP receive/transmit staging buffers */
#define DEFAULT_BUFSIZE 0x800
/* MTU used when the configuration does not supply "mtu" */
#define DEFAULT_MTU 1000
/* Size of the buffer in which outgoing ICMP messages are assembled */
#define ICMP_BUFSIZE 0x400

/* SLIP framing bytes */
#define SLIP_END 0xC0     /* frame delimiter */
#define SLIP_ESC 0xDB     /* introduces an escaped byte */
#define SLIP_ESCEND 0xDC  /* after SLIP_ESC: stands for SLIP_END */
#define SLIP_ESCESC 0xDD  /* after SLIP_ESC: stands for SLIP_ESC */
48
49 struct netlink_client {
50 struct subnet_list *networks;
51 netlink_deliver_fn *deliver;
52 void *dst;
53 string_t name;
54 bool_t can_deliver;
55 struct netlink_client *next;
56 };
57
58 /* Netlink provides one function to the device driver, to call to deliver
59 a packet from the device. The device driver provides one function to
60 netlink, for it to call to deliver a packet to the device. */
61
62 struct netlink {
63 closure_t cl;
64 struct netlink_if ops;
65 void *dst; /* Pointer to host interface state */
66 string_t name;
67 uint32_t max_start_pad;
68 uint32_t max_end_pad;
69 struct subnet_list networks;
70 uint32_t local_address; /* host interface address */
71 uint32_t secnet_address; /* our own address */
72 uint32_t mtu;
73 struct netlink_client *clients;
74 netlink_deliver_fn *deliver_to_host; /* Provided by driver */
75 struct buffer_if icmp; /* Buffer for assembly of outgoing ICMP */
76 };
77
/* Generic Internet checksum (RFC 1071) over `count' bytes starting at
 * `iph'.
 *
 * The data is summed as big-endian 16-bit words.  If `count' is odd the
 * final byte is treated as the high-order byte of a last word padded
 * with zero.  The folded sum is complemented and returned in network
 * byte order, so the result can be stored straight into a header
 * checksum field; running the routine over data that already carries a
 * correct checksum yields 0.
 *
 * Bytes are fetched individually, so `iph' does not have to be aligned
 * on a 16-bit boundary.
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8; /* odd byte: pad on the right with 0 */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16); /* fold carries back in */
    return htons((uint16_t)~sum);
}
94
/* NOTE(review): compilers running in strict ISO mode define __i386__
   but not i386 - confirm the build flags if the assembler version is
   expected to be selected. */
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * `iph' points at the header and `ihl' is its length in 32-bit words.
 * The assembler is written as adjacent string literals, one
 * instruction per line, because a string literal may not contain a raw
 * newline.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
        "    movl (%1), %0\n"
        "    subl $4, %2\n"
        "    jbe 2f\n"
        "    addl 4(%1), %0\n"
        "    adcl 8(%1), %0\n"
        "    adcl 12(%1), %0\n"
        "1:  adcl 16(%1), %0\n"
        "    lea 4(%1), %1\n"
        "    decl %2\n"
        "    jne 1b\n"
        "    adcl $0, %0\n"
        "    movl %0, %2\n"
        "    shrl $16, %0\n"
        "    addw %w2, %w0\n"
        "    adcl $0, %0\n"
        "    notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        /* The header is read through %1, which the operand list does
           not reveal to the compiler. */
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum `ihl' 32-bit words with ip_csum(). */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
138
/* Fixed part of an IPv4 header, laid out as it appears in a packet.
   The code in this file converts the multi-byte fields with
   ntohs()/ntohl()/htons()/htonl() whenever it reads or writes them.
   The order of the two 4-bit fields sharing the first byte depends on
   WORDS_BIGENDIAN. */
struct iphdr {
#if !defined (WORDS_BIGENDIAN)
    uint8_t ihl:4;      /* header length in 32-bit words */
    uint8_t version:4;  /* IP version */
#else
    uint8_t version:4;  /* IP version */
    uint8_t ihl:4;      /* header length in 32-bit words */
#endif
    uint8_t tos;
    uint16_t tot_len;   /* length of header plus data, in bytes */
    uint16_t id;
    uint16_t frag_off;  /* flags and fragment offset */
    uint8_t ttl;
    uint8_t protocol;   /* 1 is ICMP */
    uint16_t check;     /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
158
159 struct icmphdr {
160 struct iphdr iph;
161 uint8_t type;
162 uint8_t code;
163 uint16_t check;
164 union {
165 uint32_t unused;
166 struct {
167 uint8_t pointer;
168 uint8_t unused1;
169 uint16_t unused2;
170 } pprob;
171 uint32_t gwaddr;
172 struct {
173 uint16_t id;
174 uint16_t seq;
175 } echo;
176 } d;
177 };
178
179 static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf);
180
181 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
182 uint32_t dest,uint16_t len)
183 {
184 struct icmphdr *h;
185
186 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
187 buffer_init(&st->icmp,st->max_start_pad);
188 h=buf_append(&st->icmp,sizeof(*h));
189
190 h->iph.version=4;
191 h->iph.ihl=5;
192 h->iph.tos=0;
193 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
194 h->iph.id=0;
195 h->iph.frag_off=0;
196 h->iph.ttl=255;
197 h->iph.protocol=1;
198 h->iph.saddr=htonl(st->secnet_address);
199 h->iph.daddr=htonl(dest);
200 h->iph.check=0;
201 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
202 h->check=0;
203 h->d.unused=0;
204
205 return h;
206 }
207
208 /* Fill in the ICMP checksum field correctly */
209 static void netlink_icmp_csum(struct icmphdr *h)
210 {
211 uint32_t len;
212
213 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
214 h->check=0;
215 h->check=ip_csum(&h->type,len);
216 }
217
218 /* RFC1122:
219 * An ICMP error message MUST NOT be sent as the result of
220 * receiving:
221 *
222 * * an ICMP error message, or
223 *
224 * * a datagram destined to an IP broadcast or IP multicast
225 * address, or
226 *
227 * * a datagram sent as a link-layer broadcast, or
228 *
229 * * a non-initial fragment, or
230 *
231 * * a datagram whose source address does not define a single
232 * host -- e.g., a zero address, a loopback address, a
233 * broadcast address, a multicast address, or a Class E
234 * address.
235 */
236 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
237 {
238 struct iphdr *iph;
239 uint32_t source;
240
241 iph=(struct iphdr *)buf->start;
242 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
243 eg. icmp echo-request */
244 /* How do we spot broadcast destination addresses? */
245 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
246 source=ntohl(iph->saddr);
247 if (source==0) return False;
248 if ((source&0xff000000)==0x7f000000) return False;
249 /* How do we spot broadcast source addresses? */
250 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
251 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
252 return True;
253 }
254
255 /* How much of the original IP packet do we include in its ICMP
256 response? The header plus up to 64 bits. */
257 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
258 {
259 struct iphdr *iph=(struct iphdr *)buf->start;
260 uint16_t hlen,plen;
261
262 hlen=iph->ihl*4;
263 /* We include the first 8 bytes of the packet data, provided they exist */
264 hlen+=8;
265 plen=ntohs(iph->tot_len);
266 return (hlen>plen?plen:hlen);
267 }
268
269 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
270 uint8_t type, uint8_t code)
271 {
272 struct iphdr *iph=(struct iphdr *)buf->start;
273 struct icmphdr *h;
274 uint16_t len;
275
276 if (netlink_icmp_may_reply(buf)) {
277 len=netlink_icmp_reply_len(buf);
278 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
279 h->type=type; h->code=code;
280 memcpy(buf_append(&st->icmp,len),buf->start,len);
281 netlink_icmp_csum(h);
282 netlink_packet_deliver(st,&st->icmp);
283 BUF_ASSERT_FREE(&st->icmp);
284 }
285 }
286
287 /*
288 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
289 * checksum.
290 *
291 * Is the datagram acceptable?
292 *
293 * 1. Length at least the size of an ip header
294 * 2. Version of 4
295 * 3. Checksums correctly.
296 * 4. Doesn't have a bogus length
297 */
298 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
299 {
300 struct iphdr *iph=(struct iphdr *)buf->start;
301 uint32_t len;
302
303 if (iph->ihl < 5 || iph->version != 4) {
304 printf("ihl/version check failed\n");
305 return False;
306 }
307 if (buf->size < iph->ihl*4) {
308 printf("buffer size check failed\n");
309 return False;
310 }
311 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) {
312 printf("checksum failed\n");
313 return False;
314 }
315 len=ntohs(iph->tot_len);
316 /* There should be no padding */
317 if (buf->size!=len || len<(iph->ihl<<2)) {
318 printf("length check failed buf->size=%d len=%d\n",buf->size,len);
319 return False;
320 }
321
322 /* XXX check that there's no source route specified */
323 return True;
324 }
325
326 static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf)
327 {
328 struct iphdr *iph=(struct iphdr *)buf->start;
329 uint32_t dest=ntohl(iph->daddr);
330 struct netlink_client *c;
331
332 BUF_ASSERT_USED(buf);
333
334 if (dest==st->secnet_address) {
335 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
336 BUF_FREE(buf);
337 return;
338 }
339
340 for (c=st->clients; c; c=c->next) {
341 if (subnet_match(c->networks,dest)) {
342 if (c->can_deliver) {
343 c->deliver(c->dst,c,buf);
344 BUF_ASSERT_FREE(buf);
345 } else {
346 /* Generate ICMP destination unreachable */
347 netlink_icmp_simple(st,buf,3,0);
348 BUF_FREE(buf);
349 }
350 return;
351 }
352 }
353 if (subnet_match(&st->networks,dest)) {
354 st->deliver_to_host(st->dst,NULL,buf);
355 BUF_ASSERT_FREE(buf);
356 return;
357 }
358 Message(M_ERROR,"%s: failed to deliver a packet (bad destination address)"
359 "\nXXX make this message clearer\n");
360 BUF_FREE(buf);
361 }
362
363 static void netlink_packet_forward(struct netlink *st, struct buffer_if *buf)
364 {
365 struct iphdr *iph=(struct iphdr *)buf->start;
366
367 BUF_ASSERT_USED(buf);
368
369 /* Packet has already been checked */
370 if (iph->ttl<=1) {
371 /* Generate ICMP time exceeded */
372 netlink_icmp_simple(st,buf,11,0);
373 BUF_FREE(buf);
374 return;
375 }
376 iph->ttl--;
377 iph->check=0;
378 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
379
380 netlink_packet_deliver(st,buf);
381 BUF_ASSERT_FREE(buf);
382 }
383
384 /* Someone has been foolish enough to address a packet to us. I
385 suppose we should reply to it, just to be polite. */
386 static void netlink_packet_local(struct netlink *st, struct buffer_if *buf)
387 {
388 struct icmphdr *h;
389
390 h=(struct icmphdr *)buf->start;
391
392 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
393 Message(M_WARNING,"%s: fragmented packet addressed to us\n",st->name);
394 BUF_FREE(buf);
395 return;
396 }
397
398 if (h->iph.protocol==1) {
399 /* It's ICMP */
400 if (h->type==8 && h->code==0) {
401 /* ICMP echo-request. Special case: we re-use the buffer
402 to construct the reply. */
403 h->type=0;
404 h->iph.daddr=h->iph.saddr;
405 h->iph.saddr=htonl(st->secnet_address);
406 h->iph.ttl=255; /* Be nice and bump it up again... */
407 h->iph.check=0;
408 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
409 netlink_icmp_csum(h);
410 netlink_packet_deliver(st,buf);
411 return;
412 }
413 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
414 } else {
415 /* Send ICMP protocol unreachable */
416 netlink_icmp_simple(st,buf,3,2);
417 BUF_FREE(buf);
418 return;
419 }
420
421 BUF_FREE(buf);
422 }
423
424 /* Called by site code when remote packet is available */
425 /* buf is allocated on entry and free on return */
426 static void netlink_from_tunnel(void *sst, void *cst, struct buffer_if *buf)
427 {
428 struct netlink *st=sst;
429 struct netlink_client *client=cst;
430 uint32_t source,dest;
431 struct iphdr *iph;
432
433 BUF_ASSERT_USED(buf);
434 if (!netlink_check(st,buf)) {
435 Message(M_WARNING,"%s: bad IP packet from tunnel %s\n",
436 st->name,client->name);
437 BUF_FREE(buf);
438 return;
439 }
440 iph=(struct iphdr *)buf->start;
441
442 source=ntohl(iph->saddr);
443 dest=ntohl(iph->daddr);
444
445 /* Check that the packet source is in 'nets' and its destination is
446 in client->networks */
447 if (!subnet_match(client->networks,source)) {
448 string_t s,d;
449 s=ipaddr_to_string(source);
450 d=ipaddr_to_string(dest);
451 Message(M_WARNING,"%s: packet from tunnel %s with bad source address "
452 "(s=%s,d=%s)\n",st->name,client->name,s,d);
453 free(s); free(d);
454 BUF_FREE(buf);
455 return;
456 }
457 /* (st->secnet_address needs checking before matching against
458 st->networks because secnet's IP address may not be in the
459 range the host is willing to deal with) */
460 if (dest==st->secnet_address) {
461 netlink_packet_local(st,buf);
462 BUF_ASSERT_FREE(buf);
463 return;
464 }
465 if (!subnet_match(&st->networks,dest)) {
466 string_t s,d;
467 s=ipaddr_to_string(source);
468 d=ipaddr_to_string(dest);
469 Message(M_WARNING,"%s: incoming packet from tunnel %s "
470 "with bad destination address "
471 "(s=%s,d=%s)\n",st->name,client->name,s,d);
472 free(s); free(d);
473 BUF_FREE(buf);
474 return;
475 }
476
477 netlink_packet_forward(st,buf);
478
479 BUF_ASSERT_FREE(buf);
480 }
481
482 /* Called by driver code when packet is received from kernel */
483 /* cid should be NULL */
484 /* buf should be allocated on entry, and is free on return */
485 static void netlink_from_host(void *sst, void *cid, struct buffer_if *buf)
486 {
487 struct netlink *st=sst;
488 uint32_t source,dest;
489 struct iphdr *iph;
490
491 BUF_ASSERT_USED(buf);
492 if (!netlink_check(st,buf)) {
493 Message(M_WARNING,"%s: bad IP packet from host\n",
494 st->name);
495 BUF_FREE(buf);
496 return;
497 }
498 iph=(struct iphdr *)buf->start;
499
500 source=ntohl(iph->saddr);
501 dest=ntohl(iph->daddr);
502
503 if (!subnet_match(&st->networks,source)) {
504 string_t s,d;
505 s=ipaddr_to_string(source);
506 d=ipaddr_to_string(dest);
507 Message(M_WARNING,"%s: outgoing packet with bad source address "
508 "(s=%s,d=%s)\n",st->name,s,d);
509 free(s); free(d);
510 BUF_FREE(buf);
511 return;
512 }
513 if (dest==st->secnet_address) {
514 netlink_packet_local(st,buf);
515 BUF_ASSERT_FREE(buf);
516 return;
517 }
518 netlink_packet_forward(st,buf);
519 BUF_ASSERT_FREE(buf);
520 }
521
522 static void netlink_set_delivery(void *sst, void *cid, bool_t can_deliver)
523 {
524 struct netlink_client *c=cid;
525
526 c->can_deliver=can_deliver;
527 }
528
529 static void *netlink_regnets(void *sst, struct subnet_list *nets,
530 netlink_deliver_fn *deliver, void *dst,
531 uint32_t max_start_pad, uint32_t max_end_pad,
532 string_t client_name)
533 {
534 struct netlink *st=sst;
535 struct netlink_client *c;
536
537 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
538 "max_start_pad=%d, max_end_pad=%d\n",
539 nets->entries,max_start_pad,max_end_pad);
540
541 c=safe_malloc(sizeof(*c),"netlink_regnets");
542 c->networks=nets;
543 c->deliver=deliver;
544 c->dst=dst;
545 c->name=client_name; /* XXX copy it? */
546 c->can_deliver=False;
547 c->next=st->clients;
548 st->clients=c;
549 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
550 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
551
552 return c;
553 }
554
555 static netlink_deliver_fn *netlink_init(struct netlink *st,
556 void *dst, struct cloc loc,
557 dict_t *dict, string_t description,
558 netlink_deliver_fn *to_host)
559 {
560 st->dst=dst;
561 st->cl.description=description;
562 st->cl.type=CL_NETLINK;
563 st->cl.apply=NULL;
564 st->cl.interface=&st->ops;
565 st->ops.st=st;
566 st->ops.regnets=netlink_regnets;
567 st->ops.deliver=netlink_from_tunnel;
568 st->ops.set_delivery=netlink_set_delivery;
569 st->max_start_pad=0;
570 st->max_end_pad=0;
571 st->clients=NULL;
572 st->deliver_to_host=to_host;
573
574 st->name=dict_read_string(dict,"name",False,"netlink",loc);
575 if (!st->name) st->name=description;
576 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
577 &st->networks);
578 st->local_address=string_to_ipaddr(
579 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
580 st->secnet_address=string_to_ipaddr(
581 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
582 if (!subnet_match(&st->networks,st->local_address)) {
583 cfgfatal(loc,"netlink","local-address must be in local networks\n");
584 }
585 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
586 buffer_new(&st->icmp,ICMP_BUFSIZE);
587
588 return netlink_from_host;
589 }
590
591 /* Connection to the kernel through userv-ipif */
592
593 struct userv {
594 struct netlink nl;
595 int txfd; /* We transmit to userv */
596 int rxfd; /* We receive from userv */
597 string_t userv_path;
598 string_t service_user;
599 string_t service_name;
600 uint32_t txbuflen;
601 struct buffer_if *buff; /* We unstuff received packets into here
602 and send them to the site code. */
603 bool_t pending_esc;
604 netlink_deliver_fn *netlink_to_tunnel;
605 };
606
607 static int userv_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
608 int *timeout_io, const struct timeval *tv_now,
609 uint64_t *now)
610 {
611 struct userv *st=sst;
612 *nfds_io=2;
613 fds[0].fd=st->txfd;
614 fds[0].events=POLLERR; /* Might want to pick up POLLOUT sometime */
615 fds[1].fd=st->rxfd;
616 fds[1].events=POLLIN|POLLERR|POLLHUP;
617 return 0;
618 }
619
620 static void userv_afterpoll(void *sst, struct pollfd *fds, int nfds,
621 const struct timeval *tv_now, uint64_t *now)
622 {
623 struct userv *st=sst;
624 uint8_t rxbuf[DEFAULT_BUFSIZE];
625 int l,i;
626
627 if (fds[1].revents&POLLERR) {
628 printf("userv_afterpoll: hup!\n");
629 }
630 if (fds[1].revents&POLLIN) {
631 l=read(st->rxfd,rxbuf,DEFAULT_BUFSIZE);
632 if (l<0) {
633 fatal_perror("userv_afterpoll: read(rxfd)");
634 }
635 if (l==0) {
636 fatal("userv_afterpoll: read(rxfd)=0; userv gone away?\n");
637 }
638 /* XXX really crude unstuff code */
639 /* XXX check for buffer overflow */
640 BUF_ASSERT_USED(st->buff);
641 for (i=0; i<l; i++) {
642 if (st->pending_esc) {
643 st->pending_esc=False;
644 switch(rxbuf[i]) {
645 case SLIP_ESCEND:
646 *(uint8_t *)buf_append(st->buff,1)=SLIP_END;
647 break;
648 case SLIP_ESCESC:
649 *(uint8_t *)buf_append(st->buff,1)=SLIP_ESC;
650 break;
651 default:
652 fatal("userv_afterpoll: bad SLIP escape character\n");
653 }
654 } else {
655 switch (rxbuf[i]) {
656 case SLIP_END:
657 if (st->buff->size>0) {
658 st->netlink_to_tunnel(&st->nl,NULL,
659 st->buff);
660 BUF_ALLOC(st->buff,"userv_afterpoll");
661 }
662 buffer_init(st->buff,st->nl.max_start_pad);
663 break;
664 case SLIP_ESC:
665 st->pending_esc=True;
666 break;
667 default:
668 *(uint8_t *)buf_append(st->buff,1)=rxbuf[i];
669 break;
670 }
671 }
672 }
673 }
674 }
675
676 /* Send buf to the kernel. Free buf before returning. */
677 static void userv_deliver_to_kernel(void *sst, void *cid,
678 struct buffer_if *buf)
679 {
680 struct userv *st=sst;
681 uint8_t txbuf[DEFAULT_BUFSIZE];
682 uint8_t *i;
683 uint32_t j;
684
685 BUF_ASSERT_USED(buf);
686
687 /* Spit the packet at userv-ipif: SLIP start marker, then
688 bytestuff the packet, then SLIP end marker */
689 /* XXX crunchy bytestuff code */
690 j=0;
691 txbuf[j++]=SLIP_END;
692 for (i=buf->start; i<(buf->start+buf->size); i++) {
693 switch (*i) {
694 case SLIP_END:
695 txbuf[j++]=SLIP_ESC;
696 txbuf[j++]=SLIP_ESCEND;
697 break;
698 case SLIP_ESC:
699 txbuf[j++]=SLIP_ESC;
700 txbuf[j++]=SLIP_ESCESC;
701 break;
702 default:
703 txbuf[j++]=*i;
704 break;
705 }
706 }
707 txbuf[j++]=SLIP_END;
708 if (write(st->txfd,txbuf,j)<0) {
709 fatal_perror("userv_deliver_to_kernel: write()");
710 }
711 BUF_FREE(buf);
712 }
713
714 static void userv_phase_hook(void *sst, uint32_t newphase)
715 {
716 struct userv *st=sst;
717 pid_t child;
718 int c_stdin[2];
719 int c_stdout[2];
720 string_t addrs;
721 string_t nets;
722 string_t s;
723 struct netlink_client *c;
724 int i;
725
726 /* This is where we actually invoke userv - all the networks we'll
727 be using should already have been registered. */
728
729 addrs=safe_malloc(512,"userv_phase_hook:addrs");
730 snprintf(addrs,512,"%s,%s,%d,slip",ipaddr_to_string(st->nl.local_address),
731 ipaddr_to_string(st->nl.secnet_address),st->nl.mtu);
732
733 nets=safe_malloc(1024,"userv_phase_hook:nets");
734 *nets=0;
735 for (c=st->nl.clients; c; c=c->next) {
736 for (i=0; i<c->networks->entries; i++) {
737 s=subnet_to_string(&c->networks->list[i]);
738 strcat(nets,s);
739 strcat(nets,",");
740 free(s);
741 }
742 }
743 nets[strlen(nets)-1]=0;
744
745 Message(M_INFO,"\nuserv_phase_hook: %s %s %s %s %s\n",st->userv_path,
746 st->service_user,st->service_name,addrs,nets);
747
748 /* Allocate buffer, plus space for padding. Make sure we end up
749 with the start of the packet well-aligned. */
750 /* ALIGN(st->max_start_pad,16); */
751 /* ALIGN(st->max_end_pad,16); */
752
753 st->pending_esc=False;
754
755 /* Invoke userv */
756 if (pipe(c_stdin)!=0) {
757 fatal_perror("userv_phase_hook: pipe(c_stdin)");
758 }
759 if (pipe(c_stdout)!=0) {
760 fatal_perror("userv_phase_hook: pipe(c_stdout)");
761 }
762 st->txfd=c_stdin[1];
763 st->rxfd=c_stdout[0];
764
765 child=fork();
766 if (child==-1) {
767 fatal_perror("userv_phase_hook: fork()");
768 }
769 if (child==0) {
770 char **argv;
771
772 /* We are the child. Modify our stdin and stdout, then exec userv */
773 dup2(c_stdin[0],0);
774 dup2(c_stdout[1],1);
775 close(c_stdin[1]);
776 close(c_stdout[0]);
777
778 /* The arguments are:
779 userv
780 service-user
781 service-name
782 local-addr,secnet-addr,mtu,protocol
783 route1,route2,... */
784 argv=malloc(sizeof(*argv)*6);
785 argv[0]=st->userv_path;
786 argv[1]=st->service_user;
787 argv[2]=st->service_name;
788 argv[3]=addrs;
789 argv[4]=nets;
790 argv[5]=NULL;
791 execvp(st->userv_path,argv);
792 perror("netlink-userv-ipif: execvp");
793
794 exit(1);
795 }
796 /* We are the parent... */
797
798 /* Register for poll() */
799 register_for_poll(st, userv_beforepoll, userv_afterpoll, 2, st->nl.name);
800 }
801
802 static list_t *userv_apply(closure_t *self, struct cloc loc, dict_t *context,
803 list_t *args)
804 {
805 struct userv *st;
806 item_t *item;
807 dict_t *dict;
808
809 st=safe_malloc(sizeof(*st),"userv_apply");
810
811 /* First parameter must be a dict */
812 item=list_elem(args,0);
813 if (!item || item->type!=t_dict)
814 cfgfatal(loc,"userv-ipif","parameter must be a dictionary\n");
815
816 dict=item->data.dict;
817
818 st->netlink_to_tunnel=
819 netlink_init(&st->nl,st,loc,dict,
820 "netlink-userv-ipif",userv_deliver_to_kernel);
821
822 st->userv_path=dict_read_string(dict,"userv-path",False,"userv-netlink",
823 loc);
824 st->service_user=dict_read_string(dict,"service-user",False,
825 "userv-netlink",loc);
826 st->service_name=dict_read_string(dict,"service-name",False,
827 "userv-netlink",loc);
828 if (!st->userv_path) st->userv_path="userv";
829 if (!st->service_user) st->service_user="root";
830 if (!st->service_name) st->service_name="ipif";
831 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"userv-netlink",loc);
832 BUF_ALLOC(st->buff,"netlink:userv_apply");
833
834 st->rxfd=-1; st->txfd=-1;
835 add_hook(PHASE_DROPPRIV,userv_phase_hook,st);
836
837 return new_closure(&st->nl.cl);
838 }
839
840 /* Connection to the kernel through the universal TUN/TAP driver */
841
842 struct tun {
843 struct netlink nl;
844 int fd;
845 string_t device_path;
846 string_t interface_name;
847 string_t ifconfig_path;
848 string_t route_path;
849 struct buffer_if *buff; /* We receive packets into here
850 and send them to the netlink code. */
851 netlink_deliver_fn *netlink_to_tunnel;
852 };
853
854 static int tun_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
855 int *timeout_io, const struct timeval *tv_now,
856 uint64_t *now)
857 {
858 struct tun *st=sst;
859 *nfds_io=1;
860 fds[0].fd=st->fd;
861 fds[0].events=POLLIN|POLLERR|POLLHUP;
862 return 0;
863 }
864
865 static void tun_afterpoll(void *sst, struct pollfd *fds, int nfds,
866 const struct timeval *tv_now, uint64_t *now)
867 {
868 struct tun *st=sst;
869 int l;
870
871 if (fds[0].revents&POLLERR) {
872 printf("tun_afterpoll: hup!\n");
873 }
874 if (fds[0].revents&POLLIN) {
875 BUF_ALLOC(st->buff,"tun_afterpoll");
876 buffer_init(st->buff,st->nl.max_start_pad);
877 l=read(st->fd,st->buff->start,st->buff->len-st->nl.max_start_pad);
878 if (l<0) {
879 fatal_perror("tun_afterpoll: read()");
880 }
881 if (l==0) {
882 fatal("tun_afterpoll: read()=0; device gone away?\n");
883 }
884 if (l>0) {
885 st->buff->size=l;
886 st->netlink_to_tunnel(&st->nl,NULL,st->buff);
887 BUF_ASSERT_FREE(st->buff);
888 }
889 }
890 }
891
892 static void tun_deliver_to_kernel(void *sst, void *cid,
893 struct buffer_if *buf)
894 {
895 struct tun *st=sst;
896
897 BUF_ASSERT_USED(buf);
898
899 /* No error checking, because we'd just throw the packet away anyway */
900 write(st->fd,buf->start,buf->size);
901 BUF_FREE(buf);
902 }
903
904 static void tun_phase_hook(void *sst, uint32_t newphase)
905 {
906 struct tun *st=sst;
907 string_t hostaddr,secnetaddr;
908 uint8_t mtu[6];
909 string_t network,mask;
910 struct netlink_client *c;
911 int i;
912
913 /* All the networks we'll be using have been registered. Invoke ifconfig
914 to set the TUN device's address, and route to add routes to all
915 our networks. */
916
917 hostaddr=ipaddr_to_string(st->nl.local_address);
918 secnetaddr=ipaddr_to_string(st->nl.secnet_address);
919 snprintf(mtu,6,"%d",st->nl.mtu);
920 mtu[5]=0;
921
922 sys_cmd(st->ifconfig_path,"ifconfig",st->interface_name,
923 hostaddr,"netmask","255.255.255.255","-broadcast",
924 "pointopoint",secnetaddr,"mtu",mtu,"up",(char *)0);
925
926 for (c=st->nl.clients; c; c=c->next) {
927 for (i=0; i<c->networks->entries; i++) {
928 network=ipaddr_to_string(c->networks->list[i].prefix);
929 mask=ipaddr_to_string(c->networks->list[i].mask);
930 sys_cmd(st->route_path,"route","add","-net",network,
931 "netmask",mask,"gw",secnetaddr,(char *)0);
932 }
933 }
934
935 /* Register for poll() */
936 register_for_poll(st, tun_beforepoll, tun_afterpoll, 1, st->nl.name);
937 }
938
#ifdef HAVE_LINUX_IF_H
/* Closure "tun": create a netlink connected to the kernel through the
 * TUN driver (TUNSETIFF interface).  The single argument must be a
 * dictionary.  In addition to the keys read by netlink_init() it may
 * contain
 *
 *   device         device file         (default "/dev/net/tun")
 *   interface      interface name to request; if absent the name
 *                  chosen by the kernel is used
 *   ifconfig-path  ifconfig program    (default "ifconfig")
 *   route-path     route program       (default "route")
 *   buffer         buffer closure used for received packets (required)
 *
 * The device is opened here; the interface is configured later by a
 * hook registered for PHASE_DROPPRIV.  Failure to open the device or
 * to set it up is fatal.
 */
static list_t *tun_apply(closure_t *self, struct cloc loc, dict_t *context,
                         list_t *args)
{
    struct tun *st;
    item_t *item;
    dict_t *dict;
    struct ifreq ifr;

    st=safe_malloc(sizeof(*st),"tun_apply");

    /* First parameter must be a dict */
    item=list_elem(args,0);
    if (!item || item->type!=t_dict)
        cfgfatal(loc,"tun","parameter must be a dictionary\n");

    dict=item->data.dict;

    st->netlink_to_tunnel=
        netlink_init(&st->nl,st,loc,dict,
                     "netlink-tun",tun_deliver_to_kernel);

    st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
    st->interface_name=dict_read_string(dict,"interface",False,
                                        "tun-netlink",loc);
    /* Each program path has its own key; reading "device" for these
       would make a configured device file get executed as ifconfig
       and route. */
    st->ifconfig_path=dict_read_string(dict,"ifconfig-path",False,
                                       "tun-netlink",loc);
    st->route_path=dict_read_string(dict,"route-path",False,
                                    "tun-netlink",loc);

    if (!st->device_path) st->device_path="/dev/net/tun";
    if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
    if (!st->route_path) st->route_path="route";
    st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);

    /* New TUN interface: open the device, then do ioctl TUNSETIFF
       to set or find out the network interface name. */
    st->fd=open(st->device_path,O_RDWR);
    if (st->fd==-1) {
        fatal_perror("%s: can't open device file %s",st->nl.name,
                     st->device_path);
    }
    memset(&ifr,0,sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI; /* Just send/receive IP packets,
                                            no extra headers */
    if (st->interface_name) {
        /* Copy at most IFNAMSIZ-1 bytes so that the zero left by the
           memset() above always terminates the name */
        strncpy(ifr.ifr_name,st->interface_name,IFNAMSIZ-1);
    }
    if (ioctl(st->fd,TUNSETIFF,&ifr)<0) {
        fatal_perror("%s: ioctl(TUNSETIFF)",st->nl.name);
    }
    if (!st->interface_name) {
        st->interface_name=safe_malloc(strlen(ifr.ifr_name)+1,"tun_apply");
        strcpy(st->interface_name,ifr.ifr_name);
        Message(M_INFO,"%s: allocated network interface %s\n",st->nl.name,
                st->interface_name);
    }

    add_hook(PHASE_DROPPRIV,tun_phase_hook,st);

    return new_closure(&st->nl.cl);
}
#endif /* HAVE_LINUX_IF_H */
999
1000 static list_t *tun_old_apply(closure_t *self, struct cloc loc, dict_t *context,
1001 list_t *args)
1002 {
1003 struct tun *st;
1004 item_t *item;
1005 dict_t *dict;
1006 bool_t search_for_if;
1007
1008 st=safe_malloc(sizeof(*st),"tun_old_apply");
1009
1010 Message(M_WARNING,"the tun-old code has never been tested. Please report "
1011 "success or failure to steve@greenend.org.uk\n");
1012
1013 /* First parameter must be a dict */
1014 item=list_elem(args,0);
1015 if (!item || item->type!=t_dict)
1016 cfgfatal(loc,"tun","parameter must be a dictionary\n");
1017
1018 dict=item->data.dict;
1019
1020 st->netlink_to_tunnel=
1021 netlink_init(&st->nl,st,loc,dict,
1022 "netlink-tun",tun_deliver_to_kernel);
1023
1024 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1025 st->interface_name=dict_read_string(dict,"interface",False,
1026 "tun-netlink",loc);
1027 search_for_if=dict_read_bool(dict,"interface-search",False,"tun-netlink",
1028 loc,st->device_path==NULL);
1029 st->ifconfig_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1030 st->route_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1031
1032 if (!st->device_path) st->device_path="/dev/tun";
1033 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1034 if (!st->route_path) st->route_path="route";
1035 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1036
1037 /* Old TUN interface: the network interface name depends on which
1038 /dev/tunX file we open. If 'interface-search' is set to true, treat
1039 'device' as the prefix and try numbers from 0--255. If it's set
1040 to false, treat 'device' as the whole name, and require than an
1041 appropriate interface name be specified. */
1042 if (search_for_if) {
1043 string_t dname;
1044 int i;
1045
1046 if (st->interface_name) {
1047 cfgfatal(loc,"tun-old","you may not specify an interface name "
1048 "in interface-search mode\n");
1049 }
1050 dname=safe_malloc(strlen(st->device_path)+4,"tun_old_apply");
1051 st->interface_name=safe_malloc(8,"tun_old_apply");
1052
1053 for (i=0; i<255; i++) {
1054 sprintf(dname,"%s%d",st->device_path,i);
1055 if ((st->fd=open(dname,O_RDWR))>0) {
1056 sprintf(st->interface_name,"tun%d",i);
1057 Message(M_INFO,"%s: allocated network interface %s "
1058 "through %s\n",st->nl.name,st->interface_name,dname);
1059 continue;
1060 }
1061 }
1062 if (st->fd==-1) {
1063 fatal("%s: unable to open any TUN device (%s...)\n",
1064 st->nl.name,st->device_path);
1065 }
1066 } else {
1067 if (!st->interface_name) {
1068 cfgfatal(loc,"tun-old","you must specify an interface name "
1069 "when you explicitly specify a TUN device file\n");
1070 }
1071 st->fd=open(st->device_path,O_RDWR);
1072 if (st->fd==-1) {
1073 fatal_perror("%s: unable to open TUN device file %s",
1074 st->nl.name,st->device_path);
1075 }
1076 }
1077
1078 add_hook(PHASE_DROPPRIV,tun_phase_hook,st);
1079
1080 return new_closure(&st->nl.cl);
1081 }
1082
1083 /* No connection to the kernel at all... */
1084
1085 struct null {
1086 struct netlink nl;
1087 };
1088
/* Deliver-to-host function of the null netlink: the packet is simply
   discarded.  The buffer still has to be released, because
   netlink_packet_deliver() asserts that it is free once the driver's
   deliver function has returned. */
static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1093
1094 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1095 list_t *args)
1096 {
1097 struct null *st;
1098 item_t *item;
1099 dict_t *dict;
1100
1101 st=safe_malloc(sizeof(*st),"null_apply");
1102
1103 item=list_elem(args,0);
1104 if (!item || item->type!=t_dict)
1105 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1106
1107 dict=item->data.dict;
1108
1109 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_deliver);
1110
1111 return new_closure(&st->nl.cl);
1112 }
1113
1114 init_module netlink_module;
1115 void netlink_module(dict_t *dict)
1116 {
1117 add_closure(dict,"userv-ipif",userv_apply);
1118 #ifdef HAVE_LINUX_IF_H
1119 add_closure(dict,"tun",tun_apply);
1120 #endif
1121 add_closure(dict,"tun-old",tun_old_apply);
1122 add_closure(dict,"null-netlink",null_apply);
1123 #if 0
1124 /* TODO */
1125 add_closure(dict,"pty-slip",ptyslip_apply);
1126 add_closure(dict,"slipd",slipd_apply);
1127 #endif /* 0 */
1128 }