Import release 0.08
[secnet] / netlink.c
1 /* User-kernel network link */
2
3 /* We will eventually support a variety of methods for extracting
4 packets from the kernel: userv-ipif, ipif on its own (when we run
5 as root), the kernel TUN driver, SLIP to a pty, an external netlink
6 daemon. There is a performance/security tradeoff. */
7
8 /* When dealing with SLIP (to a pty, or ipif) we have separate rx, tx
9 and client buffers. When receiving we may read() any amount, not
10 just whole packets. When transmitting we need to bytestuff anyway,
11 and may be part-way through receiving. */
12
13 /* Each netlink device is actually a router, with its own IP
14 address. We do things like decreasing the TTL and recalculating the
15 header checksum, generating ICMP, responding to pings, etc. */
16
17 /* This is where we have the anti-spoofing paranoia - before sending a
18 packet to the kernel we check that the tunnel it came over could
19 reasonably have produced it. */
20
21 #include "secnet.h"
22 #include <stdio.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <fcntl.h>
26 #include <sys/ioctl.h>
27 #include "util.h"
28
29 #ifdef HAVE_LINUX_IF_H
30 #include <linux/if.h>
31 #include <linux/if_tun.h>
32 #endif
33
34 /* XXX where do we find if_tun on other architectures? */
35
/* Sizes, in bytes */
#define DEFAULT_BUFSIZE 2048 /* on-stack scratch buffers of the userv driver */
#define DEFAULT_MTU 1000     /* used when the configuration has no "mtu" key */
#define ICMP_BUFSIZE 1024    /* buffer in which outgoing ICMP is assembled */

/* SLIP framing and escape bytes */
#define SLIP_END 0xc0
#define SLIP_ESC 0xdb
#define SLIP_ESCEND 0xdc
#define SLIP_ESCESC 0xdd
44
45 struct netlink_client {
46 struct subnet_list *networks;
47 netlink_deliver_fn *deliver;
48 void *dst;
49 string_t name;
50 bool_t can_deliver;
51 struct netlink_client *next;
52 };
53
54 /* Netlink provides one function to the device driver, to call to deliver
55 a packet from the device. The device driver provides one function to
56 netlink, for it to call to deliver a packet to the device. */
57
58 struct netlink {
59 closure_t cl;
60 struct netlink_if ops;
61 void *dst; /* Pointer to host interface state */
62 string_t name;
63 uint32_t max_start_pad;
64 uint32_t max_end_pad;
65 struct subnet_list networks;
66 uint32_t local_address; /* host interface address */
67 uint32_t secnet_address; /* our own address */
68 uint32_t mtu;
69 struct netlink_client *clients;
70 netlink_deliver_fn *deliver_to_host; /* Provided by driver */
71 struct buffer_if icmp; /* Buffer for assembly of outgoing ICMP */
72 };
73
/* Generic Internet checksum routine (ones-complement sum of 16-bit words).
 *
 * iph   - first byte of the data to checksum; any alignment is accepted
 * count - number of bytes to checksum
 *
 * Returns the checksum in network byte order, ready to be stored
 * directly into a header field.  Checksumming data that already
 * contains a correct checksum gives 0. */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    /* Build every word from its two bytes rather than casting the
       pointer: that is independent of alignment and host byte order. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* An odd trailing byte is padded with a zero byte on its right, so
       it forms the HIGH-order half of the last word. */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
90
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph - start of the IP header
 * ihl - header length in 32-bit words
 *
 * Returns the header checksum, ready to be stored in the check field.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    /* The template is written as adjacent string literals: a single
       literal spanning several source lines is not valid C. */
    __asm__ __volatile__(
        "movl (%1), %0\n\t"
        "subl $4, %2\n\t"
        "jbe 2f\n\t"
        "addl 4(%1), %0\n\t"
        "adcl 8(%1), %0\n\t"
        "adcl 12(%1), %0\n"
        "1:\tadcl 16(%1), %0\n\t"
        "lea 4(%1), %1\n\t"
        "decl %2\n\t"
        "jne 1b\n\t"
        "adcl $0, %0\n\t"
        "movl %0, %2\n\t"
        "shrl $16, %0\n\t"
        "addw %w2, %w0\n\t"
        "adcl $0, %0\n\t"
        "notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        /* The code reads the header through iph, so pending stores to
           it (such as zeroing the check field) must be completed first. */
        : "memory");
    return sum;
}
#else
/* Portable version: checksum the ihl*4 header bytes with ip_csum(). */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
134
/* Fixed part of an IPv4 header, laid out as it appears in a packet.
   Multi-byte fields hold network byte order values. */
struct iphdr {
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;      /* header length in 32-bit words */
#else
    uint8_t ihl:4;      /* header length in 32-bit words */
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;   /* header plus payload, in bytes */
    uint16_t id;
    uint16_t frag_off;  /* flags and fragment offset */
    uint8_t ttl;
    uint8_t protocol;   /* 1 is ICMP */
    uint16_t check;     /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
154
155 struct icmphdr {
156 struct iphdr iph;
157 uint8_t type;
158 uint8_t code;
159 uint16_t check;
160 union {
161 uint32_t unused;
162 struct {
163 uint8_t pointer;
164 uint8_t unused1;
165 uint16_t unused2;
166 } pprob;
167 uint32_t gwaddr;
168 struct {
169 uint16_t id;
170 uint16_t seq;
171 } echo;
172 } d;
173 };
174
175 static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf);
176
177 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
178 uint32_t dest,uint16_t len)
179 {
180 struct icmphdr *h;
181
182 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
183 buffer_init(&st->icmp,st->max_start_pad);
184 h=buf_append(&st->icmp,sizeof(*h));
185
186 h->iph.version=4;
187 h->iph.ihl=5;
188 h->iph.tos=0;
189 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
190 h->iph.id=0;
191 h->iph.frag_off=0;
192 h->iph.ttl=255;
193 h->iph.protocol=1;
194 h->iph.saddr=htonl(st->secnet_address);
195 h->iph.daddr=htonl(dest);
196 h->iph.check=0;
197 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
198 h->check=0;
199 h->d.unused=0;
200
201 return h;
202 }
203
204 /* Fill in the ICMP checksum field correctly */
205 static void netlink_icmp_csum(struct icmphdr *h)
206 {
207 uint32_t len;
208
209 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
210 h->check=0;
211 h->check=ip_csum(&h->type,len);
212 }
213
214 /* RFC1122:
215 * An ICMP error message MUST NOT be sent as the result of
216 * receiving:
217 *
218 * * an ICMP error message, or
219 *
220 * * a datagram destined to an IP broadcast or IP multicast
221 * address, or
222 *
223 * * a datagram sent as a link-layer broadcast, or
224 *
225 * * a non-initial fragment, or
226 *
227 * * a datagram whose source address does not define a single
228 * host -- e.g., a zero address, a loopback address, a
229 * broadcast address, a multicast address, or a Class E
230 * address.
231 */
232 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
233 {
234 struct iphdr *iph;
235 uint32_t source;
236
237 iph=(struct iphdr *)buf->start;
238 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
239 eg. icmp echo-request */
240 /* How do we spot broadcast destination addresses? */
241 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
242 source=ntohl(iph->saddr);
243 if (source==0) return False;
244 if ((source&0xff000000)==0x7f000000) return False;
245 /* How do we spot broadcast source addresses? */
246 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
247 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
248 return True;
249 }
250
251 /* How much of the original IP packet do we include in its ICMP
252 response? The header plus up to 64 bits. */
253 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
254 {
255 struct iphdr *iph=(struct iphdr *)buf->start;
256 uint16_t hlen,plen;
257
258 hlen=iph->ihl*4;
259 /* We include the first 8 bytes of the packet data, provided they exist */
260 hlen+=8;
261 plen=ntohs(iph->tot_len);
262 return (hlen>plen?plen:hlen);
263 }
264
265 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
266 uint8_t type, uint8_t code)
267 {
268 struct iphdr *iph=(struct iphdr *)buf->start;
269 struct icmphdr *h;
270 uint16_t len;
271
272 if (netlink_icmp_may_reply(buf)) {
273 len=netlink_icmp_reply_len(buf);
274 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
275 h->type=type; h->code=code;
276 memcpy(buf_append(&st->icmp,len),buf->start,len);
277 netlink_icmp_csum(h);
278 netlink_packet_deliver(st,&st->icmp);
279 BUF_ASSERT_FREE(&st->icmp);
280 }
281 }
282
283 /*
284 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
285 * checksum.
286 *
287 * Is the datagram acceptable?
288 *
289 * 1. Length at least the size of an ip header
290 * 2. Version of 4
291 * 3. Checksums correctly.
292 * 4. Doesn't have a bogus length
293 */
294 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
295 {
296 struct iphdr *iph=(struct iphdr *)buf->start;
297 uint32_t len;
298
299 if (iph->ihl < 5 || iph->version != 4) {
300 printf("ihl/version check failed\n");
301 return False;
302 }
303 if (buf->size < iph->ihl*4) {
304 printf("buffer size check failed\n");
305 return False;
306 }
307 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) {
308 printf("checksum failed\n");
309 return False;
310 }
311 len=ntohs(iph->tot_len);
312 /* There should be no padding */
313 if (buf->size!=len || len<(iph->ihl<<2)) {
314 printf("length check failed buf->size=%d len=%d\n",buf->size,len);
315 return False;
316 }
317
318 /* XXX check that there's no source route specified */
319 return True;
320 }
321
322 static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf)
323 {
324 struct iphdr *iph=(struct iphdr *)buf->start;
325 uint32_t dest=ntohl(iph->daddr);
326 struct netlink_client *c;
327
328 BUF_ASSERT_USED(buf);
329
330 if (dest==st->secnet_address) {
331 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
332 BUF_FREE(buf);
333 return;
334 }
335
336 for (c=st->clients; c; c=c->next) {
337 if (subnet_match(c->networks,dest)) {
338 if (c->can_deliver) {
339 c->deliver(c->dst,c,buf);
340 BUF_ASSERT_FREE(buf);
341 } else {
342 /* Generate ICMP destination unreachable */
343 netlink_icmp_simple(st,buf,3,0);
344 BUF_FREE(buf);
345 }
346 return;
347 }
348 }
349 if (subnet_match(&st->networks,dest)) {
350 st->deliver_to_host(st->dst,NULL,buf);
351 BUF_ASSERT_FREE(buf);
352 return;
353 }
354 Message(M_ERROR,"%s: failed to deliver a packet (bad destination address)"
355 "\nXXX make this message clearer\n");
356 BUF_FREE(buf);
357 }
358
359 static void netlink_packet_forward(struct netlink *st, struct buffer_if *buf)
360 {
361 struct iphdr *iph=(struct iphdr *)buf->start;
362
363 BUF_ASSERT_USED(buf);
364
365 /* Packet has already been checked */
366 if (iph->ttl<=1) {
367 /* Generate ICMP time exceeded */
368 netlink_icmp_simple(st,buf,11,0);
369 BUF_FREE(buf);
370 return;
371 }
372 iph->ttl--;
373 iph->check=0;
374 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
375
376 netlink_packet_deliver(st,buf);
377 BUF_ASSERT_FREE(buf);
378 }
379
380 /* Someone has been foolish enough to address a packet to us. I
381 suppose we should reply to it, just to be polite. */
382 static void netlink_packet_local(struct netlink *st, struct buffer_if *buf)
383 {
384 struct icmphdr *h;
385
386 h=(struct icmphdr *)buf->start;
387
388 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
389 Message(M_WARNING,"%s: fragmented packet addressed to us\n",st->name);
390 BUF_FREE(buf);
391 return;
392 }
393
394 if (h->iph.protocol==1) {
395 /* It's ICMP */
396 if (h->type==8 && h->code==0) {
397 /* ICMP echo-request. Special case: we re-use the buffer
398 to construct the reply. */
399 h->type=0;
400 h->iph.daddr=h->iph.saddr;
401 h->iph.saddr=htonl(st->secnet_address);
402 h->iph.ttl=255; /* Be nice and bump it up again... */
403 h->iph.check=0;
404 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
405 netlink_icmp_csum(h);
406 netlink_packet_deliver(st,buf);
407 return;
408 }
409 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
410 } else {
411 /* Send ICMP protocol unreachable */
412 netlink_icmp_simple(st,buf,3,2);
413 BUF_FREE(buf);
414 return;
415 }
416
417 BUF_FREE(buf);
418 }
419
420 /* Called by site code when remote packet is available */
421 /* buf is allocated on entry and free on return */
422 static void netlink_from_tunnel(void *sst, void *cst, struct buffer_if *buf)
423 {
424 struct netlink *st=sst;
425 struct netlink_client *client=cst;
426 uint32_t source,dest;
427 struct iphdr *iph;
428
429 BUF_ASSERT_USED(buf);
430 if (!netlink_check(st,buf)) {
431 Message(M_WARNING,"%s: bad IP packet from tunnel %s\n",
432 st->name,client->name);
433 BUF_FREE(buf);
434 return;
435 }
436 iph=(struct iphdr *)buf->start;
437
438 source=ntohl(iph->saddr);
439 dest=ntohl(iph->daddr);
440
441 /* Check that the packet source is in 'nets' and its destination is
442 in client->networks */
443 if (!subnet_match(client->networks,source)) {
444 string_t s,d;
445 s=ipaddr_to_string(source);
446 d=ipaddr_to_string(dest);
447 Message(M_WARNING,"%s: packet from tunnel %s with bad source address "
448 "(s=%s,d=%s)\n",st->name,client->name,s,d);
449 free(s); free(d);
450 BUF_FREE(buf);
451 return;
452 }
453 /* (st->secnet_address needs checking before matching against
454 st->networks because secnet's IP address may not be in the
455 range the host is willing to deal with) */
456 if (dest==st->secnet_address) {
457 netlink_packet_local(st,buf);
458 BUF_ASSERT_FREE(buf);
459 return;
460 }
461 if (!subnet_match(&st->networks,dest)) {
462 string_t s,d;
463 s=ipaddr_to_string(source);
464 d=ipaddr_to_string(dest);
465 Message(M_WARNING,"%s: incoming packet from tunnel %s "
466 "with bad destination address "
467 "(s=%s,d=%s)\n",st->name,client->name,s,d);
468 free(s); free(d);
469 BUF_FREE(buf);
470 return;
471 }
472
473 netlink_packet_forward(st,buf);
474
475 BUF_ASSERT_FREE(buf);
476 }
477
478 /* Called by driver code when packet is received from kernel */
479 /* cid should be NULL */
480 /* buf should be allocated on entry, and is free on return */
481 static void netlink_from_host(void *sst, void *cid, struct buffer_if *buf)
482 {
483 struct netlink *st=sst;
484 uint32_t source,dest;
485 struct iphdr *iph;
486
487 BUF_ASSERT_USED(buf);
488 if (!netlink_check(st,buf)) {
489 Message(M_WARNING,"%s: bad IP packet from host\n",
490 st->name);
491 BUF_FREE(buf);
492 return;
493 }
494 iph=(struct iphdr *)buf->start;
495
496 source=ntohl(iph->saddr);
497 dest=ntohl(iph->daddr);
498
499 if (!subnet_match(&st->networks,source)) {
500 string_t s,d;
501 s=ipaddr_to_string(source);
502 d=ipaddr_to_string(dest);
503 Message(M_WARNING,"%s: outgoing packet with bad source address "
504 "(s=%s,d=%s)\n",st->name,s,d);
505 free(s); free(d);
506 BUF_FREE(buf);
507 return;
508 }
509 if (dest==st->secnet_address) {
510 netlink_packet_local(st,buf);
511 BUF_ASSERT_FREE(buf);
512 return;
513 }
514 netlink_packet_forward(st,buf);
515 BUF_ASSERT_FREE(buf);
516 }
517
518 static void netlink_set_delivery(void *sst, void *cid, bool_t can_deliver)
519 {
520 struct netlink_client *c=cid;
521
522 c->can_deliver=can_deliver;
523 }
524
525 static void *netlink_regnets(void *sst, struct subnet_list *nets,
526 netlink_deliver_fn *deliver, void *dst,
527 uint32_t max_start_pad, uint32_t max_end_pad,
528 string_t client_name)
529 {
530 struct netlink *st=sst;
531 struct netlink_client *c;
532
533 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
534 "max_start_pad=%d, max_end_pad=%d\n",
535 nets->entries,max_start_pad,max_end_pad);
536
537 c=safe_malloc(sizeof(*c),"netlink_regnets");
538 c->networks=nets;
539 c->deliver=deliver;
540 c->dst=dst;
541 c->name=client_name; /* XXX copy it? */
542 c->can_deliver=False;
543 c->next=st->clients;
544 st->clients=c;
545 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
546 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
547
548 return c;
549 }
550
551 static netlink_deliver_fn *netlink_init(struct netlink *st,
552 void *dst, struct cloc loc,
553 dict_t *dict, string_t description,
554 netlink_deliver_fn *to_host)
555 {
556 st->dst=dst;
557 st->cl.description=description;
558 st->cl.type=CL_NETLINK;
559 st->cl.apply=NULL;
560 st->cl.interface=&st->ops;
561 st->ops.st=st;
562 st->ops.regnets=netlink_regnets;
563 st->ops.deliver=netlink_from_tunnel;
564 st->ops.set_delivery=netlink_set_delivery;
565 st->max_start_pad=0;
566 st->max_end_pad=0;
567 st->clients=NULL;
568 st->deliver_to_host=to_host;
569
570 st->name=dict_read_string(dict,"name",False,"netlink",loc);
571 if (!st->name) st->name=description;
572 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
573 &st->networks);
574 st->local_address=string_to_ipaddr(
575 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
576 st->secnet_address=string_to_ipaddr(
577 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
578 if (!subnet_match(&st->networks,st->local_address)) {
579 cfgfatal(loc,"netlink","local-address must be in local networks\n");
580 }
581 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
582 buffer_new(&st->icmp,ICMP_BUFSIZE);
583
584 return netlink_from_host;
585 }
586
587 /* Connection to the kernel through userv-ipif */
588
589 struct userv {
590 struct netlink nl;
591 int txfd; /* We transmit to userv */
592 int rxfd; /* We receive from userv */
593 string_t userv_path;
594 string_t service_user;
595 string_t service_name;
596 uint32_t txbuflen;
597 struct buffer_if *buff; /* We unstuff received packets into here
598 and send them to the site code. */
599 bool_t pending_esc;
600 netlink_deliver_fn *netlink_to_tunnel;
601 };
602
603 static int userv_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
604 int *timeout_io, const struct timeval *tv_now,
605 uint64_t *now)
606 {
607 struct userv *st=sst;
608 *nfds_io=2;
609 fds[0].fd=st->txfd;
610 fds[0].events=POLLERR; /* Might want to pick up POLLOUT sometime */
611 fds[1].fd=st->rxfd;
612 fds[1].events=POLLIN|POLLERR|POLLHUP;
613 return 0;
614 }
615
616 static void userv_afterpoll(void *sst, struct pollfd *fds, int nfds,
617 const struct timeval *tv_now, uint64_t *now)
618 {
619 struct userv *st=sst;
620 uint8_t rxbuf[DEFAULT_BUFSIZE];
621 int l,i;
622
623 if (fds[1].revents&POLLERR) {
624 printf("userv_afterpoll: hup!\n");
625 }
626 if (fds[1].revents&POLLIN) {
627 l=read(st->rxfd,rxbuf,DEFAULT_BUFSIZE);
628 if (l<0) {
629 fatal_perror("userv_afterpoll: read(rxfd)");
630 }
631 if (l==0) {
632 fatal("userv_afterpoll: read(rxfd)=0; userv gone away?\n");
633 }
634 /* XXX really crude unstuff code */
635 /* XXX check for buffer overflow */
636 BUF_ASSERT_USED(st->buff);
637 for (i=0; i<l; i++) {
638 if (st->pending_esc) {
639 st->pending_esc=False;
640 switch(rxbuf[i]) {
641 case SLIP_ESCEND:
642 *(uint8_t *)buf_append(st->buff,1)=SLIP_END;
643 break;
644 case SLIP_ESCESC:
645 *(uint8_t *)buf_append(st->buff,1)=SLIP_ESC;
646 break;
647 default:
648 fatal("userv_afterpoll: bad SLIP escape character\n");
649 }
650 } else {
651 switch (rxbuf[i]) {
652 case SLIP_END:
653 if (st->buff->size>0) {
654 st->netlink_to_tunnel(&st->nl,NULL,
655 st->buff);
656 BUF_ALLOC(st->buff,"userv_afterpoll");
657 }
658 buffer_init(st->buff,st->nl.max_start_pad);
659 break;
660 case SLIP_ESC:
661 st->pending_esc=True;
662 break;
663 default:
664 *(uint8_t *)buf_append(st->buff,1)=rxbuf[i];
665 break;
666 }
667 }
668 }
669 }
670 }
671
672 /* Send buf to the kernel. Free buf before returning. */
673 static void userv_deliver_to_kernel(void *sst, void *cid,
674 struct buffer_if *buf)
675 {
676 struct userv *st=sst;
677 uint8_t txbuf[DEFAULT_BUFSIZE];
678 uint8_t *i;
679 uint32_t j;
680
681 BUF_ASSERT_USED(buf);
682
683 /* Spit the packet at userv-ipif: SLIP start marker, then
684 bytestuff the packet, then SLIP end marker */
685 /* XXX crunchy bytestuff code */
686 j=0;
687 txbuf[j++]=SLIP_END;
688 for (i=buf->start; i<(buf->start+buf->size); i++) {
689 switch (*i) {
690 case SLIP_END:
691 txbuf[j++]=SLIP_ESC;
692 txbuf[j++]=SLIP_ESCEND;
693 break;
694 case SLIP_ESC:
695 txbuf[j++]=SLIP_ESC;
696 txbuf[j++]=SLIP_ESCESC;
697 break;
698 default:
699 txbuf[j++]=*i;
700 break;
701 }
702 }
703 txbuf[j++]=SLIP_END;
704 if (write(st->txfd,txbuf,j)<0) {
705 fatal_perror("userv_deliver_to_kernel: write()");
706 }
707 BUF_FREE(buf);
708 }
709
710 static void userv_phase_hook(void *sst, uint32_t newphase)
711 {
712 struct userv *st=sst;
713 pid_t child;
714 int c_stdin[2];
715 int c_stdout[2];
716 string_t addrs;
717 string_t nets;
718 string_t s;
719 struct netlink_client *c;
720 int i;
721
722 /* This is where we actually invoke userv - all the networks we'll
723 be using should already have been registered. */
724
725 addrs=safe_malloc(512,"userv_phase_hook:addrs");
726 snprintf(addrs,512,"%s,%s,%d,slip",ipaddr_to_string(st->nl.local_address),
727 ipaddr_to_string(st->nl.secnet_address),st->nl.mtu);
728
729 nets=safe_malloc(1024,"userv_phase_hook:nets");
730 *nets=0;
731 for (c=st->nl.clients; c; c=c->next) {
732 for (i=0; i<c->networks->entries; i++) {
733 s=subnet_to_string(&c->networks->list[i]);
734 strcat(nets,s);
735 strcat(nets,",");
736 free(s);
737 }
738 }
739 nets[strlen(nets)-1]=0;
740
741 Message(M_INFO,"\nuserv_phase_hook: %s %s %s %s %s\n",st->userv_path,
742 st->service_user,st->service_name,addrs,nets);
743
744 /* Allocate buffer, plus space for padding. Make sure we end up
745 with the start of the packet well-aligned. */
746 /* ALIGN(st->max_start_pad,16); */
747 /* ALIGN(st->max_end_pad,16); */
748
749 st->pending_esc=False;
750
751 /* Invoke userv */
752 if (pipe(c_stdin)!=0) {
753 fatal_perror("userv_phase_hook: pipe(c_stdin)");
754 }
755 if (pipe(c_stdout)!=0) {
756 fatal_perror("userv_phase_hook: pipe(c_stdout)");
757 }
758 st->txfd=c_stdin[1];
759 st->rxfd=c_stdout[0];
760
761 child=fork();
762 if (child==-1) {
763 fatal_perror("userv_phase_hook: fork()");
764 }
765 if (child==0) {
766 char **argv;
767
768 /* We are the child. Modify our stdin and stdout, then exec userv */
769 dup2(c_stdin[0],0);
770 dup2(c_stdout[1],1);
771 close(c_stdin[1]);
772 close(c_stdout[0]);
773
774 /* The arguments are:
775 userv
776 service-user
777 service-name
778 local-addr,secnet-addr,mtu,protocol
779 route1,route2,... */
780 argv=malloc(sizeof(*argv)*6);
781 argv[0]=st->userv_path;
782 argv[1]=st->service_user;
783 argv[2]=st->service_name;
784 argv[3]=addrs;
785 argv[4]=nets;
786 argv[5]=NULL;
787 execvp(st->userv_path,argv);
788 perror("netlink-userv-ipif: execvp");
789
790 exit(1);
791 }
792 /* We are the parent... */
793
794 /* Register for poll() */
795 register_for_poll(st, userv_beforepoll, userv_afterpoll, 2, st->nl.name);
796 }
797
798 static list_t *userv_apply(closure_t *self, struct cloc loc, dict_t *context,
799 list_t *args)
800 {
801 struct userv *st;
802 item_t *item;
803 dict_t *dict;
804
805 st=safe_malloc(sizeof(*st),"userv_apply");
806
807 /* First parameter must be a dict */
808 item=list_elem(args,0);
809 if (!item || item->type!=t_dict)
810 cfgfatal(loc,"userv-ipif","parameter must be a dictionary\n");
811
812 dict=item->data.dict;
813
814 st->netlink_to_tunnel=
815 netlink_init(&st->nl,st,loc,dict,
816 "netlink-userv-ipif",userv_deliver_to_kernel);
817
818 st->userv_path=dict_read_string(dict,"userv-path",False,"userv-netlink",
819 loc);
820 st->service_user=dict_read_string(dict,"service-user",False,
821 "userv-netlink",loc);
822 st->service_name=dict_read_string(dict,"service-name",False,
823 "userv-netlink",loc);
824 if (!st->userv_path) st->userv_path="userv";
825 if (!st->service_user) st->service_user="root";
826 if (!st->service_name) st->service_name="ipif";
827 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"userv-netlink",loc);
828 BUF_ALLOC(st->buff,"netlink:userv_apply");
829
830 st->rxfd=-1; st->txfd=-1;
831 add_hook(PHASE_DROPPRIV,userv_phase_hook,st);
832
833 return new_closure(&st->nl.cl);
834 }
835
836 /* Connection to the kernel through the universal TUN/TAP driver */
837
838 struct tun {
839 struct netlink nl;
840 int fd;
841 string_t device_path;
842 string_t interface_name;
843 string_t ifconfig_path;
844 string_t route_path;
845 struct buffer_if *buff; /* We receive packets into here
846 and send them to the netlink code. */
847 netlink_deliver_fn *netlink_to_tunnel;
848 };
849
850 static int tun_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
851 int *timeout_io, const struct timeval *tv_now,
852 uint64_t *now)
853 {
854 struct tun *st=sst;
855 *nfds_io=1;
856 fds[0].fd=st->fd;
857 fds[0].events=POLLIN|POLLERR|POLLHUP;
858 return 0;
859 }
860
861 static void tun_afterpoll(void *sst, struct pollfd *fds, int nfds,
862 const struct timeval *tv_now, uint64_t *now)
863 {
864 struct tun *st=sst;
865 int l;
866
867 if (fds[0].revents&POLLERR) {
868 printf("tun_afterpoll: hup!\n");
869 }
870 if (fds[0].revents&POLLIN) {
871 BUF_ALLOC(st->buff,"tun_afterpoll");
872 buffer_init(st->buff,st->nl.max_start_pad);
873 l=read(st->fd,st->buff->start,st->buff->len-st->nl.max_start_pad);
874 if (l<0) {
875 fatal_perror("tun_afterpoll: read()");
876 }
877 if (l==0) {
878 fatal("tun_afterpoll: read()=0; device gone away?\n");
879 }
880 if (l>0) {
881 st->buff->size=l;
882 st->netlink_to_tunnel(&st->nl,NULL,st->buff);
883 BUF_ASSERT_FREE(st->buff);
884 }
885 }
886 }
887
888 static void tun_deliver_to_kernel(void *sst, void *cid,
889 struct buffer_if *buf)
890 {
891 struct tun *st=sst;
892
893 BUF_ASSERT_USED(buf);
894
895 /* No error checking, because we'd just throw the packet away anyway */
896 write(st->fd,buf->start,buf->size);
897 BUF_FREE(buf);
898 }
899
900 static void tun_phase_hook(void *sst, uint32_t newphase)
901 {
902 struct tun *st=sst;
903 string_t hostaddr,secnetaddr;
904 uint8_t mtu[6];
905 string_t network,mask;
906 struct netlink_client *c;
907 int i;
908
909 /* All the networks we'll be using have been registered. Invoke ifconfig
910 to set the TUN device's address, and route to add routes to all
911 our networks. */
912
913 hostaddr=ipaddr_to_string(st->nl.local_address);
914 secnetaddr=ipaddr_to_string(st->nl.secnet_address);
915 snprintf(mtu,6,"%d",st->nl.mtu);
916 mtu[5]=0;
917
918 sys_cmd(st->ifconfig_path,"ifconfig",st->interface_name,
919 hostaddr,"netmask","255.255.255.255","-broadcast",
920 "pointopoint",secnetaddr,"mtu",mtu,"up",(char *)0);
921
922 for (c=st->nl.clients; c; c=c->next) {
923 for (i=0; i<c->networks->entries; i++) {
924 network=ipaddr_to_string(c->networks->list[i].prefix);
925 mask=ipaddr_to_string(c->networks->list[i].mask);
926 sys_cmd(st->route_path,"route","add","-net",network,
927 "netmask",mask,"gw",secnetaddr,(char *)0);
928 }
929 }
930
931 /* Register for poll() */
932 register_for_poll(st, tun_beforepoll, tun_afterpoll, 1, st->nl.name);
933 }
934
#ifdef HAVE_LINUX_IF_H
/* Closure "tun": create a netlink on the Linux universal TUN/TAP
 * driver.
 *
 * The first argument must be a dictionary.  Besides the keys read by
 * netlink_init() it may contain "device" (default "/dev/net/tun"),
 * "interface" (default: whatever name the kernel allocates),
 * "ifconfig-path" (default "ifconfig") and "route-path" (default
 * "route"), and it must contain "buffer".  The device is opened here;
 * addresses and routes are set later by tun_phase_hook() in phase
 * PHASE_DROPPRIV.
 *
 * Returns the new netlink closure.  Failure to open or configure the
 * device is fatal. */
static list_t *tun_apply(closure_t *self, struct cloc loc, dict_t *context,
                         list_t *args)
{
    struct tun *st;
    item_t *item;
    dict_t *dict;
    struct ifreq ifr;

    st=safe_malloc(sizeof(*st),"tun_apply");

    /* First parameter must be a dict */
    item=list_elem(args,0);
    if (!item || item->type!=t_dict)
        cfgfatal(loc,"tun","parameter must be a dictionary\n");

    dict=item->data.dict;

    st->netlink_to_tunnel=
        netlink_init(&st->nl,st,loc,dict,
                     "netlink-tun",tun_deliver_to_kernel);

    st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
    st->interface_name=dict_read_string(dict,"interface",False,
                                        "tun-netlink",loc);
    st->ifconfig_path=dict_read_string(dict,"ifconfig-path",
                                       False,"tun-netlink",loc);
    st->route_path=dict_read_string(dict,"route-path",
                                    False,"tun-netlink",loc);

    if (!st->device_path) st->device_path="/dev/net/tun";
    if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
    if (!st->route_path) st->route_path="route";
    st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);

    /* ifr_name holds IFNAMSIZ bytes including the terminating NUL.  A
       longer name would be cut short (and left unterminated) in the
       request, and would then no longer match the name later given to
       ifconfig, so refuse it outright. */
    if (st->interface_name && strlen(st->interface_name)>=IFNAMSIZ) {
        cfgfatal(loc,"tun","interface name is too long\n");
    }

    /* New TUN interface: open the device, then do ioctl TUNSETIFF
       to set or find out the network interface name. */
    st->fd=open(st->device_path,O_RDWR);
    if (st->fd==-1) {
        fatal_perror("%s: can't open device file %s",st->nl.name,
                     st->device_path);
    }
    memset(&ifr,0,sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI; /* Just send/receive IP packets,
                                            no extra headers */
    if (st->interface_name)
        strncpy(ifr.ifr_name,st->interface_name,IFNAMSIZ-1);
    if (ioctl(st->fd,TUNSETIFF,&ifr)<0) {
        fatal_perror("%s: ioctl(TUNSETIFF)",st->nl.name);
    }
    if (!st->interface_name) {
        /* Remember the name that the ioctl filled in */
        st->interface_name=safe_malloc(strlen(ifr.ifr_name)+1,"tun_apply");
        strcpy(st->interface_name,ifr.ifr_name);
        Message(M_INFO,"%s: allocated network interface %s\n",st->nl.name,
                st->interface_name);
    }

    add_hook(PHASE_DROPPRIV,tun_phase_hook,st);

    return new_closure(&st->nl.cl);
}
#endif /* HAVE_LINUX_IF_H */
997
998 static list_t *tun_old_apply(closure_t *self, struct cloc loc, dict_t *context,
999 list_t *args)
1000 {
1001 struct tun *st;
1002 item_t *item;
1003 dict_t *dict;
1004 bool_t search_for_if;
1005
1006 st=safe_malloc(sizeof(*st),"tun_old_apply");
1007
1008 Message(M_WARNING,"the tun-old code has never been tested. Please report "
1009 "success or failure to steve@greenend.org.uk\n");
1010
1011 /* First parameter must be a dict */
1012 item=list_elem(args,0);
1013 if (!item || item->type!=t_dict)
1014 cfgfatal(loc,"tun","parameter must be a dictionary\n");
1015
1016 dict=item->data.dict;
1017
1018 st->netlink_to_tunnel=
1019 netlink_init(&st->nl,st,loc,dict,
1020 "netlink-tun",tun_deliver_to_kernel);
1021
1022 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1023 st->interface_name=dict_read_string(dict,"interface",False,
1024 "tun-netlink",loc);
1025 search_for_if=dict_read_bool(dict,"interface-search",False,"tun-netlink",
1026 loc,st->device_path==NULL);
1027 st->ifconfig_path=dict_read_string(dict,"ifconfig-path",False,
1028 "tun-netlink",loc);
1029 st->route_path=dict_read_string(dict,"route-path",False,"tun-netlink",loc);
1030
1031 if (!st->device_path) st->device_path="/dev/tun";
1032 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1033 if (!st->route_path) st->route_path="route";
1034 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1035
1036 /* Old TUN interface: the network interface name depends on which
1037 /dev/tunX file we open. If 'interface-search' is set to true, treat
1038 'device' as the prefix and try numbers from 0--255. If it's set
1039 to false, treat 'device' as the whole name, and require than an
1040 appropriate interface name be specified. */
1041 if (search_for_if) {
1042 string_t dname;
1043 int i;
1044
1045 if (st->interface_name) {
1046 cfgfatal(loc,"tun-old","you may not specify an interface name "
1047 "in interface-search mode\n");
1048 }
1049 dname=safe_malloc(strlen(st->device_path)+4,"tun_old_apply");
1050 st->interface_name=safe_malloc(8,"tun_old_apply");
1051
1052 for (i=0; i<255; i++) {
1053 sprintf(dname,"%s%d",st->device_path,i);
1054 if ((st->fd=open(dname,O_RDWR))>0) {
1055 sprintf(st->interface_name,"tun%d",i);
1056 Message(M_INFO,"%s: allocated network interface %s "
1057 "through %s\n",st->nl.name,st->interface_name,dname);
1058 break;
1059 }
1060 }
1061 if (st->fd==-1) {
1062 fatal("%s: unable to open any TUN device (%s...)\n",
1063 st->nl.name,st->device_path);
1064 }
1065 } else {
1066 if (!st->interface_name) {
1067 cfgfatal(loc,"tun-old","you must specify an interface name "
1068 "when you explicitly specify a TUN device file\n");
1069 }
1070 st->fd=open(st->device_path,O_RDWR);
1071 if (st->fd==-1) {
1072 fatal_perror("%s: unable to open TUN device file %s",
1073 st->nl.name,st->device_path);
1074 }
1075 }
1076
1077 add_hook(PHASE_DROPPRIV,tun_phase_hook,st);
1078
1079 return new_closure(&st->nl.cl);
1080 }
1081
1082 /* No connection to the kernel at all... */
1083
1084 struct null {
1085 struct netlink nl;
1086 };
1087
/* "Deliver" a packet to a host that does not exist: discard it.
 *
 * sst, cid - not used
 * buf      - the packet; must be in use on entry, free on return
 *
 * The buffer has to be freed here, as the other drivers' delivery
 * functions do: netlink_packet_deliver() asserts that the buffer is
 * free once deliver_to_host has returned. */
static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
{
    BUF_ASSERT_USED(buf);
    BUF_FREE(buf);
}
1092
1093 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1094 list_t *args)
1095 {
1096 struct null *st;
1097 item_t *item;
1098 dict_t *dict;
1099
1100 st=safe_malloc(sizeof(*st),"null_apply");
1101
1102 item=list_elem(args,0);
1103 if (!item || item->type!=t_dict)
1104 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1105
1106 dict=item->data.dict;
1107
1108 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_deliver);
1109
1110 return new_closure(&st->nl.cl);
1111 }
1112
1113 init_module netlink_module;
1114 void netlink_module(dict_t *dict)
1115 {
1116 add_closure(dict,"userv-ipif",userv_apply);
1117 #ifdef HAVE_LINUX_IF_H
1118 add_closure(dict,"tun",tun_apply);
1119 #endif
1120 add_closure(dict,"tun-old",tun_old_apply);
1121 add_closure(dict,"null-netlink",null_apply);
1122 #if 0
1123 /* TODO */
1124 add_closure(dict,"pty-slip",ptyslip_apply);
1125 add_closure(dict,"slipd",slipd_apply);
1126 #endif /* 0 */
1127 }