Import release 0.07
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
1/* User-kernel network link */
2
4efd681a
SE
3/* We will eventually support a variety of methods for extracting
4 packets from the kernel: userv-ipif, ipif on its own (when we run
5 as root), the kernel TUN driver, SLIP to a pty, an external netlink
6 daemon. There is a performance/security tradeoff. */
2fe58dfd
SE
7
8/* When dealing with SLIP (to a pty, or ipif) we have separate rx, tx
9 and client buffers. When receiving we may read() any amount, not
10 just whole packets. When transmitting we need to bytestuff anyway,
11 and may be part-way through receiving. */
12
13/* Each netlink device is actually a router, with its own IP
4efd681a
SE
14 address. We do things like decreasing the TTL and recalculating the
15 header checksum, generating ICMP, responding to pings, etc. */
2fe58dfd
SE
16
17/* This is where we have the anti-spoofing paranoia - before sending a
18 packet to the kernel we check that the tunnel it came over could
19 reasonably have produced it. */
20
21#include <stdio.h>
22#include <string.h>
23#include <unistd.h>
24#include <fcntl.h>
4efd681a 25#include <sys/ioctl.h>
2fe58dfd 26
4efd681a 27#include "config.h"
2fe58dfd
SE
28#include "secnet.h"
29#include "util.h"
30
4efd681a
SE
31#ifdef HAVE_LINUX_IF_H
32#include <linux/if.h>
33#include <linux/if_tun.h>
34#endif
35
36/* XXX where do we find if_tun on other architectures? */
37
/* Size of the on-stack scratch buffers used by the userv/SLIP code */
#define DEFAULT_BUFSIZE 2048
/* MTU used when the configuration gives no "mtu" key */
#define DEFAULT_MTU 1000
/* Size of the buffer in which outgoing ICMP messages are assembled */
#define ICMP_BUFSIZE 1024

/* SLIP framing: END separates packets; a literal END or ESC byte in
   the payload is sent as ESC followed by ESCEND or ESCESC. */
#define SLIP_END 192
#define SLIP_ESC 219
#define SLIP_ESCEND 220
#define SLIP_ESCESC 221
46
47struct netlink_client {
48 struct subnet_list *networks;
49 netlink_deliver_fn *deliver;
50 void *dst;
4efd681a
SE
51 string_t name;
52 bool_t can_deliver;
2fe58dfd
SE
53 struct netlink_client *next;
54};
55
4efd681a
SE
56/* Netlink provides one function to the device driver, to call to deliver
57 a packet from the device. The device driver provides one function to
58 netlink, for it to call to deliver a packet to the device. */
59
60struct netlink {
2fe58dfd
SE
61 closure_t cl;
62 struct netlink_if ops;
4efd681a
SE
63 void *dst; /* Pointer to host interface state */
64 string_t name;
2fe58dfd
SE
65 uint32_t max_start_pad;
66 uint32_t max_end_pad;
2fe58dfd 67 struct subnet_list networks;
4efd681a
SE
68 uint32_t local_address; /* host interface address */
69 uint32_t secnet_address; /* our own address */
2fe58dfd 70 uint32_t mtu;
4efd681a
SE
71 struct netlink_client *clients;
72 netlink_deliver_fn *deliver_to_host; /* Provided by driver */
73 struct buffer_if icmp; /* Buffer for assembly of outgoing ICMP */
2fe58dfd
SE
74};
75
4efd681a
SE
/* Generic IP checksum routine (one's-complement sum of 16-bit words).
 *
 * iph:   first byte of the data to be summed
 * count: number of bytes to sum; may be odd
 *
 * Returns the complemented sum in network byte order, ready to be
 * stored in a checksum field. Summing data that already carries a
 * correct checksum gives 0.
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    /* Build each big-endian word from its two bytes: no byte swap is
       needed and there is no misaligned uint16_t load. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* A trailing odd byte is the HIGH half of a last word whose low
       half is zero padding. */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
92
4efd681a
SE
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph: start of the IP header
 * ihl: header length in 32-bit words
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    /* The template is built from adjacent string literals; a literal
       containing raw newlines is not valid C. */
    __asm__ __volatile__(
        "movl (%1), %0\n\t"
        "subl $4, %2\n\t"
        "jbe 2f\n\t"
        "addl 4(%1), %0\n\t"
        "adcl 8(%1), %0\n\t"
        "adcl 12(%1), %0\n"
        "1:\tadcl 16(%1), %0\n\t"
        "lea 4(%1), %1\n\t"
        "decl %2\n\t"
        "jne 1b\n\t"
        "adcl $0, %0\n\t"
        "movl %0, %2\n\t"
        "shrl $16, %0\n\t"
        "addw %w2, %w0\n\t"
        "adcl $0, %0\n\t"
        "notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        /* The asm reads the header through iph; the clobber makes the
           compiler complete earlier stores (such as check=0) first. */
        : "memory");
    return sum;
}
#else
/* Portable version: checksum ihl 32-bit words with ip_csum() */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
136
/* IPv4 header as it appears at the start of a packet buffer. The code
   in this file converts the multi-byte fields with ntohs/ntohl. */
struct iphdr {
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4,
            ihl:4;
#else
    uint8_t ihl:4,
            version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;   /* length of the whole datagram in bytes */
    uint16_t id;
    uint16_t frag_off;  /* flags and fragment offset */
    uint8_t ttl;
    uint8_t protocol;   /* 1 is ICMP */
    uint16_t check;     /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
156
157struct icmphdr {
158 struct iphdr iph;
159 uint8_t type;
160 uint8_t code;
161 uint16_t check;
162 union {
163 uint32_t unused;
164 struct {
165 uint8_t pointer;
166 uint8_t unused1;
167 uint16_t unused2;
168 } pprob;
169 uint32_t gwaddr;
170 struct {
171 uint16_t id;
172 uint16_t seq;
173 } echo;
174 } d;
175};
176
177static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf);
178
179static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
180 uint32_t dest,uint16_t len)
181{
182 struct icmphdr *h;
183
184 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
185 buffer_init(&st->icmp,st->max_start_pad);
186 h=buf_append(&st->icmp,sizeof(*h));
187
188 h->iph.version=4;
189 h->iph.ihl=5;
190 h->iph.tos=0;
191 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
192 h->iph.id=0;
193 h->iph.frag_off=0;
194 h->iph.ttl=255;
195 h->iph.protocol=1;
196 h->iph.saddr=htonl(st->secnet_address);
197 h->iph.daddr=htonl(dest);
198 h->iph.check=0;
199 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
200 h->check=0;
201 h->d.unused=0;
202
203 return h;
204}
205
206/* Fill in the ICMP checksum field correctly */
207static void netlink_icmp_csum(struct icmphdr *h)
208{
209 uint32_t len;
210
211 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
212 h->check=0;
213 h->check=ip_csum(&h->type,len);
214}
215
216/* RFC1122:
217 * An ICMP error message MUST NOT be sent as the result of
218 * receiving:
219 *
220 * * an ICMP error message, or
221 *
222 * * a datagram destined to an IP broadcast or IP multicast
223 * address, or
224 *
225 * * a datagram sent as a link-layer broadcast, or
226 *
227 * * a non-initial fragment, or
228 *
229 * * a datagram whose source address does not define a single
230 * host -- e.g., a zero address, a loopback address, a
231 * broadcast address, a multicast address, or a Class E
232 * address.
233 */
234static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
235{
236 struct iphdr *iph;
237 uint32_t source;
238
239 iph=(struct iphdr *)buf->start;
240 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
241 eg. icmp echo-request */
242 /* How do we spot broadcast destination addresses? */
243 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
244 source=ntohl(iph->saddr);
245 if (source==0) return False;
246 if ((source&0xff000000)==0x7f000000) return False;
247 /* How do we spot broadcast source addresses? */
248 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
249 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
250 return True;
251}
252
253/* How much of the original IP packet do we include in its ICMP
254 response? The header plus up to 64 bits. */
255static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
256{
257 struct iphdr *iph=(struct iphdr *)buf->start;
258 uint16_t hlen,plen;
259
260 hlen=iph->ihl*4;
261 /* We include the first 8 bytes of the packet data, provided they exist */
262 hlen+=8;
263 plen=ntohs(iph->tot_len);
264 return (hlen>plen?plen:hlen);
265}
266
267static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
268 uint8_t type, uint8_t code)
269{
270 struct iphdr *iph=(struct iphdr *)buf->start;
271 struct icmphdr *h;
272 uint16_t len;
273
274 if (netlink_icmp_may_reply(buf)) {
275 len=netlink_icmp_reply_len(buf);
276 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
277 h->type=type; h->code=code;
278 memcpy(buf_append(&st->icmp,len),buf->start,len);
279 netlink_icmp_csum(h);
280 netlink_packet_deliver(st,&st->icmp);
281 BUF_ASSERT_FREE(&st->icmp);
282 }
283}
284
285/*
286 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
287 * checksum.
288 *
289 * Is the datagram acceptable?
290 *
291 * 1. Length at least the size of an ip header
292 * 2. Version of 4
293 * 3. Checksums correctly.
294 * 4. Doesn't have a bogus length
295 */
296static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
297{
298 struct iphdr *iph=(struct iphdr *)buf->start;
299 uint32_t len;
300
301 if (iph->ihl < 5 || iph->version != 4) {
302 printf("ihl/version check failed\n");
303 return False;
304 }
305 if (buf->size < iph->ihl*4) {
306 printf("buffer size check failed\n");
307 return False;
308 }
309 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) {
310 printf("checksum failed\n");
311 return False;
312 }
313 len=ntohs(iph->tot_len);
314 /* There should be no padding */
315 if (buf->size!=len || len<(iph->ihl<<2)) {
316 printf("length check failed buf->size=%d len=%d\n",buf->size,len);
317 return False;
318 }
319
320 /* XXX check that there's no source route specified */
321 return True;
322}
323
324static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf)
325{
326 struct iphdr *iph=(struct iphdr *)buf->start;
327 uint32_t dest=ntohl(iph->daddr);
2fe58dfd
SE
328 struct netlink_client *c;
329
4efd681a 330 BUF_ASSERT_USED(buf);
2fe58dfd 331
4efd681a
SE
332 if (dest==st->secnet_address) {
333 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
334 BUF_FREE(buf);
2fe58dfd
SE
335 return;
336 }
4efd681a 337
2fe58dfd
SE
338 for (c=st->clients; c; c=c->next) {
339 if (subnet_match(c->networks,dest)) {
4efd681a
SE
340 if (c->can_deliver) {
341 c->deliver(c->dst,c,buf);
342 BUF_ASSERT_FREE(buf);
343 } else {
344 /* Generate ICMP destination unreachable */
345 netlink_icmp_simple(st,buf,3,0);
346 BUF_FREE(buf);
347 }
2fe58dfd
SE
348 return;
349 }
350 }
4efd681a
SE
351 if (subnet_match(&st->networks,dest)) {
352 st->deliver_to_host(st->dst,NULL,buf);
353 BUF_ASSERT_FREE(buf);
354 return;
355 }
356 Message(M_ERROR,"%s: failed to deliver a packet (bad destination address)"
357 "\nXXX make this message clearer\n");
358 BUF_FREE(buf);
359}
360
361static void netlink_packet_forward(struct netlink *st, struct buffer_if *buf)
362{
363 struct iphdr *iph=(struct iphdr *)buf->start;
364
365 BUF_ASSERT_USED(buf);
366
367 /* Packet has already been checked */
368 if (iph->ttl<=1) {
369 /* Generate ICMP time exceeded */
370 netlink_icmp_simple(st,buf,11,0);
371 BUF_FREE(buf);
372 return;
373 }
374 iph->ttl--;
375 iph->check=0;
376 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
377
378 netlink_packet_deliver(st,buf);
379 BUF_ASSERT_FREE(buf);
380}
381
382/* Someone has been foolish enough to address a packet to us. I
383 suppose we should reply to it, just to be polite. */
384static void netlink_packet_local(struct netlink *st, struct buffer_if *buf)
385{
386 struct icmphdr *h;
387
388 h=(struct icmphdr *)buf->start;
389
390 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
391 Message(M_WARNING,"%s: fragmented packet addressed to us\n",st->name);
392 BUF_FREE(buf);
393 return;
394 }
395
396 if (h->iph.protocol==1) {
397 /* It's ICMP */
398 if (h->type==8 && h->code==0) {
399 /* ICMP echo-request. Special case: we re-use the buffer
400 to construct the reply. */
401 h->type=0;
402 h->iph.daddr=h->iph.saddr;
403 h->iph.saddr=htonl(st->secnet_address);
404 h->iph.ttl=255; /* Be nice and bump it up again... */
405 h->iph.check=0;
406 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
407 netlink_icmp_csum(h);
408 netlink_packet_deliver(st,buf);
409 return;
410 }
411 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
412 } else {
413 /* Send ICMP protocol unreachable */
414 netlink_icmp_simple(st,buf,3,2);
415 BUF_FREE(buf);
416 return;
417 }
418
419 BUF_FREE(buf);
420}
421
422/* Called by site code when remote packet is available */
423/* buf is allocated on entry and free on return */
424static void netlink_from_tunnel(void *sst, void *cst, struct buffer_if *buf)
425{
426 struct netlink *st=sst;
427 struct netlink_client *client=cst;
428 uint32_t source,dest;
429 struct iphdr *iph;
430
431 BUF_ASSERT_USED(buf);
432 if (!netlink_check(st,buf)) {
433 Message(M_WARNING,"%s: bad IP packet from tunnel %s\n",
434 st->name,client->name);
435 BUF_FREE(buf);
436 return;
437 }
438 iph=(struct iphdr *)buf->start;
439
440 source=ntohl(iph->saddr);
441 dest=ntohl(iph->daddr);
442
443 /* Check that the packet source is in 'nets' and its destination is
444 in client->networks */
445 if (!subnet_match(client->networks,source)) {
446 string_t s,d;
447 s=ipaddr_to_string(source);
448 d=ipaddr_to_string(dest);
449 Message(M_WARNING,"%s: packet from tunnel %s with bad source address "
450 "(s=%s,d=%s)\n",st->name,client->name,s,d);
451 free(s); free(d);
452 BUF_FREE(buf);
453 return;
454 }
455 /* (st->secnet_address needs checking before matching against
456 st->networks because secnet's IP address may not be in the
457 range the host is willing to deal with) */
2fe58dfd 458 if (dest==st->secnet_address) {
4efd681a
SE
459 netlink_packet_local(st,buf);
460 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
461 return;
462 }
4efd681a 463 if (!subnet_match(&st->networks,dest)) {
2fe58dfd
SE
464 string_t s,d;
465 s=ipaddr_to_string(source);
466 d=ipaddr_to_string(dest);
4efd681a
SE
467 Message(M_WARNING,"%s: incoming packet from tunnel %s "
468 "with bad destination address "
469 "(s=%s,d=%s)\n",st->name,client->name,s,d);
2fe58dfd 470 free(s); free(d);
4efd681a 471 BUF_FREE(buf);
2fe58dfd
SE
472 return;
473 }
4efd681a
SE
474
475 netlink_packet_forward(st,buf);
476
477 BUF_ASSERT_FREE(buf);
478}
479
480/* Called by driver code when packet is received from kernel */
481/* cid should be NULL */
482/* buf should be allocated on entry, and is free on return */
483static void netlink_from_host(void *sst, void *cid, struct buffer_if *buf)
484{
485 struct netlink *st=sst;
486 uint32_t source,dest;
487 struct iphdr *iph;
488
489 BUF_ASSERT_USED(buf);
490 if (!netlink_check(st,buf)) {
491 Message(M_WARNING,"%s: bad IP packet from host\n",
492 st->name);
493 BUF_FREE(buf);
494 return;
495 }
496 iph=(struct iphdr *)buf->start;
497
498 source=ntohl(iph->saddr);
499 dest=ntohl(iph->daddr);
500
501 if (!subnet_match(&st->networks,source)) {
502 string_t s,d;
503 s=ipaddr_to_string(source);
504 d=ipaddr_to_string(dest);
505 Message(M_WARNING,"%s: outgoing packet with bad source address "
506 "(s=%s,d=%s)\n",st->name,s,d);
507 free(s); free(d);
508 BUF_FREE(buf);
509 return;
510 }
511 if (dest==st->secnet_address) {
512 netlink_packet_local(st,buf);
513 BUF_ASSERT_FREE(buf);
514 return;
515 }
516 netlink_packet_forward(st,buf);
517 BUF_ASSERT_FREE(buf);
518}
519
520static void netlink_set_delivery(void *sst, void *cid, bool_t can_deliver)
521{
522 struct netlink_client *c=cid;
523
524 c->can_deliver=can_deliver;
525}
526
527static void *netlink_regnets(void *sst, struct subnet_list *nets,
528 netlink_deliver_fn *deliver, void *dst,
529 uint32_t max_start_pad, uint32_t max_end_pad,
530 string_t client_name)
531{
532 struct netlink *st=sst;
533 struct netlink_client *c;
534
535 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
536 "max_start_pad=%d, max_end_pad=%d\n",
537 nets->entries,max_start_pad,max_end_pad);
538
539 c=safe_malloc(sizeof(*c),"netlink_regnets");
540 c->networks=nets;
541 c->deliver=deliver;
542 c->dst=dst;
543 c->name=client_name; /* XXX copy it? */
544 c->can_deliver=False;
545 c->next=st->clients;
546 st->clients=c;
547 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
548 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
549
550 return c;
551}
552
553static netlink_deliver_fn *netlink_init(struct netlink *st,
554 void *dst, struct cloc loc,
555 dict_t *dict, string_t description,
556 netlink_deliver_fn *to_host)
557{
558 st->dst=dst;
559 st->cl.description=description;
560 st->cl.type=CL_NETLINK;
561 st->cl.apply=NULL;
562 st->cl.interface=&st->ops;
563 st->ops.st=st;
564 st->ops.regnets=netlink_regnets;
565 st->ops.deliver=netlink_from_tunnel;
566 st->ops.set_delivery=netlink_set_delivery;
567 st->max_start_pad=0;
568 st->max_end_pad=0;
569 st->clients=NULL;
570 st->deliver_to_host=to_host;
571
572 st->name=dict_read_string(dict,"name",False,"netlink",loc);
573 if (!st->name) st->name=description;
574 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
575 &st->networks);
576 st->local_address=string_to_ipaddr(
577 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
578 st->secnet_address=string_to_ipaddr(
579 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
580 if (!subnet_match(&st->networks,st->local_address)) {
581 cfgfatal(loc,"netlink","local-address must be in local networks\n");
582 }
583 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
584 buffer_new(&st->icmp,ICMP_BUFSIZE);
585
586 return netlink_from_host;
587}
588
589/* Connection to the kernel through userv-ipif */
590
591struct userv {
592 struct netlink nl;
593 int txfd; /* We transmit to userv */
594 int rxfd; /* We receive from userv */
595 string_t userv_path;
596 string_t service_user;
597 string_t service_name;
598 uint32_t txbuflen;
599 struct buffer_if *buff; /* We unstuff received packets into here
600 and send them to the site code. */
601 bool_t pending_esc;
602 netlink_deliver_fn *netlink_to_tunnel;
603};
604
605static int userv_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
606 int *timeout_io, const struct timeval *tv_now,
607 uint64_t *now)
608{
609 struct userv *st=sst;
610 *nfds_io=2;
611 fds[0].fd=st->txfd;
612 fds[0].events=POLLERR; /* Might want to pick up POLLOUT sometime */
613 fds[1].fd=st->rxfd;
614 fds[1].events=POLLIN|POLLERR|POLLHUP;
615 return 0;
2fe58dfd
SE
616}
617
618static void userv_afterpoll(void *sst, struct pollfd *fds, int nfds,
619 const struct timeval *tv_now, uint64_t *now)
620{
621 struct userv *st=sst;
622 uint8_t rxbuf[DEFAULT_BUFSIZE];
623 int l,i;
624
625 if (fds[1].revents&POLLERR) {
626 printf("userv_afterpoll: hup!\n");
627 }
628 if (fds[1].revents&POLLIN) {
629 l=read(st->rxfd,rxbuf,DEFAULT_BUFSIZE);
630 if (l<0) {
631 fatal_perror("userv_afterpoll: read(rxfd)");
632 }
633 if (l==0) {
634 fatal("userv_afterpoll: read(rxfd)=0; userv gone away?\n");
635 }
636 /* XXX really crude unstuff code */
637 /* XXX check for buffer overflow */
4efd681a 638 BUF_ASSERT_USED(st->buff);
2fe58dfd
SE
639 for (i=0; i<l; i++) {
640 if (st->pending_esc) {
641 st->pending_esc=False;
642 switch(rxbuf[i]) {
643 case SLIP_ESCEND:
644 *(uint8_t *)buf_append(st->buff,1)=SLIP_END;
645 break;
646 case SLIP_ESCESC:
647 *(uint8_t *)buf_append(st->buff,1)=SLIP_ESC;
648 break;
649 default:
650 fatal("userv_afterpoll: bad SLIP escape character\n");
651 }
652 } else {
653 switch (rxbuf[i]) {
654 case SLIP_END:
4efd681a
SE
655 if (st->buff->size>0) {
656 st->netlink_to_tunnel(&st->nl,NULL,
657 st->buff);
658 BUF_ALLOC(st->buff,"userv_afterpoll");
659 }
660 buffer_init(st->buff,st->nl.max_start_pad);
2fe58dfd
SE
661 break;
662 case SLIP_ESC:
663 st->pending_esc=True;
664 break;
665 default:
666 *(uint8_t *)buf_append(st->buff,1)=rxbuf[i];
667 break;
668 }
669 }
670 }
671 }
4efd681a
SE
672}
673
674/* Send buf to the kernel. Free buf before returning. */
675static void userv_deliver_to_kernel(void *sst, void *cid,
676 struct buffer_if *buf)
677{
678 struct userv *st=sst;
679 uint8_t txbuf[DEFAULT_BUFSIZE];
680 uint8_t *i;
681 uint32_t j;
682
683 BUF_ASSERT_USED(buf);
684
685 /* Spit the packet at userv-ipif: SLIP start marker, then
686 bytestuff the packet, then SLIP end marker */
687 /* XXX crunchy bytestuff code */
688 j=0;
689 txbuf[j++]=SLIP_END;
690 for (i=buf->start; i<(buf->start+buf->size); i++) {
691 switch (*i) {
692 case SLIP_END:
693 txbuf[j++]=SLIP_ESC;
694 txbuf[j++]=SLIP_ESCEND;
695 break;
696 case SLIP_ESC:
697 txbuf[j++]=SLIP_ESC;
698 txbuf[j++]=SLIP_ESCESC;
699 break;
700 default:
701 txbuf[j++]=*i;
702 break;
703 }
704 }
705 txbuf[j++]=SLIP_END;
706 if (write(st->txfd,txbuf,j)<0) {
707 fatal_perror("userv_deliver_to_kernel: write()");
708 }
709 BUF_FREE(buf);
2fe58dfd
SE
710}
711
712static void userv_phase_hook(void *sst, uint32_t newphase)
713{
714 struct userv *st=sst;
715 pid_t child;
716 int c_stdin[2];
717 int c_stdout[2];
718 string_t addrs;
719 string_t nets;
720 string_t s;
721 struct netlink_client *c;
722 int i;
723
724 /* This is where we actually invoke userv - all the networks we'll
725 be using should already have been registered. */
726
727 addrs=safe_malloc(512,"userv_phase_hook:addrs");
4efd681a
SE
728 snprintf(addrs,512,"%s,%s,%d,slip",ipaddr_to_string(st->nl.local_address),
729 ipaddr_to_string(st->nl.secnet_address),st->nl.mtu);
2fe58dfd
SE
730
731 nets=safe_malloc(1024,"userv_phase_hook:nets");
732 *nets=0;
4efd681a 733 for (c=st->nl.clients; c; c=c->next) {
2fe58dfd
SE
734 for (i=0; i<c->networks->entries; i++) {
735 s=subnet_to_string(&c->networks->list[i]);
736 strcat(nets,s);
737 strcat(nets,",");
738 free(s);
739 }
740 }
741 nets[strlen(nets)-1]=0;
742
743 Message(M_INFO,"\nuserv_phase_hook: %s %s %s %s %s\n",st->userv_path,
744 st->service_user,st->service_name,addrs,nets);
745
746 /* Allocate buffer, plus space for padding. Make sure we end up
747 with the start of the packet well-aligned. */
748 /* ALIGN(st->max_start_pad,16); */
749 /* ALIGN(st->max_end_pad,16); */
750
751 st->pending_esc=False;
752
753 /* Invoke userv */
754 if (pipe(c_stdin)!=0) {
755 fatal_perror("userv_phase_hook: pipe(c_stdin)");
756 }
757 if (pipe(c_stdout)!=0) {
758 fatal_perror("userv_phase_hook: pipe(c_stdout)");
759 }
760 st->txfd=c_stdin[1];
761 st->rxfd=c_stdout[0];
762
763 child=fork();
764 if (child==-1) {
765 fatal_perror("userv_phase_hook: fork()");
766 }
767 if (child==0) {
768 char **argv;
769
770 /* We are the child. Modify our stdin and stdout, then exec userv */
771 dup2(c_stdin[0],0);
772 dup2(c_stdout[1],1);
773 close(c_stdin[1]);
774 close(c_stdout[0]);
775
776 /* The arguments are:
777 userv
778 service-user
779 service-name
780 local-addr,secnet-addr,mtu,protocol
781 route1,route2,... */
782 argv=malloc(sizeof(*argv)*6);
783 argv[0]=st->userv_path;
784 argv[1]=st->service_user;
785 argv[2]=st->service_name;
786 argv[3]=addrs;
787 argv[4]=nets;
788 argv[5]=NULL;
789 execvp(st->userv_path,argv);
790 perror("netlink-userv-ipif: execvp");
791
792 exit(1);
793 }
794 /* We are the parent... */
795
796 /* Register for poll() */
4efd681a 797 register_for_poll(st, userv_beforepoll, userv_afterpoll, 2, st->nl.name);
2fe58dfd
SE
798}
799
4efd681a
SE
800static list_t *userv_apply(closure_t *self, struct cloc loc, dict_t *context,
801 list_t *args)
2fe58dfd 802{
4efd681a
SE
803 struct userv *st;
804 item_t *item;
805 dict_t *dict;
2fe58dfd 806
4efd681a 807 st=safe_malloc(sizeof(*st),"userv_apply");
2fe58dfd 808
4efd681a
SE
809 /* First parameter must be a dict */
810 item=list_elem(args,0);
811 if (!item || item->type!=t_dict)
812 cfgfatal(loc,"userv-ipif","parameter must be a dictionary\n");
813
814 dict=item->data.dict;
2fe58dfd 815
4efd681a
SE
816 st->netlink_to_tunnel=
817 netlink_init(&st->nl,st,loc,dict,
818 "netlink-userv-ipif",userv_deliver_to_kernel);
819
820 st->userv_path=dict_read_string(dict,"userv-path",False,"userv-netlink",
821 loc);
822 st->service_user=dict_read_string(dict,"service-user",False,
823 "userv-netlink",loc);
824 st->service_name=dict_read_string(dict,"service-name",False,
825 "userv-netlink",loc);
826 if (!st->userv_path) st->userv_path="userv";
827 if (!st->service_user) st->service_user="root";
828 if (!st->service_name) st->service_name="ipif";
829 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"userv-netlink",loc);
830 BUF_ALLOC(st->buff,"netlink:userv_apply");
831
832 st->rxfd=-1; st->txfd=-1;
833 add_hook(PHASE_DROPPRIV,userv_phase_hook,st);
834
835 return new_closure(&st->nl.cl);
2fe58dfd
SE
836}
837
4efd681a 838/* Connection to the kernel through the universal TUN/TAP driver */
2fe58dfd 839
4efd681a
SE
840struct tun {
841 struct netlink nl;
842 int fd;
843 string_t device_path;
844 string_t interface_name;
845 string_t ifconfig_path;
846 string_t route_path;
847 struct buffer_if *buff; /* We receive packets into here
848 and send them to the netlink code. */
849 netlink_deliver_fn *netlink_to_tunnel;
850};
2fe58dfd 851
4efd681a
SE
852static int tun_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
853 int *timeout_io, const struct timeval *tv_now,
854 uint64_t *now)
855{
856 struct tun *st=sst;
857 *nfds_io=1;
858 fds[0].fd=st->fd;
859 fds[0].events=POLLIN|POLLERR|POLLHUP;
860 return 0;
861}
2fe58dfd 862
4efd681a
SE
863static void tun_afterpoll(void *sst, struct pollfd *fds, int nfds,
864 const struct timeval *tv_now, uint64_t *now)
865{
866 struct tun *st=sst;
867 int l;
868
869 if (fds[0].revents&POLLERR) {
870 printf("tun_afterpoll: hup!\n");
2fe58dfd 871 }
4efd681a
SE
872 if (fds[0].revents&POLLIN) {
873 BUF_ALLOC(st->buff,"tun_afterpoll");
874 buffer_init(st->buff,st->nl.max_start_pad);
875 l=read(st->fd,st->buff->start,st->buff->len-st->nl.max_start_pad);
876 if (l<0) {
877 fatal_perror("tun_afterpoll: read()");
878 }
879 if (l==0) {
880 fatal("tun_afterpoll: read()=0; device gone away?\n");
881 }
882 if (l>0) {
883 st->buff->size=l;
884 st->netlink_to_tunnel(&st->nl,NULL,st->buff);
885 BUF_ASSERT_FREE(st->buff);
886 }
2fe58dfd 887 }
4efd681a 888}
2fe58dfd 889
4efd681a
SE
890static void tun_deliver_to_kernel(void *sst, void *cid,
891 struct buffer_if *buf)
892{
893 struct tun *st=sst;
2fe58dfd 894
4efd681a 895 BUF_ASSERT_USED(buf);
2fe58dfd 896
4efd681a
SE
897 /* No error checking, because we'd just throw the packet away anyway */
898 write(st->fd,buf->start,buf->size);
899 BUF_FREE(buf);
900}
2fe58dfd 901
4efd681a
SE
902static void tun_phase_hook(void *sst, uint32_t newphase)
903{
904 struct tun *st=sst;
905 string_t hostaddr,secnetaddr;
906 uint8_t mtu[6];
907 string_t network,mask;
908 struct netlink_client *c;
909 int i;
910
911 /* All the networks we'll be using have been registered. Invoke ifconfig
912 to set the TUN device's address, and route to add routes to all
913 our networks. */
914
915 hostaddr=ipaddr_to_string(st->nl.local_address);
916 secnetaddr=ipaddr_to_string(st->nl.secnet_address);
917 snprintf(mtu,6,"%d",st->nl.mtu);
918 mtu[5]=0;
919
920 sys_cmd(st->ifconfig_path,"ifconfig",st->interface_name,
921 hostaddr,"netmask","255.255.255.255","-broadcast",
922 "pointopoint",secnetaddr,"mtu",mtu,"up",(char *)0);
923
924 for (c=st->nl.clients; c; c=c->next) {
925 for (i=0; i<c->networks->entries; i++) {
926 network=ipaddr_to_string(c->networks->list[i].prefix);
927 mask=ipaddr_to_string(c->networks->list[i].mask);
928 sys_cmd(st->route_path,"route","add","-net",network,
929 "netmask",mask,"gw",secnetaddr,(char *)0);
2fe58dfd
SE
930 }
931 }
4efd681a
SE
932
933 /* Register for poll() */
934 register_for_poll(st, tun_beforepoll, tun_afterpoll, 1, st->nl.name);
935}
936
#ifdef HAVE_LINUX_IF_H
/* Closure "tun": create a netlink attached to the universal TUN/TAP
   driver. The single argument must be a dictionary. The device is
   opened here; the interface is configured later by tun_phase_hook. */
static list_t *tun_apply(closure_t *self, struct cloc loc, dict_t *context,
                         list_t *args)
{
    struct tun *st;
    item_t *item;
    dict_t *dict;
    struct ifreq ifr;

    st=safe_malloc(sizeof(*st),"tun_apply");

    /* First parameter must be a dict */
    item=list_elem(args,0);
    if (!item || item->type!=t_dict)
        cfgfatal(loc,"tun","parameter must be a dictionary\n");

    dict=item->data.dict;

    st->netlink_to_tunnel=
        netlink_init(&st->nl,st,loc,dict,
                     "netlink-tun",tun_deliver_to_kernel);

    st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
    st->interface_name=dict_read_string(dict,"interface",False,
                                        "tun-netlink",loc);
    st->ifconfig_path=dict_read_string(dict,"ifconfig-path",
                                       False,"tun-netlink",loc);
    st->route_path=dict_read_string(dict,"route-path",
                                    False,"tun-netlink",loc);

    if (!st->device_path) st->device_path="/dev/net/tun";
    if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
    if (!st->route_path) st->route_path="route";
    st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);

    /* New TUN interface: open the device, then do ioctl TUNSETIFF
       to set or find out the network interface name. */
    st->fd=open(st->device_path,O_RDWR);
    if (st->fd==-1) {
        fatal_perror("%s: can't open device file %s",st->nl.name,
                     st->device_path);
    }
    memset(&ifr,0,sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI; /* Just send/receive IP packets,
                                            no extra headers */
    if (st->interface_name) {
        /* A name that does not fit could not match the interface the
           kernel actually creates, so refuse it */
        if (strlen(st->interface_name)>=IFNAMSIZ) {
            cfgfatal(loc,"tun","interface name is too long\n");
        }
        /* ifr was zeroed above, so copying at most IFNAMSIZ-1 bytes
           always leaves the name NUL-terminated */
        strncpy(ifr.ifr_name,st->interface_name,IFNAMSIZ-1);
    }
    if (ioctl(st->fd,TUNSETIFF,&ifr)<0) {
        fatal_perror("%s: ioctl(TUNSETIFF)",st->nl.name);
    }
    if (!st->interface_name) {
        /* Use the name now present in ifr after the ioctl */
        st->interface_name=safe_malloc(strlen(ifr.ifr_name)+1,"tun_apply");
        strcpy(st->interface_name,ifr.ifr_name);
        Message(M_INFO,"%s: allocated network interface %s\n",st->nl.name,
                st->interface_name);
    }

    add_hook(PHASE_DROPPRIV,tun_phase_hook,st);

    return new_closure(&st->nl.cl);
}
#endif /* HAVE_LINUX_IF_H */
2fe58dfd 999
4efd681a
SE
1000static list_t *tun_old_apply(closure_t *self, struct cloc loc, dict_t *context,
1001 list_t *args)
2fe58dfd 1002{
4efd681a 1003 struct tun *st;
2fe58dfd
SE
1004 item_t *item;
1005 dict_t *dict;
4efd681a 1006 bool_t search_for_if;
2fe58dfd 1007
4efd681a
SE
1008 st=safe_malloc(sizeof(*st),"tun_old_apply");
1009
1010 Message(M_WARNING,"the tun-old code has never been tested. Please report "
1011 "success or failure to steve@greenend.org.uk\n");
2fe58dfd
SE
1012
1013 /* First parameter must be a dict */
1014 item=list_elem(args,0);
1015 if (!item || item->type!=t_dict)
4efd681a 1016 cfgfatal(loc,"tun","parameter must be a dictionary\n");
2fe58dfd
SE
1017
1018 dict=item->data.dict;
4efd681a
SE
1019
1020 st->netlink_to_tunnel=
1021 netlink_init(&st->nl,st,loc,dict,
1022 "netlink-tun",tun_deliver_to_kernel);
1023
1024 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1025 st->interface_name=dict_read_string(dict,"interface",False,
1026 "tun-netlink",loc);
1027 search_for_if=dict_read_bool(dict,"interface-search",False,"tun-netlink",
1028 loc,st->device_path==NULL);
59635212
SE
1029 st->ifconfig_path=dict_read_string(dict,"ifconfig-path",False,
1030 "tun-netlink",loc);
1031 st->route_path=dict_read_string(dict,"route-path",False,"tun-netlink",loc);
4efd681a
SE
1032
1033 if (!st->device_path) st->device_path="/dev/tun";
1034 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1035 if (!st->route_path) st->route_path="route";
1036 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1037
1038 /* Old TUN interface: the network interface name depends on which
1039 /dev/tunX file we open. If 'interface-search' is set to true, treat
1040 'device' as the prefix and try numbers from 0--255. If it's set
1041 to false, treat 'device' as the whole name, and require than an
1042 appropriate interface name be specified. */
1043 if (search_for_if) {
1044 string_t dname;
1045 int i;
1046
1047 if (st->interface_name) {
1048 cfgfatal(loc,"tun-old","you may not specify an interface name "
1049 "in interface-search mode\n");
1050 }
1051 dname=safe_malloc(strlen(st->device_path)+4,"tun_old_apply");
1052 st->interface_name=safe_malloc(8,"tun_old_apply");
1053
1054 for (i=0; i<255; i++) {
1055 sprintf(dname,"%s%d",st->device_path,i);
1056 if ((st->fd=open(dname,O_RDWR))>0) {
1057 sprintf(st->interface_name,"tun%d",i);
1058 Message(M_INFO,"%s: allocated network interface %s "
1059 "through %s\n",st->nl.name,st->interface_name,dname);
59635212 1060 break;
4efd681a
SE
1061 }
1062 }
1063 if (st->fd==-1) {
1064 fatal("%s: unable to open any TUN device (%s...)\n",
1065 st->nl.name,st->device_path);
1066 }
1067 } else {
1068 if (!st->interface_name) {
1069 cfgfatal(loc,"tun-old","you must specify an interface name "
1070 "when you explicitly specify a TUN device file\n");
1071 }
1072 st->fd=open(st->device_path,O_RDWR);
1073 if (st->fd==-1) {
1074 fatal_perror("%s: unable to open TUN device file %s",
1075 st->nl.name,st->device_path);
1076 }
2fe58dfd 1077 }
2fe58dfd 1078
4efd681a 1079 add_hook(PHASE_DROPPRIV,tun_phase_hook,st);
2fe58dfd 1080
4efd681a 1081 return new_closure(&st->nl.cl);
2fe58dfd
SE
1082}
1083
4efd681a
SE
1084/* No connection to the kernel at all... */
1085
2fe58dfd 1086struct null {
4efd681a 1087 struct netlink nl;
2fe58dfd
SE
1088};
1089
2fe58dfd
SE
/* deliver_to_host for the null netlink: discard the packet. The
   buffer must still be freed, because netlink_packet_deliver() asserts
   that it is free once deliver_to_host returns. */
static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1094
1095static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1096 list_t *args)
1097{
1098 struct null *st;
4efd681a
SE
1099 item_t *item;
1100 dict_t *dict;
2fe58dfd 1101
4efd681a 1102 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1103
4efd681a
SE
1104 item=list_elem(args,0);
1105 if (!item || item->type!=t_dict)
1106 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1107
1108 dict=item->data.dict;
1109
1110 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_deliver);
1111
1112 return new_closure(&st->nl.cl);
2fe58dfd
SE
1113}
1114
1115init_module netlink_module;
1116void netlink_module(dict_t *dict)
1117{
1118 add_closure(dict,"userv-ipif",userv_apply);
4efd681a
SE
1119#ifdef HAVE_LINUX_IF_H
1120 add_closure(dict,"tun",tun_apply);
1121#endif
1122 add_closure(dict,"tun-old",tun_old_apply);
1123 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1124#if 0
4efd681a 1125 /* TODO */
2fe58dfd
SE
1126 add_closure(dict,"pty-slip",ptyslip_apply);
1127 add_closure(dict,"slipd",slipd_apply);
1128#endif /* 0 */
2fe58dfd 1129}