Import release 0.08
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
1/* User-kernel network link */
2
4efd681a
SE
/* We will eventually support a variety of methods for extracting
   packets from the kernel: userv-ipif, ipif on its own (when we run
   as root), the kernel TUN driver, SLIP to a pty, an external netlink
   daemon. There is a performance/security tradeoff. */
2fe58dfd
SE
7
8/* When dealing with SLIP (to a pty, or ipif) we have separate rx, tx
9 and client buffers. When receiving we may read() any amount, not
10 just whole packets. When transmitting we need to bytestuff anyway,
11 and may be part-way through receiving. */
12
/* Each netlink device is actually a router, with its own IP
   address. We do things like decreasing the TTL and recalculating the
   header checksum, generating ICMP, responding to pings, etc. */
2fe58dfd
SE
16
17/* This is where we have the anti-spoofing paranoia - before sending a
18 packet to the kernel we check that the tunnel it came over could
19 reasonably have produced it. */
20
8689b3a9 21#include "secnet.h"
2fe58dfd
SE
22#include <stdio.h>
23#include <string.h>
24#include <unistd.h>
25#include <fcntl.h>
4efd681a 26#include <sys/ioctl.h>
2fe58dfd
SE
27#include "util.h"
28
4efd681a
SE
29#ifdef HAVE_LINUX_IF_H
30#include <linux/if.h>
31#include <linux/if_tun.h>
32#endif
33
34/* XXX where do we find if_tun on other architectures? */
35
2fe58dfd 36#define DEFAULT_BUFSIZE 2048
4efd681a
SE
37#define DEFAULT_MTU 1000
38#define ICMP_BUFSIZE 1024
2fe58dfd
SE
39
40#define SLIP_END 192
41#define SLIP_ESC 219
42#define SLIP_ESCEND 220
43#define SLIP_ESCESC 221
44
45struct netlink_client {
46 struct subnet_list *networks;
47 netlink_deliver_fn *deliver;
48 void *dst;
4efd681a
SE
49 string_t name;
50 bool_t can_deliver;
2fe58dfd
SE
51 struct netlink_client *next;
52};
53
4efd681a
SE
54/* Netlink provides one function to the device driver, to call to deliver
55 a packet from the device. The device driver provides one function to
56 netlink, for it to call to deliver a packet to the device. */
57
58struct netlink {
2fe58dfd
SE
59 closure_t cl;
60 struct netlink_if ops;
4efd681a
SE
61 void *dst; /* Pointer to host interface state */
62 string_t name;
2fe58dfd
SE
63 uint32_t max_start_pad;
64 uint32_t max_end_pad;
2fe58dfd 65 struct subnet_list networks;
4efd681a
SE
66 uint32_t local_address; /* host interface address */
67 uint32_t secnet_address; /* our own address */
2fe58dfd 68 uint32_t mtu;
4efd681a
SE
69 struct netlink_client *clients;
70 netlink_deliver_fn *deliver_to_host; /* Provided by driver */
71 struct buffer_if icmp; /* Buffer for assembly of outgoing ICMP */
2fe58dfd
SE
72};
73
4efd681a
SE
/* Generic IP checksum routine (the Internet checksum of RFC 1071).
 *
 * iph   - first byte of the data to checksum (no alignment required)
 * count - number of bytes to checksum
 *
 * Returns the one's-complement checksum in network byte order, i.e. the
 * value can be stored directly into a header's checksum field. Running it
 * over data that already contains a correct checksum yields 0.
 *
 * The data is consumed as big-endian 16-bit words, assembled byte by byte
 * so that neither alignment nor aliasing rules are violated. A trailing
 * odd byte is treated as the HIGH octet of a final word padded with zero,
 * as RFC 1071 requires.
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back in until the sum fits in 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)(~sum&0xffff));
}
90
4efd681a
SE
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph points at the IP header and ihl is the header length in 32-bit
 * words. The assembler is written as adjacent string literals, one
 * instruction per line, because a string literal may not contain a raw
 * newline. The "memory" clobber tells the compiler that the asm reads
 * the header through the pointer, so earlier stores to it must be
 * completed first.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
        "    movl (%1), %0\n"
        "    subl $4, %2\n"
        "    jbe 2f\n"
        "    addl 4(%1), %0\n"
        "    adcl 8(%1), %0\n"
        "    adcl 12(%1), %0\n"
        "1:  adcl 16(%1), %0\n"
        "    lea 4(%1), %1\n"
        "    decl %2\n"
        "    jne 1b\n"
        "    adcl $0, %0\n"
        "    movl %0, %2\n"
        "    shrl $16, %0\n"
        "    addw %w2, %w0\n"
        "    adcl $0, %0\n"
        "    notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum ihl 32-bit words using the generic routine. */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
134
/* IPv4 header without options. Multi-byte fields are kept in network
   byte order; the two 4-bit fields sharing the first octet are declared
   in the order that matches the host's bit-field layout. */
struct iphdr {
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;        /* header length in 32-bit words */
#else
    uint8_t ihl:4;        /* header length in 32-bit words */
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;     /* header plus payload, in bytes */
    uint16_t id;
    uint16_t frag_off;    /* flags and fragment offset */
    uint8_t ttl;
    uint8_t protocol;     /* 1 is ICMP */
    uint16_t check;       /* header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
154
155struct icmphdr {
156 struct iphdr iph;
157 uint8_t type;
158 uint8_t code;
159 uint16_t check;
160 union {
161 uint32_t unused;
162 struct {
163 uint8_t pointer;
164 uint8_t unused1;
165 uint16_t unused2;
166 } pprob;
167 uint32_t gwaddr;
168 struct {
169 uint16_t id;
170 uint16_t seq;
171 } echo;
172 } d;
173};
174
175static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf);
176
177static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
178 uint32_t dest,uint16_t len)
179{
180 struct icmphdr *h;
181
182 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
183 buffer_init(&st->icmp,st->max_start_pad);
184 h=buf_append(&st->icmp,sizeof(*h));
185
186 h->iph.version=4;
187 h->iph.ihl=5;
188 h->iph.tos=0;
189 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
190 h->iph.id=0;
191 h->iph.frag_off=0;
192 h->iph.ttl=255;
193 h->iph.protocol=1;
194 h->iph.saddr=htonl(st->secnet_address);
195 h->iph.daddr=htonl(dest);
196 h->iph.check=0;
197 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
198 h->check=0;
199 h->d.unused=0;
200
201 return h;
202}
203
204/* Fill in the ICMP checksum field correctly */
205static void netlink_icmp_csum(struct icmphdr *h)
206{
207 uint32_t len;
208
209 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
210 h->check=0;
211 h->check=ip_csum(&h->type,len);
212}
213
214/* RFC1122:
215 * An ICMP error message MUST NOT be sent as the result of
216 * receiving:
217 *
218 * * an ICMP error message, or
219 *
220 * * a datagram destined to an IP broadcast or IP multicast
221 * address, or
222 *
223 * * a datagram sent as a link-layer broadcast, or
224 *
225 * * a non-initial fragment, or
226 *
227 * * a datagram whose source address does not define a single
228 * host -- e.g., a zero address, a loopback address, a
229 * broadcast address, a multicast address, or a Class E
230 * address.
231 */
232static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
233{
234 struct iphdr *iph;
235 uint32_t source;
236
237 iph=(struct iphdr *)buf->start;
238 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
239 eg. icmp echo-request */
240 /* How do we spot broadcast destination addresses? */
241 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
242 source=ntohl(iph->saddr);
243 if (source==0) return False;
244 if ((source&0xff000000)==0x7f000000) return False;
245 /* How do we spot broadcast source addresses? */
246 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
247 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
248 return True;
249}
250
251/* How much of the original IP packet do we include in its ICMP
252 response? The header plus up to 64 bits. */
253static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
254{
255 struct iphdr *iph=(struct iphdr *)buf->start;
256 uint16_t hlen,plen;
257
258 hlen=iph->ihl*4;
259 /* We include the first 8 bytes of the packet data, provided they exist */
260 hlen+=8;
261 plen=ntohs(iph->tot_len);
262 return (hlen>plen?plen:hlen);
263}
264
265static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
266 uint8_t type, uint8_t code)
267{
268 struct iphdr *iph=(struct iphdr *)buf->start;
269 struct icmphdr *h;
270 uint16_t len;
271
272 if (netlink_icmp_may_reply(buf)) {
273 len=netlink_icmp_reply_len(buf);
274 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
275 h->type=type; h->code=code;
276 memcpy(buf_append(&st->icmp,len),buf->start,len);
277 netlink_icmp_csum(h);
278 netlink_packet_deliver(st,&st->icmp);
279 BUF_ASSERT_FREE(&st->icmp);
280 }
281}
282
283/*
284 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
285 * checksum.
286 *
287 * Is the datagram acceptable?
288 *
289 * 1. Length at least the size of an ip header
290 * 2. Version of 4
291 * 3. Checksums correctly.
292 * 4. Doesn't have a bogus length
293 */
294static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
295{
296 struct iphdr *iph=(struct iphdr *)buf->start;
297 uint32_t len;
298
299 if (iph->ihl < 5 || iph->version != 4) {
300 printf("ihl/version check failed\n");
301 return False;
302 }
303 if (buf->size < iph->ihl*4) {
304 printf("buffer size check failed\n");
305 return False;
306 }
307 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) {
308 printf("checksum failed\n");
309 return False;
310 }
311 len=ntohs(iph->tot_len);
312 /* There should be no padding */
313 if (buf->size!=len || len<(iph->ihl<<2)) {
314 printf("length check failed buf->size=%d len=%d\n",buf->size,len);
315 return False;
316 }
317
318 /* XXX check that there's no source route specified */
319 return True;
320}
321
322static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf)
323{
324 struct iphdr *iph=(struct iphdr *)buf->start;
325 uint32_t dest=ntohl(iph->daddr);
2fe58dfd
SE
326 struct netlink_client *c;
327
4efd681a 328 BUF_ASSERT_USED(buf);
2fe58dfd 329
4efd681a
SE
330 if (dest==st->secnet_address) {
331 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
332 BUF_FREE(buf);
2fe58dfd
SE
333 return;
334 }
4efd681a 335
2fe58dfd
SE
336 for (c=st->clients; c; c=c->next) {
337 if (subnet_match(c->networks,dest)) {
4efd681a
SE
338 if (c->can_deliver) {
339 c->deliver(c->dst,c,buf);
340 BUF_ASSERT_FREE(buf);
341 } else {
342 /* Generate ICMP destination unreachable */
343 netlink_icmp_simple(st,buf,3,0);
344 BUF_FREE(buf);
345 }
2fe58dfd
SE
346 return;
347 }
348 }
4efd681a
SE
349 if (subnet_match(&st->networks,dest)) {
350 st->deliver_to_host(st->dst,NULL,buf);
351 BUF_ASSERT_FREE(buf);
352 return;
353 }
354 Message(M_ERROR,"%s: failed to deliver a packet (bad destination address)"
355 "\nXXX make this message clearer\n");
356 BUF_FREE(buf);
357}
358
359static void netlink_packet_forward(struct netlink *st, struct buffer_if *buf)
360{
361 struct iphdr *iph=(struct iphdr *)buf->start;
362
363 BUF_ASSERT_USED(buf);
364
365 /* Packet has already been checked */
366 if (iph->ttl<=1) {
367 /* Generate ICMP time exceeded */
368 netlink_icmp_simple(st,buf,11,0);
369 BUF_FREE(buf);
370 return;
371 }
372 iph->ttl--;
373 iph->check=0;
374 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
375
376 netlink_packet_deliver(st,buf);
377 BUF_ASSERT_FREE(buf);
378}
379
380/* Someone has been foolish enough to address a packet to us. I
381 suppose we should reply to it, just to be polite. */
382static void netlink_packet_local(struct netlink *st, struct buffer_if *buf)
383{
384 struct icmphdr *h;
385
386 h=(struct icmphdr *)buf->start;
387
388 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
389 Message(M_WARNING,"%s: fragmented packet addressed to us\n",st->name);
390 BUF_FREE(buf);
391 return;
392 }
393
394 if (h->iph.protocol==1) {
395 /* It's ICMP */
396 if (h->type==8 && h->code==0) {
397 /* ICMP echo-request. Special case: we re-use the buffer
398 to construct the reply. */
399 h->type=0;
400 h->iph.daddr=h->iph.saddr;
401 h->iph.saddr=htonl(st->secnet_address);
402 h->iph.ttl=255; /* Be nice and bump it up again... */
403 h->iph.check=0;
404 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
405 netlink_icmp_csum(h);
406 netlink_packet_deliver(st,buf);
407 return;
408 }
409 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
410 } else {
411 /* Send ICMP protocol unreachable */
412 netlink_icmp_simple(st,buf,3,2);
413 BUF_FREE(buf);
414 return;
415 }
416
417 BUF_FREE(buf);
418}
419
420/* Called by site code when remote packet is available */
421/* buf is allocated on entry and free on return */
422static void netlink_from_tunnel(void *sst, void *cst, struct buffer_if *buf)
423{
424 struct netlink *st=sst;
425 struct netlink_client *client=cst;
426 uint32_t source,dest;
427 struct iphdr *iph;
428
429 BUF_ASSERT_USED(buf);
430 if (!netlink_check(st,buf)) {
431 Message(M_WARNING,"%s: bad IP packet from tunnel %s\n",
432 st->name,client->name);
433 BUF_FREE(buf);
434 return;
435 }
436 iph=(struct iphdr *)buf->start;
437
438 source=ntohl(iph->saddr);
439 dest=ntohl(iph->daddr);
440
441 /* Check that the packet source is in 'nets' and its destination is
442 in client->networks */
443 if (!subnet_match(client->networks,source)) {
444 string_t s,d;
445 s=ipaddr_to_string(source);
446 d=ipaddr_to_string(dest);
447 Message(M_WARNING,"%s: packet from tunnel %s with bad source address "
448 "(s=%s,d=%s)\n",st->name,client->name,s,d);
449 free(s); free(d);
450 BUF_FREE(buf);
451 return;
452 }
453 /* (st->secnet_address needs checking before matching against
454 st->networks because secnet's IP address may not be in the
455 range the host is willing to deal with) */
2fe58dfd 456 if (dest==st->secnet_address) {
4efd681a
SE
457 netlink_packet_local(st,buf);
458 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
459 return;
460 }
4efd681a 461 if (!subnet_match(&st->networks,dest)) {
2fe58dfd
SE
462 string_t s,d;
463 s=ipaddr_to_string(source);
464 d=ipaddr_to_string(dest);
4efd681a
SE
465 Message(M_WARNING,"%s: incoming packet from tunnel %s "
466 "with bad destination address "
467 "(s=%s,d=%s)\n",st->name,client->name,s,d);
2fe58dfd 468 free(s); free(d);
4efd681a 469 BUF_FREE(buf);
2fe58dfd
SE
470 return;
471 }
4efd681a
SE
472
473 netlink_packet_forward(st,buf);
474
475 BUF_ASSERT_FREE(buf);
476}
477
478/* Called by driver code when packet is received from kernel */
479/* cid should be NULL */
480/* buf should be allocated on entry, and is free on return */
481static void netlink_from_host(void *sst, void *cid, struct buffer_if *buf)
482{
483 struct netlink *st=sst;
484 uint32_t source,dest;
485 struct iphdr *iph;
486
487 BUF_ASSERT_USED(buf);
488 if (!netlink_check(st,buf)) {
489 Message(M_WARNING,"%s: bad IP packet from host\n",
490 st->name);
491 BUF_FREE(buf);
492 return;
493 }
494 iph=(struct iphdr *)buf->start;
495
496 source=ntohl(iph->saddr);
497 dest=ntohl(iph->daddr);
498
499 if (!subnet_match(&st->networks,source)) {
500 string_t s,d;
501 s=ipaddr_to_string(source);
502 d=ipaddr_to_string(dest);
503 Message(M_WARNING,"%s: outgoing packet with bad source address "
504 "(s=%s,d=%s)\n",st->name,s,d);
505 free(s); free(d);
506 BUF_FREE(buf);
507 return;
508 }
509 if (dest==st->secnet_address) {
510 netlink_packet_local(st,buf);
511 BUF_ASSERT_FREE(buf);
512 return;
513 }
514 netlink_packet_forward(st,buf);
515 BUF_ASSERT_FREE(buf);
516}
517
518static void netlink_set_delivery(void *sst, void *cid, bool_t can_deliver)
519{
520 struct netlink_client *c=cid;
521
522 c->can_deliver=can_deliver;
523}
524
525static void *netlink_regnets(void *sst, struct subnet_list *nets,
526 netlink_deliver_fn *deliver, void *dst,
527 uint32_t max_start_pad, uint32_t max_end_pad,
528 string_t client_name)
529{
530 struct netlink *st=sst;
531 struct netlink_client *c;
532
533 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
534 "max_start_pad=%d, max_end_pad=%d\n",
535 nets->entries,max_start_pad,max_end_pad);
536
537 c=safe_malloc(sizeof(*c),"netlink_regnets");
538 c->networks=nets;
539 c->deliver=deliver;
540 c->dst=dst;
541 c->name=client_name; /* XXX copy it? */
542 c->can_deliver=False;
543 c->next=st->clients;
544 st->clients=c;
545 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
546 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
547
548 return c;
549}
550
551static netlink_deliver_fn *netlink_init(struct netlink *st,
552 void *dst, struct cloc loc,
553 dict_t *dict, string_t description,
554 netlink_deliver_fn *to_host)
555{
556 st->dst=dst;
557 st->cl.description=description;
558 st->cl.type=CL_NETLINK;
559 st->cl.apply=NULL;
560 st->cl.interface=&st->ops;
561 st->ops.st=st;
562 st->ops.regnets=netlink_regnets;
563 st->ops.deliver=netlink_from_tunnel;
564 st->ops.set_delivery=netlink_set_delivery;
565 st->max_start_pad=0;
566 st->max_end_pad=0;
567 st->clients=NULL;
568 st->deliver_to_host=to_host;
569
570 st->name=dict_read_string(dict,"name",False,"netlink",loc);
571 if (!st->name) st->name=description;
572 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
573 &st->networks);
574 st->local_address=string_to_ipaddr(
575 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
576 st->secnet_address=string_to_ipaddr(
577 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
578 if (!subnet_match(&st->networks,st->local_address)) {
579 cfgfatal(loc,"netlink","local-address must be in local networks\n");
580 }
581 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
582 buffer_new(&st->icmp,ICMP_BUFSIZE);
583
584 return netlink_from_host;
585}
586
587/* Connection to the kernel through userv-ipif */
588
589struct userv {
590 struct netlink nl;
591 int txfd; /* We transmit to userv */
592 int rxfd; /* We receive from userv */
593 string_t userv_path;
594 string_t service_user;
595 string_t service_name;
596 uint32_t txbuflen;
597 struct buffer_if *buff; /* We unstuff received packets into here
598 and send them to the site code. */
599 bool_t pending_esc;
600 netlink_deliver_fn *netlink_to_tunnel;
601};
602
603static int userv_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
604 int *timeout_io, const struct timeval *tv_now,
605 uint64_t *now)
606{
607 struct userv *st=sst;
608 *nfds_io=2;
609 fds[0].fd=st->txfd;
610 fds[0].events=POLLERR; /* Might want to pick up POLLOUT sometime */
611 fds[1].fd=st->rxfd;
612 fds[1].events=POLLIN|POLLERR|POLLHUP;
613 return 0;
2fe58dfd
SE
614}
615
616static void userv_afterpoll(void *sst, struct pollfd *fds, int nfds,
617 const struct timeval *tv_now, uint64_t *now)
618{
619 struct userv *st=sst;
620 uint8_t rxbuf[DEFAULT_BUFSIZE];
621 int l,i;
622
623 if (fds[1].revents&POLLERR) {
624 printf("userv_afterpoll: hup!\n");
625 }
626 if (fds[1].revents&POLLIN) {
627 l=read(st->rxfd,rxbuf,DEFAULT_BUFSIZE);
628 if (l<0) {
629 fatal_perror("userv_afterpoll: read(rxfd)");
630 }
631 if (l==0) {
632 fatal("userv_afterpoll: read(rxfd)=0; userv gone away?\n");
633 }
634 /* XXX really crude unstuff code */
635 /* XXX check for buffer overflow */
4efd681a 636 BUF_ASSERT_USED(st->buff);
2fe58dfd
SE
637 for (i=0; i<l; i++) {
638 if (st->pending_esc) {
639 st->pending_esc=False;
640 switch(rxbuf[i]) {
641 case SLIP_ESCEND:
642 *(uint8_t *)buf_append(st->buff,1)=SLIP_END;
643 break;
644 case SLIP_ESCESC:
645 *(uint8_t *)buf_append(st->buff,1)=SLIP_ESC;
646 break;
647 default:
648 fatal("userv_afterpoll: bad SLIP escape character\n");
649 }
650 } else {
651 switch (rxbuf[i]) {
652 case SLIP_END:
4efd681a
SE
653 if (st->buff->size>0) {
654 st->netlink_to_tunnel(&st->nl,NULL,
655 st->buff);
656 BUF_ALLOC(st->buff,"userv_afterpoll");
657 }
658 buffer_init(st->buff,st->nl.max_start_pad);
2fe58dfd
SE
659 break;
660 case SLIP_ESC:
661 st->pending_esc=True;
662 break;
663 default:
664 *(uint8_t *)buf_append(st->buff,1)=rxbuf[i];
665 break;
666 }
667 }
668 }
669 }
4efd681a
SE
670}
671
672/* Send buf to the kernel. Free buf before returning. */
673static void userv_deliver_to_kernel(void *sst, void *cid,
674 struct buffer_if *buf)
675{
676 struct userv *st=sst;
677 uint8_t txbuf[DEFAULT_BUFSIZE];
678 uint8_t *i;
679 uint32_t j;
680
681 BUF_ASSERT_USED(buf);
682
683 /* Spit the packet at userv-ipif: SLIP start marker, then
684 bytestuff the packet, then SLIP end marker */
685 /* XXX crunchy bytestuff code */
686 j=0;
687 txbuf[j++]=SLIP_END;
688 for (i=buf->start; i<(buf->start+buf->size); i++) {
689 switch (*i) {
690 case SLIP_END:
691 txbuf[j++]=SLIP_ESC;
692 txbuf[j++]=SLIP_ESCEND;
693 break;
694 case SLIP_ESC:
695 txbuf[j++]=SLIP_ESC;
696 txbuf[j++]=SLIP_ESCESC;
697 break;
698 default:
699 txbuf[j++]=*i;
700 break;
701 }
702 }
703 txbuf[j++]=SLIP_END;
704 if (write(st->txfd,txbuf,j)<0) {
705 fatal_perror("userv_deliver_to_kernel: write()");
706 }
707 BUF_FREE(buf);
2fe58dfd
SE
708}
709
710static void userv_phase_hook(void *sst, uint32_t newphase)
711{
712 struct userv *st=sst;
713 pid_t child;
714 int c_stdin[2];
715 int c_stdout[2];
716 string_t addrs;
717 string_t nets;
718 string_t s;
719 struct netlink_client *c;
720 int i;
721
722 /* This is where we actually invoke userv - all the networks we'll
723 be using should already have been registered. */
724
725 addrs=safe_malloc(512,"userv_phase_hook:addrs");
4efd681a
SE
726 snprintf(addrs,512,"%s,%s,%d,slip",ipaddr_to_string(st->nl.local_address),
727 ipaddr_to_string(st->nl.secnet_address),st->nl.mtu);
2fe58dfd
SE
728
729 nets=safe_malloc(1024,"userv_phase_hook:nets");
730 *nets=0;
4efd681a 731 for (c=st->nl.clients; c; c=c->next) {
2fe58dfd
SE
732 for (i=0; i<c->networks->entries; i++) {
733 s=subnet_to_string(&c->networks->list[i]);
734 strcat(nets,s);
735 strcat(nets,",");
736 free(s);
737 }
738 }
739 nets[strlen(nets)-1]=0;
740
741 Message(M_INFO,"\nuserv_phase_hook: %s %s %s %s %s\n",st->userv_path,
742 st->service_user,st->service_name,addrs,nets);
743
744 /* Allocate buffer, plus space for padding. Make sure we end up
745 with the start of the packet well-aligned. */
746 /* ALIGN(st->max_start_pad,16); */
747 /* ALIGN(st->max_end_pad,16); */
748
749 st->pending_esc=False;
750
751 /* Invoke userv */
752 if (pipe(c_stdin)!=0) {
753 fatal_perror("userv_phase_hook: pipe(c_stdin)");
754 }
755 if (pipe(c_stdout)!=0) {
756 fatal_perror("userv_phase_hook: pipe(c_stdout)");
757 }
758 st->txfd=c_stdin[1];
759 st->rxfd=c_stdout[0];
760
761 child=fork();
762 if (child==-1) {
763 fatal_perror("userv_phase_hook: fork()");
764 }
765 if (child==0) {
766 char **argv;
767
768 /* We are the child. Modify our stdin and stdout, then exec userv */
769 dup2(c_stdin[0],0);
770 dup2(c_stdout[1],1);
771 close(c_stdin[1]);
772 close(c_stdout[0]);
773
774 /* The arguments are:
775 userv
776 service-user
777 service-name
778 local-addr,secnet-addr,mtu,protocol
779 route1,route2,... */
780 argv=malloc(sizeof(*argv)*6);
781 argv[0]=st->userv_path;
782 argv[1]=st->service_user;
783 argv[2]=st->service_name;
784 argv[3]=addrs;
785 argv[4]=nets;
786 argv[5]=NULL;
787 execvp(st->userv_path,argv);
788 perror("netlink-userv-ipif: execvp");
789
790 exit(1);
791 }
792 /* We are the parent... */
793
794 /* Register for poll() */
4efd681a 795 register_for_poll(st, userv_beforepoll, userv_afterpoll, 2, st->nl.name);
2fe58dfd
SE
796}
797
4efd681a
SE
798static list_t *userv_apply(closure_t *self, struct cloc loc, dict_t *context,
799 list_t *args)
2fe58dfd 800{
4efd681a
SE
801 struct userv *st;
802 item_t *item;
803 dict_t *dict;
2fe58dfd 804
4efd681a 805 st=safe_malloc(sizeof(*st),"userv_apply");
2fe58dfd 806
4efd681a
SE
807 /* First parameter must be a dict */
808 item=list_elem(args,0);
809 if (!item || item->type!=t_dict)
810 cfgfatal(loc,"userv-ipif","parameter must be a dictionary\n");
811
812 dict=item->data.dict;
2fe58dfd 813
4efd681a
SE
814 st->netlink_to_tunnel=
815 netlink_init(&st->nl,st,loc,dict,
816 "netlink-userv-ipif",userv_deliver_to_kernel);
817
818 st->userv_path=dict_read_string(dict,"userv-path",False,"userv-netlink",
819 loc);
820 st->service_user=dict_read_string(dict,"service-user",False,
821 "userv-netlink",loc);
822 st->service_name=dict_read_string(dict,"service-name",False,
823 "userv-netlink",loc);
824 if (!st->userv_path) st->userv_path="userv";
825 if (!st->service_user) st->service_user="root";
826 if (!st->service_name) st->service_name="ipif";
827 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"userv-netlink",loc);
828 BUF_ALLOC(st->buff,"netlink:userv_apply");
829
830 st->rxfd=-1; st->txfd=-1;
831 add_hook(PHASE_DROPPRIV,userv_phase_hook,st);
832
833 return new_closure(&st->nl.cl);
2fe58dfd
SE
834}
835
4efd681a 836/* Connection to the kernel through the universal TUN/TAP driver */
2fe58dfd 837
4efd681a
SE
838struct tun {
839 struct netlink nl;
840 int fd;
841 string_t device_path;
842 string_t interface_name;
843 string_t ifconfig_path;
844 string_t route_path;
845 struct buffer_if *buff; /* We receive packets into here
846 and send them to the netlink code. */
847 netlink_deliver_fn *netlink_to_tunnel;
848};
2fe58dfd 849
4efd681a
SE
850static int tun_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
851 int *timeout_io, const struct timeval *tv_now,
852 uint64_t *now)
853{
854 struct tun *st=sst;
855 *nfds_io=1;
856 fds[0].fd=st->fd;
857 fds[0].events=POLLIN|POLLERR|POLLHUP;
858 return 0;
859}
2fe58dfd 860
4efd681a
SE
861static void tun_afterpoll(void *sst, struct pollfd *fds, int nfds,
862 const struct timeval *tv_now, uint64_t *now)
863{
864 struct tun *st=sst;
865 int l;
866
867 if (fds[0].revents&POLLERR) {
868 printf("tun_afterpoll: hup!\n");
2fe58dfd 869 }
4efd681a
SE
870 if (fds[0].revents&POLLIN) {
871 BUF_ALLOC(st->buff,"tun_afterpoll");
872 buffer_init(st->buff,st->nl.max_start_pad);
873 l=read(st->fd,st->buff->start,st->buff->len-st->nl.max_start_pad);
874 if (l<0) {
875 fatal_perror("tun_afterpoll: read()");
876 }
877 if (l==0) {
878 fatal("tun_afterpoll: read()=0; device gone away?\n");
879 }
880 if (l>0) {
881 st->buff->size=l;
882 st->netlink_to_tunnel(&st->nl,NULL,st->buff);
883 BUF_ASSERT_FREE(st->buff);
884 }
2fe58dfd 885 }
4efd681a 886}
2fe58dfd 887
4efd681a
SE
888static void tun_deliver_to_kernel(void *sst, void *cid,
889 struct buffer_if *buf)
890{
891 struct tun *st=sst;
2fe58dfd 892
4efd681a 893 BUF_ASSERT_USED(buf);
2fe58dfd 894
4efd681a
SE
895 /* No error checking, because we'd just throw the packet away anyway */
896 write(st->fd,buf->start,buf->size);
897 BUF_FREE(buf);
898}
2fe58dfd 899
4efd681a
SE
900static void tun_phase_hook(void *sst, uint32_t newphase)
901{
902 struct tun *st=sst;
903 string_t hostaddr,secnetaddr;
904 uint8_t mtu[6];
905 string_t network,mask;
906 struct netlink_client *c;
907 int i;
908
909 /* All the networks we'll be using have been registered. Invoke ifconfig
910 to set the TUN device's address, and route to add routes to all
911 our networks. */
912
913 hostaddr=ipaddr_to_string(st->nl.local_address);
914 secnetaddr=ipaddr_to_string(st->nl.secnet_address);
915 snprintf(mtu,6,"%d",st->nl.mtu);
916 mtu[5]=0;
917
918 sys_cmd(st->ifconfig_path,"ifconfig",st->interface_name,
919 hostaddr,"netmask","255.255.255.255","-broadcast",
920 "pointopoint",secnetaddr,"mtu",mtu,"up",(char *)0);
921
922 for (c=st->nl.clients; c; c=c->next) {
923 for (i=0; i<c->networks->entries; i++) {
924 network=ipaddr_to_string(c->networks->list[i].prefix);
925 mask=ipaddr_to_string(c->networks->list[i].mask);
926 sys_cmd(st->route_path,"route","add","-net",network,
927 "netmask",mask,"gw",secnetaddr,(char *)0);
2fe58dfd
SE
928 }
929 }
4efd681a
SE
930
931 /* Register for poll() */
932 register_for_poll(st, tun_beforepoll, tun_afterpoll, 1, st->nl.name);
933}
934
#ifdef HAVE_LINUX_IF_H
/* Configuration-language constructor for "tun" (the universal TUN/TAP
 * driver). The single argument must be a dictionary; besides the common
 * netlink keys it may give "device" (default "/dev/net/tun"),
 * "interface", "ifconfig-path" (default "ifconfig") and "route-path"
 * (default "route"), and must give "buffer".
 *
 * The device is opened and bound to an interface here; if no interface
 * name was configured, the one reported back by TUNSETIFF is recorded.
 * Addresses and routes are set up later by tun_phase_hook.
 */
static list_t *tun_apply(closure_t *self, struct cloc loc, dict_t *context,
                         list_t *args)
{
    struct tun *st;
    item_t *item;
    dict_t *dict;
    struct ifreq ifr;

    st=safe_malloc(sizeof(*st),"tun_apply");

    /* First parameter must be a dict */
    item=list_elem(args,0);
    if (!item || item->type!=t_dict)
        cfgfatal(loc,"tun","parameter must be a dictionary\n");

    dict=item->data.dict;

    st->netlink_to_tunnel=
        netlink_init(&st->nl,st,loc,dict,
                     "netlink-tun",tun_deliver_to_kernel);

    st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
    st->interface_name=dict_read_string(dict,"interface",False,
                                        "tun-netlink",loc);
    st->ifconfig_path=dict_read_string(dict,"ifconfig-path",
                                       False,"tun-netlink",loc);
    st->route_path=dict_read_string(dict,"route-path",
                                    False,"tun-netlink",loc);

    if (!st->device_path) st->device_path="/dev/net/tun";
    if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
    if (!st->route_path) st->route_path="route";
    st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);

    /* New TUN interface: open the device, then do ioctl TUNSETIFF
       to set or find out the network interface name. */
    st->fd=open(st->device_path,O_RDWR);
    if (st->fd==-1) {
        fatal_perror("%s: can't open device file %s",st->nl.name,
                     st->device_path);
    }
    memset(&ifr,0,sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI; /* Just send/receive IP packets,
                                            no extra headers */
    /* Copy at most IFNAMSIZ-1 bytes: ifr was zeroed above, so the name
       is then always NUL-terminated even if the configured one is too
       long. */
    if (st->interface_name)
        strncpy(ifr.ifr_name,st->interface_name,IFNAMSIZ-1);
    if (ioctl(st->fd,TUNSETIFF,&ifr)<0) {
        fatal_perror("%s: ioctl(TUNSETIFF)",st->nl.name);
    }
    if (!st->interface_name) {
        st->interface_name=safe_malloc(strlen(ifr.ifr_name)+1,"tun_apply");
        strcpy(st->interface_name,ifr.ifr_name);
        Message(M_INFO,"%s: allocated network interface %s\n",st->nl.name,
                st->interface_name);
    }

    add_hook(PHASE_DROPPRIV,tun_phase_hook,st);

    return new_closure(&st->nl.cl);
}
#endif /* HAVE_LINUX_IF_H */
2fe58dfd 997
4efd681a
SE
998static list_t *tun_old_apply(closure_t *self, struct cloc loc, dict_t *context,
999 list_t *args)
2fe58dfd 1000{
4efd681a 1001 struct tun *st;
2fe58dfd
SE
1002 item_t *item;
1003 dict_t *dict;
4efd681a 1004 bool_t search_for_if;
2fe58dfd 1005
4efd681a
SE
1006 st=safe_malloc(sizeof(*st),"tun_old_apply");
1007
1008 Message(M_WARNING,"the tun-old code has never been tested. Please report "
1009 "success or failure to steve@greenend.org.uk\n");
2fe58dfd
SE
1010
1011 /* First parameter must be a dict */
1012 item=list_elem(args,0);
1013 if (!item || item->type!=t_dict)
4efd681a 1014 cfgfatal(loc,"tun","parameter must be a dictionary\n");
2fe58dfd
SE
1015
1016 dict=item->data.dict;
4efd681a
SE
1017
1018 st->netlink_to_tunnel=
1019 netlink_init(&st->nl,st,loc,dict,
1020 "netlink-tun",tun_deliver_to_kernel);
1021
1022 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1023 st->interface_name=dict_read_string(dict,"interface",False,
1024 "tun-netlink",loc);
1025 search_for_if=dict_read_bool(dict,"interface-search",False,"tun-netlink",
1026 loc,st->device_path==NULL);
59635212
SE
1027 st->ifconfig_path=dict_read_string(dict,"ifconfig-path",False,
1028 "tun-netlink",loc);
1029 st->route_path=dict_read_string(dict,"route-path",False,"tun-netlink",loc);
4efd681a
SE
1030
1031 if (!st->device_path) st->device_path="/dev/tun";
1032 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1033 if (!st->route_path) st->route_path="route";
1034 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1035
1036 /* Old TUN interface: the network interface name depends on which
1037 /dev/tunX file we open. If 'interface-search' is set to true, treat
1038 'device' as the prefix and try numbers from 0--255. If it's set
1039 to false, treat 'device' as the whole name, and require than an
1040 appropriate interface name be specified. */
1041 if (search_for_if) {
1042 string_t dname;
1043 int i;
1044
1045 if (st->interface_name) {
1046 cfgfatal(loc,"tun-old","you may not specify an interface name "
1047 "in interface-search mode\n");
1048 }
1049 dname=safe_malloc(strlen(st->device_path)+4,"tun_old_apply");
1050 st->interface_name=safe_malloc(8,"tun_old_apply");
1051
1052 for (i=0; i<255; i++) {
1053 sprintf(dname,"%s%d",st->device_path,i);
1054 if ((st->fd=open(dname,O_RDWR))>0) {
1055 sprintf(st->interface_name,"tun%d",i);
1056 Message(M_INFO,"%s: allocated network interface %s "
1057 "through %s\n",st->nl.name,st->interface_name,dname);
59635212 1058 break;
4efd681a
SE
1059 }
1060 }
1061 if (st->fd==-1) {
1062 fatal("%s: unable to open any TUN device (%s...)\n",
1063 st->nl.name,st->device_path);
1064 }
1065 } else {
1066 if (!st->interface_name) {
1067 cfgfatal(loc,"tun-old","you must specify an interface name "
1068 "when you explicitly specify a TUN device file\n");
1069 }
1070 st->fd=open(st->device_path,O_RDWR);
1071 if (st->fd==-1) {
1072 fatal_perror("%s: unable to open TUN device file %s",
1073 st->nl.name,st->device_path);
1074 }
2fe58dfd 1075 }
2fe58dfd 1076
4efd681a 1077 add_hook(PHASE_DROPPRIV,tun_phase_hook,st);
2fe58dfd 1078
4efd681a 1079 return new_closure(&st->nl.cl);
2fe58dfd
SE
1080}
1081
4efd681a
SE
1082/* No connection to the kernel at all... */
1083
2fe58dfd 1084struct null {
4efd681a 1085 struct netlink nl;
2fe58dfd
SE
1086};
1087
2fe58dfd
SE
/* "Deliver" a packet to the non-existent host by discarding it. The
   buffer must still be released: netlink_packet_deliver asserts that
   the buffer is free once the host delivery function has returned.
   sst and cid are not used. */
static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
{
    BUF_FREE(buf);
}
1092
1093static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1094 list_t *args)
1095{
1096 struct null *st;
4efd681a
SE
1097 item_t *item;
1098 dict_t *dict;
2fe58dfd 1099
4efd681a 1100 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1101
4efd681a
SE
1102 item=list_elem(args,0);
1103 if (!item || item->type!=t_dict)
1104 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1105
1106 dict=item->data.dict;
1107
1108 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_deliver);
1109
1110 return new_closure(&st->nl.cl);
2fe58dfd
SE
1111}
1112
1113init_module netlink_module;
1114void netlink_module(dict_t *dict)
1115{
1116 add_closure(dict,"userv-ipif",userv_apply);
4efd681a
SE
1117#ifdef HAVE_LINUX_IF_H
1118 add_closure(dict,"tun",tun_apply);
1119#endif
1120 add_closure(dict,"tun-old",tun_old_apply);
1121 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1122#if 0
4efd681a 1123 /* TODO */
2fe58dfd
SE
1124 add_closure(dict,"pty-slip",ptyslip_apply);
1125 add_closure(dict,"slipd",slipd_apply);
1126#endif /* 0 */
2fe58dfd 1127}