Import release 0.1.0
[secnet] / netlink.c
CommitLineData
2fe58dfd
SE
/* User-kernel network link */

/* We support a variety of methods for extracting packets from the
   kernel: userv-ipif, ipif on its own (when we run as root), the
   kernel TUN driver.  Possible future methods: SLIP to a pty, an
   external netlink daemon.  There is a performance/security
   tradeoff. */

/* When dealing with SLIP (to a pty, or ipif) we have separate rx, tx
   and client buffers.  When receiving we may read() any amount, not
   just whole packets.  When transmitting we need to bytestuff anyway,
   and may be part-way through receiving. */

/* Each netlink device is actually a router, with its own IP address.
   We do things like decreasing the TTL and recalculating the header
   checksum, generating ICMP, responding to pings, etc. */

/* This is where we have the anti-spoofing paranoia - before sending a
   packet to the kernel we check that the tunnel it came over could
   reasonably have produced it. */
21
8689b3a9 22#include "secnet.h"
2fe58dfd
SE
23#include <stdio.h>
24#include <string.h>
25#include <unistd.h>
26#include <fcntl.h>
4efd681a 27#include <sys/ioctl.h>
2fe58dfd
SE
28#include "util.h"
29
4efd681a
SE
30#ifdef HAVE_LINUX_IF_H
31#include <linux/if.h>
32#include <linux/if_tun.h>
33#endif
34
baa06aeb 35/* XXX where do we find if_tun on other platforms? */
4efd681a 36
/* Size in bytes of the on-stack scratch buffers used by the userv
   SLIP receive and transmit paths */
#define DEFAULT_BUFSIZE 2048
/* MTU used when the configuration dictionary has no "mtu" entry */
#define DEFAULT_MTU 1000
/* Size handed to buffer_new() for the ICMP assembly buffer */
#define ICMP_BUFSIZE 1024

/* SLIP framing bytes: END delimits packets, ESC introduces a two-byte
   escape; ESC,ESCEND stands for a literal END byte and ESC,ESCESC for a
   literal ESC byte. */
#define SLIP_END    0xC0
#define SLIP_ESC    0xDB
#define SLIP_ESCEND 0xDC
#define SLIP_ESCESC 0xDD
45
46struct netlink_client {
47 struct subnet_list *networks;
48 netlink_deliver_fn *deliver;
49 void *dst;
4efd681a
SE
50 string_t name;
51 bool_t can_deliver;
2fe58dfd
SE
52 struct netlink_client *next;
53};
54
4efd681a
SE
55/* Netlink provides one function to the device driver, to call to deliver
56 a packet from the device. The device driver provides one function to
57 netlink, for it to call to deliver a packet to the device. */
58
59struct netlink {
2fe58dfd
SE
60 closure_t cl;
61 struct netlink_if ops;
4efd681a
SE
62 void *dst; /* Pointer to host interface state */
63 string_t name;
2fe58dfd
SE
64 uint32_t max_start_pad;
65 uint32_t max_end_pad;
2fe58dfd 66 struct subnet_list networks;
baa06aeb 67 struct subnet_list exclude_remote_networks;
4efd681a
SE
68 uint32_t local_address; /* host interface address */
69 uint32_t secnet_address; /* our own address */
2fe58dfd 70 uint32_t mtu;
4efd681a
SE
71 struct netlink_client *clients;
72 netlink_deliver_fn *deliver_to_host; /* Provided by driver */
73 struct buffer_if icmp; /* Buffer for assembly of outgoing ICMP */
2fe58dfd
SE
74};
75
4efd681a
SE
/* Generic IP checksum routine (the 16-bit one's-complement "Internet
   checksum").

   iph   - first byte of the data to checksum
   count - number of bytes to checksum; may be odd

   Returns the complemented checksum laid out in network byte order, i.e.
   ready to be assigned directly to a uint16_t checksum field inside a
   packet.  Checksumming data that already contains a correct checksum
   yields 0.

   The data is read bytewise, so iph need not be 16-bit aligned.  A
   trailing odd byte is treated as the high-order byte of a final word
   whose low-order byte is zero. */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;
    uint16_t folded;
    uint16_t result;
    uint8_t wire[2];

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back into the low 16 bits */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    folded=(uint16_t)(~sum&0xffff);
    /* Emit most-significant byte first, independent of host endianness */
    wire[0]=(uint8_t)(folded>>8);
    wire[1]=(uint8_t)(folded&0xff);
    memcpy(&result,wire,sizeof(result));
    return result;
}
92
4efd681a
SE
#ifdef i386
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 *
 * iph - start of the IP header
 * ihl - header length in 32-bit words
 *
 * The template is built from adjacent string literals because a literal
 * containing raw newlines is not valid C.  The "memory" clobber tells
 * the compiler that the asm reads the header through iph, so stores to
 * the header made just before the call must be completed first.
 */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
        "    movl (%1), %0\n"
        "    subl $4, %2\n"
        "    jbe 2f\n"
        "    addl 4(%1), %0\n"
        "    adcl 8(%1), %0\n"
        "    adcl 12(%1), %0\n"
        "1:  adcl 16(%1), %0\n"
        "    lea 4(%1), %1\n"
        "    decl %2\n"
        "    jne 1b\n"
        "    adcl $0, %0\n"
        "    movl %0, %2\n"
        "    shrl $16, %0\n"
        "    addw %w2, %w0\n"
        "    adcl $0, %0\n"
        "    notl %0\n"
        "2:\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
#else
/* Portable fallback: checksum ihl 32-bit words starting at iph using the
   generic routine. */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    return ip_csum(iph,ihl*4);
}
#endif
136
/* Fixed part of an IPv4 header, overlaid directly on packet data.
   Multi-byte fields hold network-byte-order values; the code converts
   them with ntohs()/ntohl() on read and htons()/htonl() on write.  The
   order of the two 4-bit fields sharing the first byte is selected by
   WORDS_BIGENDIAN. */
struct iphdr {
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;       /* Header length in 32-bit words */
#else
    uint8_t ihl:4;       /* Header length in 32-bit words */
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;    /* Length of header plus payload, in bytes */
    uint16_t id;
    uint16_t frag_off;   /* Flags and fragment offset (low 13 bits) */
    uint8_t ttl;
    uint8_t protocol;    /* 1 is ICMP */
    uint16_t check;      /* Header checksum */
    uint32_t saddr;
    uint32_t daddr;
    /* The options start here. */
};
156
157struct icmphdr {
158 struct iphdr iph;
159 uint8_t type;
160 uint8_t code;
161 uint16_t check;
162 union {
163 uint32_t unused;
164 struct {
165 uint8_t pointer;
166 uint8_t unused1;
167 uint16_t unused2;
168 } pprob;
169 uint32_t gwaddr;
170 struct {
171 uint16_t id;
172 uint16_t seq;
173 } echo;
174 } d;
175};
176
177static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf);
178
179static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
180 uint32_t dest,uint16_t len)
181{
182 struct icmphdr *h;
183
184 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
185 buffer_init(&st->icmp,st->max_start_pad);
186 h=buf_append(&st->icmp,sizeof(*h));
187
188 h->iph.version=4;
189 h->iph.ihl=5;
190 h->iph.tos=0;
191 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
192 h->iph.id=0;
193 h->iph.frag_off=0;
194 h->iph.ttl=255;
195 h->iph.protocol=1;
196 h->iph.saddr=htonl(st->secnet_address);
197 h->iph.daddr=htonl(dest);
198 h->iph.check=0;
199 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
200 h->check=0;
201 h->d.unused=0;
202
203 return h;
204}
205
206/* Fill in the ICMP checksum field correctly */
207static void netlink_icmp_csum(struct icmphdr *h)
208{
209 uint32_t len;
210
211 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
212 h->check=0;
213 h->check=ip_csum(&h->type,len);
214}
215
216/* RFC1122:
217 * An ICMP error message MUST NOT be sent as the result of
218 * receiving:
219 *
220 * * an ICMP error message, or
221 *
222 * * a datagram destined to an IP broadcast or IP multicast
223 * address, or
224 *
225 * * a datagram sent as a link-layer broadcast, or
226 *
227 * * a non-initial fragment, or
228 *
229 * * a datagram whose source address does not define a single
230 * host -- e.g., a zero address, a loopback address, a
231 * broadcast address, a multicast address, or a Class E
232 * address.
233 */
234static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
235{
236 struct iphdr *iph;
237 uint32_t source;
238
239 iph=(struct iphdr *)buf->start;
240 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
241 eg. icmp echo-request */
242 /* How do we spot broadcast destination addresses? */
243 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
244 source=ntohl(iph->saddr);
245 if (source==0) return False;
246 if ((source&0xff000000)==0x7f000000) return False;
247 /* How do we spot broadcast source addresses? */
248 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
249 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
250 return True;
251}
252
253/* How much of the original IP packet do we include in its ICMP
254 response? The header plus up to 64 bits. */
255static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
256{
257 struct iphdr *iph=(struct iphdr *)buf->start;
258 uint16_t hlen,plen;
259
260 hlen=iph->ihl*4;
261 /* We include the first 8 bytes of the packet data, provided they exist */
262 hlen+=8;
263 plen=ntohs(iph->tot_len);
264 return (hlen>plen?plen:hlen);
265}
266
267static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
268 uint8_t type, uint8_t code)
269{
270 struct iphdr *iph=(struct iphdr *)buf->start;
271 struct icmphdr *h;
272 uint16_t len;
273
274 if (netlink_icmp_may_reply(buf)) {
275 len=netlink_icmp_reply_len(buf);
276 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
277 h->type=type; h->code=code;
278 memcpy(buf_append(&st->icmp,len),buf->start,len);
279 netlink_icmp_csum(h);
280 netlink_packet_deliver(st,&st->icmp);
281 BUF_ASSERT_FREE(&st->icmp);
282 }
283}
284
285/*
286 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
287 * checksum.
288 *
289 * Is the datagram acceptable?
290 *
291 * 1. Length at least the size of an ip header
292 * 2. Version of 4
293 * 3. Checksums correctly.
294 * 4. Doesn't have a bogus length
295 */
296static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
297{
298 struct iphdr *iph=(struct iphdr *)buf->start;
299 uint32_t len;
300
301 if (iph->ihl < 5 || iph->version != 4) {
302 printf("ihl/version check failed\n");
303 return False;
304 }
305 if (buf->size < iph->ihl*4) {
306 printf("buffer size check failed\n");
307 return False;
308 }
309 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) {
310 printf("checksum failed\n");
311 return False;
312 }
313 len=ntohs(iph->tot_len);
314 /* There should be no padding */
315 if (buf->size!=len || len<(iph->ihl<<2)) {
316 printf("length check failed buf->size=%d len=%d\n",buf->size,len);
317 return False;
318 }
319
320 /* XXX check that there's no source route specified */
321 return True;
322}
323
324static void netlink_packet_deliver(struct netlink *st, struct buffer_if *buf)
325{
326 struct iphdr *iph=(struct iphdr *)buf->start;
327 uint32_t dest=ntohl(iph->daddr);
2fe58dfd
SE
328 struct netlink_client *c;
329
4efd681a 330 BUF_ASSERT_USED(buf);
2fe58dfd 331
4efd681a
SE
332 if (dest==st->secnet_address) {
333 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
334 BUF_FREE(buf);
2fe58dfd
SE
335 return;
336 }
4efd681a 337
2fe58dfd
SE
338 for (c=st->clients; c; c=c->next) {
339 if (subnet_match(c->networks,dest)) {
4efd681a
SE
340 if (c->can_deliver) {
341 c->deliver(c->dst,c,buf);
342 BUF_ASSERT_FREE(buf);
343 } else {
344 /* Generate ICMP destination unreachable */
345 netlink_icmp_simple(st,buf,3,0);
346 BUF_FREE(buf);
347 }
2fe58dfd
SE
348 return;
349 }
350 }
4efd681a
SE
351 if (subnet_match(&st->networks,dest)) {
352 st->deliver_to_host(st->dst,NULL,buf);
353 BUF_ASSERT_FREE(buf);
354 return;
355 }
356 Message(M_ERROR,"%s: failed to deliver a packet (bad destination address)"
357 "\nXXX make this message clearer\n");
358 BUF_FREE(buf);
359}
360
361static void netlink_packet_forward(struct netlink *st, struct buffer_if *buf)
362{
363 struct iphdr *iph=(struct iphdr *)buf->start;
364
365 BUF_ASSERT_USED(buf);
366
367 /* Packet has already been checked */
368 if (iph->ttl<=1) {
369 /* Generate ICMP time exceeded */
370 netlink_icmp_simple(st,buf,11,0);
371 BUF_FREE(buf);
372 return;
373 }
374 iph->ttl--;
375 iph->check=0;
376 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
377
378 netlink_packet_deliver(st,buf);
379 BUF_ASSERT_FREE(buf);
380}
381
382/* Someone has been foolish enough to address a packet to us. I
383 suppose we should reply to it, just to be polite. */
384static void netlink_packet_local(struct netlink *st, struct buffer_if *buf)
385{
386 struct icmphdr *h;
387
388 h=(struct icmphdr *)buf->start;
389
390 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
391 Message(M_WARNING,"%s: fragmented packet addressed to us\n",st->name);
392 BUF_FREE(buf);
393 return;
394 }
395
396 if (h->iph.protocol==1) {
397 /* It's ICMP */
398 if (h->type==8 && h->code==0) {
399 /* ICMP echo-request. Special case: we re-use the buffer
400 to construct the reply. */
401 h->type=0;
402 h->iph.daddr=h->iph.saddr;
403 h->iph.saddr=htonl(st->secnet_address);
404 h->iph.ttl=255; /* Be nice and bump it up again... */
405 h->iph.check=0;
406 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
407 netlink_icmp_csum(h);
408 netlink_packet_deliver(st,buf);
409 return;
410 }
411 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
412 } else {
413 /* Send ICMP protocol unreachable */
414 netlink_icmp_simple(st,buf,3,2);
415 BUF_FREE(buf);
416 return;
417 }
418
419 BUF_FREE(buf);
420}
421
422/* Called by site code when remote packet is available */
423/* buf is allocated on entry and free on return */
424static void netlink_from_tunnel(void *sst, void *cst, struct buffer_if *buf)
425{
426 struct netlink *st=sst;
427 struct netlink_client *client=cst;
428 uint32_t source,dest;
429 struct iphdr *iph;
430
431 BUF_ASSERT_USED(buf);
432 if (!netlink_check(st,buf)) {
433 Message(M_WARNING,"%s: bad IP packet from tunnel %s\n",
434 st->name,client->name);
435 BUF_FREE(buf);
436 return;
437 }
438 iph=(struct iphdr *)buf->start;
439
440 source=ntohl(iph->saddr);
441 dest=ntohl(iph->daddr);
442
443 /* Check that the packet source is in 'nets' and its destination is
444 in client->networks */
445 if (!subnet_match(client->networks,source)) {
446 string_t s,d;
447 s=ipaddr_to_string(source);
448 d=ipaddr_to_string(dest);
449 Message(M_WARNING,"%s: packet from tunnel %s with bad source address "
450 "(s=%s,d=%s)\n",st->name,client->name,s,d);
451 free(s); free(d);
452 BUF_FREE(buf);
453 return;
454 }
455 /* (st->secnet_address needs checking before matching against
456 st->networks because secnet's IP address may not be in the
457 range the host is willing to deal with) */
2fe58dfd 458 if (dest==st->secnet_address) {
4efd681a
SE
459 netlink_packet_local(st,buf);
460 BUF_ASSERT_FREE(buf);
2fe58dfd
SE
461 return;
462 }
4efd681a 463 if (!subnet_match(&st->networks,dest)) {
2fe58dfd
SE
464 string_t s,d;
465 s=ipaddr_to_string(source);
466 d=ipaddr_to_string(dest);
4efd681a
SE
467 Message(M_WARNING,"%s: incoming packet from tunnel %s "
468 "with bad destination address "
469 "(s=%s,d=%s)\n",st->name,client->name,s,d);
2fe58dfd 470 free(s); free(d);
4efd681a 471 BUF_FREE(buf);
2fe58dfd
SE
472 return;
473 }
4efd681a
SE
474
475 netlink_packet_forward(st,buf);
476
477 BUF_ASSERT_FREE(buf);
478}
479
480/* Called by driver code when packet is received from kernel */
481/* cid should be NULL */
482/* buf should be allocated on entry, and is free on return */
483static void netlink_from_host(void *sst, void *cid, struct buffer_if *buf)
484{
485 struct netlink *st=sst;
486 uint32_t source,dest;
487 struct iphdr *iph;
488
489 BUF_ASSERT_USED(buf);
490 if (!netlink_check(st,buf)) {
491 Message(M_WARNING,"%s: bad IP packet from host\n",
492 st->name);
493 BUF_FREE(buf);
494 return;
495 }
496 iph=(struct iphdr *)buf->start;
497
498 source=ntohl(iph->saddr);
499 dest=ntohl(iph->daddr);
500
501 if (!subnet_match(&st->networks,source)) {
502 string_t s,d;
503 s=ipaddr_to_string(source);
504 d=ipaddr_to_string(dest);
505 Message(M_WARNING,"%s: outgoing packet with bad source address "
506 "(s=%s,d=%s)\n",st->name,s,d);
507 free(s); free(d);
508 BUF_FREE(buf);
509 return;
510 }
511 if (dest==st->secnet_address) {
512 netlink_packet_local(st,buf);
513 BUF_ASSERT_FREE(buf);
514 return;
515 }
516 netlink_packet_forward(st,buf);
517 BUF_ASSERT_FREE(buf);
518}
519
520static void netlink_set_delivery(void *sst, void *cid, bool_t can_deliver)
521{
522 struct netlink_client *c=cid;
523
524 c->can_deliver=can_deliver;
525}
526
527static void *netlink_regnets(void *sst, struct subnet_list *nets,
528 netlink_deliver_fn *deliver, void *dst,
529 uint32_t max_start_pad, uint32_t max_end_pad,
530 string_t client_name)
531{
532 struct netlink *st=sst;
533 struct netlink_client *c;
534
535 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
536 "max_start_pad=%d, max_end_pad=%d\n",
537 nets->entries,max_start_pad,max_end_pad);
538
baa06aeb
SE
539 /* Check that nets does not intersect with st->networks or
540 st->exclude_remote_networks; refuse to register if it does. */
541 if (subnet_lists_intersect(&st->networks,nets)) {
542 Message(M_ERROR,"%s: site %s specifies networks that "
543 "intersect with our local networks\n",st->name,client_name);
544 return False;
545 }
546 if (subnet_lists_intersect(&st->exclude_remote_networks,nets)) {
547 Message(M_ERROR,"%s: site %s specifies networks that "
548 "intersect with the explicitly excluded remote networks\n",
549 st->name,client_name);
550 return False;
551 }
552
4efd681a
SE
553 c=safe_malloc(sizeof(*c),"netlink_regnets");
554 c->networks=nets;
555 c->deliver=deliver;
556 c->dst=dst;
557 c->name=client_name; /* XXX copy it? */
558 c->can_deliver=False;
559 c->next=st->clients;
560 st->clients=c;
561 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
562 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
563
564 return c;
565}
566
567static netlink_deliver_fn *netlink_init(struct netlink *st,
568 void *dst, struct cloc loc,
569 dict_t *dict, string_t description,
570 netlink_deliver_fn *to_host)
571{
572 st->dst=dst;
573 st->cl.description=description;
574 st->cl.type=CL_NETLINK;
575 st->cl.apply=NULL;
576 st->cl.interface=&st->ops;
577 st->ops.st=st;
578 st->ops.regnets=netlink_regnets;
579 st->ops.deliver=netlink_from_tunnel;
580 st->ops.set_delivery=netlink_set_delivery;
581 st->max_start_pad=0;
582 st->max_end_pad=0;
583 st->clients=NULL;
584 st->deliver_to_host=to_host;
585
586 st->name=dict_read_string(dict,"name",False,"netlink",loc);
587 if (!st->name) st->name=description;
588 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
589 &st->networks);
baa06aeb
SE
590 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
591 loc, &st->exclude_remote_networks);
592 /* local-address and secnet-address do not have to be in local-networks;
593 however, they should be advertised in the 'sites' file for the
594 local site. */
4efd681a
SE
595 st->local_address=string_to_ipaddr(
596 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
597 st->secnet_address=string_to_ipaddr(
598 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
4efd681a
SE
599 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
600 buffer_new(&st->icmp,ICMP_BUFSIZE);
601
602 return netlink_from_host;
603}
604
605/* Connection to the kernel through userv-ipif */
606
607struct userv {
608 struct netlink nl;
609 int txfd; /* We transmit to userv */
610 int rxfd; /* We receive from userv */
611 string_t userv_path;
612 string_t service_user;
613 string_t service_name;
614 uint32_t txbuflen;
615 struct buffer_if *buff; /* We unstuff received packets into here
616 and send them to the site code. */
617 bool_t pending_esc;
618 netlink_deliver_fn *netlink_to_tunnel;
619};
620
621static int userv_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
622 int *timeout_io, const struct timeval *tv_now,
623 uint64_t *now)
624{
625 struct userv *st=sst;
626 *nfds_io=2;
627 fds[0].fd=st->txfd;
628 fds[0].events=POLLERR; /* Might want to pick up POLLOUT sometime */
629 fds[1].fd=st->rxfd;
630 fds[1].events=POLLIN|POLLERR|POLLHUP;
631 return 0;
2fe58dfd
SE
632}
633
634static void userv_afterpoll(void *sst, struct pollfd *fds, int nfds,
635 const struct timeval *tv_now, uint64_t *now)
636{
637 struct userv *st=sst;
638 uint8_t rxbuf[DEFAULT_BUFSIZE];
639 int l,i;
640
641 if (fds[1].revents&POLLERR) {
642 printf("userv_afterpoll: hup!\n");
643 }
644 if (fds[1].revents&POLLIN) {
645 l=read(st->rxfd,rxbuf,DEFAULT_BUFSIZE);
646 if (l<0) {
647 fatal_perror("userv_afterpoll: read(rxfd)");
648 }
649 if (l==0) {
650 fatal("userv_afterpoll: read(rxfd)=0; userv gone away?\n");
651 }
652 /* XXX really crude unstuff code */
653 /* XXX check for buffer overflow */
4efd681a 654 BUF_ASSERT_USED(st->buff);
2fe58dfd
SE
655 for (i=0; i<l; i++) {
656 if (st->pending_esc) {
657 st->pending_esc=False;
658 switch(rxbuf[i]) {
659 case SLIP_ESCEND:
660 *(uint8_t *)buf_append(st->buff,1)=SLIP_END;
661 break;
662 case SLIP_ESCESC:
663 *(uint8_t *)buf_append(st->buff,1)=SLIP_ESC;
664 break;
665 default:
666 fatal("userv_afterpoll: bad SLIP escape character\n");
667 }
668 } else {
669 switch (rxbuf[i]) {
670 case SLIP_END:
4efd681a
SE
671 if (st->buff->size>0) {
672 st->netlink_to_tunnel(&st->nl,NULL,
673 st->buff);
674 BUF_ALLOC(st->buff,"userv_afterpoll");
675 }
676 buffer_init(st->buff,st->nl.max_start_pad);
2fe58dfd
SE
677 break;
678 case SLIP_ESC:
679 st->pending_esc=True;
680 break;
681 default:
682 *(uint8_t *)buf_append(st->buff,1)=rxbuf[i];
683 break;
684 }
685 }
686 }
687 }
4efd681a
SE
688}
689
690/* Send buf to the kernel. Free buf before returning. */
691static void userv_deliver_to_kernel(void *sst, void *cid,
692 struct buffer_if *buf)
693{
694 struct userv *st=sst;
695 uint8_t txbuf[DEFAULT_BUFSIZE];
696 uint8_t *i;
697 uint32_t j;
698
699 BUF_ASSERT_USED(buf);
700
701 /* Spit the packet at userv-ipif: SLIP start marker, then
702 bytestuff the packet, then SLIP end marker */
703 /* XXX crunchy bytestuff code */
704 j=0;
705 txbuf[j++]=SLIP_END;
706 for (i=buf->start; i<(buf->start+buf->size); i++) {
707 switch (*i) {
708 case SLIP_END:
709 txbuf[j++]=SLIP_ESC;
710 txbuf[j++]=SLIP_ESCEND;
711 break;
712 case SLIP_ESC:
713 txbuf[j++]=SLIP_ESC;
714 txbuf[j++]=SLIP_ESCESC;
715 break;
716 default:
717 txbuf[j++]=*i;
718 break;
719 }
720 }
721 txbuf[j++]=SLIP_END;
722 if (write(st->txfd,txbuf,j)<0) {
723 fatal_perror("userv_deliver_to_kernel: write()");
724 }
725 BUF_FREE(buf);
2fe58dfd
SE
726}
727
728static void userv_phase_hook(void *sst, uint32_t newphase)
729{
730 struct userv *st=sst;
731 pid_t child;
732 int c_stdin[2];
733 int c_stdout[2];
734 string_t addrs;
735 string_t nets;
736 string_t s;
737 struct netlink_client *c;
738 int i;
739
740 /* This is where we actually invoke userv - all the networks we'll
741 be using should already have been registered. */
742
743 addrs=safe_malloc(512,"userv_phase_hook:addrs");
4efd681a
SE
744 snprintf(addrs,512,"%s,%s,%d,slip",ipaddr_to_string(st->nl.local_address),
745 ipaddr_to_string(st->nl.secnet_address),st->nl.mtu);
2fe58dfd
SE
746
747 nets=safe_malloc(1024,"userv_phase_hook:nets");
748 *nets=0;
4efd681a 749 for (c=st->nl.clients; c; c=c->next) {
2fe58dfd
SE
750 for (i=0; i<c->networks->entries; i++) {
751 s=subnet_to_string(&c->networks->list[i]);
752 strcat(nets,s);
753 strcat(nets,",");
754 free(s);
755 }
756 }
757 nets[strlen(nets)-1]=0;
758
759 Message(M_INFO,"\nuserv_phase_hook: %s %s %s %s %s\n",st->userv_path,
760 st->service_user,st->service_name,addrs,nets);
761
762 /* Allocate buffer, plus space for padding. Make sure we end up
763 with the start of the packet well-aligned. */
764 /* ALIGN(st->max_start_pad,16); */
765 /* ALIGN(st->max_end_pad,16); */
766
767 st->pending_esc=False;
768
769 /* Invoke userv */
770 if (pipe(c_stdin)!=0) {
771 fatal_perror("userv_phase_hook: pipe(c_stdin)");
772 }
773 if (pipe(c_stdout)!=0) {
774 fatal_perror("userv_phase_hook: pipe(c_stdout)");
775 }
776 st->txfd=c_stdin[1];
777 st->rxfd=c_stdout[0];
778
779 child=fork();
780 if (child==-1) {
781 fatal_perror("userv_phase_hook: fork()");
782 }
783 if (child==0) {
784 char **argv;
785
786 /* We are the child. Modify our stdin and stdout, then exec userv */
787 dup2(c_stdin[0],0);
788 dup2(c_stdout[1],1);
789 close(c_stdin[1]);
790 close(c_stdout[0]);
791
792 /* The arguments are:
793 userv
794 service-user
795 service-name
796 local-addr,secnet-addr,mtu,protocol
797 route1,route2,... */
798 argv=malloc(sizeof(*argv)*6);
799 argv[0]=st->userv_path;
800 argv[1]=st->service_user;
801 argv[2]=st->service_name;
802 argv[3]=addrs;
803 argv[4]=nets;
804 argv[5]=NULL;
805 execvp(st->userv_path,argv);
806 perror("netlink-userv-ipif: execvp");
807
808 exit(1);
809 }
810 /* We are the parent... */
811
812 /* Register for poll() */
4efd681a 813 register_for_poll(st, userv_beforepoll, userv_afterpoll, 2, st->nl.name);
2fe58dfd
SE
814}
815
4efd681a
SE
816static list_t *userv_apply(closure_t *self, struct cloc loc, dict_t *context,
817 list_t *args)
2fe58dfd 818{
4efd681a
SE
819 struct userv *st;
820 item_t *item;
821 dict_t *dict;
2fe58dfd 822
4efd681a 823 st=safe_malloc(sizeof(*st),"userv_apply");
2fe58dfd 824
4efd681a
SE
825 /* First parameter must be a dict */
826 item=list_elem(args,0);
827 if (!item || item->type!=t_dict)
828 cfgfatal(loc,"userv-ipif","parameter must be a dictionary\n");
829
830 dict=item->data.dict;
2fe58dfd 831
4efd681a
SE
832 st->netlink_to_tunnel=
833 netlink_init(&st->nl,st,loc,dict,
834 "netlink-userv-ipif",userv_deliver_to_kernel);
835
836 st->userv_path=dict_read_string(dict,"userv-path",False,"userv-netlink",
837 loc);
838 st->service_user=dict_read_string(dict,"service-user",False,
839 "userv-netlink",loc);
840 st->service_name=dict_read_string(dict,"service-name",False,
841 "userv-netlink",loc);
842 if (!st->userv_path) st->userv_path="userv";
843 if (!st->service_user) st->service_user="root";
844 if (!st->service_name) st->service_name="ipif";
845 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"userv-netlink",loc);
846 BUF_ALLOC(st->buff,"netlink:userv_apply");
847
848 st->rxfd=-1; st->txfd=-1;
849 add_hook(PHASE_DROPPRIV,userv_phase_hook,st);
850
851 return new_closure(&st->nl.cl);
2fe58dfd
SE
852}
853
4efd681a 854/* Connection to the kernel through the universal TUN/TAP driver */
2fe58dfd 855
4efd681a
SE
856struct tun {
857 struct netlink nl;
858 int fd;
859 string_t device_path;
860 string_t interface_name;
861 string_t ifconfig_path;
862 string_t route_path;
baa06aeb
SE
863 bool_t tun_old;
864 bool_t search_for_if; /* Applies to tun-old only */
4efd681a
SE
865 struct buffer_if *buff; /* We receive packets into here
866 and send them to the netlink code. */
867 netlink_deliver_fn *netlink_to_tunnel;
868};
2fe58dfd 869
4efd681a
SE
870static int tun_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
871 int *timeout_io, const struct timeval *tv_now,
872 uint64_t *now)
873{
874 struct tun *st=sst;
875 *nfds_io=1;
876 fds[0].fd=st->fd;
877 fds[0].events=POLLIN|POLLERR|POLLHUP;
878 return 0;
879}
2fe58dfd 880
4efd681a
SE
881static void tun_afterpoll(void *sst, struct pollfd *fds, int nfds,
882 const struct timeval *tv_now, uint64_t *now)
883{
884 struct tun *st=sst;
885 int l;
886
887 if (fds[0].revents&POLLERR) {
888 printf("tun_afterpoll: hup!\n");
2fe58dfd 889 }
4efd681a
SE
890 if (fds[0].revents&POLLIN) {
891 BUF_ALLOC(st->buff,"tun_afterpoll");
892 buffer_init(st->buff,st->nl.max_start_pad);
893 l=read(st->fd,st->buff->start,st->buff->len-st->nl.max_start_pad);
894 if (l<0) {
895 fatal_perror("tun_afterpoll: read()");
896 }
897 if (l==0) {
898 fatal("tun_afterpoll: read()=0; device gone away?\n");
899 }
900 if (l>0) {
901 st->buff->size=l;
902 st->netlink_to_tunnel(&st->nl,NULL,st->buff);
903 BUF_ASSERT_FREE(st->buff);
904 }
2fe58dfd 905 }
4efd681a 906}
2fe58dfd 907
4efd681a
SE
908static void tun_deliver_to_kernel(void *sst, void *cid,
909 struct buffer_if *buf)
910{
911 struct tun *st=sst;
2fe58dfd 912
4efd681a 913 BUF_ASSERT_USED(buf);
2fe58dfd 914
4efd681a
SE
915 /* No error checking, because we'd just throw the packet away anyway */
916 write(st->fd,buf->start,buf->size);
917 BUF_FREE(buf);
918}
2fe58dfd 919
4efd681a
SE
920static void tun_phase_hook(void *sst, uint32_t newphase)
921{
922 struct tun *st=sst;
923 string_t hostaddr,secnetaddr;
924 uint8_t mtu[6];
925 string_t network,mask;
926 struct netlink_client *c;
927 int i;
928
baa06aeb
SE
929 if (st->tun_old) {
930 if (st->search_for_if) {
931 string_t dname;
932 int i;
933
934 /* ASSERT st->interface_name */
935 dname=safe_malloc(strlen(st->device_path)+4,"tun_old_apply");
936 st->interface_name=safe_malloc(8,"tun_phase_hook");
937
938 for (i=0; i<255; i++) {
939 sprintf(dname,"%s%d",st->device_path,i);
940 if ((st->fd=open(dname,O_RDWR))>0) {
941 sprintf(st->interface_name,"tun%d",i);
942 Message(M_INFO,"%s: allocated network interface %s "
943 "through %s\n",st->nl.name,st->interface_name,
944 dname);
945 break;
946 }
947 }
948 if (st->fd==-1) {
949 fatal("%s: unable to open any TUN device (%s...)\n",
950 st->nl.name,st->device_path);
951 }
952 } else {
953 st->fd=open(st->device_path,O_RDWR);
954 if (st->fd==-1) {
955 fatal_perror("%s: unable to open TUN device file %s",
956 st->nl.name,st->device_path);
957 }
958 }
959 } else {
960#ifdef HAVE_LINUX_IF_H
961 struct ifreq ifr;
962
963 /* New TUN interface: open the device, then do ioctl TUNSETIFF
964 to set or find out the network interface name. */
965 st->fd=open(st->device_path,O_RDWR);
966 if (st->fd==-1) {
967 fatal_perror("%s: can't open device file %s",st->nl.name,
968 st->device_path);
969 }
970 memset(&ifr,0,sizeof(ifr));
971 ifr.ifr_flags = IFF_TUN | IFF_NO_PI; /* Just send/receive IP packets,
972 no extra headers */
973 if (st->interface_name)
974 strncpy(ifr.ifr_name,st->interface_name,IFNAMSIZ);
975 if (ioctl(st->fd,TUNSETIFF,&ifr)<0) {
976 fatal_perror("%s: ioctl(TUNSETIFF)",st->nl.name);
977 }
978 if (!st->interface_name) {
979 st->interface_name=safe_malloc(strlen(ifr.ifr_name)+1,"tun_apply");
980 strcpy(st->interface_name,ifr.ifr_name);
981 Message(M_INFO,"%s: allocated network interface %s\n",st->nl.name,
982 st->interface_name);
983 }
984#else
985 fatal("netlink.c:tun_phase_hook:!tun_old unexpected\n");
986#endif /* HAVE_LINUX_IF_H */
987 }
4efd681a
SE
988 /* All the networks we'll be using have been registered. Invoke ifconfig
989 to set the TUN device's address, and route to add routes to all
990 our networks. */
991
992 hostaddr=ipaddr_to_string(st->nl.local_address);
993 secnetaddr=ipaddr_to_string(st->nl.secnet_address);
994 snprintf(mtu,6,"%d",st->nl.mtu);
995 mtu[5]=0;
996
997 sys_cmd(st->ifconfig_path,"ifconfig",st->interface_name,
998 hostaddr,"netmask","255.255.255.255","-broadcast",
999 "pointopoint",secnetaddr,"mtu",mtu,"up",(char *)0);
1000
1001 for (c=st->nl.clients; c; c=c->next) {
1002 for (i=0; i<c->networks->entries; i++) {
1003 network=ipaddr_to_string(c->networks->list[i].prefix);
1004 mask=ipaddr_to_string(c->networks->list[i].mask);
1005 sys_cmd(st->route_path,"route","add","-net",network,
1006 "netmask",mask,"gw",secnetaddr,(char *)0);
2fe58dfd
SE
1007 }
1008 }
4efd681a
SE
1009
1010 /* Register for poll() */
1011 register_for_poll(st, tun_beforepoll, tun_afterpoll, 1, st->nl.name);
1012}
1013
#ifdef HAVE_LINUX_IF_H
/* Configuration closure "tun": create a netlink that uses the new
   (ioctl-based) TUN interface.

   The first argument must be a dictionary.  In addition to the keys read
   by netlink_init it may contain "device" (default "/dev/net/tun"),
   "interface", "ifconfig-path" (default "ifconfig") and "route-path"
   (default "route"), and must contain "buffer".  The device itself is
   opened later, by tun_phase_hook.  Returns the new closure. */
static list_t *tun_apply(closure_t *self, struct cloc loc, dict_t *context,
                         list_t *args)
{
    struct tun *t;
    item_t *arg0;
    dict_t *cfg;

    t=safe_malloc(sizeof(*t),"tun_apply");

    /* First parameter must be a dict */
    arg0=list_elem(args,0);
    if (!arg0 || arg0->type!=t_dict)
        cfgfatal(loc,"tun","parameter must be a dictionary\n");

    cfg=arg0->data.dict;

    t->netlink_to_tunnel=
        netlink_init(&t->nl,t,loc,cfg,
                     "netlink-tun",tun_deliver_to_kernel);

    t->tun_old=False;
    t->device_path=dict_read_string(cfg,"device",False,"tun-netlink",loc);
    t->interface_name=dict_read_string(cfg,"interface",False,
                                       "tun-netlink",loc);
    t->ifconfig_path=dict_read_string(cfg,"ifconfig-path",
                                      False,"tun-netlink",loc);
    t->route_path=dict_read_string(cfg,"route-path",
                                   False,"tun-netlink",loc);

    /* Defaults for anything the configuration left out */
    if (!t->device_path) t->device_path="/dev/net/tun";
    if (!t->ifconfig_path) t->ifconfig_path="ifconfig";
    if (!t->route_path) t->route_path="route";
    t->buff=find_cl_if(cfg,"buffer",CL_BUFFER,True,"tun-netlink",loc);

    add_hook(PHASE_GETRESOURCES,tun_phase_hook,t);

    return new_closure(&t->nl.cl);
}
#endif /* HAVE_LINUX_IF_H */
2fe58dfd 1054
4efd681a
SE
1055static list_t *tun_old_apply(closure_t *self, struct cloc loc, dict_t *context,
1056 list_t *args)
2fe58dfd 1057{
4efd681a 1058 struct tun *st;
2fe58dfd
SE
1059 item_t *item;
1060 dict_t *dict;
1061
4efd681a
SE
1062 st=safe_malloc(sizeof(*st),"tun_old_apply");
1063
1064 Message(M_WARNING,"the tun-old code has never been tested. Please report "
1065 "success or failure to steve@greenend.org.uk\n");
2fe58dfd
SE
1066
1067 /* First parameter must be a dict */
1068 item=list_elem(args,0);
1069 if (!item || item->type!=t_dict)
4efd681a 1070 cfgfatal(loc,"tun","parameter must be a dictionary\n");
2fe58dfd
SE
1071
1072 dict=item->data.dict;
4efd681a
SE
1073
1074 st->netlink_to_tunnel=
1075 netlink_init(&st->nl,st,loc,dict,
1076 "netlink-tun",tun_deliver_to_kernel);
1077
baa06aeb 1078 st->tun_old=True;
4efd681a
SE
1079 st->device_path=dict_read_string(dict,"device",False,"tun-netlink",loc);
1080 st->interface_name=dict_read_string(dict,"interface",False,
1081 "tun-netlink",loc);
baa06aeb
SE
1082 st->search_for_if=dict_read_bool(dict,"interface-search",False,
1083 "tun-netlink",loc,st->device_path==NULL);
59635212
SE
1084 st->ifconfig_path=dict_read_string(dict,"ifconfig-path",False,
1085 "tun-netlink",loc);
1086 st->route_path=dict_read_string(dict,"route-path",False,"tun-netlink",loc);
4efd681a
SE
1087
1088 if (!st->device_path) st->device_path="/dev/tun";
1089 if (!st->ifconfig_path) st->ifconfig_path="ifconfig";
1090 if (!st->route_path) st->route_path="route";
1091 st->buff=find_cl_if(dict,"buffer",CL_BUFFER,True,"tun-netlink",loc);
1092
1093 /* Old TUN interface: the network interface name depends on which
1094 /dev/tunX file we open. If 'interface-search' is set to true, treat
1095 'device' as the prefix and try numbers from 0--255. If it's set
1096 to false, treat 'device' as the whole name, and require than an
1097 appropriate interface name be specified. */
baa06aeb
SE
1098 if (st->search_for_if && st->interface_name) {
1099 cfgfatal(loc,"tun-old","you may not specify an interface name "
1100 "in interface-search mode\n");
2fe58dfd 1101 }
baa06aeb
SE
1102 if (!st->search_for_if && !st->interface_name) {
1103 cfgfatal(loc,"tun-old","you must specify an interface name "
1104 "when you explicitly specify a TUN device file\n");
1105 }
1106
2fe58dfd 1107
baa06aeb 1108 add_hook(PHASE_GETRESOURCES,tun_phase_hook,st);
2fe58dfd 1109
4efd681a 1110 return new_closure(&st->nl.cl);
2fe58dfd
SE
1111}
1112
4efd681a
SE
1113/* No connection to the kernel at all... */
1114
2fe58dfd 1115struct null {
4efd681a 1116 struct netlink nl;
2fe58dfd
SE
1117};
1118
2fe58dfd
SE
/* deliver_to_host function for the null netlink: discard the packet.
   The buffer must still be released, because netlink_packet_deliver
   asserts that it is free once deliver_to_host returns.  sst and cid are
   not used. */
static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
{
    BUF_ASSERT_USED(buf);
    BUF_FREE(buf);
}
1123
1124static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1125 list_t *args)
1126{
1127 struct null *st;
4efd681a
SE
1128 item_t *item;
1129 dict_t *dict;
2fe58dfd 1130
4efd681a 1131 st=safe_malloc(sizeof(*st),"null_apply");
2fe58dfd 1132
4efd681a
SE
1133 item=list_elem(args,0);
1134 if (!item || item->type!=t_dict)
1135 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1136
1137 dict=item->data.dict;
1138
1139 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_deliver);
1140
1141 return new_closure(&st->nl.cl);
2fe58dfd
SE
1142}
1143
1144init_module netlink_module;
1145void netlink_module(dict_t *dict)
1146{
1147 add_closure(dict,"userv-ipif",userv_apply);
4efd681a
SE
1148#ifdef HAVE_LINUX_IF_H
1149 add_closure(dict,"tun",tun_apply);
1150#endif
1151 add_closure(dict,"tun-old",tun_old_apply);
1152 add_closure(dict,"null-netlink",null_apply);
2fe58dfd 1153#if 0
4efd681a 1154 /* TODO */
2fe58dfd
SE
1155 add_closure(dict,"pty-slip",ptyslip_apply);
1156 add_closure(dict,"slipd",slipd_apply);
1157#endif /* 0 */
2fe58dfd 1158}