Commit | Line | Data |
---|---|---|
2fe58dfd SE |
1 | /* User-kernel network link */ |
2 | ||
c215a4bc IJ |
3 | /* |
4 | * This file is part of secnet. | |
5 | * See README for full list of copyright holders. | |
6 | * | |
7 | * secnet is free software; you can redistribute it and/or modify it | |
8 | * under the terms of the GNU General Public License as published by | |
9c6a8729 | 9 | * the Free Software Foundation; either version 3 of the License, or |
c215a4bc IJ |
10 | * (at your option) any later version. |
11 | * | |
12 | * secnet is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * version 3 along with secnet; if not, see | |
19 | * https://www.gnu.org/licenses/gpl.html. | |
20 | */ | |
21 | ||
ff05a229 | 22 | /* See RFCs 791, 792, 1123 and 1812 */ |
2fe58dfd | 23 | |
ff05a229 SE |
24 | /* The netlink device is actually a router. Tunnels are unnumbered |
25 | point-to-point lines (RFC1812 section 2.2.7); the router has a | |
26 | single address (the 'router-id'). */ | |
27 | ||
28 | /* This is where we currently have the anti-spoofing paranoia - before | |
29 | sending a packet to the kernel we check that the tunnel it came | |
30 | over could reasonably have produced it. */ | |
31 | ||
32 | ||
33 | /* Points to note from RFC1812 (which may require changes in this | |
34 | file): | |
35 | ||
36 | 3.3.4 Maximum Transmission Unit - MTU | |
37 | ||
38 | The MTU of each logical interface MUST be configurable within the | |
39 | range of legal MTUs for the interface. | |
40 | ||
41 | Many Link Layer protocols define a maximum frame size that may be | |
42 | sent. In such cases, a router MUST NOT allow an MTU to be set which | |
43 | would allow sending of frames larger than those allowed by the Link | |
44 | Layer protocol. However, a router SHOULD be willing to receive a | |
45 | packet as large as the maximum frame size even if that is larger than | |
46 | the MTU. | |
47 | ||
48 | 4.2.1 A router SHOULD count datagrams discarded. | |
49 | ||
50 | 4.2.2.1 Source route options - we probably should implement processing | |
51 | of source routes, even though mostly the security policy will prevent | |
52 | their use. | |
53 | ||
54 | 5.3.13.4 Source Route Options | |
55 | ||
56 | A router MUST implement support for source route options in forwarded | |
57 | packets. A router MAY implement a configuration option that, when | |
58 | enabled, causes all source-routed packets to be discarded. However, | |
59 | such an option MUST NOT be enabled by default. | |
60 | ||
61 | 5.3.13.5 Record Route Option | |
62 | ||
63 | Routers MUST support the Record Route option in forwarded packets. | |
64 | ||
65 | A router MAY provide a configuration option that, if enabled, will | |
66 | cause the router to ignore (i.e., pass through unchanged) Record | |
67 | Route options in forwarded packets. If provided, such an option MUST | |
68 | default to enabling the record-route. This option should not affect | |
69 | the processing of Record Route options in datagrams received by the | |
70 | router itself (in particular, Record Route options in ICMP echo | |
71 | requests will still be processed according to Section [4.3.3.6]). | |
72 | ||
73 | 5.3.13.6 Timestamp Option | |
74 | ||
75 | Routers MUST support the timestamp option in forwarded packets. A | |
76 | timestamp value MUST follow the rules given [INTRO:2]. | |
77 | ||
78 | If the flags field = 3 (timestamp and prespecified address), the | |
79 | router MUST add its timestamp if the next prespecified address | |
80 | matches any of the router's IP addresses. It is not necessary that | |
81 | the prespecified address be either the address of the interface on | |
82 | which the packet arrived or the address of the interface over which | |
83 | it will be sent. | |
84 | ||
85 | ||
86 | 4.2.2.7 Fragmentation: RFC 791 Section 3.2 | |
87 | ||
88 | Fragmentation, as described in [INTERNET:1], MUST be supported by a | |
89 | router. | |
90 | ||
91 | 4.2.2.8 Reassembly: RFC 791 Section 3.2 | |
92 | ||
93 | As specified in the corresponding section of [INTRO:2], a router MUST | |
94 | support reassembly of datagrams that it delivers to itself. | |
95 | ||
96 | 4.2.2.9 Time to Live: RFC 791 Section 3.2 | |
97 | ||
98 | Note in particular that a router MUST NOT check the TTL of a packet | |
99 | except when forwarding it. | |
100 | ||
101 | A router MUST NOT discard a datagram just because it was received | |
102 | with TTL equal to zero or one; if it is to the router and otherwise | |
103 | valid, the router MUST attempt to receive it. | |
104 | ||
105 | On messages the router originates, the IP layer MUST provide a means | |
106 | for the transport layer to set the TTL field of every datagram that | |
107 | is sent. When a fixed TTL value is used, it MUST be configurable. | |
108 | ||
109 | ||
110 | 8.1 The Simple Network Management Protocol - SNMP | |
111 | 8.1.1 SNMP Protocol Elements | |
112 | ||
113 | Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate | |
114 | using UDP/IP as its transport and network protocols. | |
115 | ||
116 | ||
117 | */ | |
2fe58dfd | 118 | |
3b83c932 | 119 | #include <string.h> |
59230b9b IJ |
120 | #include <assert.h> |
121 | #include <limits.h> | |
8689b3a9 | 122 | #include "secnet.h" |
2fe58dfd | 123 | #include "util.h" |
7138d0c5 | 124 | #include "ipaddr.h" |
9d3a4132 | 125 | #include "netlink.h" |
042a8da9 | 126 | #include "process.h" |
2fe58dfd | 127 | |
a0b107b8 IJ |
128 | #ifdef NETLINK_DEBUG |
129 | #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__) | |
130 | #else /* !NETLINK_DEBUG */ | |
131 | #define MDEBUG(...) ((void)0) | |
132 | #endif /* !NETLINK_DEBUG */ | |
133 | ||
ff05a229 SE |
134 | #define ICMP_TYPE_ECHO_REPLY 0 |
135 | ||
136 | #define ICMP_TYPE_UNREACHABLE 3 | |
137 | #define ICMP_CODE_NET_UNREACHABLE 0 | |
138 | #define ICMP_CODE_PROTOCOL_UNREACHABLE 2 | |
139 | #define ICMP_CODE_FRAGMENTATION_REQUIRED 4 | |
140 | #define ICMP_CODE_NET_PROHIBITED 13 | |
141 | ||
142 | #define ICMP_TYPE_ECHO_REQUEST 8 | |
143 | ||
144 | #define ICMP_TYPE_TIME_EXCEEDED 11 | |
145 | #define ICMP_CODE_TTL_EXCEEDED 0 | |
146 | ||
/*
 * Generic Internet checksum (RFC 1071) over `count' bytes starting at
 * iph.
 *
 * The data is summed as a sequence of big-endian 16-bit words using
 * ones-complement arithmetic.  If count is odd, the final byte is
 * taken as the high-order byte of a word whose low-order byte is zero
 * (ie the data is padded on the right with one zero byte).  A count
 * of zero or less sums nothing.
 *
 * Returns the complemented sum in network byte order, ready to be
 * stored directly into a checksum field.
 */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    /* Assemble each word bytewise.  This yields the same value as
     * ntohs() applied to the word in memory, but places no alignment
     * requirement on iph and does not cast away const. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0) {
        /* Odd trailing byte: it is the HIGH half of the padded word */
        sum+=(uint32_t)iph[0]<<8;
    }
    /* Fold the carries back in (ones-complement addition) */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
163 | ||
4efd681a SE |
164 | #ifdef i386 |
165 | /* | |
166 | * This is a version of ip_compute_csum() optimized for IP headers, | |
167 | * which always checksum on 4 octet boundaries. | |
168 | * | |
169 | * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by | |
170 | * Arnt Gulbrandsen. | |
171 | */ | |
/*
 * i386-only header checksum.
 *
 * iph points at the IP header and ihl is its length in 32-bit words.
 * The assembly adds the first four words, then loops adding one
 * further word per iteration (ihl-4 iterations) with carry, folds the
 * 32-bit total into 16 bits and complements it.  The 32-bit result is
 * narrowed to uint16_t by the return.
 *
 * If ihl is 4 or less the code branches straight to label 2 after
 * loading only the first word, so the value returned is not a
 * checksum.  NOTE(review): callers visible in this file pass an ihl
 * of at least 5 -- confirm for any new caller.
 */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
    uint32_t sum;

    __asm__ __volatile__(
            "movl (%1), %0 ;\n"
            "subl $4, %2 ;\n"
            "jbe 2f ;\n"
            "addl 4(%1), %0 ;\n"
            "adcl 8(%1), %0 ;\n"
            "adcl 12(%1), %0 ;\n"
            "1: adcl 16(%1), %0 ;\n"
            "lea 4(%1), %1 ;\n"
            "decl %2 ;\n"
            "jne 1b ;\n"
            "adcl $0, %0 ;\n"
            "movl %0, %2 ;\n"
            "shrl $16, %0 ;\n"
            "addw %w2, %w0 ;\n"
            "adcl $0, %0 ;\n"
            "notl %0 ;\n"
            "2: ;\n"
        /* Since the input registers which are loaded with iph and ihl
           are modified, we must also specify them as outputs, or gcc
           will assume they contain their original values. */
        : "=r" (sum), "=r" (iph), "=r" (ihl)
        : "1" (iph), "2" (ihl)
        : "memory");
    return sum;
}
201 | #else | |
/* Portable header checksum: ihl is the header length in 32-bit words,
 * so convert it to a byte count and use the generic routine. */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* the multiplication below must not overflow */
    assert(ihl < INT_MAX/4);

    const int32_t header_bytes = ihl*4;
    return ip_csum(iph,header_bytes);
}
207 | #endif | |
208 | ||
/* The fixed (option-less) part of an IPv4 header.  The code in this
 * file stores the multi-byte fields in network byte order, converting
 * with htons/htonl/ntohs/ntohl at each access. */
struct iphdr {
    /* The first octet holds two 4-bit fields; which one is declared
     * first depends on WORDS_BIGENDIAN. */
#if defined (WORDS_BIGENDIAN)
    uint8_t version:4;
    uint8_t ihl:4;
#else
    uint8_t ihl:4;
    uint8_t version:4;
#endif
    uint8_t tos;
    uint16_t tot_len;
    uint16_t id;
    uint16_t frag;
    /* Masks for the host-order value of frag: */
#define IPHDR_FRAG_OFF  ((uint16_t)0x1fff)
#define IPHDR_FRAG_MORE ((uint16_t)0x2000)
#define IPHDR_FRAG_DONT ((uint16_t)0x4000)
/*      reserved                   0x8000 */
    uint8_t ttl;
    uint8_t protocol;
    uint16_t check;
    uint32_t saddr;
    uint32_t daddr;
    /* Any options follow immediately after daddr. */
};
232 | ||
/* An ICMP message as this file builds and parses it: a fixed-size
 * struct iphdr followed directly by the ICMP header.  Overlaying this
 * on a packet therefore only lines up with the ICMP fields when the
 * IP header carries no options (ihl==5). */
struct icmphdr {
    struct iphdr iph;
    uint8_t type;
    uint8_t code;
    uint16_t check;
    /* The 32 bits after the checksum; which member applies depends on
     * the message type and code. */
    union icmpinfofield {
        uint32_t unused;
        struct {
            uint8_t pointer;
            uint8_t unused1;
            uint16_t unused2;
        } pprob; /* presumably Parameter Problem -- not used in the code visible here */
        uint32_t gwaddr;
        struct {
            uint16_t id;
            uint16_t seq;
        } echo;
        struct {
            uint16_t unused;
            uint16_t mtu; /* stored with htons() by netlink_maybe_fragment */
        } fragneeded;
    } d;
};

/* Zero-initialised info field (static storage, no initialiser), passed
 * for ICMP messages which carry no extra information. */
static const union icmpinfofield icmp_noinfo;
4efd681a | 258 | |
826b47e9 IJ |
259 | static void netlink_client_deliver(struct netlink *st, |
260 | struct netlink_client *client, | |
261 | uint32_t source, uint32_t dest, | |
262 | struct buffer_if *buf); | |
263 | static void netlink_host_deliver(struct netlink *st, | |
264 | struct netlink_client *sender, | |
265 | uint32_t source, uint32_t dest, | |
266 | struct buffer_if *buf); | |
267 | ||
dbe11c20 IJ |
268 | static const char *sender_name(struct netlink_client *sender /* or NULL */) |
269 | { | |
270 | return sender?sender->name:"(local)"; | |
271 | } | |
272 | ||
70dc107b SE |
273 | static void netlink_packet_deliver(struct netlink *st, |
274 | struct netlink_client *client, | |
275 | struct buffer_if *buf); | |
4efd681a | 276 | |
ff05a229 SE |
277 | /* XXX RFC1812 4.3.2.5: |
278 | All other ICMP error messages (Destination Unreachable, | |
279 | Redirect, Time Exceeded, and Parameter Problem) SHOULD have their | |
280 | precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK | |
281 | CONTROL). The IP Precedence value for these error messages MAY be | |
282 | settable. | |
283 | */ | |
4efd681a | 284 | static struct icmphdr *netlink_icmp_tmpl(struct netlink *st, |
826b47e9 IJ |
285 | uint32_t source, uint32_t dest, |
286 | uint16_t len) | |
4efd681a SE |
287 | { |
288 | struct icmphdr *h; | |
289 | ||
290 | BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl"); | |
3abd18e8 | 291 | buffer_init(&st->icmp,calculate_max_start_pad()); |
4efd681a SE |
292 | h=buf_append(&st->icmp,sizeof(*h)); |
293 | ||
294 | h->iph.version=4; | |
295 | h->iph.ihl=5; | |
296 | h->iph.tos=0; | |
297 | h->iph.tot_len=htons(len+(h->iph.ihl*4)+8); | |
298 | h->iph.id=0; | |
a6768d7c | 299 | h->iph.frag=0; |
ff05a229 | 300 | h->iph.ttl=255; /* XXX should be configurable */ |
4efd681a | 301 | h->iph.protocol=1; |
826b47e9 | 302 | h->iph.saddr=htonl(source); |
4efd681a SE |
303 | h->iph.daddr=htonl(dest); |
304 | h->iph.check=0; | |
305 | h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl); | |
306 | h->check=0; | |
307 | h->d.unused=0; | |
308 | ||
309 | return h; | |
310 | } | |
311 | ||
312 | /* Fill in the ICMP checksum field correctly */ | |
313 | static void netlink_icmp_csum(struct icmphdr *h) | |
314 | { | |
1caa23ff | 315 | int32_t len; |
4efd681a SE |
316 | |
317 | len=ntohs(h->iph.tot_len)-(4*h->iph.ihl); | |
318 | h->check=0; | |
319 | h->check=ip_csum(&h->type,len); | |
320 | } | |
321 | ||
322 | /* RFC1122: | |
323 | * An ICMP error message MUST NOT be sent as the result of | |
324 | * receiving: | |
325 | * | |
326 | * * an ICMP error message, or | |
327 | * | |
328 | * * a datagram destined to an IP broadcast or IP multicast | |
329 | * address, or | |
330 | * | |
331 | * * a datagram sent as a link-layer broadcast, or | |
332 | * | |
333 | * * a non-initial fragment, or | |
334 | * | |
335 | * * a datagram whose source address does not define a single | |
336 | * host -- e.g., a zero address, a loopback address, a | |
337 | * broadcast address, a multicast address, or a Class E | |
338 | * address. | |
339 | */ | |
340 | static bool_t netlink_icmp_may_reply(struct buffer_if *buf) | |
341 | { | |
342 | struct iphdr *iph; | |
8dea8d37 | 343 | struct icmphdr *icmph; |
4efd681a SE |
344 | uint32_t source; |
345 | ||
975820aa | 346 | if (buf->size < (int)sizeof(struct icmphdr)) return False; |
4efd681a | 347 | iph=(struct iphdr *)buf->start; |
8dea8d37 SE |
348 | icmph=(struct icmphdr *)buf->start; |
349 | if (iph->protocol==1) { | |
350 | switch(icmph->type) { | |
686b7f1d IJ |
351 | /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types |
352 | * as retrieved Thu, 20 Mar 2014 00:16:44 +0000. | |
353 | * Deprecated, reserved, unassigned and experimental | |
354 | * options are treated as not safe to reply to. | |
355 | */ | |
356 | case 0: /* Echo Reply */ | |
357 | case 8: /* Echo */ | |
358 | case 13: /* Timestamp */ | |
359 | case 14: /* Timestamp Reply */ | |
360 | return True; | |
361 | default: | |
8dea8d37 SE |
362 | return False; |
363 | } | |
364 | } | |
4efd681a | 365 | /* How do we spot broadcast destination addresses? */ |
a6768d7c | 366 | if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False; |
4efd681a SE |
367 | source=ntohl(iph->saddr); |
368 | if (source==0) return False; | |
369 | if ((source&0xff000000)==0x7f000000) return False; | |
370 | /* How do we spot broadcast source addresses? */ | |
371 | if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */ | |
372 | if ((source&0xf0000000)==0xf0000000) return False; /* Class E */ | |
373 | return True; | |
374 | } | |
375 | ||
376 | /* How much of the original IP packet do we include in its ICMP | |
377 | response? The header plus up to 64 bits. */ | |
ff05a229 SE |
378 | |
379 | /* XXX TODO RFC1812: | |
380 | 4.3.2.3 Original Message Header | |
381 | ||
382 | Historically, every ICMP error message has included the Internet | |
383 | header and at least the first 8 data bytes of the datagram that | |
384 | triggered the error. This is no longer adequate, due to the use of | |
385 | IP-in-IP tunneling and other technologies. Therefore, the ICMP | |
386 | datagram SHOULD contain as much of the original datagram as possible | |
387 | without the length of the ICMP datagram exceeding 576 bytes. The | |
388 | returned IP header (and user data) MUST be identical to that which | |
389 | was received, except that the router is not required to undo any | |
390 | modifications to the IP header that are normally performed in | |
391 | forwarding that were performed before the error was detected (e.g., | |
392 | decrementing the TTL, or updating options). Note that the | |
393 | requirements of Section [4.3.3.5] supersede this requirement in some | |
394 | cases (i.e., for a Parameter Problem message, if the problem is in a | |
395 | modified field, the router must undo the modification). See Section | |
396 | [4.3.3.5]). | |
397 | */ | |
4efd681a SE |
398 | static uint16_t netlink_icmp_reply_len(struct buffer_if *buf) |
399 | { | |
975820aa | 400 | if (buf->size < (int)sizeof(struct iphdr)) return 0; |
4efd681a SE |
401 | struct iphdr *iph=(struct iphdr *)buf->start; |
402 | uint16_t hlen,plen; | |
403 | ||
404 | hlen=iph->ihl*4; | |
405 | /* We include the first 8 bytes of the packet data, provided they exist */ | |
406 | hlen+=8; | |
407 | plen=ntohs(iph->tot_len); | |
4ea3eeb8 | 408 | return MIN(hlen,plen); |
4efd681a SE |
409 | } |
410 | ||
70dc107b SE |
411 | /* client indicates where the packet we're constructing a response to |
412 | comes from. NULL indicates the host. */ | |
ab62c3ed IJ |
413 | static void netlink_icmp_simple(struct netlink *st, |
414 | struct netlink_client *origsender, | |
415 | struct buffer_if *buf, | |
cfd79482 IJ |
416 | uint8_t type, uint8_t code, |
417 | union icmpinfofield info) | |
4efd681a | 418 | { |
4efd681a SE |
419 | struct icmphdr *h; |
420 | uint16_t len; | |
421 | ||
422 | if (netlink_icmp_may_reply(buf)) { | |
975820aa | 423 | struct iphdr *iph=(struct iphdr *)buf->start; |
826b47e9 IJ |
424 | |
425 | uint32_t icmpdest = ntohl(iph->saddr); | |
426 | uint32_t icmpsource; | |
427 | const char *icmpsourcedebugprefix; | |
428 | if (!st->ptp) { | |
429 | icmpsource=st->secnet_address; | |
430 | icmpsourcedebugprefix=""; | |
431 | } else if (origsender) { | |
432 | /* was from peer, send reply as if from host */ | |
433 | icmpsource=st->local_address; | |
434 | icmpsourcedebugprefix="L!"; | |
435 | } else { | |
436 | /* was from host, send reply as if from peer */ | |
437 | icmpsource=st->secnet_address; /* actually, peer address */ | |
438 | icmpsourcedebugprefix="P!"; | |
439 | } | |
440 | MDEBUG("%s: generating ICMP re %s[%s]->[%s]:" | |
441 | " from %s%s type=%u code=%u\n", | |
442 | st->name, sender_name(origsender), | |
443 | ipaddr_to_string(ntohl(iph->saddr)), | |
444 | ipaddr_to_string(ntohl(iph->daddr)), | |
445 | icmpsourcedebugprefix, | |
446 | ipaddr_to_string(icmpsource), | |
447 | type, code); | |
448 | ||
4efd681a | 449 | len=netlink_icmp_reply_len(buf); |
826b47e9 | 450 | h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len); |
cfd79482 | 451 | h->type=type; h->code=code; h->d=info; |
4f28e77e | 452 | BUF_ADD_BYTES(append,&st->icmp,buf->start,len); |
4efd681a | 453 | netlink_icmp_csum(h); |
826b47e9 IJ |
454 | |
455 | if (!st->ptp) { | |
456 | netlink_packet_deliver(st,NULL,&st->icmp); | |
457 | } else if (origsender) { | |
458 | netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp); | |
459 | } else { | |
460 | netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp); | |
461 | } | |
4efd681a SE |
462 | BUF_ASSERT_FREE(&st->icmp); |
463 | } | |
464 | } | |
465 | ||
466 | /* | |
467 | * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the | |
468 | * checksum. | |
ff05a229 | 469 | * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums. |
4efd681a SE |
470 | * |
471 | * Is the datagram acceptable? | |
472 | * | |
473 | * 1. Length at least the size of an ip header | |
474 | * 2. Version of 4 | |
475 | * 3. Checksums correctly. | |
476 | * 4. Doesn't have a bogus length | |
477 | */ | |
d714da29 IJ |
478 | static bool_t netlink_check(struct netlink *st, struct buffer_if *buf, |
479 | char *errmsgbuf, int errmsgbuflen) | |
4efd681a | 480 | { |
d714da29 IJ |
481 | #define BAD(...) do{ \ |
482 | snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \ | |
483 | return False; \ | |
484 | }while(0) | |
485 | ||
975820aa | 486 | if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size); |
4efd681a | 487 | struct iphdr *iph=(struct iphdr *)buf->start; |
1caa23ff | 488 | int32_t len; |
4efd681a | 489 | |
d714da29 | 490 | if (iph->version != 4) BAD("version %u",iph->version); |
59a5b098 | 491 | if (iph->ihl < 5) BAD("ihl %u",iph->ihl); |
d714da29 IJ |
492 | if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl); |
493 | if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum"); | |
4efd681a SE |
494 | len=ntohs(iph->tot_len); |
495 | /* There should be no padding */ | |
d714da29 IJ |
496 | if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len); |
497 | if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl); | |
4efd681a SE |
498 | /* XXX check that there's no source route specified */ |
499 | return True; | |
d714da29 IJ |
500 | |
501 | #undef BAD | |
4efd681a SE |
502 | } |
503 | ||
f3d69e41 IJ |
/*
 * Rewrite, in place, the IPv4 header at base so that it keeps only
 * those options whose "copied" flag (0x80 in the option type) is set.
 *
 *   base  start of the IPv4 header; the option area is modified
 *   hlp   in: current header length in bytes
 *         out: new header length in bytes (written only on success)
 *
 * Retained options are packed directly after the fixed 20-byte header,
 * the result is padded to a multiple of 4 bytes with End of Option
 * List bytes, and the header's IHL field is updated to match.  No
 * Operation options are dropped.  The total length and checksum
 * fields are NOT updated here.
 *
 * Returns 0 on success, or a static string describing the problem if
 * the options are malformed; in that case the option area may already
 * have been partly rewritten.
 */
static const char *fragment_filter_header(uint8_t *base, long *hlp)
{
    /* Size of the option-less IPv4 header (== sizeof(struct iphdr)).
     * The header is handled bytewise here, so base need not be
     * suitably aligned for a struct. */
    enum { fixedhl = 20 };
    long hl = *hlp;
    const uint8_t *ipend = base + hl;
    uint8_t *op = base + fixedhl;
    const uint8_t *ip = op;

    while (ip < ipend) {
        uint8_t opt = ip[0];
        int remain = ipend - ip;
        if (opt == 0x00) /* End of Options List */ break;
        if (opt == 0x01) /* No Operation */ {
            /* one-byte option with no length field: step over it */
            ip++;
            continue;
        }
        if (remain < 2) return "IPv4 options truncated at length";
        int optlen = ip[1];
        /* The length counts the type and length bytes themselves, so
         * anything under 2 is invalid (and 0 would never advance). */
        if (optlen < 2) return "IPv4 option length too short";
        if (remain < optlen) return "IPv4 options truncated in option";
        if (opt & 0x80) /* copy */ {
            /* op <= ip always, and the regions may overlap */
            memmove(op, ip, optlen);
            op += optlen;
        }
        ip += optlen;
    }
    while ((hl = (op - base)) & 0x3)
        *op++ = 0x00 /* End of Option List */;
    /* IHL is the low-order nibble of the first header byte */
    base[0] = (uint8_t)((base[0] & 0xf0) | (hl >> 2));
    *hlp = hl;

    return 0;
}
533 | ||
/* Fragment or send ICMP Fragmentation Needed
 *
 * Passes the packet in buf to deliver(deliver_dst, ...), splitting it
 * into fragments first if it is larger than mtu.
 *
 *   sender         passed on to netlink_icmp_simple as the packet's
 *                  originator if an ICMP error has to be generated
 *   deliver,
 *   deliver_dst    callback and its first argument; called once per
 *                  packet or fragment
 *   delivery_name  used only in log messages
 *   mtu            largest packet that may be handed to deliver
 *   source         used only for log messages (via ipaddr_to_string)
 *   dest           not used in this function
 *
 * Outcomes:
 *  - fits in mtu: delivered unchanged;
 *  - Don't Fragment set: ICMP "fragmentation required" carrying mtu
 *    is generated and the packet is dropped;
 *  - mtu cannot hold the header plus 8 data bytes: warning, dropped;
 *  - otherwise: fragmented.  The first fragment carries the original
 *    header; later ones carry the header as rewritten by
 *    fragment_filter_header.
 *
 * st->icmp is borrowed as scratch space while fragmenting.
 */
static void netlink_maybe_fragment(struct netlink *st,
                                   struct netlink_client *sender,
                                   netlink_deliver_fn *deliver,
                                   void *deliver_dst,
                                   const char *delivery_name,
                                   int32_t mtu,
                                   uint32_t source, uint32_t dest,
                                   struct buffer_if *buf)
{
    struct iphdr *iph=(struct iphdr*)buf->start;
    long hl = iph->ihl*4; /* header length in bytes; may shrink below */
    const char *ssource = ipaddr_to_string(source);

    /* Fast path: no fragmentation needed */
    if (buf->size <= mtu) {
        deliver(deliver_dst, buf);
        return;
    }

    MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
           st->name, ssource, delivery_name, buf->size);

    /* Log a warning about this packet; m must be a string literal */
#define BADFRAG(m, ...)                                 \
        Message(M_WARNING,                              \
                "%s: fragmenting packet from source %s" \
                " for transmission via %s: " m "\n",    \
                st->name, ssource, delivery_name,       \
                ## __VA_ARGS__);

    /* flags and fragment offset of the packet as received, host order */
    unsigned orig_frag = ntohs(iph->frag);

    if (orig_frag&IPHDR_FRAG_DONT) {
        union icmpinfofield info =
            { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
        netlink_icmp_simple(st,sender,buf,
                            ICMP_TYPE_UNREACHABLE,
                            ICMP_CODE_FRAGMENTATION_REQUIRED,
                            info);
        BUF_FREE(buf);
        return;
    }
    /* every fragment but the last carries a multiple of 8 data
     * bytes, so there must be room for at least 8 */
    if (mtu < hl + 8) {
        BADFRAG("mtu %"PRId32" too small", mtu);
        BUF_FREE(buf);
        return;
    }

    /* we (ab)use the icmp buffer to stash the original packet */
    struct buffer_if *orig = &st->icmp;
    BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
    buffer_copy(orig,buf);
    BUF_FREE(buf);

    /* Payload cursor within the stashed copy.  These are fixed
     * before the header is filtered, so they stay valid when hl
     * changes later. */
    const uint8_t *startindata = orig->start + hl;
    const uint8_t *indata =      startindata;
    const uint8_t *endindata =   orig->start + orig->size;
    _Bool filtered = 0; /* has the stashed header been filtered yet? */

    for (;;) {
        /* compute our fragment offset */
        long dataoffset = indata - startindata
            + (orig_frag & IPHDR_FRAG_OFF)*8;
        assert(!(dataoffset & 7));
        if (dataoffset > IPHDR_FRAG_OFF*8) {
            BADFRAG("ultimate fragment offset out of range");
            break;
        }

        /* buf is reused as the output buffer for each fragment */
        BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
        buffer_init(buf,calculate_max_start_pad());

        /* copy header (possibly filtered); will adjust in a bit */
        struct iphdr *fragh = buf_append(buf, hl);
        memcpy(fragh, orig->start, hl);

        /* decide how much payload to copy and copy it */
        long avail = mtu - hl;
        long remain = endindata - indata;
        /* if not everything fits, round down to a multiple of 8 */
        long use = avail < remain ? (avail & ~(long)7) : remain;
        BUF_ADD_BYTES(append, buf, indata, use);
        indata += use;

        _Bool last_frag = indata >= endindata;

        /* adjust the header */
        fragh->tot_len = htons(buf->size);
        /* keep the original flags (including any More Fragments the
         * packet already had), add More Fragments unless this is our
         * last piece, and set the new offset in 8-byte units */
        fragh->frag =
            htons((orig_frag & ~IPHDR_FRAG_OFF) |
                  (last_frag ? 0 : IPHDR_FRAG_MORE) |
                  (dataoffset >> 3));
        fragh->check = 0;
        fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);

        /* actually send it */
        deliver(deliver_dst, buf);
        if (last_frag)
            break;

        /* after copying the header for the first frag,
         * we filter the header for the remaining frags */
        if (!filtered++) {
            const char *bad = fragment_filter_header(orig->start, &hl);
            if (bad) { BADFRAG("%s", bad); break; }
        }
    }

    BUF_FREE(orig);

#undef BADFRAG
}
644 | ||
7b6abafa | 645 | /* Deliver a packet _to_ client; used after we have decided |
55bc97e6 IJ |
646 | * what to do with it (and just to check that the client has |
647 | * actually registered a delivery function with us). */ | |
7b6abafa IJ |
648 | static void netlink_client_deliver(struct netlink *st, |
649 | struct netlink_client *client, | |
650 | uint32_t source, uint32_t dest, | |
651 | struct buffer_if *buf) | |
652 | { | |
55bc97e6 IJ |
653 | if (!client->deliver) { |
654 | string_t s,d; | |
655 | s=ipaddr_to_string(source); | |
656 | d=ipaddr_to_string(dest); | |
657 | Message(M_ERR,"%s: dropping %s->%s, client not registered\n", | |
658 | st->name,s,d); | |
55bc97e6 IJ |
659 | BUF_FREE(buf); |
660 | return; | |
661 | } | |
ab62c3ed | 662 | netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name, |
f3d69e41 | 663 | client->mtu, source,dest,buf); |
7b6abafa IJ |
664 | client->outcount++; |
665 | } | |
666 | ||
f928f069 IJ |
667 | /* Deliver a packet to the host; used after we have decided that that |
668 | * is what to do with it. */ | |
669 | static void netlink_host_deliver(struct netlink *st, | |
ab62c3ed | 670 | struct netlink_client *sender, |
f928f069 IJ |
671 | uint32_t source, uint32_t dest, |
672 | struct buffer_if *buf) | |
673 | { | |
ab62c3ed | 674 | netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)", |
f3d69e41 | 675 | st->mtu, source,dest,buf); |
f928f069 IJ |
676 | st->outcount++; |
677 | } | |
678 | ||
/* Deliver a packet.  "sender"==NULL for packets from the host and packets
   generated internally in secnet.

   Looks the destination address up among the client routes and either
   passes the packet to the chosen client, passes it to the host, or
   generates an ICMP destination-unreachable and drops it.  On return
   buf has been freed or handed on on every path. */
static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *sender,
                                   struct buffer_if *buf)
{
    if (buf->size < (int)sizeof(struct iphdr)) {
        Message(M_ERR,"%s: trying to deliver a too-short packet"
                " from %s!\n",st->name, sender_name(sender));
        BUF_FREE(buf);
        return;
    }

    struct iphdr *iph=(struct iphdr *)buf->start;
    uint32_t dest=ntohl(iph->daddr);
    uint32_t source=ntohl(iph->saddr);
    uint32_t best_quality;
    bool_t allow_route=False;   /* may this sender use any route? */
    bool_t found_allowed=False; /* has a chosen route had OPT_ALLOWROUTE? */
    int best_match;             /* index into st->routes, or -1 */
    int i;

    BUF_ASSERT_USED(buf);

    if (dest==st->secnet_address) {
        Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
        BUF_FREE(buf);
        return;
    }

    /* Packets from the host (sender==NULL) may always be routed.  Packets
       from clients with the allow_route option will also be routed. */
    if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
        allow_route=True;

    /* If !allow_route, we check the routing table anyway, and if
       there's a suitable route with OPT_ALLOWROUTE set we use it.  If
       there's a suitable route, but none with OPT_ALLOWROUTE set then
       we generate ICMP 'communication with destination network
       administratively prohibited'. */

    /* NOTE(review): as written, a route without OPT_ALLOWROUTE which
       is seen first and has higher link_quality keeps its place as
       best_match ahead of a later, lower-quality route that does have
       OPT_ALLOWROUTE; for a sender without allow_route the packet is
       then refused even though an allowed route exists.  Confirm
       whether that matches the intent described above. */

    best_quality=0;
    best_match=-1;
    for (i=0; i<st->n_clients; i++) {
        if (st->routes[i]->up &&
            ipset_contains_addr(st->routes[i]->networks,dest)) {
            /* It's an available route to the correct destination. But is
               it better than the one we already have? */

            /* If we have already found an allowed route then we don't
               bother looking at routes we're not allowed to use.  If
               we don't yet have an allowed route we'll consider any. */
            if (!allow_route && found_allowed) {
                if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
            }

            /* best_quality==0 also covers "nothing chosen yet", so the
               first matching route is always taken */
            if (st->routes[i]->link_quality>best_quality
                || best_quality==0) {
                best_quality=st->routes[i]->link_quality;
                best_match=i;
                if (st->routes[i]->options&OPT_ALLOWROUTE)
                    found_allowed=True;
                /* If quality isn't perfect we may wish to
                   consider kicking the tunnel with a 0-length
                   packet to prompt it to perform a key setup.
                   Then it'll eventually decide it's up or
                   down. */
                /* If quality is perfect and we're allowed to use the
                   route we don't need to search any more. */
                if (best_quality>=MAXIMUM_LINK_QUALITY &&
                    (allow_route || found_allowed)) break;
            }
        }
    }
    if (best_match==-1) {
        /* The packet's not going down a tunnel.  It might (ought to)
           be for the host.   */
        if (ipset_contains_addr(st->networks,dest)) {
            netlink_host_deliver(st,sender,source,dest,buf);
            BUF_ASSERT_FREE(buf);
        } else {
            string_t s,d;
            s=ipaddr_to_string(source);
            d=ipaddr_to_string(dest);
            Message(M_DEBUG,"%s: don't know where to deliver packet "
                    "(s=%s, d=%s)\n", st->name, s, d);
            netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
                                ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
            BUF_FREE(buf);
        }
    } else {
        if (!allow_route &&
            !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
            string_t s,d;
            s=ipaddr_to_string(source);
            d=ipaddr_to_string(dest);
            /* We have a usable route but aren't allowed to use it.
               Generate ICMP destination unreachable: communication
               with destination network administratively prohibited */
            Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
                    st->name,s,d);

            netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
                                ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
            BUF_FREE(buf);
        } else {
            /* a chosen route whose link_quality is 0 is treated as
               unusable */
            if (best_quality>0) {
                netlink_client_deliver(st,st->routes[best_match],
                                       source,dest,buf);
                BUF_ASSERT_FREE(buf);
            } else {
                /* Generate ICMP destination unreachable */
                netlink_icmp_simple(st,sender,buf,
                                    ICMP_TYPE_UNREACHABLE,
                                    ICMP_CODE_NET_UNREACHABLE,
                                    icmp_noinfo);
                BUF_FREE(buf);
            }
        }
    }
    BUF_ASSERT_FREE(buf);
}
801 | ||
70dc107b | 802 | static void netlink_packet_forward(struct netlink *st, |
f2b711bd | 803 | struct netlink_client *sender, |
70dc107b | 804 | struct buffer_if *buf) |
4efd681a | 805 | { |
975820aa | 806 | if (buf->size < (int)sizeof(struct iphdr)) return; |
4efd681a SE |
807 | struct iphdr *iph=(struct iphdr *)buf->start; |
808 | ||
809 | BUF_ASSERT_USED(buf); | |
810 | ||
811 | /* Packet has already been checked */ | |
812 | if (iph->ttl<=1) { | |
813 | /* Generate ICMP time exceeded */ | |
ab62c3ed | 814 | netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED, |
cfd79482 | 815 | ICMP_CODE_TTL_EXCEEDED,icmp_noinfo); |
4efd681a SE |
816 | BUF_FREE(buf); |
817 | return; | |
818 | } | |
819 | iph->ttl--; | |
820 | iph->check=0; | |
821 | iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl); | |
822 | ||
f2b711bd | 823 | netlink_packet_deliver(st,sender,buf); |
4efd681a SE |
824 | BUF_ASSERT_FREE(buf); |
825 | } | |
826 | ||
9d3a4132 | 827 | /* Deal with packets addressed explicitly to us */ |
70dc107b | 828 | static void netlink_packet_local(struct netlink *st, |
f2b711bd | 829 | struct netlink_client *sender, |
70dc107b | 830 | struct buffer_if *buf) |
4efd681a SE |
831 | { |
832 | struct icmphdr *h; | |
833 | ||
469fd1d9 SE |
834 | st->localcount++; |
835 | ||
975820aa IJ |
836 | if (buf->size < (int)sizeof(struct icmphdr)) { |
837 | Message(M_WARNING,"%s: short packet addressed to secnet; " | |
838 | "ignoring it\n",st->name); | |
839 | BUF_FREE(buf); | |
840 | return; | |
841 | } | |
4efd681a SE |
842 | h=(struct icmphdr *)buf->start; |
843 | ||
6e3fd952 IJ |
844 | unsigned fraginfo = ntohs(h->iph.frag); |
845 | if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) { | |
846 | if (!(fraginfo & IPHDR_FRAG_OFF)) | |
847 | /* report only for first fragment */ | |
848 | Message(M_WARNING,"%s: fragmented packet addressed to secnet; " | |
849 | "ignoring it\n",st->name); | |
4efd681a SE |
850 | BUF_FREE(buf); |
851 | return; | |
852 | } | |
853 | ||
854 | if (h->iph.protocol==1) { | |
855 | /* It's ICMP */ | |
ff05a229 | 856 | if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) { |
4efd681a SE |
857 | /* ICMP echo-request. Special case: we re-use the buffer |
858 | to construct the reply. */ | |
ff05a229 | 859 | h->type=ICMP_TYPE_ECHO_REPLY; |
4efd681a SE |
860 | h->iph.daddr=h->iph.saddr; |
861 | h->iph.saddr=htonl(st->secnet_address); | |
ff05a229 | 862 | h->iph.ttl=255; |
4efd681a SE |
863 | h->iph.check=0; |
864 | h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl); | |
865 | netlink_icmp_csum(h); | |
70dc107b | 866 | netlink_packet_deliver(st,NULL,buf); |
4efd681a SE |
867 | return; |
868 | } | |
869 | Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name); | |
870 | } else { | |
871 | /* Send ICMP protocol unreachable */ | |
ab62c3ed | 872 | netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE, |
cfd79482 | 873 | ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo); |
4efd681a SE |
874 | BUF_FREE(buf); |
875 | return; | |
876 | } | |
877 | ||
878 | BUF_FREE(buf); | |
879 | } | |
880 | ||
9d3a4132 SE |
881 | /* If cid==NULL packet is from host, otherwise cid specifies which tunnel |
882 | it came from. */ | |
f2b711bd | 883 | static void netlink_incoming(struct netlink *st, struct netlink_client *sender, |
469fd1d9 | 884 | struct buffer_if *buf) |
4efd681a | 885 | { |
4efd681a SE |
886 | uint32_t source,dest; |
887 | struct iphdr *iph; | |
d714da29 | 888 | char errmsgbuf[50]; |
f2b711bd | 889 | const char *sourcedesc=sender?sender->name:"host"; |
4efd681a SE |
890 | |
891 | BUF_ASSERT_USED(buf); | |
a28d65a5 | 892 | |
d714da29 IJ |
893 | if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) { |
894 | Message(M_WARNING,"%s: bad IP packet from %s: %s\n", | |
a28d65a5 | 895 | st->name,sourcedesc, |
d714da29 | 896 | errmsgbuf); |
4efd681a SE |
897 | BUF_FREE(buf); |
898 | return; | |
899 | } | |
e8b1adac | 900 | assert(buf->size >= (int)sizeof(struct iphdr)); |
4efd681a SE |
901 | iph=(struct iphdr *)buf->start; |
902 | ||
903 | source=ntohl(iph->saddr); | |
904 | dest=ntohl(iph->daddr); | |
905 | ||
d3fe100d SE |
906 | /* Check source. If we don't like the source, there's no point |
907 | generating ICMP because we won't know how to get it to the | |
908 | source of the packet. */ | |
f2b711bd | 909 | if (sender) { |
c6f79b17 SE |
910 | /* Check that the packet source is appropriate for the tunnel |
911 | it came down */ | |
f2b711bd | 912 | if (!ipset_contains_addr(sender->networks,source)) { |
9d3a4132 SE |
913 | string_t s,d; |
914 | s=ipaddr_to_string(source); | |
915 | d=ipaddr_to_string(dest); | |
916 | Message(M_WARNING,"%s: packet from tunnel %s with bad " | |
f2b711bd | 917 | "source address (s=%s,d=%s)\n",st->name,sender->name,s,d); |
9d3a4132 SE |
918 | BUF_FREE(buf); |
919 | return; | |
920 | } | |
921 | } else { | |
c6f79b17 SE |
922 | /* Check that the packet originates in our configured local |
923 | network, and hasn't been forwarded from elsewhere or | |
924 | generated with the wrong source address */ | |
794f2398 | 925 | if (!ipset_contains_addr(st->networks,source)) { |
9d3a4132 SE |
926 | string_t s,d; |
927 | s=ipaddr_to_string(source); | |
928 | d=ipaddr_to_string(dest); | |
929 | Message(M_WARNING,"%s: outgoing packet with bad source address " | |
930 | "(s=%s,d=%s)\n",st->name,s,d); | |
9d3a4132 SE |
931 | BUF_FREE(buf); |
932 | return; | |
933 | } | |
4efd681a | 934 | } |
c6f79b17 | 935 | |
794f2398 SE |
936 | /* If this is a point-to-point device we don't examine the |
937 | destination address at all; we blindly send it down our | |
938 | one-and-only registered tunnel, or to the host, depending on | |
d3fe100d SE |
939 | where it came from. It's up to external software to check |
940 | address validity and generate ICMP, etc. */ | |
c6f79b17 | 941 | if (st->ptp) { |
f2b711bd | 942 | if (sender) { |
ab62c3ed | 943 | netlink_host_deliver(st,sender,source,dest,buf); |
c6f79b17 | 944 | } else { |
7b6abafa | 945 | netlink_client_deliver(st,st->clients,source,dest,buf); |
c6f79b17 SE |
946 | } |
947 | BUF_ASSERT_FREE(buf); | |
948 | return; | |
949 | } | |
950 | ||
d3fe100d SE |
951 | /* st->secnet_address needs checking before matching destination |
952 | addresses */ | |
2fe58dfd | 953 | if (dest==st->secnet_address) { |
f2b711bd | 954 | netlink_packet_local(st,sender,buf); |
4efd681a | 955 | BUF_ASSERT_FREE(buf); |
2fe58dfd SE |
956 | return; |
957 | } | |
f2b711bd | 958 | netlink_packet_forward(st,sender,buf); |
4efd681a SE |
959 | BUF_ASSERT_FREE(buf); |
960 | } | |
961 | ||
469fd1d9 SE |
962 | static void netlink_inst_incoming(void *sst, struct buffer_if *buf) |
963 | { | |
964 | struct netlink_client *c=sst; | |
965 | struct netlink *st=c->nst; | |
966 | ||
967 | netlink_incoming(st,c,buf); | |
968 | } | |
969 | ||
970 | static void netlink_dev_incoming(void *sst, struct buffer_if *buf) | |
971 | { | |
972 | struct netlink *st=sst; | |
973 | ||
974 | netlink_incoming(st,NULL,buf); | |
975 | } | |
976 | ||
d3fe100d | 977 | static void netlink_set_quality(void *sst, uint32_t quality) |
4efd681a | 978 | { |
d3fe100d SE |
979 | struct netlink_client *c=sst; |
980 | struct netlink *st=c->nst; | |
4efd681a | 981 | |
d3fe100d SE |
982 | c->link_quality=quality; |
983 | c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True; | |
984 | if (c->options&OPT_SOFTROUTE) { | |
985 | st->set_routes(st->dst,c); | |
4efd681a | 986 | } |
4efd681a SE |
987 | } |
988 | ||
d3fe100d SE |
989 | static void netlink_output_subnets(struct netlink *st, uint32_t loglevel, |
990 | struct subnet_list *snets) | |
4efd681a | 991 | { |
1caa23ff | 992 | int32_t i; |
d3fe100d | 993 | string_t net; |
4efd681a | 994 | |
d3fe100d SE |
995 | for (i=0; i<snets->entries; i++) { |
996 | net=subnet_to_string(snets->list[i]); | |
997 | Message(loglevel,"%s ",net); | |
9d3a4132 | 998 | } |
4efd681a SE |
999 | } |
1000 | ||
042a8da9 | 1001 | static void netlink_dump_routes(struct netlink *st, bool_t requested) |
9d3a4132 SE |
1002 | { |
1003 | int i; | |
1004 | string_t net; | |
042a8da9 | 1005 | uint32_t c=M_INFO; |
9d3a4132 | 1006 | |
042a8da9 | 1007 | if (requested) c=M_WARNING; |
469fd1d9 SE |
1008 | if (st->ptp) { |
1009 | net=ipaddr_to_string(st->secnet_address); | |
34d3bf4c | 1010 | Message(c,"%s: point-to-point (remote end is %s); routes: ", |
469fd1d9 | 1011 | st->name, net); |
d3fe100d | 1012 | netlink_output_subnets(st,c,st->clients->subnets); |
469fd1d9 SE |
1013 | Message(c,"\n"); |
1014 | } else { | |
1015 | Message(c,"%s: routing table:\n",st->name); | |
d3fe100d SE |
1016 | for (i=0; i<st->n_clients; i++) { |
1017 | netlink_output_subnets(st,c,st->routes[i]->subnets); | |
ff05a229 | 1018 | Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s," |
ea7ec970 | 1019 | "quality %d,use %d,pri %lu)\n", |
d3fe100d | 1020 | st->routes[i]->name, |
ff05a229 SE |
1021 | st->routes[i]->up?"up":"down", |
1022 | st->routes[i]->mtu, | |
d3fe100d SE |
1023 | st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard", |
1024 | st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted", | |
d3fe100d | 1025 | st->routes[i]->link_quality, |
ea7ec970 SE |
1026 | st->routes[i]->outcount, |
1027 | (unsigned long)st->routes[i]->priority); | |
469fd1d9 SE |
1028 | } |
1029 | net=ipaddr_to_string(st->secnet_address); | |
1030 | Message(c,"%s/32 -> netlink \"%s\" (use %d)\n", | |
1031 | net,st->name,st->localcount); | |
794f2398 SE |
1032 | for (i=0; i<st->subnets->entries; i++) { |
1033 | net=subnet_to_string(st->subnets->list[i]); | |
1034 | Message(c,"%s ",net); | |
469fd1d9 | 1035 | } |
794f2398 SE |
1036 | if (i>0) |
1037 | Message(c,"-> host (use %d)\n",st->outcount); | |
9d3a4132 SE |
1038 | } |
1039 | } | |
1040 | ||
d3fe100d SE |
1041 | /* ap is a pointer to a member of the routes array */ |
1042 | static int netlink_compare_client_priority(const void *ap, const void *bp) | |
70dc107b | 1043 | { |
d3fe100d SE |
1044 | const struct netlink_client *const*a=ap; |
1045 | const struct netlink_client *const*b=bp; | |
70dc107b | 1046 | |
d3fe100d SE |
1047 | if ((*a)->priority==(*b)->priority) return 0; |
1048 | if ((*a)->priority<(*b)->priority) return 1; | |
70dc107b SE |
1049 | return -1; |
1050 | } | |
1051 | ||
1052 | static void netlink_phase_hook(void *sst, uint32_t new_phase) | |
1053 | { | |
1054 | struct netlink *st=sst; | |
1055 | struct netlink_client *c; | |
1caa23ff | 1056 | int32_t i; |
70dc107b SE |
1057 | |
1058 | /* All the networks serviced by the various tunnels should now | |
1059 | * have been registered. We build a routing table by sorting the | |
d3fe100d | 1060 | * clients by priority. */ |
b4ececfc | 1061 | NEW_ARY(st->routes,st->n_clients); |
70dc107b SE |
1062 | /* Fill the table */ |
1063 | i=0; | |
59230b9b IJ |
1064 | for (c=st->clients; c; c=c->next) { |
1065 | assert(i<INT_MAX); | |
d3fe100d | 1066 | st->routes[i++]=c; |
59230b9b | 1067 | } |
d3fe100d SE |
1068 | /* Sort the table in descending order of priority */ |
1069 | qsort(st->routes,st->n_clients,sizeof(*st->routes), | |
1070 | netlink_compare_client_priority); | |
9d3a4132 | 1071 | |
042a8da9 SE |
1072 | netlink_dump_routes(st,False); |
1073 | } | |
1074 | ||
1075 | static void netlink_signal_handler(void *sst, int signum) | |
1076 | { | |
1077 | struct netlink *st=sst; | |
1078 | Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name); | |
1079 | netlink_dump_routes(st,True); | |
70dc107b SE |
1080 | } |
1081 | ||
1caa23ff | 1082 | static void netlink_inst_set_mtu(void *sst, int32_t new_mtu) |
d3fe100d SE |
1083 | { |
1084 | struct netlink_client *c=sst; | |
1085 | ||
1086 | c->mtu=new_mtu; | |
1087 | } | |
1088 | ||
469fd1d9 | 1089 | static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver, |
1c085348 | 1090 | void *dst, uint32_t *localmtu_r) |
469fd1d9 SE |
1091 | { |
1092 | struct netlink_client *c=sst; | |
1c085348 | 1093 | struct netlink *st=c->nst; |
469fd1d9 | 1094 | |
469fd1d9 SE |
1095 | c->deliver=deliver; |
1096 | c->dst=dst; | |
1c085348 IJ |
1097 | |
1098 | if (localmtu_r) | |
1099 | *localmtu_r=st->mtu; | |
469fd1d9 SE |
1100 | } |
1101 | ||
1102 | static struct flagstr netlink_option_table[]={ | |
1103 | { "soft", OPT_SOFTROUTE }, | |
1104 | { "allow-route", OPT_ALLOWROUTE }, | |
1105 | { NULL, 0} | |
1106 | }; | |
1107 | /* This is the routine that gets called when the closure that's | |
1108 | returned by an invocation of a netlink device closure (eg. tun, | |
1109 | userv-ipif) is invoked. It's used to create routes and pass in | |
1110 | information about them; the closure it returns is used by site | |
1111 | code. */ | |
1112 | static closure_t *netlink_inst_create(struct netlink *st, | |
1113 | struct cloc loc, dict_t *dict) | |
1114 | { | |
1115 | struct netlink_client *c; | |
1116 | string_t name; | |
794f2398 | 1117 | struct ipset *networks; |
1caa23ff IJ |
1118 | uint32_t options,priority; |
1119 | int32_t mtu; | |
794f2398 | 1120 | list_t *l; |
469fd1d9 SE |
1121 | |
1122 | name=dict_read_string(dict, "name", True, st->name, loc); | |
1123 | ||
794f2398 SE |
1124 | l=dict_lookup(dict,"routes"); |
1125 | if (!l) | |
1126 | cfgfatal(loc,st->name,"required parameter \"routes\" not found\n"); | |
1127 | networks=string_list_to_ipset(l,loc,st->name,"routes"); | |
469fd1d9 SE |
1128 | options=string_list_to_word(dict_lookup(dict,"options"), |
1129 | netlink_option_table,st->name); | |
1130 | ||
d3fe100d SE |
1131 | priority=dict_read_number(dict,"priority",False,st->name,loc,0); |
1132 | mtu=dict_read_number(dict,"mtu",False,st->name,loc,0); | |
1133 | ||
1134 | if ((options&OPT_SOFTROUTE) && !st->set_routes) { | |
469fd1d9 SE |
1135 | cfgfatal(loc,st->name,"this netlink device does not support " |
1136 | "soft routes.\n"); | |
1137 | return NULL; | |
1138 | } | |
1139 | ||
1140 | if (options&OPT_SOFTROUTE) { | |
1141 | /* XXX for now we assume that soft routes require root privilege; | |
1142 | this may not always be true. The device driver can tell us. */ | |
1143 | require_root_privileges=True; | |
1144 | require_root_privileges_explanation="netlink: soft routes"; | |
1145 | if (st->ptp) { | |
1146 | cfgfatal(loc,st->name,"point-to-point netlinks do not support " | |
1147 | "soft routes.\n"); | |
1148 | return NULL; | |
1149 | } | |
1150 | } | |
1151 | ||
794f2398 SE |
1152 | /* Check that nets are a subset of st->remote_networks; |
1153 | refuse to register if they are not. */ | |
1154 | if (!ipset_is_subset(st->remote_networks,networks)) { | |
1155 | cfgfatal(loc,st->name,"routes are not allowed\n"); | |
469fd1d9 SE |
1156 | return NULL; |
1157 | } | |
1158 | ||
b7886fd4 | 1159 | NEW(c); |
469fd1d9 SE |
1160 | c->cl.description=name; |
1161 | c->cl.type=CL_NETLINK; | |
1162 | c->cl.apply=NULL; | |
1163 | c->cl.interface=&c->ops; | |
1164 | c->ops.st=c; | |
1165 | c->ops.reg=netlink_inst_reg; | |
1166 | c->ops.deliver=netlink_inst_incoming; | |
1167 | c->ops.set_quality=netlink_set_quality; | |
d3fe100d | 1168 | c->ops.set_mtu=netlink_inst_set_mtu; |
469fd1d9 SE |
1169 | c->nst=st; |
1170 | ||
1171 | c->networks=networks; | |
794f2398 | 1172 | c->subnets=ipset_to_subnet_list(networks); |
d3fe100d | 1173 | c->priority=priority; |
469fd1d9 SE |
1174 | c->deliver=NULL; |
1175 | c->dst=NULL; | |
1176 | c->name=name; | |
f208b9a9 | 1177 | c->link_quality=LINK_QUALITY_UNUSED; |
d3fe100d SE |
1178 | c->mtu=mtu?mtu:st->mtu; |
1179 | c->options=options; | |
1180 | c->outcount=0; | |
1181 | c->up=False; | |
1182 | c->kup=False; | |
469fd1d9 SE |
1183 | c->next=st->clients; |
1184 | st->clients=c; | |
59230b9b | 1185 | assert(st->n_clients < INT_MAX); |
d3fe100d | 1186 | st->n_clients++; |
469fd1d9 SE |
1187 | |
1188 | return &c->cl; | |
1189 | } | |
1190 | ||
1191 | static list_t *netlink_inst_apply(closure_t *self, struct cloc loc, | |
1192 | dict_t *context, list_t *args) | |
1193 | { | |
1194 | struct netlink *st=self->interface; | |
1195 | ||
1196 | dict_t *dict; | |
1197 | item_t *item; | |
1198 | closure_t *cl; | |
1199 | ||
469fd1d9 SE |
1200 | item=list_elem(args,0); |
1201 | if (!item || item->type!=t_dict) { | |
1202 | cfgfatal(loc,st->name,"must have a dictionary argument\n"); | |
1203 | } | |
1204 | dict=item->data.dict; | |
1205 | ||
1206 | cl=netlink_inst_create(st,loc,dict); | |
1207 | ||
1208 | return new_closure(cl); | |
1209 | } | |
1210 | ||
9d3a4132 SE |
1211 | netlink_deliver_fn *netlink_init(struct netlink *st, |
1212 | void *dst, struct cloc loc, | |
fe5e9cc4 | 1213 | dict_t *dict, cstring_t description, |
d3fe100d | 1214 | netlink_route_fn *set_routes, |
9d3a4132 | 1215 | netlink_deliver_fn *to_host) |
4efd681a | 1216 | { |
c6f79b17 | 1217 | item_t *sa, *ptpa; |
794f2398 | 1218 | list_t *l; |
c6f79b17 | 1219 | |
4efd681a SE |
1220 | st->dst=dst; |
1221 | st->cl.description=description; | |
469fd1d9 SE |
1222 | st->cl.type=CL_PURE; |
1223 | st->cl.apply=netlink_inst_apply; | |
1224 | st->cl.interface=st; | |
4efd681a | 1225 | st->clients=NULL; |
d3fe100d SE |
1226 | st->routes=NULL; |
1227 | st->n_clients=0; | |
1228 | st->set_routes=set_routes; | |
4efd681a SE |
1229 | st->deliver_to_host=to_host; |
1230 | ||
794f2398 | 1231 | st->name=dict_read_string(dict,"name",False,description,loc); |
4efd681a | 1232 | if (!st->name) st->name=description; |
794f2398 SE |
1233 | l=dict_lookup(dict,"networks"); |
1234 | if (l) | |
1235 | st->networks=string_list_to_ipset(l,loc,st->name,"networks"); | |
1236 | else { | |
4f5e39ec SE |
1237 | struct ipset *empty; |
1238 | empty=ipset_new(); | |
1239 | st->networks=ipset_complement(empty); | |
1240 | ipset_free(empty); | |
794f2398 SE |
1241 | } |
1242 | l=dict_lookup(dict,"remote-networks"); | |
1243 | if (l) { | |
1244 | st->remote_networks=string_list_to_ipset(l,loc,st->name, | |
1245 | "remote-networks"); | |
1246 | } else { | |
1247 | struct ipset *empty; | |
1248 | empty=ipset_new(); | |
1249 | st->remote_networks=ipset_complement(empty); | |
1250 | ipset_free(empty); | |
1251 | } | |
091433c6 IJ |
1252 | st->local_address=string_item_to_ipaddr( |
1253 | dict_find_item(dict,"local-address", True, "netlink", loc),"netlink"); | |
794f2398 | 1254 | |
c6f79b17 | 1255 | sa=dict_find_item(dict,"secnet-address",False,"netlink",loc); |
469fd1d9 | 1256 | ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc); |
c6f79b17 SE |
1257 | if (sa && ptpa) { |
1258 | cfgfatal(loc,st->name,"you may not specify secnet-address and " | |
1259 | "ptp-address in the same netlink device\n"); | |
1260 | } | |
1261 | if (!(sa || ptpa)) { | |
1262 | cfgfatal(loc,st->name,"you must specify secnet-address or " | |
1263 | "ptp-address for this netlink device\n"); | |
1264 | } | |
1265 | if (sa) { | |
794f2398 | 1266 | st->secnet_address=string_item_to_ipaddr(sa,"netlink"); |
c6f79b17 SE |
1267 | st->ptp=False; |
1268 | } else { | |
794f2398 | 1269 | st->secnet_address=string_item_to_ipaddr(ptpa,"netlink"); |
c6f79b17 SE |
1270 | st->ptp=True; |
1271 | } | |
d3fe100d SE |
1272 | /* To be strictly correct we could subtract secnet_address from |
1273 | networks here. It shouldn't make any practical difference, | |
794f2398 SE |
1274 | though, and will make the route dump look complicated... */ |
1275 | st->subnets=ipset_to_subnet_list(st->networks); | |
4efd681a | 1276 | st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU); |
f3d69e41 | 1277 | buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu)); |
469fd1d9 SE |
1278 | st->outcount=0; |
1279 | st->localcount=0; | |
70dc107b SE |
1280 | |
1281 | add_hook(PHASE_SETUP,netlink_phase_hook,st); | |
042a8da9 | 1282 | request_signal_notification(SIGUSR1, netlink_signal_handler, st); |
4efd681a | 1283 | |
469fd1d9 SE |
1284 | /* If we're point-to-point then we return a CL_NETLINK directly, |
1285 | rather than a CL_NETLINK_OLD or pure closure (depending on | |
1286 | compatibility). This CL_NETLINK is for our one and only | |
1287 | client. Our cl.apply function is NULL. */ | |
1288 | if (st->ptp) { | |
1289 | closure_t *cl; | |
1290 | cl=netlink_inst_create(st,loc,dict); | |
1291 | st->cl=*cl; | |
1292 | } | |
1293 | return netlink_dev_incoming; | |
2fe58dfd SE |
1294 | } |
1295 | ||
9d3a4132 | 1296 | /* No connection to the kernel at all... */ |
2fe58dfd | 1297 | |
9d3a4132 | 1298 | struct null { |
4efd681a | 1299 | struct netlink nl; |
4efd681a | 1300 | }; |
2fe58dfd | 1301 | |
d3fe100d | 1302 | static bool_t null_set_route(void *sst, struct netlink_client *routes) |
4efd681a | 1303 | { |
9d3a4132 | 1304 | struct null *st=sst; |
d3fe100d SE |
1305 | |
1306 | if (routes->up!=routes->kup) { | |
1307 | Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n", | |
1308 | st->nl.name,routes->name, | |
1309 | routes->up?"up":"down"); | |
1310 | routes->kup=routes->up; | |
9d3a4132 | 1311 | return True; |
2fe58dfd | 1312 | } |
9d3a4132 | 1313 | return False; |
2fe58dfd | 1314 | } |
9d3a4132 | 1315 | |
/* Deliver-to-host hook of the null device: the packet is ignored and
   the buffer is left untouched. */
static void null_deliver(void *sst, struct buffer_if *buf)
{
    /* Intentionally empty */
}
1320 | ||
1321 | static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context, | |
1322 | list_t *args) | |
1323 | { | |
1324 | struct null *st; | |
4efd681a SE |
1325 | item_t *item; |
1326 | dict_t *dict; | |
2fe58dfd | 1327 | |
b7886fd4 | 1328 | NEW(st); |
2fe58dfd | 1329 | |
4efd681a SE |
1330 | item=list_elem(args,0); |
1331 | if (!item || item->type!=t_dict) | |
1332 | cfgfatal(loc,"null-netlink","parameter must be a dictionary\n"); | |
1333 | ||
1334 | dict=item->data.dict; | |
1335 | ||
9d3a4132 SE |
1336 | netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route, |
1337 | null_deliver); | |
4efd681a SE |
1338 | |
1339 | return new_closure(&st->nl.cl); | |
2fe58dfd SE |
1340 | } |
1341 | ||
2fe58dfd SE |
1342 | void netlink_module(dict_t *dict) |
1343 | { | |
4efd681a | 1344 | add_closure(dict,"null-netlink",null_apply); |
2fe58dfd | 1345 | } |