/*
 * Unix networking abstraction.
 */
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
19 #define DEFINE_PLUG_METHOD_MACROS
24 #define ipv4_is_loopback(addr) (inet_netof(addr) == IN_LOOPBACKNET)
27 struct socket_function_table
*fn
;
28 /* the above variable absolutely *must* be the first in this structure */
36 int frozen
; /* this causes readability notifications to be ignored */
37 int frozen_readable
; /* this means we missed at least one readability
38 * notification while we were frozen */
39 int localhost_only
; /* for listening sockets */
42 int oobpending
; /* is there OOB data available to read? */
44 int pending_error
; /* in case send() returns error */
/*
 * We used to typedef struct Socket_tag *Socket.
 *
 * Since we have made the networking abstraction slightly more
 * abstract, Socket no longer means a TCP socket (it could mean
 * an SSL socket). So now we must use Actual_Socket when we know
 * we are talking about a TCP socket.
 */
56 typedef struct Socket_tag
*Actual_Socket
;
61 * Which address family this address belongs to. AF_INET for
62 * IPv4; AF_INET6 for IPv6; AF_UNSPEC indicates that name
63 * resolution has not been done and a simple host name is held
64 * in this SockAddr structure.
67 unsigned long address
; /* Address IPv4 style. */
69 struct addrinfo
*ai
; /* Address IPv6 style. */
71 char hostname
[512]; /* Store an unresolved host name. */
74 static tree234
*sktree
;
76 static int cmpfortree(void *av
, void *bv
)
78 Actual_Socket a
= (Actual_Socket
) av
, b
= (Actual_Socket
) bv
;
79 int as
= a
->s
, bs
= b
->s
;
87 static int cmpforsearch(void *av
, void *bv
)
89 Actual_Socket b
= (Actual_Socket
) bv
;
90 int as
= (int) av
, bs
= b
->s
;
100 sktree
= newtree234(cmpfortree
);
103 void sk_cleanup(void)
109 for (i
= 0; (s
= index234(sktree
, i
)) != NULL
; i
++) {
/*
 * Translate a system error number into its human-readable message.
 *
 * The returned pointer is whatever strerror() hands back for `error';
 * this function does not copy it, so the caller must not free it.
 */
char *error_string(int error)
{
    char *message = strerror(error);

    return message;
}
120 SockAddr
sk_namelookup(const char *host
, char **canonicalname
)
122 SockAddr ret
= smalloc(sizeof(struct SockAddr_tag
));
124 struct hostent
*h
= NULL
;
127 /* Clear the structure and default to IPv4. */
128 memset(ret
, 0, sizeof(struct SockAddr_tag
));
129 ret
->family
= 0; /* We set this one when we have resolved the host. */
133 if ((a
= inet_addr(host
)) == (unsigned long) INADDR_NONE
) {
135 if (getaddrinfo(host
, NULL
, NULL
, &ret
->ai
) == 0) {
136 ret
->family
= ret
->ai
->ai_family
;
141 * Otherwise use the IPv4-only gethostbyname... (NOTE:
142 * we don't use gethostbyname as a fallback!)
144 if (ret
->family
== 0) {
145 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
146 if ( (h
= gethostbyname(host
)) )
147 ret
->family
= AF_INET
;
149 if (ret
->family
== 0) {
150 ret
->error
= (h_errno
== HOST_NOT_FOUND
||
151 h_errno
== NO_DATA
||
152 h_errno
== NO_ADDRESS ?
"Host does not exist" :
153 h_errno
== TRY_AGAIN ?
154 "Temporary name service failure" :
155 "gethostbyname: unknown error");
161 /* If we got an address info use that... */
164 /* Are we in IPv4 fallback mode? */
165 /* We put the IPv4 address into the a variable so we can further-on use the IPv4 code... */
166 if (ret
->family
== AF_INET
)
168 (char *) &((struct sockaddr_in
*) ret
->ai
->
169 ai_addr
)->sin_addr
, sizeof(a
));
171 /* Now let's find that canonicalname... */
172 if (getnameinfo((struct sockaddr
*) ret
->ai
->ai_addr
,
174 AF_INET ?
sizeof(struct sockaddr_in
) :
175 sizeof(struct sockaddr_in6
), realhost
,
176 sizeof(realhost
), NULL
, 0, 0) != 0) {
177 strncpy(realhost
, host
, sizeof(realhost
));
180 /* We used the IPv4-only gethostbyname()... */
184 memcpy(&a
, h
->h_addr
, sizeof(a
));
185 /* This way we are always sure the h->h_name is valid :) */
186 strncpy(realhost
, h
->h_name
, sizeof(realhost
));
190 * This must be a numeric IPv4 address because it caused a
191 * success return from inet_addr.
193 ret
->family
= AF_INET
;
194 strncpy(realhost
, host
, sizeof(realhost
));
196 ret
->address
= ntohl(a
);
197 realhost
[lenof(realhost
)-1] = '\0';
198 *canonicalname
= smalloc(1+strlen(realhost
));
199 strcpy(*canonicalname
, realhost
);
203 SockAddr
sk_nonamelookup(const char *host
)
205 SockAddr ret
= smalloc(sizeof(struct SockAddr_tag
));
207 ret
->family
= AF_UNSPEC
;
208 strncpy(ret
->hostname
, host
, lenof(ret
->hostname
));
209 ret
->hostname
[lenof(ret
->hostname
)-1] = '\0';
213 void sk_getaddr(SockAddr addr
, char *buf
, int buflen
)
216 if (addr
->family
== AF_INET6
) {
217 FIXME
; /* I don't know how to get a text form of an IPv6 address. */
220 if (addr
->family
== AF_INET
) {
222 a
.s_addr
= htonl(addr
->address
);
223 strncpy(buf
, inet_ntoa(a
), buflen
);
224 buf
[buflen
-1] = '\0';
226 assert(addr
->family
== AF_UNSPEC
);
227 strncpy(buf
, addr
->hostname
, buflen
);
228 buf
[buflen
-1] = '\0';
/*
 * Report whether a host name refers to the local machine.
 *
 * Returns 1 if `name' is exactly the string "localhost" (the
 * comparison is case-sensitive), and 0 for anything else.
 */
int sk_hostname_is_local(char *name)
{
    if (strcmp(name, "localhost") == 0)
        return 1;

    return 0;
}
237 int sk_address_is_local(SockAddr addr
)
240 if (addr
->family
== AF_INET6
) {
241 FIXME
; /* someone who can compile for IPV6 had better do this bit */
244 if (addr
->family
== AF_INET
) {
246 a
.s_addr
= htonl(addr
->address
);
247 return ipv4_is_loopback(a
);
249 assert(addr
->family
== AF_UNSPEC
);
250 return 0; /* we don't know; assume not */
254 int sk_addrtype(SockAddr addr
)
256 return (addr
->family
== AF_INET ? ADDRTYPE_IPV4
:
258 addr
->family
== AF_INET6 ? ADDRTYPE_IPV6
:
263 void sk_addrcopy(SockAddr addr
, char *buf
)
265 assert(addr
->family
!= AF_UNSPEC
);
267 if (addr
->family
== AF_INET6
) {
268 memcpy(buf
, (char*) addr
->ai
, 16);
271 if (addr
->family
== AF_INET
) {
273 a
.s_addr
= htonl(addr
->address
);
274 memcpy(buf
, (char*) &a
.s_addr
, 4);
278 void sk_addr_free(SockAddr addr
)
283 static Plug
sk_tcp_plug(Socket sock
, Plug p
)
285 Actual_Socket s
= (Actual_Socket
) sock
;
292 static void sk_tcp_flush(Socket s
)
295 * We send data to the socket as soon as we can anyway,
296 * so we don't need to do anything here. :-)
300 static void sk_tcp_close(Socket s
);
301 static int sk_tcp_write(Socket s
, const char *data
, int len
);
302 static int sk_tcp_write_oob(Socket s
, const char *data
, int len
);
303 static void sk_tcp_set_private_ptr(Socket s
, void *ptr
);
304 static void *sk_tcp_get_private_ptr(Socket s
);
305 static void sk_tcp_set_frozen(Socket s
, int is_frozen
);
306 static char *sk_tcp_socket_error(Socket s
);
308 static struct socket_function_table tcp_fn_table
= {
314 sk_tcp_set_private_ptr
,
315 sk_tcp_get_private_ptr
,
320 Socket
sk_register(void *sock
, Plug plug
)
325 * Create Socket structure.
327 ret
= smalloc(sizeof(struct Socket_tag
));
328 ret
->fn
= &tcp_fn_table
;
331 bufchain_init(&ret
->output_data
);
332 ret
->writable
= 1; /* to start with */
333 ret
->sending_oob
= 0;
335 ret
->frozen_readable
= 0;
336 ret
->localhost_only
= 0; /* unused, but best init anyway */
337 ret
->pending_error
= 0;
338 ret
->oobpending
= FALSE
;
344 ret
->error
= error_string(errno
);
355 Socket
sk_new(SockAddr addr
, int port
, int privport
, int oobinline
,
356 int nodelay
, Plug plug
)
360 struct sockaddr_in6 a6
;
362 struct sockaddr_in a
;
368 * Create Socket structure.
370 ret
= smalloc(sizeof(struct Socket_tag
));
371 ret
->fn
= &tcp_fn_table
;
374 bufchain_init(&ret
->output_data
);
375 ret
->connected
= 0; /* to start with */
376 ret
->writable
= 0; /* to start with */
377 ret
->sending_oob
= 0;
379 ret
->frozen_readable
= 0;
380 ret
->localhost_only
= 0; /* unused, but best init anyway */
381 ret
->pending_error
= 0;
382 ret
->oobpending
= FALSE
;
388 assert(addr
->family
!= AF_UNSPEC
);
389 s
= socket(addr
->family
, SOCK_STREAM
, 0);
393 ret
->error
= error_string(errno
);
397 ret
->oobinline
= oobinline
;
400 setsockopt(s
, SOL_SOCKET
, SO_OOBINLINE
, (void *) &b
, sizeof(b
));
405 setsockopt(s
, IPPROTO_TCP
, TCP_NODELAY
, (void *) &b
, sizeof(b
));
409 * Bind to local address.
412 localport
= 1023; /* count from 1023 downwards */
414 localport
= 0; /* just use port 0 (ie kernel picks) */
416 /* Loop round trying to bind */
421 if (addr
->family
== AF_INET6
) {
422 memset(&a6
, 0, sizeof(a6
));
423 a6
.sin6_family
= AF_INET6
;
424 /*a6.sin6_addr = in6addr_any; *//* == 0 */
425 a6
.sin6_port
= htons(localport
);
429 a
.sin_family
= AF_INET
;
430 a
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
431 a
.sin_port
= htons(localport
);
434 retcode
= bind(s
, (addr
->family
== AF_INET6 ?
435 (struct sockaddr
*) &a6
:
436 (struct sockaddr
*) &a
),
438 AF_INET6 ?
sizeof(a6
) : sizeof(a
)));
440 retcode
= bind(s
, (struct sockaddr
*) &a
, sizeof(a
));
447 if (err
!= EADDRINUSE
) /* failed, for a bad reason */
452 break; /* we're only looping once */
455 break; /* we might have got to the end */
459 ret
->error
= error_string(err
);
464 * Connect to remote address.
467 if (addr
->family
== AF_INET6
) {
468 memset(&a
, 0, sizeof(a
));
469 a6
.sin6_family
= AF_INET6
;
470 a6
.sin6_port
= htons((short) port
);
472 ((struct sockaddr_in6
*) addr
->ai
->ai_addr
)->sin6_addr
;
476 a
.sin_family
= AF_INET
;
477 a
.sin_addr
.s_addr
= htonl(addr
->address
);
478 a
.sin_port
= htons((short) port
);
483 ioctl(s
, FIONBIO
, &i
);
488 connect(s
, ((addr
->family
== AF_INET6
) ?
489 (struct sockaddr
*) &a6
: (struct sockaddr
*) &a
),
490 (addr
->family
== AF_INET6
) ?
sizeof(a6
) : sizeof(a
))
492 connect(s
, (struct sockaddr
*) &a
, sizeof(a
))
495 if ( errno
!= EINPROGRESS
) {
496 ret
->error
= error_string(errno
);
501 * If we _don't_ get EWOULDBLOCK, the connect has completed
502 * and we should set the socket as connected and writable.
513 Socket
sk_newlistener(char *srcaddr
, int port
, Plug plug
, int local_host_only
)
517 struct sockaddr_in6 a6
;
519 struct sockaddr_in a
;
526 * Create Socket structure.
528 ret
= smalloc(sizeof(struct Socket_tag
));
529 ret
->fn
= &tcp_fn_table
;
532 bufchain_init(&ret
->output_data
);
533 ret
->writable
= 0; /* to start with */
534 ret
->sending_oob
= 0;
536 ret
->frozen_readable
= 0;
537 ret
->localhost_only
= local_host_only
;
538 ret
->pending_error
= 0;
539 ret
->oobpending
= FALSE
;
545 s
= socket(AF_INET
, SOCK_STREAM
, 0);
549 ret
->error
= error_string(errno
);
555 setsockopt(s
, SOL_SOCKET
, SO_REUSEADDR
, (const char *)&on
, sizeof(on
));
558 if (addr
->family
== AF_INET6
) {
559 memset(&a6
, 0, sizeof(a6
));
560 a6
.sin6_family
= AF_INET6
;
561 /* FIXME: srcaddr is ignored for IPv6, because I (SGT) don't
562 * know how to do it. :-) */
564 a6
.sin6_addr
= in6addr_loopback
;
566 a6
.sin6_addr
= in6addr_any
;
567 a6
.sin6_port
= htons(port
);
572 a
.sin_family
= AF_INET
;
575 * Bind to source address. First try an explicitly
579 a
.sin_addr
.s_addr
= inet_addr(srcaddr
);
580 if (a
.sin_addr
.s_addr
!= INADDR_NONE
) {
581 /* Override localhost_only with specified listen addr. */
582 ret
->localhost_only
= ipv4_is_loopback(a
.sin_addr
);
588 * ... and failing that, go with one of the standard ones.
592 a
.sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
594 a
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
597 a
.sin_port
= htons((short)port
);
600 retcode
= bind(s
, (addr
->family
== AF_INET6 ?
601 (struct sockaddr
*) &a6
:
602 (struct sockaddr
*) &a
),
604 AF_INET6 ?
sizeof(a6
) : sizeof(a
)));
606 retcode
= bind(s
, (struct sockaddr
*) &a
, sizeof(a
));
615 ret
->error
= error_string(err
);
620 if (listen(s
, SOMAXCONN
) < 0) {
622 ret
->error
= error_string(errno
);
631 static void sk_tcp_close(Socket sock
)
633 Actual_Socket s
= (Actual_Socket
) sock
;
640 int sk_getxdmdata(void *sock
, unsigned long *ip
, int *port
)
642 Actual_Socket s
= (Actual_Socket
) sock
;
643 struct sockaddr_in addr
;
647 * We must check that this socket really _is_ an Actual_Socket.
649 if (s
->fn
!= &tcp_fn_table
)
650 return 0; /* failure */
653 * If we ever implement connecting to a local X server through
654 * a Unix socket, we return 0xFFFFFFFF for the IP address and
655 * our current pid for the port. Bizarre, but such is life.
658 addrlen
= sizeof(addr
);
659 if (getsockname(s
->s
, (struct sockaddr
*)&addr
, &addrlen
) < 0 ||
660 addr
.sin_family
!= AF_INET
)
663 *ip
= ntohl(addr
.sin_addr
.s_addr
);
664 *port
= ntohs(addr
.sin_port
);
670 * The function which tries to send on a socket once it's deemed
673 void try_send(Actual_Socket s
)
675 while (s
->sending_oob
|| bufchain_size(&s
->output_data
) > 0) {
681 if (s
->sending_oob
) {
682 urgentflag
= MSG_OOB
;
683 len
= s
->sending_oob
;
687 bufchain_prefix(&s
->output_data
, &data
, &len
);
689 nsent
= send(s
->s
, data
, len
, urgentflag
);
690 noise_ultralight(nsent
);
692 err
= (nsent
< 0 ? errno
: 0);
693 if (err
== EWOULDBLOCK
) {
695 * Perfectly normal: we've sent all we can for the moment.
699 } else if (nsent
== 0 ||
700 err
== ECONNABORTED
|| err
== ECONNRESET
) {
702 * If send() returns CONNABORTED or CONNRESET, we
703 * unfortunately can't just call plug_closing(),
704 * because it's quite likely that we're currently
705 * _in_ a call from the code we'd be calling back
706 * to, so we'd have to make half the SSH code
707 * reentrant. Instead we flag a pending error on
708 * the socket, to be dealt with (by calling
709 * plug_closing()) at some suitable future moment.
711 s
->pending_error
= err
;
714 /* We're inside the Unix frontend here, so we know
715 * that the frontend handle is unnecessary. */
716 logevent(NULL
, error_string(err
));
717 fatalbox("%s", error_string(err
));
720 if (s
->sending_oob
) {
722 memmove(s
->oobdata
, s
->oobdata
+nsent
, len
-nsent
);
723 s
->sending_oob
= len
- nsent
;
728 bufchain_consume(&s
->output_data
, nsent
);
734 static int sk_tcp_write(Socket sock
, const char *buf
, int len
)
736 Actual_Socket s
= (Actual_Socket
) sock
;
739 * Add the data to the buffer list on the socket.
741 bufchain_add(&s
->output_data
, buf
, len
);
744 * Now try sending from the start of the buffer list.
749 return bufchain_size(&s
->output_data
);
752 static int sk_tcp_write_oob(Socket sock
, const char *buf
, int len
)
754 Actual_Socket s
= (Actual_Socket
) sock
;
757 * Replace the buffer list on the socket with the data.
759 bufchain_clear(&s
->output_data
);
760 assert(len
<= sizeof(s
->oobdata
));
761 memcpy(s
->oobdata
, buf
, len
);
762 s
->sending_oob
= len
;
765 * Now try sending from the start of the buffer list.
770 return s
->sending_oob
;
773 int select_result(int fd
, int event
)
777 char buf
[20480]; /* nice big buffer for plenty of speed */
781 /* Find the Socket structure */
782 s
= find234(sktree
, (void *) fd
, cmpforsearch
);
784 return 1; /* boggle */
786 noise_ultralight(event
);
789 case 4: /* exceptional */
792 * On a non-oobinline socket, this indicates that we
793 * can immediately perform an OOB read and get back OOB
794 * data, which we will send to the back end with
795 * type==2 (urgent data).
797 ret
= recv(s
->s
, buf
, sizeof(buf
), MSG_OOB
);
798 noise_ultralight(ret
);
800 char *str
= (ret
== 0 ?
"Internal networking trouble" :
801 error_string(errno
));
802 /* We're inside the Unix frontend here, so we know
803 * that the frontend handle is unnecessary. */
807 return plug_receive(s
->plug
, 2, buf
, ret
);
813 * If we reach here, this is an oobinline socket, which
814 * means we should set s->oobpending and then deal with it
815 * when we get called for the readability event (which
816 * should also occur).
818 s
->oobpending
= TRUE
;
820 case 1: /* readable; also acceptance */
823 * On a listening socket, the readability event means a
824 * connection is ready to be accepted.
826 struct sockaddr_in isa
;
827 int addrlen
= sizeof(struct sockaddr_in
);
828 int t
; /* socket of connection */
830 memset(&isa
, 0, sizeof(struct sockaddr_in
));
832 t
= accept(s
->s
,(struct sockaddr
*)&isa
,&addrlen
);
837 if (s
->localhost_only
&& !ipv4_is_loopback(isa
.sin_addr
)) {
838 close(t
); /* someone let nonlocal through?! */
839 } else if (plug_accepting(s
->plug
, (void*)t
)) {
840 close(t
); /* denied or error */
846 * If we reach here, this is not a listening socket, so
847 * readability really means readability.
850 /* In the case the socket is still frozen, we don't even bother */
852 s
->frozen_readable
= 1;
857 * We have received data on the socket. For an oobinline
858 * socket, this might be data _before_ an urgent pointer,
859 * in which case we send it to the back end with type==1
860 * (data prior to urgent).
862 if (s
->oobinline
&& s
->oobpending
) {
864 if (ioctl(s
->s
, SIOCATMARK
, &atmark
) == 0 && atmark
)
865 s
->oobpending
= FALSE
; /* clear this indicator */
869 ret
= recv(s
->s
, buf
, s
->oobpending ?
1 : sizeof(buf
), 0);
870 noise_ultralight(ret
);
872 if (errno
== EWOULDBLOCK
) {
877 return plug_closing(s
->plug
, error_string(errno
), errno
, 0);
878 } else if (0 == ret
) {
879 return plug_closing(s
->plug
, NULL
, 0, 0);
881 return plug_receive(s
->plug
, atmark ?
0 : 1, buf
, ret
);
884 case 2: /* writable */
887 * select() reports a socket as _writable_ when an
888 * asynchronous connection is completed.
890 s
->connected
= s
->writable
= 1;
893 int bufsize_before
, bufsize_after
;
895 bufsize_before
= s
->sending_oob
+ bufchain_size(&s
->output_data
);
897 bufsize_after
= s
->sending_oob
+ bufchain_size(&s
->output_data
);
898 if (bufsize_after
< bufsize_before
)
899 plug_sent(s
->plug
, bufsize_after
);
908 * Deal with socket errors detected in try_send().
910 void net_pending_errors(void)
916 * This might be a fiddly business, because it's just possible
917 * that handling a pending error on one socket might cause
918 * others to be closed. (I can't think of any reason this might
919 * happen in current SSH implementation, but to maintain
920 * generality of this network layer I'll assume the worst.)
922 * So what we'll do is search the socket list for _one_ socket
923 * with a pending error, and then handle it, and then search
924 * the list again _from the beginning_. Repeat until we make a
925 * pass with no socket errors present. That way we are
926 * protected against the socket list changing under our feet.
930 for (i
= 0; (s
= index234(sktree
, i
)) != NULL
; i
++) {
931 if (s
->pending_error
) {
933 * An error has occurred on this socket. Pass it to the
936 plug_closing(s
->plug
, error_string(s
->pending_error
),
937 s
->pending_error
, 0);
/*
 * Each socket abstraction contains a `void *' private field in
 * which the client can keep state.
 */
948 static void sk_tcp_set_private_ptr(Socket sock
, void *ptr
)
950 Actual_Socket s
= (Actual_Socket
) sock
;
951 s
->private_ptr
= ptr
;
954 static void *sk_tcp_get_private_ptr(Socket sock
)
956 Actual_Socket s
= (Actual_Socket
) sock
;
957 return s
->private_ptr
;
/*
 * Special error values are returned from sk_namelookup and sk_new
 * if there's a problem. These functions extract an error message,
 * or return NULL if there's no problem.
 */
965 char *sk_addr_error(SockAddr addr
)
969 static char *sk_tcp_socket_error(Socket sock
)
971 Actual_Socket s
= (Actual_Socket
) sock
;
975 static void sk_tcp_set_frozen(Socket sock
, int is_frozen
)
977 Actual_Socket s
= (Actual_Socket
) sock
;
978 if (s
->frozen
== is_frozen
)
980 s
->frozen
= is_frozen
;
981 if (!is_frozen
&& s
->frozen_readable
) {
983 recv(s
->s
, &c
, 1, MSG_PEEK
);
985 s
->frozen_readable
= 0;
/*
 * For Unix select()-based frontends: enumerate all sockets
 * currently active, and state whether we currently wish to receive
 * select events on them for reading, writing and exceptional
 * conditions.
 */
994 static void set_rwx(Actual_Socket s
, int *rwx
)
998 val
|= 2; /* write == connect */
999 if (s
->connected
&& !s
->frozen
)
1000 val
|= 1 | 4; /* read, except */
1001 if (bufchain_size(&s
->output_data
))
1002 val
|= 2; /* write */
1004 val
|= 1; /* read == accept */
1008 int first_socket(int *state
, int *rwx
)
1012 s
= index234(sktree
, (*state
)++);
1015 return s ? s
->s
: -1;
1018 int next_socket(int *state
, int *rwx
)
1020 Actual_Socket s
= index234(sktree
, (*state
)++);
1023 return s ? s
->s
: -1;
1026 int net_service_lookup(char *service
)
1029 se
= getservbyname(service
, NULL
);
1031 return ntohs(se
->s_port
);