2 * Unix networking abstraction.
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
19 #define DEFINE_PLUG_METHOD_MACROS
25 struct socket_function_table
*fn
;
26 /* the above variable absolutely *must* be the first in this structure */
34 int frozen
; /* this causes readability notifications to be ignored */
35 int frozen_readable
; /* this means we missed at least one readability
36 * notification while we were frozen */
37 int localhost_only
; /* for listening sockets */
40 int oobpending
; /* is there OOB data available to read? */
42 int pending_error
; /* in case send() returns error */
47 * We used to typedef struct Socket_tag *Socket.
49 * Since we have made the networking abstraction slightly more
50 * abstract, Socket no longer means a tcp socket (it could mean
51 * an ssl socket). So now we must use Actual_Socket when we know
52 * we are talking about a tcp socket.
54 typedef struct Socket_tag
*Actual_Socket
;
58 /* address family this belongs to, AF_INET for IPv4, AF_INET6 for IPv6. */
60 unsigned long address
; /* Address IPv4 style. */
62 struct addrinfo
*ai
; /* Address IPv6 style. */
66 static tree234
*sktree
;
68 static int cmpfortree(void *av
, void *bv
)
70 Actual_Socket a
= (Actual_Socket
) av
, b
= (Actual_Socket
) bv
;
71 int as
= a
->s
, bs
= b
->s
;
79 static int cmpforsearch(void *av
, void *bv
)
81 Actual_Socket b
= (Actual_Socket
) bv
;
82 int as
= (int) av
, bs
= b
->s
;
92 sktree
= newtree234(cmpfortree
);
101 for (i
= 0; (s
= index234(sktree
, i
)) != NULL
; i
++) {
/*
 * Translate an errno value reported by a socket call into a fixed,
 * human-readable message.
 *
 *   error  an errno code, e.g. ECONNREFUSED.
 *
 * Returns a pointer to a string literal; the caller must neither
 * modify nor free it. Any code without a dedicated entry yields
 * "Unknown network error".
 */
char *error_string(int error)
{
    switch (error) {
      case EACCES:
        return "Network error: Permission denied";
      case EADDRINUSE:
        return "Network error: Address already in use";
      case EADDRNOTAVAIL:
        return "Network error: Cannot assign requested address";
      case EAFNOSUPPORT:
        return
            "Network error: Address family not supported by protocol family";
      case EALREADY:
        return "Network error: Operation already in progress";
      case ECONNABORTED:
        return "Network error: Software caused connection abort";
      case ECONNREFUSED:
        return "Network error: Connection refused";
      case ECONNRESET:
        return "Network error: Connection reset by peer";
      case EDESTADDRREQ:
        return "Network error: Destination address required";
      case EFAULT:
        return "Network error: Bad address";
        /*
         * EHOSTDOWN, EPFNOSUPPORT, ESHUTDOWN and ESOCKTNOSUPPORT are
         * not required by POSIX, so each is only offered where
         * <errno.h> actually defines it.
         */
#ifdef EHOSTDOWN
      case EHOSTDOWN:
        return "Network error: Host is down";
#endif
      case EHOSTUNREACH:
        return "Network error: No route to host";
      case EINPROGRESS:
        return "Network error: Operation now in progress";
      case EINTR:
        return "Network error: Interrupted function call";
      case EINVAL:
        return "Network error: Invalid argument";
      case EISCONN:
        return "Network error: Socket is already connected";
      case EMFILE:
        return "Network error: Too many open files";
      case EMSGSIZE:
        return "Network error: Message too long";
      case ENETDOWN:
        return "Network error: Network is down";
      case ENETRESET:
        return "Network error: Network dropped connection on reset";
      case ENETUNREACH:
        return "Network error: Network is unreachable";
      case ENOBUFS:
        return "Network error: No buffer space available";
      case ENOPROTOOPT:
        return "Network error: Bad protocol option";
      case ENOTCONN:
        return "Network error: Socket is not connected";
      case ENOTSOCK:
        return "Network error: Socket operation on non-socket";
      case EOPNOTSUPP:
        return "Network error: Operation not supported";
#ifdef EPFNOSUPPORT
      case EPFNOSUPPORT:
        return "Network error: Protocol family not supported";
#endif
      case EPROTONOSUPPORT:
        return "Network error: Protocol not supported";
      case EPROTOTYPE:
        return "Network error: Protocol wrong type for socket";
#ifdef ESHUTDOWN
      case ESHUTDOWN:
        return "Network error: Cannot send after socket shutdown";
#endif
#ifdef ESOCKTNOSUPPORT
      case ESOCKTNOSUPPORT:
        return "Network error: Socket type not supported";
#endif
      case ETIMEDOUT:
        return "Network error: Connection timed out";
      case EWOULDBLOCK:
        return "Network error: Resource temporarily unavailable";
      default:
        return "Unknown network error";
    }
}
182 SockAddr
sk_namelookup(char *host
, char **canonicalname
)
184 SockAddr ret
= smalloc(sizeof(struct SockAddr_tag
));
186 struct hostent
*h
= NULL
;
189 /* Clear the structure and default to IPv4. */
190 memset(ret
, 0, sizeof(struct SockAddr_tag
));
191 ret
->family
= 0; /* We set this one when we have resolved the host. */
195 if ((a
= inet_addr(host
)) == (unsigned long) INADDR_NONE
) {
197 if (getaddrinfo(host
, NULL
, NULL
, &ret
->ai
) == 0) {
198 ret
->family
= ret
->ai
->ai_family
;
203 * Otherwise use the IPv4-only gethostbyname... (NOTE:
204 * we don't use gethostbyname as a fallback!)
206 if (ret
->family
== 0) {
207 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
208 if ( (h
= gethostbyname(host
)) )
209 ret
->family
= AF_INET
;
211 if (ret
->family
== 0) {
212 ret
->error
= (h_errno
== HOST_NOT_FOUND
||
213 h_errno
== NO_DATA
||
214 h_errno
== NO_ADDRESS ?
"Host does not exist" :
215 h_errno
== TRY_AGAIN ?
216 "Temporary name service failure" :
217 "gethostbyname: unknown error");
223 /* If we got an address info use that... */
226 /* Are we in IPv4 fallback mode? */
227 /* We put the IPv4 address into the a variable so we can further-on use the IPv4 code... */
228 if (ret
->family
== AF_INET
)
230 (char *) &((struct sockaddr_in
*) ret
->ai
->
231 ai_addr
)->sin_addr
, sizeof(a
));
233 /* Now let's find that canonicalname... */
234 if (getnameinfo((struct sockaddr
*) ret
->ai
->ai_addr
,
236 AF_INET ?
sizeof(struct sockaddr_in
) :
237 sizeof(struct sockaddr_in6
), realhost
,
238 sizeof(realhost
), NULL
, 0, 0) != 0) {
239 strncpy(realhost
, host
, sizeof(realhost
));
242 /* We used the IPv4-only gethostbyname()... */
246 memcpy(&a
, h
->h_addr
, sizeof(a
));
247 /* This way we are always sure the h->h_name is valid :) */
248 strncpy(realhost
, h
->h_name
, sizeof(realhost
));
252 * This must be a numeric IPv4 address because it caused a
253 * success return from inet_addr.
255 ret
->family
= AF_INET
;
256 strncpy(realhost
, host
, sizeof(realhost
));
258 ret
->address
= ntohl(a
);
259 realhost
[lenof(realhost
)-1] = '\0';
260 *canonicalname
= smalloc(1+strlen(realhost
));
261 strcpy(*canonicalname
, realhost
);
265 void sk_getaddr(SockAddr addr
, char *buf
, int buflen
)
268 if (addr
->family
== AF_INET
) {
271 a
.s_addr
= htonl(addr
->address
);
272 strncpy(buf
, inet_ntoa(a
), buflen
);
275 FIXME
; /* I don't know how to get a text form of an IPv6 address. */
280 int sk_addrtype(SockAddr addr
)
282 return (addr
->family
== AF_INET ? ADDRTYPE_IPV4
: ADDRTYPE_IPV6
);
285 void sk_addrcopy(SockAddr addr
, char *buf
)
288 if (addr
->family
== AF_INET
) {
291 a
.s_addr
= htonl(addr
->address
);
292 memcpy(buf
, (char*) &a
.s_addr
, 4);
295 memcpy(buf
, (char*) addr
->ai
, 16);
300 void sk_addr_free(SockAddr addr
)
305 static Plug
sk_tcp_plug(Socket sock
, Plug p
)
307 Actual_Socket s
= (Actual_Socket
) sock
;
314 static void sk_tcp_flush(Socket s
)
317 * We send data to the socket as soon as we can anyway,
318 * so we don't need to do anything here. :-)
322 static void sk_tcp_close(Socket s
);
323 static int sk_tcp_write(Socket s
, char *data
, int len
);
324 static int sk_tcp_write_oob(Socket s
, char *data
, int len
);
325 static void sk_tcp_set_private_ptr(Socket s
, void *ptr
);
326 static void *sk_tcp_get_private_ptr(Socket s
);
327 static void sk_tcp_set_frozen(Socket s
, int is_frozen
);
328 static char *sk_tcp_socket_error(Socket s
);
330 Socket
sk_register(void *sock
, Plug plug
)
332 static struct socket_function_table fn_table
= {
338 sk_tcp_set_private_ptr
,
339 sk_tcp_get_private_ptr
,
347 * Create Socket structure.
349 ret
= smalloc(sizeof(struct Socket_tag
));
353 bufchain_init(&ret
->output_data
);
354 ret
->writable
= 1; /* to start with */
355 ret
->sending_oob
= 0;
357 ret
->frozen_readable
= 0;
358 ret
->localhost_only
= 0; /* unused, but best init anyway */
359 ret
->pending_error
= 0;
360 ret
->oobpending
= FALSE
;
366 ret
->error
= error_string(errno
);
377 Socket
sk_new(SockAddr addr
, int port
, int privport
, int oobinline
,
378 int nodelay
, Plug plug
)
380 static struct socket_function_table fn_table
= {
386 sk_tcp_set_private_ptr
,
387 sk_tcp_get_private_ptr
,
394 struct sockaddr_in6 a6
;
396 struct sockaddr_in a
;
402 * Create Socket structure.
404 ret
= smalloc(sizeof(struct Socket_tag
));
408 bufchain_init(&ret
->output_data
);
409 ret
->connected
= 0; /* to start with */
410 ret
->writable
= 0; /* to start with */
411 ret
->sending_oob
= 0;
413 ret
->frozen_readable
= 0;
414 ret
->localhost_only
= 0; /* unused, but best init anyway */
415 ret
->pending_error
= 0;
416 ret
->oobpending
= FALSE
;
422 s
= socket(addr
->family
, SOCK_STREAM
, 0);
426 ret
->error
= error_string(errno
);
430 ret
->oobinline
= oobinline
;
433 setsockopt(s
, SOL_SOCKET
, SO_OOBINLINE
, (void *) &b
, sizeof(b
));
438 setsockopt(s
, IPPROTO_TCP
, TCP_NODELAY
, (void *) &b
, sizeof(b
));
442 * Bind to local address.
445 localport
= 1023; /* count from 1023 downwards */
447 localport
= 0; /* just use port 0 (ie kernel picks) */
449 /* Loop round trying to bind */
454 if (addr
->family
== AF_INET6
) {
455 memset(&a6
, 0, sizeof(a6
));
456 a6
.sin6_family
= AF_INET6
;
457 /*a6.sin6_addr = in6addr_any; *//* == 0 */
458 a6
.sin6_port
= htons(localport
);
462 a
.sin_family
= AF_INET
;
463 a
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
464 a
.sin_port
= htons(localport
);
467 retcode
= bind(s
, (addr
->family
== AF_INET6 ?
468 (struct sockaddr
*) &a6
:
469 (struct sockaddr
*) &a
),
471 AF_INET6 ?
sizeof(a6
) : sizeof(a
)));
473 retcode
= bind(s
, (struct sockaddr
*) &a
, sizeof(a
));
480 if (err
!= EADDRINUSE
) /* failed, for a bad reason */
485 break; /* we're only looping once */
488 break; /* we might have got to the end */
492 ret
->error
= error_string(err
);
497 * Connect to remote address.
500 if (addr
->family
== AF_INET6
) {
501 memset(&a
, 0, sizeof(a
));
502 a6
.sin6_family
= AF_INET6
;
503 a6
.sin6_port
= htons((short) port
);
505 ((struct sockaddr_in6
*) addr
->ai
->ai_addr
)->sin6_addr
;
509 a
.sin_family
= AF_INET
;
510 a
.sin_addr
.s_addr
= htonl(addr
->address
);
511 a
.sin_port
= htons((short) port
);
516 connect(s
, ((addr
->family
== AF_INET6
) ?
517 (struct sockaddr
*) &a6
: (struct sockaddr
*) &a
),
518 (addr
->family
== AF_INET6
) ?
sizeof(a6
) : sizeof(a
))
520 connect(s
, (struct sockaddr
*) &a
, sizeof(a
))
524 * FIXME: We are prepared to receive EWOULDBLOCK here,
525 * because we might want the connection to be made
526 * asynchronously; but how do we actually arrange this in
529 if ( errno
!= EWOULDBLOCK
) {
530 ret
->error
= error_string(errno
);
535 * If we _don't_ get EWOULDBLOCK, the connect has completed
536 * and we should set the socket as connected and writable.
547 Socket
sk_newlistener(int port
, Plug plug
, int local_host_only
)
549 static struct socket_function_table fn_table
= {
555 sk_tcp_set_private_ptr
,
556 sk_tcp_get_private_ptr
,
563 struct sockaddr_in6 a6
;
565 struct sockaddr_in a
;
572 * Create Socket structure.
574 ret
= smalloc(sizeof(struct Socket_tag
));
578 bufchain_init(&ret
->output_data
);
579 ret
->writable
= 0; /* to start with */
580 ret
->sending_oob
= 0;
582 ret
->frozen_readable
= 0;
583 ret
->localhost_only
= local_host_only
;
584 ret
->pending_error
= 0;
585 ret
->oobpending
= FALSE
;
591 s
= socket(AF_INET
, SOCK_STREAM
, 0);
595 ret
->error
= error_string(errno
);
601 setsockopt(s
, SOL_SOCKET
, SO_REUSEADDR
, (const char *)&on
, sizeof(on
));
604 if (addr
->family
== AF_INET6
) {
605 memset(&a6
, 0, sizeof(a6
));
606 a6
.sin6_family
= AF_INET6
;
608 a6
.sin6_addr
= in6addr_loopback
;
610 a6
.sin6_addr
= in6addr_any
;
611 a6
.sin6_port
= htons(port
);
615 a
.sin_family
= AF_INET
;
617 a
.sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
619 a
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
620 a
.sin_port
= htons((short)port
);
623 retcode
= bind(s
, (addr
->family
== AF_INET6 ?
624 (struct sockaddr
*) &a6
:
625 (struct sockaddr
*) &a
),
627 AF_INET6 ?
sizeof(a6
) : sizeof(a
)));
629 retcode
= bind(s
, (struct sockaddr
*) &a
, sizeof(a
));
638 ret
->error
= error_string(err
);
643 if (listen(s
, SOMAXCONN
) < 0) {
645 ret
->error
= error_string(errno
);
654 static void sk_tcp_close(Socket sock
)
656 Actual_Socket s
= (Actual_Socket
) sock
;
664 * The function which tries to send on a socket once it's deemed
667 void try_send(Actual_Socket s
)
669 while (s
->sending_oob
|| bufchain_size(&s
->output_data
) > 0) {
675 if (s
->sending_oob
) {
676 urgentflag
= MSG_OOB
;
677 len
= s
->sending_oob
;
681 bufchain_prefix(&s
->output_data
, &data
, &len
);
683 nsent
= send(s
->s
, data
, len
, urgentflag
);
684 noise_ultralight(nsent
);
686 err
= (nsent
< 0 ? errno
: 0);
687 if (err
== EWOULDBLOCK
) {
689 * Perfectly normal: we've sent all we can for the moment.
693 } else if (nsent
== 0 ||
694 err
== ECONNABORTED
|| err
== ECONNRESET
) {
696 * If send() returns CONNABORTED or CONNRESET, we
697 * unfortunately can't just call plug_closing(),
698 * because it's quite likely that we're currently
699 * _in_ a call from the code we'd be calling back
700 * to, so we'd have to make half the SSH code
701 * reentrant. Instead we flag a pending error on
702 * the socket, to be dealt with (by calling
703 * plug_closing()) at some suitable future moment.
705 s
->pending_error
= err
;
708 /* We're inside the Unix frontend here, so we know
709 * that the frontend handle is unnecessary. */
710 logevent(NULL
, error_string(err
));
711 fatalbox("%s", error_string(err
));
714 if (s
->sending_oob
) {
716 memmove(s
->oobdata
, s
->oobdata
+nsent
, len
-nsent
);
717 s
->sending_oob
= len
- nsent
;
722 bufchain_consume(&s
->output_data
, nsent
);
728 static int sk_tcp_write(Socket sock
, char *buf
, int len
)
730 Actual_Socket s
= (Actual_Socket
) sock
;
733 * Add the data to the buffer list on the socket.
735 bufchain_add(&s
->output_data
, buf
, len
);
738 * Now try sending from the start of the buffer list.
743 return bufchain_size(&s
->output_data
);
746 static int sk_tcp_write_oob(Socket sock
, char *buf
, int len
)
748 Actual_Socket s
= (Actual_Socket
) sock
;
751 * Replace the buffer list on the socket with the data.
753 bufchain_clear(&s
->output_data
);
754 assert(len
<= sizeof(s
->oobdata
));
755 memcpy(s
->oobdata
, buf
, len
);
756 s
->sending_oob
= len
;
759 * Now try sending from the start of the buffer list.
764 return s
->sending_oob
;
767 int select_result(int fd
, int event
)
771 char buf
[20480]; /* nice big buffer for plenty of speed */
775 /* Find the Socket structure */
776 s
= find234(sktree
, (void *) fd
, cmpforsearch
);
778 return 1; /* boggle */
780 noise_ultralight(event
);
783 #ifdef FIXME_NONBLOCKING_CONNECTIONS
784 case FIXME
: /* connected */
785 s
->connected
= s
->writable
= 1;
788 case 4: /* exceptional */
791 * On a non-oobinline socket, this indicates that we
792 * can immediately perform an OOB read and get back OOB
793 * data, which we will send to the back end with
794 * type==2 (urgent data).
796 ret
= recv(s
->s
, buf
, sizeof(buf
), MSG_OOB
);
797 noise_ultralight(ret
);
799 char *str
= (ret
== 0 ?
"Internal networking trouble" :
800 error_string(errno
));
801 /* We're inside the Unix frontend here, so we know
802 * that the frontend handle is unnecessary. */
806 return plug_receive(s
->plug
, 2, buf
, ret
);
812 * If we reach here, this is an oobinline socket, which
813 * means we should set s->oobpending and then deal with it
814 * when we get called for the readability event (which
815 * should also occur).
817 s
->oobpending
= TRUE
;
819 case 1: /* readable; also acceptance */
822 * On a listening socket, the readability event means a
823 * connection is ready to be accepted.
825 struct sockaddr_in isa
;
826 int addrlen
= sizeof(struct sockaddr_in
);
827 int t
; /* socket of connection */
829 memset(&isa
, 0, sizeof(struct sockaddr_in
));
831 t
= accept(s
->s
,(struct sockaddr
*)&isa
,&addrlen
);
836 if (s
->localhost_only
&&
837 ntohl(isa
.sin_addr
.s_addr
) != INADDR_LOOPBACK
) {
838 close(t
); /* someone let nonlocal through?! */
839 } else if (plug_accepting(s
->plug
, (void*)t
)) {
840 close(t
); /* denied or error */
846 * If we reach here, this is not a listening socket, so
847 * readability really means readability.
850 /* In the case the socket is still frozen, we don't even bother */
852 s
->frozen_readable
= 1;
857 * We have received data on the socket. For an oobinline
858 * socket, this might be data _before_ an urgent pointer,
859 * in which case we send it to the back end with type==1
860 * (data prior to urgent).
862 if (s
->oobinline
&& s
->oobpending
) {
864 if (ioctl(s
->s
, SIOCATMARK
, &atmark
) == 0 && atmark
)
865 s
->oobpending
= FALSE
; /* clear this indicator */
869 ret
= recv(s
->s
, buf
, s
->oobpending ?
1 : sizeof(buf
), 0);
870 noise_ultralight(ret
);
872 if (errno
== EWOULDBLOCK
) {
877 return plug_closing(s
->plug
, error_string(errno
), errno
, 0);
878 } else if (0 == ret
) {
879 return plug_closing(s
->plug
, NULL
, 0, 0);
881 return plug_receive(s
->plug
, atmark ?
0 : 1, buf
, ret
);
884 case 2: /* writable */
886 int bufsize_before
, bufsize_after
;
888 bufsize_before
= s
->sending_oob
+ bufchain_size(&s
->output_data
);
890 bufsize_after
= s
->sending_oob
+ bufchain_size(&s
->output_data
);
891 if (bufsize_after
< bufsize_before
)
892 plug_sent(s
->plug
, bufsize_after
);
901 * Deal with socket errors detected in try_send().
903 void net_pending_errors(void)
909 * This might be a fiddly business, because it's just possible
910 * that handling a pending error on one socket might cause
911 * others to be closed. (I can't think of any reason this might
912 * happen in current SSH implementation, but to maintain
913 * generality of this network layer I'll assume the worst.)
915 * So what we'll do is search the socket list for _one_ socket
916 * with a pending error, and then handle it, and then search
917 * the list again _from the beginning_. Repeat until we make a
918 * pass with no socket errors present. That way we are
919 * protected against the socket list changing under our feet.
923 for (i
= 0; (s
= index234(sktree
, i
)) != NULL
; i
++) {
924 if (s
->pending_error
) {
926 * An error has occurred on this socket. Pass it to the
929 plug_closing(s
->plug
, error_string(s
->pending_error
),
930 s
->pending_error
, 0);
938 * Each socket abstraction contains a `void *' private field in
939 * which the client can keep state.
941 static void sk_tcp_set_private_ptr(Socket sock
, void *ptr
)
943 Actual_Socket s
= (Actual_Socket
) sock
;
944 s
->private_ptr
= ptr
;
947 static void *sk_tcp_get_private_ptr(Socket sock
)
949 Actual_Socket s
= (Actual_Socket
) sock
;
950 return s
->private_ptr
;
954 * Special error values are returned from sk_namelookup and sk_new
955 * if there's a problem. These functions extract an error message,
956 * or return NULL if there's no problem.
958 char *sk_addr_error(SockAddr addr
)
962 static char *sk_tcp_socket_error(Socket sock
)
964 Actual_Socket s
= (Actual_Socket
) sock
;
968 static void sk_tcp_set_frozen(Socket sock
, int is_frozen
)
970 Actual_Socket s
= (Actual_Socket
) sock
;
971 if (s
->frozen
== is_frozen
)
973 s
->frozen
= is_frozen
;
974 if (!is_frozen
&& s
->frozen_readable
) {
976 recv(s
->s
, &c
, 1, MSG_PEEK
);
978 s
->frozen_readable
= 0;
982 * For Unix select()-based frontends: enumerate all sockets
983 * currently active, and state whether we currently wish to receive
984 * select events on them for reading, writing and exceptional
987 static void set_rwx(Actual_Socket s
, int *rwx
)
990 if (s
->connected
&& !s
->frozen
)
991 val
|= 1 | 4; /* read, except */
992 if (bufchain_size(&s
->output_data
))
993 val
|= 2; /* write */
995 val
|= 1; /* read == accept */
999 int first_socket(int *state
, int *rwx
)
1003 s
= index234(sktree
, (*state
)++);
1006 return s ? s
->s
: -1;
1009 int next_socket(int *state
, int *rwx
)
1011 Actual_Socket s
= index234(sktree
, (*state
)++);
1014 return s ? s
->s
: -1;
1017 int net_service_lookup(char *service
)
1020 se
= getservbyname(service
, NULL
);
1022 return ntohs(se
->s_port
);