Note: the analysis in this article is based on kernel version 3.10.0-693.el7, i.e. CentOS 7.4.
1. Function prototype
int listen(int sockfd, int backlog);
Parameter Description:
sockfd: the file descriptor of the socket, that is, the fd returned by the socket() system call
backlog: the maximum length of the queue that holds pending client connection requests
The listen() system call itself is relatively simple, but its backlog parameter is more complex: it affects both the half-open (SYN) connection queue and the fully established (accept) connection queue. For a detailed analysis, please refer to "Detailed explanation of TCP's backlog".
2. Kernel Implementation
/*
 * listen() system call entry point.
 *
 * fd:      file descriptor of the socket
 * backlog: requested queue length; capped at the somaxconn sysctl of the
 *          socket's network namespace before being passed on
 *
 * Returns the result of the socket's listen handler, the non-zero result
 * of the security hook, or the error stored in err by the fd lookup when
 * no socket was found.
 */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
struct socket *sock;
int err, fput_needed;
int somaxconn;
//Look up the struct socket behind fd (covered in the bind() system call analysis)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
//Upper bound for backlog, read from this socket's network namespace
somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
//The comparison is unsigned, so a negative backlog is also clamped to somaxconn
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
//Security hook; a non-zero result skips the listen handler and is returned to the caller
err = security_socket_listen(sock, backlog);
if (!err)
//Call the listen handler from the socket's ops table
err = sock->ops->listen(sock, backlog);
//NOTE(review): presumably drops the file reference taken by sockfd_lookup_light
fput_light(sock->file, fput_needed);
}
return err;
}
Like the bind() function, listen() operates on an fd, so the first step must be to obtain the corresponding socket structure from the fd in order to operate on the network-related variables and structures. From the analysis of the previous system calls, we know that sock->ops points to the inet_stream_ops structure, so sock->ops->listen points to inet_listen().
/*
 * Move a socket into listening state.
 *
 * sock:    the socket to put into (or keep in) the listen state
 * backlog: queue length to apply; stored in sk->sk_max_ack_backlog
 *
 * Runs with the socket locked. Returns 0 on success, -EINVAL when the
 * socket is not an unconnected SOCK_STREAM socket or is not in the
 * CLOSE or LISTEN state, or the error returned by fastopen_init_queue()
 * or inet_csk_listen_start().
 */
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err;
lock_sock(sk);
//Default result for the validity checks below
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;
old_state = sk->sk_state;
//listen() is only allowed on a socket in the CLOSE or LISTEN state
//(in the LISTEN state only the backlog is adjusted)
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
//TCP Fast Open setup, only when the server side is enabled by sysctl and
//no fastopen queue exists yet; not analyzed further in this article
if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
err = fastopen_init_queue(sk, backlog);
else if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT2) != 0)
//Queue length is taken from the bits above bit 15 of the sysctl value
err = fastopen_init_queue(sk, ((uint)sysctl_tcp_fastopen) >> 16);
else
err = 0;
if (err)
goto out;
tcp_fastopen_init_key_once(true);
}
//Initialize the listening socket, including the half-open (SYN) connection queue
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
}
//This sets the maximum length of the fully established (accept) connection queue;
//it is reached both for a new listener and for a backlog-only adjustment
sk->sk_max_ack_backlog = backlog;
err = 0;
out:
release_sock(sk);
return err;
}
inet_csk_listen_start() mainly does two things: it allocates the half-open (SYN) connection queue, and it adds the socket to the listen hash table.
/*
 * Start listening on sk: allocate the request queue, switch the socket
 * to TCP_LISTEN, re-check the port and hash the socket.
 *
 * sk:               the socket being moved into the listen state
 * nr_table_entries: size hint for the request queue (derived from the
 *                   backlog passed by inet_listen())
 *
 * Returns 0 on success, the non-zero result of reqsk_queue_alloc() if
 * the queue cannot be allocated, or -EADDRINUSE if get_port() fails; in
 * the last case the state is put back to TCP_CLOSE and the queue is
 * destroyed.
 */
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
//Allocate the half-open (SYN) connection queue according to nr_table_entries
//(related to the backlog parameter set by the user)
int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
if (rc != 0)
return rc;
//Reset the accept-queue limit and counter; inet_listen() stores the real
//limit in sk_max_ack_backlog after this function returns
sk->sk_max_ack_backlog = 0;
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
//Move the socket into the listen state
sk->sk_state = TCP_LISTEN;
/* bind() has already called sk->sk_prot->get_port() once before listen().
* It is called again here because bind() and listen() are not atomic with
* respect to each other: some properties of the socket, for example
* sk->sk_reuse or sk->sk_reuseport, may have been modified in between,
* so the port is checked again to make sure it is still available. */
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
//Set the source port (network byte order)
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
//Insert the listening socket into the listen hash table according to the port number
sk->sk_prot->hash(sk);
return 0;
}
//Failed: put the socket state back to its initial value, CLOSE
sk->sk_state = TCP_CLOSE;
__reqsk_queue_destroy(&icsk->icsk_accept_queue);
return -EADDRINUSE;
}
Finally, the socket is added to the listen hash table.
/*
 * Hash sk through __inet_hash() with bottom halves disabled.
 * A socket in the TCP_CLOSE state is left untouched.
 */
void inet_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
__inet_hash(sk);
local_bh_enable();
}
}
/*
 * Insert sk into the hash tables of its protocol.
 * A socket that is not in TCP_LISTEN is handed to __inet_hash_nolisten();
 * a listening socket is added to its listening_hash bucket under the
 * bucket lock, and the protocol's in-use counter is incremented.
 */
static void __inet_hash(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
if (sk->sk_state != TCP_LISTEN) {
__inet_hash_nolisten(sk, NULL);
return;
}
//Warn if the socket is already hashed
WARN_ON(!sk_unhashed(sk));
//Select the listen hash bucket for this socket (according to the port number)
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
spin_lock(&ilb->lock);
//Add the socket to the bucket's listen list
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock);
}
In summary, listen() computes the actual request queue length from the backlog set by the user and allocates the half-open (SYN) connection queue accordingly. It then confirms once more that the port is available, and finally adds the socket to the listen hash table.
Some structural relationships are as follows: