linux 5.14のlistenの実装を記録します
5374 ワード
概要
面接の時、候補者は半接続に言及し、全接続キューはそれぞれカーネルの定数、backlogパラメータによって決定される.これは前の理解と合わないので、コードを見て探求しました.
ソースプロファイル
ソースバージョン:https://github.com/torvalds/l... d 992 fe 5318 d 8 d 7 af 9510 b 879439 a 3 c 7 f 283 da 442
いりぐち
「sys_listen」を検索すると、backlogは確かにユーザー入力とsysctl_を取得していることがわかります.somaxconnの最小値.int __sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
int somaxconn;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
err = security_socket_listen(sock, backlog);
if (!err)
err = sock->ops->listen(sock, backlog);
fput_light(sock->file, fput_needed);
}
return err;
}
Listenの実装
sock->opsは関数ポインタ構造体であり、「sock->ops=」を検索すると、一連のproto_が見つかります.ops.選択sock_opsは見続けます.static const struct proto_ops base_sock_ops = {
.family = PF_ISDN,
.owner = THIS_MODULE,
.release = base_sock_release,
.ioctl = base_sock_ioctl,
.bind = base_sock_bind,
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.mmap = sock_no_mmap
};
ほとんどのproto_を発見opsはlistenの実装を定義していない.「.listen=」を検索すると、svc_が見つかります.listen, dn_listen, unix_listen, vsock_listen, x25_Listen等名前とファイルヘッダの説明からinet_を推測するListenは実際に使用されるlisten、すなわちsocket_addrで指定したプロトコル.見続けてlisten.backlogがsk->skに割り当てられていることがわかりますmax_ack_backlog,検索では他のlisten関数も同様であることが分かった.int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err, tcp_fastopen;
lock_sock(sk);
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;
old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
inet_からconnection_sock.c:795に示すように、この値によって決定されるicsk_accept_Queueキューの大きさは、新生代の古い年代の最適化を行った. icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
* one of the following things: synack was lost, ack was lost,
* rtt is high or nobody planned to ack (i.e. synflood).
* When server is a bit loaded, queue is populated with old
* open requests, reducing effective size of queue.
* When server is well loaded, queue size reduces to zero
* after several minutes of work. It is not synflood,
* it is normal operation. The solution is pruning
* too old entries overriding normal timeout, when
* situation becomes dangerous.
*
* Essentially, we reserve half of room for young
* embrions; and abort old ones without pity, if old
* ones are about to clog our table.
*/
queue = &icsk->icsk_accept_queue;
qlen = reqsk_queue_len(queue);
if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
while (max_syn_ack_retries > 2) {
if (qlen < young)
break;
max_syn_ack_retries--;
young <<= 1;
}
}
acceptにはfind already established connectionがあります./*
* This will accept the next outstanding connection.
*/
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct request_sock *req;
struct sock *newsk;
int error;
lock_sock(sk);
/* We need to make sure that this socket is listening,
* and that it has something pending.
*/
error = -EINVAL;
if (sk->sk_state != TCP_LISTEN)
goto out_err;
/* Find already established connection */
if (reqsk_queue_empty(queue)) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
if (!timeo)
goto out_err;
error = inet_csk_wait_for_connect(sk, timeo);
if (error)
goto out_err;
}
req = reqsk_queue_remove(queue, sk);
newsk = req->sk;
結論
backlog決定icsk_accept_Queueキューサイズ.このキューにはESTABLISHTEDステータスのリンクがあります.SYNC_についてはRECVのリンクはコードを見続ける必要があります.
ソースバージョン:https://github.com/torvalds/l... d 992 fe 5318 d 8 d 7 af 9510 b 879439 a 3 c 7 f 283 da 442
いりぐち
「sys_listen」を検索すると、backlogは確かにユーザー入力とsysctl_を取得していることがわかります.somaxconnの最小値.
int __sys_listen(int fd, int backlog)
{
struct socket *sock;
int err, fput_needed;
int somaxconn;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
err = security_socket_listen(sock, backlog);
if (!err)
err = sock->ops->listen(sock, backlog);
fput_light(sock->file, fput_needed);
}
return err;
}
Listenの実装
sock->opsは関数ポインタ構造体であり、「sock->ops=」を検索すると、一連のproto_が見つかります.ops.選択sock_opsは見続けます.
static const struct proto_ops base_sock_ops = {
.family = PF_ISDN,
.owner = THIS_MODULE,
.release = base_sock_release,
.ioctl = base_sock_ioctl,
.bind = base_sock_bind,
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.mmap = sock_no_mmap
};
ほとんどのproto_を発見opsはlistenの実装を定義していない.「.listen=」を検索すると、svc_が見つかります.listen, dn_listen, unix_listen, vsock_listen, x25_Listen等名前とファイルヘッダの説明からinet_を推測するListenは実際に使用されるlisten、すなわちsocket_addrで指定したプロトコル.見続けてlisten.backlogがsk->skに割り当てられていることがわかりますmax_ack_backlog,検索では他のlisten関数も同様であることが分かった.
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err, tcp_fastopen;
lock_sock(sk);
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;
old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
inet_からconnection_sock.c:795に示すように、この値によって決定されるicsk_accept_Queueキューの大きさは、新生代の古い年代の最適化を行った.
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
* one of the following things: synack was lost, ack was lost,
* rtt is high or nobody planned to ack (i.e. synflood).
* When server is a bit loaded, queue is populated with old
* open requests, reducing effective size of queue.
* When server is well loaded, queue size reduces to zero
* after several minutes of work. It is not synflood,
* it is normal operation. The solution is pruning
* too old entries overriding normal timeout, when
* situation becomes dangerous.
*
* Essentially, we reserve half of room for young
* embrions; and abort old ones without pity, if old
* ones are about to clog our table.
*/
queue = &icsk->icsk_accept_queue;
qlen = reqsk_queue_len(queue);
if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
while (max_syn_ack_retries > 2) {
if (qlen < young)
break;
max_syn_ack_retries--;
young <<= 1;
}
}
acceptにはfind already established connectionがあります.
/*
* This will accept the next outstanding connection.
*/
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct request_sock *req;
struct sock *newsk;
int error;
lock_sock(sk);
/* We need to make sure that this socket is listening,
* and that it has something pending.
*/
error = -EINVAL;
if (sk->sk_state != TCP_LISTEN)
goto out_err;
/* Find already established connection */
if (reqsk_queue_empty(queue)) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
if (!timeo)
goto out_err;
error = inet_csk_wait_for_connect(sk, timeo);
if (error)
goto out_err;
}
req = reqsk_queue_remove(queue, sk);
newsk = req->sk;