概述

shutdown系统调用由inet_shutdown处理,它根据连接状态调用传输层的两个函数之一:对于SYN_SENT状态,以及关闭方向包含接收方向(RCV_SHUTDOWN)的LISTEN状态,以非阻塞模式调用tcp_disconnect断开连接;对于其余状态(例如ESTABLISHED),则先把how记录到sk_shutdown,再调用tcp_shutdown关闭连接;本文除了对这两个函数进行分析以外,还会分析在shutdown关闭了读或者写之后,读写系统调用sendmsg和recvmsg将如何处理对应操作;

 1 /* Shutdown operation: dispatch on sk_state to the protocol's shutdown or disconnect handler */
 2 int inet_shutdown(struct socket *sock, int how)
 3 {
 4         /*...*/
 5     switch (sk->sk_state) {
 6     case TCP_CLOSE:
 7         err = -ENOTCONN;    /* no break: execution continues into default */
 8         /* Hack to wake up other listeners, who can poll for
 9            POLLHUP, even on eg. unconnected UDP sockets -- RR */
10     default:
11         /* Record how in sk_shutdown, then call the protocol's shutdown handler if one is set */
12         sk->sk_shutdown |= how;
13         if (sk->sk_prot->shutdown)
14             sk->sk_prot->shutdown(sk, how);
15         break;
16 
17     /* Remaining two branches are temporary solution for missing
18      * close() in multithreaded environment. It is _not_ a good idea,
19      * but we have no choice until close() is repaired at VFS level.
20      */
21     case TCP_LISTEN:
22         /* Listening socket: nothing to do unless the receive direction is being shut down */
23         if (!(how & RCV_SHUTDOWN))
24             break;
25         /* Receive direction is being shut down: continue into the disconnect path */
26         /* Fall through */
27     case TCP_SYN_SENT:
28         /* Call the protocol's disconnect handler in non-blocking mode */
29         err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
30 
31         /* Adjust the socket state according to the disconnect result */
32         sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
33         break;
34     }
35 
36     /* Wake up anyone sleeping in poll. */
37     /* State may have changed: notify waiters through the sk_state_change callback */
38     sk->sk_state_change(sk);
39     release_sock(sk);
40     return err;
41 }

 

tcp_shutdown

tcp_shutdown函数完成设置关闭之后的状态,并且在需要时发送fin;注意:如果只关闭接收方向(how中不含SEND_SHUTDOWN),则函数直接返回,不发送fin,接收方向的关闭只是由recvmsg系统调用根据sk_shutdown中的RCV_SHUTDOWN标记来处理;

 1 /*
 2  *    Shutdown the sending side of a connection. Much like close except
 3  *    that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD).
 4  */
 5 
 6 void tcp_shutdown(struct sock *sk, int how)
 7 {
 8     /*    We need to grab some memory, and put together a FIN,
 9      *    and then put it into the queue to be sent.
10      *        Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
11      */
12     /* SEND_SHUTDOWN not requested (receive-only shutdown): return without sending a FIN */
13     if (!(how & SEND_SHUTDOWN))
14         return;
15 
16     /* If we've already sent a FIN, or it's a closed state, skip this. */
17 
18     /* Only these states go through the close-state transition */
19     if ((1 << sk->sk_state) &
20         (TCPF_ESTABLISHED | TCPF_SYN_SENT |
21          TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
22         /* Clear out any half completed packets.  FIN if needed. */
23         /* Switch to the new state; send a FIN only when tcp_close_state() returns non-zero */
24         if (tcp_close_state(sk))
25             tcp_send_fin(sk);
26     }
27 }

 

tcp_close_state函数根据new_state状态表进行跳转,比如TCP_ESTABLISHED关闭时会跳转到TCP_FIN_WAIT1 | TCP_ACTION_FIN;

 1 static const unsigned char new_state[16] = {    /* indexed by the current sk_state; an entry is the next state, optionally OR-ed with TCP_ACTION_FIN */
 2   /* current state:        new state:      action:    */
 3   [0 /* (Invalid) */]    = TCP_CLOSE,
 4   [TCP_ESTABLISHED]    = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
 5   [TCP_SYN_SENT]    = TCP_CLOSE,
 6   [TCP_SYN_RECV]    = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
 7   [TCP_FIN_WAIT1]    = TCP_FIN_WAIT1,
 8   [TCP_FIN_WAIT2]    = TCP_FIN_WAIT2,
 9   [TCP_TIME_WAIT]    = TCP_CLOSE,
10   [TCP_CLOSE]        = TCP_CLOSE,
11   [TCP_CLOSE_WAIT]    = TCP_LAST_ACK  | TCP_ACTION_FIN,
12   [TCP_LAST_ACK]    = TCP_LAST_ACK,
13   [TCP_LISTEN]        = TCP_CLOSE,
14   [TCP_CLOSING]        = TCP_CLOSING,
15   [TCP_NEW_SYN_RECV]    = TCP_CLOSE,    /* should not happen ! */
16 };
17 
18 static int tcp_close_state(struct sock *sk)    /* applies the new_state[] transition; non-zero return means the entry carries TCP_ACTION_FIN */
19 {
20     int next = (int)new_state[sk->sk_state];    /* table entry for the current state */
21     int ns = next & TCP_STATE_MASK;    /* keep only the state bits, dropping the action flag */
22 
23     tcp_set_state(sk, ns);
24 
25     return next & TCP_ACTION_FIN;    /* non-zero only for entries that request a FIN */
26 }

 

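tcp_send_fin完成fin的发送:如果发送队列中还有数据段未发送,则复用队尾的数据段,在上面打fin标记;没有能复用的情况下,则新分配一个数据段;然后关闭nagle算法,并将队列中待发送的数据段发送出去;(注: 在内存压力下,即使队尾数据段已经发送过,也不再分配新的skb,而是直接在它上面打fin标记,并将snd_nxt加1,相当于“假装”fin已经随上一次传输发出;按照内核注释的说明,这种情况下fin要等到超时之后才会真正发送出去;新skb分配失败且队尾skb存在时,也会跳转到同样的流程)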

 1 /* Send a FIN. The caller locks the socket for us.
 2  * We should try to send a FIN packet really hard, but eventually give up.
 3  */
 4 void tcp_send_fin(struct sock *sk)
 5 {
 6     struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
 7     struct tcp_sock *tp = tcp_sk(sk);
 8 
 9     /* Optimization, tack on the FIN if we have one skb in write queue and
10      * this skb was not yet sent, or we are under memory pressure.
11      * Note: in the latter case, FIN packet will be sent after a timeout,
12      * as TCP stack thinks it has already been transmitted.
13      */
14     /* A tail skb exists && (there is a send head, i.e. unsent data || memory pressure) */
15     if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
16 coalesce:
17         /* Set the FIN flag on the tail skb */
18         TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
19         /* The FIN consumes one sequence number */
20         TCP_SKB_CB(tskb)->end_seq++;
21         tp->write_seq++;
22 
23         /* No send head: tskb was already sent (reached via memory pressure or the allocation-failure goto) */
24         if (!tcp_send_head(sk)) {
25             /* This means tskb was already sent.
26              * Pretend we included the FIN on previous transmit.
27              * We need to set tp->snd_nxt to the value it would have
28              * if FIN had been sent. This is because retransmit path
29              * does not change tp->snd_nxt.
30              */
31             tp->snd_nxt++;
32             return;
33         }
34     }
35     /* Otherwise a dedicated skb has to be allocated for the FIN */
36     else {
37         /* Allocate the skb */
38         skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
39         if (unlikely(!skb)) {
40             /* Allocation failed: if a tail skb exists, fall back to the coalesce path and put the FIN on it */
41             if (tskb)
42                 goto coalesce;
43             return;
44         }
45 
46         /* Initialize the skb */
47         skb_reserve(skb, MAX_TCP_HEADER);
48         sk_forced_mem_schedule(sk, skb->truesize);
49         /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
50         tcp_init_nondata_skb(skb, tp->write_seq,
51                      TCPHDR_ACK | TCPHDR_FIN);
52 
53         /* Append it to the write queue */
54         tcp_queue_skb(sk, skb);
55     }
56 
57     /* Push the pending frames with Nagle turned off (TCP_NAGLE_OFF) */
58     __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
59 }

 

tcp_disconnect

在连接为LISTEN或者SYN_SENT状态时,会调用tcp_disconnect断开连接;函数首先针对不同的旧状态分别做各自特有的处理,然后再统一清理资源;

 1 int tcp_disconnect(struct sock *sk, int flags)
 2 {
 3     struct inet_sock *inet = inet_sk(sk);
 4     struct inet_connection_sock *icsk = inet_csk(sk);
 5     struct tcp_sock *tp = tcp_sk(sk);
 6     int err = 0;    /* never reassigned in this excerpt, so 0 is what gets returned */
 7     int old_state = sk->sk_state;
 8 
 9     /* If not already closed, switch to TCP_CLOSE. NOTE(review): the original annotation says this also removes the sock from the hash table - not visible in this excerpt */
10     if (old_state != TCP_CLOSE)
11         tcp_set_state(sk, TCP_CLOSE);
12 
13     /* ABORT function of RFC793 */
14     /* LISTEN state: stop listening */
15     if (old_state == TCP_LISTEN) {
16         inet_csk_listen_stop(sk);
17     }
18     /* Repair mode */
19     else if (unlikely(tp->repair)) {
20         sk->sk_err = ECONNABORTED;
21     } 
22     /* The old state needs a reset
23     || (snd_nxt has not reached write_seq
24         && the old state is CLOSING or LAST_ACK) */
25     else if (tcp_need_reset(old_state) ||
26            (tp->snd_nxt != tp->write_seq &&
27             (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
28         /* The last check adjusts for discrepancy of Linux wrt. RFC
29          * states
30          */
31         /* Send a RST */
32         tcp_send_active_reset(sk, gfp_any());
33         sk->sk_err = ECONNRESET;
34     } 
35     /* SYN_SENT state */
36     else if (old_state == TCP_SYN_SENT)
37         sk->sk_err = ECONNRESET;
38 
39     /* Clear the transmit timers */
40     tcp_clear_xmit_timers(sk);
41 
42     /* Free the skbs in the receive queue */
43     __skb_queue_purge(&sk->sk_receive_queue);
44 
45     /* Free the skbs in the write queue */
46     tcp_write_queue_purge(sk);
47     tcp_fastopen_active_disable_ofo_check(sk);
48     /* Free the skbs that arrived out of order */
49     skb_rbtree_purge(&tp->out_of_order_queue);
50 
51 
52     /* Remaining cleanup: reset the per-connection fields below */
53     
54     inet->inet_dport = 0;
55 
56     if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
57         inet_reset_saddr(sk);
58 
59     sk->sk_shutdown = 0;
60     sock_reset_flag(sk, SOCK_DONE);
61     tp->srtt_us = 0;
62     tp->write_seq += tp->max_window + 2;
63     if (tp->write_seq == 0)
64         tp->write_seq = 1;
65     icsk->icsk_backoff = 0;
66     tp->snd_cwnd = 2;
67     icsk->icsk_probes_out = 0;
68     tp->packets_out = 0;
69     tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
70     tp->snd_cwnd_cnt = 0;
71     tp->window_clamp = 0;
72     tcp_set_ca_state(sk, TCP_CA_Open);
73     tcp_clear_retrans(tp);
74     inet_csk_delack_init(sk);
75     /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
76      * issue in __tcp_select_window()
77      */
78     icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
79     tcp_init_send_head(sk);
80     memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
81     __sk_dst_reset(sk);
82     dst_release(sk->sk_rx_dst);
83     sk->sk_rx_dst = NULL;
84     tcp_saved_syn_free(tp);
85 
86     /* Clean up fastopen related fields */
87     tcp_free_fastopen_req(tp);
88     inet->defer_connect = 0;
89 
90     WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
91 
92     sk->sk_error_report(sk);
93     return err;
94 }

 

tcp_sendmsg&&tcp_recvmsg

在使用shutdown关闭了发送之后,sk_shutdown中带有SEND_SHUTDOWN标记;此时再次调用tcp_sendmsg发送数据,函数会在检查该标记后跳转到do_error,而错误码err事先已被置为-EPIPE,即发送失败并返回错误;

1 int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
2 {
3     err = -EPIPE;    /* error code in effect when the check below jumps to do_error */
4     if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))    /* pending socket error, or the send direction was shut down */
5         goto do_error;
6 }

 

在使用shutdown关闭了接收之后,再次调用tcp_recvmsg接收数据时,函数仍会先遍历接收队列,队列中已有的数据照常可以读取;只有在队列中找不到可读数据时,才会检查sk_shutdown中的RCV_SHUTDOWN标记,此时函数不再等待新数据,而是立即跳出循环返回;

 1 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 2         int flags, int *addr_len)
 3 {
 4         /*... */
 5 
 6     do {
 7         u32 offset;
 8 
 9         /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
10         if (tp->urg_data && tp->urg_seq == *seq) {
11             if (copied)
12                 break;
13             if (signal_pending(current)) {
14                 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
15                 break;
16             }
17         }
18 
19         /* Next get a buffer. */
20 
21         last = skb_peek_tail(&sk->sk_receive_queue);
22         skb_queue_walk(&sk->sk_receive_queue, skb) {    /* the receive queue is scanned before any shutdown check below */
23             last = skb;
24             /* Now that we have two receive queues this
25              * shouldn't happen.
26              */
27             if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
28                  "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
29                  *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
30                  flags))
31                 break;
32 
33             offset = *seq - TCP_SKB_CB(skb)->seq;
34             if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
35                 pr_err_once("%s: found a SYN, please report !\n", __func__);
36                 offset--;
37             }
38             if (offset < skb->len)    /* this skb still holds data at the current read position */
39                 goto found_ok_skb;
40             if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
41                 goto found_fin_ok;
42             WARN(!(flags & MSG_PEEK),
43                  "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
44                  *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
45         }
46 
47         /* Well, if we have backlog, try to process it now yet. */
48 
49         if (copied >= target && !sk->sk_backlog.tail)
50             break;
51 
52         if (copied) {    /* some data was already copied: stop on any of the conditions below */
53             if (sk->sk_err ||
54                 sk->sk_state == TCP_CLOSE ||
55                 (sk->sk_shutdown & RCV_SHUTDOWN) ||
56                 !timeo ||
57                 signal_pending(current))
58                 break;
59         } else {    /* nothing copied yet */
60             if (sock_flag(sk, SOCK_DONE))
61                 break;
62 
63             if (sk->sk_err) {
64                 copied = sock_error(sk);
65                 break;
66             }
67 
68             if (sk->sk_shutdown & RCV_SHUTDOWN)    /* receive direction shut down: leave the loop with copied still 0 */
69                 break;
70 
71             if (sk->sk_state == TCP_CLOSE) {
72                 if (!sock_flag(sk, SOCK_DONE)) {
73                     /* This occurs when user tries to read
74                      * from never connected socket.
75                      */
76                     copied = -ENOTCONN;
77                     break;
78                 }
79                 break;
80             }
81 
82             if (!timeo) {
83                 copied = -EAGAIN;
84                 break;
85             }
86 
87             if (signal_pending(current)) {
88                 copied = sock_intr_errno(timeo);
89                 break;
90             }
91         }
92     } while (len > 0);
93 }

 

相关文章:

  • 2021-10-22
  • 2021-12-27
  • 2022-12-23
  • 2021-06-05
  • 2022-12-23
  • 2021-08-10
  • 2021-12-07
  • 2021-11-02
猜你喜欢
  • 2021-12-16
  • 2022-02-10
  • 2021-07-07
  • 2022-12-23
  • 2022-12-23
  • 2021-09-09
  • 2022-12-23
相关资源
相似解决方案