一个SYN包可能包括这些内容:初始序列号、初始窗口大小、MSS、窗口扩大因子、时间戳。
那么是怎么决定是否设置某个选项,怎样设置的呢?例如窗口扩大因子有关传输的性能,我可以怎么样改动这个值呢?
下面来看看内核是怎么做的?
设置SYN包的TCP选项的函数(tcp_output.c中):
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
static unsigned int tcp_syn_options( struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
struct tcp_sock *tp = tcp_sk(sk);
//TCP选项的最大长度
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
}
#else
*md5 = NULL;
#endif
/* We always get an MSS option. The option bytes which will be seen in
* normal data packets should timestamps be used, must be in the MSS
* advertised. But we subtract them from tp->mss_cache so that
* calculations in tcp_sendmsg are simpler etc. So account for this
* fact here if necessary. If we don't do this correctly, as a
* receiver we won't recognize data packets as being full sized when we
* should, and thus we won't abide by the delayed ACK rules correctly.
* SACKs don't matter, we never delay an ACK when we have any of those
* going out. */
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
opts->options |= OPTION_TS;
opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(sysctl_tcp_window_scaling)) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
if (likely(sysctl_tcp_sack)) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
if (fastopen && fastopen->cookie.len >= 0) {
u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
opts->options |= OPTION_FAST_OPEN_COOKIE;
opts->fastopen_cookie = &fastopen->cookie;
remaining -= need;
tp->syn_fastopen = 1;
}
}
return MAX_TCP_OPTION_SPACE - remaining;
}
是否设置窗口扩大选项是由 sysctl_tcp_window_scaling 的值决定的, sysctl_tcp_window_scaling 是一个sysctl变量
/* sysctl variables for tcp */
/* Non-zero makes tcp_syn_options() add the window-scale option to a SYN. */
extern int sysctl_tcp_window_scaling;
如果sysctl_tcp_window_scaling的值为真, 则设置窗口扩大选项。选项的具体取值是由 tp->rx_opt.rcv_wscale 决定的。
rx_opt的结构如下:
/* TCP option state kept per connection; reached as tp->rx_opt in
 * tcp_syn_options().
 */
struct tcp_options_received {
/* PAWS/RTTM data */
long ts_recent_stamp ; /* Time we stored ts_recent (for aging) */
u32 ts_recent ; /* Time stamp to echo next */
u32 rcv_tsval ; /* Time stamp value */
u32 rcv_tsecr ; /* Time stamp echo reply */
/* The seven bit-fields below add up to 16 bits (1+1+1+1+4+4+4). */
u16 saw_tstamp : 1, /* Saw TIMESTAMP on last packet */
tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */
dsack : 1, /* D-SACK is scheduled */
wscale_ok : 1, /* Wscale seen on SYN packet */
sack_ok : 4, /* SACK seen on SYN packet */
snd_wscale : 4, /* Window scaling received from sender */
/* tcp_syn_options() copies rcv_wscale into opts->ws for the SYN. */
rcv_wscale : 4; /* Window scaling to send to receiver */
u8 num_sacks ; /* Number of SACK blocks */
u16 user_mss ; /* mss requested by user in ioctl */
u16 mss_clamp ; /* Maximal mss, negotiated at connection setup */
};
可见 rcv_wscale 的值就是接收窗口扩大因子。这个值的计算是在函数 tcp_select_initial_window 中完成的:
/* Pick the window scale shift and the initial receive window to offer,
 * assuming that @__space bytes of receive space will be offered.
 * Results are written through @rcv_wnd, @rcv_wscale and @window_clamp.
 *
 * NOTE: for smooth operation the initial space offering should be a
 * multiple of mss if possible.  mss >= 1 is assumed here and MUST be
 * enforced by all callers.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	/* Receive space cannot be negative. */
	unsigned int space = __space > 0 ? __space : 0;
	/* Shift count being built up; stored to *rcv_wscale below. */
	__u8 shift = 0;

	/* No clamp given: use the largest window that scaling can express. */
	if (!*window_clamp)
		*window_clamp = 65535 << 14;

	/* The offered space may not exceed the clamp. */
	space = min(*window_clamp, space);

	/* Round the offering down to a whole number of segments if possible. */
	if (space > mss)
		space -= space % mss;

	/* NOTE: an initial window above 32767 breaks some buggy TCP stacks
	 * that read the window field as a signed quantity.  When the admin
	 * has told us such peers are likely, the initial offering is
	 * truncated to 32K-1, unless the remote has sent a window scaling
	 * option, which is taken as a sign that it does not misinterpret
	 * the field.
	 */
	*rcv_wnd = sysctl_tcp_workaround_signed_windows ?
			min(space, MAX_TCP_WINDOW) : space;

	if (wscale_ok) {	/* Wscale seen on SYN packet */
		/* Scale for the largest window we might ever offer: the
		 * larger of sysctl_tcp_rmem[2] (upper bound of the TCP
		 * receive buffer) and sysctl_rmem_max (system-wide receive
		 * maximum), limited by the clamp.
		 * See RFC1323 for an explanation of the limit to 14.
		 */
		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);

		/* Halve until the value fits in the 16-bit window field. */
		for (; space > 65535 && shift < 14; shift++)
			space >>= 1;
	}
	*rcv_wscale = shift;

	if (mss > (1 << *rcv_wscale)) {
		/* Use the default unless the caller specified a value. */
		if (!init_rcv_wnd)
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	/* Keep the clamp no higher than the max representable value. */
	*window_clamp = min(65535U << (*rcv_wscale), *window_clamp);
}
tcp_select_initial_window 确定窗口扩大因子和初始窗口大小。window_clamp是通告窗口上限
u32 window_clamp ; /* Maximal window to advertise */
初始窗口大小一般设置为 TCP_INIT_CWND 的2倍乘以mss
/* Default initial receive window, expressed in segments of @mss bytes. */
u32 tcp_default_init_rwnd(u32 mss)
{
	/* The initial receive window should be twice TCP_INIT_CWND so that
	 * new unsent data can be sent properly during fast recovery
	 * (RFC 3517, Section 4, NextSeg() rule (2)).
	 */
	const u32 base_rwnd = TCP_INIT_CWND * 2;

	if (mss <= 1460)
		return base_rwnd;

	/* Larger mss: scale the segment count down, but never below 2. */
	return max((1460 * base_rwnd) / mss, 2U);
}
问题:如何更改窗口扩大因子
窗口扩大选项允许使用大于64K 的TCP窗口,这有助于提升高延迟或高带宽网络的性能。为了使用窗口扩大选项,需要增加发送和接收缓冲的大小。全局设置可以通过/proc/sys/net/ipv4/tcp_wmem和/proc/sys/net/ipv4/tcp_rmem文件。针对单个socket设置可以通过使用SO_SNDBUF和SO_RCVBUF套接字选项,调用setsockopt实现。
不过要注意:通过SO_SNDBUF和SO_RCVBUF机制声明的socket缓冲的最大值被这两个文件/proc/sys/net/core/rmem_max和/proc/sys/net/core/wmem_max的值限定了,即
超过这两个文件中上限的请求值不会按原值生效,而是会被内核截断为该上限。
所以通过SO_SNDBUF和SO_RCVBUF改变窗口大小是有限度的。更多选项的设置可以查看man tcp。
下面是全局设置的一个例子:
如前所述,我们可以看到,是否支持窗口扩大选项由sysctl变量 sysctl_tcp_window_scaling 决定,我们可以在/proc/sys/net/ipv4中找到它目前的值。
决定窗口扩大因子的主要是sysctl_tcp_rmem[2],这是/proc下tcp_rmem三元组中的最后一个值。
可以在/etc/sysctl.conf文件中添加下面两行来更新
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_rmem = 4096 87380 2097120
sudo sysctl -p使设置立即生效
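这样设置后(假设 rmem_max 不大于 2097120,且 window_clamp 没有进一步限制),2097120 需要右移 5 次才不超过 65535,所以 rcv_wscale 应该是 5,即通告窗口被放大 2^5 = 32 倍。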
版权声明:本文为wy5761原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。