Support for TCP Jumbo options as described at:
http://www.imperialviolet.org/binary/jumbo-tcp-options.html
---
 include/linux/tcp.h                |  5 +++
 include/net/inet_connection_sock.h |  1 +
 include/net/inet_sock.h            |  3 ++
 include/net/tcp.h                  | 13 +++++++
 net/ipv4/Kconfig                   |  9 +++++
 net/ipv4/tcp_input.c               | 36 ++++++++++++++++++++-
 net/ipv4/tcp_ipv4.c                |  7 ++--
 net/ipv4/tcp_output.c              | 63 ++++++++++++++++++++++++++++++++++--
 8 files changed, 130 insertions(+), 7 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 08027f1..f633d3a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -222,6 +222,11 @@ struct tcp_options_received {
         u8      num_sacks;      /* Number of SACK blocks */
         u16     user_mss;       /* mss requested by user in ioctl */
         u16     mss_clamp;      /* Maximal mss, negotiated at connection setup */
+
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        u8      jumbo : 1;      /* Jumbo options supported */
+        int     data_offset;    /* The true data offset for the packet */
+#endif
 };
 
 struct tcp_request_sock {
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index f00f057..a4fd2bb 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -38,6 +38,7 @@ struct tcp_congestion_ops;
 struct inet_connection_sock_af_ops {
         int         (*queue_xmit)(struct sk_buff *skb, int ipfragok);
         void        (*send_check)(struct sock *sk, int len,
+                                  int header_len,
                                   struct sk_buff *skb);
         int         (*rebuild_header)(struct sock *sk);
         int         (*conn_request)(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 89cd011..33136ad 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -74,6 +74,9 @@ struct inet_request_sock {
                                 wscale_ok  : 1,
                                 ecn_ok     : 1,
                                 acked      : 1;
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        u16                     jumbo      : 1;
+#endif
         struct ip_options       *opt;
 };
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4fd3eb2..d636091 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -164,6 +164,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP        8       /* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG           19      /* MD5 Signature (RFC2385) */
+#define TCPOPT_JUMBO_OPTIONS    42      /* Jumbo options included */
+#define TCPOPT_JUMBO_SUPPORTED  43      /* Jumbo options supported */
 
 /*
  *     TCP option lengths
@@ -174,6 +176,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM       2
 #define TCPOLEN_TIMESTAMP       10
 #define TCPOLEN_MD5SIG          18
+#define TCPOLEN_JUMBO_SUPPORTED 2
+#define TCPOLEN_JUMBO_OPTIONS   4
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED          12
@@ -183,6 +187,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_BASE_ALIGNED       4
 #define TCPOLEN_SACK_PERBLOCK           8
 #define TCPOLEN_MD5SIG_ALIGNED          20
+#define TCPOLEN_JUMBO_SUPPORTED_ALIGNED 4
+#define TCPOLEN_JUMBO_OPTIONS_ALIGNED   4
 
 /* Flags in tp->nonagle */
 #define TCP_NAGLE_OFF           1       /* Nagle's algo is disabled */
@@ -332,6 +338,9 @@ extern void tcp_enter_quickack_mode(struct sock *sk);
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
         rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        rx_opt->jumbo = 0;
+#endif
 }
 
 #define TCP_ECN_OK              1
@@ -402,6 +411,7 @@ extern void tcp_parse_options(struct sk_buff *skb,
  */
 
 extern void             tcp_v4_send_check(struct sock *sk, int len,
+                                          int header_len,
                                           struct sk_buff *skb);
 
 extern int              tcp_v4_conn_request(struct sock *sk,
@@ -960,6 +970,9 @@ static inline void tcp_openreq_init(struct request_sock *req,
         ireq->acked = 0;
         ireq->ecn_ok = 0;
         ireq->rmt_port = tcp_hdr(skb)->source;
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        ireq->jumbo = rx_opt->jumbo;
+#endif
 }
 
 extern void tcp_enter_memory_pressure(void);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 4670683..2459a94 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -632,5 +632,14 @@ config TCP_MD5SIG
 
           If unsure, say N.
 
+config TCP_JUMBO_OPTIONS
+        bool "TCP: Jumbo options support (EXPERIMENTAL)"
+        ---help---
+          This causes the kernel to advertise support for jumbo TCP options.
+          This allows TCP options larger than would normally fit in the
+          40-byte, RFC793 space.
+
+          If unsure, say N.
+
 source "net/ipv4/ipvs/Kconfig"
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5119856..13df767 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3305,6 +3305,10 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
         struct tcphdr *th = tcp_hdr(skb);
         int length = (th->doff * 4) - sizeof(struct tcphdr);
 
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        opt_rx->data_offset = th->doff * 4;
+#endif
+
         ptr = (unsigned char *)(th + 1);
         opt_rx->saw_tstamp = 0;
 
@@ -3383,6 +3387,23 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
                          */
                         break;
 #endif
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+                case TCPOPT_JUMBO_SUPPORTED:
+                        opt_rx->jumbo = 1;
+                        break;
+                case TCPOPT_JUMBO_OPTIONS:
+                        if (opsize == 4) {
+                                u16 new_length = ntohs(get_unaligned((__be16 *)ptr));
+                                if (new_length >= th->doff * 4 &&
+                                    new_length <= skb->len) {
+                                        const int delta = new_length - th->doff * 4;
+                                        length += delta;
+                                        TCP_SKB_CB(skb)->end_seq -= delta;
+                                        opt_rx->data_offset = new_length;
+                                }
+                        }
+                        break;
+#endif
                 }
 
                 ptr += opsize-2;
@@ -3397,6 +3418,10 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
                                   struct tcp_sock *tp)
 {
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        tp->rx_opt.data_offset = th->doff * 4;
+#endif
+
         if (th->doff == sizeof(struct tcphdr) >> 2) {
                 tp->rx_opt.saw_tstamp = 0;
                 return 0;
@@ -3852,7 +3877,11 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
         if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
                 goto drop;
 
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        __skb_pull(skb, tp->rx_opt.data_offset);
+#else
         __skb_pull(skb, th->doff * 4);
+#endif
 
         TCP_ECN_accept_cwr(tp, skb);
 
@@ -4467,7 +4496,12 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
         /* Do we wait for any urgent data? - normally not... */
         if (tp->urg_data == TCP_URG_NOTYET) {
-                u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+                const u32 data_length = tp->rx_opt.data_offset;
+#else
+                const u32 data_length = th->doff * 4;
+#endif
+                u32 ptr = tp->urg_seq - ntohl(th->seq) + data_length -
                           th->syn;
 
                 /* Is the urgent pointer pointing into this packet? */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 00156bf..b47256c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -91,7 +91,7 @@ int sysctl_tcp_low_latency __read_mostly;
 /* Socket used for sending RSTs */
 static struct socket *tcp_socket __read_mostly;
 
-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+void tcp_v4_send_check(struct sock *sk, int len, int header_len, struct sk_buff *skb);
 
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
@@ -484,7 +484,8 @@ out:
 }
 
 /* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+void tcp_v4_send_check(struct sock *sk, int len, int header_len,
+                       struct sk_buff *skb)
 {
         struct inet_sock *inet = inet_sk(sk);
         struct tcphdr *th = tcp_hdr(skb);
@@ -497,7 +498,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
         } else {
                 th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
                                          csum_partial((char *)th,
-                                                      th->doff << 2,
+                                                      header_len,
                                                       skb->csum));
         }
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d29ef79..23e2347 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -400,7 +400,11 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
  */
 static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
                                   int offer_wscale, int wscale, __u32 tstamp,
-                                  __u32 ts_recent, __u8 **md5_hash)
+                                  __u32 ts_recent, __u8 **md5_hash
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+                                  , int jumbo_support
+#endif
+                                  )
 {
         /* We always get an MSS option.
          * The option bytes which will be seen in normal data
@@ -453,6 +457,16 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
                 *md5_hash = (__u8 *)ptr;
         }
 #endif
+
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        if (jumbo_support) {
+                /* Advertise our support for jumbo options */
+                *ptr++ = htonl((TCPOPT_NOP << 24) |
+                               (TCPOPT_NOP << 16) |
+                               (TCPOPT_JUMBO_SUPPORTED << 8) |
+                               2);
+        }
+#endif
 }
 
 /* This routine actually transmits TCP packets queued in by
@@ -481,6 +495,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         struct tcphdr *th;
         int sysctl_flags;
         int err;
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        int tcp_header_overflow = 0;
+#endif
 
         BUG_ON(!skb || !tcp_skb_pcount(skb));
 
@@ -546,7 +563,17 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+                tcp_header_size += TCPOLEN_JUMBO_SUPPORTED_ALIGNED;
+        }
+#endif
+
+#ifndef CONFIG_TCP_JUMBO_OPTIONS
         skb_push(skb, tcp_header_size);
+#else
+        skb_push(skb, tcp_header_size + tcp_header_overflow);
+#endif
         skb_reset_transport_header(skb);
         skb_set_owner_w(skb, sk);
 
@@ -589,7 +616,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 #ifdef CONFIG_TCP_MD5SIG
                                       md5 ? &md5_hash_location :
 #endif
-                                      NULL);
+                                      NULL
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+                                      , 1
+#endif
+                                      );
         } else {
                 tcp_build_and_update_options((__be32 *)(th + 1),
                                              tp, tcb->when,
@@ -612,7 +643,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         }
 #endif
 
-        icsk->icsk_af_ops->send_check(sk, skb->len, skb);
+#ifndef CONFIG_TCP_JUMBO_OPTIONS
+        icsk->icsk_af_ops->send_check(sk, skb->len, tcp_header_size, skb);
+#else
+        icsk->icsk_af_ops->send_check(sk, skb->len,
+                                      tcp_header_size + tcp_header_overflow,
+                                      skb);
+#endif
 
         if (likely(tcb->flags & TCPCB_FLAG_ACK))
                 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -2181,6 +2218,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
         struct tcp_md5sig_key *md5;
         __u8 *md5_hash_location;
 #endif
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        /* This stores the number of additional bytes which have overflowed the
+         * RFC793 header. Unless we are using Jumbo options, it will always be
+         * zero */
+        int tcp_header_overflow = 0;
+#endif
 
         skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
         if (skb == NULL)
@@ -2203,7 +2246,18 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
         if (md5)
                 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
+
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+        if (ireq->jumbo) {
+                tcp_header_size += TCPOLEN_JUMBO_SUPPORTED_ALIGNED;
+        }
+#endif
+
+#ifndef CONFIG_TCP_JUMBO_OPTIONS
         skb_push(skb, tcp_header_size);
+#else
+        skb_push(skb, tcp_header_size + tcp_header_overflow);
+#endif
         skb_reset_transport_header(skb);
 
         th = tcp_hdr(skb);
@@ -2247,6 +2301,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
                                md5 ? &md5_hash_location :
 #endif
                                NULL)
+#ifdef CONFIG_TCP_JUMBO_OPTIONS
+                              , ireq->jumbo
+#endif
                               );
 
         th->doff = (tcp_header_size >> 2);
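
Illustration (not part of the patch): a minimal user-space sketch of how a receiver
could interpret the TCPOPT_JUMBO_OPTIONS option added above. The option kind (42)
and the 4-byte option carrying a 16-bit "true header length" follow the patch; the
function name, buffer and fallback handling are invented for the example.

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_NOP              1
#define TCPOPT_JUMBO_OPTIONS   42      /* header extends past the doff field */
#define TCPOLEN_JUMBO_OPTIONS   4

/* Scan 'len' bytes of TCP options.  If a jumbo-options option is present,
 * return the 16-bit extended header length it carries; otherwise return
 * 'fallback' (the classic doff * 4 value). */
static unsigned int jumbo_header_len(const uint8_t *opt, int len,
                                     unsigned int fallback)
{
        int i = 0;

        while (i < len) {
                uint8_t kind = opt[i];

                if (kind == TCPOPT_NOP) {               /* one-byte padding */
                        i++;
                        continue;
                }
                if (kind == 0)                          /* end of options */
                        break;
                if (i + 1 >= len || opt[i + 1] < 2)     /* malformed option */
                        break;
                if (kind == TCPOPT_JUMBO_OPTIONS &&
                    opt[i + 1] == TCPOLEN_JUMBO_OPTIONS && i + 3 < len)
                        /* 16-bit true header length, network byte order */
                        return (opt[i + 2] << 8) | opt[i + 3];
                i += opt[i + 1];
        }
        return fallback;
}

int main(void)
{
        /* NOP, NOP, kind 42, len 4, true header length = 120 bytes */
        const uint8_t opts[] = { 1, 1, 42, 4, 0x00, 0x78 };

        printf("data offset: %u bytes\n",
               jumbo_header_len(opts, sizeof(opts), 20 + sizeof(opts)));
        return 0;
}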