/*
 * Patch overview: introduces a new IP_MTU_DISCOVER mode,
 * IP_PMTUDISC_INTERFACE (value 4), and wires it through the IPv4 paths
 * touched below.
 *
 * NOTE(review): the line breaks of this patch appear to have been collapsed
 * (several hunks share one physical line); the original newlines would have
 * to be restored before it could be applied with patch/git-apply.
 *
 * include/uapi/linux/in.h
 *   - Defines IP_PMTUDISC_INTERFACE as 4, after IP_PMTUDISC_PROBE (3).
 *
 * include/net/route.h
 *   - ip_sk_accept_pmtu(sk): true unless pmtudisc == IP_PMTUDISC_INTERFACE.
 *   - ip_sk_use_pmtu(sk): true when pmtudisc < IP_PMTUDISC_PROBE.  This
 *     depends on the numeric ordering of the IP_PMTUDISC_* constants, so
 *     both PROBE (3) and INTERFACE (4) yield false.
 *   - ip_skb_dst_mtu(skb): now takes a const skb; returns dst_mtu() of the
 *     skb's dst when there is no socket or ip_sk_use_pmtu() is true,
 *     otherwise the dst device's mtu.
 *
 * net/ipv4/ip_output.c
 *   - ip_setup_cork(): cork->fragsize is chosen with ip_sk_use_pmtu(); the
 *     local `inet` variable is dropped.
 *   - __ip_make_skb(): the first DF condition changes from
 *     `pmtudisc >= IP_PMTUDISC_DO` to an explicit DO-or-PROBE test, so the
 *     new value 4 no longer satisfies that clause.
 *     NOTE(review): DF can still be set through the second clause
 *     (skb->len <= dst_mtu && ip_dont_fragment()); ip_dont_fragment() is not
 *     part of this patch -- confirm how it treats the new mode.
 *
 * net/ipv4/ip_sockglue.c
 *   - do_ip_setsockopt(): IP_MTU_DISCOVER now accepts values up to
 *     IP_PMTUDISC_INTERFACE.
 *
 * net/ipv4/route.c
 *   - ipv4_sk_update_pmtu(): after bh_lock_sock(), jumps to `out` when
 *     ip_sk_accept_pmtu() is false.
 *     NOTE(review): the `out` label lies outside the visible hunk;
 *     presumably it releases the socket lock -- confirm against the full
 *     function.
 *
 * net/ipv4/tcp_ipv4.c, net/dccp/ipv4.c
 *   - tcp_v4_mtu_reduced() / dccp_do_pmtu_discovery(): the condition guarding
 *     tcp_sync_mss() / dccp_sync_mss() additionally requires
 *     ip_sk_accept_pmtu(sk).
 */
diff --git a/include/net/route.h b/include/net/route.h index dd4ae0029fd802..f68c167280a7f6 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -313,12 +313,20 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } -static inline int ip_skb_dst_mtu(struct sk_buff *skb) +static inline bool ip_sk_accept_pmtu(const struct sock *sk) { - struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; + return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE; +} - return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +static inline bool ip_sk_use_pmtu(const struct sock *sk) +{ + return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; +} + +static inline int ip_skb_dst_mtu(const struct sk_buff *skb) +{ + return (!skb->sk || ip_sk_use_pmtu(skb->sk)) ? + dst_mtu(skb_dst(skb)) : skb_dst(skb)->dev->mtu; } #endif /* _ROUTE_H */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index f9e8e496ae5d45..393c5de09d42c6 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -115,6 +115,11 @@ struct in_addr { #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF */ #define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ +/* Always use interface mtu (ignores dst pmtu) but don't set DF flag. + * Also incoming ICMP frag_needed notifications will be ignored on + * this socket to prevent accepting spoofed ones. 
+ */ +#define IP_PMTUDISC_INTERFACE 4 #define IP_MULTICAST_IF 32 #define IP_MULTICAST_TTL 33 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 720c36225ed9b3..d9f65fc66db5ea 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -174,6 +174,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && + ip_sk_accept_pmtu(sk) && inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 51be64e18e32e9..912402752f2ffc 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1037,7 +1037,6 @@ static int __ip_append_data(struct sock *sk, static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, struct ipcm_cookie *ipc, struct rtable **rtp) { - struct inet_sock *inet = inet_sk(sk); struct ip_options_rcu *opt; struct rtable *rt; @@ -1063,8 +1062,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, * We steal reference to this route, caller should not release it */ *rtp = NULL; - cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : rt->dst.dev->mtu; cork->dst = &rt->dst; cork->length = 0; cork->ttl = ipc->ttl; @@ -1315,7 +1314,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, /* DF bit is set when we want to see DF on outgoing frames. * If local_df is set too, we still allow to fragment this frame * locally. */ - if (inet->pmtudisc >= IP_PMTUDISC_DO || + if (inet->pmtudisc == IP_PMTUDISC_DO || + inet->pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0626f2cb192e69..3f858266fa7e33 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -627,7 +627,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->nodefrag = val ? 
1 : 0; break; case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE) goto e_inval; inet->pmtudisc = val; break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d2d325382b13f4..f428935c50dba6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1036,6 +1036,10 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) bool new = false; bh_lock_sock(sk); + + if (!ip_sk_accept_pmtu(sk)) + goto out; + rt = (struct rtable *) __sk_dst_get(sk); if (sock_owned_by_user(sk) || !rt) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 300ab2c93f2927..14bba8a1c5a74e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -288,6 +288,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk) mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && + ip_sk_accept_pmtu(sk) && inet_csk(sk)->icsk_pmtu_cookie > mtu) { tcp_sync_mss(sk, mtu);