Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next:

1) Inspect the reply packets coming from DR/TUN and refresh connection
   state and timeout, from longguang yue and Julian Anastasov.

2) Series to add support for the inet ingress chain type in nf_tables.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
kuba-moo committed Oct 12, 2020
2 parents 547848a + 793d5d6 commit a308283
Show file tree
Hide file tree
Showing 11 changed files with 282 additions and 76 deletions.
6 changes: 6 additions & 0 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,12 @@ struct nft_table {
u8 *udata;
};

/*
 * Tell whether the (family, hooknum) pair selects a netdev-style base chain.
 *
 * Returns true for every chain of family NFPROTO_NETDEV, and for a chain of
 * family NFPROTO_INET only when it is attached at NF_INET_INGRESS.
 * Returns false for everything else.
 */
static inline bool nft_base_chain_netdev(int family, u32 hooknum)
{
	if (family == NFPROTO_NETDEV)
		return true;

	return family == NFPROTO_INET && hooknum == NF_INET_INGRESS;
}

void nft_register_chain_type(const struct nft_chain_type *);
void nft_unregister_chain_type(const struct nft_chain_type *);

Expand Down
33 changes: 33 additions & 0 deletions include/net/netfilter/nf_tables_ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,37 @@ static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
nft_set_pktinfo_unspec(pkt, skb);
}

/*
 * Validate the IPv4 header of @skb and record transport information in @pkt.
 *
 * The header is rejected when:
 *  - the fixed-size IPv4 header cannot be pulled into the linear area,
 *  - the version is not 4 or the header length field is below 5 words,
 *  - the total length announced by the header exceeds skb->len (counted as
 *    IPSTATS_MIB_INTRUNCATEDPKTS),
 *  - the total length is smaller than the header length.
 * Header-field failures are counted as IPSTATS_MIB_INHDRERRORS.
 *
 * On success pkt->tprot, pkt->tprot_set, pkt->xt.thoff and pkt->xt.fragoff
 * are filled in.
 *
 * Returns 0 on success, -1 on any failure.
 */
static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt,
					       struct sk_buff *skb)
{
	struct iphdr *hdr;
	u32 tot_len, hdr_len;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		return -1;

	hdr = ip_hdr(skb);
	if (hdr->version != 4 || hdr->ihl < 5)
		goto bad_header;

	/* ihl counts 32-bit words; convert it to a byte offset. */
	hdr_len = hdr->ihl * 4;
	tot_len = ntohs(hdr->tot_len);

	if (tot_len > skb->len) {
		__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);
		return -1;
	}

	if (hdr_len > tot_len)
		goto bad_header;

	pkt->xt.thoff = hdr_len;
	pkt->xt.fragoff = ntohs(hdr->frag_off) & IP_OFFSET;
	pkt->tprot = hdr->protocol;
	pkt->tprot_set = true;

	return 0;

bad_header:
	__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS);
	return -1;
}
#endif
46 changes: 46 additions & 0 deletions include/net/netfilter/nf_tables_ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,50 @@ static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
nft_set_pktinfo_unspec(pkt, skb);
}

/*
 * Validate the IPv6 header of @skb and record transport information in @pkt.
 *
 * When IPv6 support is not enabled this always returns -1.
 *
 * Otherwise the packet is rejected when:
 *  - the fixed IPv6 header cannot be pulled into the linear area,
 *  - the version field is not 6,
 *  - payload length plus the fixed header exceeds skb->len (counted as
 *    IPSTATS_MIB_INTRUNCATEDPKTS),
 *  - ipv6_find_hdr() reports an error.
 * Version and header-walk failures are counted as IPSTATS_MIB_INHDRERRORS.
 *
 * On success pkt->tprot, pkt->tprot_set, pkt->xt.thoff and pkt->xt.fragoff
 * are filled in from the values produced by ipv6_find_hdr().
 *
 * Returns 0 on success, -1 on any failure.
 */
static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt,
					       struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	unsigned int find_flags = IP6_FH_F_AUTH;
	unsigned int l4_off = 0;
	unsigned short frag;
	struct inet6_dev *in6dev;
	struct ipv6hdr *hdr;
	u32 payload;
	int l4proto;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		return -1;

	hdr = ipv6_hdr(skb);
	if (hdr->version != 6)
		goto bad_header;

	/* payload_len excludes the fixed header, so add it back. */
	payload = ntohs(hdr->payload_len);
	if (skb->len < payload + sizeof(*hdr)) {
		in6dev = __in6_dev_get(nft_in(pkt));
		__IP6_INC_STATS(nft_net(pkt), in6dev, IPSTATS_MIB_INTRUNCATEDPKTS);
		return -1;
	}

	/*
	 * NOTE(review): this call walks pkt->skb while every other access in
	 * this function uses the skb argument; presumably both refer to the
	 * same buffer -- confirm against the caller.
	 */
	l4proto = ipv6_find_hdr(pkt->skb, &l4_off, -1, &frag, &find_flags);
	if (l4proto < 0)
		goto bad_header;

	pkt->xt.thoff = l4_off;
	pkt->xt.fragoff = frag;
	pkt->tprot = l4proto;
	pkt->tprot_set = true;

	return 0;

bad_header:
	in6dev = __in6_dev_get(nft_in(pkt));
	__IP6_INC_STATS(nft_net(pkt), in6dev, IPSTATS_MIB_INHDRERRORS);
	return -1;
#else
	return -1;
#endif
}

#endif
1 change: 1 addition & 0 deletions include/uapi/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ enum nf_inet_hooks {
NF_INET_FORWARD,
NF_INET_LOCAL_OUT,
NF_INET_POST_ROUTING,
NF_INET_INGRESS,
NF_INET_NUMHOOKS
};

Expand Down
129 changes: 103 additions & 26 deletions net/netfilter/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,16 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
return NULL;
return net->nf.hooks_bridge + hooknum;
#endif
#ifdef CONFIG_NETFILTER_INGRESS
case NFPROTO_INET:
if (WARN_ON_ONCE(hooknum != NF_INET_INGRESS))
return NULL;
if (!dev || dev_net(dev) != net) {
WARN_ON_ONCE(1);
return NULL;
}
return &dev->nf_hooks_ingress;
#endif
case NFPROTO_IPV4:
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
Expand Down Expand Up @@ -311,20 +321,80 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
return NULL;
}

/*
 * Sanity-check an ingress hook registration.
 *
 * @net:     network namespace the hook is being registered in
 * @reg:     hook ops being registered
 * @hooknum: the ingress hook number expected for this family
 *
 * Returns 0 when @reg targets @hooknum and carries a device that belongs to
 * @net. Returns -EINVAL when the hook number differs, the device is missing,
 * or the device lives in another namespace. When CONFIG_NETFILTER_INGRESS is
 * not set, a matching hook number yields -EOPNOTSUPP instead.
 */
static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg,
			    int hooknum)
{
	if (reg->hooknum != hooknum)
		return -EINVAL;

#ifndef CONFIG_NETFILTER_INGRESS
	/* The hook number is right, but ingress support is compiled out. */
	return -EOPNOTSUPP;
#else
	if (!reg->dev || dev_net(reg->dev) != net)
		return -EINVAL;

	return 0;
#endif
}

/*
 * Tell whether @reg, registered under protocol family @pf, is an ingress
 * hook: NF_NETDEV_INGRESS for NFPROTO_NETDEV, or NF_INET_INGRESS for
 * NFPROTO_INET. Any other family is never an ingress hook.
 */
static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
{
	switch (pf) {
	case NFPROTO_NETDEV:
		return reg->hooknum == NF_NETDEV_INGRESS;
	case NFPROTO_INET:
		return reg->hooknum == NF_INET_INGRESS;
	default:
		return false;
	}
}

/*
 * Increment the nf_hooks_needed static key that corresponds to @reg.
 *
 * An NFPROTO_INET hook at NF_INET_INGRESS is accounted under the
 * [NFPROTO_NETDEV][NF_NETDEV_INGRESS] key; every other hook uses
 * [pf][reg->hooknum]. Does nothing when CONFIG_JUMP_LABEL is not set.
 */
static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
{
#ifdef CONFIG_JUMP_LABEL
	int hooknum = reg->hooknum;

	/* Redirect inet ingress to the netdev ingress key. */
	if (pf == NFPROTO_INET && hooknum == NF_INET_INGRESS) {
		pf = NFPROTO_NETDEV;
		hooknum = NF_NETDEV_INGRESS;
	}

	static_key_slow_inc(&nf_hooks_needed[pf][hooknum]);
#endif
}

/*
 * Decrement the nf_hooks_needed static key that corresponds to @reg.
 *
 * An NFPROTO_INET hook at NF_INET_INGRESS is accounted under the
 * [NFPROTO_NETDEV][NF_NETDEV_INGRESS] key; every other hook uses
 * [pf][reg->hooknum]. Does nothing when CONFIG_JUMP_LABEL is not set.
 */
static void nf_static_key_dec(const struct nf_hook_ops *reg, int pf)
{
#ifdef CONFIG_JUMP_LABEL
	int hooknum = reg->hooknum;

	/* Redirect inet ingress to the netdev ingress key. */
	if (pf == NFPROTO_INET && hooknum == NF_INET_INGRESS) {
		pf = NFPROTO_NETDEV;
		hooknum = NF_NETDEV_INGRESS;
	}

	static_key_slow_dec(&nf_hooks_needed[pf][hooknum]);
#endif
}

static int __nf_register_net_hook(struct net *net, int pf,
const struct nf_hook_ops *reg)
{
struct nf_hook_entries *p, *new_hooks;
struct nf_hook_entries __rcu **pp;
int err;

if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS)
return -EOPNOTSUPP;
#endif
if (reg->hooknum != NF_NETDEV_INGRESS ||
!reg->dev || dev_net(reg->dev) != net)
return -EINVAL;
switch (pf) {
case NFPROTO_NETDEV:
err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
if (err < 0)
return err;
break;
case NFPROTO_INET:
if (reg->hooknum != NF_INET_INGRESS)
break;

err = nf_ingress_check(net, reg, NF_INET_INGRESS);
if (err < 0)
return err;
break;
}

pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
Expand All @@ -345,12 +415,11 @@ static int __nf_register_net_hook(struct net *net, int pf,

hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
#endif
#ifdef CONFIG_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
nf_static_key_inc(reg, pf);

BUG_ON(p == new_hooks);
nf_hook_entries_free(p);
return 0;
Expand Down Expand Up @@ -403,12 +472,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf,

if (nf_remove_net_hook(p, reg)) {
#ifdef CONFIG_NETFILTER_INGRESS
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
if (nf_ingress_hook(reg, pf))
net_dec_ingress_queue();
#endif
#ifdef CONFIG_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
#endif
nf_static_key_dec(reg, pf);
} else {
WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum);
}
Expand All @@ -425,8 +492,12 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf == NFPROTO_INET) {
__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
__nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
if (reg->hooknum == NF_INET_INGRESS) {
__nf_unregister_net_hook(net, NFPROTO_INET, reg);
} else {
__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
__nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
}
} else {
__nf_unregister_net_hook(net, reg->pf, reg);
}
Expand All @@ -451,14 +522,20 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
int err;

if (reg->pf == NFPROTO_INET) {
err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
if (err < 0)
return err;

err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
if (err < 0) {
__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
return err;
if (reg->hooknum == NF_INET_INGRESS) {
err = __nf_register_net_hook(net, NFPROTO_INET, reg);
if (err < 0)
return err;
} else {
err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
if (err < 0)
return err;

err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
if (err < 0) {
__nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
return err;
}
}
} else {
err = __nf_register_net_hook(net, reg->pf, reg);
Expand Down
18 changes: 15 additions & 3 deletions net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,8 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
{
unsigned int hash;
struct ip_vs_conn *cp, *ret=NULL;
const union nf_inet_addr *saddr;
__be16 sport;

/*
* Check for "full" addressed entries
Expand All @@ -411,10 +413,20 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
rcu_read_lock();

hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (p->vport == cp->cport && p->cport == cp->dport &&
cp->af == p->af &&
if (p->vport != cp->cport)
continue;

if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
sport = cp->vport;
saddr = &cp->vaddr;
} else {
sport = cp->dport;
saddr = &cp->daddr;
}

if (p->cport == sport && cp->af == p->af &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
ip_vs_addr_equal(p->af, p->caddr, saddr) &&
p->protocol == cp->protocol &&
cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
Expand Down
19 changes: 7 additions & 12 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
unsigned int verdict = NF_DROP;

if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
goto ignore_cp;
goto after_nat;

/* Ensure the checksum is correct */
if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
Expand All @@ -901,6 +901,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto out;

after_nat:
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);

Expand All @@ -909,8 +910,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 0);

ignore_cp:
verdict = NF_ACCEPT;

out:
Expand Down Expand Up @@ -1276,6 +1275,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
{
struct ip_vs_protocol *pp = pd->pp;

if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
goto after_nat;

IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");

if (skb_ensure_writable(skb, iph->len))
Expand Down Expand Up @@ -1316,6 +1318,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,

IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");

after_nat:
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
skb->ipvs_property = 1;
Expand Down Expand Up @@ -1412,11 +1415,8 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
ipvs, af, skb, &iph);

if (likely(cp)) {
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
goto ignore_cp;
if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum);
}

/* Check for real-server-started requests */
if (atomic_read(&ipvs->conn_out_counter)) {
Expand Down Expand Up @@ -1475,14 +1475,9 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
}
}

out:
IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;

ignore_cp:
__ip_vs_conn_put(cp);
goto out;
}

/*
Expand Down
Loading

0 comments on commit a308283

Please sign in to comment.