Skip to content

Commit

Permalink
net: support marking accepting TCP sockets
Browse files Browse the repository at this point in the history
When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.

This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.

This patch adds a sysctl, tcp_fwmark_accept, to mark TCP sockets based
on the fwmark of the incoming SYN packet. If it is enabled and an
unmarked listening socket receives a SYN, the SYN packet's fwmark is
written to the connection's inet_request_sock and later copied to the
accepted socket when the connection is established. If the listening
socket already has a nonzero mark, the behaviour is the same as it is
today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
  mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
  incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Signed-off-by: Lorenzo Colitti <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
lcolitti authored and davem330 committed May 13, 2014
1 parent 1b3c61d commit 84f39b0
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 5 deletions.
10 changes: 10 additions & 0 deletions include/net/inet_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,23 @@ struct inet_request_sock {
kmemcheck_bitfield_end(flags);
struct ip_options_rcu *opt;
struct sk_buff *pktopts;
u32 ir_mark;
};

/*
 * Reinterpret a generic request_sock pointer as an inet_request_sock
 * pointer. This is a plain pointer conversion: no checking is performed
 * and the const qualifier of the argument is dropped.
 * NOTE(review): presumably valid because inet_request_sock embeds the
 * request_sock as its first member - confirm against the struct layout.
 */
static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
{
	const void *raw = sk;

	return (struct inet_request_sock *)raw;
}

/*
 * Choose the mark to record for a connection request arriving on @sk.
 *
 * @sk:  socket that received the request
 * @skb: the incoming packet
 *
 * Returns sk->sk_mark when it is nonzero. When it is zero and the
 * sysctl_tcp_fwmark_accept setting of the socket's network namespace is
 * enabled, returns skb->mark instead. Otherwise returns sk->sk_mark
 * (which is zero at that point).
 */
static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
{
	/* A socket that already carries a mark keeps it. */
	if (sk->sk_mark)
		return sk->sk_mark;

	/* Unmarked socket: take the packet's mark only if the sysctl allows. */
	if (sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
		return skb->mark;

	return sk->sk_mark;
}

struct inet_cork {
unsigned int flags;
__be32 addr;
Expand Down
1 change: 1 addition & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct netns_ipv4 {
int sysctl_ip_fwd_use_pmtu;

int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;

struct ping_group_range ping_group_range;

Expand Down
6 changes: 4 additions & 2 deletions net/ipv4/inet_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct net *net = sock_net(sk);
int flags = inet_sk_flowi_flags(sk);

flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol,
flags,
Expand Down Expand Up @@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,

rcu_read_lock();
opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
Expand Down Expand Up @@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
newsk->sk_write_space = sk_stream_write_space;

newsk->sk_mark = inet_rsk(req)->ir_mark;

newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;
Expand Down
3 changes: 2 additions & 1 deletion net/ipv4/syncookies.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->ir_rmt_port = th->source;
ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
Expand Down Expand Up @@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* hasn't changed since we received the original syn, but I see
* no easy way to do this.
*/
flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
(opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
Expand Down
7 changes: 7 additions & 0 deletions net/ipv4/sysctl_net_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_fwmark_accept",
.data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};

Expand Down
1 change: 1 addition & 0 deletions net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ir_rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
ireq->opt = tcp_v4_save_options(skb);
ireq->ir_mark = inet_request_mark(sk, skb);

if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/inet6_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
final_p = fl6_update_dst(fl6, np->opt, &final);
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = sk->sk_mark;
fl6->flowi6_mark = ireq->ir_mark;
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
security_req_classify_flow(req, flowi6_to_flowi(fl6));
Expand Down
4 changes: 3 additions & 1 deletion net/ipv6/syncookies.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = inet6_iif(skb);

ireq->ir_mark = inet_request_mark(sk, skb);

req->expires = 0UL;
req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
Expand All @@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
final_p = fl6_update_dst(&fl6, np->opt, &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
Expand Down
1 change: 1 addition & 0 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
TCP_ECN_create_request(req, skb, sock_net(sk));

ireq->ir_iif = sk->sk_bound_dev_if;
ireq->ir_mark = inet_request_mark(sk, skb);

/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
Expand Down

0 comments on commit 84f39b0

Please sign in to comment.