Skip to content

Commit

Permalink
tcp memory pressure controls
Browse files Browse the repository at this point in the history
This patch introduces memory pressure controls for the tcp
protocol. It uses the generic socket memory pressure code
introduced in earlier patches, and fills in the
necessary data in cg_proto struct.

Signed-off-by: Glauber Costa <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
CC: Eric W. Biederman <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
Glauber Costa authored and davem330 committed Dec 13, 2011
1 parent e1aab16 commit d1a4c0b
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Documentation/cgroups/memory.txt
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ to trigger slab reclaim when those limits are reached.
thresholds. The Memory Controller allows them to be controlled individually
per cgroup, instead of globally.

* tcp memory pressure: sockets memory pressure for the tcp protocol.

3. User Interface

0. Configuration
Expand Down
1 change: 1 addition & 0 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);

extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);

static inline
int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
Expand Down
2 changes: 2 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@
#include <net/dst.h>
#include <net/checksum.h>

int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
/*
* This structure really needs to be cleaned up.
* Most of it is for TCP, and not used by any of
Expand Down
17 changes: 17 additions & 0 deletions include/net/tcp_memcontrol.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef _TCP_MEMCG_H
#define _TCP_MEMCG_H

struct tcp_memcontrol {
struct cg_proto cg_proto;
/* per-cgroup tcp memory pressure knobs */
struct res_counter tcp_memory_allocated;
struct percpu_counter tcp_sockets_allocated;
/* those two are read-mostly, leave them at the end */
long tcp_prot_mem[3];
int tcp_memory_pressure;
};

struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
#endif /* _TCP_MEMCG_H */
40 changes: 39 additions & 1 deletion mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
#include <linux/cpu.h>
#include <linux/oom.h>
#include "internal.h"
#include <net/sock.h>
#include <net/tcp_memcontrol.h>

#include <asm/uaccess.h>

Expand Down Expand Up @@ -295,6 +297,10 @@ struct mem_cgroup {
*/
struct mem_cgroup_stat_cpu nocpu_base;
spinlock_t pcp_counter_lock;

#ifdef CONFIG_INET
struct tcp_memcontrol tcp_mem;
#endif
};

/* Stuffs for move charges at task migration. */
Expand Down Expand Up @@ -384,6 +390,7 @@ static void mem_cgroup_put(struct mem_cgroup *memcg);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
#ifdef CONFIG_INET
#include <net/sock.h>
#include <net/ip.h>

static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
void sock_update_memcg(struct sock *sk)
Expand Down Expand Up @@ -418,6 +425,15 @@ void sock_release_memcg(struct sock *sk)
mem_cgroup_put(memcg);
}
}

struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
{
if (!memcg || mem_cgroup_is_root(memcg))
return NULL;

return &memcg->tcp_mem.cg_proto;
}
EXPORT_SYMBOL(tcp_proto_cgroup);
#endif /* CONFIG_INET */
#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */

Expand Down Expand Up @@ -800,7 +816,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
preempt_enable();
}

static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
{
return container_of(cgroup_subsys_state(cont,
mem_cgroup_subsys_id), struct mem_cgroup,
Expand Down Expand Up @@ -4732,14 +4748,34 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)

ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
ARRAY_SIZE(kmem_cgroup_files));

/*
* Part of this would be better living in a separate allocation
* function, leaving us with just the cgroup tree population work.
* We, however, depend on state such as network's proto_list that
* is only initialized after cgroup creation. I found the less
* cumbersome way to deal with it to defer it all to populate time
*/
if (!ret)
ret = mem_cgroup_sockets_init(cont, ss);
return ret;
};

static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
mem_cgroup_sockets_destroy(cont, ss);
}
#else
static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
{
return 0;
}

static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
}
#endif

static struct cftype mem_cgroup_files[] = {
Expand Down Expand Up @@ -5098,6 +5134,8 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);

kmem_cgroup_destroy(ss, cont);

mem_cgroup_put(memcg);
}

Expand Down
43 changes: 40 additions & 3 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,46 @@
#include <net/tcp.h>
#endif

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct proto *proto;
int ret = 0;

read_lock(&proto_list_lock);
list_for_each_entry(proto, &proto_list, node) {
if (proto->init_cgroup) {
ret = proto->init_cgroup(cgrp, ss);
if (ret)
goto out;
}
}

read_unlock(&proto_list_lock);
return ret;
out:
list_for_each_entry_continue_reverse(proto, &proto_list, node)
if (proto->destroy_cgroup)
proto->destroy_cgroup(cgrp, ss);
read_unlock(&proto_list_lock);
return ret;
}

void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct proto *proto;

read_lock(&proto_list_lock);
list_for_each_entry_reverse(proto, &proto_list, node)
if (proto->destroy_cgroup)
proto->destroy_cgroup(cgrp, ss);
read_unlock(&proto_list_lock);
}
#endif

/*
* Each address family might have different locking rules, so we have
* one slock key per address family:
Expand Down Expand Up @@ -2291,9 +2331,6 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o

obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
Expand Down
9 changes: 8 additions & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
Expand Down Expand Up @@ -1917,6 +1918,7 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];

local_bh_disable();
sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();

Expand Down Expand Up @@ -1974,6 +1976,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
}

sk_sockets_allocated_dec(sk);
sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

Expand Down Expand Up @@ -2634,10 +2637,14 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
.init_cgroup = tcp_init_cgroup,
.destroy_cgroup = tcp_destroy_cgroup,
.proto_cgroup = tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);


static int __net_init tcp_sk_init(struct net *net)
{
return inet_ctl_sock_create(&net->ipv4.tcp_sock,
Expand Down
74 changes: 74 additions & 0 deletions net/ipv4/tcp_memcontrol.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}

static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
if (!sk->sk_cgrp->memory_pressure)
*sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);

int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
/*
* The root cgroup does not use res_counters, but rather,
* rely on the data already collected by the network
* subsystem
*/
struct res_counter *res_parent = NULL;
struct cg_proto *cg_proto, *parent_cg;
struct tcp_memcontrol *tcp;
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent = parent_mem_cgroup(memcg);

cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;

tcp = tcp_from_cgproto(cg_proto);

tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
tcp->tcp_memory_pressure = 0;

parent_cg = tcp_prot.proto_cgroup(parent);
if (parent_cg)
res_parent = parent_cg->memory_allocated;

res_counter_init(&tcp->tcp_memory_allocated, res_parent);
percpu_counter_init(&tcp->tcp_sockets_allocated, 0);

cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
cg_proto->sysctl_mem = tcp->tcp_prot_mem;
cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
cg_proto->memcg = memcg;

return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct cg_proto *cg_proto;
struct tcp_memcontrol *tcp;

cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return;

tcp = tcp_from_cgproto(cg_proto);
percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
5 changes: 5 additions & 0 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <asm/uaccess.h>

Expand Down Expand Up @@ -1994,6 +1995,7 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];

local_bh_disable();
sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();

Expand Down Expand Up @@ -2227,6 +2229,9 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
};

static const struct inet6_protocol tcpv6_protocol = {
Expand Down

0 comments on commit d1a4c0b

Please sign in to comment.