Skip to content

Commit

Permalink
memcg: allocate all page_cgroup at boot
Browse files Browse the repository at this point in the history
Allocate all page_cgroup at boot and remove page_cgroup poitner from
struct page.  This patch adds an interface as

 struct page_cgroup *lookup_page_cgroup(struct page*)

All FLATMEM/DISCONTIGMEM/SPARSEMEM  and MEMORY_HOTPLUG is supported.

Remove page_cgroup pointer reduces the amount of memory by
 - 4 bytes per PAGE_SIZE.
 - 8 bytes per PAGE_SIZE
if memory controller is disabled. (even if configured.)

On usual 8GB x86-32 server, this saves 8MB of NORMAL_ZONE memory.
On my x86-64 server with 48GB of memory, this saves 96MB of memory.
I think this reduction makes sense.

By pre-allocation, kmalloc/kfree in charge/uncharge are removed.
This means
  - we're not necessary to be afraid of kmalloc faiulre.
    (this can happen because of gfp_mask type.)
  - we can avoid calling kmalloc/kfree.
  - we can avoid allocating tons of small objects which can be fragmented.
  - we can know what amount of memory will be used for this extra-lru handling.

I added printk message as

	"allocated %ld bytes of page_cgroup"
        "please try cgroup_disable=memory option if you don't want"

maybe enough informative for users.

Signed-off-by: KAMEZAWA Hiroyuki <[email protected]>
Reviewed-by: Balbir Singh <[email protected]>
Cc: Daisuke Nishimura <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
hkamezawa authored and torvalds committed Oct 20, 2008
1 parent c05555b commit 52d4b9a
Show file tree
Hide file tree
Showing 8 changed files with 438 additions and 194 deletions.
13 changes: 1 addition & 12 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ struct mm_struct;

#ifdef CONFIG_CGROUP_MEM_RES_CTLR

#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0)

extern struct page_cgroup *page_get_page_cgroup(struct page *page);
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
Expand Down Expand Up @@ -72,16 +69,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
int priority, enum lru_list lru);

#else /* CONFIG_CGROUP_MEM_RES_CTLR */
static inline void page_reset_bad_cgroup(struct page *page)
{
}

static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
{
return NULL;
}

#else /* CONFIG_CGROUP_MEM_RES_CTLR */
static inline int mem_cgroup_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
Expand Down
3 changes: 0 additions & 3 deletions include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long page_cgroup;
#endif
};

/*
Expand Down
14 changes: 13 additions & 1 deletion include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -601,8 +601,11 @@ typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES];
struct zonelist node_zonelists[MAX_ZONELISTS];
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
struct page_cgroup *node_page_cgroup;
#endif
#endif
struct bootmem_data *bdata;
#ifdef CONFIG_MEMORY_HOTPLUG
Expand Down Expand Up @@ -931,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#endif

struct page;
struct page_cgroup;
struct mem_section {
/*
* This is, logically, a pointer to an array of struct
Expand All @@ -948,6 +952,14 @@ struct mem_section {

/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/*
* If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
* section. (see memcontrol.h/page_cgroup.h about this.)
*/
struct page_cgroup *page_cgroup;
unsigned long pad;
#endif
};

#ifdef CONFIG_SPARSEMEM_EXTREME
Expand Down
103 changes: 103 additions & 0 deletions include/linux/page_cgroup.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#ifndef __LINUX_PAGE_CGROUP_H
#define __LINUX_PAGE_CGROUP_H

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#include <linux/bit_spinlock.h>
/*
* Page Cgroup can be considered as an extended mem_map.
* A page_cgroup page is associated with every page descriptor. The
* page_cgroup helps us identify information about the cgroup
* All page cgroups are allocated at boot or memory hotplug event,
* then the page cgroup for pfn always exists.
*/
struct page_cgroup {
unsigned long flags;
struct mem_cgroup *mem_cgroup;
struct page *page;
struct list_head lru; /* per cgroup LRU list */
};

void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
void __init page_cgroup_init(void);
struct page_cgroup *lookup_page_cgroup(struct page *page);

enum {
/* flags for mem_cgroup */
PCG_LOCK, /* page cgroup is locked */
PCG_CACHE, /* charged as cache */
PCG_USED, /* this object is in use. */
/* flags for LRU placement */
PCG_ACTIVE, /* page is active in this cgroup */
PCG_FILE, /* page is file system backed */
PCG_UNEVICTABLE, /* page is unevictableable */
};

#define TESTPCGFLAG(uname, lname) \
static inline int PageCgroup##uname(struct page_cgroup *pc) \
{ return test_bit(PCG_##lname, &pc->flags); }

#define SETPCGFLAG(uname, lname) \
static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
{ set_bit(PCG_##lname, &pc->flags); }

#define CLEARPCGFLAG(uname, lname) \
static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
{ clear_bit(PCG_##lname, &pc->flags); }

/* Cache flag is set only once (at allocation) */
TESTPCGFLAG(Cache, CACHE)

TESTPCGFLAG(Used, USED)
CLEARPCGFLAG(Used, USED)

/* LRU management flags (from global-lru definition) */
TESTPCGFLAG(File, FILE)
SETPCGFLAG(File, FILE)
CLEARPCGFLAG(File, FILE)

TESTPCGFLAG(Active, ACTIVE)
SETPCGFLAG(Active, ACTIVE)
CLEARPCGFLAG(Active, ACTIVE)

TESTPCGFLAG(Unevictable, UNEVICTABLE)
SETPCGFLAG(Unevictable, UNEVICTABLE)
CLEARPCGFLAG(Unevictable, UNEVICTABLE)

static inline int page_cgroup_nid(struct page_cgroup *pc)
{
return page_to_nid(pc->page);
}

static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
{
return page_zonenum(pc->page);
}

static inline void lock_page_cgroup(struct page_cgroup *pc)
{
bit_spin_lock(PCG_LOCK, &pc->flags);
}

static inline int trylock_page_cgroup(struct page_cgroup *pc)
{
return bit_spin_trylock(PCG_LOCK, &pc->flags);
}

static inline void unlock_page_cgroup(struct page_cgroup *pc)
{
bit_spin_unlock(PCG_LOCK, &pc->flags);
}

#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct page_cgroup;

static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}

static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
{
return NULL;
}
#endif
#endif
3 changes: 1 addition & 2 deletions mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,4 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o

obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
Loading

0 comments on commit 52d4b9a

Please sign in to comment.