Skip to content

Commit

Permalink
hugetlb_cgroup: add reservation accounting for private mappings
Browse files Browse the repository at this point in the history
Normally the pointer to the cgroup to uncharge hangs off the struct page,
and gets queried when it's time to free the page.  With hugetlb_cgroup
reservations, this is not possible, because a page may be reserved by one
task and actually faulted in by another task.

The best place to put the hugetlb_cgroup pointer to uncharge for
reservations is in the resv_map.  But, because the resv_map has different
semantics for private and shared mappings, the code path to
charge/uncharge shared and private mappings is different.  This patch
implements charging and uncharging for private mappings.

For private mappings, the counter to uncharge is in
resv_map->reservation_counter.  On initializing the resv_map this is set
to NULL.  On reservation of a region in a private mapping, the task's
hugetlb_cgroup is charged and the hugetlb_cgroup is placed in
resv_map->reservation_counter.

On hugetlb_vm_op_close, we uncharge resv_map->reservation_counter.

[[email protected]: forward declare struct resv_map]
Signed-off-by: Mina Almasry <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Acked-by: David Rientjes <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Sandipan Das <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Shuah Khan <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
mina authored and torvalds committed Apr 2, 2020
1 parent 9808895 commit e9fe92a
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 40 deletions.
10 changes: 10 additions & 0 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ struct resv_map {
long adds_in_progress;
struct list_head region_cache;
long region_cache_count;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* On private mappings, the counter to uncharge reservations is stored
* here. If these fields are 0, then either the mapping is shared, or
* cgroup accounting is disabled for this resv_map.
*/
struct page_counter *reservation_counter;
unsigned long pages_per_hpage;
struct cgroup_subsys_state *css;
#endif
};
extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);
Expand Down
41 changes: 38 additions & 3 deletions include/linux/hugetlb_cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <linux/mmdebug.h>

struct hugetlb_cgroup;
struct resv_map;

/*
* Minimum page order trackable by hugetlb cgroup.
* At least 4 pages are necessary for all the tracking information.
Expand All @@ -27,6 +29,33 @@ struct hugetlb_cgroup;
#define HUGETLB_CGROUP_MIN_ORDER 2

#ifdef CONFIG_CGROUP_HUGETLB
enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
};

/*
* Per-cgroup hugetlb accounting state. Every array below has one entry per
* hstate (HUGE_MAX_HSTATE entries).
*/
struct hugetlb_cgroup {
/* Embedded cgroup subsystem state for this hugetlb cgroup. */
struct cgroup_subsys_state css;

/*
* the counter to account for hugepages from hugetlb.
*/
struct page_counter hugepage[HUGE_MAX_HSTATE];

/*
* the counter to account for hugepage reservations from hugetlb.
*/
struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];

/*
* Per-hstate event counters, one slot per enum hugetlb_memory_event value.
* NOTE(review): presumably these back the "hugetlb.events" and
* "hugetlb.events.local" files handled below - confirm.
*/
atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];

/* Handle for "hugetlb.events" */
struct cgroup_file events_file[HUGE_MAX_HSTATE];

/* Handle for "hugetlb.events.local" */
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};

static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
Expand Down Expand Up @@ -102,9 +131,9 @@ extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern void hugetlb_cgroup_uncharge_counter(struct page_counter *p,
unsigned long nr_pages,
struct cgroup_subsys_state *css);
extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
unsigned long start,
unsigned long end);

extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
Expand Down Expand Up @@ -193,6 +222,12 @@ hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
{
}

/*
 * No-op variant of hugetlb_cgroup_uncharge_counter(): nothing is uncharged
 * and @resv is left untouched.
 * NOTE(review): presumably the stub used when CONFIG_CGROUP_HUGETLB is
 * disabled; the enclosing preprocessor conditional is not visible in this
 * hunk - confirm.
 */
static inline void
hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				unsigned long end)
{
}

static inline void hugetlb_cgroup_file_init(void)
{
}
Expand Down
47 changes: 44 additions & 3 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,25 @@ static void set_vma_private_data(struct vm_area_struct *vma,
vma->vm_private_data = (void *)value;
}

/*
 * Record on @resv_map where its cgroup reservation is to be uncharged from.
 *
 * When both @h_cg and @h are non-NULL, store the cgroup's reserved-hugepage
 * counter for @h's hstate index, the number of base pages per huge page of
 * @h, and the cgroup's css.  When either one is NULL, clear all three fields
 * instead.
 *
 * Compiles to an empty function when CONFIG_CGROUP_HUGETLB is not set.
 */
static void
resv_map_set_hugetlb_cgroup_uncharge_info(struct resv_map *resv_map,
					  struct hugetlb_cgroup *h_cg,
					  struct hstate *h)
{
#ifdef CONFIG_CGROUP_HUGETLB
	if (h_cg && h) {
		resv_map->reservation_counter =
			&h_cg->rsvd_hugepage[hstate_index(h)];
		resv_map->pages_per_hpage = pages_per_huge_page(h);
		resv_map->css = &h_cg->css;
		return;
	}

	/* No cgroup or no hstate: this resv_map carries no uncharge info. */
	resv_map->reservation_counter = NULL;
	resv_map->pages_per_hpage = 0;
	resv_map->css = NULL;
#endif
}

struct resv_map *resv_map_alloc(void)
{
struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
Expand All @@ -666,6 +685,13 @@ struct resv_map *resv_map_alloc(void)
INIT_LIST_HEAD(&resv_map->regions);

resv_map->adds_in_progress = 0;
/*
* Initialize these to 0. On shared mappings, 0's here indicate these
* fields don't do cgroup accounting. On private mappings, these will be
* re-initialized to the proper values, to indicate that hugetlb cgroup
* reservations are to be un-charged from here.
*/
resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, NULL, NULL);

INIT_LIST_HEAD(&resv_map->region_cache);
list_add(&rg->link, &resv_map->region_cache);
Expand Down Expand Up @@ -3296,9 +3322,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
end = vma_hugecache_offset(h, vma, vma->vm_end);

reserve = (end - start) - region_count(resv, start, end);

kref_put(&resv->refs, resv_map_release);

hugetlb_cgroup_uncharge_counter(resv, start, end);
if (reserve) {
/*
* Decrement reserve counts. The global reserve count may be
Expand All @@ -3307,6 +3331,8 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
gbl_reserve = hugepage_subpool_put_pages(spool, reserve);
hugetlb_acct_memory(h, -gbl_reserve);
}

kref_put(&resv->refs, resv_map_release);
}

static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
Expand Down Expand Up @@ -4691,6 +4717,7 @@ int hugetlb_reserve_pages(struct inode *inode,
struct hstate *h = hstate_inode(inode);
struct hugepage_subpool *spool = subpool_inode(inode);
struct resv_map *resv_map;
struct hugetlb_cgroup *h_cg;
long gbl_reserve;

/* This should never happen */
Expand Down Expand Up @@ -4724,12 +4751,26 @@ int hugetlb_reserve_pages(struct inode *inode,
chg = region_chg(resv_map, from, to);

} else {
/* Private mapping. */
resv_map = resv_map_alloc();
if (!resv_map)
return -ENOMEM;

chg = to - from;

if (hugetlb_cgroup_charge_cgroup_rsvd(
hstate_index(h), chg * pages_per_huge_page(h),
&h_cg)) {
kref_put(&resv_map->refs, resv_map_release);
return -ENOMEM;
}

/*
* Since this branch handles private mappings, we attach the
* counter to uncharge for this reservation off resv_map.
*/
resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);

set_vma_resv_map(vma, resv_map);
set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
}
Expand Down
41 changes: 7 additions & 34 deletions mm/hugetlb_cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,6 @@
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
};

/*
* Per-cgroup hugetlb accounting state. Every array below has one entry per
* hstate (HUGE_MAX_HSTATE entries).
*/
struct hugetlb_cgroup {
/* Embedded cgroup subsystem state for this hugetlb cgroup. */
struct cgroup_subsys_state css;

/*
* the counter to account for hugepages from hugetlb.
*/
struct page_counter hugepage[HUGE_MAX_HSTATE];

/*
* the counter to account for hugepage reservations from hugetlb.
*/
struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];

/*
* Per-hstate event counters, one slot per enum hugetlb_memory_event value.
* NOTE(review): presumably these back the "hugetlb.events" and
* "hugetlb.events.local" files handled below - confirm.
*/
atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];

/* Handle for "hugetlb.events" */
struct cgroup_file events_file[HUGE_MAX_HSTATE];

/* Handle for "hugetlb.events.local" */
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};

#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
#define MEMFILE_IDX(val) (((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
Expand Down Expand Up @@ -407,15 +379,16 @@ void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

void hugetlb_cgroup_uncharge_counter(struct page_counter *p,
unsigned long nr_pages,
struct cgroup_subsys_state *css)
void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
unsigned long end)
{
if (hugetlb_cgroup_disabled() || !p || !css)
if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
!resv->css)
return;

page_counter_uncharge(p, nr_pages);
css_put(css);
page_counter_uncharge(resv->reservation_counter,
(end - start) * resv->pages_per_hpage);
css_put(resv->css);
}

enum {
Expand Down

0 comments on commit e9fe92a

Please sign in to comment.