Skip to content

Commit

Permalink
hugetlbfs: allow the creation of files suitable for MAP_PRIVATE on th…
Browse files Browse the repository at this point in the history
…e vfs internal mount

This patchset adds a flag to mmap that allows the user to request that an
anonymous mapping be backed with huge pages.  This mapping will borrow
functionality from the huge page shm code to create a file on the kernel
internal mount and use it to approximate an anonymous mapping.  The
MAP_HUGETLB flag is a modifier to MAP_ANONYMOUS and will not work without
both flags being preset.

A new flag is necessary because there is no other way to hook into huge
pages without creating a file on a hugetlbfs mount which wouldn't be
MAP_ANONYMOUS.

To userspace, this mapping will behave just like an anonymous mapping
because the file is not accessible outside of the kernel.

This patchset is meant to simplify the programming model.  Presently there
is a large chunk of boiler platecode, contained in libhugetlbfs, required
to create private, hugepage backed mappings.  This patch set would allow
use of hugepages without linking to libhugetlbfs or having hugetblfs
mounted.

Unification of the VM code would provide these same benefits, but it has
been resisted each time that it has been suggested for several reasons: it
would break PAGE_SIZE assumptions across the kernel, it makes page-table
abstractions really expensive, and it does not provide any benefit on
architectures that do not support huge pages, incurring fast path
penalties without providing any benefit on these architectures.

This patch:

There are two means of creating mappings backed by huge pages:

        1. mmap() a file created on hugetlbfs
        2. Use shm which creates a file on an internal mount which essentially
           maps it MAP_SHARED

The internal mount is only used for shared mappings but there is very
little that stops it being used for private mappings. This patch extends
hugetlbfs_file_setup() to deal with the creation of files that will be
mapped MAP_PRIVATE on the internal hugetlbfs mount. This extended API is
used in a subsequent patch to implement the MAP_HUGETLB mmap() flag.

Signed-off-by: Eric Munson <[email protected]>
Acked-by: David Rientjes <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Adam Litke <[email protected]>
Cc: David Gibson <[email protected]>
Cc: Lee Schermerhorn <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
khers authored and torvalds committed Sep 22, 2009
1 parent f8dbf0a commit 6bfde05
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 7 deletions.
21 changes: 17 additions & 4 deletions fs/hugetlbfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
INIT_LIST_HEAD(&inode->i_mapping->private_list);
info = HUGETLBFS_I(inode);
/*
* The policy is initialized here even if we are creating a
* private inode because initialization simply creates an
* an empty rb tree and calls spin_lock_init(), later when we
* call mpol_free_shared_policy() it will just return because
* the rb tree will still be empty.
*/
mpol_shared_policy_init(&info->policy, NULL);
switch (mode & S_IFMT) {
default:
Expand Down Expand Up @@ -931,13 +938,19 @@ static struct file_system_type hugetlbfs_fs_type = {

static struct vfsmount *hugetlbfs_vfsmount;

static int can_do_hugetlb_shm(void)
static int can_do_hugetlb_shm(int creat_flags)
{
return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
if (creat_flags != HUGETLB_SHMFS_INODE)
return 0;
if (capable(CAP_IPC_LOCK))
return 1;
if (in_group_p(sysctl_hugetlb_shm_group))
return 1;
return 0;
}

struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
struct user_struct **user)
struct user_struct **user, int creat_flags)
{
int error = -ENOMEM;
struct file *file;
Expand All @@ -949,7 +962,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
if (!hugetlbfs_vfsmount)
return ERR_PTR(-ENOENT);

if (!can_do_hugetlb_shm()) {
if (!can_do_hugetlb_shm(creat_flags)) {
*user = current_user();
if (user_shm_lock(size, *user)) {
WARN_ONCE(1,
Expand Down
12 changes: 10 additions & 2 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)

#endif /* !CONFIG_HUGETLB_PAGE */

enum {
/*
* The file will be used as an shm file so shmfs accounting rules
* apply
*/
HUGETLB_SHMFS_INODE = 1,
};

#ifdef CONFIG_HUGETLBFS
struct hugetlbfs_config {
uid_t uid;
Expand Down Expand Up @@ -150,7 +158,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
extern const struct file_operations hugetlbfs_file_operations;
extern struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, int acct,
struct user_struct **user);
struct user_struct **user, int creat_flags);
int hugetlb_get_quota(struct address_space *mapping, long delta);
void hugetlb_put_quota(struct address_space *mapping, long delta);

Expand All @@ -172,7 +180,7 @@ static inline void set_file_hugepages(struct file *file)

#define is_file_hugepages(file) 0
#define set_file_hugepages(file) BUG()
#define hugetlb_file_setup(name,size,acct,user) ERR_PTR(-ENOSYS)
#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS)

#endif /* !CONFIG_HUGETLBFS */

Expand Down
2 changes: 1 addition & 1 deletion ipc/shm.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (shmflg & SHM_NORESERVE)
acctflag = VM_NORESERVE;
file = hugetlb_file_setup(name, size, acctflag,
&shp->mlock_user);
&shp->mlock_user, HUGETLB_SHMFS_INODE);
} else {
/*
* Do not allow no accounting for OVERCOMMIT_NEVER, even
Expand Down

0 comments on commit 6bfde05

Please sign in to comment.