Skip to content

Commit

Permalink
mm: cleancache core ops functions and config
Browse files Browse the repository at this point in the history
This third patch of eight in this cleancache series provides
the core code for cleancache that interfaces between the hooks in
VFS and individual filesystems and a cleancache backend.  It also
includes build and config patches.

Two new files are added: mm/cleancache.c and include/linux/cleancache.h.

Note that CONFIG_CLEANCACHE can default to on; in systems that do
not provide a cleancache backend, all hooks devolve to a simple
check of a global enable flag, so performance impact should
be negligible but can be reduced to zero impact if config'ed off.
However for this first commit, it defaults to off.

Details and a FAQ can be found in Documentation/vm/cleancache.txt

Credits: Cleancache_ops design derived from Jeremy Fitzhardinge
design for tmem

[v8: [email protected]: fix exportfs call affecting btrfs]
[v8: [email protected]: use static inline function, not macro]
[v7: [email protected]: cleanup sysfs and remove cleancache prefix]
[v6: [email protected]: robustly handle buggy fs encode_fh actor definition]
[v5: [email protected]: clean up global usage and static var names]
[v5: [email protected]: simplify init hook and any future fs init changes]
[v5: [email protected]: cleaner non-global interface for ops registration]
[v4: [email protected]: interface must support exportfs FS's]
[v4: [email protected]: interface must support 64-bit FS on 32-bit kernel]
[v3: [email protected]: use one ops struct to avoid pointer hops]
[v3: [email protected]: document and ensure PageLocked reqts are met]
[v3: [email protected]: fix success/fail codes, change funcs to void]
[v2: [email protected]: use sane types]
Signed-off-by: Dan Magenheimer <[email protected]>
Reviewed-by: Jeremy Fitzhardinge <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Acked-by: Al Viro <[email protected]>
Acked-by: Andrew Morton <[email protected]>
Acked-by: Nitin Gupta <[email protected]>
Acked-by: Minchan Kim <[email protected]>
Acked-by: Andreas Dilger <[email protected]>
Acked-by: Jan Beulich <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik Van Riel <[email protected]>
Cc: Chris Mason <[email protected]>
Cc: Ted Ts'o <[email protected]>
Cc: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
  • Loading branch information
Dan Magenheimer committed May 26, 2011
1 parent 9fdfdcf commit 077b1f8
Show file tree
Hide file tree
Showing 4 changed files with 390 additions and 0 deletions.
122 changes: 122 additions & 0 deletions include/linux/cleancache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#ifndef _LINUX_CLEANCACHE_H
#define _LINUX_CLEANCACHE_H

#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>

#define CLEANCACHE_KEY_MAX 6

/*
* cleancache requires every file with a page in cleancache to have a
* unique key unless/until the file is removed/truncated. For some
* filesystems, the inode number is unique, but for "modern" filesystems
* an exportable filehandle is required (see exportfs.h)
*/
/*
 * Per-file key handed to the cleancache backend. All three members overlay
 * the same storage; which view is meaningful depends on whether the
 * filesystem supplies an exportable filehandle.
 */
struct cleancache_filekey {
union {
ino_t ino; /* inode number view of the key */
__u32 fh[CLEANCACHE_KEY_MAX]; /* exportable filehandle view, up to CLEANCACHE_KEY_MAX words */
u32 key[CLEANCACHE_KEY_MAX]; /* raw word view, convenient for zero-initialization */
} u;
};

/*
 * Table of operations implemented by a cleancache backend. The leading int
 * argument of the page/inode/fs operations is the pool id that init_fs or
 * init_shared_fs returned for the filesystem.
 */
struct cleancache_ops {
/* create a pool for a filesystem; argument is the page size, result is the pool id */
int (*init_fs)(size_t);
/* as init_fs, for a shared filesystem identified by uuid */
int (*init_shared_fs)(char *uuid, size_t);
/* fill the page from the backend; a return of 0 is treated as success */
int (*get_page)(int, struct cleancache_filekey,
pgoff_t, struct page *);
/* offer the page's data to the backend; no result is reported */
void (*put_page)(int, struct cleancache_filekey,
pgoff_t, struct page *);
/* drop one page (pool id, file key, page index) */
void (*flush_page)(int, struct cleancache_filekey, pgoff_t);
/* drop every page of one file (pool id, file key) */
void (*flush_inode)(int, struct cleancache_filekey);
/* drop an entire pool */
void (*flush_fs)(int);
};

/*
 * Backend registration: takes a pointer to the backend's operations table
 * and returns a struct cleancache_ops by value.
 */
extern struct cleancache_ops
cleancache_register_ops(struct cleancache_ops *ops);
/*
 * Out-of-line implementations behind the inline cleancache_*() wrappers
 * defined later in this header; the wrappers call these only after testing
 * cleancache_enabled (and, for the page and inode operations, the
 * per-superblock pool id).
 */
extern void __cleancache_init_fs(struct super_block *);
extern void __cleancache_init_shared_fs(char *, struct super_block *);
extern int __cleancache_get_page(struct page *);
extern void __cleancache_put_page(struct page *);
extern void __cleancache_flush_page(struct address_space *, struct page *);
extern void __cleancache_flush_inode(struct address_space *);
extern void __cleancache_flush_fs(struct super_block *);
/* global enable flag tested by every inline wrapper in this header */
extern int cleancache_enabled;

#ifdef CONFIG_CLEANCACHE
/*
 * A filesystem participates in cleancache when its superblock carries a
 * non-negative pool id.
 */
static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
{
	struct super_block *sb = mapping->host->i_sb;

	return sb->cleancache_poolid >= 0;
}

/* Same test, reached through the page's own mapping. */
static inline bool cleancache_fs_enabled(struct page *page)
{
	return cleancache_fs_enabled_mapping(page->mapping);
}
#else
/* Compiled out: every check collapses to a constant false. */
#define cleancache_enabled (0)
#define cleancache_fs_enabled(_page) (0)
#define cleancache_fs_enabled_mapping(_mapping) (0)
#endif

/*
* The shim layer provided by these inline functions allows the compiler
* to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
* is disabled, to a single global variable check if CONFIG_CLEANCACHE
* is enabled but no cleancache "backend" has dynamically enabled it,
* and, for the most frequent cleancache ops, to a single global variable
* check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
* and a cleancache backend has dynamically enabled cleancache, but the
* filesystem referenced by that cleancache op has not enabled cleancache.
* As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
* no measurable performance impact.
*/

/* Mount-time hook: forwards to the core only when cleancache is enabled. */
static inline void cleancache_init_fs(struct super_block *sb)
{
	if (!cleancache_enabled)
		return;
	__cleancache_init_fs(sb);
}

/*
 * Mount-time hook for a shared filesystem identified by uuid: forwards to
 * the core only when cleancache is enabled.
 */
static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb)
{
	if (!cleancache_enabled)
		return;
	__cleancache_init_shared_fs(uuid, sb);
}

/*
 * Try to fill the page from cleancache. Returns -1 without calling the core
 * when cleancache is disabled globally or for the page's filesystem;
 * otherwise returns whatever __cleancache_get_page() reports.
 */
static inline int cleancache_get_page(struct page *page)
{
	if (!cleancache_enabled || !cleancache_fs_enabled(page))
		return -1;
	return __cleancache_get_page(page);
}

/*
 * Offer the page to cleancache; a no-op unless cleancache is enabled both
 * globally and for the page's filesystem.
 */
static inline void cleancache_put_page(struct page *page)
{
	if (!cleancache_enabled)
		return;
	if (!cleancache_fs_enabled(page))
		return;
	__cleancache_put_page(page);
}

/*
 * Flush a single page from cleancache. The filesystem check goes through
 * the mapping argument rather than the page, because page->mapping is
 * sometimes NULL when this is called.
 */
static inline void cleancache_flush_page(struct address_space *mapping,
					 struct page *page)
{
	if (!cleancache_enabled)
		return;
	if (!cleancache_fs_enabled_mapping(mapping))
		return;
	__cleancache_flush_page(mapping, page);
}

/*
 * Flush all pages of the mapping's inode from cleancache; a no-op unless
 * cleancache is enabled both globally and for the mapping's filesystem.
 */
static inline void cleancache_flush_inode(struct address_space *mapping)
{
	if (!cleancache_enabled)
		return;
	if (!cleancache_fs_enabled_mapping(mapping))
		return;
	__cleancache_flush_inode(mapping);
}

/* Unmount-time hook: forwards to the core only when cleancache is enabled. */
static inline void cleancache_flush_fs(struct super_block *sb)
{
	if (!cleancache_enabled)
		return;
	__cleancache_flush_fs(sb);
}

#endif /* _LINUX_CLEANCACHE_H */
23 changes: 23 additions & 0 deletions mm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,26 @@ config NEED_PER_CPU_KM
depends on !SMP
bool
default y

config CLEANCACHE
bool "Enable cleancache driver to cache clean pages if tmem is present"
default n
help
Cleancache can be thought of as a page-granularity victim cache
for clean pages that the kernel's pageframe replacement algorithm
(PFRA) would like to keep around, but can't since there isn't enough
memory. So when the PFRA "evicts" a page, it first attempts to use
cleancache code to put the data contained in that page into
"transcendent memory", memory that is not directly accessible or
addressable by the kernel and is of unknown and possibly
time-varying size. And when a cleancache-enabled
filesystem wishes to access a page in a file on disk, it first
checks cleancache to see if it already contains it; if it does,
the page is copied into the kernel and a disk access is avoided.
When a transcendent memory driver is available (such as zcache or
Xen transcendent memory), a significant I/O reduction
may be achieved. When none is available, all cleancache calls
are reduced to a single check of a global enable flag, resulting
in a negligible performance hit.

If unsure, say Y to enable cleancache
1 change: 1 addition & 0 deletions mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
244 changes: 244 additions & 0 deletions mm/cleancache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* Cleancache frontend
*
* This code provides the generic "frontend" layer to call a matching
* "backend" driver implementation of cleancache. See
* Documentation/vm/cleancache.txt for more information.
*
* Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
* Author: Dan Magenheimer
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
#include <linux/cleancache.h>

/*
 * Global enablement flag. It may be read thousands of times per second
 * by cleancache_get/put/flush even on systems where cleancache_ops
 * is not claimed (e.g. cleancache is config'ed on but remains
 * disabled), so a plain exported global is preferred to the slower
 * alternative: a function call that checks a non-global.
 *
 * Set to 1 by cleancache_register_ops() once a backend has registered.
 */
int cleancache_enabled;
EXPORT_SYMBOL(cleancache_enabled);

/*
 * cleancache_ops is set by cleancache_register_ops to contain the pointers
 * to the cleancache "backend" implementation functions. It holds a copy of
 * the registered table, not a pointer to the backend's own structure.
 */
static struct cleancache_ops cleancache_ops;

/* useful stats available in /sys/kernel/mm/cleancache */
static unsigned long cleancache_succ_gets; /* backend get_page calls that returned 0 */
static unsigned long cleancache_failed_gets; /* backend get_page calls that returned non-zero */
static unsigned long cleancache_puts; /* backend put_page calls issued */
static unsigned long cleancache_flushes; /* backend flush_page calls issued (flush_inode/flush_fs are not counted) */

/*
* register operations for cleancache, returning previous thus allowing
* detection of multiple backends and possible nesting
*/
struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
{
struct cleancache_ops old = cleancache_ops;

cleancache_ops = *ops;
cleancache_enabled = 1;
return old;
}
EXPORT_SYMBOL(cleancache_register_ops);

/*
 * Called by a cleancache-enabled filesystem at time of mount: asks the
 * backend for a pool sized for PAGE_SIZE pages and records the returned
 * pool id in the superblock.
 */
void __cleancache_init_fs(struct super_block *sb)
{
	int pool_id = cleancache_ops.init_fs(PAGE_SIZE);

	sb->cleancache_poolid = pool_id;
}
EXPORT_SYMBOL(__cleancache_init_fs);

/*
 * Called by a cleancache-enabled clustered filesystem at time of mount:
 * asks the backend for a shared pool identified by uuid and records the
 * returned pool id in the superblock.
 */
void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
{
	int pool_id = cleancache_ops.init_shared_fs(uuid, PAGE_SIZE);

	sb->cleancache_poolid = pool_id;
}
EXPORT_SYMBOL(__cleancache_init_shared_fs);

/*
* If the filesystem uses exportable filehandles, use the filehandle as
* the key, else use the inode number.
*/
static int cleancache_get_key(struct inode *inode,
struct cleancache_filekey *key)
{
int (*fhfn)(struct dentry *, __u32 *fh, int *, int);
int len = 0, maxlen = CLEANCACHE_KEY_MAX;
struct super_block *sb = inode->i_sb;

key->u.ino = inode->i_ino;
if (sb->s_export_op != NULL) {
fhfn = sb->s_export_op->encode_fh;
if (fhfn) {
struct dentry d;
d.d_inode = inode;
len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0);
if (len <= 0 || len == 255)
return -1;
if (maxlen > CLEANCACHE_KEY_MAX)
return -1;
}
}
return 0;
}

/*
* "Get" data from cleancache associated with the poolid/inode/index
* that were specified when the data was put to cleanache and, if
* successful, use it to fill the specified page with data and return 0.
* The pageframe is unchanged and returns -1 if the get fails.
* Page must be locked by caller.
*/
int __cleancache_get_page(struct page *page)
{
int ret = -1;
int pool_id;
struct cleancache_filekey key = { .u.key = { 0 } };

VM_BUG_ON(!PageLocked(page));
pool_id = page->mapping->host->i_sb->cleancache_poolid;
if (pool_id < 0)
goto out;

if (cleancache_get_key(page->mapping->host, &key) < 0)
goto out;

ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
if (ret == 0)
cleancache_succ_gets++;
else
cleancache_failed_gets++;
out:
return ret;
}
EXPORT_SYMBOL(__cleancache_get_page);

/*
* "Put" data from a page to cleancache and associate it with the
* (previously-obtained per-filesystem) poolid and the page's,
* inode and page index. Page must be locked. Note that a put_page
* always "succeeds", though a subsequent get_page may succeed or fail.
*/
void __cleancache_put_page(struct page *page)
{
int pool_id;
struct cleancache_filekey key = { .u.key = { 0 } };

VM_BUG_ON(!PageLocked(page));
pool_id = page->mapping->host->i_sb->cleancache_poolid;
if (pool_id >= 0 &&
cleancache_get_key(page->mapping->host, &key) >= 0) {
(*cleancache_ops.put_page)(pool_id, key, page->index, page);
cleancache_puts++;
}
}
EXPORT_SYMBOL(__cleancache_put_page);

/*
* Flush any data from cleancache associated with the poolid and the
* page's inode and page index so that a subsequent "get" will fail.
*/
void __cleancache_flush_page(struct address_space *mapping, struct page *page)
{
/* careful... page->mapping is NULL sometimes when this is called */
int pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };

if (pool_id >= 0) {
VM_BUG_ON(!PageLocked(page));
if (cleancache_get_key(mapping->host, &key) >= 0) {
(*cleancache_ops.flush_page)(pool_id, key, page->index);
cleancache_flushes++;
}
}
}
EXPORT_SYMBOL(__cleancache_flush_page);

/*
* Flush all data from cleancache associated with the poolid and the
* mappings's inode so that all subsequent gets to this poolid/inode
* will fail.
*/
void __cleancache_flush_inode(struct address_space *mapping)
{
int pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };

if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
(*cleancache_ops.flush_inode)(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_flush_inode);

/*
 * Called by any cleancache-enabled filesystem at time of unmount;
 * note that pool_id is surrendered and may be returned by a subsequent
 * cleancache_init_fs or cleancache_init_shared_fs. The superblock's
 * pool id is reset to -1 before the backend is told to drop the pool.
 */
void __cleancache_flush_fs(struct super_block *sb)
{
	int old_poolid = sb->cleancache_poolid;

	if (old_poolid < 0)
		return;

	sb->cleancache_poolid = -1;
	cleancache_ops.flush_fs(old_poolid);
}
EXPORT_SYMBOL(__cleancache_flush_fs);

#ifdef CONFIG_SYSFS

/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */

/*
 * CLEANCACHE_SYSFS_RO(_name) - define a read-only sysfs attribute.
 *
 * Expands to two definitions:
 *   cleancache_<_name>_show(): formats the unsigned long counter
 *     cleancache_<_name> as "%lu\n" into buf and returns sprintf's result;
 *   cleancache_<_name>_attr: a kobj_attribute named "<_name>" with mode
 *     0444 whose show method is the function above.
 * No store method is set. The trailing semicolon is supplied by the user
 * of the macro.
 */
#define CLEANCACHE_SYSFS_RO(_name) \
static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf) \
{ \
return sprintf(buf, "%lu\n", cleancache_##_name); \
} \
static struct kobj_attribute cleancache_##_name##_attr = { \
.attr = { .name = __stringify(_name), .mode = 0444 }, \
.show = cleancache_##_name##_show, \
}

/* one read-only attribute per statistics counter */
CLEANCACHE_SYSFS_RO(succ_gets);
CLEANCACHE_SYSFS_RO(failed_gets);
CLEANCACHE_SYSFS_RO(puts);
CLEANCACHE_SYSFS_RO(flushes);

/* NULL-terminated list of the attributes exposed in the "cleancache" group */
static struct attribute *cleancache_attrs[] = {
&cleancache_succ_gets_attr.attr,
&cleancache_failed_gets_attr.attr,
&cleancache_puts_attr.attr,
&cleancache_flushes_attr.attr,
NULL,
};

/*
 * Attribute group named "cleancache"; init_cleancache() registers it under
 * mm_kobj.
 */
static struct attribute_group cleancache_attr_group = {
.attrs = cleancache_attrs,
.name = "cleancache",
};

#endif /* CONFIG_SYSFS */

/*
 * Boot-time initialization: when sysfs is configured, publish the
 * statistics attribute group under mm_kobj.
 *
 * Returns 0 on success (or when sysfs is not configured), otherwise the
 * error reported by sysfs_create_group(), so a failure to create the
 * statistics files is no longer silently discarded.
 */
static int __init init_cleancache(void)
{
	int err = 0;

#ifdef CONFIG_SYSFS
	err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
#endif /* CONFIG_SYSFS */
	return err;
}
module_init(init_cleancache)

0 comments on commit 077b1f8

Please sign in to comment.