Skip to content

Commit

Permalink
Merge tag 'afs-fixes-20210913' of git://git.kernel.org/pub/scm/linux/…
Browse files Browse the repository at this point in the history
…kernel/git/dhowells/linux-fs

Pull AFS fixes from David Howells:
 "Fixes for AFS problems that can cause data corruption due to
  interaction with another client modifying data cached locally:

   - When d_revalidating a dentry, don't look at the inode to which it
     points. Only check the directory to which the dentry belongs. This
     was confusing things and causing the silly-rename cleanup code to
     remove the file now at the dentry of a file that got deleted.

   - Fix mmap data coherency. When a callback break is received that
     relates to a file that we have cached, the data content may have
     been changed (there are other reasons, such as the user's rights
     having been changed). However, we're checking it lazily, only on
     entry to the kernel, which doesn't happen if we have a writeable
     shared mapped page on that file.

     We make the kernel keep track of mmapped files and clear all PTEs
     mapping to that file as soon as the callback comes in by calling
     unmap_mapping_pages() (we don't necessarily want to zap the
     pagecache). This causes the kernel to be reentered when userspace
     tries to access the mmapped address range again - and at that point
     we can query the server and, if we need to, zap the page cache.

     Ideally, I would check each file at the point of notification, but
     that involves poking the server[*] - which is holding an exclusive
     lock on the vnode it is changing, waiting for all the clients it
     notified to reply. This could then deadlock against the server.
     Further, invalidating the pagecache might call ->launder_page(),
     which would try to write to the file, which would definitely
     deadlock. (AFS doesn't lease file access).

     [*] Checking to see if the file content has changed is a matter of
         comparing the current data version number, but we have to ask
         the server for that. We also need to get a new callback promise
         and we need to poke the server for that too.

   - Add some more points at which the inode is validated, since we're
     doing it lazily, notably in ->read_iter() and ->page_mkwrite(), but
     also when performing some directory operations.

     Ideally, checking in ->read_iter() would be done in some derivation
     of filemap_read(). If we're going to call the server to read the
     file, then we get the file status fetch as part of that.

   - The above is now causing us to make a lot more calls to
     afs_validate() to check the inode - and afs_validate() takes the
     RCU read lock each time to make a quick check (ie.
     afs_check_validity()). This is entirely for the purpose of checking
     cb_s_break to see if the server we're using reinitialised its list
     of callbacks - however this isn't a very common event, so most of
     the time we're taking this needlessly.

     Add a new cell-wide counter to count the number of
     reinitialisations done by any server and check that - and only if
     that changes, take the RCU read lock and check the server list (the
     server list may change, but the cell a file is part of won't).

   - Don't update vnode->cb_s_break and ->cb_v_break inside the validity
     checking loop. The cb_lock is done with read_seqretry, so we might
     go round the loop a second time after resetting those values - and
     that could cause someone else checking validity to miss something
     (I think).

  Also included are patches for fixes for some bugs encountered whilst
  debugging this:

   - Fix a leak of afs_read objects and fix a leak of keys hidden by
     that.

   - Fix a leak of pages that couldn't be added to extend a writeback.

   - Fix the maintenance of i_blocks when i_size is changed by a local
     write or a local dir edit"

Link: https://bugzilla.kernel.org/show_bug.cgi?id=214217 [1]
Link: https://lore.kernel.org/r/163111665183.283156.17200205573146438918.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/163113612442.352844.11162345591911691150.stgit@warthog.procyon.org.uk/ # i_blocks patch

* tag 'afs-fixes-20210913' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Fix updating of i_blocks on file/dir extension
  afs: Fix corruption in reads at fpos 2G-4G from an OpenAFS server
  afs: Try to avoid taking RCU read lock when checking vnode validity
  afs: Fix mmap coherency vs 3rd-party changes
  afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation
  afs: Add missing vnode validation checks
  afs: Fix page leak
  afs: Fix missing put on afs_read objects and missing get on the key therein
  • Loading branch information
torvalds committed Sep 20, 2021
2 parents 707a63e + 9d37e1c commit d9fb678
Show file tree
Hide file tree
Showing 17 changed files with 294 additions and 120 deletions.
44 changes: 42 additions & 2 deletions fs/afs/callback.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,37 @@
#include <linux/sched.h>
#include "internal.h"

/*
* Handle invalidation of an mmap'd file. We invalidate all the PTEs referring
* to the pages in this file's pagecache, forcing the kernel to go through
* ->fault() or ->page_mkwrite() - at which point we can handle invalidation
* more fully.
*/
void afs_invalidate_mmap_work(struct work_struct *work)
{
struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work);

unmap_mapping_pages(vnode->vfs_inode.i_mapping, 0, 0, false);
}

void afs_server_init_callback_work(struct work_struct *work)
{
struct afs_server *server = container_of(work, struct afs_server, initcb_work);
struct afs_vnode *vnode;
struct afs_cell *cell = server->cell;

down_read(&cell->fs_open_mmaps_lock);

list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) {
if (vnode->cb_server == server) {
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
queue_work(system_unbound_wq, &vnode->cb_work);
}
}

up_read(&cell->fs_open_mmaps_lock);
}

/*
* Allow the fileserver to request callback state (re-)initialisation.
* Unfortunately, UUIDs are not guaranteed unique.
Expand All @@ -29,8 +60,11 @@ void afs_init_callback_state(struct afs_server *server)
rcu_read_lock();
do {
server->cb_s_break++;
server = rcu_dereference(server->uuid_next);
} while (0);
atomic_inc(&server->cell->fs_s_break);
if (!list_empty(&server->cell->fs_open_mmaps))
queue_work(system_unbound_wq, &server->initcb_work);

} while ((server = rcu_dereference(server->uuid_next)));
rcu_read_unlock();
}

Expand All @@ -44,11 +78,17 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
vnode->cb_break++;
vnode->cb_v_break = vnode->volume->cb_v_break;
afs_clear_permits(vnode);

if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_lock_may_be_available(vnode);

if (reason != afs_cb_break_for_deleted &&
vnode->status.type == AFS_FTYPE_FILE &&
atomic_read(&vnode->cb_nr_mmap))
queue_work(system_unbound_wq, &vnode->cb_work);

trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
} else {
trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
Expand Down
2 changes: 2 additions & 0 deletions fs/afs/cell.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
seqlock_init(&cell->volume_lock);
cell->fs_servers = RB_ROOT;
seqlock_init(&cell->fs_lock);
INIT_LIST_HEAD(&cell->fs_open_mmaps);
init_rwsem(&cell->fs_open_mmaps_lock);
rwlock_init(&cell->vl_servers_lock);
cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);

Expand Down
57 changes: 18 additions & 39 deletions fs/afs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1077,9 +1077,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
*/
static int afs_d_revalidate_rcu(struct dentry *dentry)
{
struct afs_vnode *dvnode, *vnode;
struct afs_vnode *dvnode;
struct dentry *parent;
struct inode *dir, *inode;
struct inode *dir;
long dir_version, de_version;

_enter("%p", dentry);
Expand Down Expand Up @@ -1109,18 +1109,6 @@ static int afs_d_revalidate_rcu(struct dentry *dentry)
return -ECHILD;
}

/* Check to see if the vnode referred to by the dentry still
* has a callback.
*/
if (d_really_is_positive(dentry)) {
inode = d_inode_rcu(dentry);
if (inode) {
vnode = AFS_FS_I(inode);
if (!afs_check_validity(vnode))
return -ECHILD;
}
}

return 1; /* Still valid */
}

Expand Down Expand Up @@ -1156,17 +1144,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (IS_ERR(key))
key = NULL;

if (d_really_is_positive(dentry)) {
inode = d_inode(dentry);
if (inode) {
vnode = AFS_FS_I(inode);
afs_validate(vnode, key);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
goto out_bad;
}
}

/* lock down the parent dentry so we can peer at it */
/* Hold the parent dentry so we can peer at it */
parent = dget_parent(dentry);
dir = AFS_FS_I(d_inode(parent));

Expand All @@ -1175,7 +1153,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)

if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
_debug("%pd: parent dir deleted", dentry);
goto out_bad_parent;
goto not_found;
}

/* We only need to invalidate a dentry if the server's copy changed
Expand All @@ -1201,12 +1179,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
case 0:
/* the filename maps to something */
if (d_really_is_negative(dentry))
goto out_bad_parent;
goto not_found;
inode = d_inode(dentry);
if (is_bad_inode(inode)) {
printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
dentry);
goto out_bad_parent;
goto not_found;
}

vnode = AFS_FS_I(inode);
Expand All @@ -1228,9 +1206,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
dentry, fid.unique,
vnode->fid.unique,
vnode->vfs_inode.i_generation);
write_seqlock(&vnode->cb_lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
write_sequnlock(&vnode->cb_lock);
goto not_found;
}
goto out_valid;
Expand All @@ -1245,7 +1220,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
default:
_debug("failed to iterate dir %pd: %d",
parent, ret);
goto out_bad_parent;
goto not_found;
}

out_valid:
Expand All @@ -1256,16 +1231,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
_leave(" = 1 [valid]");
return 1;

/* the dirent, if it exists, now points to a different vnode */
not_found:
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);

out_bad_parent:
_debug("dropping dentry %pd2", dentry);
dput(parent);
out_bad:
key_put(key);

_leave(" = 0 [bad]");
Expand Down Expand Up @@ -1792,6 +1760,10 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error;
}

ret = afs_validate(vnode, op->key);
if (ret < 0)
goto error_op;

afs_op_set_vnode(op, 0, dvnode);
afs_op_set_vnode(op, 1, vnode);
op->file[0].dv_delta = 1;
Expand All @@ -1805,6 +1777,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
op->create.reason = afs_edit_dir_for_link;
return afs_do_sync_operation(op);

error_op:
afs_put_operation(op);
error:
d_drop(dentry);
_leave(" = %d", ret);
Expand Down Expand Up @@ -1989,6 +1963,11 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
if (IS_ERR(op))
return PTR_ERR(op);

ret = afs_validate(vnode, op->key);
op->error = ret;
if (ret < 0)
goto error;

afs_op_set_vnode(op, 0, orig_dvnode);
afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
op->file[0].dv_delta = 1;
Expand Down
4 changes: 2 additions & 2 deletions fs/afs/dir_edit.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
if (b == nr_blocks) {
_debug("init %u", b);
afs_edit_init_block(meta, block, b);
i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE);
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
}

/* Only lower dir pages have a counter in the header. */
Expand Down Expand Up @@ -296,7 +296,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
new_directory:
afs_edit_init_block(meta, meta, 0);
i_size = AFS_DIR_BLOCK_SIZE;
i_size_write(&vnode->vfs_inode, i_size);
afs_set_i_size(vnode, i_size);
slot = AFS_DIR_RESV_BLOCKS0;
page = page0;
block = meta;
Expand Down
86 changes: 83 additions & 3 deletions fs/afs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
static int afs_releasepage(struct page *page, gfp_t gfp_flags);

static void afs_readahead(struct readahead_control *ractl);
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
static void afs_vm_open(struct vm_area_struct *area);
static void afs_vm_close(struct vm_area_struct *area);
static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);

const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.read_iter = afs_file_read_iter,
.write_iter = afs_file_write,
.mmap = afs_file_mmap,
.splice_read = generic_file_splice_read,
Expand Down Expand Up @@ -59,8 +63,10 @@ const struct address_space_operations afs_fs_aops = {
};

static const struct vm_operations_struct afs_vm_ops = {
.open = afs_vm_open,
.close = afs_vm_close,
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.map_pages = afs_vm_map_pages,
.page_mkwrite = afs_page_mkwrite,
};

Expand Down Expand Up @@ -295,7 +301,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq)
fsreq->subreq = subreq;
fsreq->pos = subreq->start + subreq->transferred;
fsreq->len = subreq->len - subreq->transferred;
fsreq->key = subreq->rreq->netfs_priv;
fsreq->key = key_get(subreq->rreq->netfs_priv);
fsreq->vnode = vnode;
fsreq->iter = &fsreq->def_iter;

Expand All @@ -304,6 +310,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq)
fsreq->pos, fsreq->len);

afs_fetch_data(fsreq->vnode, fsreq);
afs_put_read(fsreq);
}

static int afs_symlink_readpage(struct page *page)
Expand Down Expand Up @@ -490,15 +497,88 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
return 1;
}

static void afs_add_open_mmap(struct afs_vnode *vnode)
{
if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
down_write(&vnode->volume->cell->fs_open_mmaps_lock);

list_add_tail(&vnode->cb_mmap_link,
&vnode->volume->cell->fs_open_mmaps);

up_write(&vnode->volume->cell->fs_open_mmaps_lock);
}
}

static void afs_drop_open_mmap(struct afs_vnode *vnode)
{
if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
return;

down_write(&vnode->volume->cell->fs_open_mmaps_lock);

if (atomic_read(&vnode->cb_nr_mmap) == 0)
list_del_init(&vnode->cb_mmap_link);

up_write(&vnode->volume->cell->fs_open_mmaps_lock);
flush_work(&vnode->cb_work);
}

/*
* Handle setting up a memory mapping on an AFS file.
*/
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
int ret;

afs_add_open_mmap(vnode);

ret = generic_file_mmap(file, vma);
if (ret == 0)
vma->vm_ops = &afs_vm_ops;
else
afs_drop_open_mmap(vnode);
return ret;
}

static void afs_vm_open(struct vm_area_struct *vma)
{
afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
}

static void afs_vm_close(struct vm_area_struct *vma)
{
afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
}

static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
struct afs_file *af = vmf->vma->vm_file->private_data;

switch (afs_validate(vnode, af->key)) {
case 0:
return filemap_map_pages(vmf, start_pgoff, end_pgoff);
case -ENOMEM:
return VM_FAULT_OOM;
case -EINTR:
case -ERESTARTSYS:
return VM_FAULT_RETRY;
case -ESTALE:
default:
return VM_FAULT_SIGBUS;
}
}

static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
struct afs_file *af = iocb->ki_filp->private_data;
int ret;

ret = afs_validate(vnode, af->key);
if (ret < 0)
return ret;

return generic_file_read_iter(iocb, iter);
}
Loading

0 comments on commit d9fb678

Please sign in to comment.