diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 192d476d7c76b1..45b7fc405fa646 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -24,11 +24,10 @@ kafs-objs := \ rxrpc.o \ security.o \ server.o \ + server_list.o \ super.o \ netdevices.o \ vlclient.o \ - vlocation.o \ - vnode.o \ volume.o \ write.o \ xattr.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index ecb9c72aebd2e5..b91e59a77f0e13 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -227,6 +227,37 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) return alist; } +/* + * Merge an IPv4 entry into a fileserver address list. + */ +void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr) +{ + struct sockaddr_in6 *a; + int i; + + for (i = 0; i < alist->nr_ipv4; i++) { + a = &alist->addrs[i].transport.sin6; + if (xdr == a->sin6_addr.s6_addr32[3]) + return; + if (xdr < a->sin6_addr.s6_addr32[3]) + break; + } + + if (i < alist->nr_addrs) + memmove(alist->addrs + i + 1, + alist->addrs + i, + sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); + + a = &alist->addrs[i].transport.sin6; + a->sin6_port = htons(AFS_FS_PORT); + a->sin6_addr.s6_addr32[0] = 0; + a->sin6_addr.s6_addr32[1] = 0; + a->sin6_addr.s6_addr32[2] = htonl(0xffff); + a->sin6_addr.s6_addr32[3] = xdr; + alist->nr_ipv4++; + alist->nr_addrs++; +} + /* * Get an address to try. */ diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 05395d0f1941d1..d47b6d01e4c022 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -38,6 +38,7 @@ enum AFS_FS_Operations { FSFETCHDATA64 = 65537, /* AFS Fetch file data */ FSSTOREDATA64 = 65538, /* AFS Store file data */ FSGIVEUPALLCALLBACKS = 65539, /* AFS Give up all outstanding callbacks on a server */ + FSGETCAPABILITIES = 65540, /* Probe and get the capabilities of a fileserver */ }; enum AFS_FS_Errors { diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index 4eaa620992c80a..6350b417aee93e 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -88,4 +88,46 @@ struct afs_vldbentry { #define AFS_VLDB_MAXNAMELEN 65 + +struct afs_ListAddrByAttributes__xdr { + __be32 Mask; +#define AFS_VLADDR_IPADDR 0x1 /* Match by ->ipaddr */ +#define AFS_VLADDR_INDEX 0x2 /* Match by ->index */ +#define AFS_VLADDR_UUID 0x4 /* Match by ->uuid */ + __be32 ipaddr; + __be32 index; + __be32 spare; + struct afs_uuid__xdr uuid; +}; + +struct afs_uvldbentry__xdr { + __be32 name[AFS_VLDB_MAXNAMELEN]; + __be32 nServers; + struct afs_uuid__xdr serverNumber[AFS_NMAXNSERVERS]; + __be32 serverUnique[AFS_NMAXNSERVERS]; + __be32 serverPartition[AFS_NMAXNSERVERS]; + __be32 serverFlags[AFS_NMAXNSERVERS]; + __be32 volumeId[AFS_MAXTYPES]; + __be32 cloneId; + __be32 flags; + __be32 spares1; + __be32 spares2; + __be32 spares3; + __be32 spares4; + __be32 spares5; + __be32 spares6; + __be32 spares7; + __be32 spares8; + __be32 spares9; +}; + +struct afs_address_list { + refcount_t usage; + unsigned int version; + unsigned int nr_addrs; + struct sockaddr_rxrpc addrs[]; +}; + +extern void afs_put_address_list(struct afs_address_list *alist); + #endif /* AFS_VL_H */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 82f4c7a3b7b697..f4291b57605437 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -26,10 +26,10 @@ * - Called with volume->server_sem held. */ int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_cb_interest **ppcbi, - struct afs_server *server) + struct afs_server_entry *entry) { - struct afs_cb_interest *cbi = *ppcbi, *vcbi, *new, *x; + struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x; + struct afs_server *server = entry->server; again: vcbi = vnode->cb_interest; @@ -47,7 +47,7 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode, if (!cbi && vcbi->server == server) { afs_get_cb_interest(vcbi); - x = cmpxchg(ppcbi, cbi, vcbi); + x = cmpxchg(&entry->cb_interest, cbi, vcbi); if (x != cbi) { cbi = x; afs_put_cb_interest(afs_v2net(vnode), vcbi); @@ -72,7 +72,7 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode, list_add_tail(&new->cb_link, &server->cb_interests); write_unlock(&server->cb_break_lock); - x = cmpxchg(ppcbi, cbi, new); + x = cmpxchg(&entry->cb_interest, cbi, new); if (x == cbi) { cbi = new; } else { @@ -137,7 +137,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) */ void afs_init_callback_state(struct afs_server *server) { - if (!test_and_clear_bit(AFS_SERVER_NEW, &server->flags)) + if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags)) server->cb_s_break++; } @@ -233,12 +233,12 @@ void afs_break_callbacks(struct afs_server *server, size_t count, /* * Clear the callback interests in a server list. */ -void afs_clear_callback_interests(struct afs_net *net, struct afs_volume *volume) +void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist) { int i; - for (i = 0; i < ARRAY_SIZE(volume->cb_interests); i++) { - afs_put_cb_interest(net, volume->cb_interests[i]); - volume->cb_interests[i] = NULL; + for (i = 0; i < slist->nr_servers; i++) { + afs_put_cb_interest(net, slist->servers[i].cb_interest); + slist->servers[i].cb_interest = NULL; } } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index a0e08d3a108c83..1858c91169e4fc 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -146,13 +146,10 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); - rwlock_init(&cell->servers_lock); - INIT_LIST_HEAD(&cell->servers); - init_rwsem(&cell->vl_sem); - INIT_LIST_HEAD(&cell->vl_list); - spin_lock_init(&cell->vl_lock); cell->flags = ((1 << AFS_CELL_FL_NOT_READY) | (1 << AFS_CELL_FL_NO_LOOKUP_YET)); + INIT_LIST_HEAD(&cell->proc_volumes); + rwlock_init(&cell->proc_lock); rwlock_init(&cell->vl_addrs_lock); /* Fill in the VL server list if we were given a list of addresses to diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 37083699a0dfef..53f3917440e7c4 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -553,7 +553,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version; /* instantiate the dentry */ - inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL); + inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL); key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", PTR_ERR(inode)); @@ -740,21 +740,49 @@ static void afs_d_release(struct dentry *dentry) _enter("%pd", dentry); } +/* + * Create a new inode for create/mkdir/symlink + */ +static void afs_vnode_new_inode(struct afs_fs_cursor *fc, + struct dentry *new_dentry, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct afs_callback *newcb) +{ + struct inode *inode; + + if (fc->ac.error < 0) + return; + + inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key, + newfid, newstatus, newcb, fc->cbi); + if (IS_ERR(inode)) { + /* ENOMEM or EINTR at a really inconvenient time - just abandon + * the new directory on the server. + */ + fc->ac.error = PTR_ERR(inode); + return; + } + + d_instantiate(new_dentry, inode); + if (d_unhashed(new_dentry)) + d_rehash(new_dentry); +} + /* * create a directory on an AFS filesystem */ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct afs_file_status status; - struct afs_callback cb; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_file_status newstatus; + struct afs_fs_cursor fc; + struct afs_callback newcb; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); + mode |= S_IFDIR; _enter("{%x:%u},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); @@ -765,40 +793,27 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto error; } - mode |= S_IFDIR; - ret = afs_vnode_create(dvnode, key, dentry->d_name.name, - mode, &fid, &status, &cb, &server); - if (ret < 0) - goto mkdir_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_create(&fc, dentry->d_name.name, mode, + &newfid, &newstatus, &newcb); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_i2net(dir), server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(afs_i2net(dir), server); -mkdir_error: +error_key: key_put(key); error: d_drop(dentry); @@ -806,17 +821,30 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return ret; } +/* + * Remove a subdir from a directory. + */ +static void afs_dir_remove_subdir(struct dentry *dentry) +{ + if (d_really_is_positive(dentry)) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + clear_nlink(&vnode->vfs_inode); + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } +} + /* * remove a directory from an AFS filesystem */ static int afs_rmdir(struct inode *dir, struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir); struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); @@ -826,45 +854,69 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) goto error; } - ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true); - if (ret < 0) - goto rmdir_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_remove(&fc, dentry->d_name.name, true); + } - if (d_really_is_positive(dentry)) { - vnode = AFS_FS_I(d_inode(dentry)); - clear_nlink(&vnode->vfs_inode); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) + afs_dir_remove_subdir(dentry); } key_put(key); - _leave(" = 0"); - return 0; - -rmdir_error: - key_put(key); error: - _leave(" = %d", ret); return ret; } /* - * remove a file from an AFS filesystem + * Remove a link to a file or symlink from a directory. + * + * If the file was not deleted due to excess hard links, the fileserver will + * break the callback promise on the file - if it had one - before it returns + * to us, and if it was deleted, it won't + * + * However, if we didn't have a callback promise outstanding, or it was + * outstanding on a different server, then it won't break it either... + */ +static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +{ + int ret = 0; + + if (d_really_is_positive(dentry)) { + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + } + + return ret; +} + +/* + * Remove a file or symlink from an AFS filesystem. */ static int afs_unlink(struct inode *dir, struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_fs_cursor fc; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) - goto error; + return -ENAMETOOLONG; key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { @@ -872,42 +924,28 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) goto error; } + /* Try to make sure we have a callback promise on the victim. */ if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); - - /* make sure we have a callback promise on the victim */ ret = afs_validate(vnode, key); if (ret < 0) - goto error; + goto error_key; } - ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false); - if (ret < 0) - goto remove_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_remove(&fc, dentry->d_name.name, false); + } - if (d_really_is_positive(dentry)) { - /* if the file wasn't deleted due to excess hard links, the - * fileserver will break the callback promise on the file - if - * it had one - before it returns to us, and if it was deleted, - * it won't - * - * however, if we didn't have a callback promise outstanding, - * or it was outstanding on a different server, then it won't - * break it either... - */ - vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - _debug("AFS_VNODE_DELETED"); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - ret = afs_validate(vnode, key); - _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + if (ret == 0) + ret = afs_dir_remove_link(dentry, key); } - key_put(key); - _leave(" = 0"); - return 0; - -remove_error: +error_key: key_put(key); error: _leave(" = %d", ret); @@ -920,60 +958,50 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct afs_file_status status; - struct afs_callback cb; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_fs_cursor fc; + struct afs_file_status newstatus; + struct afs_callback newcb; + struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); + mode |= S_IFREG; _enter("{%x:%u},{%pd},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - mode |= S_IFREG; - ret = afs_vnode_create(dvnode, key, dentry->d_name.name, - mode, &fid, &status, &cb, &server); - if (ret < 0) - goto create_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_create(&fc, dentry->d_name.name, mode, + &newfid, &newstatus, &newcb); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, &cb); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_i2net(dir), server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(afs_i2net(dir), server); -create_error: +error_key: key_put(key); error: d_drop(dentry); @@ -987,6 +1015,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry) { + struct afs_fs_cursor fc; struct afs_vnode *dvnode, *vnode; struct key *key; int ret; @@ -999,23 +1028,45 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode->fid.vid, dvnode->fid.vnode, dentry); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } - ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name); - if (ret < 0) - goto link_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { + afs_end_vnode_operation(&fc); + return -ERESTARTSYS; + } + + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; + afs_fs_link(&fc, vnode, dentry->d_name.name); + } + + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, vnode, fc.cb_break_2); + ihold(&vnode->vfs_inode); + d_instantiate(dentry, &vnode->vfs_inode); + + mutex_unlock(&vnode->io_lock); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; + } - ihold(&vnode->vfs_inode); - d_instantiate(dentry, &vnode->vfs_inode); key_put(key); _leave(" = 0"); return 0; -link_error: +error_key: key_put(key); error: d_drop(dentry); @@ -1029,20 +1080,21 @@ static int afs_link(struct dentry *from, struct inode *dir, static int afs_symlink(struct inode *dir, struct dentry *dentry, const char *content) { - struct afs_file_status status; - struct afs_server *server; - struct afs_vnode *dvnode, *vnode; - struct afs_fid fid; - struct inode *inode; + struct afs_fs_cursor fc; + struct afs_file_status newstatus; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid newfid; struct key *key; int ret; - dvnode = AFS_FS_I(dir); - _enter("{%x:%u},{%pd},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry, content); + ret = -ENAMETOOLONG; + if (dentry->d_name.len >= AFSNAMEMAX) + goto error; + ret = -EINVAL; if (strlen(content) >= AFSPATHMAX) goto error; @@ -1053,39 +1105,27 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, goto error; } - ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content, - &fid, &status, &server); - if (ret < 0) - goto create_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, dvnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + afs_fs_symlink(&fc, dentry->d_name.name, content, + &newfid, &newstatus); + } - inode = afs_iget(dir->i_sb, key, &fid, &status, NULL); - if (IS_ERR(inode)) { - /* ENOMEM at a really inconvenient time - just abandon the new - * directory on the server */ - ret = PTR_ERR(inode); - goto iget_error; + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, dvnode, fc.cb_break); + afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; } - /* apply the status report we've got for the new vnode */ - vnode = AFS_FS_I(inode); - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - afs_vnode_finalise_status_update(vnode, server); - afs_put_server(afs_i2net(dir), server); - - d_instantiate(dentry, inode); - if (d_unhashed(dentry)) { - _debug("not hashed"); - d_rehash(dentry); - } key_put(key); _leave(" = 0"); return 0; -iget_error: - afs_put_server(afs_i2net(dir), server); -create_error: +error_key: key_put(key); error: d_drop(dentry); @@ -1100,6 +1140,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + struct afs_fs_cursor fc; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; struct key *key; int ret; @@ -1123,16 +1164,35 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, goto error; } - ret = afs_vnode_rename(orig_dvnode, new_dvnode, key, - old_dentry->d_name.name, - new_dentry->d_name.name); - if (ret < 0) - goto rename_error; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) { + if (orig_dvnode != new_dvnode) { + if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { + afs_end_vnode_operation(&fc); + return -ERESTARTSYS; + } + } + while (afs_select_fileserver(&fc)) { + fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; + fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; + afs_fs_rename(&fc, old_dentry->d_name.name, + new_dvnode, new_dentry->d_name.name); + } + + afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break); + afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2); + if (orig_dvnode != new_dvnode) + mutex_unlock(&new_dvnode->io_lock); + ret = afs_end_vnode_operation(&fc); + if (ret < 0) + goto error_key; + } + key_put(key); _leave(" = 0"); return 0; -rename_error: +error_key: key_put(key); error: d_drop(new_dentry); diff --git a/fs/afs/file.c b/fs/afs/file.c index 08f9f0c5dfacf0..1f26ac9f816d60 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -137,6 +137,37 @@ static void afs_file_readpage_read_complete(struct page *page, } #endif +/* + * Fetch file data from the volume. + */ +int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x,,,", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_fetch_data(&fc, desc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * read page from file, directory or symlink, given a key to use */ @@ -199,7 +230,7 @@ int afs_page_filler(void *data, struct page *page) /* read the contents of the file from the server into the * page */ - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { if (ret == -ENOENT) { @@ -264,7 +295,7 @@ static int afs_readpage(struct file *file, struct page *page) ret = afs_page_filler(key, page); } else { struct inode *inode = page->mapping->host; - key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); + key = afs_request_key(AFS_FS_S(inode->i_sb)->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); } else { @@ -369,7 +400,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, return 0; } - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); if (ret < 0) goto error; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 2b31ea58c50ca2..77b0a4606efd88 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -67,6 +67,100 @@ static void afs_grant_locks(struct afs_vnode *vnode, struct file_lock *fl) } } +/* + * Get a lock on a file + */ +static int afs_set_lock(struct afs_vnode *vnode, struct key *key, + afs_lock_type_t type) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x,%u", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key), type); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_set_lock(&fc, type); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Extend a lock on a file + */ +static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_current_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_extend_lock(&fc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Release a lock on a file + */ +static int afs_release_lock(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s{%x:%u.%u},%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(key)); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_current_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_release_lock(&fc); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * do work for a lock, including: * - probing for a lock we're waiting on but didn't get immediately @@ -91,7 +185,7 @@ void afs_lock_work(struct work_struct *work) /* attempt to release the server lock; if it fails, we just * wait 5 minutes and it'll time out anyway */ - ret = afs_vnode_release_lock(vnode, vnode->unlock_key); + ret = afs_release_lock(vnode, vnode->unlock_key); if (ret < 0) printk(KERN_WARNING "AFS:" " Failed to release lock on {%x:%x} error %d\n", @@ -115,7 +209,7 @@ void afs_lock_work(struct work_struct *work) key = key_get(fl->fl_file->private_data); spin_unlock(&vnode->lock); - ret = afs_vnode_extend_lock(vnode, key); + ret = afs_extend_lock(vnode, key); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); key_put(key); switch (ret) { @@ -151,7 +245,7 @@ void afs_lock_work(struct work_struct *work) AFS_LOCK_READ : AFS_LOCK_WRITE; spin_unlock(&vnode->lock); - ret = afs_vnode_set_lock(vnode, key, type); + ret = afs_set_lock(vnode, key, type); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); switch (ret) { case -EWOULDBLOCK: @@ -182,7 +276,7 @@ void afs_lock_work(struct work_struct *work) clear_bit(AFS_VNODE_READLOCKED, &vnode->flags); clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags); spin_unlock(&vnode->lock); - afs_vnode_release_lock(vnode, key); + afs_release_lock(vnode, key); if (!list_empty(&vnode->pending_locks)) afs_lock_may_be_available(vnode); } @@ -280,7 +374,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) set_bit(AFS_VNODE_LOCKING, &vnode->flags); spin_unlock(&vnode->lock); - ret = afs_vnode_set_lock(vnode, key, type); + ret = afs_set_lock(vnode, key, type); clear_bit(AFS_VNODE_LOCKING, &vnode->flags); switch (ret) { case 0: @@ -383,7 +477,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) /* again, make sure we've got a callback on this file and, again, make * sure that our view of the data version is up to date (we ignore * errors incurred here and deal with the consequences elsewhere) */ - afs_vnode_fetch_status(vnode, key, false); + afs_validate(vnode, key); error: spin_unlock(&inode->i_lock); @@ -455,7 +549,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl) posix_test_lock(file, fl); if (fl->fl_type == F_UNLCK) { /* no local locks; consult the server */ - ret = afs_vnode_fetch_status(vnode, key, true); + ret = afs_fetch_status(vnode, key); if (ret < 0) goto error; lock_count = vnode->status.lock_count; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 6614d0a78daa2c..72ff3679fa2a81 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -22,9 +22,9 @@ */ static u8 afs_discard_buffer[64]; -static inline void afs_use_fs_server(struct afs_call *call, struct afs_server *server) +static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) { - call->server = afs_get_server(server); + call->cbi = afs_get_cb_interest(cbi); } /* @@ -56,7 +56,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, kuid_t owner; kgid_t group; - write_seqlock(&vnode->cb_lock); + if (vnode) + write_seqlock(&vnode->cb_lock); #define EXTRACT(DST) \ do { \ @@ -141,7 +142,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, status->data_version = data_version; } - write_sequnlock(&vnode->cb_lock); + if (vnode) + write_sequnlock(&vnode->cb_lock); } /* @@ -151,22 +153,29 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, struct afs_vnode *vnode, const __be32 **_bp) { + struct afs_cb_interest *old, *cbi = call->cbi; const __be32 *bp = *_bp; u32 cb_expiry; write_seqlock(&vnode->cb_lock); - if (call->cb_break == (vnode->cb_break + call->server->cb_s_break)) { + if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + old = vnode->cb_interest; + if (old != call->cbi) { + vnode->cb_interest = cbi; + cbi = old; + } set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } else { bp += 3; } write_sequnlock(&vnode->cb_lock); + call->cbi = cbi; *_bp = bp; } @@ -297,24 +306,23 @@ static const struct afs_call_type afs_RXFSFetchStatus = { /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_volsync *volsync, - bool async) +int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); - if (!call) + if (!call) { + fc->ac.error = -ENOMEM; return -ENOMEM; + } - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = volsync; @@ -325,9 +333,9 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, bp[2] = htonl(vnode->fid.vnode); bp[3] = htonl(vnode->fid.unique); - call->cb_break = vnode->cb_break + fc->server->cb_s_break; - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -502,12 +510,9 @@ static const struct afs_call_type afs_RXFSFetchData64 = { /* * fetch data from a very large file */ -static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_read *req, - bool async) +static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -518,7 +523,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; @@ -536,20 +541,17 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, bp[7] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - call->cb_break = vnode->cb_break + fc->server->cb_s_break; - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * fetch data from a file */ -int afs_fs_fetch_data(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_read *req, - bool async) +int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -557,7 +559,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, if (upper_32_bits(req->pos) || upper_32_bits(req->len) || upper_32_bits(req->pos + req->len)) - return afs_fs_fetch_data64(fc, key, vnode, req, async); + return afs_fs_fetch_data64(fc, req); _enter(""); @@ -565,7 +567,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = NULL; /* volsync */ call->reply[2] = req; @@ -581,9 +583,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, bp[5] = htonl(lower_32_bits(req->len)); atomic_inc(&req->usage); - call->cb_break = vnode->cb_break + fc->server->cb_s_break; - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -626,15 +628,13 @@ static const struct afs_call_type afs_RXFSCreateXXXX = { * create a file or make a directory */ int afs_fs_create(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, const char *name, umode_t mode, struct afs_fid *newfid, struct afs_file_status *newstatus, - struct afs_callback *newcb, - bool async) + struct afs_callback *newcb) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; @@ -651,7 +651,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = newfid; call->reply[2] = newstatus; @@ -677,8 +677,8 @@ int afs_fs_create(struct afs_fs_cursor *fc, *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -717,13 +717,9 @@ static const struct afs_call_type afs_RXFSRemoveXXXX = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - const char *name, - bool isdir, - bool async) +int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; @@ -739,7 +735,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -756,8 +752,8 @@ int afs_fs_remove(struct afs_fs_cursor *fc, bp = (void *) bp + padsz; } - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -797,13 +793,10 @@ static const struct afs_call_type afs_RXFSLink = { /* * make a hard link */ -int afs_fs_link(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *dvnode, - struct afs_vnode *vnode, - const char *name, - bool async) +int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name) { + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; @@ -819,7 +812,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = dvnode; call->reply[1] = vnode; @@ -840,8 +833,8 @@ int afs_fs_link(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -883,14 +876,12 @@ static const struct afs_call_type afs_RXFSSymlink = { * create a symbolic link */ int afs_fs_symlink(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, const char *name, const char *contents, struct afs_fid *newfid, - struct afs_file_status *newstatus, - bool async) + struct afs_file_status *newstatus) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz, c_namesz, c_padsz; @@ -911,7 +902,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = newfid; call->reply[2] = newstatus; @@ -943,8 +934,8 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -987,13 +978,11 @@ static const struct afs_call_type afs_RXFSRename = { * create a symbolic link */ int afs_fs_rename(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *orig_dvnode, const char *orig_name, struct afs_vnode *new_dvnode, - const char *new_name, - bool async) + const char *new_name) { + struct afs_vnode *orig_dvnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(orig_dvnode); size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; @@ -1016,7 +1005,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = orig_dvnode; call->reply[1] = new_dvnode; @@ -1045,8 +1034,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc, bp = (void *) bp + n_padsz; } - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1098,8 +1087,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct afs_writeback *wb, pgoff_t first, pgoff_t last, unsigned offset, unsigned to, - loff_t size, loff_t pos, loff_t i_size, - bool async) + loff_t size, loff_t pos, loff_t i_size) { struct afs_vnode *vnode = wb->vnode; struct afs_call *call; @@ -1147,8 +1135,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, *bp++ = htonl(i_size >> 32); *bp++ = htonl((u32) i_size); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1156,8 +1143,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, */ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, - bool async) + unsigned offset, unsigned to) { struct afs_vnode *vnode = wb->vnode; struct afs_call *call; @@ -1184,7 +1170,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) return afs_fs_store_data64(fc, wb, first, last, offset, to, - size, pos, i_size, async); + size, pos, i_size); call = afs_alloc_flat_call(net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, @@ -1221,8 +1207,8 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct afs_writeback *wb, *bp++ = htonl(size); *bp++ = htonl(i_size); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1279,16 +1265,15 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = { * set the attributes on a very large file, using FS.StoreData rather than * FS.StoreStatus so as to alter the file size also */ -static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); @@ -1298,7 +1283,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1319,28 +1304,27 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct key *key, *bp++ = htonl(attr->ia_size >> 32); /* new file length */ *bp++ = htonl((u32) attr->ia_size); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus * so as to alter the file size also */ -static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); if (attr->ia_size >> 32) - return afs_fs_setattr_size64(fc, key, vnode, attr, async); + return afs_fs_setattr_size64(fc, attr); call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, @@ -1348,7 +1332,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->store_version = vnode->status.data_version + 1; call->operation_ID = FSSTOREDATA; @@ -1366,27 +1350,26 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct key *key, *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * set the attributes on a file, using FS.StoreData if there's a change in file * size, and FS.StoreStatus otherwise */ -int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, - struct afs_vnode *vnode, struct iattr *attr, - bool async) +int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; if (attr->ia_valid & ATTR_SIZE) - return afs_fs_setattr_size(fc, key, vnode, attr, async); + return afs_fs_setattr_size(fc, attr); _enter(",%x,{%x:%u},,", - key_serial(key), vnode->fid.vid, vnode->fid.vnode); + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, (4 + 6) * 4, @@ -1394,7 +1377,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->operation_ID = FSSTORESTATUS; @@ -1407,8 +1390,8 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1606,11 +1589,9 @@ static const struct afs_call_type afs_RXFSGetVolumeStatus = { * fetch the status of a volume */ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - struct afs_volume_status *vs, - bool async) + struct afs_volume_status *vs) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1628,7 +1609,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, return -ENOMEM; } - call->key = key; + call->key = fc->key; call->reply[0] = vnode; call->reply[1] = vs; call->reply[2] = tmpbuf; @@ -1638,8 +1619,8 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, bp[0] = htonl(FSGETVOLUMESTATUS); bp[1] = htonl(vnode->fid.vid); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1692,14 +1673,11 @@ static const struct afs_call_type afs_RXFSReleaseLock = { }; /* - * get a lock on a file + * Set a lock on a file */ -int afs_fs_set_lock(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - afs_lock_type_t type, - bool async) +int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1710,7 +1688,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -1721,18 +1699,16 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.unique); *bp++ = htonl(type); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * extend a lock on a file */ -int afs_fs_extend_lock(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - bool async) +int afs_fs_extend_lock(struct afs_fs_cursor *fc) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1743,7 +1719,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -1753,18 +1729,16 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* * release a lock on a file */ -int afs_fs_release_lock(struct afs_fs_cursor *fc, - struct key *key, - struct afs_vnode *vnode, - bool async) +int afs_fs_release_lock(struct afs_fs_cursor *fc) { + struct afs_vnode *vnode = fc->vnode; struct afs_call *call; struct afs_net *net = afs_v2net(vnode); __be32 *bp; @@ -1775,7 +1749,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc, if (!call) return -ENOMEM; - call->key = key; + call->key = fc->key; call->reply[0] = vnode; /* marshall the parameters */ @@ -1785,8 +1759,8 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - afs_use_fs_server(call, fc->server); - return afs_make_call(&fc->ac, call, GFP_NOFS, async); + afs_use_fs_server(call, fc->cbi); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); } /* @@ -1809,17 +1783,17 @@ static const struct afs_call_type afs_RXFSGiveUpAllCallBacks = { /* * Flush all the callbacks we have on a server. */ -int afs_fs_give_up_all_callbacks(struct afs_server *server, +int afs_fs_give_up_all_callbacks(struct afs_net *net, + struct afs_server *server, struct afs_addr_cursor *ac, - struct key *key, - bool async) + struct key *key) { struct afs_call *call; __be32 *bp; _enter(""); - call = afs_alloc_flat_call(server->net, &afs_RXFSGiveUpAllCallBacks, 2 * 4, 0); + call = afs_alloc_flat_call(net, &afs_RXFSGiveUpAllCallBacks, 1 * 4, 0); if (!call) return -ENOMEM; @@ -1830,5 +1804,96 @@ int afs_fs_give_up_all_callbacks(struct afs_server *server, *bp++ = htonl(FSGIVEUPALLCALLBACKS); /* Can't take a ref on server */ - return afs_make_call(ac, call, GFP_NOFS, async); + return afs_make_call(ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.GetCapabilities operation. + */ +static int afs_deliver_fs_get_capabilities(struct afs_call *call) +{ + u32 count; + int ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the capabilities word count */ + case 1: + ret = afs_extract_data(call, &call->tmp, + 1 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + count = ntohl(call->tmp); + + call->count = count; + call->count2 = count; + call->offset = 0; + call->unmarshall++; + + /* Extract capabilities words */ + case 2: + count = min(call->count, 16U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 16); + if (ret < 0) + return ret; + + /* TODO: Examine capabilities */ + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.GetCapabilities operation type + */ +static const struct afs_call_type afs_RXFSGetCapabilities = { + .name = "FS.GetCapabilities", + .deliver = afs_deliver_fs_get_capabilities, + .destructor = afs_flat_call_destructor, +}; + +/* + * Probe a fileserver for the capabilities that it supports. This can + * return up to 196 words. + */ +int afs_fs_get_capabilities(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac, + struct key *key) +{ + struct afs_call *call; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4); + if (!call) + return -ENOMEM; + + call->key = key; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSGETCAPABILITIES); + + /* Can't take a ref on server */ + return afs_make_call(ac, call, GFP_NOFS, false); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ee86d5ad22d1dc..5a2f5854f349cb 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -101,6 +101,35 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) return 0; } +/* + * Fetch file status from the volume. + */ +int afs_fetch_status(struct afs_vnode *vnode, struct key *key) +{ + struct afs_fs_cursor fc; + int ret; + + _enter("%s,{%x:%u.%u,S=%lx}", + vnode->volume->name, + vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, + vnode->flags); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_fetch_file_status(&fc, NULL); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * iget5() comparator */ @@ -205,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, */ struct inode *afs_iget(struct super_block *sb, struct key *key, struct afs_fid *fid, struct afs_file_status *status, - struct afs_callback *cb) + struct afs_callback *cb, struct afs_cb_interest *cbi) { struct afs_iget_data data = { .fid = *fid }; struct afs_super_info *as; @@ -238,7 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, if (!status) { /* it's a remotely extant inode */ - ret = afs_vnode_fetch_status(vnode, key, true); + ret = afs_fetch_status(vnode, key); if (ret < 0) goto bad_inode; } else { @@ -255,6 +284,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = cb->version; vnode->cb_type = cb->type; vnode->cb_expires_at = cb->expiry; + vnode->cb_interest = afs_get_cb_interest(cbi); set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } @@ -358,7 +388,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * access */ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { _debug("not promised"); - ret = afs_vnode_fetch_status(vnode, key, false); + ret = afs_fetch_status(vnode, key); if (ret < 0) { if (ret == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -468,6 +498,7 @@ void afs_evict_inode(struct inode *inode) */ int afs_setattr(struct dentry *dentry, struct iattr *attr) { + struct afs_fs_cursor fc; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; @@ -498,7 +529,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) } } - ret = afs_vnode_setattr(vnode, key, attr); + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_setattr(&fc, attr); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + if (!(attr->ia_valid & ATTR_FILE)) key_put(key); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index df52bf18a2636a..1fadf40551fd3e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -32,16 +32,6 @@ struct pagevec; struct afs_call; -typedef enum { - AFS_VL_NEW, /* new, uninitialised record */ - AFS_VL_CREATING, /* creating record */ - AFS_VL_VALID, /* record is pending */ - AFS_VL_NO_VOLUME, /* no such volume available */ - AFS_VL_UPDATING, /* update in progress */ - AFS_VL_VOLUME_DELETED, /* volume was deleted */ - AFS_VL_UNCERTAIN, /* uncertain state (update failed) */ -} __attribute__((packed)) afs_vlocation_state_t; - struct afs_mount_params { bool rwpath; /* T if the parent should be considered R/W */ bool force; /* T to force cell type */ @@ -76,8 +66,10 @@ enum afs_call_state { struct afs_addr_list { struct rcu_head rcu; /* Must be first */ refcount_t usage; + u32 version; /* Version */ unsigned short nr_addrs; unsigned short index; /* Address currently in use */ + unsigned short nr_ipv4; /* Number of IPv4 addresses */ struct sockaddr_rxrpc addrs[]; }; @@ -93,7 +85,7 @@ struct afs_call { struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ struct afs_server *cm_server; /* Server affected by incoming CM call */ - struct afs_server *server; /* Server used by client call */ + struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ struct address_space *mapping; /* page set */ struct afs_writeback *wb; /* writeback being performed */ @@ -192,7 +184,6 @@ struct afs_super_info { struct afs_net *net; /* Network namespace */ struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ - char rwparent; /* T if parent is R/W AFS volume */ }; static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) @@ -228,26 +219,26 @@ struct afs_net { spinlock_t proc_cells_lock; struct list_head proc_cells; - /* Volume location database */ - struct list_head vl_updates; /* VL records in need-update order */ - struct list_head vl_graveyard; /* Inactive VL records */ - struct delayed_work vl_reaper; - struct delayed_work vl_updater; - spinlock_t vl_updates_lock; - spinlock_t vl_graveyard_lock; + /* Known servers. Theoretically each fileserver can only be in one + * cell, but in practice, people create aliases and subsets and there's + * no easy way to distinguish them. + */ + seqlock_t fs_lock; /* For fs_servers */ + struct rb_root fs_servers; /* afs_server (by server UUID or address) */ + struct list_head fs_updates; /* afs_server (by update_at) */ + struct hlist_head fs_proc; /* procfs servers list */ - /* File locking renewal management */ - struct mutex lock_manager_mutex; + struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ + struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */ + seqlock_t fs_addr_lock; /* For fs_addresses[46] */ - /* Server database */ - struct rb_root servers; /* Active servers */ - rwlock_t servers_lock; - struct list_head server_graveyard; /* Inactive server LRU list */ - spinlock_t server_graveyard_lock; - struct timer_list server_timer; - struct work_struct server_reaper; + struct work_struct fs_manager; + struct timer_list fs_timer; atomic_t servers_outstanding; + /* File locking renewal management */ + struct mutex lock_manager_mutex; + /* Misc */ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ }; @@ -264,7 +255,21 @@ enum afs_cell_state { }; /* - * AFS cell record + * AFS cell record. + * + * This is a tricky concept to get right as it is possible to create aliases + * simply by pointing AFSDB/SRV records for two names at the same set of VL + * servers; it is also possible to do things like setting up two sets of VL + * servers, one of which provides a superset of the volumes provided by the + * other (for internal/external division, for example). + * + * Cells only exist in the sense that (a) a cell's name maps to a set of VL + * servers and (b) a cell's name is used by the client to select the key to use + * for authentication and encryption. The cell name is not typically used in + * the protocol. + * + * There is no easy way to determine if two cells are aliases or one is a + * subset of another. */ struct afs_cell { union { @@ -278,14 +283,6 @@ struct afs_cell { #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - - /* server record management */ - rwlock_t servers_lock; /* active server list lock */ - struct list_head servers; /* active server list */ - - /* volume location record management */ - struct rw_semaphore vl_sem; /* volume management serialisation semaphore */ - struct list_head vl_list; /* cell's active VL record list */ time64_t dns_expiry; /* Time AFSDB/SRV record expires */ time64_t last_inactive; /* Time of last drop of usage count */ atomic_t usage; @@ -298,9 +295,11 @@ struct afs_cell { enum afs_cell_state state; short error; - spinlock_t vl_lock; /* vl_list lock */ + /* Active fileserver interaction state. */ + struct list_head proc_volumes; /* procfs volume list */ + rwlock_t proc_lock; - /* VLDB server list. */ + /* VL server list. */ rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ u8 name_len; /* Length of name */ @@ -308,65 +307,62 @@ struct afs_cell { }; /* - * entry in the cached volume location catalogue + * Cached VLDB entry. + * + * This is pointed to by cell->vldb_entries, indexed by name. */ -struct afs_cache_vlocation { - /* volume name (lowercase, padded with NULs) */ - uint8_t name[AFS_MAXVOLNAME + 1]; +struct afs_vldb_entry { + afs_volid_t vid[3]; /* Volume IDs for R/W, R/O and Bak volumes */ - uint8_t nservers; /* number of entries used in servers[] */ - uint8_t vidmask; /* voltype mask for vid[] */ - uint8_t srvtmask[8]; /* voltype masks for servers[] */ + unsigned long flags; +#define AFS_VLDB_HAS_RW 0 /* - R/W volume exists */ +#define AFS_VLDB_HAS_RO 1 /* - R/O volume exists */ +#define AFS_VLDB_HAS_BAK 2 /* - Backup volume exists */ +#define AFS_VLDB_QUERY_VALID 3 /* - Record is valid */ +#define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */ + + uuid_t fs_server[AFS_NMAXNSERVERS]; + u8 fs_mask[AFS_NMAXNSERVERS]; #define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ #define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ #define AFS_VOL_VTM_BAK 0x04 /* backup version of the volume is available (on this server) */ - - afs_volid_t vid[3]; /* volume IDs for R/W, R/O and Bak volumes */ - struct sockaddr_rxrpc servers[8]; /* fileserver addresses */ - time_t rtime; /* last retrieval time */ -}; - -/* - * AFS volume location record - */ -struct afs_vlocation { - atomic_t usage; - time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct list_head link; /* link in cell volume location list */ - struct list_head grave; /* link in master graveyard list */ - struct list_head update; /* link in master update list */ - struct afs_cell *cell; /* cell to which volume belongs */ - struct afs_cache_vlocation vldb; /* volume information DB record */ - struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ - wait_queue_head_t waitq; /* status change waitqueue */ - time64_t update_at; /* time at which record should be updated */ - spinlock_t lock; /* access lock */ - afs_vlocation_state_t state; /* volume location state */ - unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ - unsigned short upd_busy_cnt; /* EBUSY count during update */ - bool valid; /* T if valid */ + short error; + u8 nr_servers; /* Number of server records */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; /* - * AFS fileserver record + * Record of fileserver with which we're actively communicating. */ struct afs_server { - atomic_t usage; - time64_t time_of_death; /* time at which put reduced usage to 0 */ - struct afs_addr_list __rcu *addrs; /* List of addresses for this server */ - struct afs_net *net; /* Network namespace in which the server resides */ - struct afs_cell *cell; /* cell in which server resides */ - struct list_head link; /* link in cell's server list */ - struct list_head grave; /* link in master graveyard list */ - - struct rb_node master_rb; /* link in master by-addr tree */ - struct rw_semaphore sem; /* access lock */ + struct rcu_head rcu; + union { + uuid_t uuid; /* Server ID */ + struct afs_uuid _uuid; + }; + + struct afs_addr_list __rcu *addresses; + struct rb_node uuid_rb; /* Link in net->servers */ + struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ + struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node proc_link; /* Link in net->fs_proc */ + struct afs_server *gc_next; /* Next server in manager's list */ + time64_t put_time; /* Time at which last put */ + time64_t update_at; /* Time at which to next update the record */ unsigned long flags; -#define AFS_SERVER_NEW 0 /* New server, don't inc cb_s_break */ +#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */ +#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ +#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ +#define AFS_SERVER_FL_UPDATING 4 +#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ +#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ + atomic_t usage; + u32 addr_version; /* Address list version */ /* file service access */ - int fs_state; /* 0 or reason FS currently marked dead (-errno) */ - spinlock_t fs_lock; /* access lock */ + rwlock_t fs_lock; /* access lock */ /* callback promise management */ struct list_head cb_interests; /* List of superblocks using this server */ @@ -386,32 +382,50 @@ struct afs_cb_interest { }; /* - * AFS volume access record + * Replaceable server list. + */ +struct afs_server_entry { + struct afs_server *server; + struct afs_cb_interest *cb_interest; +}; + +struct afs_server_list { + refcount_t usage; + unsigned short nr_servers; + unsigned short index; /* Server currently in use */ + unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ + unsigned int seq; /* Set to ->servers_seq when installed */ + struct afs_server_entry servers[]; +}; + +/* + * Live AFS volume management. */ struct afs_volume { + afs_volid_t vid; /* volume ID */ atomic_t usage; - struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ - struct afs_vlocation *vlocation; /* volume location */ + time64_t update_at; /* Time at which to next update */ + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ + struct list_head proc_link; /* Link in cell->vl_proc */ + unsigned long flags; +#define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */ +#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */ +#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */ +#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */ +#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ +#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ #ifdef CONFIG_AFS_FSCACHE struct fscache_cookie *cache; /* caching cookie */ #endif - afs_volid_t vid; /* volume ID */ + struct afs_server_list *servers; /* List of servers on which volume resides */ + rwlock_t servers_lock; /* Lock for ->servers */ + unsigned int servers_seq; /* Incremented each time ->servers changes */ + afs_voltype_t type; /* type of volume */ + short error; char type_force; /* force volume type (suppress R/O -> R/W) */ - unsigned short nservers; /* number of server slots filled */ - unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ - struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ - struct afs_cb_interest *cb_interests[8]; /* Interests on servers for callbacks */ - struct rw_semaphore server_sem; /* lock for accessing current server */ -}; - -/* - * vnode catalogue entry - */ -struct afs_cache_vnode { - afs_vnodeid_t vnode_id; /* vnode ID */ - unsigned vnode_unique; /* vnode ID uniquifier */ - afs_dataversion_t data_version; /* data version */ + u8 name_len; + u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ }; /* @@ -427,10 +441,8 @@ struct afs_vnode { struct fscache_cookie *cache; /* caching cookie */ #endif struct afs_permits *permit_cache; /* cache of permits so far obtained */ + struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct mutex validate_lock; /* lock for validating this vnode */ - wait_queue_head_t update_waitq; /* status fetch waitqueue */ - int update_cnt; /* number of outstanding ops that will update the - * status */ spinlock_t writeback_lock; /* lock for writebacks */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; @@ -501,6 +513,7 @@ struct afs_interface { struct afs_addr_cursor { struct afs_addr_list *alist; /* Current address list (pins ref) */ struct sockaddr_rxrpc *addr; + u32 abort_code; unsigned short start; /* Starting point in alist->addrs[] */ unsigned short index; /* Wrapping offset from start to current addr */ short error; @@ -513,7 +526,21 @@ struct afs_addr_cursor { */ struct afs_fs_cursor { struct afs_addr_cursor ac; - struct afs_server *server; /* Current server (pins ref) */ + struct afs_vnode *vnode; + struct afs_server_list *server_list; /* Current server list (pins ref) */ + struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ + struct key *key; /* Key for the server */ + unsigned int cb_break; /* cb_break + cb_s_break before the call */ + unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ + unsigned char start; /* Initial index in server list */ + unsigned char index; /* Number of servers tried beyond start */ + unsigned short flags; +#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */ +#define AFS_FS_CURSOR_VMOVED 0x0004 /* Set if seen VMOVED */ +#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ +#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ +#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ }; /*****************************************************************************/ @@ -537,6 +564,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); +extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32); + /* * cache.c */ @@ -558,10 +587,9 @@ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]); -extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_cb_interest **, - struct afs_server *); +extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); -extern void afs_clear_callback_interests(struct afs_net *, struct afs_volume *); +extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) { @@ -603,6 +631,7 @@ extern const struct file_operations afs_file_operations; extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); +extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); extern int afs_page_filler(void *, struct page *); extern void afs_put_read(struct afs_read *); @@ -619,51 +648,40 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, struct afs_volsync *, - bool); -extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool); -extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, struct afs_read *, bool); -extern int afs_fs_create(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, umode_t, - struct afs_fid *, struct afs_file_status *, - struct afs_callback *, bool); -extern int afs_fs_remove(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, bool, bool); -extern int afs_fs_link(struct afs_fs_cursor *, struct key *, struct afs_vnode *, - struct afs_vnode *, const char *, bool); -extern int afs_fs_symlink(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, const char *, - struct afs_fid *, struct afs_file_status *, bool); -extern int afs_fs_rename(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, const char *, - struct afs_vnode *, const char *, bool); +extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *); +extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); +extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); +extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool); +extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *); +extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, + struct afs_fid *, struct afs_file_status *); +extern int afs_fs_rename(struct afs_fs_cursor *, const char *, + struct afs_vnode *, const char *); extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *, - pgoff_t, pgoff_t, unsigned, unsigned, bool); -extern int afs_fs_setattr(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, struct iattr *, bool); -extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, - struct afs_volume_status *, bool); -extern int afs_fs_set_lock(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, afs_lock_type_t, bool); -extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, bool); -extern int afs_fs_release_lock(struct afs_fs_cursor *, struct key *, - struct afs_vnode *, bool); -extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct afs_addr_cursor *, - struct key *, bool); + pgoff_t, pgoff_t, unsigned, unsigned); +extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *); +extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); +extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); +extern int afs_fs_extend_lock(struct afs_fs_cursor *); +extern int afs_fs_release_lock(struct afs_fs_cursor *); +extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); +extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); /* * inode.c */ +extern int afs_fetch_status(struct afs_vnode *, struct key *); extern int afs_iget5_test(struct inode *, void *); extern struct inode *afs_iget_autocell(struct inode *, const char *, int, struct key *); extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, - struct afs_callback *); + struct afs_callback *, + struct afs_cb_interest *); extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); @@ -733,6 +751,15 @@ extern void __net_exit afs_proc_cleanup(struct afs_net *); extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); +/* + * rotate.c + */ +extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *, + struct key *); +extern bool afs_select_fileserver(struct afs_fs_cursor *); +extern bool afs_select_current_fileserver(struct afs_fs_cursor *); +extern int afs_end_vnode_operation(struct afs_fs_cursor *); + /* * rxrpc.c */ @@ -779,104 +806,62 @@ static inline struct afs_server *afs_get_server(struct afs_server *server) return server; } -extern void afs_server_timer(struct timer_list *); -extern struct afs_server *afs_lookup_server(struct afs_cell *, - struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); +extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); +extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); extern void afs_put_server(struct afs_net *, struct afs_server *); -extern void afs_reap_server(struct work_struct *); +extern void afs_manage_servers(struct work_struct *); +extern void afs_servers_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); +extern bool afs_probe_fileserver(struct afs_fs_cursor *); +extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); /* - * super.c + * server_list.c */ -extern int __init afs_fs_init(void); -extern void __exit afs_fs_exit(void); +static inline struct afs_server_list *afs_get_serverlist(struct afs_server_list *slist) +{ + refcount_inc(&slist->usage); + return slist; +} -/* - * vlclient.c - */ -extern int afs_vl_get_entry_by_name(struct afs_net *, struct afs_addr_cursor *, - struct key *, const char *, - struct afs_cache_vlocation *, bool); -extern int afs_vl_get_entry_by_id(struct afs_net *, struct afs_addr_cursor *, - struct key *, afs_volid_t, afs_voltype_t, - struct afs_cache_vlocation *, bool); +extern void afs_put_serverlist(struct afs_net *, struct afs_server_list *); +extern struct afs_server_list *afs_alloc_server_list(struct afs_cell *, struct key *, + struct afs_vldb_entry *, + u8); +extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server_list *); /* - * vlocation.c + * super.c */ -extern struct workqueue_struct *afs_vlocation_update_worker; - -#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0) - -extern struct afs_vlocation *afs_vlocation_lookup(struct afs_net *, - struct afs_cell *, - struct key *, - const char *, size_t); -extern void afs_put_vlocation(struct afs_net *, struct afs_vlocation *); -extern void afs_vlocation_updater(struct work_struct *); -extern void afs_vlocation_reaper(struct work_struct *); -extern void __net_exit afs_vlocation_purge(struct afs_net *); +extern int __init afs_fs_init(void); +extern void __exit afs_fs_exit(void); /* - * vnode.c + * vlclient.c */ -static inline struct afs_vnode *AFS_FS_I(struct inode *inode) -{ - return container_of(inode, struct afs_vnode, vfs_inode); -} - -static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) -{ - return &vnode->vfs_inode; -} - -extern void afs_vnode_finalise_status_update(struct afs_vnode *, - struct afs_server *); -extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool); -extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *, - struct afs_read *); -extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *, - umode_t, struct afs_fid *, struct afs_file_status *, - struct afs_callback *, struct afs_server **); -extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *, - bool); -extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *, - const char *); -extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *, - const char *, struct afs_fid *, - struct afs_file_status *, struct afs_server **); -extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *, - struct key *, const char *, const char *); -extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t, - unsigned, unsigned); -extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *); -extern int afs_vnode_get_volume_status(struct afs_vnode *, struct key *, - struct afs_volume_status *); -extern int afs_vnode_set_lock(struct afs_vnode *, struct key *, - afs_lock_type_t); -extern int afs_vnode_extend_lock(struct afs_vnode *, struct key *); -extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, + struct afs_addr_cursor *, + struct key *, const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, + struct key *, const uuid_t *); /* * volume.c */ -static inline struct afs_volume *afs_get_volume(struct afs_volume *volume) +static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) { if (volume) atomic_inc(&volume->usage); return volume; } +extern struct afs_volume *afs_create_volume(struct afs_mount_params *); +extern void afs_activate_volume(struct afs_volume *); +extern void afs_deactivate_volume(struct afs_volume *); extern void afs_put_volume(struct afs_cell *, struct afs_volume *); -extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *); -extern void afs_init_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); -extern int afs_set_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); -extern bool afs_volume_pick_fileserver(struct afs_fs_cursor *, struct afs_vnode *); -extern bool afs_iterate_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *); -extern int afs_end_fs_cursor(struct afs_fs_cursor *, struct afs_net *); +extern int afs_check_volume_status(struct afs_volume *, struct key *); /* * write.c @@ -903,6 +888,38 @@ extern int afs_fsync(struct file *, loff_t, loff_t, int); extern const struct xattr_handler *afs_xattr_handlers[]; extern ssize_t afs_listxattr(struct dentry *, char *, size_t); + +/* + * Miscellaneous inline functions. + */ +static inline struct afs_vnode *AFS_FS_I(struct inode *inode) +{ + return container_of(inode, struct afs_vnode, vfs_inode); +} + +static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) +{ + return &vnode->vfs_inode; +} + +static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc, + struct afs_vnode *vnode, + unsigned int cb_break) +{ + if (fc->ac.error == 0) + afs_cache_permit(vnode, fc->key, cb_break); +} + +static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + if (fc->ac.error == -ENOENT) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + afs_break_callback(vnode); + } +} + + /*****************************************************************************/ /* * debug tracing diff --git a/fs/afs/main.c b/fs/afs/main.c index e7f87d723761c7..15a02a05ff4095 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -55,18 +55,17 @@ static int __net_init afs_net_init(struct afs_net *net) spin_lock_init(&net->proc_cells_lock); INIT_LIST_HEAD(&net->proc_cells); - INIT_LIST_HEAD(&net->vl_updates); - INIT_LIST_HEAD(&net->vl_graveyard); - INIT_DELAYED_WORK(&net->vl_reaper, afs_vlocation_reaper); - INIT_DELAYED_WORK(&net->vl_updater, afs_vlocation_updater); - spin_lock_init(&net->vl_updates_lock); - spin_lock_init(&net->vl_graveyard_lock); - net->servers = RB_ROOT; - rwlock_init(&net->servers_lock); - INIT_LIST_HEAD(&net->server_graveyard); - spin_lock_init(&net->server_graveyard_lock); - INIT_WORK(&net->server_reaper, afs_reap_server); - timer_setup(&net->server_timer, afs_server_timer, 0); + seqlock_init(&net->fs_lock); + net->fs_servers = RB_ROOT; + INIT_LIST_HEAD(&net->fs_updates); + INIT_HLIST_HEAD(&net->fs_proc); + + INIT_HLIST_HEAD(&net->fs_addresses4); + INIT_HLIST_HEAD(&net->fs_addresses6); + seqlock_init(&net->fs_addr_lock); + + INIT_WORK(&net->fs_manager, afs_manage_servers); + timer_setup(&net->fs_timer, afs_servers_timer, 0); /* Register the /proc stuff */ ret = afs_proc_init(net); @@ -87,8 +86,8 @@ static int __net_init afs_net_init(struct afs_net *net) error_open_socket: net->live = false; - afs_vlocation_purge(net); afs_cell_purge(net); + afs_purge_servers(net); error_cell_init: net->live = false; afs_proc_cleanup(net); @@ -103,9 +102,8 @@ static int __net_init afs_net_init(struct afs_net *net) static void __net_exit afs_net_exit(struct afs_net *net) { net->live = false; - afs_purge_servers(net); - afs_vlocation_purge(net); afs_cell_purge(net); + afs_purge_servers(net); afs_close_socket(net); afs_proc_cleanup(net); } @@ -125,10 +123,6 @@ static int __init afs_init(void) afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); if (!afs_async_calls) goto error_async; - afs_vlocation_update_worker = - alloc_workqueue("kafs_vlupdated", WQ_MEM_RECLAIM, 0); - if (!afs_vlocation_update_worker) - goto error_vl_up; afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0); if (!afs_lock_manager) goto error_lockmgr; @@ -160,8 +154,6 @@ static int __init afs_init(void) #endif destroy_workqueue(afs_lock_manager); error_lockmgr: - destroy_workqueue(afs_vlocation_update_worker); -error_vl_up: destroy_workqueue(afs_async_calls); error_async: destroy_workqueue(afs_wq); @@ -189,7 +181,6 @@ static void __exit afs_exit(void) fscache_unregister_netfs(&afs_cache_netfs); #endif destroy_workqueue(afs_lock_manager); - destroy_workqueue(afs_vlocation_update_worker); destroy_workqueue(afs_async_calls); destroy_workqueue(afs_wq); afs_clean_up_permit_cache(); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 9cf9ce88a8dd34..4508dd54f78968 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -105,22 +105,22 @@ static const struct file_operations afs_proc_cell_vlservers_fops = { .release = seq_release, }; -static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); -static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, +static int afs_proc_servers_open(struct inode *inode, struct file *file); +static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos); +static void *afs_proc_servers_next(struct seq_file *p, void *v, loff_t *pos); -static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); -static int afs_proc_cell_servers_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_cell_servers_ops = { - .start = afs_proc_cell_servers_start, - .next = afs_proc_cell_servers_next, - .stop = afs_proc_cell_servers_stop, - .show = afs_proc_cell_servers_show, +static void afs_proc_servers_stop(struct seq_file *p, void *v); +static int afs_proc_servers_show(struct seq_file *m, void *v); + +static const struct seq_operations afs_proc_servers_ops = { + .start = afs_proc_servers_start, + .next = afs_proc_servers_next, + .stop = afs_proc_servers_stop, + .show = afs_proc_servers_show, }; -static const struct file_operations afs_proc_cell_servers_fops = { - .open = afs_proc_cell_servers_open, +static const struct file_operations afs_proc_servers_fops = { + .open = afs_proc_servers_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -138,7 +138,8 @@ int afs_proc_init(struct afs_net *net) goto error_dir; if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || - !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops)) + !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || + !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops)) goto error_tree; _leave(" = 0"); @@ -174,7 +175,6 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) m = file->private_data; m->private = PDE_DATA(inode); - return 0; } @@ -357,12 +357,10 @@ int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) if (!dir) goto error_dir; - if (!proc_create_data("servers", 0, dir, - &afs_proc_cell_servers_fops, cell) || - !proc_create_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_fops, cell) || + if (!proc_create_data("vlservers", 0, dir, + &afs_proc_cell_vlservers_fops, cell) || !proc_create_data("volumes", 0, dir, - &afs_proc_cell_volumes_fops, cell)) + &afs_proc_cell_volumes_fops, cell)) goto error_tree; _leave(" = 0"); @@ -420,9 +418,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) _enter("cell=%p pos=%Ld", cell, *_pos); - /* lock the list against modification */ - down_read(&cell->vl_sem); - return seq_list_start_head(&cell->vl_list, *_pos); + read_lock(&cell->proc_lock); + return seq_list_start_head(&cell->proc_volumes, *_pos); } /* @@ -434,7 +431,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, struct afs_cell *cell = p->private; _enter("cell=%p pos=%Ld", cell, *_pos); - return seq_list_next(v, &cell->vl_list, _pos); + return seq_list_next(v, &cell->proc_volumes, _pos); } /* @@ -444,17 +441,13 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) { struct afs_cell *cell = p->private; - up_read(&cell->vl_sem); + read_unlock(&cell->proc_lock); } -static const char afs_vlocation_states[][4] = { - [AFS_VL_NEW] = "New", - [AFS_VL_CREATING] = "Crt", - [AFS_VL_VALID] = "Val", - [AFS_VL_NO_VOLUME] = "NoV", - [AFS_VL_UPDATING] = "Upd", - [AFS_VL_VOLUME_DELETED] = "Del", - [AFS_VL_UNCERTAIN] = "Unc", +static const char afs_vol_types[3][3] = { + [AFSVL_RWVOL] = "RW", + [AFSVL_ROVOL] = "RO", + [AFSVL_BACKVOL] = "BK", }; /* @@ -463,23 +456,17 @@ static const char afs_vlocation_states[][4] = { static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { struct afs_cell *cell = m->private; - struct afs_vlocation *vlocation = - list_entry(v, struct afs_vlocation, link); + struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); - /* display header on line 1 */ - if (v == &cell->vl_list) { - seq_puts(m, "USE STT VLID[0] VLID[1] VLID[2] NAME\n"); + /* Display header on line 1 */ + if (v == &cell->proc_volumes) { + seq_puts(m, "USE VID TY\n"); return 0; } - /* display one cell per line on subsequent lines */ - seq_printf(m, "%3d %s %08x %08x %08x %s\n", - atomic_read(&vlocation->usage), - afs_vlocation_states[vlocation->state], - vlocation->vldb.vid[0], - vlocation->vldb.vid[1], - vlocation->vldb.vid[2], - vlocation->vldb.name); + seq_printf(m, "%3d %08x %s\n", + atomic_read(&vol->usage), vol->vid, + afs_vol_types[vol->type]); return 0; } @@ -580,86 +567,62 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) } /* - * open "/proc/fs/afs//servers" which provides a summary of active + * open "/proc/fs/afs/servers" which provides a summary of active * servers */ -static int afs_proc_cell_servers_open(struct inode *inode, struct file *file) +static int afs_proc_servers_open(struct inode *inode, struct file *file) { - struct afs_cell *cell; - struct seq_file *m; - int ret; - - cell = PDE_DATA(inode); - if (!cell) - return -ENOENT; - - ret = seq_open(file, &afs_proc_cell_servers_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = cell; - return 0; + return seq_open(file, &afs_proc_servers_ops); } /* - * set up the iterator to start reading from the cells list and return the - * first item + * Set up the iterator to start reading from the server list and return the + * first item. */ -static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) - __acquires(m->private->servers_lock) +static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) { - struct afs_cell *cell = m->private; - - _enter("cell=%p pos=%Ld", cell, *_pos); + struct afs_net *net = afs_seq2net(m); - /* lock the list against modification */ - read_lock(&cell->servers_lock); - return seq_list_start_head(&cell->servers, *_pos); + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->fs_proc, *_pos); } /* * move to next cell in cells list */ -static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, - loff_t *_pos) +static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = p->private; + struct afs_net *net = afs_seq2net(m); - _enter("cell=%p pos=%Ld", cell, *_pos); - return seq_list_next(v, &cell->servers, _pos); + return seq_hlist_next_rcu(v, &net->fs_proc, _pos); } /* * clean up after reading from the cells list */ -static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) - __releases(p->private->servers_lock) +static void afs_proc_servers_stop(struct seq_file *p, void *v) { - struct afs_cell *cell = p->private; - - read_unlock(&cell->servers_lock); + rcu_read_unlock(); } /* * display a header line followed by a load of volume lines */ -static int afs_proc_cell_servers_show(struct seq_file *m, void *v) +static int afs_proc_servers_show(struct seq_file *m, void *v) { - struct afs_cell *cell = m->private; - struct afs_server *server = list_entry(v, struct afs_server, link); - char ipaddr[64]; + struct afs_server *server; + struct afs_addr_list *alist; - /* display header on line 1 */ - if (v == &cell->servers) { - seq_puts(m, "USE ADDR STATE\n"); + if (v == SEQ_START_TOKEN) { + seq_puts(m, "UUID USE ADDR\n"); return 0; } - /* display one cell per line on subsequent lines */ - sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport); - seq_printf(m, "%3d %-15s %5d\n", - atomic_read(&server->usage), ipaddr, server->fs_state); - + server = list_entry(v, struct afs_server, proc_link); + alist = rcu_dereference(server->addresses); + seq_printf(m, "%pU %3d %pISp\n", + &server->uuid, + atomic_read(&server->usage), + &alist->addrs[alist->index].transport); return 0; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index c7975b3ba59afd..e728ca1776c9b3 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -11,7 +11,12 @@ #include #include +#include +#include +#include +#include #include "internal.h" +#include "afs_fs.h" /* * Initialise a filesystem server cursor for iterating over FS servers. @@ -21,6 +26,460 @@ void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) memset(fc, 0, sizeof(*fc)); } +/* + * Begin an operation on the fileserver. + * + * Fileserver operations are serialised on the server by vnode, so we serialise + * them here also using the io_lock. + */ +bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + struct key *key) +{ + afs_init_fs_cursor(fc, vnode); + fc->vnode = vnode; + fc->key = key; + fc->ac.error = SHRT_MAX; + + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + fc->ac.error = -EINTR; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) || + test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags)) + fc->flags |= AFS_FS_CURSOR_CUR_ONLY; + return true; +} + +/* + * Begin iteration through a server list, starting with the vnode's last used + * server if possible, or the last recorded good server if not. + */ +static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, + struct afs_vnode *vnode) +{ + struct afs_cb_interest *cbi; + int i; + + read_lock(&vnode->volume->servers_lock); + fc->server_list = afs_get_serverlist(vnode->volume->servers); + read_unlock(&vnode->volume->servers_lock); + + cbi = vnode->cb_interest; + if (cbi) { + /* See if the vnode's preferred record is still available */ + for (i = 0; i < fc->server_list->nr_servers; i++) { + if (fc->server_list->servers[i].cb_interest == cbi) { + fc->start = i; + goto found_interest; + } + } + + /* If we have a lock outstanding on a server that's no longer + * serving this vnode, then we can't switch to another server + * and have to return an error. + */ + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + fc->ac.error = -ESTALE; + return false; + } + + /* Note that the callback promise is effectively broken */ + write_seqlock(&vnode->cb_lock); + ASSERTCMP(cbi, ==, vnode->cb_interest); + vnode->cb_interest = NULL; + if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + vnode->cb_break++; + write_sequnlock(&vnode->cb_lock); + + afs_put_cb_interest(afs_v2net(vnode), cbi); + cbi = NULL; + } else { + fc->start = READ_ONCE(fc->server_list->index); + } + +found_interest: + fc->index = fc->start; + return true; +} + +/* + * Post volume busy note. + */ +static void afs_busy(struct afs_volume *volume, u32 abort_code) +{ + const char *m; + + switch (abort_code) { + case VOFFLINE: m = "offline"; break; + case VRESTARTING: m = "restarting"; break; + case VSALVAGING: m = "being salvaged"; break; + default: m = "busy"; break; + } + + pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); +} + +/* + * Sleep and retry the operation to the same fileserver. + */ +static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) +{ + msleep_interruptible(1000); + if (signal_pending(current)) { + fc->ac.error = -ERESTARTSYS; + return false; + } + + return true; +} + +/* + * Select the fileserver to use. May be called multiple times to rotate + * through the fileservers. + */ +bool afs_select_fileserver(struct afs_fs_cursor *fc) +{ + struct afs_addr_list *alist; + struct afs_server *server; + struct afs_vnode *vnode = fc->vnode; + + _enter("%u/%u,%u/%u,%d,%d", + fc->index, fc->start, + fc->ac.index, fc->ac.start, + fc->ac.error, fc->ac.abort_code); + + if (fc->flags & AFS_FS_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + /* Evaluate the result of the previous operation, if there was one. */ + switch (fc->ac.error) { + case SHRT_MAX: + goto start; + + case 0: + default: + /* Success or local failure. Stop. */ + fc->flags |= AFS_FS_CURSOR_STOP; + _leave(" = f [okay/local %d]", fc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (fc->ac.abort_code) { + case VNOVOL: + /* This fileserver doesn't know about the volume. + * - May indicate that the VL is wrong - retry once and compare + * the results. + * - May indicate that the fileserver couldn't attach to the vol. + */ + if (fc->flags & AFS_FS_CURSOR_VNOVOL) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + + write_lock(&vnode->volume->servers_lock); + fc->server_list->vnovol_mask |= 1 << fc->index; + write_unlock(&vnode->volume->servers_lock); + + set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { + fc->ac.error = -ENOMEDIUM; + goto failed; + } + + /* If the server list didn't change, then assume that + * it's the fileserver having trouble. + */ + if (vnode->volume->servers == fc->server_list) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + + /* Try again */ + fc->flags |= AFS_FS_CURSOR_VNOVOL; + _leave(" = t [vnovol]"); + return true; + + case VSALVAGE: /* TODO: Should this return an error or iterate? */ + case VVOLEXISTS: + case VNOSERVICE: + case VONLINE: + case VDISKFULL: + case VOVERQUOTA: + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto next_server; + + case VOFFLINE: + if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { + afs_busy(vnode->volume, fc->ac.abort_code); + clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + } + if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { + fc->ac.error = -EADV; + goto failed; + } + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + fc->ac.error = -ESTALE; + goto failed; + } + goto busy; + + case VSALVAGING: + case VRESTARTING: + case VBUSY: + /* Retry after going round all the servers unless we + * have a file lock we need to maintain. + */ + if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { + fc->ac.error = -EBUSY; + goto failed; + } + if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { + afs_busy(vnode->volume, fc->ac.abort_code); + clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + } + busy: + if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { + if (!afs_sleep_and_retry(fc)) + goto failed; + + /* Retry with same server & address */ + _leave(" = t [vbusy]"); + return true; + } + + fc->flags |= AFS_FS_CURSOR_VBUSY; + goto next_server; + + case VMOVED: + /* The volume migrated to another server. We consider + * consider all locks and callbacks broken and request + * an update from the VLDB. + * + * We also limit the number of VMOVED hops we will + * honour, just in case someone sets up a loop. + */ + if (fc->flags & AFS_FS_CURSOR_VMOVED) { + fc->ac.error = -EREMOTEIO; + goto failed; + } + fc->flags |= AFS_FS_CURSOR_VMOVED; + + set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); + set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + /* If the server list didn't change, then the VLDB is + * out of sync with the fileservers. This is hopefully + * a temporary condition, however, so we don't want to + * permanently block access to the file. + * + * TODO: Try other fileservers if we can. + * + * TODO: Retry a few times with sleeps. + */ + if (vnode->volume->servers == fc->server_list) { + fc->ac.error = -ENOMEDIUM; + goto failed; + } + + goto restart_from_beginning; + + default: + clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto failed; + } + + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn"); + goto iterate_address; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&fc->ac); + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + afs_put_serverlist(afs_v2net(vnode), fc->server_list); + fc->server_list = NULL; +start: + _debug("start"); + /* See if we need to do an update of the volume record. Note that the + * volume may have moved or even have been deleted. + */ + fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); + if (fc->ac.error < 0) + goto failed; + + if (!afs_start_fs_iteration(fc, vnode)) + goto failed; + goto use_server; + +next_server: + _debug("next"); + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + fc->index++; + if (fc->index >= fc->server_list->nr_servers) + fc->index = 0; + if (fc->index != fc->start) + goto use_server; + + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (fc->flags & AFS_FS_CURSOR_VBUSY) + goto restart_from_beginning; + + fc->ac.error = -EDESTADDRREQ; + goto failed; + +use_server: + _debug("use"); + /* We're starting on a different fileserver from the list. We need to + * check it, create a callback intercept, find its address list and + * probe its capabilities before we use it. + */ + ASSERTCMP(fc->ac.alist, ==, NULL); + server = fc->server_list->servers[fc->index].server; + + if (!afs_check_server_record(fc, server)) + goto failed; + + _debug("USING SERVER: %pU", &server->uuid); + + /* Make sure we've got a callback interest record for this server. We + * have to link it in before we send the request as we can be sent a + * break request before we've finished decoding the reply and + * installing the vnode. + */ + fc->ac.error = afs_register_server_cb_interest( + vnode, &fc->server_list->servers[fc->index]); + if (fc->ac.error < 0) + goto failed; + + fc->cbi = afs_get_cb_interest(vnode->cb_interest); + + read_lock(&server->fs_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(alist); + read_unlock(&server->fs_lock); + + + /* Probe the current fileserver if we haven't done so yet. */ + if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { + fc->ac.alist = afs_get_addrlist(alist); + + if (!afs_probe_fileserver(fc)) + goto failed; + } + + if (!fc->ac.alist) + fc->ac.alist = alist; + else + afs_put_addrlist(alist); + + fc->ac.addr = NULL; + fc->ac.start = READ_ONCE(alist->index); + fc->ac.index = fc->ac.start; + fc->ac.error = 0; + fc->ac.begun = false; + goto iterate_address; + +iterate_address: + ASSERT(fc->ac.alist); + _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. + */ + if (afs_iterate_addresses(&fc->ac)) { + _leave(" = t"); + return true; + } + + afs_end_cursor(&fc->ac); + goto next_server; + +failed: + fc->flags |= AFS_FS_CURSOR_STOP; + _leave(" = f [failed %d]", fc->ac.error); + return false; +} + +/* + * Select the same fileserver we used for a vnode before and only that + * fileserver. We use this when we have a lock on that file, which is backed + * only by the fileserver we obtained it from. + */ +bool afs_select_current_fileserver(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_cb_interest *cbi = vnode->cb_interest; + struct afs_addr_list *alist; + + _enter(""); + + if (!cbi) { + fc->ac.error = -ESTALE; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + read_lock(&cbi->server->fs_lock); + alist = afs_get_addrlist(cbi->server->addresses); + read_unlock(&cbi->server->fs_lock); + if (!alist) { + fc->ac.error = -ESTALE; + fc->flags |= AFS_FS_CURSOR_STOP; + return false; + } + + fc->ac.alist = alist; + fc->ac.error = 0; + return true; +} + +/* + * Tidy up a filesystem cursor and unlock the vnode. + */ +int afs_end_vnode_operation(struct afs_fs_cursor *fc) +{ + struct afs_net *net = afs_v2net(fc->vnode); + int ret; + + mutex_unlock(&fc->vnode->io_lock); + + afs_end_cursor(&fc->ac); + afs_put_cb_interest(net, fc->cbi); + afs_put_serverlist(net, fc->server_list); + + ret = fc->ac.error; + if (ret == -ECONNABORTED) + afs_abort_to_error(fc->ac.abort_code); + + return fc->ac.error; +} + +#if 0 /* * Set a filesystem server cursor for using a specific FS server. */ @@ -252,3 +711,5 @@ int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) afs_put_server(net, fc->server); return fc->ac.error; } + +#endif diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5ddfb7c4cf7815..1bbd5854507d03 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -20,7 +20,7 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); -static long afs_wait_for_call_to_complete(struct afs_call *); +static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); @@ -162,6 +162,7 @@ void afs_put_call(struct afs_call *call) call->type->destructor(call); afs_put_server(call->net, call->cm_server); + afs_put_cb_interest(call->net, call->cbi); kfree(call->request); kfree(call); @@ -330,7 +331,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, struct kvec iov[1]; size_t offset; s64 tx_total_len; - u32 abort_code; int ret; _enter(",{%pISp},", &srx->transport); @@ -362,7 +362,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade); - call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); goto error_kill_call; @@ -406,7 +405,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, if (call->async) return -EINPROGRESS; - return afs_wait_for_call_to_complete(call); + return afs_wait_for_call_to_complete(call, ac); error_do_abort: call->state = AFS_CALL_COMPLETE; @@ -414,15 +413,16 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); } else { - abort_code = 0; offset = 0; rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL, 0, &offset, false, &call->abort_code, &call->service_id); - ret = afs_abort_to_error(call->abort_code); + ac->abort_code = call->abort_code; + ac->responded = true; } error_kill_call: afs_put_call(call); + ac->error = ret; _leave(" = %d", ret); return ret; } @@ -510,7 +510,8 @@ static void afs_deliver_to_call(struct afs_call *call) /* * wait synchronously for a call to complete */ -static long afs_wait_for_call_to_complete(struct afs_call *call) +static long afs_wait_for_call_to_complete(struct afs_call *call, + struct afs_addr_cursor *ac) { signed long rtt2, timeout; long ret; @@ -563,16 +564,25 @@ static long afs_wait_for_call_to_complete(struct afs_call *call) /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { _debug("call interrupted"); - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + call->error = -ERESTARTSYS; } - ret = call->error; - if (ret < 0) { - ret = afs_abort_to_error(call->abort_code); - } else if (ret == 0 && call->ret_reply0) { - ret = (long)call->reply[0]; - call->reply[0] = NULL; + ac->abort_code = call->abort_code; + ac->error = call->error; + + ret = ac->error; + switch (ret) { + case 0: + if (call->ret_reply0) { + ret = (long)call->reply[0]; + call->reply[0] = NULL; + } + /* Fall through */ + case -ECONNABORTED: + ac->responded = true; + break; } _debug("call complete"); @@ -882,10 +892,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, return 0; } - if (ret == -ECONNABORTED) - call->error = afs_abort_to_error(call->abort_code); - else - call->error = ret; + call->error = ret; call->state = AFS_CALL_COMPLETE; return ret; } diff --git a/fs/afs/security.c b/fs/afs/security.c index 1b5198fc165794..46a881a4d08f6a 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -325,7 +325,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, */ _debug("no valid permit"); - ret = afs_vnode_fetch_status(vnode, key, true); + ret = afs_fetch_status(vnode, key); if (ret < 0) { *_access = 0; _leave(" = %d", ret); diff --git a/fs/afs/server.c b/fs/afs/server.c index 9ca174b24f5b7d..a6c860bcf391b3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -14,7 +14,8 @@ #include "afs_fs.h" #include "internal.h" -static unsigned afs_server_timeout = 10; /* server timeout in seconds */ +static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ +static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ static void afs_inc_servers_outstanding(struct afs_net *net) { @@ -27,60 +28,201 @@ static void afs_dec_servers_outstanding(struct afs_net *net) wake_up_atomic_t(&net->servers_outstanding); } -void afs_server_timer(struct timer_list *timer) +/* + * Find a server by one of its addresses. + */ +struct afs_server *afs_find_server(struct afs_net *net, + const struct sockaddr_rxrpc *srx) { - struct afs_net *net = container_of(timer, struct afs_net, server_timer); + const struct sockaddr_in6 *a = &srx->transport.sin6, *b; + const struct afs_addr_list *alist; + struct afs_server *server = NULL; + unsigned int i; + bool ipv6 = true; + int seq = 0, diff; + + if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 || + srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 || + srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff)) + ipv6 = false; + + rcu_read_lock(); + + do { + if (server) + afs_put_server(net, server); + server = NULL; + read_seqbegin_or_lock(&net->fs_addr_lock, &seq); + + if (ipv6) { + hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + alist = rcu_dereference(server->addresses); + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { + b = &alist->addrs[i].transport.sin6; + diff = (u16)a->sin6_port - (u16)b->sin6_port; + if (diff == 0) + diff = memcmp(&a->sin6_addr, + &b->sin6_addr, + sizeof(struct in6_addr)); + if (diff == 0) + goto found; + if (diff < 0) { + // TODO: Sort the list + //if (i == alist->nr_ipv4) + // goto not_found; + break; + } + } + } + } else { + hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { + alist = rcu_dereference(server->addresses); + for (i = 0; i < alist->nr_ipv4; i++) { + b = &alist->addrs[i].transport.sin6; + diff = (u16)a->sin6_port - (u16)b->sin6_port; + if (diff == 0) + diff = ((u32)a->sin6_addr.s6_addr32[3] - + (u32)b->sin6_addr.s6_addr32[3]); + if (diff == 0) + goto found; + if (diff < 0) { + // TODO: Sort the list + //if (i == 0) + // goto not_found; + break; + } + } + } + } - if (!queue_work(afs_wq, &net->server_reaper)) - afs_dec_servers_outstanding(net); + //not_found: + server = NULL; + found: + if (server && !atomic_inc_not_zero(&server->usage)) + server = NULL; + + } while (need_seqretry(&net->fs_addr_lock, seq)); + + done_seqretry(&net->fs_addr_lock, seq); + + rcu_read_unlock(); + return server; } /* - * install a server record in the master tree + * Look up a server by its UUID */ -static int afs_install_server(struct afs_server *server) +struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) { - struct afs_server *xserver; - struct afs_net *net = server->cell->net; + struct afs_server *server = NULL; + struct rb_node *p; + int diff, seq = 0; + + _enter("%pU", uuid); + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. + */ + if (server) + afs_put_server(net, server); + server = NULL; + + read_seqbegin_or_lock(&net->fs_lock, &seq); + + p = net->fs_servers.rb_node; + while (p) { + server = rb_entry(p, struct afs_server, uuid_rb); + + diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); + if (diff < 0) { + p = p->rb_left; + } else if (diff > 0) { + p = p->rb_right; + } else { + afs_get_server(server); + break; + } + + server = NULL; + } + } while (need_seqretry(&net->fs_lock, seq)); + + done_seqretry(&net->fs_lock, seq); + + _leave(" = %p", server); + return server; +} + +/* + * Install a server record in the namespace tree + */ +static struct afs_server *afs_install_server(struct afs_net *net, + struct afs_server *candidate) +{ + const struct afs_addr_list *alist; + struct afs_server *server; struct rb_node **pp, *p; - int ret, diff; + int ret = -EEXIST, diff; - _enter("%p", server); + _enter("%p", candidate); - write_lock(&net->servers_lock); + write_seqlock(&net->fs_lock); - ret = -EEXIST; - pp = &net->servers.rb_node; + /* Firstly install the server in the UUID lookup tree */ + pp = &net->fs_servers.rb_node; p = NULL; while (*pp) { p = *pp; _debug("- consider %p", p); - xserver = rb_entry(p, struct afs_server, master_rb); - diff = memcmp(&server->addrs->addrs[0], - &xserver->addrs->addrs[0], - sizeof(sizeof(server->addrs->addrs[0]))); + server = rb_entry(p, struct afs_server, uuid_rb); + diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); if (diff < 0) pp = &(*pp)->rb_left; else if (diff > 0) pp = &(*pp)->rb_right; else - goto error; + goto exists; } - rb_link_node(&server->master_rb, p, pp); - rb_insert_color(&server->master_rb, &net->servers); + server = candidate; + rb_link_node(&server->uuid_rb, p, pp); + rb_insert_color(&server->uuid_rb, &net->fs_servers); + hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + + write_seqlock(&net->fs_addr_lock); + alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&net->fs_addr_lock.lock)); + + /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install + * it in the IPv4 and/or IPv6 reverse-map lists. + * + * TODO: For speed we want to use something other than a flat list + * here; even sorting the list in terms of lowest address would help a + * bit, but anything we might want to do gets messy and memory + * intensive. + */ + if (alist->nr_ipv4 > 0) + hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); + if (alist->nr_addrs > alist->nr_ipv4) + hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); + + write_sequnlock(&net->fs_addr_lock); ret = 0; -error: - write_unlock(&net->servers_lock); - return ret; +exists: + afs_get_server(server); + write_sequnlock(&net->fs_lock); + return server; } /* * allocate a new server record */ -static struct afs_server *afs_alloc_server(struct afs_cell *cell, - const struct sockaddr_rxrpc *addr) +static struct afs_server *afs_alloc_server(struct afs_net *net, + const uuid_t *uuid, + struct afs_addr_list *alist) { struct afs_server *server; @@ -89,194 +231,155 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); if (!server) goto enomem; - server->addrs = kzalloc(sizeof(struct afs_addr_list) + - sizeof(struct sockaddr_rxrpc), - GFP_KERNEL); - if (!server->addrs) - goto enomem_server; atomic_set(&server->usage, 1); - server->net = cell->net; - server->cell = cell; - - INIT_LIST_HEAD(&server->link); - INIT_LIST_HEAD(&server->grave); - init_rwsem(&server->sem); - spin_lock_init(&server->fs_lock); + RCU_INIT_POINTER(server->addresses, alist); + server->addr_version = alist->version; + server->uuid = *uuid; + server->flags = (1UL << AFS_SERVER_FL_NEW); + server->update_at = ktime_get_real_seconds() + afs_server_update_delay; + rwlock_init(&server->fs_lock); INIT_LIST_HEAD(&server->cb_interests); rwlock_init(&server->cb_break_lock); - refcount_set(&server->addrs->usage, 1); - server->addrs->nr_addrs = 1; - server->addrs->addrs[0] = *addr; - afs_inc_servers_outstanding(cell->net); - - _leave(" = %p{%d}", server, atomic_read(&server->usage)); + afs_inc_servers_outstanding(net); + _leave(" = %p", server); return server; -enomem_server: - kfree(server); enomem: _leave(" = NULL [nomem]"); return NULL; } /* - * get an FS-server record for a cell + * Look up an address record for a server */ -struct afs_server *afs_lookup_server(struct afs_cell *cell, - struct sockaddr_rxrpc *addr) +static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, + struct key *key, const uuid_t *uuid) { - struct afs_server *server, *candidate; - - _enter("%p,%pIS", cell, &addr->transport); - - /* quick scan of the list to see if we already have the server */ - read_lock(&cell->servers_lock); - - list_for_each_entry(server, &cell->servers, link) { - if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0) - goto found_server_quickly; - } - read_unlock(&cell->servers_lock); - - candidate = afs_alloc_server(cell, addr); - if (!candidate) { - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); - } - - write_lock(&cell->servers_lock); - - /* check the cell's server list again */ - list_for_each_entry(server, &cell->servers, link) { - if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0) - goto found_server; - } - - _debug("new"); - server = candidate; - if (afs_install_server(server) < 0) - goto server_in_two_cells; - - afs_get_cell(cell); - list_add_tail(&server->link, &cell->servers); - - write_unlock(&cell->servers_lock); - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - return server; - - /* found a matching server quickly */ -found_server_quickly: - _debug("found quickly"); - afs_get_server(server); - read_unlock(&cell->servers_lock); -no_longer_unused: - if (!list_empty(&server->grave)) { - spin_lock(&cell->net->server_graveyard_lock); - list_del_init(&server->grave); - spin_unlock(&cell->net->server_graveyard_lock); + struct afs_addr_cursor ac; + struct afs_addr_list *alist; + int ret; + + ret = afs_set_vl_cursor(&ac, cell); + if (ret < 0) + return ERR_PTR(ret); + + while (afs_iterate_addresses(&ac)) { + alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); + switch (ac.error) { + case 0: + afs_end_cursor(&ac); + return alist; + case -ECONNABORTED: + ac.error = afs_abort_to_error(ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + break; + default: + ac.error = -EIO; + goto error; + } } - _leave(" = %p{%d}", server, atomic_read(&server->usage)); - return server; - /* found a matching server on the second pass */ -found_server: - _debug("found"); - afs_get_server(server); - write_unlock(&cell->servers_lock); - kfree(candidate); - goto no_longer_unused; - - /* found a server that seems to be in two cells */ -server_in_two_cells: - write_unlock(&cell->servers_lock); - kfree(candidate); - afs_dec_servers_outstanding(cell->net); - printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n", - addr); - _leave(" = -EEXIST"); - return ERR_PTR(-EEXIST); +error: + return ERR_PTR(afs_end_cursor(&ac)); } /* - * look up a server by its IP address + * Get or create a fileserver record. */ -struct afs_server *afs_find_server(struct afs_net *net, - const struct sockaddr_rxrpc *srx) +struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, + const uuid_t *uuid) { - struct afs_server *server = NULL; - struct rb_node *p; - int diff; + struct afs_addr_list *alist; + struct afs_server *server, *candidate; - _enter("{%d,%pIS}", srx->transport.family, &srx->transport); + _enter("%p,%pU", cell->net, uuid); - read_lock(&net->servers_lock); + server = afs_find_server_by_uuid(cell->net, uuid); + if (server) + return server; - p = net->servers.rb_node; - while (p) { - server = rb_entry(p, struct afs_server, master_rb); + alist = afs_vl_lookup_addrs(cell, key, uuid); + if (IS_ERR(alist)) + return ERR_CAST(alist); - _debug("- consider %p", p); + candidate = afs_alloc_server(cell->net, uuid, alist); + if (!candidate) { + afs_put_addrlist(alist); + return ERR_PTR(-ENOMEM); + } - diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx)); - if (diff < 0) { - p = p->rb_left; - } else if (diff > 0) { - p = p->rb_right; - } else { - afs_get_server(server); - goto found; - } + server = afs_install_server(cell->net, candidate); + if (server != candidate) { + afs_put_addrlist(alist); + kfree(candidate); } - server = NULL; -found: - read_unlock(&net->servers_lock); - _leave(" = %p", server); + _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; } +/* + * Set the server timer to fire after a given delay, assuming it's not already + * set for an earlier time. + */ static void afs_set_server_timer(struct afs_net *net, time64_t delay) { - afs_inc_servers_outstanding(net); if (net->live) { - if (timer_reduce(&net->server_timer, jiffies + delay * HZ)) - afs_dec_servers_outstanding(net); - } else { - if (!queue_work(afs_wq, &net->server_reaper)) + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) afs_dec_servers_outstanding(net); } } /* - * destroy a server record - * - removes from the cell list + * Server management timer. We have an increment on fs_outstanding that we + * need to pass along to the work item. + */ +void afs_servers_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, fs_timer); + + _enter(""); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); +} + +/* + * Release a reference on a server record. */ void afs_put_server(struct afs_net *net, struct afs_server *server) { + unsigned int usage; + if (!server) return; - _enter("%p{%d}", server, atomic_read(&server->usage)); + server->put_time = ktime_get_real_seconds(); - _debug("PUT SERVER %d", atomic_read(&server->usage)); + usage = atomic_dec_return(&server->usage); - ASSERTCMP(atomic_read(&server->usage), >, 0); + _enter("{%u}", usage); - if (likely(!atomic_dec_and_test(&server->usage))) { - _leave(""); + if (likely(usage > 0)) return; - } - spin_lock(&net->server_graveyard_lock); - if (atomic_read(&server->usage) == 0) { - list_move_tail(&server->grave, &net->server_graveyard); - server->time_of_death = ktime_get_real_seconds(); - afs_set_server_timer(net, afs_server_timeout); - } - spin_unlock(&net->server_graveyard_lock); - _leave(" [dead]"); + afs_set_server_timer(net, afs_server_gc_delay); +} + +static void afs_server_rcu(struct rcu_head *rcu) +{ + struct afs_server *server = container_of(rcu, struct afs_server, rcu); + + afs_put_addrlist(server->addresses); + kfree(server); } /* @@ -284,7 +387,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server) */ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) { - struct afs_addr_list *alist = server->addrs; + struct afs_addr_list *alist = server->addresses; struct afs_addr_cursor ac = { .alist = alist, .addr = &alist->addrs[0], @@ -294,79 +397,300 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) }; _enter("%p", server); - afs_fs_give_up_all_callbacks(server, &ac, NULL, false); - afs_put_cell(net, server->cell); - afs_put_addrlist(server->addrs); - kfree(server); + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } /* - * reap dead server records + * Garbage collect any expired servers. */ -void afs_reap_server(struct work_struct *work) +static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) { - LIST_HEAD(corpses); struct afs_server *server; - struct afs_net *net = container_of(work, struct afs_net, server_reaper); - unsigned long delay, expiry; - time64_t now; - - now = ktime_get_real_seconds(); - spin_lock(&net->server_graveyard_lock); - - while (!list_empty(&net->server_graveyard)) { - server = list_entry(net->server_graveyard.next, - struct afs_server, grave); - - /* the queue is ordered most dead first */ - if (net->live) { - expiry = server->time_of_death + afs_server_timeout; - if (expiry > now) { - delay = (expiry - now); - afs_set_server_timer(net, delay); - break; - } + bool deleted; + int usage; + + while ((server = gc_list)) { + gc_list = server->gc_next; + + write_seqlock(&net->fs_lock); + usage = 1; + deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); + if (deleted) { + rb_erase(&server->uuid_rb, &net->fs_servers); + hlist_del_rcu(&server->proc_link); } + write_sequnlock(&net->fs_lock); - write_lock(&server->cell->servers_lock); - write_lock(&net->servers_lock); - if (atomic_read(&server->usage) > 0) { - list_del_init(&server->grave); - } else { - list_move_tail(&server->grave, &corpses); - list_del_init(&server->link); - rb_erase(&server->master_rb, &net->servers); + if (deleted) + afs_destroy_server(net, server); + } +} + +/* + * Manage the records of servers known to be within a network namespace. This + * includes garbage collecting unused servers. + * + * Note also that we were given an increment on net->servers_outstanding by + * whoever queued us that we need to deal with before returning. + */ +void afs_manage_servers(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, fs_manager); + struct afs_server *gc_list = NULL; + struct rb_node *cursor; + time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; + bool purging = !net->live; + + _enter(""); + + /* Trawl the server list looking for servers that have expired from + * lack of use. + */ + read_seqlock_excl(&net->fs_lock); + + for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { + struct afs_server *server = + rb_entry(cursor, struct afs_server, uuid_rb); + int usage = atomic_read(&server->usage); + + _debug("manage %pU %u", &server->uuid, usage); + + ASSERTCMP(usage, >=, 1); + ASSERTIFCMP(purging, usage, ==, 1); + + if (usage == 1) { + time64_t expire_at = server->put_time; + + if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && + !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) + expire_at += afs_server_gc_delay; + if (purging || expire_at <= now) { + server->gc_next = gc_list; + gc_list = server; + } else if (expire_at < next_manage) { + next_manage = expire_at; + } } - write_unlock(&net->servers_lock); - write_unlock(&server->cell->servers_lock); } - spin_unlock(&net->server_graveyard_lock); + read_sequnlock_excl(&net->fs_lock); - /* now reap the corpses we've extracted */ - while (!list_empty(&corpses)) { - server = list_entry(corpses.next, struct afs_server, grave); - list_del(&server->grave); - afs_destroy_server(net, server); + /* Update the timer on the way out. We have to pass an increment on + * servers_outstanding in the namespace that we are in to the timer or + * the work scheduler. + */ + if (!purging && next_manage < TIME64_MAX) { + now = ktime_get_real_seconds(); + + if (next_manage - now <= 0) { + if (queue_work(afs_wq, &net->fs_manager)) + afs_inc_servers_outstanding(net); + } else { + afs_set_server_timer(net, next_manage - now); + } } + afs_gc_servers(net, gc_list); + afs_dec_servers_outstanding(net); + _leave(" [%d]", atomic_read(&net->servers_outstanding)); +} + +static void afs_queue_server_manager(struct afs_net *net) +{ + afs_inc_servers_outstanding(net); + if (!queue_work(afs_wq, &net->fs_manager)) + afs_dec_servers_outstanding(net); } /* - * Discard all the server records from a net namespace when it is destroyed or - * the afs module is removed. + * Purge list of servers. */ -void __net_exit afs_purge_servers(struct afs_net *net) +void afs_purge_servers(struct afs_net *net) { - if (del_timer_sync(&net->server_timer)) + _enter(""); + + if (del_timer_sync(&net->fs_timer)) atomic_dec(&net->servers_outstanding); - afs_inc_servers_outstanding(net); - if (!queue_work(afs_wq, &net->server_reaper)) - afs_dec_servers_outstanding(net); + afs_queue_server_manager(net); + _debug("wait"); wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait, TASK_UNINTERRUPTIBLE); + _leave(""); +} + +/* + * Probe a fileserver to find its capabilities. + * + * TODO: Try service upgrade. + */ +static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) +{ + _enter(""); + + fc->ac.addr = NULL; + fc->ac.start = READ_ONCE(fc->ac.alist->index); + fc->ac.index = fc->ac.start; + fc->ac.error = 0; + fc->ac.begun = false; + + while (afs_iterate_addresses(&fc->ac)) { + afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server, + &fc->ac, fc->key); + switch (fc->ac.error) { + case 0: + afs_end_cursor(&fc->ac); + set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags); + return true; + case -ECONNABORTED: + fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + break; + default: + fc->ac.error = -EIO; + goto error; + } + } + +error: + afs_end_cursor(&fc->ac); + return false; +} + +/* + * If we haven't already, try probing the fileserver to get its capabilities. + * We try not to instigate parallel probes, but it's possible that the parallel + * probes will fail due to authentication failure when ours would succeed. + * + * TODO: Try sending an anonymous probe if an authenticated probe fails. + */ +bool afs_probe_fileserver(struct afs_fs_cursor *fc) +{ + bool success; + int ret, retries = 0; + + _enter(""); + +retry: + if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) { + _leave(" = t"); + return true; + } + + if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) { + success = afs_do_probe_fileserver(fc); + clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags); + wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING); + _leave(" = t"); + return success; + } + + _debug("wait"); + ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING, + TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + fc->ac.error = ret; + _leave(" = f [%d]", ret); + return false; + } + + retries++; + if (retries == 4) { + fc->ac.error = -ESTALE; + _leave(" = f [stale]"); + return false; + } + _debug("retry"); + goto retry; +} + +/* + * Get an update for a server's address list. + */ +static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +{ + struct afs_addr_list *alist, *discard; + + _enter(""); + + alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, + &server->uuid); + if (IS_ERR(alist)) { + fc->ac.error = PTR_ERR(alist); + _leave(" = f [%d]", fc->ac.error); + return false; + } + + discard = alist; + if (server->addr_version != alist->version) { + write_lock(&server->fs_lock); + discard = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + rcu_assign_pointer(server->addresses, alist); + server->addr_version = alist->version; + write_unlock(&server->fs_lock); + } + + server->update_at = ktime_get_real_seconds() + afs_server_update_delay; + afs_put_addrlist(discard); + _leave(" = t"); + return true; +} + +/* + * See if a server's address list needs updating. + */ +bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +{ + time64_t now = ktime_get_real_seconds(); + long diff; + bool success; + int ret, retries = 0; + + _enter(""); + + ASSERT(server); + +retry: + diff = READ_ONCE(server->update_at) - now; + if (diff > 0) { + _leave(" = t [not now %ld]", diff); + return true; + } + + if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { + success = afs_update_server_record(fc, server); + clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); + wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); + _leave(" = %d", success); + return success; + } + + ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, + TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + fc->ac.error = ret; + _leave(" = f [intr]"); + return false; + } + + retries++; + if (retries == 4) { + _leave(" = f [stale]"); + ret = -ESTALE; + return false; + } + goto retry; } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c new file mode 100644 index 00000000000000..26bad7032bbaec --- /dev/null +++ b/fs/afs/server_list.c @@ -0,0 +1,153 @@ +/* AFS fileserver list management. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include "internal.h" + +void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) +{ + int i; + + if (refcount_dec_and_test(&slist->usage)) { + for (i = 0; i < slist->nr_servers; i++) { + afs_put_cb_interest(net, slist->servers[i].cb_interest); + afs_put_server(net, slist->servers[i].server); + } + kfree(slist); + } +} + +/* + * Build a server list from a VLDB record. + */ +struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, + struct key *key, + struct afs_vldb_entry *vldb, + u8 type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + slist = kzalloc(sizeof(struct afs_server_list) + + sizeof(struct afs_server_entry) * nr_servers, + GFP_KERNEL); + if (!slist) + goto error; + + refcount_set(&slist->usage, 1); + + /* Make sure a records exists for each server in the list. */ + for (i = 0; i < vldb->nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(cell, key, &vldb->fs_server[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT) + continue; + goto error_2; + } + + /* Insertion-sort by server pointer */ + for (j = 0; j < slist->nr_servers; j++) + if (slist->servers[j].server >= server) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(cell->net, server); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return slist; + +error_2: + afs_put_serverlist(cell->net, slist); +error: + return ERR_PTR(ret); +} + +/* + * Copy the annotations from an old server list to its potential replacement. + */ +bool afs_annotate_server_list(struct afs_server_list *new, + struct afs_server_list *old) +{ + struct afs_server *cur; + int i, j; + + if (old->nr_servers != new->nr_servers) + goto changed; + + for (i = 0; i < old->nr_servers; i++) + if (old->servers[i].server != new->servers[i].server) + goto changed; + + return false; + +changed: + /* Maintain the same current server as before if possible. */ + cur = old->servers[old->index].server; + for (j = 0; j < new->nr_servers; j++) { + if (new->servers[j].server == cur) { + new->index = j; + break; + } + } + + /* Keep the old callback interest records where possible so that we + * maintain callback interception. + */ + i = 0; + j = 0; + while (i < old->nr_servers && j < new->nr_servers) { + if (new->servers[j].server == old->servers[i].server) { + struct afs_cb_interest *cbi = old->servers[i].cb_interest; + if (cbi) { + new->servers[j].cb_interest = cbi; + refcount_inc(&cbi->usage); + } + i++; + j++; + continue; + } + + if (new->servers[j].server < old->servers[i].server) { + j++; + continue; + } + + i++; + continue; + } + + return true; +} diff --git a/fs/afs/super.c b/fs/afs/super.c index 3d53b78b350d1f..af1e769aaebff3 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -142,9 +142,9 @@ void __exit afs_fs_exit(void) */ static int afs_show_devname(struct seq_file *m, struct dentry *root) { - struct afs_super_info *as = root->d_sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(root->d_sb); struct afs_volume *volume = as->volume; - struct afs_cell *cell = volume->cell; + struct afs_cell *cell = as->cell; const char *suf = ""; char pref = '%'; @@ -162,7 +162,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) break; } - seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->vlocation->vldb.name, suf); + seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); return 0; } @@ -334,14 +334,16 @@ static int afs_parse_device_name(struct afs_mount_params *params, static int afs_test_super(struct super_block *sb, void *data) { struct afs_super_info *as1 = data; - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); - return as->net == as1->net && as->volume == as1->volume; + return as->net == as1->net && as->volume->vid == as1->volume->vid; } static int afs_set_super(struct super_block *sb, void *data) { - sb->s_fs_info = data; + struct afs_super_info *as = data; + + sb->s_fs_info = as; return set_anon_super(sb, NULL); } @@ -351,7 +353,7 @@ static int afs_set_super(struct super_block *sb, void *data) static int afs_fill_super(struct super_block *sb, struct afs_mount_params *params) { - struct afs_super_info *as = sb->s_fs_info; + struct afs_super_info *as = AFS_FS_S(sb); struct afs_fid fid; struct inode *inode = NULL; int ret; @@ -368,13 +370,15 @@ static int afs_fill_super(struct super_block *sb, if (ret) return ret; sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); + sprintf(sb->s_id, "%u", as->volume->vid); + + afs_activate_volume(as->volume); /* allocate the root inode and dentry */ fid.vid = as->volume->vid; fid.vnode = 1; fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL); + inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -426,7 +430,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, { struct afs_mount_params params; struct super_block *sb; - struct afs_volume *vol; + struct afs_volume *candidate; struct key *key; struct afs_super_info *as; int ret; @@ -464,15 +468,19 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, ret = -ENOMEM; as = afs_alloc_sbi(¶ms); if (!as) - goto error; + goto error_key; - /* parse the device name */ - vol = afs_volume_lookup(¶ms); - if (IS_ERR(vol)) { - ret = PTR_ERR(vol); - goto error; + /* Assume we're going to need a volume record; at the very least we can + * use it to update the volume record if we have one already. This + * checks that the volume exists within the cell. + */ + candidate = afs_create_volume(¶ms); + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_as; } - as->volume = vol; + + as->volume = candidate; /* allocate a deviceless superblock */ sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); @@ -503,11 +511,13 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, error_sb: deactivate_locked_super(sb); + goto error_key; error_as: afs_destroy_sbi(as); +error_key: + key_put(params.key); error: afs_put_cell(params.net, params.cell); - key_put(params.key); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -519,8 +529,9 @@ static void afs_kill_super(struct super_block *sb) /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ - afs_clear_callback_interests(as->net, as->volume); + afs_clear_callback_interests(as->net, as->volume->servers); kill_anon_super(sb); + afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } @@ -533,7 +544,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); - init_waitqueue_head(&vnode->update_waitq); + mutex_init(&vnode->io_lock); mutex_init(&vnode->validate_lock); spin_lock_init(&vnode->writeback_lock); spin_lock_init(&vnode->lock); @@ -561,7 +572,6 @@ static struct inode *afs_alloc_inode(struct super_block *sb) memset(&vnode->status, 0, sizeof(vnode->status)); vnode->volume = NULL; - vnode->update_cnt = 0; vnode->flags = 1 << AFS_VNODE_UNSET; _leave(" = %p", &vnode->vfs_inode); @@ -597,6 +607,7 @@ static void afs_destroy_inode(struct inode *inode) */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct afs_fs_cursor fc; struct afs_volume_status vs; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; @@ -606,21 +617,32 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) if (IS_ERR(key)) return PTR_ERR(key); - ret = afs_vnode_get_volume_status(vnode, key, &vs); - key_put(key); - if (ret < 0) { - _leave(" = %d", ret); - return ret; + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, key)) { + fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_get_volume_status(&fc, &vs); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); } - buf->f_type = dentry->d_sb->s_magic; - buf->f_bsize = AFS_BLOCK_SIZE; - buf->f_namelen = AFSNAMEMAX - 1; + key_put(key); - if (vs.max_quota == 0) - buf->f_blocks = vs.part_max_blocks; - else - buf->f_blocks = vs.max_quota; - buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; - return 0; + if (ret == 0) { + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (vs.max_quota == 0) + buf->f_blocks = vs.part_max_blocks; + else + buf->f_blocks = vs.max_quota; + buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; + } + + return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 1d1e7df77dd5c0..173c652fe875b9 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -16,14 +16,15 @@ #include "internal.h" /* - * deliver reply data to a VL.GetEntryByXXX call + * Deliver reply data to a VL.GetEntryByNameU call. */ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) +static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) { - struct afs_cache_vlocation *entry; - __be32 *bp; + struct afs_uvldbentry__xdr *uvldb; + struct afs_vldb_entry *entry; + bool new_only = false; u32 tmp; - int loop, ret; + int i, ret; _enter(""); @@ -32,152 +33,270 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) return ret; /* unmarshall the reply once we've received all of it */ + uvldb = call->buffer; entry = call->reply[0]; - bp = call->buffer; - - for (loop = 0; loop < 64; loop++) - entry->name[loop] = ntohl(*bp++); - entry->name[loop] = 0; - bp++; /* final NUL */ - - bp++; /* type */ - entry->nservers = ntohl(*bp++); - - for (loop = 0; loop < 8; loop++) { - entry->servers[loop].srx_family = AF_RXRPC; - entry->servers[loop].srx_service = FS_SERVICE; - entry->servers[loop].transport_type = SOCK_DGRAM; - entry->servers[loop].transport_len = sizeof(entry->servers[loop].transport.sin6); - entry->servers[loop].transport.sin6.sin6_family = AF_INET6; - entry->servers[loop].transport.sin6.sin6_port = htons(AFS_FS_PORT); - entry->servers[loop].transport.sin6.sin6_flowinfo = 0; - entry->servers[loop].transport.sin6.sin6_scope_id = 0; - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[0] = 0; - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[1] = 0; - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - entry->servers[loop].transport.sin6.sin6_addr.s6_addr32[3] = *bp++; + + for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++) + entry->name[i] = (u8)ntohl(uvldb->name[i]); + entry->name[i] = 0; + entry->name_len = strlen(entry->name); + + /* If there is a new replication site that we can use, ignore all the + * sites that aren't marked as new. + */ + for (i = 0; i < AFS_NMAXNSERVERS; i++) { + tmp = ntohl(uvldb->serverFlags[i]); + if (!(tmp & AFS_VLSF_DONTUSE) && + (tmp & AFS_VLSF_NEWREPSITE)) + new_only = true; } - bp += 8; /* partition IDs */ + for (i = 0; i < AFS_NMAXNSERVERS; i++) { + struct afs_uuid__xdr *xdr; + struct afs_uuid *uuid; + int j; - for (loop = 0; loop < 8; loop++) { - tmp = ntohl(*bp++); - entry->srvtmask[loop] = 0; + tmp = ntohl(uvldb->serverFlags[i]); + if (tmp & AFS_VLSF_DONTUSE || + (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) + continue; if (tmp & AFS_VLSF_RWVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_RW; + entry->fs_mask[i] |= AFS_VOL_VTM_RW; if (tmp & AFS_VLSF_ROVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_RO; + entry->fs_mask[i] |= AFS_VOL_VTM_RO; if (tmp & AFS_VLSF_BACKVOL) - entry->srvtmask[loop] |= AFS_VOL_VTM_BAK; - } + entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + if (!entry->fs_mask[i]) + continue; + + xdr = &uvldb->serverNumber[i]; + uuid = (struct afs_uuid *)&entry->fs_server[i]; + uuid->time_low = xdr->time_low; + uuid->time_mid = htons(ntohl(xdr->time_mid)); + uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); + uuid->clock_seq_hi_and_reserved = (u8)ntohl(xdr->clock_seq_hi_and_reserved); + uuid->clock_seq_low = (u8)ntohl(xdr->clock_seq_low); + for (j = 0; j < 6; j++) + uuid->node[j] = (u8)ntohl(xdr->node[j]); - entry->vid[0] = ntohl(*bp++); - entry->vid[1] = ntohl(*bp++); - entry->vid[2] = ntohl(*bp++); + entry->nr_servers++; + } - bp++; /* clone ID */ + for (i = 0; i < AFS_MAXTYPES; i++) + entry->vid[i] = ntohl(uvldb->volumeId[i]); - tmp = ntohl(*bp++); /* flags */ - entry->vidmask = 0; + tmp = ntohl(uvldb->flags); if (tmp & AFS_VLF_RWEXISTS) - entry->vidmask |= AFS_VOL_VTM_RW; + __set_bit(AFS_VLDB_HAS_RW, &entry->flags); if (tmp & AFS_VLF_ROEXISTS) - entry->vidmask |= AFS_VOL_VTM_RO; + __set_bit(AFS_VLDB_HAS_RO, &entry->flags); if (tmp & AFS_VLF_BACKEXISTS) - entry->vidmask |= AFS_VOL_VTM_BAK; - if (!entry->vidmask) - return -EBADMSG; + __set_bit(AFS_VLDB_HAS_BAK, &entry->flags); + if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + entry->error = -ENOMEDIUM; + __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); + } + + __set_bit(AFS_VLDB_QUERY_VALID, &entry->flags); _leave(" = 0 [done]"); return 0; } -/* - * VL.GetEntryByName operation type - */ -static const struct afs_call_type afs_RXVLGetEntryByName = { - .name = "VL.GetEntryByName", - .deliver = afs_deliver_vl_get_entry_by_xxx, - .destructor = afs_flat_call_destructor, -}; +static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call) +{ + kfree(call->reply[0]); + afs_flat_call_destructor(call); +} /* - * VL.GetEntryById operation type + * VL.GetEntryByNameU operation type. */ -static const struct afs_call_type afs_RXVLGetEntryById = { - .name = "VL.GetEntryById", - .deliver = afs_deliver_vl_get_entry_by_xxx, - .destructor = afs_flat_call_destructor, +static const struct afs_call_type afs_RXVLGetEntryByNameU = { + .name = "VL.GetEntryByNameU", + .deliver = afs_deliver_vl_get_entry_by_name_u, + .destructor = afs_destroy_vl_get_entry_by_name_u, }; /* - * dispatch a get volume entry by name operation + * Dispatch a get volume entry by name or ID operation (uuid variant). If the + * volname is a decimal number then it's a volume ID not a volume name. */ -int afs_vl_get_entry_by_name(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, - const char *volname, - struct afs_cache_vlocation *entry, - bool async) +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const char *volname, + int volnamesz) { + struct afs_vldb_entry *entry; struct afs_call *call; - size_t volnamesz, reqsz, padsz; + size_t reqsz, padsz; __be32 *bp; _enter(""); - volnamesz = strlen(volname); padsz = (4 - (volnamesz & 3)) & 3; reqsz = 8 + volnamesz + padsz; - call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByName, reqsz, 384); - if (!call) - return -ENOMEM; + entry = kzalloc(sizeof(struct afs_vldb_entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + call = afs_alloc_flat_call(net, &afs_RXVLGetEntryByNameU, reqsz, + sizeof(struct afs_uvldbentry__xdr)); + if (!call) { + kfree(entry); + return ERR_PTR(-ENOMEM); + } call->key = key; call->reply[0] = entry; + call->ret_reply0 = true; - /* marshall the parameters */ + /* Marshall the parameters */ bp = call->request; - *bp++ = htonl(VLGETENTRYBYNAME); + *bp++ = htonl(VLGETENTRYBYNAMEU); *bp++ = htonl(volnamesz); memcpy(bp, volname, volnamesz); if (padsz > 0) - memset((void *) bp + volnamesz, 0, padsz); + memset((void *)bp + volnamesz, 0, padsz); - /* initiate the call */ - return afs_make_call(ac, call, GFP_KERNEL, async); + return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); } /* - * dispatch a get volume entry by ID operation + * Deliver reply data to a VL.GetAddrsU call. + * + * GetAddrsU(IN ListAddrByAttributes *inaddr, + * OUT afsUUID *uuidp1, + * OUT uint32_t *uniquifier, + * OUT uint32_t *nentries, + * OUT bulkaddrs *blkaddrs); */ -int afs_vl_get_entry_by_id(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, - afs_volid_t volid, - afs_voltype_t voltype, - struct afs_cache_vlocation *entry, - bool async) +static int afs_deliver_vl_get_addrs_u(struct afs_call *call) { + struct afs_addr_list *alist; + __be32 *bp; + u32 uniquifier, nentries, count; + int i, ret; + + _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + +again: + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->unmarshall++; + + /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */ + case 1: + ret = afs_extract_data(call, call->buffer, + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32), + true); + if (ret < 0) + return ret; + + bp = call->buffer + sizeof(struct afs_uuid__xdr); + uniquifier = ntohl(*bp++); + nentries = ntohl(*bp++); + count = ntohl(*bp); + + nentries = min(nentries, count); + alist = afs_alloc_addrlist(nentries, FS_SERVICE, AFS_FS_PORT); + if (!alist) + return -ENOMEM; + alist->version = uniquifier; + call->reply[0] = alist; + call->count = count; + call->count2 = nentries; + call->offset = 0; + call->unmarshall++; + + /* Extract entries */ + case 2: + count = min(call->count, 4U); + ret = afs_extract_data(call, call->buffer, + count * sizeof(__be32), + call->count > 4); + if (ret < 0) + return ret; + + alist = call->reply[0]; + bp = call->buffer; + for (i = 0; i < count; i++) + if (alist->nr_addrs < call->count2) + afs_merge_fs_addr4(alist, *bp++); + + call->count -= count; + if (call->count > 0) + goto again; + call->offset = 0; + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_vl_get_addrs_u_destructor(struct afs_call *call) +{ + afs_put_server(call->net, (struct afs_server *)call->reply[0]); + kfree(call->reply[1]); + return afs_flat_call_destructor(call); +} + +/* + * VL.GetAddrsU operation type. + */ +static const struct afs_call_type afs_RXVLGetAddrsU = { + .name = "VL.GetAddrsU", + .deliver = afs_deliver_vl_get_addrs_u, + .destructor = afs_vl_get_addrs_u_destructor, +}; + +/* + * Dispatch an operation to get the addresses for a server, where the server is + * nominated by UUID. + */ +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + const uuid_t *uuid) +{ + struct afs_ListAddrByAttributes__xdr *r; + const struct afs_uuid *u = (const struct afs_uuid *)uuid; struct afs_call *call; __be32 *bp; + int i; _enter(""); - call = afs_alloc_flat_call(net, &afs_RXVLGetEntryById, 12, 384); + call = afs_alloc_flat_call(net, &afs_RXVLGetAddrsU, + sizeof(__be32) + sizeof(struct afs_ListAddrByAttributes__xdr), + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); if (!call) - return -ENOMEM; + return ERR_PTR(-ENOMEM); call->key = key; - call->reply[0] = entry; + call->reply[0] = NULL; + call->ret_reply0 = true; - /* marshall the parameters */ + /* Marshall the parameters */ bp = call->request; - *bp++ = htonl(VLGETENTRYBYID); - *bp++ = htonl(volid); - *bp = htonl(voltype); + *bp++ = htonl(VLGETADDRSU); + r = (struct afs_ListAddrByAttributes__xdr *)bp; + r->Mask = htonl(AFS_VLADDR_UUID); + r->ipaddr = 0; + r->index = 0; + r->spare = 0; + r->uuid.time_low = u->time_low; + r->uuid.time_mid = htonl(ntohs(u->time_mid)); + r->uuid.time_hi_and_version = htonl(ntohs(u->time_hi_and_version)); + r->uuid.clock_seq_hi_and_reserved = htonl(u->clock_seq_hi_and_reserved); + r->uuid.clock_seq_low = htonl(u->clock_seq_low); + for (i = 0; i < 6; i++) + r->uuid.node[i] = ntohl(u->node[i]); - /* initiate the call */ - return afs_make_call(ac, call, GFP_KERNEL, async); + return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); } diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c deleted file mode 100644 index 52c31ad0ef6076..00000000000000 --- a/fs/afs/vlocation.c +++ /dev/null @@ -1,669 +0,0 @@ -/* AFS volume location management - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "internal.h" - -struct workqueue_struct *afs_vlocation_update_worker; - -static unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ -static unsigned afs_vlocation_update_timeout = 10 * 60; - -/* - * iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - */ -static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl, - struct key *key, - struct afs_cache_vlocation *vldb) -{ - struct afs_addr_cursor ac; - int ret; - - _enter("%s,%s", vl->cell->name, vl->vldb.name); - - ret = afs_set_vl_cursor(&ac, vl->cell); - if (ret < 0) - return ret; - - down_write(&vl->cell->vl_sem); - - ret = -ENOMEDIUM; - while (afs_iterate_addresses(&ac)) { - _debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport); - - /* attempt to access the VL server */ - ac.error = afs_vl_get_entry_by_name(vl->cell->net, &ac, key, - vl->vldb.name, vldb, false); - switch (ac.error) { - case 0: - goto out; - case -ENOMEM: - case -ENONET: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - if (ac.error == -ENOMEM || ac.error == -ENONET) - goto out; - break; - case -ENOMEDIUM: - case -EKEYREJECTED: - case -EKEYEXPIRED: - ac.responded = true; - goto out; - default: - ac.responded = true; - ac.error = -EIO; - break; - } - } - -out: - up_write(&vl->cell->vl_sem); - ret = afs_end_cursor(&ac); - _leave(" = %d", ret); - return ret; -} - -/* - * iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - */ -static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, - struct key *key, - afs_volid_t volid, - afs_voltype_t voltype, - struct afs_cache_vlocation *vldb) -{ - struct afs_addr_cursor ac; - int ret; - - _enter("%s,%x,%d,", vl->cell->name, volid, voltype); - - ret = afs_set_vl_cursor(&ac, vl->cell); - if (ret < 0) - return ret; - - down_write(&vl->cell->vl_sem); - ret = -ENOMEDIUM; - while (afs_iterate_addresses(&ac)) { - _debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport); - - /* attempt to access the VL server */ - ac.error = afs_vl_get_entry_by_id(vl->cell->net, &ac, key, volid, - voltype, vldb, false); - switch (ac.error) { - case 0: - goto out; - case -ENOMEM: - case -ENONET: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - if (ac.error == -ENOMEM || ac.error == -ENONET) - goto out; - goto rotate; - case -EBUSY: - ac.responded = true; - vl->upd_busy_cnt++; - if (vl->upd_busy_cnt <= 3) { - if (vl->upd_busy_cnt > 1) { - /* second+ BUSY - sleep a little bit */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } - continue; - } - break; - case -ENOMEDIUM: - ac.responded = true; - vl->upd_rej_cnt++; - goto rotate; - default: - ac.responded = true; - ac.error = -EIO; - goto rotate; - } - - /* rotate the server records upon lookup failure */ - rotate: - vl->upd_busy_cnt = 0; - } - -out: - if (ac.error < 0 && vl->upd_rej_cnt > 0) { - printk(KERN_NOTICE "kAFS:" - " Active volume no longer valid '%s'\n", - vl->vldb.name); - vl->valid = 0; - ac.error = -ENOMEDIUM; - } - - up_write(&vl->cell->vl_sem); - ret = afs_end_cursor(&ac); - _leave(" = %d", ret); - return ret; -} - -/* - * allocate a volume location record - */ -static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, - const char *name, - size_t namesz) -{ - struct afs_vlocation *vl; - - vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); - if (vl) { - vl->cell = cell; - vl->state = AFS_VL_NEW; - atomic_set(&vl->usage, 1); - INIT_LIST_HEAD(&vl->link); - INIT_LIST_HEAD(&vl->grave); - INIT_LIST_HEAD(&vl->update); - init_waitqueue_head(&vl->waitq); - spin_lock_init(&vl->lock); - memcpy(vl->vldb.name, name, namesz); - } - - _leave(" = %p", vl); - return vl; -} - -/* - * update record if we found it in the cache - */ -static int afs_vlocation_update_record(struct afs_vlocation *vl, - struct key *key, - struct afs_cache_vlocation *vldb) -{ - afs_voltype_t voltype; - afs_volid_t vid; - int ret; - - /* try to look up a cached volume in the cell VL databases by ID */ - _debug("Locally Cached: %s %02x", vl->vldb.name, vl->vldb.vidmask); - - _debug("Vids: %08x %08x %08x", - vl->vldb.vid[0], - vl->vldb.vid[1], - vl->vldb.vid[2]); - - if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { - vid = vl->vldb.vid[0]; - voltype = AFSVL_RWVOL; - } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { - vid = vl->vldb.vid[1]; - voltype = AFSVL_ROVOL; - } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { - vid = vl->vldb.vid[2]; - voltype = AFSVL_BACKVOL; - } else { - BUG(); - vid = 0; - voltype = 0; - } - - /* contact the server to make sure the volume is still available - * - TODO: need to handle disconnected operation here - */ - ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb); - switch (ret) { - /* net error */ - default: - printk(KERN_WARNING "kAFS:" - " failed to update volume '%s' (%x) up in '%s': %d\n", - vl->vldb.name, vid, vl->cell->name, ret); - _leave(" = %d", ret); - return ret; - - /* pulled from local cache into memory */ - case 0: - _leave(" = 0"); - return 0; - - /* uh oh... looks like the volume got deleted */ - case -ENOMEDIUM: - printk(KERN_ERR "kAFS:" - " volume '%s' (%x) does not exist '%s'\n", - vl->vldb.name, vid, vl->cell->name); - - /* TODO: make existing record unavailable */ - _leave(" = %d", ret); - return ret; - } -} - -/* - * apply the update to a VL record - */ -static void afs_vlocation_apply_update(struct afs_vlocation *vl, - struct afs_cache_vlocation *vldb) -{ - _debug("Done VL Lookup: %s %02x", vldb->name, vldb->vidmask); - - _debug("Vids: %08x %08x %08x", - vldb->vid[0], vldb->vid[1], vldb->vid[2]); - - if (strcmp(vldb->name, vl->vldb.name) != 0) - printk(KERN_NOTICE "kAFS:" - " name of volume '%s' changed to '%s' on server\n", - vl->vldb.name, vldb->name); - - vl->vldb = *vldb; -} - -/* - * fill in a volume location record, consulting the cache and the VL server - * both - */ -static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, - struct key *key) -{ - struct afs_cache_vlocation vldb; - int ret; - - _enter(""); - - ASSERTCMP(vl->valid, ==, 0); - - memset(&vldb, 0, sizeof(vldb)); - - /* Try to look up an unknown volume in the cell VL databases by name */ - ret = afs_vlocation_access_vl_by_name(vl, key, &vldb); - if (ret < 0) { - printk("kAFS: failed to locate '%s' in cell '%s'\n", - vl->vldb.name, vl->cell->name); - return ret; - } - - afs_vlocation_apply_update(vl, &vldb); - _leave(" = 0"); - return 0; -} - -/* - * queue a vlocation record for updates - */ -static void afs_vlocation_queue_for_updates(struct afs_net *net, - struct afs_vlocation *vl) -{ - struct afs_vlocation *xvl; - - /* wait at least 10 minutes before updating... */ - vl->update_at = ktime_get_real_seconds() + - afs_vlocation_update_timeout; - - spin_lock(&net->vl_updates_lock); - - if (!list_empty(&net->vl_updates)) { - /* ... but wait at least 1 second more than the newest record - * already queued so that we don't spam the VL server suddenly - * with lots of requests - */ - xvl = list_entry(net->vl_updates.prev, - struct afs_vlocation, update); - if (vl->update_at <= xvl->update_at) - vl->update_at = xvl->update_at + 1; - } else if (net->live) { - queue_delayed_work(afs_vlocation_update_worker, - &net->vl_updater, - afs_vlocation_update_timeout * HZ); - } - - list_add_tail(&vl->update, &net->vl_updates); - spin_unlock(&net->vl_updates_lock); -} - -/* - * lookup volume location - * - iterate through the VL servers in a cell until one of them admits knowing - * about the volume in question - * - lookup in the local cache if not able to find on the VL server - * - insert/update in the local cache if did get a VL response - */ -struct afs_vlocation *afs_vlocation_lookup(struct afs_net *net, - struct afs_cell *cell, - struct key *key, - const char *name, - size_t namesz) -{ - struct afs_vlocation *vl; - int ret; - - _enter("{%s},{%x},%*.*s,%zu", - cell->name, key_serial(key), - (int) namesz, (int) namesz, name, namesz); - - if (namesz >= sizeof(vl->vldb.name)) { - _leave(" = -ENAMETOOLONG"); - return ERR_PTR(-ENAMETOOLONG); - } - - /* see if we have an in-memory copy first */ - down_write(&cell->vl_sem); - spin_lock(&cell->vl_lock); - list_for_each_entry(vl, &cell->vl_list, link) { - if (vl->vldb.name[namesz] != '\0') - continue; - if (memcmp(vl->vldb.name, name, namesz) == 0) - goto found_in_memory; - } - spin_unlock(&cell->vl_lock); - - /* not in the cell's in-memory lists - create a new record */ - vl = afs_vlocation_alloc(cell, name, namesz); - if (!vl) { - up_write(&cell->vl_sem); - return ERR_PTR(-ENOMEM); - } - - afs_get_cell(cell); - - list_add_tail(&vl->link, &cell->vl_list); - vl->state = AFS_VL_CREATING; - up_write(&cell->vl_sem); - -fill_in_record: - ret = afs_vlocation_fill_in_record(vl, key); - if (ret < 0) - goto error_abandon; - spin_lock(&vl->lock); - vl->state = AFS_VL_VALID; - spin_unlock(&vl->lock); - wake_up(&vl->waitq); - - /* schedule for regular updates */ - afs_vlocation_queue_for_updates(net, vl); - goto success; - -found_in_memory: - /* found in memory */ - _debug("found in memory"); - atomic_inc(&vl->usage); - spin_unlock(&cell->vl_lock); - if (!list_empty(&vl->grave)) { - spin_lock(&net->vl_graveyard_lock); - list_del_init(&vl->grave); - spin_unlock(&net->vl_graveyard_lock); - } - up_write(&cell->vl_sem); - - /* see if it was an abandoned record that we might try filling in */ - spin_lock(&vl->lock); - while (vl->state != AFS_VL_VALID) { - afs_vlocation_state_t state = vl->state; - - _debug("invalid [state %d]", state); - - if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) { - vl->state = AFS_VL_CREATING; - spin_unlock(&vl->lock); - goto fill_in_record; - } - - /* must now wait for creation or update by someone else to - * complete */ - _debug("wait"); - - spin_unlock(&vl->lock); - ret = wait_event_interruptible(vl->waitq, - vl->state == AFS_VL_NEW || - vl->state == AFS_VL_VALID || - vl->state == AFS_VL_NO_VOLUME); - if (ret < 0) - goto error; - spin_lock(&vl->lock); - } - spin_unlock(&vl->lock); - -success: - _leave(" = %p", vl); - return vl; - -error_abandon: - spin_lock(&vl->lock); - vl->state = AFS_VL_NEW; - spin_unlock(&vl->lock); - wake_up(&vl->waitq); -error: - ASSERT(vl != NULL); - afs_put_vlocation(net, vl); - _leave(" = %d", ret); - return ERR_PTR(ret); -} - -/* - * finish using a volume location record - */ -void afs_put_vlocation(struct afs_net *net, struct afs_vlocation *vl) -{ - if (!vl) - return; - - _enter("%s", vl->vldb.name); - - ASSERTCMP(atomic_read(&vl->usage), >, 0); - - if (likely(!atomic_dec_and_test(&vl->usage))) { - _leave(""); - return; - } - - spin_lock(&net->vl_graveyard_lock); - if (atomic_read(&vl->usage) == 0) { - _debug("buried"); - list_move_tail(&vl->grave, &net->vl_graveyard); - vl->time_of_death = ktime_get_real_seconds(); - queue_delayed_work(afs_wq, &net->vl_reaper, - afs_vlocation_timeout * HZ); - - /* suspend updates on this record */ - if (!list_empty(&vl->update)) { - spin_lock(&net->vl_updates_lock); - list_del_init(&vl->update); - spin_unlock(&net->vl_updates_lock); - } - } - spin_unlock(&net->vl_graveyard_lock); - _leave(" [killed?]"); -} - -/* - * destroy a dead volume location record - */ -static void afs_vlocation_destroy(struct afs_net *net, struct afs_vlocation *vl) -{ - _enter("%p", vl); - - afs_put_cell(net, vl->cell); - kfree(vl); -} - -/* - * reap dead volume location records - */ -void afs_vlocation_reaper(struct work_struct *work) -{ - LIST_HEAD(corpses); - struct afs_vlocation *vl; - struct afs_net *net = container_of(work, struct afs_net, vl_reaper.work); - unsigned long delay, expiry; - time64_t now; - - _enter(""); - - now = ktime_get_real_seconds(); - spin_lock(&net->vl_graveyard_lock); - - while (!list_empty(&net->vl_graveyard)) { - vl = list_entry(net->vl_graveyard.next, - struct afs_vlocation, grave); - - _debug("check %p", vl); - - /* the queue is ordered most dead first */ - if (net->live) { - expiry = vl->time_of_death + afs_vlocation_timeout; - if (expiry > now) { - delay = (expiry - now) * HZ; - _debug("delay %lu", delay); - mod_delayed_work(afs_wq, &net->vl_reaper, delay); - break; - } - } - - spin_lock(&vl->cell->vl_lock); - if (atomic_read(&vl->usage) > 0) { - _debug("no reap"); - list_del_init(&vl->grave); - } else { - _debug("reap"); - list_move_tail(&vl->grave, &corpses); - list_del_init(&vl->link); - } - spin_unlock(&vl->cell->vl_lock); - } - - spin_unlock(&net->vl_graveyard_lock); - - /* now reap the corpses we've extracted */ - while (!list_empty(&corpses)) { - vl = list_entry(corpses.next, struct afs_vlocation, grave); - list_del(&vl->grave); - afs_vlocation_destroy(net, vl); - } - - _leave(""); -} - -/* - * discard all the volume location records for rmmod - */ -void __net_exit afs_vlocation_purge(struct afs_net *net) -{ - spin_lock(&net->vl_updates_lock); - list_del_init(&net->vl_updates); - spin_unlock(&net->vl_updates_lock); - mod_delayed_work(afs_vlocation_update_worker, &net->vl_updater, 0); - mod_delayed_work(afs_wq, &net->vl_reaper, 0); -} - -/* - * update a volume location - */ -void afs_vlocation_updater(struct work_struct *work) -{ - struct afs_cache_vlocation vldb; - struct afs_vlocation *vl, *xvl; - struct afs_net *net = container_of(work, struct afs_net, vl_updater.work); - time64_t now; - long timeout; - int ret; - - if (!net->live) - return; - - _enter(""); - - now = ktime_get_real_seconds(); - - /* find a record to update */ - spin_lock(&net->vl_updates_lock); - for (;;) { - if (list_empty(&net->vl_updates) || !net->live) { - spin_unlock(&net->vl_updates_lock); - _leave(" [nothing]"); - return; - } - - vl = list_entry(net->vl_updates.next, - struct afs_vlocation, update); - if (atomic_read(&vl->usage) > 0) - break; - list_del_init(&vl->update); - } - - timeout = vl->update_at - now; - if (timeout > 0) { - queue_delayed_work(afs_vlocation_update_worker, - &net->vl_updater, timeout * HZ); - spin_unlock(&net->vl_updates_lock); - _leave(" [nothing]"); - return; - } - - list_del_init(&vl->update); - atomic_inc(&vl->usage); - spin_unlock(&net->vl_updates_lock); - - /* we can now perform the update */ - _debug("update %s", vl->vldb.name); - vl->state = AFS_VL_UPDATING; - vl->upd_rej_cnt = 0; - vl->upd_busy_cnt = 0; - - ret = afs_vlocation_update_record(vl, NULL, &vldb); - spin_lock(&vl->lock); - switch (ret) { - case 0: - afs_vlocation_apply_update(vl, &vldb); - vl->state = AFS_VL_VALID; - break; - case -ENOMEDIUM: - vl->state = AFS_VL_VOLUME_DELETED; - break; - default: - vl->state = AFS_VL_UNCERTAIN; - break; - } - spin_unlock(&vl->lock); - wake_up(&vl->waitq); - - /* and then reschedule */ - _debug("reschedule"); - vl->update_at = ktime_get_real_seconds() + - afs_vlocation_update_timeout; - - spin_lock(&net->vl_updates_lock); - - if (!list_empty(&net->vl_updates)) { - /* next update in 10 minutes, but wait at least 1 second more - * than the newest record already queued so that we don't spam - * the VL server suddenly with lots of requests - */ - xvl = list_entry(net->vl_updates.prev, - struct afs_vlocation, update); - if (vl->update_at <= xvl->update_at) - vl->update_at = xvl->update_at + 1; - xvl = list_entry(net->vl_updates.next, - struct afs_vlocation, update); - timeout = xvl->update_at - now; - if (timeout < 0) - timeout = 0; - } else { - timeout = afs_vlocation_update_timeout; - } - - ASSERT(list_empty(&vl->update)); - - list_add_tail(&vl->update, &net->vl_updates); - - _debug("timeout %ld", timeout); - queue_delayed_work(afs_vlocation_update_worker, &net->vl_updater, timeout * HZ); - spin_unlock(&net->vl_updates_lock); - afs_put_vlocation(net, vl); -} diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c deleted file mode 100644 index 9c7333eb01c2b8..00000000000000 --- a/fs/afs/vnode.c +++ /dev/null @@ -1,750 +0,0 @@ -/* AFS vnode management - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "internal.h" - -/* - * Handle remote file deletion. - */ -static void afs_vnode_deleted_remotely(struct afs_vnode *vnode) -{ - struct afs_cb_interest *cbi = vnode->cb_interest; - - _enter("{%p}", cbi); - - set_bit(AFS_VNODE_DELETED, &vnode->flags); - - if (cbi) { - vnode->cb_interest = NULL; - afs_put_cb_interest(afs_v2net(vnode), cbi); - } - - _leave(""); -} - -/* - * finish off updating the recorded status of a file after a successful - * operation completion - * - starts callback expiry timer - * - adds to server's callback list - */ -void afs_vnode_finalise_status_update(struct afs_vnode *vnode, - struct afs_server *server) -{ - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - - wake_up_all(&vnode->update_waitq); - _leave(""); -} - -/* - * finish off updating the recorded status of a file after an operation failed - */ -static void afs_vnode_status_update_failed(struct afs_fs_cursor *fc, - struct afs_vnode *vnode) -{ - _enter("{%x:%u},%d", vnode->fid.vid, vnode->fid.vnode, fc->ac.error); - - spin_lock(&vnode->lock); - - if (fc->ac.error == -ENOENT) { - /* the file was deleted on the server */ - _debug("got NOENT from server - marking file deleted"); - afs_vnode_deleted_remotely(vnode); - } - - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - - wake_up_all(&vnode->update_waitq); - _leave(""); -} - -/* - * fetch file status from the volume - * - don't issue a fetch if: - * - the changed bit is not set and there's a valid callback - * - there are any outstanding ops that will fetch the status - * - TODO implement local caching - */ -int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force) -{ - struct afs_fs_cursor fc; - unsigned int cb_break = 0; - - DECLARE_WAITQUEUE(myself, current); - - _enter("%s,{%x:%u.%u,S=%lx},%u", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - vnode->flags, - force); - - if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - _leave(" [unchanged]"); - return 0; - } - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - _leave(" [deleted]"); - return -ENOENT; - } - - cb_break = vnode->cb_break + vnode->cb_s_break; - - spin_lock(&vnode->lock); - - if (!force && test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - spin_unlock(&vnode->lock); - _leave(" [unchanged]"); - return 0; - } - - ASSERTCMP(vnode->update_cnt, >=, 0); - - if (vnode->update_cnt > 0) { - /* someone else started a fetch */ - _debug("wait on fetch %d", vnode->update_cnt); - - set_current_state(TASK_UNINTERRUPTIBLE); - ASSERT(myself.func != NULL); - add_wait_queue(&vnode->update_waitq, &myself); - - /* wait for the status to be updated */ - for (;;) { - if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) - break; - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - break; - - /* check to see if it got updated and invalidated all - * before we saw it */ - if (vnode->update_cnt == 0) { - remove_wait_queue(&vnode->update_waitq, - &myself); - set_current_state(TASK_RUNNING); - goto get_anyway; - } - - spin_unlock(&vnode->lock); - - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - - spin_lock(&vnode->lock); - } - - remove_wait_queue(&vnode->update_waitq, &myself); - spin_unlock(&vnode->lock); - set_current_state(TASK_RUNNING); - - return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? - -ENOENT : 0; - } - -get_anyway: - /* okay... we're going to have to initiate the op */ - vnode->update_cnt++; - - spin_unlock(&vnode->lock); - - /* merge AFS status fetches and clear outstanding callback on this - * vnode */ - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_fetch_file_status(&fc, key, vnode, NULL, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - _debug("adjust"); - afs_cache_permit(vnode, key, cb_break); - afs_vnode_finalise_status_update(vnode, fc.server); - } else { - _debug("failed [%d]", fc.ac.error); - afs_vnode_status_update_failed(&fc, vnode); - } - -out: - afs_end_fs_cursor(&fc, afs_v2net(vnode)); - ASSERTCMP(vnode->update_cnt, >=, 0); - _leave(" = %d [cnt %d]", fc.ac.error, vnode->update_cnt); - return fc.ac.error; - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - spin_unlock(&vnode->lock); - goto out; -} - -/* - * fetch file data from the volume - * - TODO implement caching - */ -int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key, - struct afs_read *desc) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - /* merge in AFS status fetches and clear outstanding callback on this - * vnode */ - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_fetch_data(&fc, key, vnode, desc, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) - afs_vnode_finalise_status_update(vnode, fc.server); - else - afs_vnode_status_update_failed(&fc, vnode); - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * make a file or a directory - */ -int afs_vnode_create(struct afs_vnode *vnode, struct key *key, - const char *name, umode_t mode, struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_callback *newcb, struct afs_server **_server) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%s,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're creating in */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_create(&fc, key, vnode, name, mode, newfid, - newstatus, newcb, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - *_server = fc.server; - fc.server = NULL; - } else { - afs_vnode_status_update_failed(&fc, vnode); - *_server = NULL; - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * remove a file or directory - */ -int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name, - bool isdir) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%s", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're removing from */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_remove(&fc, key, vnode, name, isdir, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) - afs_vnode_finalise_status_update(vnode, fc.server); - else - afs_vnode_status_update_failed(&fc, vnode); - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * create a hard link - */ -int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode, - struct key *key, const char *name) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s", - dvnode->volume->vlocation->vldb.name, - dvnode->fid.vid, - dvnode->fid.vnode, - dvnode->fid.unique, - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name); - - /* this op will fetch the status on the directory we're removing from */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - spin_lock(&dvnode->lock); - dvnode->update_cnt++; - spin_unlock(&dvnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, dvnode)) - goto no_server; - - fc.ac.error = afs_fs_link(&fc, key, dvnode, vnode, name, false); - - } while (afs_iterate_fs_cursor(&fc, dvnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - afs_vnode_finalise_status_update(dvnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, vnode); - afs_vnode_status_update_failed(&fc, dvnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - spin_lock(&dvnode->lock); - dvnode->update_cnt--; - ASSERTCMP(dvnode->update_cnt, >=, 0); - spin_unlock(&dvnode->lock); - goto out; -} - -/* - * create a symbolic link - */ -int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key, - const char *name, const char *content, - struct afs_fid *newfid, - struct afs_file_status *newstatus, - struct afs_server **_server) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%s,%s,,,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), - name, content); - - /* this op will fetch the status on the directory we're creating in */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_symlink(&fc, key, vnode, name, content, - newfid, newstatus, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - *_server = fc.server; - fc.server = NULL; - } else { - afs_vnode_status_update_failed(&fc, vnode); - *_server = NULL; - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - *_server = NULL; - goto out; -} - -/* - * rename a file - */ -int afs_vnode_rename(struct afs_vnode *orig_dvnode, - struct afs_vnode *new_dvnode, - struct key *key, - const char *orig_name, - const char *new_name) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s", - orig_dvnode->volume->vlocation->vldb.name, - orig_dvnode->fid.vid, - orig_dvnode->fid.vnode, - orig_dvnode->fid.unique, - new_dvnode->volume->vlocation->vldb.name, - new_dvnode->fid.vid, - new_dvnode->fid.vnode, - new_dvnode->fid.unique, - key_serial(key), - orig_name, - new_name); - - /* this op will fetch the status on both the directories we're dealing - * with */ - spin_lock(&orig_dvnode->lock); - orig_dvnode->update_cnt++; - spin_unlock(&orig_dvnode->lock); - if (new_dvnode != orig_dvnode) { - spin_lock(&new_dvnode->lock); - new_dvnode->update_cnt++; - spin_unlock(&new_dvnode->lock); - } - - afs_init_fs_cursor(&fc, orig_dvnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, orig_dvnode)) - goto no_server; - - fc.ac.error = afs_fs_rename(&fc, key, orig_dvnode, orig_name, - new_dvnode, new_name, false); - - } while (afs_iterate_fs_cursor(&fc, orig_dvnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(orig_dvnode, fc.server); - if (new_dvnode != orig_dvnode) - afs_vnode_finalise_status_update(new_dvnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, orig_dvnode); - if (new_dvnode != orig_dvnode) - afs_vnode_status_update_failed(&fc, new_dvnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(orig_dvnode)); - -no_server: - spin_lock(&orig_dvnode->lock); - orig_dvnode->update_cnt--; - ASSERTCMP(orig_dvnode->update_cnt, >=, 0); - spin_unlock(&orig_dvnode->lock); - if (new_dvnode != orig_dvnode) { - spin_lock(&new_dvnode->lock); - new_dvnode->update_cnt--; - ASSERTCMP(new_dvnode->update_cnt, >=, 0); - spin_unlock(&new_dvnode->lock); - } - goto out; -} - -/* - * write to a file - */ -int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) -{ - struct afs_fs_cursor fc; - struct afs_vnode *vnode = wb->vnode; - - _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(wb->key), - first, last, offset, to); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_store_data(&fc, wb, first, last, offset, to, - false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, vnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * set the attributes on a file - */ -int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key, - struct iattr *attr) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - /* this op will fetch the status */ - spin_lock(&vnode->lock); - vnode->update_cnt++; - spin_unlock(&vnode->lock); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - goto no_server; - - fc.ac.error = afs_fs_setattr(&fc, key, vnode, attr, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - /* adjust the flags */ - if (fc.ac.error == 0) { - afs_vnode_finalise_status_update(vnode, fc.server); - } else { - afs_vnode_status_update_failed(&fc, vnode); - } - -out: - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); - -no_server: - spin_lock(&vnode->lock); - vnode->update_cnt--; - ASSERTCMP(vnode->update_cnt, >=, 0); - spin_unlock(&vnode->lock); - goto out; -} - -/* - * get the status of a volume - */ -int afs_vnode_get_volume_status(struct afs_vnode *vnode, struct key *key, - struct afs_volume_status *vs) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - break; - - fc.ac.error = afs_fs_get_volume_status(&fc, key, vnode, vs, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} - -/* - * get a lock on a file - */ -int afs_vnode_set_lock(struct afs_vnode *vnode, struct key *key, - afs_lock_type_t type) -{ - struct afs_fs_cursor fc; - - _enter("%s{%x:%u.%u},%x,%u", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key), type); - - afs_init_fs_cursor(&fc, vnode); - do { - /* pick a server to query */ - if (!afs_volume_pick_fileserver(&fc, vnode)) - break; - - fc.ac.error = afs_fs_set_lock(&fc, key, vnode, type, false); - - } while (afs_iterate_fs_cursor(&fc, vnode)); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} - -/* - * extend a lock on a file - */ -int afs_vnode_extend_lock(struct afs_vnode *vnode, struct key *key) -{ - struct afs_fs_cursor fc; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - ret = afs_set_fs_cursor(&fc, vnode); - if (ret < 0) - return ret; - - fc.ac.error = afs_fs_extend_lock(&fc, key, vnode, false); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} - -/* - * release a lock on a file - */ -int afs_vnode_release_lock(struct afs_vnode *vnode, struct key *key) -{ - struct afs_fs_cursor fc; - int ret; - - _enter("%s{%x:%u.%u},%x", - vnode->volume->vlocation->vldb.name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - key_serial(key)); - - ret = afs_set_fs_cursor(&fc, vnode); - if (ret < 0) - return ret; - - fc.ac.error = afs_fs_release_lock(&fc, key, vnode, false); - - return afs_end_fs_cursor(&fc, afs_v2net(vnode)); -} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 3c5ad1cc50f36c..2295dd4f9b15f1 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -13,11 +13,148 @@ #include #include "internal.h" -static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; +unsigned __read_mostly afs_volume_gc_delay = 10; +unsigned __read_mostly afs_volume_record_life = 60 * 60; + +static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; /* - * lookup a volume by name - * - this can be one of the following: + * Allocate a volume record and load it up from a vldb record. + */ +static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, + struct afs_vldb_entry *vldb, + unsigned long type_mask) +{ + struct afs_server_list *slist; + struct afs_server *server; + struct afs_volume *volume; + int ret = -ENOMEM, nr_servers = 0, i, j; + + for (i = 0; i < vldb->nr_servers; i++) + if (vldb->fs_mask[i] & type_mask) + nr_servers++; + + volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); + if (!volume) + goto error_0; + + volume->vid = vldb->vid[params->type]; + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + volume->cell = afs_get_cell(params->cell); + volume->type = params->type; + volume->type_force = params->force; + volume->name_len = vldb->name_len; + + atomic_set(&volume->usage, 1); + INIT_LIST_HEAD(&volume->proc_link); + rwlock_init(&volume->servers_lock); + memcpy(volume->name, vldb->name, vldb->name_len + 1); + + slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); + if (IS_ERR(slist)) { + ret = PTR_ERR(slist); + goto error_1; + } + + refcount_set(&slist->usage, 1); + volume->servers = slist; + + /* Make sure a records exists for each server this volume occupies. */ + for (i = 0; i < nr_servers; i++) { + if (!(vldb->fs_mask[i] & type_mask)) + continue; + + server = afs_lookup_server(params->cell, params->key, + &vldb->fs_server[i]); + if (IS_ERR(server)) { + ret = PTR_ERR(server); + if (ret == -ENOENT) + continue; + goto error_2; + } + + /* Insertion-sort by server pointer */ + for (j = 0; j < slist->nr_servers; j++) + if (slist->servers[j].server >= server) + break; + if (j < slist->nr_servers) { + if (slist->servers[j].server == server) { + afs_put_server(params->net, server); + continue; + } + + memmove(slist->servers + j + 1, + slist->servers + j, + (slist->nr_servers - j) * sizeof(struct afs_server_entry)); + } + + slist->servers[j].server = server; + slist->nr_servers++; + } + + if (slist->nr_servers == 0) { + ret = -EDESTADDRREQ; + goto error_2; + } + + return volume; + +error_2: + afs_put_serverlist(params->net, slist); +error_1: + kfree(volume); +error_0: + return ERR_PTR(ret); +} + +/* + * Look up a VLDB record for a volume. + */ +static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, + struct key *key, + const char *volname, + size_t volnamesz) +{ + struct afs_addr_cursor ac; + struct afs_vldb_entry *vldb; + int ret; + + ret = afs_set_vl_cursor(&ac, cell); + if (ret < 0) + return ERR_PTR(ret); + + while (afs_iterate_addresses(&ac)) { + vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, + volname, volnamesz); + switch (ac.error) { + case 0: + afs_end_cursor(&ac); + return vldb; + case -ECONNABORTED: + ac.error = afs_abort_to_error(ac.abort_code); + goto error; + case -ENOMEM: + case -ENONET: + goto error; + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + break; + default: + ac.error = -EIO; + goto error; + } + } + +error: + return ERR_PTR(afs_end_cursor(&ac)); +} + +/* + * Look up a volume in the VL server and create a candidate volume record for + * it. + * + * The volume name can be one of the following: * "%[cell:]volume[.]" R/W volume * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), * or R/W (rwparent=1) volume @@ -37,169 +174,218 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; * - Rule 3: If parent volume is R/W, then only mount R/W volume unless * explicitly told otherwise */ -struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) +struct afs_volume *afs_create_volume(struct afs_mount_params *params) { - struct afs_vlocation *vlocation = NULL; - struct afs_volume *volume = NULL; - struct afs_server *server = NULL; - char srvtmask; - int ret, loop; - - _enter("{%*.*s,%d}", - params->volnamesz, params->volnamesz, params->volname, params->rwpath); - - /* lookup the volume location record */ - vlocation = afs_vlocation_lookup(params->net, params->cell, params->key, - params->volname, params->volnamesz); - if (IS_ERR(vlocation)) { - ret = PTR_ERR(vlocation); - vlocation = NULL; - goto error; - } + struct afs_vldb_entry *vldb; + struct afs_volume *volume; + unsigned long type_mask = 1UL << params->type; - /* make the final decision on the type we want */ - ret = -ENOMEDIUM; - if (params->force && !(vlocation->vldb.vidmask & (1 << params->type))) - goto error; + vldb = afs_vl_lookup_vldb(params->cell, params->key, + params->volname, params->volnamesz); + if (IS_ERR(vldb)) + return ERR_CAST(vldb); - srvtmask = 0; - for (loop = 0; loop < vlocation->vldb.nservers; loop++) - srvtmask |= vlocation->vldb.srvtmask[loop]; + if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { + volume = ERR_PTR(vldb->error); + goto error; + } + /* Make the final decision on the type we want */ + volume = ERR_PTR(-ENOMEDIUM); if (params->force) { - if (!(srvtmask & (1 << params->type))) + if (!(vldb->flags & type_mask)) goto error; - } else if (srvtmask & AFS_VOL_VTM_RO) { + } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { params->type = AFSVL_ROVOL; - } else if (srvtmask & AFS_VOL_VTM_RW) { + } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { params->type = AFSVL_RWVOL; } else { goto error; } - down_write(¶ms->cell->vl_sem); + type_mask = 1UL << params->type; + volume = afs_alloc_volume(params, vldb, type_mask); - /* is the volume already active? */ - if (vlocation->vols[params->type]) { - /* yes - re-use it */ - volume = vlocation->vols[params->type]; - afs_get_volume(volume); - goto success; - } +error: + kfree(vldb); + return volume; +} - /* create a new volume record */ - _debug("creating new volume record"); +/* + * Destroy a volume record + */ +static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) +{ + _enter("%p", volume); - ret = -ENOMEM; - volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); - if (!volume) - goto error_up; +#ifdef CONFIG_AFS_FSCACHE + ASSERTCMP(volume->cache, ==, NULL); +#endif - atomic_set(&volume->usage, 1); - volume->type = params->type; - volume->type_force = params->force; - volume->cell = params->cell; - volume->vid = vlocation->vldb.vid[params->type]; - - init_rwsem(&volume->server_sem); - - /* look up all the applicable server records */ - for (loop = 0; loop < 8; loop++) { - if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { - server = afs_lookup_server( - volume->cell, &vlocation->vldb.servers[loop]); - if (IS_ERR(server)) { - ret = PTR_ERR(server); - goto error_discard; - } + afs_put_serverlist(net, volume->servers); + afs_put_cell(net, volume->cell); + kfree(volume); - volume->servers[volume->nservers] = server; - volume->nservers++; - } + _leave(" [destroyed]"); +} + +/* + * Drop a reference on a volume record. + */ +void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) +{ + if (volume) { + _enter("%s", volume->name); + + if (atomic_dec_and_test(&volume->usage)) + afs_destroy_volume(cell->net, volume); } +} - /* attach the cache and volume location */ +/* + * Activate a volume. + */ +void afs_activate_volume(struct afs_volume *volume) +{ #ifdef CONFIG_AFS_FSCACHE volume->cache = fscache_acquire_cookie(volume->cell->cache, &afs_volume_cache_index_def, volume, true); #endif - afs_get_vlocation(vlocation); - volume->vlocation = vlocation; - - vlocation->vols[volume->type] = volume; -success: - _debug("kAFS selected %s volume %08x", - afs_voltypes[volume->type], volume->vid); - up_write(¶ms->cell->vl_sem); - afs_put_vlocation(params->net, vlocation); - _leave(" = %p", volume); - return volume; + write_lock(&volume->cell->proc_lock); + list_add_tail(&volume->proc_link, &volume->cell->proc_volumes); + write_unlock(&volume->cell->proc_lock); +} - /* clean up */ -error_up: - up_write(¶ms->cell->vl_sem); -error: - afs_put_vlocation(params->net, vlocation); - _leave(" = %d", ret); - return ERR_PTR(ret); +/* + * Deactivate a volume. + */ +void afs_deactivate_volume(struct afs_volume *volume) +{ + _enter("%s", volume->name); -error_discard: - up_write(¶ms->cell->vl_sem); + write_lock(&volume->cell->proc_lock); + list_del_init(&volume->proc_link); + write_unlock(&volume->cell->proc_lock); - for (loop = volume->nservers - 1; loop >= 0; loop--) { - afs_put_cb_interest(params->net, volume->cb_interests[loop]); - afs_put_server(params->net, volume->servers[loop]); - } +#ifdef CONFIG_AFS_FSCACHE + fscache_relinquish_cookie(volume->cache, + test_bit(AFS_VOLUME_DELETED, &volume->flags)); + volume->cache = NULL; +#endif - kfree(volume); - goto error; + _leave(""); } /* - * destroy a volume record + * Query the VL service to update the volume status. */ -void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) +static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { - struct afs_vlocation *vlocation; - int loop; + struct afs_server_list *new, *old, *discard; + struct afs_vldb_entry *vldb; + char idbuf[16]; + int ret, idsz; - if (!volume) - return; + _enter(""); - _enter("%p", volume); + /* We look up an ID by passing it as a decimal string in the + * operation's name parameter. + */ + idsz = sprintf(idbuf, "%u", volume->vid); - ASSERTCMP(atomic_read(&volume->usage), >, 0); + vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); + if (IS_ERR(vldb)) { + ret = PTR_ERR(vldb); + goto error; + } - vlocation = volume->vlocation; + /* See if the volume got renamed. */ + if (vldb->name_len != volume->name_len || + memcmp(vldb->name, volume->name, vldb->name_len) != 0) { + /* TODO: Use RCU'd string. */ + memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); + volume->name_len = vldb->name_len; + } + + /* See if the volume's server list got updated. */ + new = afs_alloc_server_list(volume->cell, key, + vldb, (1 << volume->type)); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto error_vldb; + } - /* to prevent a race, the decrement and the dequeue must be effectively - * atomic */ - down_write(&cell->vl_sem); + write_lock(&volume->servers_lock); - if (likely(!atomic_dec_and_test(&volume->usage))) { - up_write(&vlocation->cell->vl_sem); - _leave(""); - return; + discard = new; + old = volume->servers; + if (afs_annotate_server_list(new, old)) { + new->seq = volume->servers_seq + 1; + volume->servers = new; + smp_wmb(); + volume->servers_seq++; + discard = old; } - vlocation->vols[volume->type] = NULL; + volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; + clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + write_unlock(&volume->servers_lock); + ret = 0; - up_write(&cell->vl_sem); + afs_put_serverlist(volume->cell->net, discard); +error_vldb: + kfree(vldb); +error: + _leave(" = %d", ret); + return ret; +} - /* finish cleaning up the volume */ -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(volume->cache, 0); -#endif - afs_put_vlocation(cell->net, vlocation); +/* + * Make sure the volume record is up to date. + */ +int afs_check_volume_status(struct afs_volume *volume, struct key *key) +{ + time64_t now = ktime_get_real_seconds(); + int ret, retries = 0; - for (loop = volume->nservers - 1; loop >= 0; loop--) { - afs_put_cb_interest(cell->net, volume->cb_interests[loop]); - afs_put_server(cell->net, volume->servers[loop]); + _enter(""); + + if (volume->update_at <= now) + set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + +retry: + if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) && + !test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0"); + return 0; } - kfree(volume); + if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { + ret = afs_update_volume_status(volume, key); + clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); + clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); + wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); + _leave(" = %d", ret); + return ret; + } - _leave(" [destroyed]"); + if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { + _leave(" = 0 [no wait]"); + return 0; + } + + ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE); + if (ret == -ERESTARTSYS) { + _leave(" = %d", ret); + return ret; + } + + retries++; + if (retries == 4) { + _leave(" = -ESTALE"); + return -ESTALE; + } + goto retry; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 106e43db11153f..1377a40ecdbb44 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -103,7 +103,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, req->pages[0] = page; get_page(page); - ret = afs_vnode_fetch_data(vnode, key, req); + ret = afs_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { if (ret == -ENOENT) { @@ -337,6 +337,40 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, _leave(""); } +/* + * write to a file + */ +static int afs_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_fs_cursor fc; + struct afs_vnode *vnode = wb->vnode; + int ret; + + _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + key_serial(wb->key), + first, last, offset, to); + + ret = -ERESTARTSYS; + if (afs_begin_vnode_operation(&fc, vnode, wb->key)) { + while (afs_select_fileserver(&fc)) { + fc.cb_break = vnode->cb_break + vnode->cb_s_break; + afs_fs_store_data(&fc, wb, first, last, offset, to); + } + + afs_check_for_remote_deletion(&fc, fc.vnode); + afs_vnode_commit_status(&fc, vnode, fc.cb_break); + ret = afs_end_vnode_operation(&fc); + } + + _leave(" = %d", ret); + return ret; +} + /* * synchronously write back the locked page and any subsequent non-locked dirty * pages also covered by the same writeback record @@ -420,7 +454,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); - ret = afs_vnode_store_data(wb, first, last, offset, to); + ret = afs_store_data(wb, first, last, offset, to); if (ret < 0) { switch (ret) { case -EDQUOT: diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index e58e00ee974708..cfcc674e64a55b 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -96,7 +96,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler, void *buffer, size_t size) { struct afs_vnode *vnode = AFS_FS_I(inode); - const char *volname = vnode->volume->vlocation->vldb.name; + const char *volname = vnode->volume->name; size_t namelen; namelen = strlen(volname);