Skip to content

Commit

Permalink
Merge pull request ceph#44281 from athanatos/sjust/wip-53555
Browse files Browse the repository at this point in the history
crimson/os/seastore: index lba pins atomically with addition to cache

Reviewed-by: Xuehan Xu <[email protected]>
Reviewed-by: Yingxin Cheng <[email protected]>
  • Loading branch information
athanatos authored Dec 13, 2021
2 parents 8c4e9d6 + 62f3cf1 commit 0200b02
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 59 deletions.
17 changes: 8 additions & 9 deletions src/crimson/os/seastore/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ class Cache {
ret->set_paddr(offset);
ret->state = CachedExtent::extent_state_t::CLEAN_PENDING;
add_extent(ret);
extent_init_func(*ret);
return read_extent<T>(
std::move(ret), std::forward<Func>(extent_init_func));
std::move(ret));
}

// extent PRESENT in cache
Expand All @@ -211,13 +212,13 @@ class Cache {
}

cached->state = CachedExtent::extent_state_t::INVALID;
extent_init_func(*ret);
return read_extent<T>(
std::move(ret), std::forward<Func>(extent_init_func));
std::move(ret));
} else {
auto ret = TCachedExtentRef<T>(static_cast<T*>(cached.get()));
return ret->wait_io(
).then([ret=std::move(ret),
extent_init_func=std::forward<Func>(extent_init_func)]() mutable
).then([ret=std::move(ret)]() mutable
-> get_extent_ret<T> {
// ret may be invalid, caller must check
return get_extent_ret<T>(
Expand Down Expand Up @@ -880,10 +881,9 @@ class Cache {
/// Introspect transaction when it is being destructed
void on_transaction_destruct(Transaction& t);

template <typename T, typename Func>
template <typename T>
get_extent_ret<T> read_extent(
TCachedExtentRef<T>&& extent,
Func &&func
TCachedExtentRef<T>&& extent
) {
assert(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING);
extent->set_io_wait();
Expand All @@ -892,13 +892,12 @@ class Cache {
extent->get_length(),
extent->get_bptr()
).safe_then(
[extent=std::move(extent), func=std::forward<Func>(func)]() mutable {
[extent=std::move(extent)]() mutable {
extent->state = CachedExtent::extent_state_t::CLEAN;
/* TODO: crc should be checked against LBA manager */
extent->last_committed_crc = extent->get_crc32c();

extent->on_clean_read();
func(*extent);
extent->complete_io();
return get_extent_ertr::make_ready_future<TCachedExtentRef<T>>(
std::move(extent));
Expand Down
88 changes: 57 additions & 31 deletions src/crimson/os/seastore/lba_manager/btree/lba_btree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -469,25 +469,23 @@ LBABtree::rewrite_lba_extent_ret LBABtree::rewrite_lba_extent(
LBABtree::get_internal_node_ret LBABtree::get_internal_node(
op_context_t c,
depth_t depth,
paddr_t offset)
paddr_t offset,
laddr_t begin,
laddr_t end)
{
LOG_PREFIX(LBATree::get_internal_node);
DEBUGT(
"reading internal at offset {}, depth {}",
"reading internal at offset {}, depth {}, begin {}, end {}",
c.trans,
offset,
depth);
depth,
begin,
end);
assert(depth > 1);
auto init_internal = [c, depth](LBAInternalNode &node) {
auto meta = node.get_meta();
if (node.get_size()) {
ceph_assert(meta.begin <= node.begin()->get_key());
ceph_assert(meta.end > (node.end() - 1)->get_key());
}
ceph_assert(depth == meta.depth);
auto init_internal = [c, depth, begin, end](LBAInternalNode &node) {
assert(!node.is_pending());
assert(!node.pin.is_linked());
node.pin.set_range(meta);
node.pin.set_range(lba_node_meta_t{begin, end, depth});
if (c.pins) {
c.pins->add_pin(node.pin);
}
Expand All @@ -497,7 +495,8 @@ LBABtree::get_internal_node_ret LBABtree::get_internal_node(
offset,
LBA_BLOCK_SIZE,
init_internal
).si_then([FNAME, c, offset, init_internal](LBAInternalNodeRef ret) {
).si_then([FNAME, c, offset, init_internal, depth, begin, end](
LBAInternalNodeRef ret) {
DEBUGT(
"read internal at offset {} {}",
c.trans,
Expand All @@ -508,6 +507,14 @@ LBABtree::get_internal_node_ret LBABtree::get_internal_node(
assert(ret->is_dirty());
init_internal(*ret);
}
auto meta = ret->get_meta();
if (ret->get_size()) {
ceph_assert(meta.begin <= ret->begin()->get_key());
ceph_assert(meta.end > (ret->end() - 1)->get_key());
}
ceph_assert(depth == meta.depth);
ceph_assert(begin == meta.begin);
ceph_assert(end == meta.end);
return get_internal_node_ret(
interruptible::ready_future_marker{},
ret);
Expand All @@ -516,23 +523,21 @@ LBABtree::get_internal_node_ret LBABtree::get_internal_node(

LBABtree::get_leaf_node_ret LBABtree::get_leaf_node(
op_context_t c,
paddr_t offset)
paddr_t offset,
laddr_t begin,
laddr_t end)
{
LOG_PREFIX(LBATree::get_leaf_node);
DEBUGT(
"reading leaf at offset {}",
"reading leaf at offset {}, begin {}, end {}",
c.trans,
offset);
auto init_leaf = [c](LBALeafNode &node) {
auto meta = node.get_meta();
if (node.get_size()) {
ceph_assert(meta.begin <= node.begin()->get_key());
ceph_assert(meta.end > (node.end() - 1)->get_key());
}
ceph_assert(1 == meta.depth);
offset,
begin,
end);
auto init_leaf = [c, begin, end](LBALeafNode &node) {
assert(!node.is_pending());
assert(!node.pin.is_linked());
node.pin.set_range(meta);
node.pin.set_range(lba_node_meta_t{begin, end, 1});
if (c.pins) {
c.pins->add_pin(node.pin);
}
Expand All @@ -542,7 +547,7 @@ LBABtree::get_leaf_node_ret LBABtree::get_leaf_node(
offset,
LBA_BLOCK_SIZE,
init_leaf
).si_then([FNAME, c, offset, init_leaf](LBALeafNodeRef ret) {
).si_then([FNAME, c, offset, init_leaf, begin, end](LBALeafNodeRef ret) {
DEBUGT(
"read leaf at offset {} {}",
c.trans,
Expand All @@ -553,6 +558,14 @@ LBABtree::get_leaf_node_ret LBABtree::get_leaf_node(
assert(ret->is_dirty());
init_leaf(*ret);
}
auto meta = ret->get_meta();
if (ret->get_size()) {
ceph_assert(meta.begin <= ret->begin()->get_key());
ceph_assert(meta.end > (ret->end() - 1)->get_key());
}
ceph_assert(1 == meta.depth);
ceph_assert(begin == meta.begin);
ceph_assert(end == meta.end);
return get_leaf_node_ret(
interruptible::ready_future_marker{},
ret);
Expand Down Expand Up @@ -709,23 +722,29 @@ template <typename NodeType>
LBABtree::base_iertr::future<typename NodeType::Ref> get_node(
op_context_t c,
depth_t depth,
paddr_t addr);
paddr_t addr,
laddr_t begin,
laddr_t end);

template <>
LBABtree::base_iertr::future<LBALeafNodeRef> get_node<LBALeafNode>(
op_context_t c,
depth_t depth,
paddr_t addr) {
paddr_t addr,
laddr_t begin,
laddr_t end) {
assert(depth == 1);
return LBABtree::get_leaf_node(c, addr);
return LBABtree::get_leaf_node(c, addr, begin, end);
}

template <>
LBABtree::base_iertr::future<LBAInternalNodeRef> get_node<LBAInternalNode>(
op_context_t c,
depth_t depth,
paddr_t addr) {
return LBABtree::get_internal_node(c, depth, addr);
paddr_t addr,
laddr_t begin,
laddr_t end) {
return LBABtree::get_internal_node(c, depth, addr, begin, end);
}

template <typename NodeType>
Expand All @@ -746,12 +765,19 @@ LBABtree::handle_merge_ret merge_level(
assert(iter.get_offset() < parent_pos.node->get_size());
bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);

auto next_iter = donor_iter + 1;
auto begin = donor_iter->get_key();
auto end = next_iter == parent_pos.node->end()
? parent_pos.node->get_node_meta().end
: next_iter->get_key();

DEBUGT("parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
return get_node<NodeType>(
c,
depth,
donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr())
donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
begin,
end
).si_then([c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
typename NodeType::Ref donor) {
LOG_PREFIX(LBABtree::merge_level);
Expand Down
38 changes: 31 additions & 7 deletions src/crimson/os/seastore/lba_manager/btree/lba_btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,13 +421,17 @@ class LBABtree {
static get_internal_node_ret get_internal_node(
op_context_t c,
depth_t depth,
paddr_t offset);
paddr_t offset,
laddr_t begin,
laddr_t end);

using get_leaf_node_iertr = base_iertr;
using get_leaf_node_ret = get_leaf_node_iertr::future<LBALeafNodeRef>;
static get_leaf_node_ret get_leaf_node(
op_context_t c,
paddr_t offset);
paddr_t offset,
laddr_t begin,
laddr_t end);

using lookup_root_iertr = base_iertr;
using lookup_root_ret = lookup_root_iertr::future<>;
Expand All @@ -439,7 +443,9 @@ class LBABtree {
return get_internal_node(
c,
root.get_depth(),
root.get_location()
root.get_location(),
0,
L_ADDR_MAX
).si_then([this, visitor, &iter](LBAInternalNodeRef root_node) {
iter.get_internal(root.get_depth()).node = root_node;
if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
Expand All @@ -448,7 +454,9 @@ class LBABtree {
} else {
return get_leaf_node(
c,
root.get_location()
root.get_location(),
0,
L_ADDR_MAX
).si_then([visitor, &iter](LBALeafNodeRef root_node) {
iter.leaf.node = root_node;
if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
Expand All @@ -471,10 +479,17 @@ class LBABtree {
auto &parent_entry = iter.get_internal(depth + 1);
auto parent = parent_entry.node;
auto node_iter = parent->iter_idx(parent_entry.pos);
auto next_iter = node_iter + 1;
auto begin = node_iter->get_key();
auto end = next_iter == parent->end()
? parent->get_node_meta().end
: next_iter->get_key();
return get_internal_node(
c,
depth,
node_iter->get_val().maybe_relative_to(parent->get_paddr())
node_iter->get_val().maybe_relative_to(parent->get_paddr()),
begin,
end
).si_then([depth, visitor, &iter, &f](LBAInternalNodeRef node) {
auto &entry = iter.get_internal(depth);
entry.node = node;
Expand All @@ -499,10 +514,17 @@ class LBABtree {
auto parent = parent_entry.node;
assert(parent);
auto node_iter = parent->iter_idx(parent_entry.pos);
auto next_iter = node_iter + 1;
auto begin = node_iter->get_key();
auto end = next_iter == parent->end()
? parent->get_node_meta().end
: next_iter->get_key();

return get_leaf_node(
c,
node_iter->get_val().maybe_relative_to(parent->get_paddr())
node_iter->get_val().maybe_relative_to(parent->get_paddr()),
begin,
end
).si_then([visitor, &iter, &f](LBALeafNodeRef node) {
iter.leaf.node = node;
auto node_iter = f(*node);
Expand Down Expand Up @@ -675,7 +697,9 @@ class LBABtree {
friend base_iertr::future<typename NodeType::Ref> get_node(
op_context_t c,
depth_t depth,
paddr_t addr);
paddr_t addr,
laddr_t begin,
laddr_t end);

template <typename NodeType>
friend handle_merge_ret merge_level(
Expand Down
11 changes: 5 additions & 6 deletions src/crimson/os/seastore/transaction_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -444,12 +444,11 @@ TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_liv
len,
[this, pin=std::move(pin)](CachedExtent &extent) mutable {
auto lref = extent.cast<LogicalCachedExtent>();
if (!lref->has_pin()) {
assert(!(pin->has_been_invalidated() ||
lref->has_been_invalidated()));
lref->set_pin(std::move(pin));
lba_manager->add_pin(lref->get_pin());
}
assert(!lref->has_pin());
assert(!lref->has_been_invalidated());
assert(!pin->has_been_invalidated());
lref->set_pin(std::move(pin));
lba_manager->add_pin(lref->get_pin());
});
} else {
return inner_ret(
Expand Down
11 changes: 5 additions & 6 deletions src/crimson/os/seastore/transaction_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,11 @@ class TransactionManager : public SegmentCleaner::ExtentCallbackInterface {
pref.get_paddr(),
pref.get_length(),
[this, pin=std::move(pin)](T &extent) mutable {
if (!extent.has_pin()) {
assert(!(extent.has_been_invalidated() ||
pin->has_been_invalidated()));
extent.set_pin(std::move(pin));
lba_manager->add_pin(extent.get_pin());
}
assert(!extent.has_pin());
assert(!extent.has_been_invalidated());
assert(!pin->has_been_invalidated());
extent.set_pin(std::move(pin));
lba_manager->add_pin(extent.get_pin());
}
).si_then([FNAME, &t](auto ref) mutable -> ret {
DEBUGT("got extent {}", t, *ref);
Expand Down

0 comments on commit 0200b02

Please sign in to comment.