Skip to content

Commit f4a1747

Browse files
committed
Further optimization for iterators and bulk-input sorting.
1 parent a719639 commit f4a1747

File tree

4 files changed

+199
-72
lines changed

4 files changed

+199
-72
lines changed

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ PSI_add_compile_options( Release ${PSI_compiler_LTO} ${PSI_compiler_optimize_for
5555
if ( WIN32 )
5656
add_compile_definitions( WIN32_LEAN_AND_MEAN NOMINMAX NOCOMM )
5757
endif()
58+
add_compile_definitions( BOOST_ALL_NO_LIB )
5859

5960
if ( ${CMAKE_SYSTEM_NAME} MATCHES "Linux" )
6061
PSI_add_link_options( Release -flto ) # lld does not seem to be enough

include/psi/vm/containers/b+tree.hpp

+180-44
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,18 @@
55
#include <psi/vm/align.hpp>
66
#include <psi/vm/allocation.hpp>
77
#include <psi/vm/containers/abi.hpp>
8+
#include <psi/vm/containers/crt_vector.hpp>
89

910
#include <psi/build/disable_warnings.hpp>
1011

1112
#include <boost/assert.hpp>
1213
#include <boost/config_ex.hpp>
1314
#include <boost/integer.hpp>
15+
#if __has_include( <boost/sort/pdqsort/pdqsort.hpp> )
16+
#include <boost/sort/pdqsort/pdqsort.hpp>
17+
#else
1418
#include <boost/move/algo/detail/pdqsort.hpp>
19+
#endif
1520
#include <boost/stl_interfaces/iterator_interface.hpp>
1621
#if 0 // reexamining...
1722
#include <boost/stl_interfaces/sequence_container_interface.hpp>
@@ -46,8 +51,8 @@ concept InsertableType = ( transparent_comparator && std::is_convertible_v<K, St
4651
// user specializations are allowed and intended:
4752

4853
template <typename T> constexpr bool is_simple_comparator{ false };
49-
template <typename T> constexpr bool is_simple_comparator<std::less <T>>{ true };
50-
template <typename T> constexpr bool is_simple_comparator<std::greater<T>>{ true };
54+
template <typename T> constexpr bool is_simple_comparator<std::less <T>>{ std::is_fundamental_v<T> };
55+
template <typename T> constexpr bool is_simple_comparator<std::greater<T>>{ std::is_fundamental_v<T> };
5156

5257
template <typename T> constexpr bool is_statically_sized { true };
5358
template <typename T> requires requires{ T{}.size(); } constexpr bool is_statically_sized<T>{ T{}.size() != 0 };
@@ -385,6 +390,7 @@ class [[ clang::trivial_abi ]] bptree_base::base_iterator
385390
{
386391
public:
387392
constexpr base_iterator() noexcept = default;
393+
constexpr base_iterator( base_iterator const & ) noexcept = default;
388394

389395
base_iterator & operator++() noexcept { return ( *this = incremented<true>() ); }
390396
base_iterator & operator--() noexcept;
@@ -393,16 +399,31 @@ class [[ clang::trivial_abi ]] bptree_base::base_iterator
393399

394400
base_iterator & operator+=( difference_type n ) noexcept;
395401

402+
// Copy assignment: pos_ is always copied; nodes_ is copied unless the
// optimized path below applies.
// In NDEBUG builds where __builtin_constant_p is available, if the compiler
// reports this->nodes_ as a compile-time constant and it is non-null, the
// store to nodes_ is replaced by a BOOST_ASSUME that both iterators already
// hold the same nodes pointer.
// NOTE(review): presumably assigning between iterators with different
// nodes_ is then undefined in that configuration - confirm against the
// BOOST_ASSUME definition.
constexpr base_iterator & operator=( base_iterator const & other ) noexcept
403+
{
404+
# if defined( NDEBUG ) && __has_builtin( __builtin_constant_p )
405+
// try to skip the redundant assignment of the nodes pointer yet at the
406+
// same time support default constructed iterators - so it cannot be
407+
// skipped unconditionally
408+
if ( __builtin_constant_p( this->nodes_ ) && this->nodes_ )
409+
{ BOOST_ASSUME( this->nodes_ == other.nodes_ ); }
410+
else
// (when compiled in, the 'else' above governs only the nodes_ assignment
// that follows the #endif)
411+
# endif
412+
this->nodes_ = other.nodes_;
413+
this->pos_ = other.pos_ ;
414+
return *this;
415+
}
416+
396417
public: // extensions
397418
iter_pos const & pos() const noexcept { return pos_; }
398419

399420
protected: friend class bptree_base;
400421
using nodes_t =
401-
#ifndef NDEBUG // for bounds checking
422+
# ifndef NDEBUG // for bounds checking
402423
std::span<node_placeholder>;
403-
#else
424+
# else
404425
node_placeholder * __restrict;
405-
#endif
426+
# endif
406427

407428
base_iterator( node_pool &, iter_pos ) noexcept;
408429

@@ -430,6 +451,7 @@ private: template <typename T, typename Comparator> friend class bp_tree_impl;
430451
void update_pool_ptr( node_pool & ) const noexcept;
431452
}; // class base_iterator
432453

454+
433455
////////////////////////////////////////////////////////////////////////////////
434456
// \class bptree_base::base_random_access_iterator
435457
////////////////////////////////////////////////////////////////////////////////
@@ -449,9 +471,19 @@ class bptree_base::base_random_access_iterator : public base_iterator
449471
# endif
450472
return static_cast<difference_type>( this->index_ - other.index_ );
451473
}
452-
[[ using gnu: sysv_abi, hot, pure ]]
453-
base_random_access_iterator operator+ ( difference_type n ) const noexcept;
454-
base_random_access_iterator & operator+=( difference_type n ) noexcept { return (*this = *this + n); }
474+
base_random_access_iterator & operator+=( difference_type const n ) noexcept { return (*this = *this + n); }
475+
// Returns a copy of this iterator moved by n positions.
// When the compiler can see n as a compile-time constant (checked with
// __builtin_constant_p) the offsets +1, -1 and 0 are handled inline through
// pre-increment, pre-decrement and a plain copy respectively; every other
// case (and every build without the builtin) is forwarded to at_offset().
base_random_access_iterator operator+ ( difference_type const n ) const noexcept
476+
{
477+
# if __has_builtin( __builtin_constant_p )
478+
if ( __builtin_constant_p( n ) ) // has to be in the header (even w/ LTO)
479+
{
480+
if ( n == +1 ) return ++auto(*this);
481+
else if ( n == -1 ) return --auto(*this);
482+
else if ( n == 0 ) return (*this);
483+
}
484+
# endif
// general case: arbitrary (or not compile-time known) offset
485+
return at_offset( n );
486+
}
455487

456488
// same reason for 'precise_end_handling=true' as in operator+
457489
base_random_access_iterator & operator++( ) noexcept { static_cast<base_iterator &>( *this ) = base_iterator::incremented<true>(); ++index_; return *this; }
@@ -482,6 +514,9 @@ class bptree_base::base_random_access_iterator : public base_iterator
482514
: base_iterator{ base }, index_{ start_index } {}
483515

484516
size_type index_;
517+
private:
518+
[[ using gnu: sysv_abi, hot, pure ]]
519+
base_random_access_iterator at_offset( difference_type n ) const noexcept;
485520
}; // class base_random_access_iterator
486521

487522

@@ -513,8 +548,8 @@ class bptree_base_wkey : public bptree_base
513548
using key_rv_arg = std::conditional_t<can_be_passed_in_reg<Key>, Key const, pass_rv_in_reg<Key>>;
514549
using key_const_arg = std::conditional_t<can_be_passed_in_reg<Key>, Key const, pass_in_reg <Key>>;
515550

516-
class fwd_iterator;
517-
class ra_iterator;
551+
class [[ clang::trivial_abi ]] fwd_iterator;
552+
class [[ clang::trivial_abi ]] ra_iterator;
518553

519554
using iterator = fwd_iterator;
520555
using const_iterator = std::basic_const_iterator<iterator>;
@@ -838,6 +873,10 @@ class bptree_base_wkey : public bptree_base
838873
node_slot inner;
839874
}; // struct key_locations
840875

876+
// internal deque-like simpler/faster random access iterator for/over full
877+
// nodes (used for sorting input data in bulk insert operations)
878+
class [[ clang::trivial_abi ]] ra_full_node_iterator;
879+
841880
[[ gnu::pure, nodiscard ]] const_iterator make_iter( auto const &... args ) const noexcept { return static_cast<iterator &&>( const_cast<bptree_base_wkey &>( *this ).bptree_base::make_iter( args... ) ); }
842881
[[ gnu::pure, nodiscard ]] const_iterator make_iter( key_locations const loc ) const noexcept { return make_iter( loc.leaf, loc.leaf_offset.pos ); }
843882

@@ -1058,19 +1097,33 @@ class bptree_base_wkey : public bptree_base
10581097
node_slot begin;
10591098
iter_pos end;
10601099
size_type size;
1061-
};
1100+
// save a linearized array of (full) nodes in order to be able to use
1101+
// "really random access iterators" (similar to std::deque iterators)
1102+
// for subsequent sorting
1103+
crt_vector<leaf_node *, std::uint32_t> nodes;
1104+
}; // struct bulk_copied_input
10621105
template <typename I, typename S, std::ranges::subrange_kind kind>
10631106
bulk_copied_input
10641107
bulk_insert_prepare( std::ranges::subrange<I, S, kind> keys )
10651108
{
1066-
if ( keys.empty() ) [[ unlikely ]]
1067-
return bulk_copied_input{};
1068-
10691109
auto constexpr can_preallocate{ kind == std::ranges::subrange_kind::sized };
1070-
if constexpr ( can_preallocate )
1071-
reserve_additional( static_cast<size_type>( keys.size() ) );
1072-
else
1110+
size_type input_size;
1111+
crt_vector<leaf_node *, std::uint32_t> nodes;
1112+
typename crt_vector<leaf_node *, std::uint32_t>::iterator p_node;
1113+
if constexpr ( can_preallocate ) {
1114+
input_size = static_cast<size_type>( keys.size() );
1115+
if ( !input_size ) [[ unlikely ]] // minor optimization for 'complex' ranges (like complex/compound views which have size methods but which are non trivial) - reuse size info for empty check
1116+
return bulk_copied_input{};
1117+
auto const required_nodes{ node_count_required_for_values( input_size ) };
1118+
nodes.grow_to( required_nodes, default_init );
1119+
p_node = nodes.begin();
1120+
bptree_base::reserve_additional( required_nodes );
1121+
} else {
1122+
if ( keys.empty() ) [[ unlikely ]]
1123+
return bulk_copied_input{};
1124+
input_size = 0;
10731125
reserve_additional( 42 );
1126+
}
10741127
// w/o preallocation a saved hdr reference could get invalidated
10751128
auto const begin { can_preallocate ? hdr().free_list_ : slot_of( new_node<leaf_node>() ) };
10761129
auto leaf_slot{ begin };
@@ -1080,42 +1133,55 @@ class bptree_base_wkey : public bptree_base
10801133
{
10811134
auto & leaf{ this->leaf( leaf_slot ) };
10821135
BOOST_ASSUME( leaf.num_vals == 0 );
1136+
// fill this leaf
10831137
if constexpr ( can_preallocate ) {
1084-
auto const size_to_copy{ static_cast<node_size_type>( std::min<std::size_t>( leaf.max_values, static_cast<std::size_t>( keys.end() - p_keys ) ) ) };
1138+
auto const size_to_copy{ static_cast<node_size_type>( std::min<size_type>( leaf.max_values, input_size - count ) ) };
10851139
BOOST_ASSUME( size_to_copy );
10861140
std::copy_n( p_keys, size_to_copy, leaf.keys );
10871141
leaf.num_vals = size_to_copy;
10881142
count += size_to_copy;
10891143
p_keys += size_to_copy;
1144+
*p_node++ = &leaf;
10901145
} else {
1146+
BOOST_ASSUME( !input_size );
10911147
while ( ( p_keys != keys.end() ) && ( leaf.num_vals < leaf.max_values ) ) {
10921148
leaf.keys[ leaf.num_vals++ ] = *p_keys++;
10931149
}
10941150
count += leaf.num_vals;
1151+
// ugh - cannot save the pointers right away as they may get
1152+
// invalidated by calls to new_node
1153+
nodes.push_back( reinterpret_cast<leaf_node * const &>( leaf_slot ) );
10951154
}
1155+
10961156
BOOST_ASSUME( leaf.num_vals > 0 );
10971157
--this->hdr().free_node_count_;
1098-
if ( p_keys != keys.end() )
1099-
{
1100-
if constexpr ( can_preallocate ) {
1158+
1159+
// move to the next one or cleanup if we are at the end and return
1160+
if constexpr ( can_preallocate ) {
1161+
if ( count != input_size ) {
11011162
leaf_slot = leaf.right;
1163+
continue;
11021164
} else {
1165+
this->hdr().free_list_ = leaf.right;
1166+
unlink_right( leaf );
1167+
BOOST_ASSERT( p_keys == keys.end() );
1168+
BOOST_ASSUME( count == input_size );
1169+
count = input_size; // help the compiler eliminate the accumulation code above
1170+
}
1171+
} else {
1172+
BOOST_ASSUME( !input_size );
1173+
if ( p_keys != keys.end() ) {
11031174
auto & new_leaf{ new_node<leaf_node>() };
11041175
link( leaf, new_leaf );
11051176
leaf_slot = slot_of( new_leaf );
1177+
continue;
11061178
}
1107-
BOOST_ASSUME( !!leaf_slot );
1108-
}
1109-
else
1110-
{
1111-
if constexpr ( can_preallocate ) {
1112-
this->hdr().free_list_ = leaf.right;
1113-
unlink_right( leaf );
1114-
BOOST_ASSERT( count == static_cast<size_type>( keys.size() ) );
1115-
count = static_cast<size_type>( keys.size() ); // help the compiler eliminate the accumulation code above
1179+
#pragma clang loop unroll( disable )
1180+
for ( auto & leaf_ptr : nodes ) {
1181+
leaf_ptr = &this->leaf( reinterpret_cast<node_slot const &>( leaf_ptr ) );
11161182
}
1117-
return bulk_copied_input{ begin, { leaf_slot, leaf.num_vals }, count };
11181183
}
1184+
return bulk_copied_input{ begin, { leaf_slot, leaf.num_vals }, count, std::move( nodes ) };
11191185
}
11201186
std::unreachable();
11211187
}
@@ -1464,7 +1530,7 @@ class bptree_base_wkey : public bptree_base
14641530
////////////////////////////////////////////////////////////////////////////////
14651531

14661532
template <typename Key>
1467-
class bptree_base_wkey<Key>::fwd_iterator
1533+
class [[ clang::trivial_abi ]] bptree_base_wkey<Key>::fwd_iterator
14681534
:
14691535
public base_iterator,
14701536
public iter_impl<fwd_iterator, std::bidirectional_iterator_tag>
@@ -1508,7 +1574,7 @@ class bptree_base_wkey<Key>::fwd_iterator
15081574
////////////////////////////////////////////////////////////////////////////////
15091575

15101576
template <typename Key>
1511-
class bptree_base_wkey<Key>::ra_iterator
1577+
class [[ clang::trivial_abi ]] bptree_base_wkey<Key>::ra_iterator
15121578
:
15131579
public base_random_access_iterator,
15141580
public iter_impl<ra_iterator, std::random_access_iterator_tag>
@@ -1553,6 +1619,61 @@ private: friend class bptree_base_wkey<Key>;
15531619
operator fwd_iterator() const noexcept { return static_cast<fwd_iterator const &>( static_cast<base_iterator const &>( *this ) ); }
15541620
}; // class ra_iterator
15551621

1622+
1623+
// Random access iterator over an array of leaf node pointers. A key is
// addressed through a single flat value index: operator* picks the node with
// value_index_ / leaf_node::max_values and the key inside it with
// value_index_ % leaf_node::max_values.
// NOTE(review): this mapping presumes every leaf preceding the addressed one
// holds exactly max_values keys - confirm against the code that builds the
// leaf array.
template <typename Key>
1624+
class [[ clang::trivial_abi ]] bptree_base_wkey<Key>::ra_full_node_iterator
1625+
:
1626+
public iter_impl<ra_full_node_iterator, std::random_access_iterator_tag>
1627+
{
1628+
public:
1629+
// Have to provide default construction in order to model
1630+
// std::random_access_iterator (yet at the same time do not want to in order
1631+
// to be able to omit the check in the assignment operator as is required
1632+
// for base_iterator).
// The default constructor body is std::unreachable(), i.e. it exists for the
// type requirements only and must never actually be executed.
1633+
constexpr ra_full_node_iterator() noexcept { std::unreachable(); }
// leaves: array of leaf node pointers; value_index: flat index of the
// addressed key across that array.
1634+
constexpr ra_full_node_iterator( leaf_node * leaves[], size_type const value_index ) noexcept : pp_leaf_{ leaves }, value_index_{ value_index } {};
1635+
ra_full_node_iterator( ra_full_node_iterator const & ) = default;
1636+
1637+
// Dereference: split the flat index into a node index and an offset within
// that node, then return a reference to the key stored there.
Key & operator*() const noexcept
1638+
{
1639+
auto const node_index { static_cast<std::uint32_t >( value_index_ / leaf_node::max_values ) };
1640+
auto const node_offset{ static_cast<node_size_type>( value_index_ % leaf_node::max_values ) };
1641+
auto & leaf{ *pp_leaf_[ node_index ] };
1642+
BOOST_ASSUME( node_offset < leaf.num_vals );
1643+
return leaf.keys[ node_offset ];
1644+
}
1645+
1646+
// Offset arithmetic only touches value_index_ (pp_leaf_ is carried over
// unchanged). -Wsign-conversion is disabled around these four operators,
// presumably because difference_type and size_type differ in signedness.
PSI_WARNING_DISABLE_PUSH()
1647+
PSI_WARNING_GCC_OR_CLANG_DISABLE( -Wsign-conversion )
1648+
ra_full_node_iterator operator+ ( difference_type const n ) const noexcept { return { pp_leaf_, value_index_ + n }; }
1649+
ra_full_node_iterator & operator+=( difference_type const n ) noexcept { value_index_ += n; return *this; }
1650+
ra_full_node_iterator operator- ( difference_type const n ) const noexcept { return { pp_leaf_, value_index_ - n }; }
1651+
ra_full_node_iterator & operator-=( difference_type const n ) noexcept { value_index_ -= n; return *this; }
1652+
PSI_WARNING_DISABLE_POP()
1653+
1654+
// Ordering compares value_index_ only; both operands are assumed to refer to
// the same leaf array.
[[ gnu::pure ]]
1655+
friend constexpr auto operator<=>( ra_full_node_iterator const & left, ra_full_node_iterator const & right ) noexcept
1656+
{
1657+
BOOST_ASSUME( left.pp_leaf_ == right.pp_leaf_ );
1658+
return left.value_index_ <=> right.value_index_;
1659+
}
1660+
// Distance (difference of the flat value indices) between two iterators that
// are assumed to refer to the same leaf array.
difference_type operator-( ra_full_node_iterator const & other ) const noexcept
1661+
{
1662+
BOOST_ASSUME( this->pp_leaf_ == other.pp_leaf_ );
1663+
return static_cast<difference_type>( this->value_index_ - other.value_index_ );
1664+
}
1665+
// Assignment copies value_index_ only: pp_leaf_ is assumed to be equal
// already and is deliberately not written (this is the omitted check the
// comment at the top of the class refers to).
ra_full_node_iterator & operator=( ra_full_node_iterator const & other ) noexcept
1666+
{
1667+
BOOST_ASSUME( this->pp_leaf_ == other.pp_leaf_ );
1668+
this->value_index_ = other.value_index_;
1669+
return *this;
1670+
}
1671+
1672+
private:
1673+
// array of leaf node pointers being iterated over
leaf_node * * pp_leaf_{};
1674+
// flat index of the addressed key across the whole leaf array
size_type value_index_{};
1675+
}; // class ra_full_node_iterator
1676+
15561677
template <typename Key>
15571678
typename
15581679
bptree_base_wkey<Key>::const_iterator
@@ -2374,7 +2495,7 @@ namespace detail
23742495

23752496
template <typename Key, typename Comparator>
23762497
bp_tree_impl<Key, Comparator>::size_type
2377-
bp_tree_impl<Key, Comparator>::insert( typename base::bulk_copied_input const input, bool const unique )
2498+
bp_tree_impl<Key, Comparator>::insert( typename base::bulk_copied_input input, bool const unique )
23782499
{
23792500
// https://www.sciencedirect.com/science/article/abs/pii/S0020025502002025 On batch-constructing B+-trees: algorithm and its performance
23802501
// https://www.vldb.org/conf/2001/P461.pdf An Evaluation of Generic Bulk Loading Techniques
@@ -2383,18 +2504,33 @@ bp_tree_impl<Key, Comparator>::insert( typename base::bulk_copied_input const in
23832504
if ( input.size == 0 )
23842505
return 0;
23852506

2386-
auto const [begin_leaf, end_pos, total_size]{ input };
2507+
auto const begin_leaf{ input.begin };
2508+
auto const end_pos { input.end };
2509+
auto const total_size{ input.size };
2510+
{
2511+
// use specialized/optimized iterators (that can assume all nodes are
2512+
// full)
2513+
typename base::ra_full_node_iterator const sort_begin{ input.nodes.data(), 0 };
2514+
typename base::ra_full_node_iterator const sort_end { input.nodes.data(), total_size };
2515+
// Standard sort ABIs/impls pass the comparator around by-value: wrkrnd for
2516+
// big or non-trivial comparators.
2517+
using comp_by_val_helper = std::conditional_t<can_be_passed_in_reg<Comparator>, Comparator, detail::comp_ref<Comparator>>;
2518+
if constexpr ( requires{ comp().sort( sort_begin, sort_end ); } ) // does the comparator offer a specialized sort function?
2519+
comp().sort( sort_begin, sort_end );
2520+
else
2521+
# if __has_include( <boost/sort/pdqsort/pdqsort.hpp> )
2522+
if constexpr ( requires{ Comparator::is_branchless; requires( Comparator::is_branchless ); } ) // is it branchless
2523+
boost::sort::pdqsort_branchless( sort_begin, sort_end, comp_by_val_helper{ comp() } );
2524+
else
2525+
boost::sort::pdqsort( sort_begin, sort_end, comp_by_val_helper{ comp() } );
2526+
# else
2527+
boost::movelib::pdqsort( sort_begin, sort_end, comp_by_val_helper{ comp() } );
2528+
# endif
2529+
input.nodes.clear();
2530+
}
2531+
23872532
ra_iterator const p_new_nodes_begin{ *this, { begin_leaf, 0 }, 0 };
23882533
ra_iterator const p_new_nodes_end { *this, end_pos , total_size };
2389-
// Standard sort ABIs/impls pass the comparator around by-value: wrkrnd for
2390-
// big or non-trivial comparators.
2391-
using comp_by_val_helper = std::conditional_t<can_be_passed_in_reg<Comparator>, Comparator, detail::comp_ref<Comparator>>;
2392-
#if 0 // slower
2393-
std::sort( p_new_nodes_begin, p_new_nodes_end, comp_by_val_helper{ comp() } );
2394-
#else
2395-
boost::movelib::pdqsort( p_new_nodes_begin, p_new_nodes_end, comp_by_val_helper{ comp() } );
2396-
#endif
2397-
23982534
if ( empty() )
23992535
{
24002536
base::bulk_insert_into_empty( begin_leaf, end_pos, total_size );

0 commit comments

Comments
 (0)