#include <psi/vm/align.hpp>
#include <psi/vm/allocation.hpp>
#include <psi/vm/containers/abi.hpp>
+ #include <psi/vm/containers/crt_vector.hpp>

#include <psi/build/disable_warnings.hpp>

#include <boost/assert.hpp>
#include <boost/config_ex.hpp>
#include <boost/integer.hpp>
+ #if __has_include( <boost/sort/pdqsort/pdqsort.hpp> )
+ #include <boost/sort/pdqsort/pdqsort.hpp>
+ #else
#include <boost/move/algo/detail/pdqsort.hpp>
+ #endif
#include <boost/stl_interfaces/iterator_interface.hpp>
#if 0 // reexamining...
#include <boost/stl_interfaces/sequence_container_interface.hpp>
@@ -46,8 +51,8 @@ concept InsertableType = ( transparent_comparator && std::is_convertible_v<K, St
// user specializations are allowed and intended:

template <typename T> constexpr bool is_simple_comparator{ false };
- template <typename T> constexpr bool is_simple_comparator<std::less   <T>>{ true };
- template <typename T> constexpr bool is_simple_comparator<std::greater<T>>{ true };
+ template <typename T> constexpr bool is_simple_comparator<std::less   <T>>{ std::is_fundamental_v<T> };
+ template <typename T> constexpr bool is_simple_comparator<std::greater<T>>{ std::is_fundamental_v<T> };

template <typename T> constexpr bool is_statically_sized{ true };
template <typename T> requires requires{ T{}.size(); } constexpr bool is_statically_sized<T>{ T{}.size() != 0 };
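As the "user specializations" comment invites, a custom comparator can opt into the same fast paths with a full specialization of the variable template. A minimal sketch (my_u64_less is a hypothetical type, not part of this change, and the specialization has to live in the namespace that declares is_simple_comparator):

#include <cstdint>

// hypothetical, stateless, trivially copyable comparator
struct my_u64_less { bool operator()( std::uint64_t const a, std::uint64_t const b ) const noexcept { return a < b; } };
// opt it into the 'simple comparator' fast paths alongside std::less/std::greater over fundamentals
template <> constexpr bool is_simple_comparator<my_u64_less>{ true };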
@@ -385,6 +390,7 @@ class [[ clang::trivial_abi ]] bptree_base::base_iterator
{
public:
    constexpr base_iterator() noexcept = default;
+     constexpr base_iterator( base_iterator const & ) noexcept = default;

    base_iterator & operator++() noexcept { return ( *this = incremented<true>() ); }
    base_iterator & operator--() noexcept;
@@ -393,16 +399,31 @@ class [[ clang::trivial_abi ]] bptree_base::base_iterator

    base_iterator & operator+=( difference_type n ) noexcept;

+     constexpr base_iterator & operator=( base_iterator const & other ) noexcept
+     {
+ #   if defined( NDEBUG ) && __has_builtin( __builtin_constant_p )
+         // try to skip the redundant assignment of the nodes pointer yet at the
+         // same time support default constructed iterators - so it cannot be
+         // skipped unconditionally
+         if ( __builtin_constant_p( this->nodes_ ) && this->nodes_ )
+         { BOOST_ASSUME( this->nodes_ == other.nodes_ ); }
+         else
+ #   endif
+         this->nodes_ = other.nodes_;
+         this->pos_   = other.pos_  ;
+         return *this;
+     }
+
public: // extensions
    iter_pos const & pos() const noexcept { return pos_; }

protected: friend class bptree_base;
    using nodes_t =
- #ifndef NDEBUG // for bounds checking
+ #   ifndef NDEBUG // for bounds checking
        std::span<node_placeholder>;
- #else
+ #   else
        node_placeholder * __restrict;
- #endif
+ #   endif

    base_iterator( node_pool &, iter_pos ) noexcept;

@@ -430,6 +451,7 @@ private: template <typename T, typename Comparator> friend class bp_tree_impl;
    void update_pool_ptr( node_pool & ) const noexcept;
}; // class base_iterator

+
////////////////////////////////////////////////////////////////////////////////
// \class bptree_base::base_random_access_iterator
////////////////////////////////////////////////////////////////////////////////
@@ -449,9 +471,19 @@ class bptree_base::base_random_access_iterator : public base_iterator
#   endif
        return static_cast<difference_type>( this->index_ - other.index_ );
    }
-     [[ using gnu: sysv_abi, hot, pure ]]
-     base_random_access_iterator   operator+ ( difference_type n ) const noexcept;
-     base_random_access_iterator & operator+=( difference_type n )       noexcept { return (*this = *this + n); }
+     base_random_access_iterator & operator+=( difference_type const n )       noexcept { return (*this = *this + n); }
+     base_random_access_iterator   operator+ ( difference_type const n ) const noexcept
+     {
+ #   if __has_builtin( __builtin_constant_p )
+         if ( __builtin_constant_p( n ) ) // has to be in the header (even w/ LTO)
+         {
+             if      ( n == +1 ) return ++auto( *this );
+             else if ( n == -1 ) return --auto( *this );
+             else if ( n ==  0 ) return       ( *this );
+         }
+ #   endif
+         return at_offset( n );
+     }

    // same reason for 'precise_end_handling=true' as in operator+
    base_random_access_iterator & operator++() noexcept { static_cast<base_iterator &>( *this ) = base_iterator::incremented<true>(); ++index_; return *this; }
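The new operator+ is an instance of a more general idiom: probe, inside an inline (header) definition, whether the argument is a compile-time constant and divert the common literal offsets (+1, -1, 0) to cheaper code, leaving only the generic case to the out-of-line at_offset(). A standalone sketch of the idiom under the same __has_builtin guard (advance is a hypothetical, purely illustrative function):

inline long advance( long const value, long const n ) noexcept
{
#if __has_builtin( __builtin_constant_p )
    if ( __builtin_constant_p( n ) ) // true only when the (inlined) call site passes a constant
    {
        if      ( n == +1 ) return value + 1; // cheap special case
        else if ( n == -1 ) return value - 1; // cheap special case
        else if ( n ==  0 ) return value;     // no-op
    }
#endif
    return value + n; // stands in for the generic, out-of-line at_offset() path
}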
@@ -482,6 +514,9 @@ class bptree_base::base_random_access_iterator : public base_iterator
        : base_iterator{ base }, index_{ start_index } {}

    size_type index_;
+ private:
+     [[ using gnu: sysv_abi, hot, pure ]]
+     base_random_access_iterator at_offset( difference_type n ) const noexcept;
}; // class base_random_access_iterator

@@ -513,8 +548,8 @@ class bptree_base_wkey : public bptree_base
    using key_rv_arg    = std::conditional_t<can_be_passed_in_reg<Key>, Key const, pass_rv_in_reg<Key>>;
    using key_const_arg = std::conditional_t<can_be_passed_in_reg<Key>, Key const, pass_in_reg   <Key>>;

-     class fwd_iterator;
-     class  ra_iterator;
+     class [[ clang::trivial_abi ]] fwd_iterator;
+     class [[ clang::trivial_abi ]]  ra_iterator;

    using       iterator = fwd_iterator;
    using const_iterator = std::basic_const_iterator<iterator>;
@@ -838,6 +873,10 @@ class bptree_base_wkey : public bptree_base
        node_slot inner;
    }; // struct key_locations

+     // internal deque-like simpler/faster random access iterator for/over full
+     // nodes (used for sorting input data in bulk insert operations)
+     class [[ clang::trivial_abi ]] ra_full_node_iterator;
+
    [[ gnu::pure, nodiscard ]] const_iterator make_iter( auto const &... args ) const noexcept { return static_cast<iterator &&>( const_cast<bptree_base_wkey &>( *this ).bptree_base::make_iter( args... ) ); }
    [[ gnu::pure, nodiscard ]] const_iterator make_iter( key_locations const loc ) const noexcept { return make_iter( loc.leaf, loc.leaf_offset.pos ); }

@@ -1058,19 +1097,33 @@ class bptree_base_wkey : public bptree_base
        node_slot begin;
        iter_pos  end  ;
        size_type size ;
-     };
+         // save a linearized array of (full) nodes in order to be able to use
+         // "really random access iterators" (similar to std::deque iterators)
+         // for subsequent sorting
+         crt_vector<leaf_node *, std::uint32_t> nodes;
+     }; // struct bulk_copied_input
    template <typename I, typename S, std::ranges::subrange_kind kind>
    bulk_copied_input
    bulk_insert_prepare( std::ranges::subrange<I, S, kind> keys )
    {
-         if ( keys.empty() ) [[ unlikely ]]
-             return bulk_copied_input{};
-
        auto constexpr can_preallocate{ kind == std::ranges::subrange_kind::sized };
-         if constexpr ( can_preallocate )
-             reserve_additional( static_cast<size_type>( keys.size() ) );
-         else
+         size_type input_size;
+         crt_vector<leaf_node *, std::uint32_t> nodes;
+         typename crt_vector<leaf_node *, std::uint32_t>::iterator p_node;
+         if constexpr ( can_preallocate ) {
+             input_size = static_cast<size_type>( keys.size() );
+             if ( !input_size ) [[ unlikely ]] // minor optimization for 'complex' ranges (compound views that do have a size() method but are otherwise non-trivial): reuse the size info for the empty check
+                 return bulk_copied_input{};
+             auto const required_nodes{ node_count_required_for_values( input_size ) };
+             nodes.grow_to( required_nodes, default_init );
+             p_node = nodes.begin();
+             bptree_base::reserve_additional( required_nodes );
+         } else {
+             if ( keys.empty() ) [[ unlikely ]]
+                 return bulk_copied_input{};
+             input_size = 0;
            reserve_additional( 42 );
+         }
        // w/o preallocation a saved hdr reference could get invalidated
        auto const begin{ can_preallocate ? hdr().free_list_ : slot_of( new_node<leaf_node>() ) };
        auto leaf_slot{ begin };
@@ -1080,42 +1133,55 @@ class bptree_base_wkey : public bptree_base
        {
            auto & leaf{ this->leaf( leaf_slot ) };
            BOOST_ASSUME( leaf.num_vals == 0 );
+             // fill this leaf
            if constexpr ( can_preallocate ) {
-                 auto const size_to_copy{ static_cast<node_size_type>( std::min<std::size_t>( leaf.max_values, static_cast<std::size_t>( keys.end() - p_keys ) ) ) };
+                 auto const size_to_copy{ static_cast<node_size_type>( std::min<size_type>( leaf.max_values, input_size - count ) ) };
                BOOST_ASSUME( size_to_copy );
                std::copy_n( p_keys, size_to_copy, leaf.keys );
                leaf.num_vals = size_to_copy;
                count  += size_to_copy;
                p_keys += size_to_copy;
+                 *p_node++ = &leaf;
            } else {
+                 BOOST_ASSUME( !input_size );
                while ( ( p_keys != keys.end() ) && ( leaf.num_vals < leaf.max_values ) ) {
                    leaf.keys[ leaf.num_vals++ ] = *p_keys++;
                }
                count += leaf.num_vals;
+                 // ugh - cannot save the pointers right away as they may get
+                 // invalidated by calls to new_node
+                 nodes.push_back( reinterpret_cast<leaf_node * const &>( leaf_slot ) );
            }
+
            BOOST_ASSUME( leaf.num_vals > 0 );
            --this->hdr().free_node_count_;
-             if ( p_keys != keys.end() )
-             {
-                 if constexpr ( can_preallocate ) {
+
+             // move on to the next leaf or, if we are at the end, clean up and return
+             if constexpr ( can_preallocate ) {
+                 if ( count != input_size ) {
                    leaf_slot = leaf.right;
+                     continue;
                } else {
+                     this->hdr().free_list_ = leaf.right;
+                     unlink_right( leaf );
+                     BOOST_ASSERT( p_keys == keys.end() );
+                     BOOST_ASSUME( count == input_size );
+                     count = input_size; // help the compiler eliminate the accumulation code above
+                 }
+             } else {
+                 BOOST_ASSUME( !input_size );
+                 if ( p_keys != keys.end() ) {
                    auto & new_leaf{ new_node<leaf_node>() };
                    link( leaf, new_leaf );
                    leaf_slot = slot_of( new_leaf );
+                     continue;
                }
-                 BOOST_ASSUME( !!leaf_slot );
-             }
-             else
-             {
-                 if constexpr ( can_preallocate ) {
-                     this->hdr().free_list_ = leaf.right;
-                     unlink_right( leaf );
-                     BOOST_ASSERT( count == static_cast<size_type>( keys.size() ) );
-                     count = static_cast<size_type>( keys.size() ); // help the compiler eliminate the accumulation code above
+                 #pragma clang loop unroll( disable )
+                 for ( auto & leaf_ptr : nodes ) {
+                     leaf_ptr = &this->leaf( reinterpret_cast<node_slot const &>( leaf_ptr ) );
                }
-                 return bulk_copied_input{ begin, { leaf_slot, leaf.num_vals }, count };
            }
+             return bulk_copied_input{ begin, { leaf_slot, leaf.num_vals }, count, std::move( nodes ) };
        }
        std::unreachable();
    }
@@ -1464,7 +1530,7 @@ class bptree_base_wkey : public bptree_base
////////////////////////////////////////////////////////////////////////////////

template <typename Key>
- class bptree_base_wkey<Key>::fwd_iterator
+ class [[ clang::trivial_abi ]] bptree_base_wkey<Key>::fwd_iterator
    :
    public base_iterator,
    public iter_impl<fwd_iterator, std::bidirectional_iterator_tag>
@@ -1508,7 +1574,7 @@ class bptree_base_wkey<Key>::fwd_iterator
////////////////////////////////////////////////////////////////////////////////

template <typename Key>
- class bptree_base_wkey<Key>::ra_iterator
+ class [[ clang::trivial_abi ]] bptree_base_wkey<Key>::ra_iterator
    :
    public base_random_access_iterator,
    public iter_impl<ra_iterator, std::random_access_iterator_tag>
@@ -1553,6 +1619,61 @@ private: friend class bptree_base_wkey<Key>;
    operator fwd_iterator() const noexcept { return static_cast<fwd_iterator const &>( static_cast<base_iterator const &>( *this ) ); }
}; // class ra_iterator

+
+ template <typename Key>
+ class [[ clang::trivial_abi ]] bptree_base_wkey<Key>::ra_full_node_iterator
+     :
+     public iter_impl<ra_full_node_iterator, std::random_access_iterator_tag>
+ {
+ public:
+     // Have to provide default construction in order to model
+     // std::random_access_iterator (yet at the same time do not want to, in
+     // order to be able to omit the check in the assignment operator that is
+     // required for base_iterator).
+     constexpr ra_full_node_iterator() noexcept { std::unreachable(); }
+     constexpr ra_full_node_iterator( leaf_node * leaves[], size_type const value_index ) noexcept : pp_leaf_{ leaves }, value_index_{ value_index } {}
+     ra_full_node_iterator( ra_full_node_iterator const & ) = default;
+
+     Key & operator*() const noexcept
+     {
+         auto const node_index { static_cast<std::uint32_t  >( value_index_ / leaf_node::max_values ) };
+         auto const node_offset{ static_cast<node_size_type>( value_index_ % leaf_node::max_values ) };
+         auto & leaf{ *pp_leaf_[ node_index ] };
+         BOOST_ASSUME( node_offset < leaf.num_vals );
+         return leaf.keys[ node_offset ];
+     }
+
+     PSI_WARNING_DISABLE_PUSH()
+     PSI_WARNING_GCC_OR_CLANG_DISABLE( -Wsign-conversion )
+     ra_full_node_iterator   operator+ ( difference_type const n ) const noexcept { return { pp_leaf_, value_index_ + n }; }
+     ra_full_node_iterator & operator+=( difference_type const n )       noexcept { value_index_ += n; return *this; }
+     ra_full_node_iterator   operator- ( difference_type const n ) const noexcept { return { pp_leaf_, value_index_ - n }; }
+     ra_full_node_iterator & operator-=( difference_type const n )       noexcept { value_index_ -= n; return *this; }
+     PSI_WARNING_DISABLE_POP()
+
+     [[ gnu::pure ]]
+     friend constexpr auto operator<=>( ra_full_node_iterator const & left, ra_full_node_iterator const & right ) noexcept
+     {
+         BOOST_ASSUME( left.pp_leaf_ == right.pp_leaf_ );
+         return left.value_index_ <=> right.value_index_;
+     }
+     difference_type operator-( ra_full_node_iterator const & other ) const noexcept
+     {
+         BOOST_ASSUME( this->pp_leaf_ == other.pp_leaf_ );
+         return static_cast<difference_type>( this->value_index_ - other.value_index_ );
+     }
+     ra_full_node_iterator & operator=( ra_full_node_iterator const & other ) noexcept
+     {
+         BOOST_ASSUME( this->pp_leaf_ == other.pp_leaf_ );
+         this->value_index_ = other.value_index_;
+         return *this;
+     }
+
+ private:
+     leaf_node * * pp_leaf_     {};
+     size_type     value_index_{};
+ }; // class ra_full_node_iterator
+

template <typename Key>
typename
bptree_base_wkey<Key>::const_iterator
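The iterator above uses the same index decomposition as std::deque: a single flat value index is split, on every dereference, into a node index and an offset within that node, so the leaves themselves never have to be contiguous in memory. A standalone analogue of that mapping (element_at and BLOCK are hypothetical, illustration-only names):

#include <cstddef>
#include <vector>

constexpr std::size_t BLOCK{ 4 }; // stands in for leaf_node::max_values

// flat_index / BLOCK selects the block, flat_index % BLOCK the slot within it -
// mirroring what ra_full_node_iterator::operator*() does with pp_leaf_ and value_index_
int & element_at( std::vector<int *> const & blocks, std::size_t const flat_index )
{
    return blocks[ flat_index / BLOCK ][ flat_index % BLOCK ];
}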
@@ -2374,7 +2495,7 @@ namespace detail

template <typename Key, typename Comparator>
bp_tree_impl<Key, Comparator>::size_type
- bp_tree_impl<Key, Comparator>::insert( typename base::bulk_copied_input const input, bool const unique )
+ bp_tree_impl<Key, Comparator>::insert( typename base::bulk_copied_input input, bool const unique )
{
    // https://www.sciencedirect.com/science/article/abs/pii/S0020025502002025 On batch-constructing B+-trees: algorithm and its performance
    // https://www.vldb.org/conf/2001/P461.pdf An Evaluation of Generic Bulk Loading Techniques
@@ -2383,18 +2504,33 @@ bp_tree_impl<Key, Comparator>::insert( typename base::bulk_copied_input const in
    if ( input.size == 0 )
        return 0;

-     auto const [begin_leaf, end_pos, total_size]{ input };
+     auto const begin_leaf{ input.begin };
+     auto const end_pos   { input.end   };
+     auto const total_size{ input.size  };
+     {
+         // use specialized/optimized iterators (which can assume that all the
+         // nodes are full)
+         typename base::ra_full_node_iterator const sort_begin{ input.nodes.data(), 0          };
+         typename base::ra_full_node_iterator const sort_end  { input.nodes.data(), total_size };
+         // Standard sort ABIs/impls pass the comparator around by value: workaround
+         // for big or non-trivial comparators.
+         using comp_by_val_helper = std::conditional_t<can_be_passed_in_reg<Comparator>, Comparator, detail::comp_ref<Comparator>>;
+         if constexpr ( requires{ comp().sort( sort_begin, sort_end ); } ) // does the comparator offer a specialized sort function?
+             comp().sort( sort_begin, sort_end );
+         else
+ #       if __has_include( <boost/sort/pdqsort/pdqsort.hpp> )
+         if constexpr ( requires{ Comparator::is_branchless; requires( Comparator::is_branchless ); } ) // is it branchless?
+             boost::sort::pdqsort_branchless( sort_begin, sort_end, comp_by_val_helper{ comp() } );
+         else
+             boost::sort::pdqsort           ( sort_begin, sort_end, comp_by_val_helper{ comp() } );
+ #       else
+             boost::movelib::pdqsort( sort_begin, sort_end, comp_by_val_helper{ comp() } );
+ #       endif
+         input.nodes.clear();
+     }
+
    ra_iterator const p_new_nodes_begin{ *this, { begin_leaf, 0 }, 0          };
    ra_iterator const p_new_nodes_end  { *this,   end_pos       , total_size };
-     // Standard sort ABIs/impls pass the comparator around by-value: wrkrnd for
-     // big or non-trivial comparators.
-     using comp_by_val_helper = std::conditional_t<can_be_passed_in_reg<Comparator>, Comparator, detail::comp_ref<Comparator>>;
-     #if 0 // slower
-     std::sort( p_new_nodes_begin, p_new_nodes_end, comp_by_val_helper{ comp() } );
-     #else
-     boost::movelib::pdqsort( p_new_nodes_begin, p_new_nodes_end, comp_by_val_helper{ comp() } );
-     #endif
-
    if ( empty() )
    {
        base::bulk_insert_into_empty( begin_leaf, end_pos, total_size );
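For reference, the two extension points probed above can be exercised like this (both comparator types below are hypothetical, illustration-only; the sort member body is a stand-in, not a real specialized sort):

#include <algorithm> // std::sort, for the stand-in body
#include <cstdint>

// opts into boost::sort::pdqsort_branchless via the is_branchless probe (when Boost.Sort is available)
struct branchless_less
{
    static constexpr bool is_branchless{ true };
    bool operator()( std::uint64_t const a, std::uint64_t const b ) const noexcept { return a < b; }
};

// supplies its own sort(), detected by the requires{ comp().sort( first, last ); } probe,
// so the bulk insert delegates the whole sorting step to the comparator
struct custom_sort_compare
{
    bool operator()( std::uint32_t const a, std::uint32_t const b ) const noexcept { return a < b; }
    template <typename RandomIt>
    void sort( RandomIt const first, RandomIt const last ) const { std::sort( first, last, *this ); } // stand-in body
};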