diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index beef089ba86a5..3be229db1f00f 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1602,10 +1602,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (target != leafblkno) - ItemPointerSetBlockNumber(&trunctuple.t_tid, target); + BTreeTupleSetTopParent(&trunctuple, target); else - ItemPointerSetInvalid(&trunctuple.t_tid); - BTreeTupleSetNAtts(&trunctuple, 0); + BTreeTupleSetTopParent(&trunctuple, InvalidBlockNumber); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) @@ -1690,7 +1689,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) BTPageOpaque opaque; bool rightsib_is_rightmost; int targetlevel; - ItemPointer leafhikey; + IndexTuple leafhikey; BlockNumber nextchild; page = BufferGetPage(leafbuf); @@ -1702,7 +1701,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) * Remember some information about the leaf page. */ itemid = PageGetItemId(page, P_HIKEY); - leafhikey = &((IndexTuple) PageGetItem(page, itemid))->t_tid; + leafhikey = (IndexTuple) PageGetItem(page, itemid); leafleftsib = opaque->btpo_prev; leafrightsib = opaque->btpo_next; @@ -1714,9 +1713,10 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) * parent in the branch. Set 'target' and 'buf' to reference the page * actually being unlinked. */ - if (ItemPointerIsValid(leafhikey)) + target = BTreeTupleGetTopParent(leafhikey); + + if (target != InvalidBlockNumber) { - target = ItemPointerGetBlockNumberNoCheck(leafhikey); Assert(target != leafblkno); /* fetch the block number of the topmost parent's left sibling */ @@ -1919,12 +1919,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) * branch. */ if (target != leafblkno) - { - if (nextchild == InvalidBlockNumber) - ItemPointerSetInvalid(leafhikey); - else - ItemPointerSetBlockNumber(leafhikey, nextchild); - } + BTreeTupleSetTopParent(leafhikey, nextchild); /* * Mark the page itself deleted. It can be recycled when all current diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index fb8c769df9ac5..67a94cb80a2ce 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -800,11 +800,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); - if (xlrec->topparent != InvalidBlockNumber) - ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); - else - ItemPointerSetInvalid(&trunctuple.t_tid); - BTreeTupleSetNAtts(&trunctuple, 0); + BTreeTupleSetTopParent(&trunctuple, xlrec->topparent); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) @@ -912,11 +908,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) /* Add a dummy hikey item */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); - if (xlrec->topparent != InvalidBlockNumber) - ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); - else - ItemPointerSetInvalid(&trunctuple.t_tid); - BTreeTupleSetNAtts(&trunctuple, 0); + BTreeTupleSetTopParent(&trunctuple, xlrec->topparent); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 1194be9281105..892aeca300399 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -226,6 +226,20 @@ typedef struct BTMetaPageData #define BTreeInnerTupleSetDownLink(itup, blkno) \ ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)) +/* + * Get/set leaf page highkey's link. During the second phase of deletion, the + * target leaf page's high key may point to an ancestor page (at all other + * times, the leaf level high key's link is not used). See the nbtree README + * for full details. + */ +#define BTreeTupleGetTopParent(itup) \ + ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid)) +#define BTreeTupleSetTopParent(itup, blkno) \ + do { \ + ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \ + BTreeTupleSetNAtts((itup), 0); \ + } while(0) + /* * Get/set number of attributes within B-tree index tuple. Asserts should be * removed when BT_RESERVED_OFFSET_MASK bits will be used. diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index fe5b698669c40..fc81088d4b83e 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -3052,6 +3052,16 @@ explain (costs off) Filter: (NOT b) (4 rows) +-- +-- Test for multilevel page deletion +-- +CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint); +INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i; +ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d); +DELETE FROM delete_test_table WHERE a > 40000; +VACUUM delete_test_table; +DELETE FROM delete_test_table WHERE a > 10; +VACUUM delete_test_table; -- -- REINDEX (VERBOSE) -- diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 8afb1f2f7e3a7..0aa5357917530 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -38,6 +38,7 @@ d_star|f date_tbl|f default_tbl|f defaultexpr_tbl|f +delete_test_table|t dept|f dupindexcols|t e_star|f diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index f7731265a08c2..f9e7118f0d3d8 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -1061,6 +1061,17 @@ explain (costs off) explain (costs off) select * from boolindex where not b order by i limit 10; +-- +-- Test for multilevel page deletion +-- +CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint); +INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i; +ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d); +DELETE FROM delete_test_table WHERE a > 40000; +VACUUM delete_test_table; +DELETE FROM delete_test_table WHERE a > 10; +VACUUM delete_test_table; + -- -- REINDEX (VERBOSE) --