Skip to content

Commit

Permalink
reconcile analyze
Browse files Browse the repository at this point in the history
  • Loading branch information
y123456yz committed Sep 21, 2023
1 parent ea5afe8 commit e0accd8
Show file tree
Hide file tree
Showing 12 changed files with 111 additions and 37 deletions.
10 changes: 10 additions & 0 deletions python安装.txt
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
/usr/bin/ld: /usr/local/lib64/libsnappy.a(snappy.cc.o): relocation R_X86_64_32 against `.rodata' can not be used when making a shared object; recompile with -fPIC
/usr/bin/ld: /usr/local/lib64/libsnappy.a(snappy-sinksource.cc.o): relocation R_X86_64_32 against symbol `_ZTVN6snappy6SourceE' can not be used when making a shared object; recompile with -fPIC
/usr/bin/ld: /usr/local/lib64/libsnappy.a(snappy-c.cc.o): relocation R_X86_64_PC32 against symbol `snappy_max_compressed_length' can not be used when making a shared object; recompile with -fPIC
/usr/bin/ld: final link failed: Bad value
collect2: error: ld returned 1 exit status

如果编译报错,可以跳过snappy,对应命令如下:
cmake -DENABLE_SNAPPY=0 ../.


./configure --prefix=/usr/local/python-3.8.8 --enable-shared CFLAGS=-fPIC --enable-optimizations 升级python时,源码编译一定要带上--enable-shared和CFLAGS=-fPIC,否则高版本wiredtiger编译的时候会报错
8 changes: 4 additions & 4 deletions wiredtiger-11.1.0/wiredtiger-11.1.0/examples/c/ex_access.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,17 @@ access_example(void)

/* Open a connection to the database, creating it if necessary. */
//error_check(wiredtiger_open(home, NULL, "create,statistics=(all),create,verbose=[evictserver=5,evict=5,split=5,evict_stuck=5]", &conn));
// error_check(wiredtiger_open(home, NULL, "create,cache_size=1M, statistics=(all),create,verbose=[vessrify=5, split=5, overflow=5, generation=5, block=5, write=5, evictserver=5, evict_stuck=5, block_cache=5, checkpoint_progress=5, checkpoint=5, checkpoint_cleanup=5, block=5,overflow=5,reconcile=5,evictserver=5,evict=5,split=5,evict_stuck=5]", &conn));
//error_check(wiredtiger_open(home, NULL, "create,cache_size=1M, statistics=(all),create,verbose=[split=5, overflow=5, generation=5, block=5, write=5, evictserver=5, evict_stuck=5, block_cache=5, checkpoint_progress=5, checkpoint=5, checkpoint_cleanup=5, block=5,overflow=5,reconcile=5,evictserver=5,evict=5,split=5,evict_stuck=5]", &conn));

error_check(wiredtiger_open(home, NULL, "create,cache_size=1M, statistics=(all),create,verbose=[\
backup=5, block=5, block_cache=5, checkpoint=5, checkpoint_cleanup=5,checkpoint_progress=5,compact=5,\
compact_progress=5,error_returns=5,evict=5,evict_stuck=5,evictserver=5,fileops=5,generation=5,handleops=5,log=5,\
hs=5, history_store_activity=5,lsm=5,lsm_manager=5,metadata=5,mutex=5,out_of_order=5,overflow=5,read=5,reconcile=5,recovery=5, \
recovery_progress=5,rts=5, salvage=5, shared_cache=5,split=5,temporary=5,thread_group=5,timestamp=5,tiered=5,transaction=5,verify=5,\
version=5,write=5, config_all_verbos=1, api=5] ", &conn));
version=5,write=5, config_all_verbos=1, api=-3, metadata=-3] ", &conn));

/* Open a session handle for the database. */
error_check(conn->open_session(conn, NULL, NULL, &session));
error_check(conn->open_session(conn, NULL, NULL, &session));
/*! [access example connection] */


Expand All @@ -103,7 +103,7 @@ access_example(void)
value_item.size = strlen(value_item.data);
__wt_random_init_seed(NULL, &rnd);

for (i=400;i > 0; i--) {
for (i=50;i > 0; i--) {
rval = __wt_random(&rnd);

cursor->set_key(cursor, i); /* Insert a record. */
Expand Down
15 changes: 12 additions & 3 deletions wiredtiger-11.1.0/wiredtiger-11.1.0/src/btree/bt_split.c
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,8 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
/*
* __split_internal_lock --
* Lock an internal page.
*/ //获取page对应的page_lock锁,也就是锁住ref->home这个internal page
*/
//获取page对应的page_lock锁,也就是锁住ref->home这个internal page, 并返回parent page
static int
__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, WT_PAGE **parentp)
{
Expand Down Expand Up @@ -1670,6 +1671,8 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *mult
* __wt_multi_to_ref --
* Move a multi-block entry into a WT_REF structure.
*/
//__wt_evict->__evict_page_dirty_update->__wt_split_multi->__split_multi_lock->__split_multi->__wt_multi_to_ref
//让page指向拆分后的磁盘元数据
int
__wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp,
size_t *incrp, bool closing)
Expand Down Expand Up @@ -2103,7 +2106,9 @@ __split_insert_lock(WT_SESSION_IMPL *session, WT_REF *ref)
/*
* __wt_split_insert --
* Split a page's last insert list entries into a separate page.
*/ //__wt_evict
*/
//__wt_evict: inmem_split,内存中的page进行拆分,拆分后的还是在内存中不会写入磁盘
//__evict_reconcile: 对page拆分为多个page后写入磁盘中
int
__wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
{
Expand All @@ -2123,6 +2128,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* __split_multi --
* Split a page into multiple pages.
*/
//__wt_evict->__evict_page_dirty_update->__wt_split_multi->__split_multi_lock->__split_multi
static int
__split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
{
Expand All @@ -2147,7 +2153,7 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*/
WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
for (i = 0; i < new_entries; ++i)
WT_ERR(
WT_ERR( //让page指向拆分后的磁盘元数据
__wt_multi_to_ref(session, page, &mod->mod_multi[i], &ref_new[i], &parent_incr, closing));

/*
Expand Down Expand Up @@ -2189,13 +2195,15 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* __split_multi_lock --
* Split a page into multiple pages.
*/
//__wt_evict->__evict_page_dirty_update->__wt_split_multi->__split_multi_lock
static int
__split_multi_lock(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
{
WT_DECL_RET;
WT_PAGE *parent;

/* Lock the parent page, then proceed with the split. */
//获取page对应的page_lock锁,也就是锁住ref->home这个internal page, 并返回parent page
WT_RET(__split_internal_lock(session, ref, false, &parent));
if ((ret = __split_multi(session, ref, closing)) != 0 || closing) {
__split_internal_unlock(session, parent);
Expand All @@ -2213,6 +2221,7 @@ __split_multi_lock(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
* __wt_split_multi --
* Split a page into multiple pages.
*/
//__wt_evict->__evict_page_dirty_update->__wt_split_multi
int
__wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
{
Expand Down
3 changes: 2 additions & 1 deletion wiredtiger-11.1.0/wiredtiger-11.1.0/src/conn/conn_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,8 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
*/
verbose_value = WT_VERBOSE_DEBUG_1;
goto verbos_assign;
} else if (sval.type == WT_CONFIG_ITEM_NUM && sval.val >= WT_VERBOSE_INFO &&
//yang add todo xxxxx ,这里的下限应该是WT_VERBOSE_ERROR而不是WT_VERBOSE_INFO,否则负数的verbose级别(例如api=-3)无法通过数值配置
} else if (sval.type == WT_CONFIG_ITEM_NUM && sval.val >= WT_VERBOSE_ERROR &&
sval.val <= WT_VERBOSE_DEBUG_5) {
verbose_value = (WT_VERBOSE_LEVEL)sval.val;
goto verbos_assign;
Expand Down
8 changes: 7 additions & 1 deletion wiredtiger-11.1.0/wiredtiger-11.1.0/src/evict/evict_page.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint8_t previous_state, uint32
* Review the page for conditions that would block its eviction. If the check fails (for
* example, we find a page with active children), quit. Make this check for clean pages, too:
* while unlikely eviction would choose an internal page with children, it's not disallowed.
*/ //判断是否可以进行mem split,结果存入inmem_split
*/
//判断是否可以进行mem split,结果存入inmem_split
WT_ERR(__evict_review(session, ref, flags, &inmem_split));

/*
Expand All @@ -197,6 +198,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint8_t previous_state, uint32
* Fail 0.1% of the time after we have done reconciliation. We should always evict the page of a
* dead tree.
*/
//让0.1%的evict reconcile跳过后续流程,直接err
if (!closing && !tree_dead &&
__wt_failpoint(session, WT_TIMING_STRESS_FAILPOINT_EVICTION_FAIL_AFTER_RECONCILIATION, 10)) {
ret = EBUSY;
Expand Down Expand Up @@ -395,6 +397,7 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* __evict_page_dirty_update --
* Update a dirty page's reference on eviction.
*/
//__wt_evict->__evict_page_dirty_update
static int
__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags)
{
Expand Down Expand Up @@ -441,6 +444,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_
WT_ASSERT(session, closing == false);
WT_RET(__wt_split_rewrite(session, ref, &mod->mod_multi[0]));
} else
//一般走这里,一个page通过evict reconcile拆分为多个page
WT_RET(__wt_split_multi(session, ref, closing));
break;
case WT_PM_REC_REPLACE:
Expand Down Expand Up @@ -717,6 +721,8 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
* __evict_reconcile --
* Reconcile the page for eviction.
*/
//__wt_evict: inmem_split,内存中的page进行拆分,拆分后的还是在内存中不会写入磁盘
//__evict_reconcile: 对page拆分为多个page后写入磁盘中
static int
__evict_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags)
{
Expand Down
18 changes: 16 additions & 2 deletions wiredtiger-11.1.0/wiredtiger-11.1.0/src/include/btmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#define WT_REC_CHECKPOINT_RUNNING 0x008u
#define WT_REC_CLEAN_AFTER_REC 0x010u
#define WT_REC_EVICT 0x020u
//__evict_reconcile中如果是leaf page设置该标识
#define WT_REC_HS 0x040u
#define WT_REC_IN_MEMORY 0x080u
#define WT_REC_SCRUB 0x100u
Expand Down Expand Up @@ -135,7 +136,7 @@ __wt_page_header_byteswap(WT_PAGE_HEADER *dsk)
* WT_PAGE_HEADER_BYTE_SIZE --
* The first usable data byte on the block (past the combined headers).
*/
//page header + block header
//page header(WT_PAGE_HEADER_SIZE) + block header(WT_BLOCK_HEADER_SIZE)
#define WT_PAGE_HEADER_BYTE_SIZE(btree) ((u_int)(WT_PAGE_HEADER_SIZE + (btree)->block_header))
//dsk开始跳过page header + block header
#define WT_PAGE_HEADER_BYTE(btree, dsk) \
Expand Down Expand Up @@ -273,7 +274,7 @@ struct __wt_multi {
/*
* Block's key: either a column-store record number or a row-store variable length byte string.
*/
//page拆分后对应的ref key,参考__rec_split_write
//page拆分后对应的ref key,参考__rec_split_dump_keys的打印,赋值的地方在__rec_split_write
union {
uint64_t recno;
WT_IKEY *ikey;
Expand Down Expand Up @@ -403,8 +404,10 @@ struct __wt_page_modify {
uint32_t multi_entries; /* Multiple blocks element count */
} m;
#undef mod_multi
//赋值参考__rec_write_wrapup, 一次reconcile结束后,reconcile的multi信息转存到mod->mod_multi中
#define mod_multi u1.m.multi
#undef mod_multi_entries
//赋值参考__rec_write_wrapup,一次reconcile结束后,reconcile的multi信息转存到mod->mod_multi中
#define mod_multi_entries u1.m.multi_entries
} u1;

Expand Down Expand Up @@ -464,13 +467,17 @@ struct __wt_page_modify {
WT_INSERT_HEAD **insert;

/* Updated items for row-stores. */
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程
WT_UPDATE **update;
} row_leaf;
#undef mod_row_insert
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程

//WT_PAGE_ALLOC_AND_SWAP、__split_insert分配空间
//WT_ROW_INSERT_SLOT获取对应的跳跃表,实际上insert是个数组,数组每个成员对应一个跳跃表,参考__wt_leaf_page_can_split
#define mod_row_insert u2.row_leaf.insert
#undef mod_row_update
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程
#define mod_row_update u2.row_leaf.update
} u2;

Expand Down Expand Up @@ -505,6 +512,7 @@ struct __wt_page_modify {
* WT_PAGE_DIRTY --
* Two or more updates have been added to the page.
*/
//__wt_page_modify_clear
#define WT_PAGE_CLEAN 0
#define WT_PAGE_DIRTY_FIRST 1
#define WT_PAGE_DIRTY 2
Expand All @@ -514,6 +522,7 @@ struct __wt_page_modify {
#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */
#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
//赋值参考__rec_write_wrapup
uint8_t rec_result; /* Reconciliation state */

#define WT_PAGE_RS_RESTORED 0x1
Expand Down Expand Up @@ -690,6 +699,7 @@ struct __wt_page {
//指向该page存储的真实数据,见__wt_page_alloc
WT_ROW *row; /* Key/value pairs */
#undef pg_row
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程
//指向该page存储的真实数据,见__wt_page_alloc
#define pg_row u.row

Expand Down Expand Up @@ -1023,6 +1033,7 @@ struct __wt_ref {
* or NULL if page created in-memory.
*/
//数据对应的磁盘地址???????
//例如evict reconcile流程中的__wt_multi_to_ref,指向该page对应的磁盘元数据信息
void *addr;

/*
Expand Down Expand Up @@ -1248,6 +1259,7 @@ struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */
* WT_ROW_FOREACH --
* Walk the entries of an in-memory row-store leaf page.
*/
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程
#define WT_ROW_FOREACH(page, rip, i) \
for ((i) = (page)->entries, (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i))
#define WT_ROW_FOREACH_REVERSE(page, rip, i) \
Expand Down Expand Up @@ -1557,6 +1569,7 @@ struct __wt_insert_head {
* following macros return an array entry if the array of pointers and the specific structure exist,
* else NULL.
*/
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程
#define WT_ROW_INSERT_SLOT(page, slot) \
((page)->modify == NULL || (page)->modify->mod_row_insert == NULL ? \
NULL : \
Expand All @@ -1571,6 +1584,7 @@ struct __wt_insert_head {
* insert array. That's because the insert array requires an extra slot to hold keys that sort
* before any key found on the original page.
*/
//pg_row指向磁盘KV相关数据,mod_row_insert指向内存相关KV数据,mod_row_update记录内存中同一个K的变更过程
#define WT_ROW_INSERT_SMALLEST(page) \
((page)->modify == NULL || (page)->modify->mod_row_insert == NULL ? \
NULL : \
Expand Down
4 changes: 3 additions & 1 deletion wiredtiger-11.1.0/wiredtiger-11.1.0/src/include/btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ split_pct - The percentage of the leaf_page_max we will fill on-disk pages up to
uint32_t maxleafpage; /* Leaf page max size */
uint32_t maxleafkey; /* Leaf page max key size */
uint32_t maxleafvalue; /* Leaf page max value size */
//注意cache_size这里是/1000而不是除100
//memory_page_max配置默认5M,取MIN(5M, (conn->cache->eviction_dirty_trigger * cache_size) / 1000) example测试也就是默认2M
uint64_t maxmempage; /* In-memory page max size */
//4 * WT_MAX(btree->maxintlpage, btree->maxleafpage);
Expand Down Expand Up @@ -198,7 +199,8 @@ split_pct - The percentage of the leaf_page_max we will fill on-disk pages up to

//__wt_btree_open->__wt_blkcache_open
WT_BM *bm; /* Block manager reference */ //__wt_btree.bm
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
//WT_BLOCK_HEADER_SIZE
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ //yang add todo xxxxx 备注长度错了,容易误解

//__rec_set_page_write_gen中自增
uint64_t write_gen; /* Write generation */
Expand Down
23 changes: 13 additions & 10 deletions wiredtiger-11.1.0/wiredtiger-11.1.0/src/include/btree_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -1715,7 +1715,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_INSERT *ins;
WT_INSERT_HEAD *ins_head;
size_t size;
int count;
int count1 = 0, count2 = 0;

btree = S2BT(session);

Expand Down Expand Up @@ -1744,7 +1744,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
* correctness (the page must be reconciled again before being evicted after the split,
* information from a previous reconciliation will be wrong, so we can't evict immediately).
*/
//这里是大阈值,如果page内存找过这个阈值,则不能进行memsplite,而是__evict_reconcile
//这里是大阈值,如果page内存超过这个阈值,则不能进行mem split,而是__evict_reconcile
if (page->memory_footprint < btree->splitmempage)
return (false);
if (WT_PAGE_IS_INTERNAL(page))
Expand Down Expand Up @@ -1774,14 +1774,14 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
//page消耗的内存较高,大于maxleafpage * 2,并且至少有5个KV
#define WT_MAX_SPLIT_COUNT 5
if (page->memory_footprint > (size_t)btree->maxleafpage * 2) {
for (count = 0, ins = ins_head->head[0]; ins != NULL; ins = ins->next[0]) {
if (++count < WT_MAX_SPLIT_COUNT)
for (count1 = 0, ins = ins_head->head[0]; ins != NULL; ins = ins->next[0]) {
if (++count1 < WT_MAX_SPLIT_COUNT)
continue;

//当一个page使用内存较高的时候一般从这里返回
WT_STAT_CONN_DATA_INCR(session, cache_inmem_splittable);
printf("yang test ......111111........page->entries:%d..........__wt_leaf_page_can_split......count:%d..........\r\n",
(int)page->entries, count);
(int)page->entries, count1);
return (true);
}

Expand All @@ -1797,17 +1797,20 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
#define WT_MIN_SPLIT_COUNT 30
#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
//通过level2大概算一下有多少KV及使用的内存,这样可以避免扫描所有KV影响性能
for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; ins != NULL;
for (count2 = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; ins != NULL;
ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
count += WT_MIN_SPLIT_MULTIPLIER;
count2 += WT_MIN_SPLIT_MULTIPLIER;
size += WT_MIN_SPLIT_MULTIPLIER * (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd));
if (count > WT_MIN_SPLIT_COUNT && size > (size_t)btree->maxleafpage) {
if (count2 > WT_MIN_SPLIT_COUNT && size > (size_t)btree->maxleafpage) {
WT_STAT_CONN_DATA_INCR(session, cache_inmem_splittable);
printf("yang test ..__wt_leaf_page_can_split....sssssssssssssssss........count:%d..........size:%d................\r\n", (int)count, (int)size);
printf("yang test ..__wt_leaf_page_can_split....sssssssssssssssss........count2:%d..........size:%d................\r\n", (int)count2, (int)size);
return (true);
}
}
printf("yang test ..__wt_leaf_page_can_split....11111111111111........count:%d..........size:%d................\r\n", (int)count, (int)size);

printf("yang test ..__wt_leaf_page_can_split....11111111111111...page->memory_footprint:%d \
..btree->splitmempage:%d...count1:%d..........size:%d.....count2:%d...........\r\n",
(int)page->memory_footprint, (int)btree->splitmempage, (int)count1, (int)size, (int)count2);
return (false);
}

Expand Down
1 change: 1 addition & 0 deletions wiredtiger-11.1.0/wiredtiger-11.1.0/src/include/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ struct __wt_cache {
uint64_t app_waits; /* User threads waited for cache */
uint64_t app_evicts; /* Pages evicted by user threads */

//也就是历史最大的page->memory_footprint
uint64_t evict_max_page_size; /* Largest page seen at eviction */
struct timespec stuck_time; /* Stuck time */

Expand Down
Loading

0 comments on commit e0accd8

Please sign in to comment.