Skip to content

Commit

Permalink
Import wiredtiger: 63b8cf2e0b786296f5b99c363e604a3348299c06 from bran…
Browse files Browse the repository at this point in the history
…ch mongodb-4.2

ref: c91b804126..63b8cf2e0b
for: 4.1.3

WT-3735       Add a workgen workload that generates a lot of page splits
WT-3894       Timestamp queue implementation and statistics improvements
WT-4104       Fix test/format failure when comparing data content with Berkeley DB.
WT-4144       Fix rollback_to_stable with lookaside history
WT-4176       Expose a WT_SESSION.query_timestamp method
WT-4211       Add automated test for long running prepared transactions
WT-4212       Update lookaside schema to handle prepared transactions
WT-4216       Use separate counters for page_swap yield and sleep
WT-4233       Change log corruption errors to warnings and truncate log
WT-4239       Don't allow checkpoints to perform insert-splits in the tree
WT-4241       GNU-stack section should never be conditionally compiled out
WT-4248       Fix checkpoints in schema_abort for slow machines
WT-4249       Attempt to discard dirty page during verify operation.
WT-4251       Prepared updates cannot be discarded
WT-4252       Btree debug functions can leak scratch buffers on error.
WT-4253       Btree debug function to do blind reads doesn't handle row-store internal pages
WT-4256       Loosen check during rollback_to_stable
WT-4257       Don't assume timestamps from lookaside are aligned in memory
WT-4262       Lock deleted children in eviction of internal pages
WT-4263       Use the right tree when copying a key for a lookaside write
WT-4264       Compaction can race with page modifications
  • Loading branch information
lukech committed Aug 27, 2018
1 parent b93c2dc commit 311f3ca
Show file tree
Hide file tree
Showing 42 changed files with 1,194 additions and 517 deletions.
83 changes: 83 additions & 0 deletions src/third_party/wiredtiger/bench/workgen/runner/split_stress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python
#
# Public Domain 2014-2018 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#

# A workload with small cache, small internal and leaf page sizes, faster splits
# and multiple threads inserting keys in random order. It stresses the page
# splits in order to catch split races.
#
from runner import *
from wiredtiger import *
from workgen import *

context = Context()
# Connection configuration: a 100MB cache, logging disabled, and fast
# statistics logged every second.
conn_config = "cache_size=100MB,log=(enabled=false),statistics=[fast],statistics_log=(wait=1,json=false)"
conn = wiredtiger_open("WT_TEST", "create," + conn_config)
s = conn.open_session("")

# Table configuration: 8k internal and leaf pages with a 1MB in-memory page
# limit, as described in the header comment (small pages, faster splits).
table_config = "leaf_page_max=8k,internal_page_max=8k,leaf_item_max=1433,internal_item_max=3100,type=file,memory_page_max=1MB,split_deepen_min_child=100"
tables = []
table_count = 3
for i in range(table_count):
    tname = "file:test" + str(i)
    table = Table(tname)
    s.create(tname, 'key_format=S,value_format=S,' + table_config)
    table.options.key_size = 64
    table.options.value_size = 200
    table.options.range = 100000000 # 100 million
    tables.append(table)

# Populate phase.
populate_threads = 1
icount = 50000
# There are multiple tables to be filled during populate,
# the icount is split between them all.
pop_ops = Operation(Operation.OP_INSERT, tables[0])
pop_ops = op_multi_table(pop_ops, tables)
# Floor division keeps the per-thread operation count an integer under both
# Python 2 and Python 3; true division would produce a float on Python 3.
nops_per_thread = icount // (populate_threads * table_count)
pop_thread = Thread(pop_ops * nops_per_thread)
pop_workload = Workload(context, populate_threads * pop_thread)
print('populate:')
pop_workload.run(conn)

# Run phase: 20 copies of the insert thread spread over all tables.
# NOTE(review): the meaning of the third op_multi_table argument (False) is
# defined in the runner package, not here -- confirm against its definition.
ops = Operation(Operation.OP_INSERT, tables[0])
ops = op_multi_table(ops, tables, False)
thread0 = Thread(ops)

workload = Workload(context, 20 * thread0)
workload.options.report_interval = 5
workload.options.run_time = 300
print('Split stress workload running...')
workload.run(conn)

# Write the latency results of the run phase next to the database files.
latency_filename = "WT_TEST/latency.out"
latency.workload_latency(workload, latency_filename)
conn.close()
10 changes: 10 additions & 0 deletions src/third_party/wiredtiger/dist/api_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,16 @@ def __cmp__(self, other):
type='list'),
]),

'WT_SESSION.query_timestamp' : Method([
Config('get', 'read', r'''
specify which timestamp to query: \c commit returns the most recently
set commit_timestamp. \c first_commit returns the first set
commit_timestamp. \c prepare returns the timestamp used in preparing a
transaction. \c read returns the timestamp at which the transaction is
reading at. See @ref transaction_timestamps''',
choices=['commit', 'first_commit', 'prepare', 'read']),
]),

'WT_SESSION.rebalance' : Method([]),
'WT_SESSION.rename' : Method([]),
'WT_SESSION.reset' : Method([]),
Expand Down
5 changes: 4 additions & 1 deletion src/third_party/wiredtiger/dist/stat_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ def __init__(self, name, desc, flags=''):
##########################################
SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
SessionStat('session_open', 'open session count', 'no_clear,no_scale'),
SessionStat('session_query_ts', 'session query timestamp calls'),
SessionStat('session_table_alter_fail', 'table alter failed calls', 'no_clear,no_scale'),
SessionStat('session_table_alter_skip', 'table alter unchanged and skipped', 'no_clear,no_scale'),
SessionStat('session_table_alter_success', 'table alter successful calls', 'no_clear,no_scale'),
Expand Down Expand Up @@ -508,9 +509,10 @@ def __init__(self, name, desc, flags=''):
TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_commit', 'transactions committed'),
TxnStat('txn_commit_queue_empty', 'commit timestamp queue insert to empty'),
TxnStat('txn_commit_queue_head', 'commit timestamp queue inserts to head'),
TxnStat('txn_commit_queue_inserts', 'commit timestamp queue inserts total'),
TxnStat('txn_commit_queue_len', 'commit timestamp queue length'),
TxnStat('txn_commit_queue_tail', 'commit timestamp queue inserts to tail'),
TxnStat('txn_commit_queue_walked', 'commit timestamp queue entries walked'),
TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
Expand All @@ -527,6 +529,7 @@ def __init__(self, name, desc, flags=''):
TxnStat('txn_read_queue_head', 'read timestamp queue inserts to head'),
TxnStat('txn_read_queue_inserts', 'read timestamp queue inserts total'),
TxnStat('txn_read_queue_len', 'read timestamp queue length'),
TxnStat('txn_read_queue_walked', 'read timestamp queue entries walked'),
TxnStat('txn_rollback', 'transactions rolled back'),
TxnStat('txn_rollback_las_removed', 'rollback to stable updates removed from lookaside'),
TxnStat('txn_rollback_to_stable', 'rollback to stable calls'),
Expand Down
2 changes: 1 addition & 1 deletion src/third_party/wiredtiger/import.data
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"commit": "c91b80412603f283532e267893f9238dd4a5ec0f",
"commit": "63b8cf2e0b786296f5b99c363e604a3348299c06",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.2"
Expand Down
1 change: 1 addition & 0 deletions src/third_party/wiredtiger/lang/java/java_doc.i
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ COPYDOC(__wt_session, WT_SESSION, commit_transaction)
COPYDOC(__wt_session, WT_SESSION, prepare_transaction)
COPYDOC(__wt_session, WT_SESSION, rollback_transaction)
COPYDOC(__wt_session, WT_SESSION, timestamp_transaction)
COPYDOC(__wt_session, WT_SESSION, query_timestamp)
COPYDOC(__wt_session, WT_SESSION, checkpoint)
COPYDOC(__wt_session, WT_SESSION, snapshot)
COPYDOC(__wt_session, WT_SESSION, transaction_pinned_range)
Expand Down
40 changes: 18 additions & 22 deletions src/third_party/wiredtiger/src/block/block_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,39 +181,35 @@ err: __wt_scr_free(session, &tmp);
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_block_read_off_blind --
* Read the block at an offset, try to figure out what it looks like,
* debugging only.
* Read the block at an offset, return the size and checksum, debugging
* only.
*/
int
__wt_block_read_off_blind(
WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset)
__wt_block_read_off_blind(WT_SESSION_IMPL *session,
WT_BLOCK *block, wt_off_t offset, uint32_t *sizep, uint32_t *checksump)
{
WT_BLOCK_HEADER *blk;
uint32_t checksum, size;
WT_DECL_ITEM(tmp);
WT_DECL_RET;

*sizep = 0;
*checksump = 0;

/*
* Make sure the buffer is large enough for the header and read the
* first allocation-size block.
*/
WT_RET(__wt_buf_init(session, buf, block->allocsize));
WT_RET(__wt_read(
session, block->fh, offset, (size_t)block->allocsize, buf->mem));
blk = WT_BLOCK_HEADER_REF(buf->mem);
WT_RET(__wt_scr_alloc(session, block->allocsize, &tmp));
WT_ERR(__wt_read(
session, block->fh, offset, (size_t)block->allocsize, tmp->mem));
blk = WT_BLOCK_HEADER_REF(tmp->mem);
__wt_block_header_byteswap(blk);

/*
* Copy out the size and checksum (we're about to re-use the buffer),
* and if the size isn't insane, read the rest of the block.
*/
size = blk->disk_size;
checksum = blk->checksum;
if (__wt_block_offset_invalid(block, offset, size))
WT_RET_MSG(session, EINVAL,
"block at offset %" PRIuMAX " cannot be a valid block, no "
"read attempted",
(uintmax_t)offset);
return (
__wt_block_read_off(session, block, buf, offset, size, checksum));
*sizep = blk->disk_size;
*checksump = blk->checksum;

err: __wt_scr_free(session, &tmp);
return (ret);
}
#endif

Expand Down
44 changes: 17 additions & 27 deletions src/third_party/wiredtiger/src/btree/bt_compact.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ static int
__compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
WT_BM *bm;
WT_DECL_RET;
WT_MULTI *multi;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
Expand All @@ -28,13 +27,8 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)

bm = S2BT(session)->bm;
page = ref->page;
mod = page->modify;

/*
* If the page is clean, test the original addresses.
* If the page is a replacement, test the replacement addresses.
* Ignore empty pages, they get merged into the parent.
*/
/* If the page is clean, test the original addresses. */
if (__wt_page_evict_clean(page)) {
__wt_ref_info(ref, &addr, &addr_size, NULL);
if (addr == NULL)
Expand All @@ -44,34 +38,31 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
}

/*
* The page's modification information can change underfoot if the page
* is being reconciled, serialize with reconciliation.
* If the page is a replacement, test the replacement addresses.
* Ignore empty pages, they get merged into the parent.
*
* Page-modify variable initialization done here because the page could
* be modified while we're looking at it, so the page modified structure
* may appear at any time (but cannot disappear). We've confirmed there
* is a page modify structure, it's OK to look at it.
*/
if (mod->rec_result == WT_PM_REC_REPLACE ||
mod->rec_result == WT_PM_REC_MULTIBLOCK)
WT_PAGE_LOCK(session, page);

mod = page->modify;
if (mod->rec_result == WT_PM_REC_REPLACE)
ret = bm->compact_page_skip(bm, session,
mod->mod_replace.addr, mod->mod_replace.size, skipp);
return (bm->compact_page_skip(bm, session,
mod->mod_replace.addr, mod->mod_replace.size, skipp));

if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
for (multi = mod->mod_multi,
i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
if (multi->addr.addr == NULL)
continue;
if ((ret = bm->compact_page_skip(bm, session,
multi->addr.addr, multi->addr.size, skipp)) != 0)
break;
WT_RET(bm->compact_page_skip(bm, session,
multi->addr.addr, multi->addr.size, skipp));
if (!*skipp)
break;
}

if (mod->rec_result == WT_PM_REC_REPLACE ||
mod->rec_result == WT_PM_REC_MULTIBLOCK)
WT_PAGE_UNLOCK(session, page);

return (ret);
return (0);
}

/*
Expand All @@ -98,10 +89,9 @@ __compact_rewrite_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
* There are two ways we call reconciliation: checkpoints and eviction.
* Get the tree's flush lock which blocks threads writing pages for
* checkpoints. If checkpoint is holding the lock, quit working this
* file, we'll visit it again in our next pass.
*
* Serializing with eviction is not quite as simple, and it gets done
* in the underlying function that checks modification information.
* file, we'll visit it again in our next pass. We don't have to worry
* about eviction, we're holding a hazard pointer on the WT_REF, it's
* not going anywhere.
*/
WT_RET(__wt_spin_trylock(session, &btree->flush_lock));

Expand Down
29 changes: 13 additions & 16 deletions src/third_party/wiredtiger/src/btree/bt_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,24 +253,25 @@ static int
__debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
{
WT_BTREE *btree;
WT_DECL_RET;

memset(ds, 0, sizeof(WT_DBG));

ds->session = session;

WT_RET(__wt_scr_alloc(session, 512, &ds->t1));
WT_RET(__wt_scr_alloc(session, 512, &ds->t2));
WT_ERR(__wt_scr_alloc(session, 512, &ds->t1));
WT_ERR(__wt_scr_alloc(session, 512, &ds->t2));

/*
* If we weren't given a file, we use the default event handler, and
* we'll have to buffer messages.
*/
if (ofile == NULL) {
WT_RET(__wt_scr_alloc(session, 512, &ds->msg));
WT_ERR(__wt_scr_alloc(session, 512, &ds->msg));
ds->f = __dmsg_event;
} else {
if ((ds->fp = fopen(ofile, "w")) == NULL)
return (__wt_set_return(session, EIO));
WT_ERR(__wt_set_return(session, EIO));
__wt_stream_set_line_buffer(ds->fp);
ds->f = __dmsg_file;
}
Expand All @@ -279,6 +280,9 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
ds->key_format = btree->key_format;
ds->value_format = btree->value_format;
return (0);

err: WT_TRET(__debug_wrapup(ds));
return (ret);
}

/*
Expand Down Expand Up @@ -366,25 +370,18 @@ int
__wt_debug_offset_blind(
WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile)
{
WT_DECL_ITEM(buf);
WT_DECL_RET;
uint32_t checksum, size;

WT_ASSERT(session, S2BT_SAFE(session) != NULL);

/*
* This routine depends on the default block manager's view of files,
* where an address consists of a file offset, length, and checksum.
* This is for debugging only. Other block managers might not see a
* file or address the same way, that's why there's no block manager
* method.
* This is for debugging only.
*/
WT_RET(__wt_scr_alloc(session, 1024, &buf));
WT_ERR(__wt_block_read_off_blind(
session, S2BT(session)->bm->block, buf, offset));
ret = __wt_debug_disk(session, buf->mem, ofile);

err: __wt_scr_free(session, &buf);
return (ret);
WT_RET(__wt_block_read_off_blind(
session, S2BT(session)->bm->block, offset, &size, &checksum));
return (__wt_debug_offset(session, offset, size, checksum, ofile));
}

/*
Expand Down
9 changes: 4 additions & 5 deletions src/third_party/wiredtiger/src/btree/bt_delete.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,13 +357,12 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
/*
* Give the page a modify structure.
*
* If the tree is already dirty and so will be written, mark the page
* dirty. (We'd like to free the deleted pages, but if the handle is
* read-only or if the application never modifies the tree, we're not
* able to do so.)
* Mark tree dirty, unless the handle is read-only.
* (We'd like to free the deleted pages, but if the handle is read-only,
* we're not able to do so.)
*/
WT_RET(__wt_page_modify_init(session, page));
if (btree->modified)
if (!F_ISSET(btree, WT_BTREE_READONLY))
__wt_page_modify_set(session, page);

if (ref->page_del != NULL &&
Expand Down
Loading

0 comments on commit 311f3ca

Please sign in to comment.