forked from facebook/rocksdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
db_bench_tool.cc
8179 lines (7371 loc) · 296 KB
/
db_bench_tool.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifdef GFLAGS
#ifdef NUMA
#include <numa.h>
#endif
#ifndef OS_WIN
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef __APPLE__
#include <mach/host_info.h>
#include <mach/mach_host.h>
#include <sys/sysctl.h>
#endif
#ifdef __FreeBSD__
#include <sys/sysctl.h>
#endif
#include <atomic>
#include <cinttypes>
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>
#include <unordered_map>
#include "db/db_impl/db_impl.h"
#include "db/malloc_stats.h"
#include "db/version_set.h"
#include "hdfs/env_hdfs.h"
#include "monitoring/histogram.h"
#include "monitoring/statistics.h"
#include "options/cf_options.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/rate_limiter.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/stats_history.h"
#include "rocksdb/utilities/object_registry.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/options_type.h"
#include "rocksdb/utilities/options_util.h"
#ifndef ROCKSDB_LITE
#include "rocksdb/utilities/replayer.h"
#endif // ROCKSDB_LITE
#include "rocksdb/utilities/sim_cache.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
#include "rocksdb/write_batch.h"
#include "test_util/testutil.h"
#include "test_util/transaction_test_util.h"
#include "tools/simulated_hybrid_file_system.h"
#include "util/cast_util.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/gflags_compat.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "util/stderr_logger.h"
#include "util/string_util.h"
#include "util/xxhash.h"
#include "utilities/blob_db/blob_db.h"
#include "utilities/merge_operators.h"
#include "utilities/merge_operators/bytesxor.h"
#include "utilities/merge_operators/sortlist.h"
#include "utilities/persistent_cache/block_cache_tier.h"
#ifdef MEMKIND
#include "memory/memkind_kmem_allocator.h"
#endif
#ifdef OS_WIN
#include <io.h> // open/close
#endif
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::RegisterFlagValidator;
using GFLAGS_NAMESPACE::SetUsageMessage;
#ifdef ROCKSDB_LITE
#define IF_ROCKSDB_LITE(Then, Else) Then
#else
#define IF_ROCKSDB_LITE(Then, Else) Else
#endif
DEFINE_string(
benchmarks,
"fillseq,"
"fillseqdeterministic,"
"fillsync,"
"fillrandom,"
"filluniquerandomdeterministic,"
"overwrite,"
"readrandom,"
"newiterator,"
"newiteratorwhilewriting,"
"seekrandom,"
"seekrandomwhilewriting,"
"seekrandomwhilemerging,"
"readseq,"
"readreverse,"
"compact,"
"compactall,"
"flush,"
IF_ROCKSDB_LITE("",
"compact0,"
"compact1,"
"waitforcompaction,"
)
"multireadrandom,"
"mixgraph,"
"readseq,"
"readtorowcache,"
"readtocache,"
"readreverse,"
"readwhilewriting,"
"readwhilemerging,"
"readwhilescanning,"
"readrandomwriterandom,"
"updaterandom,"
"xorupdaterandom,"
"approximatesizerandom,"
"randomwithverify,"
"fill100K,"
"crc32c,"
"xxhash,"
"compress,"
"uncompress,"
"acquireload,"
"fillseekseq,"
"randomtransaction,"
"randomreplacekeys,"
"timeseries,"
"getmergeoperands",
"Comma-separated list of operations to run in the specified"
" order. Available benchmarks:\n"
"\tfillseq -- write N values in sequential key"
" order in async mode\n"
"\tfillseqdeterministic -- write N values in the specified"
" key order and keep the shape of the LSM tree\n"
"\tfillrandom -- write N values in random key order in async"
" mode\n"
"\tfilluniquerandomdeterministic -- write N values in a random"
" key order and keep the shape of the LSM tree\n"
"\toverwrite -- overwrite N values in random key order in"
" async mode\n"
"\tfillsync -- write N/1000 values in random key order in "
"sync mode\n"
"\tfill100K -- write N/1000 100K values in random order in"
" async mode\n"
"\tdeleteseq -- delete N keys in sequential order\n"
"\tdeleterandom -- delete N keys in random order\n"
"\treadseq -- read N times sequentially\n"
"\treadtocache -- 1 thread reading database sequentially\n"
"\treadreverse -- read N times in reverse order\n"
"\treadrandom -- read N times in random order\n"
"\treadmissing -- read N missing keys in random order\n"
"\treadwhilewriting -- 1 writer, N threads doing random "
"reads\n"
"\treadwhilemerging -- 1 merger, N threads doing random "
"reads\n"
"\treadwhilescanning -- 1 thread doing full table scan, "
"N threads doing random reads\n"
"\treadrandomwriterandom -- N threads doing random-read, "
"random-write\n"
"\tupdaterandom -- N threads doing read-modify-write for random "
"keys\n"
"\txorupdaterandom -- N threads doing read-XOR-write for "
"random keys\n"
"\tappendrandom -- N threads doing read-modify-write with "
"growing values\n"
"\tmergerandom -- same as updaterandom/appendrandom using merge"
" operator. "
"Must be used with merge_operator\n"
"\treadrandommergerandom -- perform N random read-or-merge "
"operations. Must be used with merge_operator\n"
"\tnewiterator -- repeated iterator creation\n"
"\tseekrandom -- N random seeks, call Next seek_nexts times "
"per seek\n"
"\tseekrandomwhilewriting -- seekrandom and 1 thread doing "
"overwrite\n"
"\tseekrandomwhilemerging -- seekrandom and 1 thread doing "
"merge\n"
"\tcrc32c -- repeated crc32c of 4K of data\n"
"\txxhash -- repeated xxHash of 4K of data\n"
"\tacquireload -- load N*1000 times\n"
"\tfillseekseq -- write N values in sequential key, then read "
"them by seeking to each key\n"
"\trandomtransaction -- execute N random transactions and "
"verify correctness\n"
"\trandomreplacekeys -- randomly replaces N keys by deleting "
"the old version and putting the new version\n\n"
"\ttimeseries -- 1 writer generates time series data "
"and multiple readers doing random reads on id\n\n"
"Meta operations:\n"
"\tcompact -- Compact the entire DB; If multiple, randomly choose one\n"
"\tcompactall -- Compact the entire DB\n"
IF_ROCKSDB_LITE("",
"\tcompact0 -- compact L0 into L1\n"
"\tcompact1 -- compact L1 into L2\n"
"\twaitforcompaction - pause until compaction is (probably) done\n"
)
"\tflush - flush the memtable\n"
"\tstats -- Print DB stats\n"
"\tresetstats -- Reset DB stats\n"
"\tlevelstats -- Print the number of files and bytes per level\n"
"\tmemstats -- Print memtable stats\n"
"\tsstables -- Print sstable info\n"
"\theapprofile -- Dump a heap profile (if supported by this port)\n"
#ifndef ROCKSDB_LITE
"\treplay -- replay the trace file specified with trace_file\n"
#endif // ROCKSDB_LITE
"\tgetmergeoperands -- Insert lots of merge records which are a list of "
"sorted ints for a key and then compare performance of lookup for another "
"key "
"by doing a Get followed by binary searching in the large sorted list vs "
"doing a GetMergeOperands and binary searching in the operands which are"
"sorted sub-lists. The MergeOperator used is sortlist.h\n");
DEFINE_int64(num, 1000000, "Number of key/values to place in database");
DEFINE_int64(numdistinct, 1000,
"Number of distinct keys to use. Used in RandomWithVerify to "
"read/write on fewer keys so that gets are more likely to find the"
" key and puts are more likely to update the same key");
DEFINE_int64(merge_keys, -1,
"Number of distinct keys to use for MergeRandom and "
"ReadRandomMergeRandom. "
"If negative, there will be FLAGS_num keys.");
DEFINE_int32(num_column_families, 1, "Number of Column Families to use.");
DEFINE_int32(
num_hot_column_families, 0,
"Number of Hot Column Families. If more than 0, only write to this "
"number of column families. After finishing all the writes to them, "
"create new set of column families and insert to them. Only used "
"when num_column_families > 1.");
DEFINE_string(column_family_distribution, "",
"Comma-separated list of percentages, where the ith element "
"indicates the probability of an op using the ith column family. "
"The number of elements must be `num_hot_column_families` if "
"specified; otherwise, it must be `num_column_families`. The "
"sum of elements must be 100. E.g., if `num_column_families=4`, "
"and `num_hot_column_families=0`, a valid list could be "
"\"10,20,30,40\".");
DEFINE_int64(reads, -1, "Number of read operations to do. "
"If negative, do FLAGS_num reads.");
DEFINE_int64(deletes, -1, "Number of delete operations to do. "
"If negative, do FLAGS_num deletions.");
DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality");
DEFINE_int64(seed, 0, "Seed base for random number generators. "
"When 0 it is deterministic.");
DEFINE_int32(threads, 1, "Number of concurrent threads to run.");
DEFINE_int32(duration, 0, "Time in seconds for the random-ops tests to run."
" When 0 then num & reads determine the test duration");
DEFINE_string(value_size_distribution_type, "fixed",
"Value size distribution type: fixed, uniform, normal");
DEFINE_int32(value_size, 100, "Size of each value in fixed distribution");
static unsigned int value_size = 100;
DEFINE_int32(value_size_min, 100, "Min size of random value");
DEFINE_int32(value_size_max, 102400, "Max size of random value");
DEFINE_int32(seek_nexts, 0,
"How many times to call Next() after Seek() in "
"fillseekseq, seekrandom, seekrandomwhilewriting and "
"seekrandomwhilemerging");
DEFINE_bool(reverse_iterator, false,
"When true use Prev rather than Next for iterators that do "
"Seek and then Next");
DEFINE_int64(max_scan_distance, 0,
"Used to define iterate_upper_bound (or iterate_lower_bound "
"if FLAGS_reverse_iterator is set to true) when value is nonzero");
DEFINE_bool(use_uint64_comparator, false, "use Uint64 user comparator");
DEFINE_int64(batch_size, 1, "Batch size");
static bool ValidateKeySize(const char* /*flagname*/, int32_t /*value*/) {
return true;
}
static bool ValidateUint32Range(const char* flagname, uint64_t value) {
if (value > std::numeric_limits<uint32_t>::max()) {
fprintf(stderr, "Invalid value for --%s: %lu, overflow\n", flagname,
(unsigned long)value);
return false;
}
return true;
}
DEFINE_int32(key_size, 16, "size of each key");
DEFINE_int32(user_timestamp_size, 0,
"number of bytes in a user-defined timestamp");
DEFINE_int32(num_multi_db, 0,
"Number of DBs used in the benchmark. 0 means single DB.");
DEFINE_double(compression_ratio, 0.5, "Arrange to generate values that shrink"
" to this fraction of their original size after compression");
DEFINE_double(
overwrite_probability, 0.0,
"Used in 'filluniquerandom' benchmark: for each write operation, "
"we give a probability to perform an overwrite instead. The key used for "
"the overwrite is randomly chosen from the last 'overwrite_window_size' "
"keys "
"previously inserted into the DB. "
"Valid overwrite_probability values: [0.0, 1.0].");
DEFINE_uint32(overwrite_window_size, 1,
"Used in 'filluniquerandom' benchmark. For each write "
"operation, when "
"the overwrite_probability flag is set by the user, the key used "
"to perform "
"an overwrite is randomly chosen from the last "
"'overwrite_window_size' keys "
"previously inserted into the DB. "
"Warning: large values can affect throughput. "
"Valid overwrite_window_size values: [1, kMaxUint32].");
DEFINE_uint64(
disposable_entries_delete_delay, 0,
"Minimum delay in microseconds for the series of Deletes "
"to be issued. When 0 the insertion of the last disposable entry is "
"immediately followed by the issuance of the Deletes. "
"(only compatible with fillanddeleteuniquerandom benchmark).");
DEFINE_uint64(disposable_entries_batch_size, 0,
"Number of consecutively inserted disposable KV entries "
"that will be deleted after 'delete_delay' microseconds. "
"A series of Deletes is always issued once all the "
"disposable KV entries it targets have been inserted "
"into the DB. When 0 no deletes are issued and a "
"regular 'filluniquerandom' benchmark occurs. "
"(only compatible with fillanddeleteuniquerandom benchmark)");
DEFINE_int32(disposable_entries_value_size, 64,
"Size of the values (in bytes) of the entries targeted by "
"selective deletes. "
"(only compatible with fillanddeleteuniquerandom benchmark)");
DEFINE_uint64(
persistent_entries_batch_size, 0,
"Number of KV entries being inserted right before the deletes "
"targeting the disposable KV entries are issued. These "
"persistent keys are not targeted by the deletes, and will always "
"remain valid in the DB. (only compatible with "
"--benchmarks='fillanddeleteuniquerandom' "
"and used when--disposable_entries_batch_size is > 0).");
DEFINE_int32(persistent_entries_value_size, 64,
"Size of the values (in bytes) of the entries not targeted by "
"deletes. (only compatible with "
"--benchmarks='fillanddeleteuniquerandom' "
"and used when--disposable_entries_batch_size is > 0).");
DEFINE_double(read_random_exp_range, 0.0,
"Read random's key will be generated using distribution of "
"num * exp(-r) where r is uniform number from 0 to this value. "
"The larger the number is, the more skewed the reads are. "
"Only used in readrandom and multireadrandom benchmarks.");
DEFINE_bool(histogram, false, "Print histogram of operation timings");
DEFINE_bool(enable_numa, false,
"Make operations aware of NUMA architecture and bind memory "
"and cpus corresponding to nodes together. In NUMA, memory "
"in same node as CPUs are closer when compared to memory in "
"other nodes. Reads can be faster when the process is bound to "
"CPU and memory of same node. Use \"$numactl --hardware\" command "
"to see NUMA memory architecture.");
DEFINE_int64(db_write_buffer_size,
ROCKSDB_NAMESPACE::Options().db_write_buffer_size,
"Number of bytes to buffer in all memtables before compacting");
DEFINE_bool(cost_write_buffer_to_cache, false,
"The usage of memtable is costed to the block cache");
DEFINE_int64(arena_block_size, ROCKSDB_NAMESPACE::Options().arena_block_size,
"The size, in bytes, of one block in arena memory allocation.");
DEFINE_int64(write_buffer_size, ROCKSDB_NAMESPACE::Options().write_buffer_size,
"Number of bytes to buffer in memtable before compacting");
DEFINE_int32(max_write_buffer_number,
ROCKSDB_NAMESPACE::Options().max_write_buffer_number,
"The number of in-memory memtables. Each memtable is of size"
" write_buffer_size bytes.");
DEFINE_int32(min_write_buffer_number_to_merge,
ROCKSDB_NAMESPACE::Options().min_write_buffer_number_to_merge,
"The minimum number of write buffers that will be merged together"
"before writing to storage. This is cheap because it is an"
"in-memory merge. If this feature is not enabled, then all these"
"write buffers are flushed to L0 as separate files and this "
"increases read amplification because a get request has to check"
" in all of these files. Also, an in-memory merge may result in"
" writing less data to storage if there are duplicate records "
" in each of these individual write buffers.");
DEFINE_int32(max_write_buffer_number_to_maintain,
ROCKSDB_NAMESPACE::Options().max_write_buffer_number_to_maintain,
"The total maximum number of write buffers to maintain in memory "
"including copies of buffers that have already been flushed. "
"Unlike max_write_buffer_number, this parameter does not affect "
"flushing. This controls the minimum amount of write history "
"that will be available in memory for conflict checking when "
"Transactions are used. If this value is too low, some "
"transactions may fail at commit time due to not being able to "
"determine whether there were any write conflicts. Setting this "
"value to 0 will cause write buffers to be freed immediately "
"after they are flushed. If this value is set to -1, "
"'max_write_buffer_number' will be used.");
DEFINE_int64(max_write_buffer_size_to_maintain,
ROCKSDB_NAMESPACE::Options().max_write_buffer_size_to_maintain,
"The total maximum size of write buffers to maintain in memory "
"including copies of buffers that have already been flushed. "
"Unlike max_write_buffer_number, this parameter does not affect "
"flushing. This controls the minimum amount of write history "
"that will be available in memory for conflict checking when "
"Transactions are used. If this value is too low, some "
"transactions may fail at commit time due to not being able to "
"determine whether there were any write conflicts. Setting this "
"value to 0 will cause write buffers to be freed immediately "
"after they are flushed. If this value is set to -1, "
"'max_write_buffer_number' will be used.");
DEFINE_int32(max_background_jobs,
ROCKSDB_NAMESPACE::Options().max_background_jobs,
"The maximum number of concurrent background jobs that can occur "
"in parallel.");
DEFINE_int32(num_bottom_pri_threads, 0,
"The number of threads in the bottom-priority thread pool (used "
"by universal compaction only).");
DEFINE_int32(num_high_pri_threads, 0,
"The maximum number of concurrent background compactions"
" that can occur in parallel.");
DEFINE_int32(num_low_pri_threads, 0,
"The maximum number of concurrent background compactions"
" that can occur in parallel.");
DEFINE_int32(max_background_compactions,
ROCKSDB_NAMESPACE::Options().max_background_compactions,
"The maximum number of concurrent background compactions"
" that can occur in parallel.");
DEFINE_int32(base_background_compactions, -1, "DEPRECATED");
DEFINE_uint64(subcompactions, 1,
"Maximum number of subcompactions to divide L0-L1 compactions "
"into.");
static const bool FLAGS_subcompactions_dummy
__attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_subcompactions,
&ValidateUint32Range);
DEFINE_int32(max_background_flushes,
ROCKSDB_NAMESPACE::Options().max_background_flushes,
"The maximum number of concurrent background flushes"
" that can occur in parallel.");
static ROCKSDB_NAMESPACE::CompactionStyle FLAGS_compaction_style_e;
DEFINE_int32(compaction_style,
(int32_t)ROCKSDB_NAMESPACE::Options().compaction_style,
"style of compaction: level-based, universal and fifo");
static ROCKSDB_NAMESPACE::CompactionPri FLAGS_compaction_pri_e;
DEFINE_int32(compaction_pri,
(int32_t)ROCKSDB_NAMESPACE::Options().compaction_pri,
"priority of files to compaction: by size or by data age");
DEFINE_int32(universal_size_ratio, 0,
"Percentage flexibility while comparing file size"
" (for universal compaction only).");
DEFINE_int32(universal_min_merge_width, 0, "The minimum number of files in a"
" single compaction run (for universal compaction only).");
DEFINE_int32(universal_max_merge_width, 0, "The max number of files to compact"
" in universal style compaction");
DEFINE_int32(universal_max_size_amplification_percent, 0,
"The max size amplification for universal style compaction");
DEFINE_int32(universal_compression_size_percent, -1,
"The percentage of the database to compress for universal "
"compaction. -1 means compress everything.");
DEFINE_bool(universal_allow_trivial_move, false,
"Allow trivial move in universal compaction.");
DEFINE_int64(cache_size, 8 << 20, // 8MB
"Number of bytes to use as a cache of uncompressed data");
DEFINE_int32(cache_numshardbits, 6,
"Number of shards for the block cache"
" is 2 ** cache_numshardbits. Negative means use default settings."
" This is applied only if FLAGS_cache_size is non-negative.");
DEFINE_double(cache_high_pri_pool_ratio, 0.0,
"Ratio of block cache reserve for high pri blocks. "
"If > 0.0, we also enable "
"cache_index_and_filter_blocks_with_high_priority.");
DEFINE_bool(use_clock_cache, false,
"Replace default LRU block cache with clock cache.");
DEFINE_int64(simcache_size, -1,
"Number of bytes to use as a simcache of "
"uncompressed data. Nagative value disables simcache.");
DEFINE_bool(cache_index_and_filter_blocks, false,
"Cache index/filter blocks in block cache.");
DEFINE_bool(use_cache_memkind_kmem_allocator, false,
"Use memkind kmem allocator for block cache.");
DEFINE_bool(partition_index_and_filters, false,
"Partition index and filter blocks.");
DEFINE_bool(partition_index, false, "Partition index blocks");
DEFINE_bool(index_with_first_key, false, "Include first key in the index");
DEFINE_bool(
optimize_filters_for_memory,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().optimize_filters_for_memory,
"Minimize memory footprint of filters");
DEFINE_int64(
index_shortening_mode, 2,
"mode to shorten index: 0 for no shortening; 1 for only shortening "
"separaters; 2 for shortening shortening and successor");
DEFINE_int64(metadata_block_size,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().metadata_block_size,
"Max partition size when partitioning index/filters");
// The default reduces the overhead of reading time with flash. With HDD, which
// offers much less throughput, however, this number better to be set to 1.
DEFINE_int32(ops_between_duration_checks, 1000,
"Check duration limit every x ops");
DEFINE_bool(pin_l0_filter_and_index_blocks_in_cache, false,
"Pin index/filter blocks of L0 files in block cache.");
DEFINE_bool(
pin_top_level_index_and_filter, false,
"Pin top-level index of partitioned index/filter blocks in block cache.");
DEFINE_int32(block_size,
static_cast<int32_t>(
ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_size),
"Number of bytes in a block.");
DEFINE_int32(format_version,
static_cast<int32_t>(
ROCKSDB_NAMESPACE::BlockBasedTableOptions().format_version),
"Format version of SST files.");
DEFINE_int32(block_restart_interval,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_restart_interval,
"Number of keys between restart points "
"for delta encoding of keys in data block.");
DEFINE_int32(
index_block_restart_interval,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().index_block_restart_interval,
"Number of keys between restart points "
"for delta encoding of keys in index block.");
DEFINE_int32(read_amp_bytes_per_bit,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().read_amp_bytes_per_bit,
"Number of bytes per bit to be used in block read-amp bitmap");
DEFINE_bool(
enable_index_compression,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().enable_index_compression,
"Compress the index block");
DEFINE_bool(block_align,
ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_align,
"Align data blocks on page size");
DEFINE_int64(prepopulate_block_cache, 0,
"Pre-populate hot/warm blocks in block cache. 0 to disable and 1 "
"to insert during flush");
DEFINE_bool(use_data_block_hash_index, false,
"if use kDataBlockBinaryAndHash "
"instead of kDataBlockBinarySearch. "
"This is valid if only we use BlockTable");
DEFINE_double(data_block_hash_table_util_ratio, 0.75,
"util ratio for data block hash index table. "
"This is only valid if use_data_block_hash_index is "
"set to true");
DEFINE_int64(compressed_cache_size, -1,
"Number of bytes to use as a cache of compressed data.");
DEFINE_int64(row_cache_size, 0,
"Number of bytes to use as a cache of individual rows"
" (0 = disabled).");
DEFINE_int32(open_files, ROCKSDB_NAMESPACE::Options().max_open_files,
"Maximum number of files to keep open at the same time"
" (use default if == 0)");
DEFINE_int32(file_opening_threads,
ROCKSDB_NAMESPACE::Options().max_file_opening_threads,
"If open_files is set to -1, this option set the number of "
"threads that will be used to open files during DB::Open()");
DEFINE_bool(new_table_reader_for_compaction_inputs, true,
"If true, uses a separate file handle for compaction inputs");
DEFINE_int32(compaction_readahead_size, 0, "Compaction readahead size");
DEFINE_int32(log_readahead_size, 0, "WAL and manifest readahead size");
DEFINE_int32(random_access_max_buffer_size, 1024 * 1024,
"Maximum windows randomaccess buffer size");
DEFINE_int32(writable_file_max_buffer_size, 1024 * 1024,
"Maximum write buffer for Writable File");
DEFINE_int32(bloom_bits, -1,
"Bloom filter bits per key. Negative means use default."
"Zero disables.");
DEFINE_bool(use_ribbon_filter, false, "Use Ribbon instead of Bloom filter");
DEFINE_double(memtable_bloom_size_ratio, 0,
"Ratio of memtable size used for bloom filter. 0 means no bloom "
"filter.");
DEFINE_bool(memtable_whole_key_filtering, false,
"Try to use whole key bloom filter in memtables.");
DEFINE_bool(memtable_use_huge_page, false,
"Try to use huge page in memtables.");
DEFINE_bool(use_existing_db, false, "If true, do not destroy the existing"
" database. If you set this flag and also specify a benchmark that"
" wants a fresh database, that benchmark will fail.");
DEFINE_bool(use_existing_keys, false,
"If true, uses existing keys in the DB, "
"rather than generating new ones. This involves some startup "
"latency to load all keys into memory. It is supported for the "
"same read/overwrite benchmarks as `-use_existing_db=true`, which "
"must also be set for this flag to be enabled. When this flag is "
"set, the value for `-num` will be ignored.");
DEFINE_bool(show_table_properties, false,
"If true, then per-level table"
" properties will be printed on every stats-interval when"
" stats_interval is set and stats_per_interval is on.");
DEFINE_string(db, "", "Use the db with the following name.");
// Read cache flags
DEFINE_string(read_cache_path, "",
"If not empty string, a read cache will be used in this path");
DEFINE_int64(read_cache_size, 4LL * 1024 * 1024 * 1024,
"Maximum size of the read cache");
DEFINE_bool(read_cache_direct_write, true,
"Whether to use Direct IO for writing to the read cache");
DEFINE_bool(read_cache_direct_read, true,
"Whether to use Direct IO for reading from read cache");
DEFINE_bool(use_keep_filter, false, "Whether to use a noop compaction filter");
static bool ValidateCacheNumshardbits(const char* flagname, int32_t value) {
if (value >= 20) {
fprintf(stderr, "Invalid value for --%s: %d, must be < 20\n",
flagname, value);
return false;
}
return true;
}
DEFINE_bool(verify_checksum, true,
"Verify checksum for every block read"
" from storage");
DEFINE_bool(statistics, false, "Database statistics");
DEFINE_int32(stats_level, ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers,
"stats level for statistics");
DEFINE_string(statistics_string, "", "Serialized statistics string");
static class std::shared_ptr<ROCKSDB_NAMESPACE::Statistics> dbstats;
DEFINE_int64(writes, -1, "Number of write operations to do. If negative, do"
" --num reads.");
DEFINE_bool(finish_after_writes, false, "Write thread terminates after all writes are finished");
DEFINE_bool(sync, false, "Sync all writes to disk");
DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync");
DEFINE_bool(disable_wal, false, "If true, do not write WAL for write.");
DEFINE_string(wal_dir, "", "If not empty, use the given dir for WAL");
DEFINE_string(truth_db, "/dev/shm/truth_db/dbbench",
"Truth key/values used when using verify");
DEFINE_int32(num_levels, 7, "The total number of levels");
DEFINE_int64(target_file_size_base,
ROCKSDB_NAMESPACE::Options().target_file_size_base,
"Target file size at level-1");
DEFINE_int32(target_file_size_multiplier,
ROCKSDB_NAMESPACE::Options().target_file_size_multiplier,
"A multiplier to compute target level-N file size (N >= 2)");
DEFINE_uint64(max_bytes_for_level_base,
ROCKSDB_NAMESPACE::Options().max_bytes_for_level_base,
"Max bytes for level-1");
DEFINE_bool(level_compaction_dynamic_level_bytes, false,
"Whether level size base is dynamic");
DEFINE_double(max_bytes_for_level_multiplier, 10,
"A multiplier to compute max bytes for level-N (N >= 2)");
static std::vector<int> FLAGS_max_bytes_for_level_multiplier_additional_v;
DEFINE_string(max_bytes_for_level_multiplier_additional, "",
"A vector that specifies additional fanout per level");
DEFINE_int32(level0_stop_writes_trigger,
ROCKSDB_NAMESPACE::Options().level0_stop_writes_trigger,
"Number of files in level-0"
" that will trigger put stop.");
DEFINE_int32(level0_slowdown_writes_trigger,
ROCKSDB_NAMESPACE::Options().level0_slowdown_writes_trigger,
"Number of files in level-0"
" that will slow down writes.");
DEFINE_int32(level0_file_num_compaction_trigger,
ROCKSDB_NAMESPACE::Options().level0_file_num_compaction_trigger,
"Number of files in level-0"
" when compactions start");
DEFINE_uint64(periodic_compaction_seconds,
ROCKSDB_NAMESPACE::Options().periodic_compaction_seconds,
"Files older than this will be picked up for compaction and"
" rewritten to the same level");
static bool ValidateInt32Percent(const char* flagname, int32_t value) {
if (value <= 0 || value>=100) {
fprintf(stderr, "Invalid value for --%s: %d, 0< pct <100 \n",
flagname, value);
return false;
}
return true;
}
DEFINE_int32(readwritepercent, 90, "Ratio of reads to reads/writes (expressed"
" as percentage) for the ReadRandomWriteRandom workload. The "
"default value 90 means 90% operations out of all reads and writes"
" operations are reads. In other words, 9 gets for every 1 put.");
DEFINE_int32(mergereadpercent, 70, "Ratio of merges to merges&reads (expressed"
" as percentage) for the ReadRandomMergeRandom workload. The"
" default value 70 means 70% out of all read and merge operations"
" are merges. In other words, 7 merges for every 3 gets.");
DEFINE_int32(deletepercent, 2, "Percentage of deletes out of reads/writes/"
"deletes (used in RandomWithVerify only). RandomWithVerify "
"calculates writepercent as (100 - FLAGS_readwritepercent - "
"deletepercent), so deletepercent must be smaller than (100 - "
"FLAGS_readwritepercent)");
DEFINE_bool(optimize_filters_for_hits, false,
"Optimizes bloom filters for workloads for most lookups return "
"a value. For now this doesn't create bloom filters for the max "
"level of the LSM to reduce metadata that should fit in RAM. ");
DEFINE_uint64(delete_obsolete_files_period_micros, 0,
"Ignored. Left here for backward compatibility");
DEFINE_int64(writes_before_delete_range, 0,
"Number of writes before DeleteRange is called regularly.");
DEFINE_int64(writes_per_range_tombstone, 0,
"Number of writes between range tombstones");
DEFINE_int64(range_tombstone_width, 100, "Number of keys in tombstone's range");
DEFINE_int64(max_num_range_tombstones, 0,
"Maximum number of range tombstones "
"to insert.");
DEFINE_bool(expand_range_tombstones, false,
"Expand range tombstone into sequential regular tombstones.");
#ifndef ROCKSDB_LITE
// Transactions Options
DEFINE_bool(optimistic_transaction_db, false,
"Open a OptimisticTransactionDB instance. "
"Required for randomtransaction benchmark.");
DEFINE_bool(transaction_db, false,
"Open a TransactionDB instance. "
"Required for randomtransaction benchmark.");
DEFINE_uint64(transaction_sets, 2,
"Number of keys each transaction will "
"modify (use in RandomTransaction only). Max: 9999");
DEFINE_bool(transaction_set_snapshot, false,
"Setting to true will have each transaction call SetSnapshot()"
" upon creation.");
DEFINE_int32(transaction_sleep, 0,
"Max microseconds to sleep in between "
"reading and writing a value (used in RandomTransaction only). ");
DEFINE_uint64(transaction_lock_timeout, 100,
"If using a transaction_db, specifies the lock wait timeout in"
" milliseconds before failing a transaction waiting on a lock");
DEFINE_string(
options_file, "",
"The path to a RocksDB options file. If specified, then db_bench will "
"run with the RocksDB options in the default column family of the "
"specified options file. "
"Note that with this setting, db_bench will ONLY accept the following "
"RocksDB options related command-line arguments, all other arguments "
"that are related to RocksDB options will be ignored:\n"
"\t--use_existing_db\n"
"\t--use_existing_keys\n"
"\t--statistics\n"
"\t--row_cache_size\n"
"\t--row_cache_numshardbits\n"
"\t--enable_io_prio\n"
"\t--dump_malloc_stats\n"
"\t--num_multi_db\n");
// FIFO Compaction Options
DEFINE_uint64(fifo_compaction_max_table_files_size_mb, 0,
"The limit of total table file sizes to trigger FIFO compaction");
DEFINE_bool(fifo_compaction_allow_compaction, true,
"Allow compaction in FIFO compaction.");
DEFINE_uint64(fifo_compaction_ttl, 0, "TTL for the SST Files in seconds.");
DEFINE_uint64(fifo_age_for_warm, 0, "age_for_warm for FIFO compaction.");
// Stacked BlobDB Options
DEFINE_bool(use_blob_db, false, "[Stacked BlobDB] Open a BlobDB instance.");
DEFINE_bool(
blob_db_enable_gc,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().enable_garbage_collection,
"[Stacked BlobDB] Enable BlobDB garbage collection.");
DEFINE_double(
blob_db_gc_cutoff,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().garbage_collection_cutoff,
"[Stacked BlobDB] Cutoff ratio for BlobDB garbage collection.");
DEFINE_bool(blob_db_is_fifo,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().is_fifo,
"[Stacked BlobDB] Enable FIFO eviction strategy in BlobDB.");
DEFINE_uint64(blob_db_max_db_size,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().max_db_size,
"[Stacked BlobDB] Max size limit of the directory where blob "
"files are stored.");
DEFINE_uint64(blob_db_max_ttl_range, 0,
"[Stacked BlobDB] TTL range to generate BlobDB data (in "
"seconds). 0 means no TTL.");
DEFINE_uint64(
blob_db_ttl_range_secs,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().ttl_range_secs,
"[Stacked BlobDB] TTL bucket size to use when creating blob files.");
DEFINE_uint64(
blob_db_min_blob_size,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().min_blob_size,
"[Stacked BlobDB] Smallest blob to store in a file. Blobs "
"smaller than this will be inlined with the key in the LSM tree.");
DEFINE_uint64(blob_db_bytes_per_sync,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().bytes_per_sync,
"[Stacked BlobDB] Bytes to sync blob file at.");
DEFINE_uint64(blob_db_file_size,
ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().blob_file_size,
"[Stacked BlobDB] Target size of each blob file.");
DEFINE_string(
blob_db_compression_type, "snappy",
"[Stacked BlobDB] Algorithm to use to compress blobs in blob files.");
static enum ROCKSDB_NAMESPACE::CompressionType
FLAGS_blob_db_compression_type_e = ROCKSDB_NAMESPACE::kSnappyCompression;
#endif // ROCKSDB_LITE
// Integrated BlobDB options
DEFINE_bool(
enable_blob_files,
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().enable_blob_files,
"[Integrated BlobDB] Enable writing large values to separate blob files.");
DEFINE_uint64(min_blob_size,
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().min_blob_size,
"[Integrated BlobDB] The size of the smallest value to be stored "
"separately in a blob file.");
DEFINE_uint64(blob_file_size,
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions().blob_file_size,
"[Integrated BlobDB] The size limit for blob files.");
DEFINE_string(blob_compression_type, "none",
"[Integrated BlobDB] The compression algorithm to use for large "
"values stored in blob files.");
DEFINE_bool(enable_blob_garbage_collection,
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
.enable_blob_garbage_collection,
"[Integrated BlobDB] Enable blob garbage collection.");
DEFINE_double(blob_garbage_collection_age_cutoff,
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
.blob_garbage_collection_age_cutoff,
"[Integrated BlobDB] The cutoff in terms of blob file age for "
"garbage collection.");
DEFINE_double(blob_garbage_collection_force_threshold,
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
.blob_garbage_collection_force_threshold,
"[Integrated BlobDB] The threshold for the ratio of garbage in "
"the oldest blob files for forcing garbage collection.");
#ifndef ROCKSDB_LITE
// Secondary DB instance Options
DEFINE_bool(use_secondary_db, false,
"Open a RocksDB secondary instance. A primary instance can be "
"running in another db_bench process.");
DEFINE_string(secondary_path, "",
"Path to a directory used by the secondary instance to store "
"private files, e.g. info log.");
DEFINE_int32(secondary_update_interval, 5,
"Secondary instance attempts to catch up with the primary every "