Skip to content

Commit

Permalink
[scratchpad] new_node_hashes_since()
Browse files Browse the repository at this point in the history
Get newly created node hashes by traversing the diff on top of a base
generation, instead of looking up the tree by updated leaves. This is
more efficient.

Closes: aptos-labs#845
  • Loading branch information
msmouse authored and aptos-bot committed May 8, 2022
1 parent 9d3010f commit 4310729
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 101 deletions.
7 changes: 3 additions & 4 deletions storage/scratchpad/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ edition = "2018"

[dependencies]
arc-swap = "1.2.0"
bitvec = {version = "0.19.4", optional = true}
bitvec = { version = "0.19.4" }
itertools = "0.10.0"
once_cell = "1.7.2"
proptest = { version = "1.0.0", optional = true }
Expand All @@ -25,7 +25,6 @@ aptos-workspace-hack = { version = "0.1", path = "../../crates/aptos-workspace-h


[dev-dependencies]
bitvec = "0.19.4"
criterion = "0.3.4"
rand = "0.8.3"
once_cell = "1.7.2"
Expand All @@ -35,8 +34,8 @@ aptos-types = { path = "../../types", features = ["fuzzing"] }
storage-interface = { path = "../storage-interface" }

[features]
fuzzing = ["bitvec", "aptos-types/fuzzing", "proptest"]
bench = ["bitvec", "proptest"]
fuzzing = ["aptos-types/fuzzing", "proptest"]
bench = ["proptest"]

[[bench]]
name = "sparse_merkle"
Expand Down
181 changes: 84 additions & 97 deletions storage/scratchpad/src/sparse_merkle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,21 @@ use crate::sparse_merkle::{
metrics::{LATEST_GENERATION, OLDEST_GENERATION, TIMER},
node::{NodeInner, SubTree},
updater::SubTreeUpdater,
utils::partition,
};
use aptos_crypto::{
hash::{CryptoHash, SPARSE_MERKLE_PLACEHOLDER_HASH},
HashValue,
};
use aptos_infallible::Mutex;
use aptos_types::{
nibble::{nibble_path::NibblePath, ROOT_NIBBLE_HEIGHT},
proof::SparseMerkleProof,
};
use aptos_types::{nibble::nibble_path::NibblePath, proof::SparseMerkleProof};
use std::{
borrow::Borrow,
collections::{BTreeMap, BTreeSet, HashMap},
collections::{BTreeMap, HashMap},
sync::{Arc, Weak},
};

type NodePosition = bitvec::vec::BitVec<bitvec::order::Msb0, u8>;

/// To help finding the oldest ancestor of any SMT, a branch tracker is created each time
/// the chain of SMTs forked (two or more SMTs updating the same parent).
#[derive(Debug)]
Expand Down Expand Up @@ -318,7 +316,7 @@ where
let base_generation = base_smt.inner.generation;

FrozenSparseMerkleTree {
_base_smt: base_smt,
base_smt,
base_generation,
smt: self,
}
Expand All @@ -339,6 +337,14 @@ where
pub fn root_hash(&self) -> HashValue {
self.inner.root.hash()
}

fn generation(&self) -> u64 {
self.inner.generation
}

fn is_the_same(&self, other: &Self) -> bool {
Arc::ptr_eq(&self.inner, &other.inner)
}
}

/// In tests and benchmark, reference to ancestors are manually managed
Expand Down Expand Up @@ -409,7 +415,7 @@ pub enum StateStoreStatus<V> {
/// SMT is held inside.
#[derive(Clone, Debug)]
pub struct FrozenSparseMerkleTree<V> {
_base_smt: SparseMerkleTree<V>,
base_smt: SparseMerkleTree<V>,
base_generation: u64,
smt: SparseMerkleTree<V>,
}
Expand All @@ -420,7 +426,7 @@ where
{
fn spawn(&self, child_root: SubTree<V>) -> Self {
Self {
_base_smt: self._base_smt.clone(),
base_smt: self.base_smt.clone(),
base_generation: self.base_generation,
smt: SparseMerkleTree {
inner: self.smt.inner.spawn(child_root),
Expand All @@ -447,116 +453,97 @@ where
update_batch: Vec<Vec<(HashValue, &V)>>,
proof_reader: &impl ProofRead<V>,
) -> Result<(Vec<(HashValue, HashMap<NibblePath, HashValue>)>, Self), UpdateError> {
let mut current_state_tree = self.clone();
let mut cur = self.clone();
let mut result = Vec::with_capacity(update_batch.len());
for updates in update_batch {
// sort and dedup the accounts
let keys = updates
.iter()
.map(|(key, _)| *key)
.collect::<BTreeSet<_>>()
.into_iter()
.collect::<Vec<_>>();
current_state_tree = current_state_tree.batch_update(updates, proof_reader)?;
result.push((
current_state_tree.smt.root_hash(),
current_state_tree.generate_node_hashes(keys),
));
let new = cur.batch_update(updates, proof_reader)?;
result.push((new.smt.root_hash(), new.new_node_hashes_since(&cur)));
cur = new;
}
Ok((result, current_state_tree))
Ok((result, cur))
}

/// This is a helper function that compares an updated in-memory sparse merkle with the
/// current on-disk jellyfish sparse merkle to get the hashes of newly generated nodes.
pub fn generate_node_hashes(
&self,
// must be sorted
touched_accounts: Vec<HashValue>,
) -> HashMap<NibblePath, HashValue> {
/// Compares an old and a new SMTs and return the newly created node hashes in between.
pub fn new_node_hashes_since(&self, since_smt: &Self) -> HashMap<NibblePath, HashValue> {
let _timer = TIMER
.with_label_values(&["generate_node_hashes"])
.with_label_values(&["new_node_hashes_since"])
.start_timer();

assert!(self.base_smt.is_the_same(&since_smt.base_smt));
let mut node_hashes = HashMap::new();
let mut nibble_path = NibblePath::new_even(vec![]);
self.collect_new_hashes(
touched_accounts.as_slice(),
Self::new_node_hashes_since_impl(
self.smt.root_weak(),
0, /* depth in nibble */
0, /* level within a nibble*/
&mut nibble_path,
since_smt.smt.generation() + 1,
&mut NodePosition::with_capacity(256),
&mut node_hashes,
);
node_hashes
}

/// Recursively generate the partial node update batch of jellyfish merkle
fn collect_new_hashes(
&self,
keys: &[HashValue],
fn new_node_hashes_since_impl(
subtree: SubTree<V>,
depth_in_nibble: usize,
level_within_nibble: usize,
cur_nibble_path: &mut NibblePath,
since_generation: u64,
pos: &mut NodePosition,
node_hashes: &mut HashMap<NibblePath, HashValue>,
) {
assert!(depth_in_nibble <= ROOT_NIBBLE_HEIGHT);
if keys.is_empty() {
return;
}

if level_within_nibble == 0 {
if depth_in_nibble != 0 {
cur_nibble_path
.push(NibblePath::new_even(keys[0].to_vec()).get_nibble(depth_in_nibble - 1));
if let Some(node) = subtree.get_node_if_in_mem(since_generation) {
let is_nibble = if let Some(path) = Self::maybe_to_nibble_path(pos) {
node_hashes.insert(path, subtree.hash());
true
} else {
false
};
match node.inner().borrow() {
NodeInner::Internal(internal_node) => {
let depth = pos.len();
pos.push(false);
Self::new_node_hashes_since_impl(
internal_node.left.weak(),
since_generation,
pos,
node_hashes,
);
*pos.get_mut(depth).unwrap() = true;
Self::new_node_hashes_since_impl(
internal_node.right.weak(),
since_generation,
pos,
node_hashes,
);
pos.pop();
}
NodeInner::Leaf(leaf_node) => {
let mut path = NibblePath::new_even(leaf_node.key.to_vec());
if !is_nibble {
path.truncate(pos.len() as usize / 4 + 1);
}
node_hashes.insert(path, subtree.hash());
}
}
node_hashes.insert(cur_nibble_path.clone(), subtree.hash());
}
match subtree
.get_node_if_in_mem(self.base_generation)
.expect("must exist")
.inner()
.borrow()
{
NodeInner::Internal(internal_node) => {
let (next_nibble_depth, next_level_within_nibble) = if level_within_nibble == 3 {
(depth_in_nibble + 1, 0)
} else {
(depth_in_nibble, level_within_nibble + 1)
};
let pivot = partition(
&keys.iter().map(|k| (*k, ())).collect::<Vec<_>>()[..],
depth_in_nibble * 4 + level_within_nibble,
);
self.collect_new_hashes(
&keys[..pivot],
internal_node.left.weak(),
next_nibble_depth,
next_level_within_nibble,
cur_nibble_path,
node_hashes,
);
self.collect_new_hashes(
&keys[pivot..],
internal_node.right.weak(),
next_nibble_depth,
next_level_within_nibble,
cur_nibble_path,
node_hashes,
);
}
NodeInner::Leaf(leaf_node) => {
assert_eq!(keys.len(), 1);
assert_eq!(keys[0], leaf_node.key);
if level_within_nibble != 0 {
let mut leaf_nibble_path = cur_nibble_path.clone();
leaf_nibble_path
.push(NibblePath::new_even(keys[0].to_vec()).get_nibble(depth_in_nibble));
node_hashes.insert(leaf_nibble_path, subtree.hash());
}

fn maybe_to_nibble_path(pos: &NodePosition) -> Option<NibblePath> {
assert!(pos.len() <= HashValue::LENGTH_IN_BITS);

const BITS_IN_NIBBLE: usize = 4;
const BITS_IN_BYTE: usize = 8;

if pos.len() % BITS_IN_NIBBLE == 0 {
let mut bytes = pos.clone().into_vec();
if pos.len() % BITS_IN_BYTE == 0 {
Some(NibblePath::new_even(bytes))
} else {
// Unused bits in `BitVec` is uninitialized, setting to 0 to make sure.
if let Some(b) = bytes.last_mut() {
*b &= 0xf0
}

Some(NibblePath::new_odd(bytes))
}
}
if level_within_nibble == 0 && depth_in_nibble != 0 {
cur_nibble_path.pop();
} else {
None
}
}

Expand Down
9 changes: 9 additions & 0 deletions types/src/nibble/nibble_path/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ impl NibblePath {
pub fn bytes(&self) -> &[u8] {
&self.bytes
}

pub fn truncate(&mut self, len: usize) {
assert!(len <= self.num_nibbles);
self.num_nibbles = len;
self.bytes.truncate((len + 1) / 2);
if len % 2 != 0 {
*self.bytes.last_mut().expect("must exist.") &= 0xf0;
}
}
}

pub trait Peekable: Iterator {
Expand Down

0 comments on commit 4310729

Please sign in to comment.