Skip to content

Commit

Permalink
Creates temporary accounts hash cache files inside accounts hash cach…
Browse files Browse the repository at this point in the history
  • Loading branch information
brooksprumo authored May 23, 2023
1 parent a3fc622 commit a474cb2
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 32 deletions.
4 changes: 3 additions & 1 deletion runtime/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7585,7 +7585,8 @@ impl AccountsDb {
let slot = storages.max_slot_inclusive();
let use_bg_thread_pool = config.use_bg_thread_pool;
let scan_and_hash = || {
let cache_hash_data = Self::get_cache_hash_data(accounts_hash_cache_path, config, slot);
let cache_hash_data =
Self::get_cache_hash_data(accounts_hash_cache_path.clone(), config, slot);

let bounds = Range {
start: 0,
Expand All @@ -7599,6 +7600,7 @@ impl AccountsDb {
None
},
zero_lamport_accounts: flavor.zero_lamport_accounts(),
dir_for_temp_cache_files: accounts_hash_cache_path,
};

// get raw data by scanning
Expand Down
94 changes: 63 additions & 31 deletions runtime/src/accounts_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ use {
convert::TryInto,
fs::File,
io::{BufWriter, Write},
path::PathBuf,
sync::{
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc, Mutex,
},
},
tempfile::tempfile,
tempfile::tempfile_in,
};
pub const MERKLE_FANOUT: usize = 16;

Expand All @@ -51,10 +52,11 @@ impl MmapAccountHashesFile {
}

/// 1 file containing account hashes sorted by pubkey
#[derive(Default)]
pub struct AccountHashesFile {
/// # hashes and an open file that will be deleted on drop. None if there are zero hashes to represent, and thus, no file.
count_and_writer: Option<(usize, BufWriter<File>)>,
/// The directory where temporary cache files are put
dir_for_temp_cache_files: PathBuf,
}

impl AccountHashesFile {
Expand Down Expand Up @@ -84,7 +86,10 @@ impl AccountHashesFile {
pub fn write(&mut self, hash: &Hash) {
if self.count_and_writer.is_none() {
// we have hashes to write but no file yet, so create a file that will auto-delete on drop
self.count_and_writer = Some((0, BufWriter::new(tempfile().unwrap())));
self.count_and_writer = Some((
0,
BufWriter::new(tempfile_in(&self.dir_for_temp_cache_files).unwrap()),
));
}
let count_and_writer = self.count_and_writer.as_mut().unwrap();
assert_eq!(
Expand Down Expand Up @@ -455,15 +460,8 @@ impl CumulativeOffsets {
pub struct AccountsHasher {
pub filler_account_suffix: Option<Pubkey>,
pub zero_lamport_accounts: ZeroLamportAccounts,
}

impl Default for AccountsHasher {
fn default() -> Self {
Self {
filler_account_suffix: None,
zero_lamport_accounts: ZeroLamportAccounts::Excluded,
}
}
/// The directory where temporary cache files are put
pub dir_for_temp_cache_files: PathBuf,
}

impl AccountsHasher {
Expand Down Expand Up @@ -931,7 +929,10 @@ impl AccountsHasher {
// map from index of an item in first_items[] to index of the corresponding item in pubkey_division[]
// this will change as items in pubkey_division[] are exhausted
let mut first_item_to_pubkey_division = Vec::with_capacity(len);
let mut hashes = AccountHashesFile::default();
let mut hashes = AccountHashesFile {
count_and_writer: None,
dir_for_temp_cache_files: self.dir_for_temp_cache_files.clone(),
};
// initialize 'first_items', which holds the current lowest item in each slot group
pubkey_division.iter().enumerate().for_each(|(i, bins)| {
// check to make sure we can do bins[pubkey_bin]
Expand Down Expand Up @@ -1111,12 +1112,32 @@ pub struct AccountsDeltaHash(pub Hash);

#[cfg(test)]
pub mod tests {
use {super::*, std::str::FromStr};
use {super::*, std::str::FromStr, tempfile::tempdir};

impl AccountsHasher {
fn new(dir_for_temp_cache_files: PathBuf) -> Self {
Self {
filler_account_suffix: None,
zero_lamport_accounts: ZeroLamportAccounts::Excluded,
dir_for_temp_cache_files,
}
}
}

impl AccountHashesFile {
fn new(dir_for_temp_cache_files: PathBuf) -> Self {
Self {
count_and_writer: None,
dir_for_temp_cache_files,
}
}
}

#[test]
fn test_account_hashes_file() {
let dir_for_temp_cache_files = tempdir().unwrap();
// 0 hashes
let mut file = AccountHashesFile::default();
let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
assert!(file.get_reader().is_none());
let hashes = (0..2).map(|i| Hash::new(&[i; 32])).collect::<Vec<_>>();

Expand All @@ -1127,7 +1148,7 @@ pub mod tests {
assert!(reader.1.read(1).is_empty());

// multiple hashes
let mut file = AccountHashesFile::default();
let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
assert!(file.get_reader().is_none());
hashes.iter().for_each(|hash| file.write(hash));
let reader = file.get_reader().unwrap();
Expand All @@ -1137,21 +1158,22 @@ pub mod tests {

#[test]
fn test_cumulative_hashes_from_files() {
let dir_for_temp_cache_files = tempdir().unwrap();
(0..4).for_each(|permutation| {
let hashes = (0..2).map(|i| Hash::new(&[i + 1; 32])).collect::<Vec<_>>();

let mut combined = Vec::default();

// 0 hashes
let file0 = AccountHashesFile::default();
let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());

// 1 hash
let mut file1 = AccountHashesFile::default();
let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
file1.write(&hashes[0]);
combined.push(hashes[0]);

// multiple hashes
let mut file2 = AccountHashesFile::default();
let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
hashes.iter().for_each(|hash| {
file2.write(hash);
combined.push(*hash);
Expand All @@ -1164,9 +1186,9 @@ pub mod tests {
vec![
file0,
file1,
AccountHashesFile::default(),
AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
file2,
AccountHashesFile::default(),
AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
]
} else if permutation == 2 {
vec![file1, file2]
Expand All @@ -1176,8 +1198,8 @@ pub mod tests {
combined.push(one);
vec![
file2,
AccountHashesFile::default(),
AccountHashesFile::default(),
AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
file1,
]
};
Expand Down Expand Up @@ -1240,7 +1262,8 @@ pub mod tests {
let val = CalculateHashIntermediate::new(hash, 0, key);
account_maps.push(val);

let accounts_hash = AccountsHasher::default();
let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
let result = accounts_hash
.rest_of_hash_calculation(for_rest(&account_maps), &mut HashStats::default());
let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
Expand Down Expand Up @@ -1285,8 +1308,9 @@ pub mod tests {
}]]];
let temp_vec = vec.to_vec();
let slice = convert_to_slice2(&temp_vec);
let (mut hashes, lamports, _) =
AccountsHasher::default().de_dup_accounts_in_parallel(&slice, 0);
let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
let (mut hashes, lamports, _) = accounts_hasher.de_dup_accounts_in_parallel(&slice, 0);
assert_eq!(&[Hash::default()], hashes.get_reader().unwrap().1.read(0));
assert_eq!(lamports, 1);
}
Expand All @@ -1304,7 +1328,8 @@ pub mod tests {
#[test]
fn test_accountsdb_de_dup_accounts_empty() {
solana_logger::setup();
let accounts_hash = AccountsHasher::default();
let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());

let vec = vec![vec![], vec![]];
let (hashes, lamports) =
Expand Down Expand Up @@ -1399,7 +1424,8 @@ pub mod tests {
result
}).collect();

let hash = AccountsHasher::default();
let dir_for_temp_cache_files = tempdir().unwrap();
let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
let mut expected_index = 0;
for last_slice in 0..2 {
for start in 0..COUNT {
Expand Down Expand Up @@ -1532,7 +1558,9 @@ pub mod tests {
fn test_de_dup_accounts_in_parallel<'a>(
account_maps: &'a [SortedDataByPubkey<'a>],
) -> (AccountHashesFile, u64, usize) {
AccountsHasher::default().de_dup_accounts_in_parallel(account_maps, 0)
let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0)
}

#[test]
Expand Down Expand Up @@ -1908,7 +1936,9 @@ pub mod tests {
),
CalculateHashIntermediate::new(Hash::new(&[2u8; 32]), offset + 1, Pubkey::new_unique()),
];
AccountsHasher::default().de_dup_accounts_in_parallel(&[convert_to_slice(&[input])], 0);
let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
accounts_hasher.de_dup_accounts_in_parallel(&[convert_to_slice(&[input])], 0);
}

fn convert_to_slice(
Expand Down Expand Up @@ -1944,7 +1974,9 @@ pub mod tests {
Pubkey::new_unique(),
)],
];
AccountsHasher::default().de_dup_accounts(
let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
accounts_hasher.de_dup_accounts(
&[convert_to_slice(&input)],
&mut HashStats::default(),
2, // accounts above are in 2 groups
Expand Down

0 comments on commit a474cb2

Please sign in to comment.