patches shred::merkle::make_shreds_from_data when data is empty (solana-labs#29295)

behzadnouri · web-flow · commit 92ca725c6b64 · 2022-12-16T18:56:21.000Z
If data is empty, make_shreds_from_data will now return one data shred
with empty data. This preserves invariants verified in tests regardless
of data size.
diff --git a/ledger/src/shred.rs b/ledger/src/shred.rs
@@ -88,7 +88,8 @@ mod stats;
 mod traits;
 
 pub type Nonce = u32;
-pub const SIZE_OF_NONCE: usize = 4;
+const_assert_eq!(SIZE_OF_NONCE, 4);
+pub const SIZE_OF_NONCE: usize = std::mem::size_of::<Nonce>();
 
 /// The following constants are computed by hand, and hardcoded.
 /// `test_shred_constants` ensures that the values are correct.
diff --git a/ledger/src/shred/merkle.rs b/ledger/src/shred/merkle.rs
@@ -16,7 +16,7 @@ use {
         shredder::{self, ReedSolomonCache},
     },
     assert_matches::debug_assert_matches,
-    itertools::Itertools,
+    itertools::{Either, Itertools},
     rayon::{prelude::*, ThreadPool},
     reed_solomon_erasure::Error::{InvalidIndex, TooFewParityShards, TooFewShards},
     solana_perf::packet::deserialize_from_with_limit,
@@ -875,21 +875,30 @@ pub(super) fn make_shreds_from_data(
         }
         data = rest;
     }
-    if !data.is_empty() {
+    // If shreds.is_empty() then the data argument was empty. In that case we
+    // want to generate one data shred with empty data.
+    if !data.is_empty() || shreds.is_empty() {
         // Find the Merkle proof_size and data_buffer_size
         // which can embed the remaining data.
         let (proof_size, data_buffer_size) = (1u8..32)
             .find_map(|proof_size| {
                 let data_buffer_size = ShredData::capacity(proof_size).ok()?;
                 let num_data_shreds = (data.len() + data_buffer_size - 1) / data_buffer_size;
+                let num_data_shreds = num_data_shreds.max(1);
                 let erasure_batch_size = shredder::get_erasure_batch_size(num_data_shreds);
                 (proof_size == get_proof_size(erasure_batch_size))
                     .then_some((proof_size, data_buffer_size))
             })
             .ok_or(Error::UnknownProofSize)?;
         common_header.shred_variant = ShredVariant::MerkleData(proof_size);
         common_header.fec_set_index = common_header.index;
-        for shred in data.chunks(data_buffer_size) {
+        let chunks = if data.is_empty() {
+            // Generate one data shred with empty data.
+            Either::Left(std::iter::once(data))
+        } else {
+            Either::Right(data.chunks(data_buffer_size))
+        };
+        for shred in chunks {
             let shred = new_shred_data(common_header, data_header, shred);
             shreds.push(shred);
             common_header.index += 1;
@@ -1274,12 +1283,8 @@ mod test {
         assert_eq!(shreds.iter().map(Shred::signature).dedup().count(), 1);
         for size in num_data_shreds..num_shreds {
             let mut shreds = shreds.clone();
-            let mut removed_shreds = Vec::new();
-            while shreds.len() > size {
-                let index = rng.gen_range(0, shreds.len());
-                removed_shreds.push(shreds.swap_remove(index));
-            }
             shreds.shuffle(rng);
+            let mut removed_shreds = shreds.split_off(size);
             // Should at least contain one coding shred.
             if shreds.iter().all(|shred| {
                 matches!(
@@ -1337,9 +1342,9 @@ mod test {
     #[test_case(46800)]
     fn test_make_shreds_from_data(data_size: usize) {
         let mut rng = rand::thread_rng();
-        let data_size = data_size.saturating_sub(16).max(1);
+        let data_size = data_size.saturating_sub(16);
         let reed_solomon_cache = ReedSolomonCache::default();
-        for data_size in (data_size..data_size + 32).step_by(3) {
+        for data_size in data_size..data_size + 32 {
             run_make_shreds_from_data(&mut rng, data_size, &reed_solomon_cache);
         }
     }
@@ -1392,7 +1397,7 @@ mod test {
                 Shred::ShredData(shred) => Some(shred),
             })
             .collect();
-        // Assert that the input data can be recovered from data sherds.
+        // Assert that the input data can be recovered from data shreds.
         assert_eq!(
             data,
             data_shreds
diff --git a/ledger/src/sigverify_shreds.rs b/ledger/src/sigverify_shreds.rs
@@ -295,7 +295,7 @@ pub fn sign_shreds_cpu(keypair: &Keypair, batches: &mut [PacketBatch]) {
                 .for_each(|p| sign_shred_cpu(keypair, p));
         });
     });
-    inc_new_counter_debug!("ed25519_shred_verify_cpu", packet_count);
+    inc_new_counter_debug!("ed25519_shred_sign_cpu", packet_count);
 }
 
 pub fn sign_shreds_gpu_pinned_keypair(keypair: &Keypair, cache: &RecyclerCache) -> PinnedVec<u8> {

Original file line number	Diff line number	Diff line change
`@@ -295,7 +295,7 @@ pub fn sign_shreds_cpu(keypair: &Keypair, batches: &mut [PacketBatch]) {`
`295`	`295`	`.for_each(\|p\| sign_shred_cpu(keypair, p));`
`296`	`296`	`});`
`297`	`297`	`});`
`298`		`- inc_new_counter_debug!("ed25519_shred_verify_cpu", packet_count);`
	`298`	`+ inc_new_counter_debug!("ed25519_shred_sign_cpu", packet_count);`
`299`	`299`	`}`
`300`	`300`
`301`	`301`	`pub fn sign_shreds_gpu_pinned_keypair(keypair: &Keypair, cache: &RecyclerCache) -> PinnedVec<u8> {`