From 090166f821bbec9e17652529a9748c941ba8eb61 Mon Sep 17 00:00:00 2001
From: Anastasios Kichidis <akihidis@gmail.com>
Date: Fri, 31 Mar 2023 17:22:05 +0100
Subject: [PATCH] [chore] extend timeout time for narwhal nightly tests
 (#10212)

## Description

Currently [narwhal nightly tests
fail](https://github.com/MystenLabs/sui/actions/runs/4571153993) due to
the `bullshark_randomised_tests` timeout, as they take a long time. This PR
configures the narwhalnightly CI test timeout periods to give more
grace. It also reduces the number of iterations in the randomised tests so
we can decrease the running time. Will tune further according to
results. Hopefully, once we introduce some parallelisation, the randomised
tests will be able to run more iterations in the same time.

## Test Plan

How did you test the new or updated feature?

---
If your changes are not user-facing and not a breaking change, you can
skip the following section. Otherwise, please indicate what changed, and
then add to the Release Notes section as highlighted during the release
process.

### Type of Change (Check all that apply)

- [ ] user-visible impact
- [ ] breaking change for client SDKs
- [ ] breaking change for FNs (FN binary must upgrade)
- [ ] breaking change for validators or node operators (must upgrade
binaries)
- [ ] breaking change for on-chain data layout
- [ ] necessitate either a data wipe or data migration

### Release notes
---
 .config/nextest.toml                           |  4 ++--
 .../consensus/src/tests/randomized_tests.rs    | 18 ++++++++----------
 2 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/.config/nextest.toml b/.config/nextest.toml
index 1c22e8ca00c62..e18085fe4df5e 100644
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -20,8 +20,8 @@ status-level = "skip"
 fail-fast = false
 # Retry failing tests in order to not block builds on flaky tests
 retries = 2
-# Mark tests as slow after 25mins, kill them after 50
-slow-timeout = { period = "1500s", terminate-after = 1 }
+# Mark tests as slow after 3 hours, kill them right after
+slow-timeout = { period = "3h", terminate-after = 1 }
 
 [profile.simtestnightly]
 # Print out output for failing tests as soon as they fail, and also at the end
diff --git a/narwhal/consensus/src/tests/randomized_tests.rs b/narwhal/consensus/src/tests/randomized_tests.rs
index b8540f1c5ea80..596c45c745c09 100644
--- a/narwhal/consensus/src/tests/randomized_tests.rs
+++ b/narwhal/consensus/src/tests/randomized_tests.rs
@@ -71,29 +71,27 @@ async fn bullshark_randomised_tests() {
     // on the below parameters to increase the different cases we can generate.
 
     // A range of gc_depth to be used
-    const GC_DEPTH: RangeInclusive<Round> = 4..=15;
+    const GC_DEPTH: RangeInclusive<Round> = 7..=8;
     // A range of the committee size to be used
     const COMMITTEE_SIZE: RangeInclusive<usize> = 4..=4;
     // A range of rounds for which we will create DAGs
-    const DAG_ROUNDS: RangeInclusive<Round> = 7..=20;
+    const DAG_ROUNDS: RangeInclusive<Round> = 7..=15;
     // The number of different execution plans to be created and tested against for every generated DAG
-    const EXECUTION_PLANS: u64 = 500;
+    const EXECUTION_PLANS: u64 = 400;
     // The number of DAGs that should be generated and tested against for every set of properties.
-    const DAGS_PER_SETUP: u64 = 100;
+    const DAGS_PER_SETUP: u64 = 50;
     // DAGs will be created for these failure modes
     let failure_modes: Vec<FailureModes> = vec![
-        // No failures
-        FailureModes {
-            nodes_failure_probability: 0.0,
-            slow_nodes_percentage: 0.0,
-            slow_nodes_failure_probability: 0.0,
-        },
         // Some failures
+        // TODO: re-enable once we have parallel testing - for now it is worth testing the most
+        // severe edge cases
+        /*
         FailureModes {
             nodes_failure_probability: 0.05,     // 5%
             slow_nodes_percentage: 0.20,         // 20%
             slow_nodes_failure_probability: 0.3, // 30%
         },
+         */
         // Severe failures
         FailureModes {
             nodes_failure_probability: 0.0,      // 0%