From a352390bae7ed96106eebe194e2a307f7241446f Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Wed, 27 Nov 2024 14:52:59 +0100 Subject: [PATCH 01/57] `MockBackend`: Fix removed sub-machines (#2162) Depends on #2153 (in merge queue), likely needs a rebase after it's merged. To test: ``` $ cargo run pil test_data/asm/block_to_block_empty_submachine.asm -o output -f --prove-with mock ``` --- backend/src/mock/connection_constraint_checker.rs | 8 +++++++- backend/src/mock/machine.rs | 14 ++++++++++---- backend/src/mock/mod.rs | 4 +++- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/backend/src/mock/connection_constraint_checker.rs b/backend/src/mock/connection_constraint_checker.rs index e090481154..81b3813960 100644 --- a/backend/src/mock/connection_constraint_checker.rs +++ b/backend/src/mock/connection_constraint_checker.rs @@ -236,7 +236,13 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { machine_name: &str, selected_expressions: &SelectedExpressions, ) -> Vec> { - let machine = &self.machines[machine_name]; + let machine = match self.machines.get(machine_name) { + Some(machine) => machine, + None => { + // The machine is empty, so there are no tuples. + return Vec::new(); + } + }; (0..machine.size) .into_par_iter() diff --git a/backend/src/mock/machine.rs b/backend/src/mock/machine.rs index 2c05c200a7..2bcf0a67f1 100644 --- a/backend/src/mock/machine.rs +++ b/backend/src/mock/machine.rs @@ -16,12 +16,13 @@ pub struct Machine<'a, F> { } impl<'a, F: FieldElement> Machine<'a, F> { - pub fn new( + /// Creates a new machine from a witness, fixed columns, and a PIL - if it is not empty. 
+ pub fn try_new( machine_name: String, witness: &'a [(String, Vec)], fixed: &'a [(String, VariablySizedColumn)], pil: &'a Analyzed, - ) -> Self { + ) -> Option { let witness = machine_witness_columns(witness, pil, &machine_name); let size = witness .iter() @@ -30,6 +31,11 @@ impl<'a, F: FieldElement> Machine<'a, F> { .exactly_one() .unwrap(); + if size == 0 { + // Empty machines are removed always valid. + return None; + } + let fixed = machine_fixed_columns(fixed, pil); let fixed = fixed.get(&(size as DegreeType)).unwrap(); @@ -53,12 +59,12 @@ impl<'a, F: FieldElement> Machine<'a, F> { }) .collect(); - Self { + Some(Self { machine_name, size, columns, pil, intermediate_definitions, - } + }) } } diff --git a/backend/src/mock/mod.rs b/backend/src/mock/mod.rs index 828e874ef2..c0c5e8478d 100644 --- a/backend/src/mock/mod.rs +++ b/backend/src/mock/mod.rs @@ -79,7 +79,9 @@ impl Backend for MockBackend { let machines = self .machine_to_pil .iter() - .map(|(machine, pil)| Machine::new(machine.clone(), witness, &self.fixed, pil)) + .filter_map(|(machine, pil)| { + Machine::try_new(machine.clone(), witness, &self.fixed, pil) + }) .map(|machine| (machine.machine_name.clone(), machine)) .collect::>(); From 3b499a05417e46d521eaf8ca8ca956d99cc049c9 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Wed, 27 Nov 2024 17:58:55 +0100 Subject: [PATCH 02/57] Add `stwo-composite` backend (#2164) This allows us too have VADCOP proofs with Stwo. 
--- backend/src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/src/lib.rs b/backend/src/lib.rs index 66690a13cc..db1f898227 100644 --- a/backend/src/lib.rs +++ b/backend/src/lib.rs @@ -62,6 +62,9 @@ pub enum BackendType { #[cfg(feature = "stwo")] #[strum(serialize = "stwo")] Stwo, + #[cfg(feature = "stwo")] + #[strum(serialize = "stwo-composite")] + StwoComposite, } pub type BackendOptions = String; @@ -112,6 +115,10 @@ impl BackendType { } #[cfg(feature = "stwo")] BackendType::Stwo => Box::new(stwo::Factory), + #[cfg(feature = "stwo")] + BackendType::StwoComposite => { + Box::new(composite::CompositeBackendFactory::new(stwo::Factory)) + } } } } From dd1842d600cb4fa0e2253cf0f54ae467111b97cc Mon Sep 17 00:00:00 2001 From: Leo Date: Thu, 28 Nov 2024 10:12:18 +0100 Subject: [PATCH 03/57] keccak with memory for goldilocks (#2108) Co-authored-by: Steve Wang --- pipeline/tests/powdr_std.rs | 8 + std/machines/hash/keccakf32_memory.asm | 769 ++++++++++++++++++++++++ std/machines/hash/mod.asm | 3 +- test_data/std/keccakf32_memory_test.asm | 83 +++ 4 files changed, 862 insertions(+), 1 deletion(-) create mode 100644 std/machines/hash/keccakf32_memory.asm create mode 100644 test_data/std/keccakf32_memory_test.asm diff --git a/pipeline/tests/powdr_std.rs b/pipeline/tests/powdr_std.rs index db54738c3b..b8e7d6654c 100644 --- a/pipeline/tests/powdr_std.rs +++ b/pipeline/tests/powdr_std.rs @@ -70,6 +70,14 @@ fn keccakf16_memory_test() { test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); } +#[test] +#[ignore = "Too slow"] +fn keccakf32_memory_test() { + let f = "std/keccakf32_memory_test.asm"; + test_mock_backend(make_simple_prepared_pipeline::(f)); + test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); +} + #[test] #[ignore = "Too slow"] fn poseidon_bb_test() { diff --git a/std/machines/hash/keccakf32_memory.asm b/std/machines/hash/keccakf32_memory.asm new file mode 100644 index 0000000000..8b166b9777 --- 
/dev/null +++ b/std/machines/hash/keccakf32_memory.asm @@ -0,0 +1,769 @@ +use std::array; +use std::utils; +use std::utils::unchanged_until; +use std::utils::force_bool; +use std::convert::expr; +use std::convert::int; +use std::convert::fe; +use std::prelude::set_hint; +use std::prelude::Query; +use std::prover::eval; +use std::prover::provide_value; +use std::machines::large_field::memory::Memory; + +machine Keccakf32Memory(mem: Memory) with + latch: final_step, + operation_id: operation_id, + call_selectors: sel, +{ + /* + ------------- Begin memory read / write --------------- + Additional columns compared to the non-memory version: + - 1 column for user input address (of first byte of input). + - 1 column for user output address (of first byte of output). + - 1 column for time step. + Overall, given that there are 2,600+ columns in the non-memory version, this isn't a huge cost + Methodology description: + 1. The latch with the input and output addresses + time step is in the last row of each block. + 2. User input address is copied to the first row. + 3. Input addresses for all bytes are calculated from user input address in the first row. + 4. Load all input bytes from memory to the preimage columns. + 5. Keccak is computed from top to bottom. + 6. Output addresses for all bytes are calculated from user output address in the last row. + 7. Store all output bytes from keccak computation columns to memory. + Essentially, we conduct all memory reads in the first row and all memory writes in the last row. + Our current methodology performs all memory reads at once in the first row, then immediately does the keccak computation, + and finally performs all memory writes at once in the last row, and thus only requires one pass with auto witgen. + Though note that input address need to be first copied from the last row to the first row. 
+ */ + + operation keccakf32_memory<0> input_addr, output_addr, time_step ->; + + // Get an intermediate column that indicates that we're in an + // actual block, not a default block. Its value is constant + // within the block. + let used = array::sum(sel); + array::map(sel, |s| unchanged_until(s, final_step + is_last)); + std::utils::force_bool(used); + let first_step_used: expr = used * first_step; + let final_step_used: expr = used * final_step; + + // Repeat the time step and input address in the whole block. + col witness time_step; + unchanged_until(time_step, final_step + is_last); + + // Input address for the first byte of input array from the user. + // Copied from user input in the last row to the first row. + col witness input_addr; + unchanged_until(input_addr, final_step + is_last); + + // Output address for the first byte of output array from the user. + // Used in the last row directly from user input. + col witness output_addr; + + // Load memory while converting to little endian format for keccak computation. + // Specifically, this keccakf32 machine accepts big endian inputs in memory. + // However the keccak computation constraints are written for little endian iputs. + // Therefore memory load converts big endian inputs to little endian for the preimage. 
+ link if first_step_used ~> preimage[1] = mem.mload(input_addr, time_step); + link if first_step_used ~> preimage[0] = mem.mload(input_addr + 4, time_step); + link if first_step_used ~> preimage[3] = mem.mload(input_addr + 8, time_step); + link if first_step_used ~> preimage[2] = mem.mload(input_addr + 12, time_step); + link if first_step_used ~> preimage[5] = mem.mload(input_addr + 16, time_step); + link if first_step_used ~> preimage[4] = mem.mload(input_addr + 20, time_step); + link if first_step_used ~> preimage[7] = mem.mload(input_addr + 24, time_step); + link if first_step_used ~> preimage[6] = mem.mload(input_addr + 28, time_step); + link if first_step_used ~> preimage[9] = mem.mload(input_addr + 32, time_step); + link if first_step_used ~> preimage[8] = mem.mload(input_addr + 36, time_step); + link if first_step_used ~> preimage[11] = mem.mload(input_addr + 40, time_step); + link if first_step_used ~> preimage[10] = mem.mload(input_addr + 44, time_step); + link if first_step_used ~> preimage[13] = mem.mload(input_addr + 48, time_step); + link if first_step_used ~> preimage[12] = mem.mload(input_addr + 52, time_step); + link if first_step_used ~> preimage[15] = mem.mload(input_addr + 56, time_step); + link if first_step_used ~> preimage[14] = mem.mload(input_addr + 60, time_step); + link if first_step_used ~> preimage[17] = mem.mload(input_addr + 64, time_step); + link if first_step_used ~> preimage[16] = mem.mload(input_addr + 68, time_step); + link if first_step_used ~> preimage[19] = mem.mload(input_addr + 72, time_step); + link if first_step_used ~> preimage[18] = mem.mload(input_addr + 76, time_step); + link if first_step_used ~> preimage[21] = mem.mload(input_addr + 80, time_step); + link if first_step_used ~> preimage[20] = mem.mload(input_addr + 84, time_step); + link if first_step_used ~> preimage[23] = mem.mload(input_addr + 88, time_step); + link if first_step_used ~> preimage[22] = mem.mload(input_addr + 92, time_step); + link if 
first_step_used ~> preimage[25] = mem.mload(input_addr + 96, time_step); + link if first_step_used ~> preimage[24] = mem.mload(input_addr + 100, time_step); + link if first_step_used ~> preimage[27] = mem.mload(input_addr + 104, time_step); + link if first_step_used ~> preimage[26] = mem.mload(input_addr + 108, time_step); + link if first_step_used ~> preimage[29] = mem.mload(input_addr + 112, time_step); + link if first_step_used ~> preimage[28] = mem.mload(input_addr + 116, time_step); + link if first_step_used ~> preimage[31] = mem.mload(input_addr + 120, time_step); + link if first_step_used ~> preimage[30] = mem.mload(input_addr + 124, time_step); + link if first_step_used ~> preimage[33] = mem.mload(input_addr + 128, time_step); + link if first_step_used ~> preimage[32] = mem.mload(input_addr + 132, time_step); + link if first_step_used ~> preimage[35] = mem.mload(input_addr + 136, time_step); + link if first_step_used ~> preimage[34] = mem.mload(input_addr + 140, time_step); + link if first_step_used ~> preimage[37] = mem.mload(input_addr + 144, time_step); + link if first_step_used ~> preimage[36] = mem.mload(input_addr + 148, time_step); + link if first_step_used ~> preimage[39] = mem.mload(input_addr + 152, time_step); + link if first_step_used ~> preimage[38] = mem.mload(input_addr + 156, time_step); + link if first_step_used ~> preimage[41] = mem.mload(input_addr + 160, time_step); + link if first_step_used ~> preimage[40] = mem.mload(input_addr + 164, time_step); + link if first_step_used ~> preimage[43] = mem.mload(input_addr + 168, time_step); + link if first_step_used ~> preimage[42] = mem.mload(input_addr + 172, time_step); + link if first_step_used ~> preimage[45] = mem.mload(input_addr + 176, time_step); + link if first_step_used ~> preimage[44] = mem.mload(input_addr + 180, time_step); + link if first_step_used ~> preimage[47] = mem.mload(input_addr + 184, time_step); + link if first_step_used ~> preimage[46] = mem.mload(input_addr + 188, 
time_step); + link if first_step_used ~> preimage[49] = mem.mload(input_addr + 192, time_step); + link if first_step_used ~> preimage[48] = mem.mload(input_addr + 196, time_step); + + // Expects input of 25 64-bit numbers decomposed to 25 chunks of 2 32-bit little endian limbs. + // The output is a_prime_prime_prime_0_0_limbs for the first 2 and a_prime_prime for the rest. + + // Write memory while converting output to big endian format. + // Specifically, output obtained from the keccak computation are little endian. + // However, this keccakf32_memory machine produces big endian outputs in memory. + // Therefore, memory write converts little endian from keccak computation to big endian for the output in memory. + link if final_step_used ~> mem.mstore(output_addr, time_step + 1, a_prime_prime_prime_0_0_limbs[1]); + link if final_step_used ~> mem.mstore(output_addr + 4, time_step + 1, a_prime_prime_prime_0_0_limbs[0]); + link if final_step_used ~> mem.mstore(output_addr + 8, time_step + 1, a_prime_prime[3]); + link if final_step_used ~> mem.mstore(output_addr + 12, time_step + 1, a_prime_prime[2]); + link if final_step_used ~> mem.mstore(output_addr + 16, time_step + 1, a_prime_prime[5]); + link if final_step_used ~> mem.mstore(output_addr + 20, time_step + 1, a_prime_prime[4]); + link if final_step_used ~> mem.mstore(output_addr + 24, time_step + 1, a_prime_prime[7]); + link if final_step_used ~> mem.mstore(output_addr + 28, time_step + 1, a_prime_prime[6]); + link if final_step_used ~> mem.mstore(output_addr + 32, time_step + 1, a_prime_prime[9]); + link if final_step_used ~> mem.mstore(output_addr + 36, time_step + 1, a_prime_prime[8]); + link if final_step_used ~> mem.mstore(output_addr + 40, time_step + 1, a_prime_prime[11]); + link if final_step_used ~> mem.mstore(output_addr + 44, time_step + 1, a_prime_prime[10]); + link if final_step_used ~> mem.mstore(output_addr + 48, time_step + 1, a_prime_prime[13]); + link if final_step_used ~> mem.mstore(output_addr 
+ 52, time_step + 1, a_prime_prime[12]); + link if final_step_used ~> mem.mstore(output_addr + 56, time_step + 1, a_prime_prime[15]); + link if final_step_used ~> mem.mstore(output_addr + 60, time_step + 1, a_prime_prime[14]); + link if final_step_used ~> mem.mstore(output_addr + 64, time_step + 1, a_prime_prime[17]); + link if final_step_used ~> mem.mstore(output_addr + 68, time_step + 1, a_prime_prime[16]); + link if final_step_used ~> mem.mstore(output_addr + 72, time_step + 1, a_prime_prime[19]); + link if final_step_used ~> mem.mstore(output_addr + 76, time_step + 1, a_prime_prime[18]); + link if final_step_used ~> mem.mstore(output_addr + 80, time_step + 1, a_prime_prime[21]); + link if final_step_used ~> mem.mstore(output_addr + 84, time_step + 1, a_prime_prime[20]); + link if final_step_used ~> mem.mstore(output_addr + 88, time_step + 1, a_prime_prime[23]); + link if final_step_used ~> mem.mstore(output_addr + 92, time_step + 1, a_prime_prime[22]); + link if final_step_used ~> mem.mstore(output_addr + 96, time_step + 1, a_prime_prime[25]); + link if final_step_used ~> mem.mstore(output_addr + 100, time_step + 1, a_prime_prime[24]); + link if final_step_used ~> mem.mstore(output_addr + 104, time_step + 1, a_prime_prime[27]); + link if final_step_used ~> mem.mstore(output_addr + 108, time_step + 1, a_prime_prime[26]); + link if final_step_used ~> mem.mstore(output_addr + 112, time_step + 1, a_prime_prime[29]); + link if final_step_used ~> mem.mstore(output_addr + 116, time_step + 1, a_prime_prime[28]); + link if final_step_used ~> mem.mstore(output_addr + 120, time_step + 1, a_prime_prime[31]); + link if final_step_used ~> mem.mstore(output_addr + 124, time_step + 1, a_prime_prime[30]); + link if final_step_used ~> mem.mstore(output_addr + 128, time_step + 1, a_prime_prime[33]); + link if final_step_used ~> mem.mstore(output_addr + 132, time_step + 1, a_prime_prime[32]); + link if final_step_used ~> mem.mstore(output_addr + 136, time_step + 1, 
a_prime_prime[35]); + link if final_step_used ~> mem.mstore(output_addr + 140, time_step + 1, a_prime_prime[34]); + link if final_step_used ~> mem.mstore(output_addr + 144, time_step + 1, a_prime_prime[37]); + link if final_step_used ~> mem.mstore(output_addr + 148, time_step + 1, a_prime_prime[36]); + link if final_step_used ~> mem.mstore(output_addr + 152, time_step + 1, a_prime_prime[39]); + link if final_step_used ~> mem.mstore(output_addr + 156, time_step + 1, a_prime_prime[38]); + link if final_step_used ~> mem.mstore(output_addr + 160, time_step + 1, a_prime_prime[41]); + link if final_step_used ~> mem.mstore(output_addr + 164, time_step + 1, a_prime_prime[40]); + link if final_step_used ~> mem.mstore(output_addr + 168, time_step + 1, a_prime_prime[43]); + link if final_step_used ~> mem.mstore(output_addr + 172, time_step + 1, a_prime_prime[42]); + link if final_step_used ~> mem.mstore(output_addr + 176, time_step + 1, a_prime_prime[45]); + link if final_step_used ~> mem.mstore(output_addr + 180, time_step + 1, a_prime_prime[44]); + link if final_step_used ~> mem.mstore(output_addr + 184, time_step + 1, a_prime_prime[47]); + link if final_step_used ~> mem.mstore(output_addr + 188, time_step + 1, a_prime_prime[46]); + link if final_step_used ~> mem.mstore(output_addr + 192, time_step + 1, a_prime_prime[49]); + link if final_step_used ~> mem.mstore(output_addr + 196, time_step + 1, a_prime_prime[48]); + // ------------- End memory read / write --------------- + + // Adapted from Plonky3 implementation of Keccak: https://github.com/Plonky3/Plonky3/tree/main/keccak-air/src + + std::check::require_field_bits(32, || "The field modulus should be at least 2^32 - 1 to work in the keccakf32 machine."); + + col witness operation_id; + + let NUM_ROUNDS: int = 24; + + // pub struct KeccakCols { + // /// The `i`th value is set to 1 if we are in the `i`th round, otherwise 0. 
+ // pub step_flags: [T; NUM_ROUNDS], + + // /// A register which indicates if a row should be exported, i.e. included in a multiset equality + // /// argument. Should be 1 only for certain rows which are final steps, i.e. with + // /// `step_flags[23] = 1`. + // pub export: T, + + // /// Permutation inputs, stored in y-major order. + // pub preimage: [[[T; U64_LIMBS]; 5]; 5], + + // pub a: [[[T; U64_LIMBS]; 5]; 5], + + // /// ```ignore + // /// C[x] = xor(A[x, 0], A[x, 1], A[x, 2], A[x, 3], A[x, 4]) + // /// ``` + // pub c: [[T; 64]; 5], + + // /// ```ignore + // /// C'[x, z] = xor(C[x, z], C[x - 1, z], C[x + 1, z - 1]) + // /// ``` + // pub c_prime: [[T; 64]; 5], + + // // Note: D is inlined, not stored in the witness. + // /// ```ignore + // /// A'[x, y] = xor(A[x, y], D[x]) + // /// = xor(A[x, y], C[x - 1], ROT(C[x + 1], 1)) + // /// ``` + // pub a_prime: [[[T; 64]; 5]; 5], + + // /// ```ignore + // /// A''[x, y] = xor(B[x, y], andn(B[x + 1, y], B[x + 2, y])). + // /// ``` + // pub a_prime_prime: [[[T; U64_LIMBS]; 5]; 5], + + // /// The bits of `A''[0, 0]`. + // pub a_prime_prime_0_0_bits: [T; 64], + + // /// ```ignore + // /// A'''[0, 0, z] = A''[0, 0, z] ^ RC[k, z] + // /// ``` + // pub a_prime_prime_prime_0_0_limbs: [T; U64_LIMBS], + // } + + col witness preimage[5 * 5 * 2]; + col witness a[5 * 5 * 2]; + col witness c[5 * 64]; + array::map(c, |i| force_bool(i)); + col witness c_prime[5 * 64]; + col witness a_prime[5 * 5 * 64]; + array::map(a_prime, |i| force_bool(i)); + col witness a_prime_prime[5 * 5 * 2]; + col witness a_prime_prime_0_0_bits[64]; + array::map(a_prime_prime_0_0_bits, |i| force_bool(i)); + col witness a_prime_prime_prime_0_0_limbs[2]; + + // Initially, the first step flag should be 1 while the others should be 0. 
+ // builder.when_first_row().assert_one(local.step_flags[0]); + // for i in 1..NUM_ROUNDS { + // builder.when_first_row().assert_zero(local.step_flags[i]); + // } + // for i in 0..NUM_ROUNDS { + // let current_round_flag = local.step_flags[i]; + // let next_round_flag = next.step_flags[(i + 1) % NUM_ROUNDS]; + // builder + // .when_transition() + // .assert_eq(next_round_flag, current_round_flag); + // } + + let step_flags: col[NUM_ROUNDS] = array::new(NUM_ROUNDS, |i| |row| if row % NUM_ROUNDS == i { 1 } else { 0 } ); + + // let main = builder.main(); + // let (local, next) = (main.row_slice(0), main.row_slice(1)); + // let local: &KeccakCols = (*local).borrow(); + // let next: &KeccakCols = (*next).borrow(); + + // let first_step = local.step_flags[0]; + // let final_step = local.step_flags[NUM_ROUNDS - 1]; + // let not_final_step = AB::Expr::one() - final_step; + + let first_step: expr = step_flags[0]; // Aliasing instead of defining a new fixed column. + let final_step: expr = step_flags[NUM_ROUNDS - 1]; + col fixed is_last = [0]* + [1]; + + // // If this is the first step, the input A must match the preimage. + // for y in 0..5 { + // for x in 0..5 { + // for limb in 0..U64_LIMBS { + // builder + // .when(first_step) + // .assert_eq(local.preimage[y][x][limb], local.a[y][x][limb]); + // } + // } + // } + + array::zip(preimage, a, |p_i, a_i| first_step * (p_i - a_i) = 0); + + // // The export flag must be 0 or 1. + // builder.assert_bool(local.export); + + // force_bool(export); + + // // If this is not the final step, the export flag must be off. + // builder + // .when(not_final_step.clone()) + // .assert_zero(local.export); + + // not_final_step * export = 0; + + // // If this is not the final step, the local and next preimages must match. 
+ // for y in 0..5 { + // for x in 0..5 { + // for limb in 0..U64_LIMBS { + // builder + // .when(not_final_step.clone()) + // .when_transition() + // .assert_eq(local.preimage[y][x][limb], next.preimage[y][x][limb]); + // } + // } + // } + + array::map(preimage, |p| unchanged_until(p, final_step + is_last)); + + // for x in 0..5 { + // for z in 0..64 { + // builder.assert_bool(local.c[x][z]); + // let xor = xor3_gen::( + // local.c[x][z].into(), + // local.c[(x + 4) % 5][z].into(), + // local.c[(x + 1) % 5][(z + 63) % 64].into(), + // ); + // let c_prime = local.c_prime[x][z]; + // builder.assert_eq(c_prime, xor); + // } + // } + + let andn: expr, expr -> expr = |a, b| (1 - a) * b; + let xor: expr, expr -> expr = |a, b| a + b - 2*a*b; + let xor3: expr, expr, expr -> expr = |a, b, c| xor(xor(a, b), c); + // a b c xor3 + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 1 + // 0 1 1 0 + // 1 0 0 1 + // 1 0 1 0 + // 1 1 0 0 + // 1 1 1 1 + + array::new(320, |i| { + let x = i / 64; + let z = i % 64; + c_prime[i] = xor3( + c[i], + c[((x + 4) % 5) * 64 + z], + c[((x + 1) % 5) * 64 + ((z + 63) % 64)] + ) + }); + + // // Check that the input limbs are consistent with A' and D. + // // A[x, y, z] = xor(A'[x, y, z], D[x, y, z]) + // // = xor(A'[x, y, z], C[x - 1, z], C[x + 1, z - 1]) + // // = xor(A'[x, y, z], C[x, z], C'[x, z]). + // // The last step is valid based on the identity we checked above. + // // It isn't required, but makes this check a bit cleaner. 
+ // for y in 0..5 { + // for x in 0..5 { + // let get_bit = |z| { + // let a_prime: AB::Var = local.a_prime[y][x][z]; + // let c: AB::Var = local.c[x][z]; + // let c_prime: AB::Var = local.c_prime[x][z]; + // xor3_gen::(a_prime.into(), c.into(), c_prime.into()) + // }; + + // for limb in 0..U64_LIMBS { + // let a_limb = local.a[y][x][limb]; + // let computed_limb = (limb * BITS_PER_LIMB..(limb + 1) * BITS_PER_LIMB) // bigger address correspond to more significant bit + // .rev() + // .fold(AB::Expr::zero(), |acc, z| { + // builder.assert_bool(local.a_prime[y][x][z]); + // acc.double() + get_bit(z) + // }); + // builder.assert_eq(computed_limb, a_limb); + // } + // } + // } + + let bits_to_value_be: expr[] -> expr = |bits_be| array::fold(bits_be, 0, |acc, e| (acc * 2 + e)); + + array::new(50, |i| { + let y = i / 10; + let x = (i / 2) % 5; + let limb = i % 2; + let get_bit: int -> expr = |z| xor3(a_prime[y * 320 + x * 64 + z], c[x * 64 + z], c_prime[x * 64 + z]); + + let limb_bits_be: expr[] = array::reverse(array::new(32, |z| get_bit(limb * 32 + z))); + a[i] = bits_to_value_be(limb_bits_be) + }); + + // // xor_{i=0}^4 A'[x, i, z] = C'[x, z], so for each x, z, + // // diff * (diff - 2) * (diff - 4) = 0, where + // // diff = sum_{i=0}^4 A'[x, i, z] - C'[x, z] + // for x in 0..5 { + // for z in 0..64 { + // let sum: AB::Expr = (0..5).map(|y| local.a_prime[y][x][z].into()).sum(); + // let diff = sum - local.c_prime[x][z]; + // let four = AB::Expr::from_canonical_u8(4); + // builder + // .assert_zero(diff.clone() * (diff.clone() - AB::Expr::two()) * (diff - four)); + // } + // } + + array::new(320, |i| { + let x = i / 64; + let z = i % 64; + let sum = utils::sum(5, |y| a_prime[y * 320 + i]); + let diff = sum - c_prime[i]; + diff * (diff - 2) * (diff - 4) = 0 + }); + + // // A''[x, y] = xor(B[x, y], andn(B[x + 1, y], B[x + 2, y])). 
+ // for y in 0..5 { + // for x in 0..5 { + // let get_bit = |z| { + // let andn = andn_gen::( + // local.b((x + 1) % 5, y, z).into(), + // local.b((x + 2) % 5, y, z).into(), + // ); + // xor_gen::(local.b(x, y, z).into(), andn) + // }; + + // for limb in 0..U64_LIMBS { + // let computed_limb = (limb * BITS_PER_LIMB..(limb + 1) * BITS_PER_LIMB) + // .rev() + // .fold(AB::Expr::zero(), |acc, z| acc.double() + get_bit(z)); + // builder.assert_eq(computed_limb, local.a_prime_prime[y][x][limb]); + // } + // } + // } + + array::new(50, |i| { + let y = i / 10; + let x = (i / 2) % 5; + let limb = i % 2; + + let get_bit: int -> expr = |z| { + xor(b(x, y, z), andn(b((x + 1) % 5, y, z), b((x + 2) % 5, y, z))) + }; + let limb_bits_be: expr[] = array::reverse(array::new(32, |z| get_bit(limb * 32 + z))); + a_prime_prime[i] = bits_to_value_be(limb_bits_be) + }); + + // pub fn b(&self, x: usize, y: usize, z: usize) -> T { + // debug_assert!(x < 5); + // debug_assert!(y < 5); + // debug_assert!(z < 64); + + // // B is just a rotation of A', so these are aliases for A' registers. 
+ // // From the spec, + // // B[y, (2x + 3y) % 5] = ROT(A'[x, y], r[x, y]) + // // So, + // // B[x, y] = f((x + 3y) % 5, x) + // // where f(a, b) = ROT(A'[a, b], r[a, b]) + // let a = (x + 3 * y) % 5; + // let b = x; + // let rot = R[a][b] as usize; + // self.a_prime[b][a][(z + 64 - rot) % 64] + // } + + let b: int, int, int -> expr = |x, y, z| { + let a: int = (x + 3 * y) % 5; + let rot: int = R[a * 5 + x]; // b = x + a_prime[x * 320 + a * 64 + (z + 64 - rot) % 64] + }; + + // // A'''[0, 0] = A''[0, 0] XOR RC + // for limb in 0..U64_LIMBS { + // let computed_a_prime_prime_0_0_limb = (limb * BITS_PER_LIMB + // ..(limb + 1) * BITS_PER_LIMB) + // .rev() + // .fold(AB::Expr::zero(), |acc, z| { + // builder.assert_bool(local.a_prime_prime_0_0_bits[z]); + // acc.double() + local.a_prime_prime_0_0_bits[z] + // }); + // let a_prime_prime_0_0_limb = local.a_prime_prime[0][0][limb]; + // builder.assert_eq(computed_a_prime_prime_0_0_limb, a_prime_prime_0_0_limb); + // } + + array::new(2, |limb| { + let limb_bits_be: expr[] = array::reverse(array::new(32, |z| a_prime_prime_0_0_bits[limb * 32 + z])); + a_prime_prime[limb] = bits_to_value_be(limb_bits_be) + }); + + // let get_xored_bit = |i| { + // let mut rc_bit_i = AB::Expr::zero(); + // for r in 0..NUM_ROUNDS { + // let this_round = local.step_flags[r]; + // let this_round_constant = AB::Expr::from_canonical_u8(rc_value_bit(r, i)); + // rc_bit_i += this_round * this_round_constant; + // } + + // xor_gen::(local.a_prime_prime_0_0_bits[i].into(), rc_bit_i) + // }; + + let get_xored_bit: int -> expr = |i| xor(a_prime_prime_0_0_bits[i], utils::sum(NUM_ROUNDS, |r| expr(RC_BITS[r * 64 + i]) * step_flags[r] )); + + // for limb in 0..U64_LIMBS { + // let a_prime_prime_prime_0_0_limb = local.a_prime_prime_prime_0_0_limbs[limb]; + // let computed_a_prime_prime_prime_0_0_limb = (limb * BITS_PER_LIMB + // ..(limb + 1) * BITS_PER_LIMB) + // .rev() + // .fold(AB::Expr::zero(), |acc, z| acc.double() + get_xored_bit(z)); + // 
builder.assert_eq( + // computed_a_prime_prime_prime_0_0_limb, + // a_prime_prime_prime_0_0_limb, + // ); + // } + + array::new(2, |limb| { + let limb_bits_be: expr[] = array::reverse(array::new(32, |z| get_xored_bit(limb * 32 + z))); + a_prime_prime_prime_0_0_limbs[limb] = bits_to_value_be(limb_bits_be) + }); + + // // Enforce that this round's output equals the next round's input. + // for x in 0..5 { + // for y in 0..5 { + // for limb in 0..U64_LIMBS { + // let output = local.a_prime_prime_prime(y, x, limb); + // let input = next.a[y][x][limb]; + // builder + // .when_transition() + // .when(not_final_step.clone()) + // .assert_eq(output, input); + // } + // } + // } + + // final_step and is_last should never be 1 at the same time, because final_step is 1 at multiples of 24 and can never be 1 at power of 2. + // (1 - final_step - is_last) is used to deactivate constraints that reference the next row, whenever we are at the latch row or the last row of the trace (so that we don't incorrectly cycle to the first row). 
+ array::new(50, |i| { + let y = i / 10; + let x = (i / 2) % 5; + let limb = i % 2; + (1 - final_step - is_last) * (a_prime_prime_prime(y, x, limb) - a[i]') = 0 + }); + + // pub fn a_prime_prime_prime(&self, y: usize, x: usize, limb: usize) -> T { + // debug_assert!(y < 5); + // debug_assert!(x < 5); + // debug_assert!(limb < U64_LIMBS); + + // if y == 0 && x == 0 { + // self.a_prime_prime_prime_0_0_limbs[limb] + // } else { + // self.a_prime_prime[y][x][limb] + // } + // } + + let a_prime_prime_prime: int, int, int -> expr = |y, x, limb| if y == 0 && x == 0 { a_prime_prime_prime_0_0_limbs[limb] } else { a_prime_prime[y * 10 + x * 2 + limb] }; + + let R: int[] = [ + 0, 36, 3, 41, 18, + 1, 44, 10, 45, 2, + 62, 6, 43, 15, 61, + 28, 55, 25, 21, 56, + 27, 20, 39, 8, 14 + ]; + + let RC: int[] = [ + 0x0000000000000001, + 0x0000000000008082, + 0x800000000000808A, + 0x8000000080008000, + 0x000000000000808B, + 0x0000000080000001, + 0x8000000080008081, + 0x8000000000008009, + 0x000000000000008A, + 0x0000000000000088, + 0x0000000080008009, + 0x000000008000000A, + 0x000000008000808B, + 0x800000000000008B, + 0x8000000000008089, + 0x8000000000008003, + 0x8000000000008002, + 0x8000000000000080, + 0x000000000000800A, + 0x800000008000000A, + 0x8000000080008081, + 0x8000000000008080, + 0x0000000080000001, + 0x8000000080008008 + ]; + + let RC_BITS: int[] = array::new(24 * 64, |i| { + let rc_idx = i / 64; + let bit = i % 64; + RC[rc_idx] >> bit & 0x1 + }); + + // Prover function section (for witness generation). + + // // Populate C[x] = xor(A[x, 0], A[x, 1], A[x, 2], A[x, 3], A[x, 4]). 
+ // for x in 0..5 { + // for z in 0..64 { + // let limb = z / BITS_PER_LIMB; + // let bit_in_limb = z % BITS_PER_LIMB; + // let a = (0..5).map(|y| { + // let a_limb = row.a[y][x][limb].as_canonical_u64() as u16; + // ((a_limb >> bit_in_limb) & 1) != 0 + // }); + // row.c[x][z] = F::from_bool(a.fold(false, |acc, x| acc ^ x)); + // } + // } + + let query_c: int, int, int -> int = query |x, limb, bit_in_limb| + utils::fold( + 5, + |y| (int(eval(a[y * 10 + x * 2 + limb])) >> bit_in_limb) & 0x1, + 0, + |acc, e| acc ^ e + ); + + query |row| { + let _ = array::map_enumerated(c, |i, c_i| { + let x = i / 64; + let z = i % 64; + let limb = z / 32; + let bit_in_limb = z % 32; + + provide_value(c_i, row, fe(query_c(x, limb, bit_in_limb))); + }); + }; + + // // Populate C'[x, z] = xor(C[x, z], C[x - 1, z], C[x + 1, z - 1]). + // for x in 0..5 { + // for z in 0..64 { + // row.c_prime[x][z] = xor([ + // row.c[x][z], + // row.c[(x + 4) % 5][z], + // row.c[(x + 1) % 5][(z + 63) % 64], + // ]); + // } + // } + + let query_c_prime: int, int -> int = query |x, z| + int(eval(c[x * 64 + z])) ^ + int(eval(c[((x + 4) % 5) * 64 + z])) ^ + int(eval(c[((x + 1) % 5) * 64 + (z + 63) % 64])); + + query |row| { + let _ = array::map_enumerated(c_prime, |i, c_i| { + let x = i / 64; + let z = i % 64; + + provide_value(c_i, row, fe(query_c_prime(x, z))); + }); + }; + + // // Populate A'. To avoid shifting indices, we rewrite + // // A'[x, y, z] = xor(A[x, y, z], C[x - 1, z], C[x + 1, z - 1]) + // // as + // // A'[x, y, z] = xor(A[x, y, z], C[x, z], C'[x, z]). 
+ // for x in 0..5 { + // for y in 0..5 { + // for z in 0..64 { + // let limb = z / BITS_PER_LIMB; + // let bit_in_limb = z % BITS_PER_LIMB; + // let a_limb = row.a[y][x][limb].as_canonical_u64() as u16; + // let a_bit = F::from_bool(((a_limb >> bit_in_limb) & 1) != 0); + // row.a_prime[y][x][z] = xor([a_bit, row.c[x][z], row.c_prime[x][z]]); + // } + // } + // } + + let query_a_prime: int, int, int, int, int -> int = query |x, y, z, limb, bit_in_limb| + ((int(eval(a[y * 10 + x * 2 + limb])) >> bit_in_limb) & 0x1) ^ + int(eval(c[x * 64 + z])) ^ + int(eval(c_prime[x * 64 + z])); + + query |row| { + let _ = array::map_enumerated(a_prime, |i, a_i| { + let y = i / 320; + let x = (i / 64) % 5; + let z = i % 64; + let limb = z / 32; + let bit_in_limb = z % 32; + + provide_value(a_i, row, fe(query_a_prime(x, y, z, limb, bit_in_limb))); + }); + }; + + // // Populate A''.P + // // A''[x, y] = xor(B[x, y], andn(B[x + 1, y], B[x + 2, y])). + // for y in 0..5 { + // for x in 0..5 { + // for limb in 0..U64_LIMBS { + // row.a_prime_prime[y][x][limb] = (limb * BITS_PER_LIMB..(limb + 1) * BITS_PER_LIMB) + // .rev() + // .fold(F::zero(), |acc, z| { + // let bit = xor([ + // row.b(x, y, z), + // andn(row.b((x + 1) % 5, y, z), row.b((x + 2) % 5, y, z)), + // ]); + // acc.double() + bit + // }); + // } + // } + // } + + let query_a_prime_prime: int, int, int -> int = query |x, y, limb| + utils::fold( + 32, + |z| + int(eval(b(x, y, (limb + 1) * 32 - 1 - z))) ^ + int(eval(andn(b((x + 1) % 5, y, (limb + 1) * 32 - 1 - z), + b((x + 2) % 5, y, (limb + 1) * 32 - 1 - z)))), + 0, + |acc, e| acc * 2 + e + ); + + query |row| { + let _ = array::map_enumerated(a_prime_prime, |i, a_i| { + let y = i / 10; + let x = (i / 2) % 5; + let limb = i % 2; + + provide_value(a_i, row, fe(query_a_prime_prime(x, y, limb))); + }); + }; + + // // For the XOR, we split A''[0, 0] to bits. 
+ // let mut val = 0; // smaller address correspond to less significant limb + // for limb in 0..U64_LIMBS { + // let val_limb = row.a_prime_prime[0][0][limb].as_canonical_u64(); + // val |= val_limb << (limb * BITS_PER_LIMB); + // } + // let val_bits: Vec = (0..64) // smaller address correspond to less significant bit + // .scan(val, |acc, _| { + // let bit = (*acc & 1) != 0; + // *acc >>= 1; + // Some(bit) + // }) + // .collect(); + // for (i, bit) in row.a_prime_prime_0_0_bits.iter_mut().enumerate() { + // *bit = F::from_bool(val_bits[i]); + // } + + query |row| { + let _ = array::map_enumerated(a_prime_prime_0_0_bits, |i, a_i| { + let limb = i / 32; + let bit_in_limb = i % 32; + + provide_value( + a_i, + row, + fe((int(eval(a_prime_prime[limb])) >> bit_in_limb) & 0x1) + ); + }); + }; + + // // A''[0, 0] is additionally xor'd with RC. + // for limb in 0..U64_LIMBS { + // let rc_lo = rc_value_limb(round, limb); + // row.a_prime_prime_prime_0_0_limbs[limb] = + // F::from_canonical_u16(row.a_prime_prime[0][0][limb].as_canonical_u64() as u16 ^ rc_lo); + // } + + let query_a_prime_prime_prime_0_0_limbs: int, int -> int = query |round, limb| + int(eval(a_prime_prime[limb])) ^ + ((RC[round] >> (limb * 32)) & 0xffffffff); + + query |row| { + let _ = array::new(2, |limb| { + provide_value( + a_prime_prime_prime_0_0_limbs[limb], + row, + fe(query_a_prime_prime_prime_0_0_limbs(row % NUM_ROUNDS, limb) + )); + }); + }; +} diff --git a/std/machines/hash/mod.asm b/std/machines/hash/mod.asm index 46f3c9e610..3b9a215595 100644 --- a/std/machines/hash/mod.asm +++ b/std/machines/hash/mod.asm @@ -6,4 +6,5 @@ mod poseidon2_common; mod poseidon2_bb; mod poseidon2_gl; mod keccakf16; -mod keccakf16_memory; \ No newline at end of file +mod keccakf16_memory; +mod keccakf32_memory; diff --git a/test_data/std/keccakf32_memory_test.asm b/test_data/std/keccakf32_memory_test.asm new file mode 100644 index 0000000000..fbea832b3e --- /dev/null +++ b/test_data/std/keccakf32_memory_test.asm @@ 
-0,0 +1,83 @@ +use std::machines::hash::keccakf32_memory::Keccakf32Memory; +use std::machines::large_field::memory::Memory; +use std::machines::range::Byte2; + +let MIN: int = 2**5; +let MAX: int = 2**8; +machine Main with min_degree: MIN, max_degree: MAX { + reg pc[@pc]; + + reg X[<=]; + + reg Y[<=]; + + Byte2 byte2; + Memory memory(byte2, MIN, MAX); + + Keccakf32Memory keccakf32_memory(memory, MIN, MAX); + + col fixed STEP(i) { i * 2 }; + + // Big endian. + // Usage: mstore addr, val; + instr mstore X, Y -> link ~> memory.mstore(X, STEP, Y); + // Usage: keccakf32_memory input_addr, output_addr; + instr keccakf32_memory X, Y -> link ~> keccakf32_memory.keccakf32_memory(X, Y, STEP); + + col witness val; + // Usage: assert_eq addr, val; + instr assert_eq X, Y -> + link ~> val = memory.mload(X, STEP) + { + val = Y + } + + function main { + // Test 1: 0 for all 25 64-bit inputs except setting the second 64-bit input to 1. All 64-bit inputs in chunks of 2 32-bit big endian limbs. + mstore 0, 0; + mstore 4, 0; + mstore 8, 0; + mstore 12, 1; + mstore 16, 0; + mstore 20, 0; + mstore 24, 0; + mstore 28, 0; + mstore 32, 0; + mstore 36, 0; + mstore 40, 0; + mstore 44, 0; + mstore 48, 0; + mstore 52, 0; + mstore 56, 0; + mstore 60, 0; + mstore 64, 0; + mstore 68, 0; + mstore 72, 0; + mstore 76, 0; + mstore 80, 0; + mstore 84, 0; + mstore 88, 0; + mstore 92, 0; + mstore 96, 0; + // Input address 0. Output address 200. + keccakf32_memory 0, 200; + // Selectively checking a few registers only. + // Test vector generated from Tiny Keccak. + assert_eq 200, 0xfdbbbbdf; + assert_eq 204, 0x9001405f; + assert_eq 392, 0xeac9f006; + assert_eq 396, 0x664deb35; + + // Test 2: Same as Test 1 but sets input and output addresses to be the same. + // No need to rerun the mstores because input values from Test 1 should be intact. + keccakf32_memory 0, 0; + // Selectively checking a few registers only. + // Test vector generated from Tiny Keccak. 
+ assert_eq 0, 0xfdbbbbdf; + assert_eq 4, 0x9001405f; + assert_eq 192, 0xeac9f006; + assert_eq 196, 0x664deb35; + + return; + } +} From 72632ab2d32f5ada9b6a77b2461fd5388250b6d7 Mon Sep 17 00:00:00 2001 From: Leo Date: Thu, 28 Nov 2024 11:38:05 +0100 Subject: [PATCH 04/57] Simplify tests (#2161) This PR: - Changes the RISCV tests to use MockProver + Plonky3 only - Changes the pipeline std/pil/asm tests to use mostly pilcom/mock, and sometimes use p3/halo2/estark - Changes the degree of a couple std/asm tests to be smaller --------- Co-authored-by: Leandro Pacheco Co-authored-by: Georg Wiese --- pipeline/src/test_util.rs | 38 ++-- pipeline/tests/asm.rs | 196 +++++++++--------- pipeline/tests/pil.rs | 171 +++++++-------- pipeline/tests/powdr_std.rs | 134 +++++------- riscv-executor/src/lib.rs | 25 ++- riscv-executor/src/memory.rs | 68 ++---- test_data/std/memory_large_test.asm | 2 +- .../memory_large_test_parallel_accesses.asm | 4 +- ...emory_large_with_bootloader_write_test.asm | 4 +- test_data/std/memory_small_test.asm | 4 +- test_data/std/poseidon2_bb_test.asm | 8 +- test_data/std/poseidon2_gl_test.asm | 8 +- test_data/std/poseidon_bb_test.asm | 10 +- test_data/std/poseidon_gl_memory_test.asm | 8 +- test_data/std/rotate_large_test.asm | 4 +- test_data/std/rotate_small_test.asm | 6 +- test_data/std/shift_large_test.asm | 4 +- test_data/std/shift_small_test.asm | 4 +- test_data/std/split_bb_test.asm | 4 +- test_data/std/split_bn254_test.asm | 4 +- test_data/std/split_gl_test.asm | 4 +- test_data/std/write_once_memory_test.asm | 2 +- 22 files changed, 321 insertions(+), 391 deletions(-) diff --git a/pipeline/src/test_util.rs b/pipeline/src/test_util.rs index 73e1120cee..a058728657 100644 --- a/pipeline/src/test_util.rs +++ b/pipeline/src/test_util.rs @@ -47,40 +47,42 @@ pub fn make_prepared_pipeline( pipeline } -/// Tests witness generation, pilcom, halo2, estark and plonky3. 
-pub fn regular_test(file_name: &str, inputs: &[i32]) { - let inputs_gl = inputs.iter().map(|x| GoldilocksField::from(*x)).collect(); - let pipeline_gl = make_prepared_pipeline(file_name, inputs_gl, vec![]); - - test_mock_backend(pipeline_gl.clone()); - run_pilcom_with_backend_variant(pipeline_gl.clone(), BackendVariant::Composite).unwrap(); - gen_estark_proof_with_backend_variant(pipeline_gl.clone(), BackendVariant::Composite); - test_plonky3_pipeline(pipeline_gl); +/// Tests witness generation, mock prover, pilcom and plonky3 with +/// Goldilocks, BabyBear and KoalaBear. +pub fn regular_test_all_fields(file_name: &str, inputs: &[i32]) { + regular_test_gl(file_name, inputs); + regular_test_small_field(file_name, inputs); +} - let inputs_bn = inputs.iter().map(|x| Bn254Field::from(*x)).collect(); - let pipeline_bn = make_prepared_pipeline(file_name, inputs_bn, vec![]); - test_halo2_with_backend_variant(pipeline_bn, BackendVariant::Composite); +pub fn regular_test_small_field(file_name: &str, inputs: &[i32]) { + regular_test_bb(file_name, inputs); + regular_test_kb(file_name, inputs); +} +/// Tests witness generation, mock prover, pilcom and plonky3 with BabyBear. +pub fn regular_test_bb(file_name: &str, inputs: &[i32]) { let inputs_bb = inputs.iter().map(|x| BabyBearField::from(*x)).collect(); let pipeline_bb = make_prepared_pipeline(file_name, inputs_bb, vec![]); + test_mock_backend(pipeline_bb.clone()); test_plonky3_pipeline(pipeline_bb); +} +/// Tests witness generation, mock prover, pilcom and plonky3 with BabyBear and KoalaBear. 
+pub fn regular_test_kb(file_name: &str, inputs: &[i32]) { let inputs_kb = inputs.iter().map(|x| KoalaBearField::from(*x)).collect(); let pipeline_kb = make_prepared_pipeline(file_name, inputs_kb, vec![]); + test_mock_backend(pipeline_kb.clone()); test_plonky3_pipeline(pipeline_kb); } -pub fn regular_test_without_small_field(file_name: &str, inputs: &[i32]) { +/// Tests witness generation, mock prover, pilcom and plonky3 with Goldilocks. +pub fn regular_test_gl(file_name: &str, inputs: &[i32]) { let inputs_gl = inputs.iter().map(|x| GoldilocksField::from(*x)).collect(); let pipeline_gl = make_prepared_pipeline(file_name, inputs_gl, vec![]); test_mock_backend(pipeline_gl.clone()); run_pilcom_with_backend_variant(pipeline_gl.clone(), BackendVariant::Composite).unwrap(); - gen_estark_proof_with_backend_variant(pipeline_gl, BackendVariant::Composite); - - let inputs_bn = inputs.iter().map(|x| Bn254Field::from(*x)).collect(); - let pipeline_bn = make_prepared_pipeline(file_name, inputs_bn, vec![]); - test_halo2_with_backend_variant(pipeline_bn, BackendVariant::Composite); + test_plonky3_pipeline(pipeline_gl); } pub fn test_pilcom(pipeline: Pipeline) { diff --git a/pipeline/tests/asm.rs b/pipeline/tests/asm.rs index c9078bcd3f..04c1e327a8 100644 --- a/pipeline/tests/asm.rs +++ b/pipeline/tests/asm.rs @@ -5,10 +5,10 @@ use powdr_linker::LinkerParams; use powdr_number::{FieldElement, GoldilocksField}; use powdr_pipeline::{ test_util::{ - asm_string_to_pil, gen_estark_proof_with_backend_variant, make_prepared_pipeline, - make_simple_prepared_pipeline, regular_test, regular_test_without_small_field, - resolve_test_file, run_pilcom_with_backend_variant, test_halo2_with_backend_variant, - test_pilcom, test_plonky3_pipeline, test_plonky3_with_backend_variant, BackendVariant, + asm_string_to_pil, make_prepared_pipeline, make_simple_prepared_pipeline, + regular_test_all_fields, regular_test_gl, resolve_test_file, + run_pilcom_with_backend_variant, test_mock_backend, test_pilcom, 
test_plonky3_pipeline, + BackendVariant, }, Pipeline, }; @@ -22,7 +22,8 @@ fn slice_to_vec(arr: &[i32]) -> Vec { fn sqrt_asm() { let f = "asm/sqrt.asm"; let i = [3]; - regular_test_without_small_field(f, &i); + let pipeline: Pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); + test_mock_backend(pipeline); } #[test] @@ -30,26 +31,23 @@ fn block_machine_exact_number_of_rows_asm() { let f = "asm/block_machine_exact_number_of_rows.asm"; // This test needs machines to be of unequal length. Also, this is mostly testing witgen, so // we just run one backend that supports variable-length machines. - test_plonky3_with_backend_variant::(f, Vec::new(), BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn challenges_asm() { let f = "asm/challenges.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_halo2_with_backend_variant(pipeline, BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + // TODO Mock prover doesn't support this test yet. + test_plonky3_pipeline(pipeline); } #[test] fn simple_sum_asm() { let f = "asm/simple_sum.asm"; let i = [16, 4, 1, 2, 8, 5]; - regular_test(f, &i); - test_plonky3_with_backend_variant::( - f, - slice_to_vec(&i), - BackendVariant::Monolithic, - ); + regular_test_all_fields(f, &i); } #[test] @@ -57,26 +55,28 @@ fn simple_sum_asm() { fn secondary_machine_plonk() { // Currently fails because no copy constraints are expressed in PIL yet. 
let f = "asm/secondary_machine_plonk.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, &[]); } #[test] fn secondary_block_machine_add2() { let f = "asm/secondary_block_machine_add2.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn second_phase_hint() { let f = "asm/second_phase_hint.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_halo2_with_backend_variant(pipeline, BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + // TODO Mock prover doesn't support this test yet. + test_plonky3_pipeline(pipeline); } #[test] fn mem_write_once() { let f = "asm/mem_write_once.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] @@ -95,69 +95,72 @@ fn mem_write_once_external_write() { ..Default::default() }); pipeline.compute_witness().unwrap(); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] fn block_machine_cache_miss() { let f = "asm/block_machine_cache_miss.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn palindrome() { let f = "asm/palindrome.asm"; let i = [7, 1, 7, 3, 9, 3, 7, 1]; - regular_test_without_small_field(f, &i); + let pipeline: Pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); + test_mock_backend(pipeline); } #[test] fn single_function_vm() { let f = "asm/single_function_vm.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn empty() { let f = "asm/empty.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn single_operation() { let f = "asm/single_operation.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn empty_vm() { let f = "asm/empty_vm.asm"; - 
regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn vm_to_block_unique_interface() { let f = "asm/vm_to_block_unique_interface.asm"; - regular_test_without_small_field(f, &[]); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn vm_to_block_to_block() { let f = "asm/vm_to_block_to_block.asm"; - test_pilcom(make_simple_prepared_pipeline(f)); - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); + regular_test_gl(f, &[]); } #[test] fn block_to_block() { let f = "asm/block_to_block.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn block_to_block_empty_submachine() { let f = "asm/block_to_block_empty_submachine.asm"; - let mut pipeline = make_simple_prepared_pipeline(f); + let mut pipeline: Pipeline = make_simple_prepared_pipeline(f); let witness = pipeline.compute_witness().unwrap(); let arith_size = witness @@ -168,25 +171,23 @@ fn block_to_block_empty_submachine() { .len(); assert_eq!(arith_size, 0); - test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); - - let pipeline = make_simple_prepared_pipeline::(f); + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); } #[test] fn block_to_block_with_bus_monolithic() { let f = "asm/block_to_block_with_bus.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_halo2_with_backend_variant(pipeline.clone(), BackendVariant::Monolithic); - let pipeline = make_simple_prepared_pipeline::(f); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + // TODO Mock prover doesn't support this test yet. test_plonky3_pipeline(pipeline); } #[test] fn block_to_block_with_bus_different_sizes() { let f = "asm/block_to_block_with_bus_different_sizes.asm"; - let pipeline = make_simple_prepared_pipeline::(f); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + // TODO Mock prover doesn't support this test yet. 
test_plonky3_pipeline(pipeline); } @@ -200,52 +201,49 @@ fn block_to_block_with_bus_composite() { // - `CompositeBackend::verify` simply verifies each machine proof independently, using the local // challenges. As a result, the challenges during verification differ and the constraints are // not satisfied. + + use powdr_pipeline::test_util::test_halo2_with_backend_variant; let f = "asm/block_to_block_with_bus.asm"; let pipeline = make_simple_prepared_pipeline(f); - test_halo2_with_backend_variant(pipeline.clone(), BackendVariant::Composite); + // TODO Mock prover doesn't support this test yet. + test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); } #[test] fn vm_instr_param_mapping() { let f = "asm/vm_instr_param_mapping.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn vm_to_block_multiple_interfaces() { let f = "asm/vm_to_block_multiple_interfaces.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn vm_to_vm() { let f = "asm/vm_to_vm.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] #[ignore = "Too slow"] fn vm_to_vm_dynamic_trace_length() { let f = "asm/vm_to_vm_dynamic_trace_length.asm"; - run_pilcom_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite) - .unwrap(); - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); - gen_estark_proof_with_backend_variant( - make_simple_prepared_pipeline(f), - BackendVariant::Composite, - ); + regular_test_gl(f, &[]); } #[test] fn vm_to_vm_to_block() { let f = "asm/vm_to_vm_to_block.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn vm_to_block_array() { let f = "asm/vm_to_block_array.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] @@ -268,115 +266,122 @@ fn dynamic_vadcop() { // Because machines have different lengths, this can only be proven // with a composite proof. 
run_pilcom_with_backend_variant(pipeline_gl.clone(), BackendVariant::Composite).unwrap(); - gen_estark_proof_with_backend_variant(pipeline_gl, BackendVariant::Composite); - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); + test_mock_backend(pipeline_gl.clone()); + test_plonky3_pipeline(pipeline_gl); } #[test] fn vm_to_vm_to_vm() { let f = "asm/vm_to_vm_to_vm.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn vm_to_block_multiple_links() { let f = "asm/permutations/vm_to_block_multiple_links.asm"; - regular_test(f, &[]); + regular_test_all_fields(f, &[]); } #[test] fn mem_read_write() { let f = "asm/mem_read_write.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn mem_read_write_no_memory_accesses() { let f = "asm/mem_read_write_no_memory_accesses.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn mem_read_write_with_bootloader() { let f = "asm/mem_read_write_with_bootloader.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn mem_read_write_large_diffs() { let f = "asm/mem_read_write_large_diffs.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn multi_assign() { let f = "asm/multi_assign.asm"; let i = [7]; - regular_test(f, &i); + regular_test_all_fields(f, &i); } #[test] fn multi_return() { let f = "asm/multi_return.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] #[should_panic = "called `Result::unwrap()` on an `Err` value: [\"Assignment register `Z` is incompatible with 
`square_and_double(3)`. Try using `<==` with no explicit assignment registers.\", \"Assignment register `Y` is incompatible with `square_and_double(3)`. Try using `<==` with no explicit assignment registers.\"]"] fn multi_return_wrong_assignment_registers() { let f = "asm/multi_return_wrong_assignment_registers.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] #[should_panic = "Result::unwrap()` on an `Err` value: [\"Mismatched number of registers for assignment A, B <=Y= square_and_double(3);\"]"] fn multi_return_wrong_assignment_register_length() { let f = "asm/multi_return_wrong_assignment_register_length.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn bit_access() { let f = "asm/bit_access.asm"; let i = [20]; - regular_test_without_small_field(f, &i); + let pipeline: Pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); + test_mock_backend(pipeline); } #[test] fn sqrt() { let f = "asm/sqrt.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn functional_instructions() { let f = "asm/functional_instructions.asm"; let i = [20]; - regular_test_without_small_field(f, &i); + let pipeline: Pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); + test_mock_backend(pipeline); } #[test] fn full_pil_constant() { let f = "asm/full_pil_constant.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn intermediate() { let f = "asm/intermediate.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn intermediate_nested() { let f = "asm/intermediate_nested.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn pil_at_module_level() { let f = "asm/pil_at_module_level.asm"; - 
regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[cfg(feature = "estark-starky")] @@ -421,37 +426,38 @@ fn read_poly_files() { #[test] fn enum_in_asm() { let f = "asm/enum_in_asm.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn pass_range_constraints() { let f = "asm/pass_range_constraints.asm"; - regular_test_without_small_field(f, Default::default()); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn side_effects() { let f = "asm/side_effects.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] fn multiple_signatures() { let f = "asm/multiple_signatures.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn permutation_simple() { let f = "asm/permutations/simple.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] fn permutation_to_block() { let f = "asm/permutations/vm_to_block.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] @@ -459,51 +465,51 @@ fn permutation_to_block() { fn permutation_to_vm() { // TODO: witgen issue: Machine incorrectly detected as block machine. 
let f = "asm/permutations/vm_to_vm.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn permutation_to_block_to_block() { let f = "asm/permutations/block_to_block.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] #[should_panic = "has incoming permutations but doesn't declare call_selectors"] fn permutation_incoming_needs_selector() { let f = "asm/permutations/incoming_needs_selector.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn call_selectors_with_no_permutation() { let f = "asm/permutations/call_selectors_with_no_permutation.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] #[ignore = "Too slow"] fn vm_args() { let f = "asm/vm_args.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] fn vm_args_memory() { let f = "asm/vm_args_memory.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } #[test] fn vm_args_relative_path() { let f = "asm/vm_args_relative_path.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn vm_args_two_levels() { let f = "asm/vm_args_two_levels.asm"; - regular_test_without_small_field(f, Default::default()); + regular_test_gl(f, Default::default()); } mod reparse { @@ -544,16 +550,16 @@ mod book { fn hello_world_asm_fail() { let f = "asm/book/hello_world.asm"; let i = [2]; - let pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); - test_pilcom(pipeline); + let pipeline: Pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); + test_mock_backend(pipeline); } #[test] #[should_panic = "FailedAssertion(\"This should fail.\")"] fn failing_assertion() { let f = "asm/failing_assertion.asm"; - let pipeline = 
make_simple_prepared_pipeline(f); - test_pilcom(pipeline); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] @@ -749,8 +755,8 @@ fn keccak() { #[test] fn connect_no_witgen() { let f = "asm/connect_no_witgen.asm"; - let i = []; - let pipeline = make_prepared_pipeline(f, slice_to_vec(&i), vec![]); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + // TODO Mock prover doesn't support this test yet. test_pilcom(pipeline); } @@ -772,15 +778,15 @@ fn trait_parsing() { #[test] fn dynamic_fixed_cols() { let f = "asm/dynamic_fixed_cols.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] fn type_vars_in_local_decl() { let f = "asm/type_vars_in_local_decl.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); } #[test] @@ -810,7 +816,7 @@ fn types_in_expressions() { #[test] fn set_hint() { let f = "asm/set_hint.asm"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] diff --git a/pipeline/tests/pil.rs b/pipeline/tests/pil.rs index 9b9ead722b..a54859d562 100644 --- a/pipeline/tests/pil.rs +++ b/pipeline/tests/pil.rs @@ -1,14 +1,12 @@ #[cfg(feature = "halo2")] -use powdr_number::Bn254Field; use powdr_number::GoldilocksField; use powdr_pipeline::{ test_util::{ assert_proofs_fail_for_invalid_witnesses, assert_proofs_fail_for_invalid_witnesses_estark, - assert_proofs_fail_for_invalid_witnesses_halo2, - assert_proofs_fail_for_invalid_witnesses_pilcom, gen_estark_proof_with_backend_variant, - make_prepared_pipeline, make_simple_prepared_pipeline, regular_test, - run_pilcom_with_backend_variant, test_halo2_with_backend_variant, test_pilcom, - test_plonky3_with_backend_variant, test_stwo, BackendVariant, + 
assert_proofs_fail_for_invalid_witnesses_mock, + assert_proofs_fail_for_invalid_witnesses_pilcom, make_prepared_pipeline, + make_simple_prepared_pipeline, regular_test_all_fields, regular_test_gl, + test_halo2_with_backend_variant, test_mock_backend, test_pilcom, test_stwo, BackendVariant, }, Pipeline, }; @@ -28,25 +26,22 @@ fn lookup_with_selector() { // Valid witness let f = "pil/lookup_with_selector.pil"; - #[cfg(feature = "halo2")] - { - use powdr_pipeline::test_util::resolve_test_file; - use powdr_pipeline::Pipeline; - let witness = [2, 42, 4, 17]; - Pipeline::default() - .from_file(resolve_test_file(f)) - .set_witness(vec![( - "main::w".to_string(), - witness.iter().cloned().map(Bn254Field::from).collect(), - )]) - .with_backend(powdr_backend::BackendType::Halo2Mock, None) - .compute_proof() - .unwrap(); - } + use powdr_pipeline::test_util::resolve_test_file; + use powdr_pipeline::Pipeline; + let witness = [2, 42, 4, 17]; + Pipeline::default() + .from_file(resolve_test_file(f)) + .set_witness(vec![( + "main::w".to_string(), + witness.iter().cloned().map(GoldilocksField::from).collect(), + )]) + .with_backend(powdr_backend::BackendType::Mock, None) + .compute_proof() + .unwrap(); // Invalid witness: 0 is not in the set {2, 4} let witness = vec![("main::w".to_string(), vec![0, 42, 4, 17])]; - assert_proofs_fail_for_invalid_witnesses_halo2(f, &witness); + assert_proofs_fail_for_invalid_witnesses_mock(f, &witness); assert_proofs_fail_for_invalid_witnesses_pilcom(f, &witness); } @@ -71,25 +66,22 @@ fn permutation_with_selector() { // Valid witness let f = "pil/permutation_with_selector.pil"; - #[cfg(feature = "halo2")] - { - use powdr_pipeline::test_util::resolve_test_file; - use powdr_pipeline::Pipeline; - let witness = [2, 42, 4, 17]; - Pipeline::default() - .from_file(resolve_test_file(f)) - .set_witness(vec![( - "main::w".to_string(), - witness.iter().cloned().map(Bn254Field::from).collect(), - )]) - .with_backend(powdr_backend::BackendType::Halo2Mock, None) - 
.compute_proof() - .unwrap(); - } + use powdr_pipeline::test_util::resolve_test_file; + use powdr_pipeline::Pipeline; + let witness = [2, 42, 4, 17]; + Pipeline::default() + .from_file(resolve_test_file(f)) + .set_witness(vec![( + "main::w".to_string(), + witness.iter().cloned().map(GoldilocksField::from).collect(), + )]) + .with_backend(powdr_backend::BackendType::Mock, None) + .compute_proof() + .unwrap(); // Invalid witness: 0 is not in the set {2, 4} let witness = vec![("main::w".to_string(), vec![0, 42, 4, 17])]; - assert_proofs_fail_for_invalid_witnesses_halo2(f, &witness); + assert_proofs_fail_for_invalid_witnesses_mock(f, &witness); assert_proofs_fail_for_invalid_witnesses_pilcom(f, &witness); } @@ -111,12 +103,7 @@ fn permutation_with_selector_starky() { #[test] fn fibonacci() { let f = "pil/fibonacci.pil"; - regular_test(f, Default::default()); - test_plonky3_with_backend_variant::( - f, - Default::default(), - BackendVariant::Monolithic, - ); + regular_test_all_fields(f, Default::default()); } #[test] @@ -138,7 +125,8 @@ fn fibonacci_invalid_witness() { ("Fibonacci::x".to_string(), vec![1, 1, 10, 3]), ("Fibonacci::y".to_string(), vec![1, 2, 3, 13]), ]; - assert_proofs_fail_for_invalid_witnesses(f, &witness); + assert_proofs_fail_for_invalid_witnesses_mock(f, &witness); + assert_proofs_fail_for_invalid_witnesses_pilcom(f, &witness); // All constraints are valid, except the initial row. 
// The following constraint should fail in row 3: @@ -147,19 +135,20 @@ fn fibonacci_invalid_witness() { ("Fibonacci::x".to_string(), vec![1, 2, 3, 5]), ("Fibonacci::y".to_string(), vec![2, 3, 5, 8]), ]; - assert_proofs_fail_for_invalid_witnesses(f, &witness); + assert_proofs_fail_for_invalid_witnesses_mock(f, &witness); + assert_proofs_fail_for_invalid_witnesses_pilcom(f, &witness); } #[test] fn constant_in_identity() { let f = "pil/constant_in_identity.pil"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn fib_arrays() { let f = "pil/fib_arrays.pil"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] @@ -167,7 +156,8 @@ fn fib_arrays() { fn external_witgen_fails_if_none_provided() { let f = "pil/external_witgen.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -175,7 +165,8 @@ fn external_witgen_a_provided() { let f = "pil/external_witgen.pil"; let external_witness = vec![("main::a".to_string(), vec![GoldilocksField::from(3); 16])]; let pipeline = make_prepared_pipeline(f, Default::default(), external_witness); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -183,7 +174,8 @@ fn external_witgen_b_provided() { let f = "pil/external_witgen.pil"; let external_witness = vec![("main::b".to_string(), vec![GoldilocksField::from(4); 16])]; let pipeline = make_prepared_pipeline(f, Default::default(), external_witness); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -194,7 +186,8 @@ fn external_witgen_both_provided() { ("main::b".to_string(), vec![GoldilocksField::from(4); 16]), ]; let pipeline = make_prepared_pipeline(f, Default::default(), external_witness); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ 
-207,7 +200,8 @@ fn external_witgen_fails_on_conflicting_external_witness() { ("main::b".to_string(), vec![GoldilocksField::from(3); 16]), ]; let pipeline = make_prepared_pipeline(f, Default::default(), external_witness); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -216,9 +210,8 @@ fn sum_via_witness_query() { // Only 3 inputs -> Checks that if we return "None", the system still tries to figure it out on its own. let inputs = vec![7.into(), 8.into(), 2.into()]; let pipeline = make_prepared_pipeline(f, inputs, Default::default()); - test_pilcom(pipeline); - // prover query string uses a different convention, - // so we cannot directly use the halo2_proof and estark functions here. + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -230,8 +223,7 @@ fn witness_lookup() { .collect::>(); let pipeline = make_prepared_pipeline(f, inputs, Default::default()); test_pilcom(pipeline.clone()); - // halo2 fails with "gates must contain at least one constraint" - gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Monolithic); + test_mock_backend(pipeline); } #[test] @@ -239,49 +231,45 @@ fn witness_lookup() { fn underdetermined_zero_no_solution() { let f = "pil/underdetermined_zero_no_solution.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] fn pair_lookup() { let f = "pil/pair_lookup.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); - // halo2 would take too long for this - // starky would take too long for this in debug mode + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] fn block_lookup_or() { let f = "pil/block_lookup_or.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); - // halo2 would take too long for this - // starky would take too long for this in debug mode + 
test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] #[ignore = "Too slow"] fn block_lookup_or_permutation() { let f = "pil/block_lookup_or_permutation.pil"; - test_pilcom(make_simple_prepared_pipeline(f)); - // starky would take too long for this in debug mode + let pipeline = make_simple_prepared_pipeline(f); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] fn halo_without_lookup() { let f = "pil/halo_without_lookup.pil"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] fn add() { let f = "pil/add.pil"; - test_plonky3_with_backend_variant::( - f, - Default::default(), - BackendVariant::Monolithic, - ); + regular_test_gl(f, Default::default()); } #[test] @@ -293,8 +281,8 @@ fn stwo_add_and_equal() { fn simple_div() { let f = "pil/simple_div.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); - // starky would take too long for this in debug mode + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -302,7 +290,7 @@ fn single_line_blocks() { let f = "pil/single_line_blocks.pil"; let pipeline = make_simple_prepared_pipeline(f); test_pilcom(pipeline.clone()); - gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Monolithic); + test_mock_backend(pipeline); } #[test] @@ -310,15 +298,15 @@ fn two_block_machine_functions() { let f = "pil/two_block_machine_functions.pil"; let pipeline = make_simple_prepared_pipeline(f); test_pilcom(pipeline.clone()); - gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Monolithic); + test_mock_backend(pipeline); } #[test] fn fixed_columns() { let f = "pil/fixed_columns.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); - // Starky requires at least one witness column, this test has none. 
+ test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -326,6 +314,7 @@ fn witness_via_let() { let f = "pil/witness_via_let.pil"; let pipeline = make_simple_prepared_pipeline(f); test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -333,12 +322,13 @@ fn conditional_fixed_constraints() { let f = "pil/conditional_fixed_constraints.pil"; let pipeline = make_simple_prepared_pipeline(f); test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] fn referencing_arrays() { let f = "pil/referencing_array.pil"; - regular_test(f, Default::default()); + regular_test_all_fields(f, Default::default()); } #[test] @@ -346,7 +336,7 @@ fn naive_byte_decomposition_bn254() { // This should pass, because BN254 is a field that can fit all 64-Bit integers. let f = "pil/naive_byte_decomposition.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_halo2_with_backend_variant(pipeline, BackendVariant::Monolithic); + test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); } #[test] @@ -355,7 +345,8 @@ fn naive_byte_decomposition_gl() { // This should fail, because GoldilocksField is a field that cannot fit all 64-Bit integers. let f = "pil/naive_byte_decomposition.pil"; let pipeline = make_simple_prepared_pipeline(f); - test_pilcom(pipeline); + test_pilcom(pipeline.clone()); + test_mock_backend(pipeline); } #[test] @@ -363,13 +354,7 @@ fn different_degrees() { let f = "pil/different_degrees.pil"; // Because machines have different lengths, this can only be proven // with a composite proof. 
- run_pilcom_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite) - .unwrap(); - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); - gen_estark_proof_with_backend_variant( - make_simple_prepared_pipeline(f), - BackendVariant::Composite, - ); + regular_test_gl(f, Default::default()); } #[test] @@ -378,13 +363,7 @@ fn vm_to_block_dynamic_length() { let f = "pil/vm_to_block_dynamic_length.pil"; // Because machines have different lengths, this can only be proven // with a composite proof. - run_pilcom_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite) - .unwrap(); - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); - gen_estark_proof_with_backend_variant( - make_simple_prepared_pipeline(f), - BackendVariant::Composite, - ); + regular_test_gl(f, Default::default()); } #[test] diff --git a/pipeline/tests/powdr_std.rs b/pipeline/tests/powdr_std.rs index b8e7d6654c..92fb207462 100644 --- a/pipeline/tests/powdr_std.rs +++ b/pipeline/tests/powdr_std.rs @@ -7,10 +7,9 @@ use powdr_pipeline::{ test_runner::run_tests, test_util::{ evaluate_function, evaluate_integer_function, gen_estark_proof_with_backend_variant, - gen_halo2_proof, make_simple_prepared_pipeline, regular_test, - regular_test_without_small_field, std_analyzed, test_halo2_with_backend_variant, - test_mock_backend, test_plonky3_pipeline, test_plonky3_with_backend_variant, - BackendVariant, + gen_halo2_proof, make_simple_prepared_pipeline, regular_test_bb, regular_test_gl, + regular_test_small_field, std_analyzed, test_halo2_with_backend_variant, test_mock_backend, + test_plonky3_pipeline, BackendVariant, }, Pipeline, }; @@ -39,66 +38,56 @@ fn poseidon_bn254_test() { #[test] fn poseidon_gl_test() { let f = "std/poseidon_gl_test.asm"; - test_mock_backend(make_simple_prepared_pipeline::(f)); - gen_estark_proof_with_backend_variant( - make_simple_prepared_pipeline(f), - 
BackendVariant::Composite, - ); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn poseidon_gl_memory_test() { let f = "std/poseidon_gl_memory_test.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline.clone()); - gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Composite); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn keccakf16_test() { let f = "std/keccakf16_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn keccakf16_memory_test() { let f = "std/keccakf16_memory_test.asm"; - test_mock_backend(make_simple_prepared_pipeline::(f)); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn keccakf32_memory_test() { let f = "std/keccakf32_memory_test.asm"; - test_mock_backend(make_simple_prepared_pipeline::(f)); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn poseidon_bb_test() { let f = "std/poseidon_bb_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_bb(f, &[]); } #[test] #[ignore = "Too slow"] fn poseidon2_bb_test() { let f = "std/poseidon2_bb_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_bb(f, &[]); } #[test] #[ignore = "Too slow"] fn poseidon2_gl_test() { let f = "std/poseidon2_gl_test.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline.clone()); - gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Composite); + regular_test_gl(f, &[]); } #[test] @@ -112,50 +101,38 @@ fn split_bn254_test() { #[ignore = "Too slow"] fn split_gl_test() { let f = "std/split_gl_test.asm"; - let pipeline = make_simple_prepared_pipeline(f); - 
test_mock_backend(pipeline.clone()); - gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Composite); + regular_test_gl(f, &[]); } -#[cfg(feature = "plonky3")] #[test] #[ignore = "Too slow"] fn split_bb_test() { let f = "std/split_bb_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_bb(f, &[]); } #[test] #[ignore = "Too slow"] fn add_sub_small_test() { let f = "std/add_sub_small_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn arith_small_test() { let f = "std/arith_small_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn arith_large_test() { let f = "std/arith_large_test.asm"; - let pipeline = make_simple_prepared_pipeline::(f); - test_mock_backend(pipeline.clone()); - - // Running gen_estark_proof(f, Default::default()) - // is too slow for the PR tests. This will only create a single - // eStark proof instead of 3. - #[cfg(feature = "estark-starky")] - pipeline - .with_backend(powdr_backend::BackendType::EStarkStarkyComposite, None) - .compute_proof() - .unwrap(); - - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline); + // TODO We can't use P3 yet for this test because of degree 4 constraints. + //test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); } #[test] @@ -163,141 +140,136 @@ fn arith_large_test() { fn arith256_memory_large_test() { let f = "std/arith256_memory_large_test.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); - - // Running gen_estark_proof(f, Default::default()) - // is too slow for the PR tests. This will only create a single - // eStark proof instead of 3. 
- #[cfg(feature = "estark-starky")] - pipeline - .with_backend(powdr_backend::BackendType::EStarkStarkyComposite, None) - .compute_proof() - .unwrap(); - - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Composite); + test_mock_backend(pipeline); + // TODO We can't use P3 yet for this test because of degree 4 constraints. + //test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); } #[test] #[ignore = "Too slow"] fn memory_large_test() { let f = "std/memory_large_test.asm"; - regular_test_without_small_field(f, &[]); + regular_test_gl(f, &[]); + + // This one test was selected to also run estark. + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + gen_estark_proof_with_backend_variant(pipeline, BackendVariant::Composite); } #[test] #[ignore = "Too slow"] fn memory_large_with_bootloader_write_test() { let f = "std/memory_large_with_bootloader_write_test.asm"; - regular_test_without_small_field(f, &[]); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn memory_large_test_parallel_accesses() { let f = "std/memory_large_test_parallel_accesses.asm"; - regular_test_without_small_field(f, &[]); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn memory_small_test() { let f = "std/memory_small_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] fn permutation_via_challenges() { let f = "std/permutation_via_challenges.asm"; - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Monolithic); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_plonky3_pipeline(pipeline); + // TODO Mock prover doesn't support this test yet. 
} #[test] fn lookup_via_challenges() { let f = "std/lookup_via_challenges.asm"; - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Monolithic); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_plonky3_pipeline(pipeline); + // TODO Mock prover doesn't support this test yet. } #[test] fn lookup_via_challenges_range_constraint() { let f = "std/lookup_via_challenges_range_constraint.asm"; - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Monolithic); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_plonky3_pipeline(pipeline); + // TODO Mock prover doesn't support this test yet. } #[test] fn bus_lookup() { let f = "std/bus_lookup.asm"; - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Monolithic); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_plonky3_pipeline(pipeline); + // TODO Mock prover doesn't support this test yet. } #[test] fn bus_permutation() { let f = "std/bus_permutation.asm"; - test_halo2_with_backend_variant(make_simple_prepared_pipeline(f), BackendVariant::Monolithic); - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_plonky3_pipeline(pipeline); + // TODO Mock prover doesn't support this test yet. 
} #[test] fn write_once_memory_test() { let f = "std/write_once_memory_test.asm"; - regular_test(f, &[]); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn binary_large_test() { let f = "std/binary_large_test.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline.clone()); - test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn binary_small_8_test() { let f = "std/binary_small_8_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Composite); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn binary_small_test() { let f = "std/binary_small_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Composite); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn shift_large_test() { let f = "std/shift_large_test.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline.clone()); - test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn shift_small_test() { let f = "std/shift_small_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] #[ignore = "Too slow"] fn rotate_large_test() { let f = "std/rotate_large_test.asm"; - let pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline.clone()); - test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn rotate_small_test() { let f = "std/rotate_small_test.asm"; - test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_small_field(f, &[]); } #[test] diff --git a/riscv-executor/src/lib.rs b/riscv-executor/src/lib.rs index 4f591a0a62..d7d9f64965 100644 --- a/riscv-executor/src/lib.rs +++ b/riscv-executor/src/lib.rs @@ 
-531,10 +531,10 @@ mod builder { let mut ret = Self { pc_idx, curr_pc: PC_INITIAL_VAL.into(), + regs_machine: MemoryMachine::new("main_regs", &witness_cols), + memory_machine: MemoryMachine::new("main_memory", &witness_cols), trace: ExecutionTrace::new(witness_cols, reg_map, reg_writes, PC_INITIAL_VAL + 1), submachines, - regs_machine: MemoryMachine::new("main_regs"), - memory_machine: MemoryMachine::new("main_memory"), next_statement_line: 1, batch_to_line_map, max_rows: max_rows_len, @@ -777,17 +777,16 @@ mod builder { // add submachine traces to main trace // ---------------------------- for mut machine in self.submachines.into_values().map(|m| m.into_inner()) { - if machine.len() == 0 { - // ignore empty machines - continue; - } - machine.final_row_override(); - let range = namespace_degree_range(pil, machine.namespace()); - // extend with dummy blocks up to the required machine degree - let machine_degree = - std::cmp::max(machine.len().next_power_of_two(), range.min as u32); - while machine.len() < machine_degree { - machine.push_dummy_block(machine_degree as usize); + // if the machine is not empty, we need to fill it up to the degree + if machine.len() > 0 { + machine.final_row_override(); + let range = namespace_degree_range(pil, machine.namespace()); + // extend with dummy blocks up to the required machine degree + let machine_degree = + std::cmp::max(machine.len().next_power_of_two(), range.min as u32); + while machine.len() < machine_degree { + machine.push_dummy_block(machine_degree as usize); + } } for (col_name, col) in machine.take_cols() { assert!(self.trace.cols.insert(col_name, col).is_none()); diff --git a/riscv-executor/src/memory.rs b/riscv-executor/src/memory.rs index 49fc57cf07..c29fc5cf64 100644 --- a/riscv-executor/src/memory.rs +++ b/riscv-executor/src/memory.rs @@ -17,23 +17,27 @@ struct Op { pub struct MemoryMachine { pub namespace: String, ops: Vec>, - // this is the size of the "selector array" for this machine. 
We deduce it - // from the largest idx given in incoming read/write operations. Each - // element becomes a column in the final trace. - selector_count: usize, + witness_cols: Vec, } impl MemoryMachine { - pub fn new(namespace: &str) -> Self { + pub fn new(namespace: &str, witness_cols: &[String]) -> Self { + // filter for the machine columns + let prefix = format!("{namespace}::"); + let witness_cols = witness_cols + .iter() + .filter(|c| c.starts_with(&prefix)) + .cloned() + .collect(); + MemoryMachine { namespace: namespace.to_string(), ops: Vec::new(), - selector_count: 0, + witness_cols, } } pub fn write(&mut self, step: u32, addr: u32, val: Elem, selector_idx: u32) { - self.selector_count = std::cmp::max(self.selector_count, selector_idx as usize + 1); self.ops.push(Op { addr, step, @@ -44,7 +48,6 @@ impl MemoryMachine { } pub fn read(&mut self, step: u32, addr: u32, val: Elem, selector_idx: u32) { - self.selector_count = std::cmp::max(self.selector_count, selector_idx as usize + 1); self.ops.push(Op { addr, step, @@ -64,7 +67,7 @@ impl MemoryMachine { "trying to take less rows than memory ops" ); - // order here matters! we use this to index into the columns + // order here matters (pil defines the order of witness cols)! 
we use this to index into the columns #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] #[repr(usize)] enum Cols { @@ -79,46 +82,15 @@ impl MemoryMachine { } use Cols::*; - let mut cols = vec![ - ( - format!("{}::m_addr", self.namespace), - Vec::with_capacity(len as usize), - ), - ( - format!("{}::m_step", self.namespace), - Vec::with_capacity(len as usize), - ), - ( - format!("{}::m_change", self.namespace), - Vec::with_capacity(len as usize), - ), - ( - format!("{}::m_value", self.namespace), - Vec::with_capacity(len as usize), - ), - ( - format!("{}::m_is_write", self.namespace), - Vec::with_capacity(len as usize), - ), - ( - format!("{}::m_diff_lower", self.namespace), - Vec::with_capacity(len as usize), - ), - ( - format!("{}::m_diff_upper", self.namespace), - Vec::with_capacity(len as usize), - ), - ]; - for i in 0..self.selector_count as u32 { - cols.push(( - format!("{}::selectors[{}]", self.namespace, i), - Vec::with_capacity(len as usize), - )); - } - // sort ops by (addr, step) self.ops.sort_by_key(|op| (op.addr, op.step)); + let mut cols: Vec<_> = std::mem::take(&mut self.witness_cols) + .into_iter() + .map(|n| (n, vec![])) + .collect(); + let selector_count = cols.len() - Cols::Selectors as usize; + // generate rows from ops for (idx, op) in self.ops.iter().enumerate() { if let Some(next_addr) = self.ops.get(idx + 1).map(|op| op.addr) { @@ -146,7 +118,7 @@ impl MemoryMachine { cols[Addr as usize].1.push(op.addr.into()); cols[Value as usize].1.push(op.value); - for i in 0..self.selector_count as u32 { + for i in 0..selector_count as u32 { cols[Selectors as usize + i as usize] .1 .push(if i == op.selector_idx { @@ -180,7 +152,7 @@ impl MemoryMachine { cols[IsWrite as usize].1.resize(len as usize, 0.into()); cols[DiffLower as usize].1.resize(len as usize, 0.into()); cols[DiffUpper as usize].1.resize(len as usize, 0.into()); - for i in 0..self.selector_count as u32 { + for i in 0..selector_count as u32 { cols[Selectors as usize + i as usize] .1 
.resize(len as usize, 0.into()); diff --git a/test_data/std/memory_large_test.asm b/test_data/std/memory_large_test.asm index 74d02e666c..6a0bcbbdca 100644 --- a/test_data/std/memory_large_test.asm +++ b/test_data/std/memory_large_test.asm @@ -4,7 +4,7 @@ use std::machines::large_field::memory::Memory; let main_degree: int = 2**7; let memory_degree: int = 2**5; -machine Main with degree: 65536 { + machine Main with degree: main_degree { reg pc[@pc]; reg X[<=]; reg Y[<=]; diff --git a/test_data/std/memory_large_test_parallel_accesses.asm b/test_data/std/memory_large_test_parallel_accesses.asm index 6f7001fb99..e1a9bb2eb4 100644 --- a/test_data/std/memory_large_test_parallel_accesses.asm +++ b/test_data/std/memory_large_test_parallel_accesses.asm @@ -1,8 +1,8 @@ use std::machines::range::Byte2; use std::machines::large_field::memory::Memory; -let main_degree: int = 2**16; -let memory_degree: int = 2**16; +let main_degree: int = 2**8; +let memory_degree: int = 2**8; machine Main with degree: main_degree, diff --git a/test_data/std/memory_large_with_bootloader_write_test.asm b/test_data/std/memory_large_with_bootloader_write_test.asm index b6f851f89d..4371f575a3 100644 --- a/test_data/std/memory_large_with_bootloader_write_test.asm +++ b/test_data/std/memory_large_with_bootloader_write_test.asm @@ -1,8 +1,8 @@ use std::machines::range::Byte2; use std::machines::large_field::memory_with_bootloader_write::MemoryWithBootloaderWrite; -let main_degree: int = 2**16; -let memory_degree: int = 2**16; +let main_degree: int = 2**8; +let memory_degree: int = 2**8; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/memory_small_test.asm b/test_data/std/memory_small_test.asm index fdcbeb99dd..68252eee44 100644 --- a/test_data/std/memory_small_test.asm +++ b/test_data/std/memory_small_test.asm @@ -2,8 +2,8 @@ use std::machines::range::Bit12; use std::machines::range::Byte2; use std::machines::small_field::memory::Memory; -let main_degree: int = 2**16; 
-let memory_degree: int = 2**16; +let main_degree: int = 2**8; +let memory_degree: int = 2**8; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/poseidon2_bb_test.asm b/test_data/std/poseidon2_bb_test.asm index fdaa23e8c6..ca68c2b89d 100644 --- a/test_data/std/poseidon2_bb_test.asm +++ b/test_data/std/poseidon2_bb_test.asm @@ -5,10 +5,10 @@ use std::machines::small_field::memory::Memory; use std::machines::split::ByteCompare; use std::machines::split::split_bb::SplitBB; -let main_degree: int = 2**16; -let memory_degree: int = 2**16; -let poseidon2_degree: int = 2**16; -let split_bb_degree: int = 2**16; +let main_degree: int = 2**10; +let memory_degree: int = 2**12; +let poseidon2_degree: int = 2**12; +let split_bb_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/poseidon2_gl_test.asm b/test_data/std/poseidon2_gl_test.asm index 0781b3fe44..caae8ef259 100644 --- a/test_data/std/poseidon2_gl_test.asm +++ b/test_data/std/poseidon2_gl_test.asm @@ -4,10 +4,10 @@ use std::machines::large_field::memory::Memory; use std::machines::split::ByteCompare; use std::machines::split::split_gl::SplitGL; -let main_degree: int = 2**16; -let memory_degree: int = 2**16; -let poseidon2_degree: int = 2**16; -let split_degree: int = 2**16; +let main_degree: int = 2**10; +let memory_degree: int = 2**12; +let poseidon2_degree: int = 2**12; +let split_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/poseidon_bb_test.asm b/test_data/std/poseidon_bb_test.asm index d4d65e5809..26668e44a8 100644 --- a/test_data/std/poseidon_bb_test.asm +++ b/test_data/std/poseidon_bb_test.asm @@ -5,12 +5,12 @@ use std::machines::small_field::memory::Memory; use std::machines::split::ByteCompare; use std::machines::split::split_bb::SplitBB; -let main_degree: int = 2**16; -let memory_degree: int = 2**16; -let poseidon2_degree: int = 2**16; -let split_bb_degree: int = 2**16; +let 
main_degree: int = 2**10; +let memory_degree: int = 2**12; +let poseidon2_degree: int = 2**12; +let split_bb_degree: int = 2**12; -machine Main with degree: 65536 { +machine Main with degree: main_degree { reg pc[@pc]; reg X1[<=]; reg X2[<=]; diff --git a/test_data/std/poseidon_gl_memory_test.asm b/test_data/std/poseidon_gl_memory_test.asm index 19d3fbc333..f4f967c66d 100644 --- a/test_data/std/poseidon_gl_memory_test.asm +++ b/test_data/std/poseidon_gl_memory_test.asm @@ -4,10 +4,10 @@ use std::machines::large_field::memory::Memory; use std::machines::split::ByteCompare; use std::machines::split::split_gl::SplitGL; -let main_degree: int = 2**16; -let split_degree: int = 2**16; -let memory_degree: int = 2**16; -let poseidon_degree: int = 2**16; +let main_degree: int = 2**10; +let split_degree: int = 2**12; +let memory_degree: int = 2**12; +let poseidon_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/rotate_large_test.asm b/test_data/std/rotate_large_test.asm index 53bbd25a59..7241c2a863 100644 --- a/test_data/std/rotate_large_test.asm +++ b/test_data/std/rotate_large_test.asm @@ -1,8 +1,8 @@ use std::machines::large_field::rotate::ByteRotate; use std::machines::large_field::rotate::Rotate; -let main_degree: int = 2**16; -let rotate_degree: int = 2**16; +let main_degree: int = 2**8; +let rotate_degree: int = 2**10; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/rotate_small_test.asm b/test_data/std/rotate_small_test.asm index 14c896a6e7..243b0edc41 100644 --- a/test_data/std/rotate_small_test.asm +++ b/test_data/std/rotate_small_test.asm @@ -1,8 +1,8 @@ use std::machines::small_field::rotate::ByteRotate; use std::machines::small_field::rotate::Rotate; -let main_degree: int = 2**16; -let rotate_degree: int = 2**16; +let main_degree: int = 2**10; +let rotate_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; @@ -73,4 +73,4 @@ machine Main with degree: 
main_degree { return; } -} \ No newline at end of file +} diff --git a/test_data/std/shift_large_test.asm b/test_data/std/shift_large_test.asm index dea2df45e2..922af850af 100644 --- a/test_data/std/shift_large_test.asm +++ b/test_data/std/shift_large_test.asm @@ -1,8 +1,8 @@ use std::machines::large_field::shift::ByteShift; use std::machines::large_field::shift::Shift; -let main_degree: int = 2**16; -let shift_degree: int = 2**16; +let main_degree: int = 2**10; +let shift_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/shift_small_test.asm b/test_data/std/shift_small_test.asm index f1f0d03078..b89a65cddd 100644 --- a/test_data/std/shift_small_test.asm +++ b/test_data/std/shift_small_test.asm @@ -1,8 +1,8 @@ use std::machines::small_field::shift::ByteShift; use std::machines::small_field::shift::Shift; -let main_degree: int = 2**16; -let shift_degree: int = 2**16; +let main_degree: int = 2**10; +let shift_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/split_bb_test.asm b/test_data/std/split_bb_test.asm index fbbd2b1e20..afb684586a 100644 --- a/test_data/std/split_bb_test.asm +++ b/test_data/std/split_bb_test.asm @@ -1,8 +1,8 @@ use std::machines::split::ByteCompare; use std::machines::split::split_bb::SplitBB; -let main_degree: int = 2**16; -let split_degree: int = 2**16; +let main_degree: int = 2**10; +let split_degree: int = 2**12; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/split_bn254_test.asm b/test_data/std/split_bn254_test.asm index 4508dc4e5f..096f6632e6 100644 --- a/test_data/std/split_bn254_test.asm +++ b/test_data/std/split_bn254_test.asm @@ -1,8 +1,8 @@ use std::machines::split::ByteCompare; use std::machines::split::split_bn254::SplitBN254; -let main_degree: int = 2**16; -let split_degree: int = 2**16; +let main_degree: int = 2**10; +let split_degree: int = 2**12; machine Main with degree: main_degree { reg 
pc[@pc]; diff --git a/test_data/std/split_gl_test.asm b/test_data/std/split_gl_test.asm index af47083337..5c57f34c34 100644 --- a/test_data/std/split_gl_test.asm +++ b/test_data/std/split_gl_test.asm @@ -1,8 +1,8 @@ use std::machines::split::ByteCompare; use std::machines::split::split_gl::SplitGL; -let main_degree: int = 2**16; -let split_degree: int = 2**16; +let main_degree: int = 2**8; +let split_degree: int = 2**10; machine Main with degree: main_degree { reg pc[@pc]; diff --git a/test_data/std/write_once_memory_test.asm b/test_data/std/write_once_memory_test.asm index df4295b97d..62f4d1e1c0 100644 --- a/test_data/std/write_once_memory_test.asm +++ b/test_data/std/write_once_memory_test.asm @@ -3,7 +3,7 @@ use std::machines::write_once_memory::WriteOnceMemory; let main_degree: int = 256; let memory_degree: int = 256; -machine Main with degree: 256 { +machine Main with degree: main_degree { WriteOnceMemory memory(memory_degree, memory_degree); reg pc[@pc]; From bba060d56a0aaf2a5eeb68589de7d8646de8cce3 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Fri, 29 Nov 2024 11:08:23 +0100 Subject: [PATCH 05/57] `MockBackend`: Implement later-stage witness generation (#2168) Ticks the third item off #2152 This should make the mock prover feature-complete! 
--------- Co-authored-by: Thibaut Schaeffer --- .../src/mock/connection_constraint_checker.rs | 7 +- backend/src/mock/evaluator.rs | 9 ++- backend/src/mock/machine.rs | 12 +++- backend/src/mock/mod.rs | 64 ++++++++++++++----- .../src/mock/polynomial_constraint_checker.rs | 9 ++- pipeline/tests/asm.rs | 10 +-- pipeline/tests/powdr_std.rs | 10 +-- 7 files changed, 90 insertions(+), 31 deletions(-) diff --git a/backend/src/mock/connection_constraint_checker.rs b/backend/src/mock/connection_constraint_checker.rs index 81b3813960..f71e9b2c30 100644 --- a/backend/src/mock/connection_constraint_checker.rs +++ b/backend/src/mock/connection_constraint_checker.rs @@ -155,6 +155,7 @@ impl Connection { pub struct ConnectionConstraintChecker<'a, F: FieldElement> { pub connections: &'a [Connection], pub machines: BTreeMap>, + pub challenges: &'a BTreeMap, } impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { @@ -247,7 +248,11 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { (0..machine.size) .into_par_iter() .filter_map(|row| { - let variables = Variables { machine, row }; + let variables = Variables { + machine, + row, + challenges: self.challenges, + }; let mut evaluator = ExpressionEvaluator::new(&variables, &machine.intermediate_definitions); let result = evaluator.evaluate(&selected_expressions.selector).unwrap(); diff --git a/backend/src/mock/evaluator.rs b/backend/src/mock/evaluator.rs index e852c0f228..853aef961d 100644 --- a/backend/src/mock/evaluator.rs +++ b/backend/src/mock/evaluator.rs @@ -1,4 +1,6 @@ -use powdr_ast::analyzed::PolynomialType; +use std::collections::BTreeMap; + +use powdr_ast::analyzed::{Challenge, PolynomialType}; use powdr_executor::witgen::{AffineResult, AlgebraicVariable, SymbolicVariables}; use powdr_number::FieldElement; @@ -7,6 +9,7 @@ use super::machine::Machine; pub struct Variables<'a, F> { pub machine: &'a Machine<'a, F>, pub row: usize, + pub challenges: &'a BTreeMap, } impl<'a, F: FieldElement> Variables<'a, 
F> { @@ -31,4 +34,8 @@ impl<'a, F: FieldElement> SymbolicVariables for &Variables<'a, F> { fn value<'b>(&self, var: AlgebraicVariable<'b>) -> AffineResult, F> { Ok(self.constant_value(var).into()) } + + fn challenge<'b>(&self, challenge: &'b Challenge) -> AffineResult, F> { + Ok(self.challenges[&challenge.id].into()) + } } diff --git a/backend/src/mock/machine.rs b/backend/src/mock/machine.rs index 2bcf0a67f1..1e634224b0 100644 --- a/backend/src/mock/machine.rs +++ b/backend/src/mock/machine.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use itertools::Itertools; use powdr_ast::analyzed::{AlgebraicExpression, AlgebraicReferenceThin, Analyzed, PolyID}; use powdr_backend_utils::{machine_fixed_columns, machine_witness_columns}; -use powdr_executor::constant_evaluator::VariablySizedColumn; +use powdr_executor::{constant_evaluator::VariablySizedColumn, witgen::WitgenCallback}; use powdr_number::{DegreeType, FieldElement}; /// A collection of columns with self-contained constraints. @@ -22,8 +22,10 @@ impl<'a, F: FieldElement> Machine<'a, F> { witness: &'a [(String, Vec)], fixed: &'a [(String, VariablySizedColumn)], pil: &'a Analyzed, + witgen_callback: &WitgenCallback, + challenges: &BTreeMap, ) -> Option { - let witness = machine_witness_columns(witness, pil, &machine_name); + let mut witness = machine_witness_columns(witness, pil, &machine_name); let size = witness .iter() .map(|(_, v)| v.len()) @@ -36,6 +38,12 @@ impl<'a, F: FieldElement> Machine<'a, F> { return None; } + for stage in 1..pil.stage_count() { + log::debug!("Generating stage-{stage} witness for machine {machine_name}"); + witness = + witgen_callback.next_stage_witness(pil, &witness, challenges.clone(), stage as u8); + } + let fixed = machine_fixed_columns(fixed, pil); let fixed = fixed.get(&(size as DegreeType)).unwrap(); diff --git a/backend/src/mock/mod.rs b/backend/src/mock/mod.rs index c0c5e8478d..6b37fd1dd6 100644 --- a/backend/src/mock/mod.rs +++ b/backend/src/mock/mod.rs @@ -1,9 +1,19 @@ -use 
std::{collections::BTreeMap, io, marker::PhantomData, path::PathBuf, sync::Arc}; +use std::{ + collections::BTreeMap, + hash::{DefaultHasher, Hash, Hasher}, + io, + marker::PhantomData, + path::PathBuf, + sync::Arc, +}; use connection_constraint_checker::{Connection, ConnectionConstraintChecker}; use machine::Machine; use polynomial_constraint_checker::PolynomialConstraintChecker; -use powdr_ast::analyzed::Analyzed; +use powdr_ast::{ + analyzed::{AlgebraicExpression, Analyzed}, + parsed::visitor::AllChildren, +}; use powdr_executor::{constant_evaluator::VariablySizedColumn, witgen::WitgenCallback}; use powdr_number::{DegreeType, FieldElement}; @@ -70,37 +80,61 @@ impl Backend for MockBackend { &self, witness: &[(String, Vec)], prev_proof: Option, - _witgen_callback: WitgenCallback, + witgen_callback: WitgenCallback, ) -> Result { if prev_proof.is_some() { unimplemented!(); } + let challenges = self + .machine_to_pil + .values() + .flat_map(|pil| pil.identities.iter()) + .flat_map(|identity| identity.all_children()) + .filter_map(|expr| match expr { + AlgebraicExpression::Challenge(challenge) => { + // Use the hash of the ID as the challenge. + // This way, if the same challenge is used by different machines, they will + // have the same value. + let mut hasher = DefaultHasher::new(); + challenge.id.hash(&mut hasher); + Some((challenge.id, F::from(hasher.finish()))) + } + _ => None, + }) + .collect::>(); + let machines = self .machine_to_pil + // TODO: We should probably iterate in parallel, because Machine::try_new might generate + // later-stage witnesses, which is expensive. + // However, for now, doing it sequentially simplifies debugging. 
.iter() - .filter_map(|(machine, pil)| { - Machine::try_new(machine.clone(), witness, &self.fixed, pil) + .filter_map(|(machine_name, pil)| { + Machine::try_new( + machine_name.clone(), + witness, + &self.fixed, + pil, + &witgen_callback, + &challenges, + ) }) .map(|machine| (machine.machine_name.clone(), machine)) .collect::>(); - let mut is_ok = true; - for (_, machine) in machines.iter() { - let result = PolynomialConstraintChecker::new(machine).check(); - is_ok &= !result.has_errors(); - } - - is_ok &= ConnectionConstraintChecker { + let is_ok = machines.values().all(|machine| { + !PolynomialConstraintChecker::new(machine, &challenges) + .check() + .has_errors() + }) && ConnectionConstraintChecker { connections: &self.connections, machines, + challenges: &challenges, } .check() .is_ok(); - // TODO: - // - Check later-stage witness - match is_ok { true => Ok(Vec::new()), false => Err(Error::BackendError("Constraint check failed".to_string())), diff --git a/backend/src/mock/polynomial_constraint_checker.rs b/backend/src/mock/polynomial_constraint_checker.rs index f6ee3b47d8..194f246c9b 100644 --- a/backend/src/mock/polynomial_constraint_checker.rs +++ b/backend/src/mock/polynomial_constraint_checker.rs @@ -14,11 +14,15 @@ use super::machine::Machine; pub struct PolynomialConstraintChecker<'a, F> { machine: &'a Machine<'a, F>, + challenges: &'a BTreeMap, } impl<'a, F: FieldElement> PolynomialConstraintChecker<'a, F> { - pub fn new(machine: &'a Machine<'a, F>) -> Self { - Self { machine } + pub fn new(machine: &'a Machine<'a, F>, challenges: &'a BTreeMap) -> Self { + Self { + machine, + challenges, + } } pub fn check(&self) -> MachineResult<'a, F> { @@ -54,6 +58,7 @@ impl<'a, F: FieldElement> PolynomialConstraintChecker<'a, F> { let variables = Variables { machine: self.machine, row, + challenges: self.challenges, }; let mut evaluator = ExpressionEvaluator::new(&variables, &self.machine.intermediate_definitions); diff --git a/pipeline/tests/asm.rs 
b/pipeline/tests/asm.rs index 04c1e327a8..99ecf47326 100644 --- a/pipeline/tests/asm.rs +++ b/pipeline/tests/asm.rs @@ -39,7 +39,7 @@ fn block_machine_exact_number_of_rows_asm() { fn challenges_asm() { let f = "asm/challenges.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); - // TODO Mock prover doesn't support this test yet. + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); } @@ -69,7 +69,7 @@ fn secondary_block_machine_add2() { fn second_phase_hint() { let f = "asm/second_phase_hint.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); - // TODO Mock prover doesn't support this test yet. + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); } @@ -179,7 +179,7 @@ fn block_to_block_empty_submachine() { fn block_to_block_with_bus_monolithic() { let f = "asm/block_to_block_with_bus.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); - // TODO Mock prover doesn't support this test yet. + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); } @@ -187,7 +187,7 @@ fn block_to_block_with_bus_monolithic() { fn block_to_block_with_bus_different_sizes() { let f = "asm/block_to_block_with_bus_different_sizes.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); - // TODO Mock prover doesn't support this test yet. + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); } @@ -205,7 +205,7 @@ fn block_to_block_with_bus_composite() { use powdr_pipeline::test_util::test_halo2_with_backend_variant; let f = "asm/block_to_block_with_bus.asm"; let pipeline = make_simple_prepared_pipeline(f); - // TODO Mock prover doesn't support this test yet. 
+ test_mock_backend(pipeline.clone()); test_halo2_with_backend_variant(pipeline, BackendVariant::Composite); } diff --git a/pipeline/tests/powdr_std.rs b/pipeline/tests/powdr_std.rs index 92fb207462..dd385488ed 100644 --- a/pipeline/tests/powdr_std.rs +++ b/pipeline/tests/powdr_std.rs @@ -181,40 +181,40 @@ fn memory_small_test() { fn permutation_via_challenges() { let f = "std/permutation_via_challenges.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); - // TODO Mock prover doesn't support this test yet. } #[test] fn lookup_via_challenges() { let f = "std/lookup_via_challenges.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); - // TODO Mock prover doesn't support this test yet. } #[test] fn lookup_via_challenges_range_constraint() { let f = "std/lookup_via_challenges_range_constraint.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); - // TODO Mock prover doesn't support this test yet. } #[test] fn bus_lookup() { let f = "std/bus_lookup.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); - // TODO Mock prover doesn't support this test yet. } #[test] fn bus_permutation() { let f = "std/bus_permutation.asm"; let pipeline: Pipeline = make_simple_prepared_pipeline(f); + test_mock_backend(pipeline.clone()); test_plonky3_pipeline(pipeline); - // TODO Mock prover doesn't support this test yet. 
} #[test] From 23e92a088297d7f210ac8de3799c9bac4df91993 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Fri, 29 Nov 2024 12:50:11 +0100 Subject: [PATCH 06/57] Mock Backend: Handle constant LHS / RHS in connections (#2172) With this PR, the `MockBackend` should work also for *some* lookups / permutations that don't reference any column on the LHS and RHS. While these constraints could always be removed by the optimizer, it is not always the case in practice. --- .../src/mock/connection_constraint_checker.rs | 197 ++++++++++-------- backend/src/mock/evaluator.rs | 28 ++- 2 files changed, 136 insertions(+), 89 deletions(-) diff --git a/backend/src/mock/connection_constraint_checker.rs b/backend/src/mock/connection_constraint_checker.rs index f71e9b2c30..d79e60e4ec 100644 --- a/backend/src/mock/connection_constraint_checker.rs +++ b/backend/src/mock/connection_constraint_checker.rs @@ -13,15 +13,18 @@ use powdr_ast::analyzed::{ use powdr_ast::parsed::visitor::ExpressionVisitable; use powdr_ast::parsed::visitor::VisitOrder; use powdr_backend_utils::referenced_namespaces_algebraic_expression; -use powdr_executor::witgen::AffineExpression; use powdr_executor::witgen::ExpressionEvaluator; use powdr_number::FieldElement; use rayon::iter::IntoParallelIterator; use rayon::iter::ParallelIterator; +use crate::mock::evaluator::evaluate_to_fe; + +use super::evaluator::EmptyVariables; use super::evaluator::Variables; use super::machine::Machine; +#[derive(PartialEq, Eq, Debug)] pub enum ConnectionKind { Lookup, Permutation, @@ -68,30 +71,22 @@ impl Connection { }?; // This connection is not localized yet: Its expression's PolyIDs point to the global PIL, not the local PIL. 
- let connection_global = Self { + let mut connection = Self { identity: identity.clone(), left, right, kind, }; - let caller = connection_global.caller(); - let left = connection_global.localize( - &connection_global.left, - global_pil, - &machine_to_pil[&caller], - ); - let callee = connection_global.callee(); - let right = connection_global.localize( - &connection_global.right, - global_pil, - &machine_to_pil[&callee], - ); + if let Some(caller) = connection.caller() { + connection.left = + connection.localize(&connection.left, global_pil, &machine_to_pil[&caller]); + } + if let Some(callee) = connection.callee() { + connection.right = + connection.localize(&connection.right, global_pil, &machine_to_pil[&callee]); + } - Ok(Self { - left, - right, - ..connection_global - }) + Ok(connection) } /// Translates PolyIDs pointing to columns in the global PIL to PolyIDs pointing to columns in the local PIL. @@ -131,23 +126,24 @@ impl Connection { fn unique_referenced_namespaces( selected_expressions: &SelectedExpressions, -) -> String { +) -> Option { let all_namespaces = referenced_namespaces_algebraic_expression(selected_expressions); - assert_eq!( - all_namespaces.len(), - 1, - "Expected exactly one namespace, got: {all_namespaces:?}", + assert!( + all_namespaces.len() <= 1, + "Expected at most one namespace, got: {all_namespaces:?}", ); - all_namespaces.into_iter().next().unwrap() + all_namespaces.into_iter().next() } /// A connection between two machines. impl Connection { - pub fn caller(&self) -> String { + /// The calling machine. None if there are no column references on the LHS. + pub fn caller(&self) -> Option { unique_referenced_namespaces(&self.left) } - pub fn callee(&self) -> String { + /// The called machine. None if there are no column references on the RHS. 
+ pub fn callee(&self) -> Option { unique_referenced_namespaces(&self.right) } } @@ -158,6 +154,12 @@ pub struct ConnectionConstraintChecker<'a, F: FieldElement> { pub challenges: &'a BTreeMap, } +#[derive(Debug, PartialEq, Eq)] +enum ConnectionPart { + Caller, + Callee, +} + impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { /// Checks all connections. pub fn check(&self) -> Result<(), FailingConnectionConstraints<'a, F>> { @@ -184,8 +186,8 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { &self, connection: &'a Connection, ) -> Result<(), FailingConnectionConstraint<'a, F>> { - let caller_set = self.selected_tuples(&connection.caller(), &connection.left); - let callee_set = self.selected_tuples(&connection.callee(), &connection.right); + let caller_set = self.selected_tuples(connection, ConnectionPart::Caller); + let callee_set = self.selected_tuples(connection, ConnectionPart::Callee); match connection.kind { ConnectionKind::Lookup => { @@ -234,50 +236,83 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { /// Returns the set of all selected tuples for a given machine. fn selected_tuples( &self, - machine_name: &str, - selected_expressions: &SelectedExpressions, + connection: &Connection, + connection_part: ConnectionPart, ) -> Vec> { - let machine = match self.machines.get(machine_name) { - Some(machine) => machine, - None => { - // The machine is empty, so there are no tuples. 
- return Vec::new(); - } + let machine_name = match connection_part { + ConnectionPart::Caller => connection.caller(), + ConnectionPart::Callee => connection.callee(), + }; + let selected_expressions = match connection_part { + ConnectionPart::Caller => &connection.left, + ConnectionPart::Callee => &connection.right, }; - (0..machine.size) - .into_par_iter() - .filter_map(|row| { - let variables = Variables { - machine, - row, - challenges: self.challenges, - }; - let mut evaluator = - ExpressionEvaluator::new(&variables, &machine.intermediate_definitions); - let result = evaluator.evaluate(&selected_expressions.selector).unwrap(); - let result = match result { - AffineExpression::Constant(c) => c, - _ => unreachable!("Unexpected result: {:?}", result), - }; - - assert!(result.is_zero() || result.is_one(), "Non-binary selector"); - result.is_one().then(|| { - let values = selected_expressions - .expressions - .iter() - .map(|expression| { - let result = evaluator.evaluate(expression).unwrap(); - match result { - AffineExpression::Constant(c) => c, - _ => unreachable!("Unexpected result: {:?}", result), - } + match machine_name { + Some(machine_name) => match self.machines.get(&machine_name) { + // The typical case: Find the selected rows and evaluate the tuples. 
+ Some(machine) => (0..machine.size) + .into_par_iter() + .filter_map(|row| { + let variables = Variables { + machine, + row, + challenges: self.challenges, + }; + let mut evaluator = + ExpressionEvaluator::new(&variables, &machine.intermediate_definitions); + let result = evaluate_to_fe(&mut evaluator, &selected_expressions.selector); + + assert!(result.is_zero() || result.is_one(), "Non-binary selector"); + result.is_one().then(|| { + let values = selected_expressions + .expressions + .iter() + .map(|expression| evaluate_to_fe(&mut evaluator, expression)) + .collect::>(); + Tuple { values, row } }) - .collect::>(); - Tuple { values, row } - }) - }) - .collect() + }) + .collect(), + // The machine is empty, so there are no tuples. + None => Vec::new(), + }, + // There are no column references in the selected expressions. + None => { + let empty_variables = EmptyVariables {}; + let empty_definitions = BTreeMap::new(); + let mut evaluator = ExpressionEvaluator::new(empty_variables, &empty_definitions); + let selector_value = evaluate_to_fe(&mut evaluator, &selected_expressions.selector); + + match selector_value.to_degree() { + // Selected expressions is of the form `0 $ [ ]` + // => The tuples is the empty set. + 0 => Vec::new(), + // This one is tricky, because we don't know the size of the machine. + // But for lookups, we can return one tuple, so something like `[ 5 ] in [ BYTES ]` + // would still work. + 1 => { + assert_eq!( + connection.kind, + ConnectionKind::Lookup, + "Unexpected connection: {}", + connection.identity + ); + if connection_part == ConnectionPart::Callee { + // In theory, for lookups we could handle this by repeating the tuple infinitely... 
+ unimplemented!("Unexpected connection: {}", connection.identity); + } + let values = selected_expressions + .expressions + .iter() + .map(|expression| evaluate_to_fe(&mut evaluator, expression)) + .collect::>(); + vec![Tuple { values, row: 0 }] + } + _ => unreachable!("Non-binary selector"), + } + } + } } } @@ -369,29 +404,17 @@ fn fmt_subset_error( impl fmt::Display for FailingConnectionConstraint<'_, F> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!( - f, - "Connection failed between {} and {}:", - self.connection.caller(), - self.connection.callee() - )?; + let caller = self.connection.caller().unwrap_or("???".to_string()); + let callee = self.connection.callee().unwrap_or("???".to_string()); + + writeln!(f, "Connection failed between {caller} and {callee}:")?; writeln!(f, " {}", self.connection.identity)?; if !self.not_in_callee.is_empty() { - fmt_subset_error( - f, - &self.connection.caller(), - &self.connection.callee(), - &self.not_in_callee, - )?; + fmt_subset_error(f, &caller, &callee, &self.not_in_callee)?; } if !self.not_in_caller.is_empty() { - fmt_subset_error( - f, - &self.connection.callee(), - &self.connection.caller(), - &self.not_in_caller, - )?; + fmt_subset_error(f, &callee, &caller, &self.not_in_caller)?; } Ok(()) } diff --git a/backend/src/mock/evaluator.rs b/backend/src/mock/evaluator.rs index 853aef961d..11da31ffaa 100644 --- a/backend/src/mock/evaluator.rs +++ b/backend/src/mock/evaluator.rs @@ -1,7 +1,9 @@ use std::collections::BTreeMap; -use powdr_ast::analyzed::{Challenge, PolynomialType}; -use powdr_executor::witgen::{AffineResult, AlgebraicVariable, SymbolicVariables}; +use powdr_ast::analyzed::{AlgebraicExpression, Challenge, PolynomialType}; +use powdr_executor::witgen::{ + AffineExpression, AffineResult, AlgebraicVariable, ExpressionEvaluator, SymbolicVariables, +}; use powdr_number::FieldElement; use super::machine::Machine; @@ -39,3 +41,25 @@ impl<'a, F: FieldElement> SymbolicVariables for &Variables<'a, 
F> { Ok(self.challenges[&challenge.id].into()) } } + +pub struct EmptyVariables; + +impl SymbolicVariables for EmptyVariables +where + T: FieldElement, +{ + fn value<'a>(&self, _var: AlgebraicVariable<'a>) -> AffineResult, T> { + unimplemented!() + } +} + +pub fn evaluate_to_fe<'a, F: FieldElement, SV: SymbolicVariables>( + evaluator: &mut ExpressionEvaluator<'a, F, SV>, + expr: &'a AlgebraicExpression, +) -> F { + let result = evaluator.evaluate(expr).unwrap(); + match result { + AffineExpression::Constant(c) => c, + _ => unreachable!("Unexpected result: {:?}", result), + } +} From c0fefae3e7142d7ae3ce6c1d6ed20c1be32cb6a6 Mon Sep 17 00:00:00 2001 From: Thibaut Schaeffer Date: Fri, 29 Nov 2024 15:02:53 +0100 Subject: [PATCH 07/57] Introduce workspace-level lints (#2166) Reduce the cost of adding a lint from O(#workspace_crates) to 1. --- Cargo.toml | 4 ++++ airgen/Cargo.toml | 4 ++-- airgen/src/lib.rs | 2 -- analysis/Cargo.toml | 4 ++-- analysis/src/lib.rs | 2 -- analysis/src/machine_check.rs | 2 -- asm-to-pil/Cargo.toml | 4 ++-- asm-to-pil/src/lib.rs | 2 -- ast/Cargo.toml | 4 ++-- ast/src/lib.rs | 2 -- backend/Cargo.toml | 4 ++-- backend/src/halo2/mod.rs | 2 -- backend/src/lib.rs | 2 -- executor/Cargo.toml | 4 ++-- executor/src/lib.rs | 2 -- importer/Cargo.toml | 4 ++-- importer/src/lib.rs | 2 -- jit-compiler/Cargo.toml | 4 ++-- jit-compiler/tests/execution.rs | 1 - linker/Cargo.toml | 4 ++-- linker/src/lib.rs | 2 -- number/Cargo.toml | 4 ++-- number/src/lib.rs | 2 -- parser-util/Cargo.toml | 4 ++-- parser-util/src/lib.rs | 2 -- parser/Cargo.toml | 4 ++-- parser/src/lib.rs | 2 -- pil-analyzer/Cargo.toml | 4 ++-- pil-analyzer/src/lib.rs | 2 -- pilopt/Cargo.toml | 4 ++-- pilopt/src/lib.rs | 1 - pipeline/Cargo.toml | 4 ++-- pipeline/src/lib.rs | 2 -- powdr-test/Cargo.toml | 4 ++-- riscv-syscalls/Cargo.toml | 4 ++-- riscv/Cargo.toml | 4 ++-- riscv/src/lib.rs | 1 - 37 files changed, 40 insertions(+), 69 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 
385b4b74b7..fa798ea267 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,3 +81,7 @@ codegen-units = 256 [profile.release-with-debug] inherits = "release" debug = true + +[workspace.lints.clippy] +print_stdout = "deny" +uninlined_format_args = "deny" \ No newline at end of file diff --git a/airgen/Cargo.toml b/airgen/Cargo.toml index af13ed3706..993c1f67cb 100644 --- a/airgen/Cargo.toml +++ b/airgen/Cargo.toml @@ -16,5 +16,5 @@ log = "0.4.17" itertools = "0.13" -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/airgen/src/lib.rs b/airgen/src/lib.rs index f4216d24ab..cbe261a80d 100644 --- a/airgen/src/lib.rs +++ b/airgen/src/lib.rs @@ -1,7 +1,5 @@ //! Compilation from powdr machines to AIRs -#![deny(clippy::print_stdout)] - use std::collections::BTreeMap; use powdr_ast::{ diff --git a/analysis/Cargo.toml b/analysis/Cargo.toml index 56e745fa14..97f2ee7f28 100644 --- a/analysis/Cargo.toml +++ b/analysis/Cargo.toml @@ -26,5 +26,5 @@ env_logger = "0.10.0" [package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/analysis/src/lib.rs b/analysis/src/lib.rs index 1202a64d06..1fa1b08af7 100644 --- a/analysis/src/lib.rs +++ b/analysis/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - pub mod machine_check; mod vm; diff --git a/analysis/src/machine_check.rs b/analysis/src/machine_check.rs index 023033896e..7c0739ab73 100644 --- a/analysis/src/machine_check.rs +++ b/analysis/src/machine_check.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - use std::collections::BTreeMap; use powdr_ast::{ diff --git a/asm-to-pil/Cargo.toml b/asm-to-pil/Cargo.toml index 2472551f06..176bed5f5b 100644 --- a/asm-to-pil/Cargo.toml +++ b/asm-to-pil/Cargo.toml @@ -21,5 +21,5 @@ pretty_assertions = "1.4.0" powdr-analysis = { path = "../analysis" } powdr-importer = { path = "../importer" } -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = 
true \ No newline at end of file diff --git a/asm-to-pil/src/lib.rs b/asm-to-pil/src/lib.rs index e87ee30c56..25be3ef11b 100644 --- a/asm-to-pil/src/lib.rs +++ b/asm-to-pil/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - use std::collections::BTreeMap; use powdr_ast::asm_analysis::{AnalysisASMFile, Module, StatementReference, SubmachineDeclaration}; diff --git a/ast/Cargo.toml b/ast/Cargo.toml index 28740ad9eb..8d752edceb 100644 --- a/ast/Cargo.toml +++ b/ast/Cargo.toml @@ -26,5 +26,5 @@ powdr-pil-analyzer.workspace = true powdr-parser.workspace = true -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/ast/src/lib.rs b/ast/src/lib.rs index f5a219942c..850d105e96 100644 --- a/ast/src/lib.rs +++ b/ast/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - use itertools::Itertools; use std::fmt::{Display, Result, Write}; diff --git a/backend/Cargo.toml b/backend/Cargo.toml index ebac38e74c..2133610e41 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -87,5 +87,5 @@ powdr-pipeline.workspace = true [package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/backend/src/halo2/mod.rs b/backend/src/halo2/mod.rs index 131cfecd2c..4417534ae2 100644 --- a/backend/src/halo2/mod.rs +++ b/backend/src/halo2/mod.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - use std::io; use std::path::PathBuf; use std::sync::Arc; diff --git a/backend/src/lib.rs b/backend/src/lib.rs index db1f898227..b233adaeba 100644 --- a/backend/src/lib.rs +++ b/backend/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - #[cfg(any(feature = "estark-polygon", feature = "estark-starky"))] mod estark; #[cfg(feature = "halo2")] diff --git a/executor/Cargo.toml b/executor/Cargo.toml index 33efd934e3..e585a2ce2f 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -33,5 +33,5 @@ pretty_assertions = "1.4.0" 
[package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/executor/src/lib.rs b/executor/src/lib.rs index e50c0d6418..237dabf0ec 100644 --- a/executor/src/lib.rs +++ b/executor/src/lib.rs @@ -1,7 +1,5 @@ //! Tooling used for execution of compiled programs -#![deny(clippy::print_stdout)] - use powdr_ast::analyzed::Identity; pub mod constant_evaluator; diff --git a/importer/Cargo.toml b/importer/Cargo.toml index 37be828185..b13651aad6 100644 --- a/importer/Cargo.toml +++ b/importer/Cargo.toml @@ -15,5 +15,5 @@ powdr-parser-util.workspace = true pretty_assertions = "1.4.0" -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/importer/src/lib.rs b/importer/src/lib.rs index 7d183b942a..b40b2cf2f4 100644 --- a/importer/src/lib.rs +++ b/importer/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - mod module_loader; mod path_canonicalizer; mod powdr_std; diff --git a/jit-compiler/Cargo.toml b/jit-compiler/Cargo.toml index 26d3d82115..5537543cea 100644 --- a/jit-compiler/Cargo.toml +++ b/jit-compiler/Cargo.toml @@ -23,5 +23,5 @@ powdr-pil-analyzer.workspace = true pretty_assertions = "1.4.0" test-log = "0.2.12" -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/jit-compiler/tests/execution.rs b/jit-compiler/tests/execution.rs index 795f03ae0b..c5bd73aa89 100644 --- a/jit-compiler/tests/execution.rs +++ b/jit-compiler/tests/execution.rs @@ -140,7 +140,6 @@ fn degree_builtin() { let a: int -> int = |i| std::prover::degree(); "#; let compiled_pil = compile(input, &["main::a"]); - println!("Calling set degree outside"); compiled_pil.set_degree(128); let a = compiled_pil.get_fixed_column("main::a").unwrap(); diff --git a/linker/Cargo.toml b/linker/Cargo.toml index a44b2dd5eb..c191bbbc43 100644 --- a/linker/Cargo.toml +++ b/linker/Cargo.toml @@ -24,5 +24,5 @@ powdr-airgen.workspace = true 
powdr-importer.workspace = true powdr-parser.workspace = true -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/linker/src/lib.rs b/linker/src/lib.rs index 9697e326a5..44e6cafdd1 100644 --- a/linker/src/lib.rs +++ b/linker/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - use powdr_analysis::utils::parse_pil_statement; use powdr_ast::{ asm_analysis::{combine_flags, MachineDegree}, diff --git a/number/Cargo.toml b/number/Cargo.toml index af3dac014e..ef6a4f6f44 100644 --- a/number/Cargo.toml +++ b/number/Cargo.toml @@ -38,5 +38,5 @@ env_logger = "0.10.0" [package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/number/src/lib.rs b/number/src/lib.rs index 5f009af735..3abec7b53b 100644 --- a/number/src/lib.rs +++ b/number/src/lib.rs @@ -1,7 +1,5 @@ //! Numerical types used across powdr -#![deny(clippy::print_stdout)] - #[macro_use] mod macros; mod baby_bear; diff --git a/parser-util/Cargo.toml b/parser-util/Cargo.toml index e968cf6ec9..644145365d 100644 --- a/parser-util/Cargo.toml +++ b/parser-util/Cargo.toml @@ -23,5 +23,5 @@ env_logger = "0.10.0" [package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/parser-util/src/lib.rs b/parser-util/src/lib.rs index 3e1cb979ed..094f67107f 100644 --- a/parser-util/src/lib.rs +++ b/parser-util/src/lib.rs @@ -1,7 +1,5 @@ //! 
Utils used with different lalrpop parsers -#![deny(clippy::print_stdout)] - use std::{ fmt::{self, Debug, Formatter}, hash::Hash, diff --git a/parser/Cargo.toml b/parser/Cargo.toml index d80ac183d5..6c282c48f2 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -31,5 +31,5 @@ lalrpop = "^0.19" [package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 4135c33b45..ac79814934 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,7 +1,5 @@ //! Parser for powdr assembly and PIL -#![deny(clippy::print_stdout)] - use lalrpop_util::*; use powdr_ast::parsed::{ asm::ASMProgram, diff --git a/pil-analyzer/Cargo.toml b/pil-analyzer/Cargo.toml index 2510236c23..a1d2f39f80 100644 --- a/pil-analyzer/Cargo.toml +++ b/pil-analyzer/Cargo.toml @@ -25,5 +25,5 @@ pretty_assertions = "1.4.0" [package.metadata.cargo-udeps.ignore] development = ["env_logger"] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/pil-analyzer/src/lib.rs b/pil-analyzer/src/lib.rs index 1b7cae09a8..9e04799289 100644 --- a/pil-analyzer/src/lib.rs +++ b/pil-analyzer/src/lib.rs @@ -1,5 +1,3 @@ -#![deny(clippy::print_stdout)] - mod call_graph; mod condenser; pub mod evaluator; diff --git a/pilopt/Cargo.toml b/pilopt/Cargo.toml index ffe628c4b9..85c55c7ba4 100644 --- a/pilopt/Cargo.toml +++ b/pilopt/Cargo.toml @@ -18,5 +18,5 @@ itertools = "0.13.0" [dev-dependencies] powdr-pil-analyzer.workspace = true -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/pilopt/src/lib.rs b/pilopt/src/lib.rs index bacf4be8f9..28224aa75f 100644 --- a/pilopt/src/lib.rs +++ b/pilopt/src/lib.rs @@ -1,5 +1,4 @@ //! 
PIL-based optimizer -#![deny(clippy::print_stdout)] use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index d745fd126c..e1c0e41603 100644 --- a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -61,5 +61,5 @@ walkdir = "2.4.0" name = "evaluator_benchmark" harness = false -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/pipeline/src/lib.rs b/pipeline/src/lib.rs index 30ecbcbb84..7b8543fc5d 100644 --- a/pipeline/src/lib.rs +++ b/pipeline/src/lib.rs @@ -1,7 +1,5 @@ //! The main powdr lib, used to compile from assembly to PIL -#![deny(clippy::print_stdout)] - pub mod pipeline; pub mod test_runner; pub mod test_util; diff --git a/powdr-test/Cargo.toml b/powdr-test/Cargo.toml index 6360ce2253..3a8fc49fa8 100644 --- a/powdr-test/Cargo.toml +++ b/powdr-test/Cargo.toml @@ -13,5 +13,5 @@ powdr.workspace = true [dev-dependencies] env_logger = "0.10.2" -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/riscv-syscalls/Cargo.toml b/riscv-syscalls/Cargo.toml index bbe0e5fcb2..bf6325f45a 100644 --- a/riscv-syscalls/Cargo.toml +++ b/riscv-syscalls/Cargo.toml @@ -9,5 +9,5 @@ repository = { workspace = true } [dependencies] -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/riscv/Cargo.toml b/riscv/Cargo.toml index 0d1c6f9310..595f8a300a 100644 --- a/riscv/Cargo.toml +++ b/riscv/Cargo.toml @@ -65,5 +65,5 @@ development = ["env_logger"] name = "executor_benchmark" harness = false -[lints.clippy] -uninlined_format_args = "deny" +[lints] +workspace = true diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index b00bf6cfa2..75f4bfa6f1 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -1,5 +1,4 @@ //! 
A RISC-V frontend for powdr -#![deny(clippy::print_stdout)] use std::{ borrow::Cow, From 6550f22f4d869956965649f1fef6ac9470739e68 Mon Sep 17 00:00:00 2001 From: Leandro Pacheco Date: Fri, 29 Nov 2024 11:31:51 -0300 Subject: [PATCH 08/57] profiler: use function ids instead of strings (#2177) makes execution ~2x faster when using the profiler --- riscv-executor/src/profiler.rs | 77 ++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/riscv-executor/src/profiler.rs b/riscv-executor/src/profiler.rs index c9bce731a2..e6ba310800 100644 --- a/riscv-executor/src/profiler.rs +++ b/riscv-executor/src/profiler.rs @@ -10,10 +10,10 @@ use itertools::Itertools; use rustc_demangle::demangle; -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Call<'a> { - from: Loc<'a>, - target: Loc<'a>, +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Call { + from: Loc, + target: Loc, } /// RISC-V asm profiler. @@ -23,20 +23,22 @@ pub struct Profiler<'a> { options: ProfilerOptions, /// file number to (dir,file) debug_files: &'a [(&'a str, &'a str)], + /// map function names to ids (vec index) + functions: Vec<&'a str>, /// pc value of function beginnings - function_begin: BTreeMap, + function_begin: BTreeMap, /// pc value of .debug loc statements location_begin: BTreeMap, /// current call stack, entries include running cost - call_stack: Vec<(Call<'a>, usize)>, + call_stack: Vec<(Call, usize)>, /// saved return address of "jump and link" instructions return_pc_stack: Vec, /// cost of each location - location_stats: BTreeMap, usize>, + location_stats: BTreeMap, /// (count, cumulative cost) of calls - call_stats: BTreeMap, (usize, usize)>, + call_stats: BTreeMap, /// stack sampling format for FlameGraph - folded_stack_stats: BTreeMap, usize>, + folded_stack_stats: BTreeMap, usize>, } #[derive(Default, Clone)] @@ -47,13 +49,16 @@ pub struct ProfilerOptions { pub callgrind: bool, } -#[derive(Debug, Clone, PartialEq, Eq, 
PartialOrd, Ord)] -pub struct Loc<'a> { - function: &'a str, +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Loc { file: usize, line: usize, + function: FuncId, } +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct FuncId(usize); + impl<'a> Profiler<'a> { pub fn new( options: ProfilerOptions, @@ -61,16 +66,25 @@ impl<'a> Profiler<'a> { function_begin: BTreeMap, location_begin: BTreeMap, ) -> Self { + let mut functions = vec![""]; // "" is a placeholder for the initial call to "__runtime_start" which has no .debug loc + let mut id_begin = BTreeMap::default(); + for (begin, func) in function_begin { + let id = functions.len(); + functions.push(func); + id_begin.insert(begin, FuncId(id)); + } + Profiler { options, debug_files, - function_begin, + function_begin: id_begin, location_begin, call_stack: Default::default(), return_pc_stack: Default::default(), location_stats: Default::default(), call_stats: Default::default(), folded_stack_stats: Default::default(), + functions, } } @@ -81,22 +95,23 @@ impl<'a> Profiler<'a> { writeln!(&mut w, "events: Instructions\n").unwrap(); struct CallCost<'a> { - call: &'a Call<'a>, + call: &'a Call, count: usize, cost: usize, } - let mut func_ids = BTreeMap::new(); let mut loc_stats: BTreeMap<_, Vec<_>> = BTreeMap::new(); let mut call_stats: BTreeMap<_, Vec<_>> = BTreeMap::new(); + // we gather the ids here to only includes fns that have been seen at least once + let mut func_ids = BTreeMap::new(); // group stats per (function_id, file) - for (id, func) in self.function_begin.values().enumerate() { + for func in self.function_begin.values() { for (loc, cost) in &self.location_stats { if &loc.function == func { - func_ids.entry(func).or_insert(id); + func_ids.entry(func).or_insert(self.functions[func.0]); loc_stats - .entry((id, loc.file)) + .entry((func.0, loc.file)) .or_default() .push((loc.line, cost)); } @@ -104,9 +119,9 @@ impl<'a> Profiler<'a> { for (call, (count, cost)) in &self.call_stats { if 
&call.from.function == func { - assert!(func_ids.contains_key(func)); + func_ids.entry(func).or_insert(self.functions[func.0]); call_stats - .entry((id, call.from.file)) + .entry((func.0, call.from.file)) .or_default() .push(CallCost { call, @@ -117,9 +132,9 @@ impl<'a> Profiler<'a> { } } - // use function name id mapping, without it callgrind was showing duplicate entries - for (func, id) in &func_ids { - writeln!(&mut w, "fn=({id}) {}", format_function_name(func)).unwrap(); + // write id mapping for functions we saw + for (id, func) in func_ids { + writeln!(&mut w, "fn=({}) {}", id.0, format_function_name(func)).unwrap(); } writeln!(w).unwrap(); @@ -136,7 +151,7 @@ impl<'a> Profiler<'a> { { let (dir, name) = self.debug_files[*file - 1]; writeln!(&mut w, "cfl={dir}/{name}").unwrap(); - writeln!(w, "cfn=({})", func_ids[&call.target.function]).unwrap(); + writeln!(w, "cfn=({})", call.target.function.0).unwrap(); writeln!(w, "calls={count} {}", call.target.line).unwrap(); writeln!(w, "{} {cost}", call.from.line).unwrap(); } @@ -152,7 +167,7 @@ impl<'a> Profiler<'a> { .map(|(stack, count)| { let stack = stack .iter() - .map(|function| format_function_name(function)) + .map(|function| format_function_name(self.functions[function.0])) .join(";"); format!("{stack} {count}") }) @@ -183,12 +198,12 @@ impl<'a> Profiler<'a> { } /// function at the top of the call stack - pub fn curr_function(&self) -> Option<&'a str> { + fn curr_function(&self) -> Option { self.call_stack.last().map(|(c, _)| c.target.function) } /// get the function name and source location for a given pc value - pub fn location_at(&self, pc: usize) -> Option> { + fn location_at(&self, pc: usize) -> Option { self.function_begin .range(..=pc) .last() @@ -201,7 +216,7 @@ impl<'a> Profiler<'a> { // for labels with no .loc above them, just point to main file .unwrap_or((1, 0)); Loc { - function, + function: *function, file, line, } @@ -249,7 +264,7 @@ impl<'a> Profiler<'a> { .. 
} = self.location_at(curr_pc).unwrap(); // ecall handler code doesn't have a ".debug loc", so we keep current file/line - if target.function == "__ecall_handler" { + if self.functions[target.function.0] == "__ecall_handler" { target.file = curr_file; target.line = curr_line; } @@ -267,11 +282,11 @@ impl<'a> Profiler<'a> { self.return_pc_stack.push(return_pc); } else { // we start profiling on the initial call to "__runtime_start" - if target.function == "__runtime_start" { + if self.functions[target.function.0] == "__runtime_start" { let call = Call { // __runtime_start does not have a proper ".debug loc", just point to main file from: Loc { - function: "", + function: FuncId(0), file: 1, line: 0, }, From 34c11ebe383ea15d698e4141b1e3a4dd4b294dae Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Fri, 29 Nov 2024 15:41:59 +0100 Subject: [PATCH 09/57] riscv: add support for sra instruction (#2131) While the `srai` instruction has been implemented, the `sra` (same thing with shift passed via a register) was not. This PR implements it. --------- Signed-off-by: Guillaume Ballet <3272758+gballet@users.noreply.github.com> --- riscv/src/large_field/code_gen.rs | 25 ++++++++++++++++++ riscv/src/small_field/code_gen.rs | 42 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/riscv/src/large_field/code_gen.rs b/riscv/src/large_field/code_gen.rs index 6986731344..15c285feb3 100644 --- a/riscv/src/large_field/code_gen.rs +++ b/riscv/src/large_field/code_gen.rs @@ -708,6 +708,31 @@ fn process_instruction( ], ) } + "sra" => { + // arithmetic shift right + // TODO see if we can implement this directly with a machine. + // Now we are using the equivalence + // a >>> b = (a >= 0 ? 
a >> b : ~(~a >> b)) + let (rd, rs1, rs2) = args.rrr()?; + only_if_no_write_to_zero_vec( + rd, + vec![ + format!("to_signed {}, {};", rs1.addr(), tmp1.addr()), + format!( + "is_diff_greater_than 0, {}, 0, {};", + tmp1.addr(), + tmp1.addr() + ), + format!("affine {}, {}, 0xffffffff, 0;", tmp1.addr(), tmp1.addr()), + // Here, tmp1 is the full bit mask if rs is negative + // and zero otherwise. + format!("xor {}, {}, 0, {};", tmp1.addr(), rs1.addr(), tmp2.addr()), + format!("and {}, 0, 0x1f, {};", rs2.addr(), tmp3.addr()), + format!("shr {}, {}, 0, {};", tmp2.addr(), tmp3.addr(), rd.addr()), + format!("xor {}, {}, 0, {};", tmp1.addr(), rd.addr(), rd.addr()), + ], + ) + } // comparison "seqz" => { diff --git a/riscv/src/small_field/code_gen.rs b/riscv/src/small_field/code_gen.rs index 399263b4cd..48aefa7edc 100644 --- a/riscv/src/small_field/code_gen.rs +++ b/riscv/src/small_field/code_gen.rs @@ -1194,6 +1194,48 @@ fn process_instruction( ], ) } + "sra" => { + // arithmetic shift right + // TODO see if we can implement this directly with a machine. + // Now we are using the equivalence + // a >>> b = (a >= 0 ? a >> b : ~(~a >> b)) + let (rd, rs1, rs2) = args.rrr()?; + assert!(rs2.addr() <= 31); + only_if_no_write_to_zero_vec( + rd, + vec![ + format!("affine {}, {}, 0, 1, 0, 0;", rs1.addr(), tmp1.addr()), + format!( + "is_greater_or_equal_signed {}, 0, {};", + tmp1.addr(), + tmp1.addr() + ), + format!( + "affine {}, {}, {}, {}, 0, 1;", + tmp1.addr(), + tmp1.addr(), + i32_high(-1), + i32_low(-1) + ), + format!( + "affine {}, {}, 0xffff, 0xffff, 0, 0;", + tmp1.addr(), + tmp1.addr() + ), + // Here, tmp1 is the full bit mask if rs1 is negative + // and zero otherwise. 
+ format!( + "xor {}, {}, 0, 0, {};", + tmp1.addr(), + rs1.addr(), + tmp2.addr() + ), + format!("and {}, 0, 0, 0x1f, {};", rs2.addr(), tmp3.addr()), + format!("shr {}, {}, 0, 0, {};", rd.addr(), tmp3.addr(), rd.addr()), + format!("xor {}, {}, 0, 0, {};", tmp1.addr(), rd.addr(), rd.addr()), + ], + ) + } // comparison "seqz" => { From 6530820ae84fe6f7e12f784dc9c8d7c5a814cd68 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Fri, 29 Nov 2024 17:42:19 +0100 Subject: [PATCH 10/57] Introduce `SecondStageMachine` (#2169) This introduces a new machine which is always used for the second-stage witness generation. Currently it is a copy of DynamicMachine, but in the end it will be optimized for second-stage witness generation. --- .../src/witgen/machines/machine_extractor.rs | 20 +- executor/src/witgen/machines/mod.rs | 10 + .../witgen/machines/second_stage_machine.rs | 187 ++++++++++++++++++ 3 files changed, 204 insertions(+), 13 deletions(-) create mode 100644 executor/src/witgen/machines/second_stage_machine.rs diff --git a/executor/src/witgen/machines/machine_extractor.rs b/executor/src/witgen/machines/machine_extractor.rs index 7451a34aaa..1da28230f0 100644 --- a/executor/src/witgen/machines/machine_extractor.rs +++ b/executor/src/witgen/machines/machine_extractor.rs @@ -14,6 +14,7 @@ use super::sorted_witness_machine::SortedWitnesses; use super::FixedData; use super::KnownMachine; use crate::witgen::machines::dynamic_machine::DynamicMachine; +use crate::witgen::machines::second_stage_machine::SecondStageMachine; use crate::witgen::machines::Connection; use crate::witgen::machines::{write_once_memory::WriteOnceMemory, MachineParts}; use crate::Identity; @@ -60,26 +61,19 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> { .collect::>(); if self.fixed.stage() > 0 { - // We expect later-stage witness columns to be accumulators for lookup and permutation arguments. - // These don't behave like normal witness columns (e.g. 
in a block machine), and they might depend - // on witness columns of more than one machine. - // Therefore, we treat everything as one big machine. Also, we remove lookups and permutations, - // as they are assumed to be handled in stage 0. - let polynomial_identities = identities - .into_iter() - .filter(|identity| matches!(identity, Identity::Polynomial(_))) - .collect::>(); let machine_parts = MachineParts::new( self.fixed, Default::default(), - polynomial_identities, + identities, self.fixed.witness_cols.keys().collect::>(), prover_functions, ); - return build_main_machine(self.fixed, machine_parts) - .into_iter() - .collect(); + return vec![KnownMachine::SecondStageMachine(SecondStageMachine::new( + "Bus Machine".to_string(), + self.fixed, + machine_parts, + ))]; } let mut machines: Vec> = vec![]; diff --git a/executor/src/witgen/machines/mod.rs b/executor/src/witgen/machines/mod.rs index b425fe3d6c..3a4e3ebff0 100644 --- a/executor/src/witgen/machines/mod.rs +++ b/executor/src/witgen/machines/mod.rs @@ -17,6 +17,7 @@ use self::double_sorted_witness_machine_16::DoubleSortedWitnesses16; use self::double_sorted_witness_machine_32::DoubleSortedWitnesses32; pub use self::fixed_lookup_machine::FixedLookup; use self::profiling::{record_end, record_start}; +use self::second_stage_machine::SecondStageMachine; use self::sorted_witness_machine::SortedWitnesses; use self::write_once_memory::WriteOnceMemory; @@ -30,6 +31,7 @@ mod dynamic_machine; mod fixed_lookup_machine; pub mod machine_extractor; pub mod profiling; +mod second_stage_machine; mod sorted_witness_machine; mod write_once_memory; @@ -117,6 +119,7 @@ pub enum LookupCell<'a, T> { /// This allows us to treat machines uniformly without putting them into a `Box`, /// which requires that all lifetime parameters are 'static. 
pub enum KnownMachine<'a, T: FieldElement> { + SecondStageMachine(SecondStageMachine<'a, T>), SortedWitnesses(SortedWitnesses<'a, T>), DoubleSortedWitnesses16(DoubleSortedWitnesses16<'a, T>), DoubleSortedWitnesses32(DoubleSortedWitnesses32<'a, T>), @@ -129,6 +132,7 @@ pub enum KnownMachine<'a, T: FieldElement> { impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> { fn run>(&mut self, mutable_state: &MutableState<'a, T, Q>) { match self { + KnownMachine::SecondStageMachine(m) => m.run(mutable_state), KnownMachine::SortedWitnesses(m) => m.run(mutable_state), KnownMachine::DoubleSortedWitnesses16(m) => m.run(mutable_state), KnownMachine::DoubleSortedWitnesses32(m) => m.run(mutable_state), @@ -146,6 +150,9 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> { caller_rows: &'b RowPair<'b, 'a, T>, ) -> EvalResult<'a, T> { match self { + KnownMachine::SecondStageMachine(m) => { + m.process_plookup(mutable_state, identity_id, caller_rows) + } KnownMachine::SortedWitnesses(m) => { m.process_plookup(mutable_state, identity_id, caller_rows) } @@ -172,6 +179,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> { fn name(&self) -> &str { match self { + KnownMachine::SecondStageMachine(m) => m.name(), KnownMachine::SortedWitnesses(m) => m.name(), KnownMachine::DoubleSortedWitnesses16(m) => m.name(), KnownMachine::DoubleSortedWitnesses32(m) => m.name(), @@ -187,6 +195,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> { mutable_state: &'b MutableState<'a, T, Q>, ) -> HashMap> { match self { + KnownMachine::SecondStageMachine(m) => m.take_witness_col_values(mutable_state), KnownMachine::SortedWitnesses(m) => m.take_witness_col_values(mutable_state), KnownMachine::DoubleSortedWitnesses16(m) => m.take_witness_col_values(mutable_state), KnownMachine::DoubleSortedWitnesses32(m) => m.take_witness_col_values(mutable_state), @@ -199,6 +208,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> { fn 
identity_ids(&self) -> Vec { match self { + KnownMachine::SecondStageMachine(m) => m.identity_ids(), KnownMachine::SortedWitnesses(m) => m.identity_ids(), KnownMachine::DoubleSortedWitnesses16(m) => m.identity_ids(), KnownMachine::DoubleSortedWitnesses32(m) => m.identity_ids(), diff --git a/executor/src/witgen/machines/second_stage_machine.rs b/executor/src/witgen/machines/second_stage_machine.rs new file mode 100644 index 0000000000..412bdb499c --- /dev/null +++ b/executor/src/witgen/machines/second_stage_machine.rs @@ -0,0 +1,187 @@ +use powdr_ast::analyzed::Identity; +use powdr_number::{DegreeType, FieldElement}; +use std::collections::{BTreeMap, HashMap}; + +use crate::witgen::block_processor::BlockProcessor; +use crate::witgen::data_structures::finalizable_data::FinalizableData; +use crate::witgen::data_structures::mutable_state::MutableState; +use crate::witgen::machines::{Machine, MachineParts}; +use crate::witgen::processor::SolverState; +use crate::witgen::rows::{Row, RowIndex, RowPair}; +use crate::witgen::sequence_iterator::{DefaultSequenceIterator, ProcessingSequenceIterator}; +use crate::witgen::vm_processor::VmProcessor; +use crate::witgen::{EvalResult, FixedData, QueryCallback}; + +/// A machine responsible for second-phase witness generation. +/// For example, this might generate the witnesses for a bus accumulator or LogUp argument. +pub struct SecondStageMachine<'a, T: FieldElement> { + fixed_data: &'a FixedData<'a, T>, + parts: MachineParts<'a, T>, + data: FinalizableData, + publics: BTreeMap<&'a str, T>, + name: String, + degree: DegreeType, +} + +impl<'a, T: FieldElement> Machine<'a, T> for SecondStageMachine<'a, T> { + fn identity_ids(&self) -> Vec { + Vec::new() + } + + fn name(&self) -> &str { + &self.name + } + + /// Runs the machine without any arguments from the first row. 
+ fn run>(&mut self, mutable_state: &MutableState<'a, T, Q>) { + assert!(self.data.is_empty()); + let first_row = self.compute_partial_first_row(mutable_state); + self.data = self.process(first_row, mutable_state); + } + + fn process_plookup<'b, Q: QueryCallback>( + &mut self, + _mutable_state: &MutableState<'a, T, Q>, + _identity_id: u64, + _caller_rows: &'b RowPair<'b, 'a, T>, + ) -> EvalResult<'a, T> { + panic!("SecondStageMachine can't be called by other machines!") + } + + fn take_witness_col_values<'b, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + ) -> HashMap> { + log::debug!("Finalizing VM: {}", self.name()); + + self.fix_first_row(); + + self.data + .take_transposed() + .map(|(id, (values, _))| (id, values)) + .map(|(id, values)| (self.fixed_data.column_name(&id).to_string(), values)) + .collect() + } +} + +impl<'a, T: FieldElement> SecondStageMachine<'a, T> { + pub fn new(name: String, fixed_data: &'a FixedData<'a, T>, parts: MachineParts<'a, T>) -> Self { + let data = FinalizableData::new(&parts.witnesses); + + // Only keep polynomial identities. We assume other constraints to be handled in stage 0. + let polynomial_identities = parts + .identities + .into_iter() + .filter(|identity| matches!(identity, Identity::Polynomial(_))) + .collect::>(); + let parts = MachineParts::new( + fixed_data, + Default::default(), + polynomial_identities, + parts.witnesses, + parts.prover_functions, + ); + + Self { + degree: parts.common_degree_range().max, + name, + fixed_data, + parts, + data, + publics: Default::default(), + } + } + + /// Runs the solver on the row pair (degree - 1, 0) in order to partially compute the first + /// row from identities like `pc' = (1 - first_step') * <...>`. + fn compute_partial_first_row>( + &self, + mutable_state: &MutableState<'a, T, Q>, + ) -> Row { + // Use `BlockProcessor` + `DefaultSequenceIterator` using a "block size" of 0. 
Because `BlockProcessor` + // expects `data` to include the row before and after the block, this means we'll run the + // solver on exactly one row pair. + // Note that using `BlockProcessor` instead of `VmProcessor` is more convenient here because + // it does not assert that the row is "complete" afterwards (i.e., that all identities + // are satisfied assuming 0 for unknown values). + let data = FinalizableData::with_initial_rows_in_progress( + &self.parts.witnesses, + [ + Row::fresh(self.fixed_data, RowIndex::from_i64(-1, self.degree)), + Row::fresh(self.fixed_data, RowIndex::from_i64(0, self.degree)), + ] + .into_iter(), + ); + + // We're only interested in the first row anyway, so identities without a next reference + // are irrelevant. + // Also, they can lead to problems in the case where some witness columns are provided + // externally, e.g. if the last row happens to call into a stateful machine like memory. + let next_parts = self.parts.restricted_to_identities_with_next_references(); + let mut processor = BlockProcessor::new( + RowIndex::from_i64(-1, self.degree), + // Shouldn't need any publics at this point + SolverState::without_publics(data), + mutable_state, + self.fixed_data, + &next_parts, + self.degree, + ); + let mut sequence_iterator = ProcessingSequenceIterator::Default( + DefaultSequenceIterator::new(0, next_parts.identities.len(), None), + ); + processor.solve(&mut sequence_iterator).unwrap(); + + // Ignore any updates to the publics at this point, as we'll re-visit the last row again. 
+ let mut block = processor.finish().block; + assert!(block.len() == 2); + block.pop().unwrap() + } + + fn process>( + &mut self, + first_row: Row, + mutable_state: &MutableState<'a, T, Q>, + ) -> FinalizableData { + log::trace!( + "Running Second-Stage Machine with the following initial values in the first row:\n{}", + first_row.render_values(false, &self.parts) + ); + let data = FinalizableData::with_initial_rows_in_progress( + &self.parts.witnesses, + [first_row].into_iter(), + ); + + let mut processor = VmProcessor::new( + self.name().to_string(), + RowIndex::from_degree(0, self.degree), + self.fixed_data, + &self.parts, + SolverState::new(data, self.publics.clone()), + mutable_state, + ); + processor.run(true); + let (updated_data, degree) = processor.finish(); + + // The processor might have detected a loop, in which case the degree has changed + self.degree = degree; + + updated_data.block + } + + /// At the end of the solving algorithm, we'll have computed the first row twice + /// (as row 0 and as row ). This function merges the two versions. + fn fix_first_row(&mut self) { + assert_eq!(self.data.len() as DegreeType, self.degree + 1); + + let last_row = self.data.pop().unwrap(); + if self.data[0].merge_with(&last_row).is_err() { + log::error!("{}", self.data[0].render("First row", false, &self.parts)); + log::error!("{}", last_row.render("Last row", false, &self.parts)); + panic!( + "Failed to merge the first and last row of the VM '{}'", + self.name() + ); + } + } +} From d980c68f89386160e3ffd6b2ce4e62f26c045cdf Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Fri, 29 Nov 2024 18:14:33 +0100 Subject: [PATCH 11/57] Parallelize calls to prover functions (#2176) Cherry-picked from #2174 With this PR, we run all prover functions in parallel when solving for the witness in `VmProcessor`. Interestingly, this didn't require any changes to the order in which things are done: We already ran the functions independently and applied the combined updates. 
So, this is a classic map-reduce. I think this change always makes sense, but is especially useful for the prover functions we have to set bus accumulator values. For example, in our RISC-V machine, the main machine has ~30 bus interactions, with a fairly expensive prover function for each. When used on top of #2173 and #2175, this accelerates second-stage witness generation for the main machine from ~10s to ~6s for the example mentioned in #2173. --- executor/src/witgen/processor.rs | 31 +++++++++++++++++++++----- executor/src/witgen/query_processor.rs | 12 +++++----- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/executor/src/witgen/processor.rs b/executor/src/witgen/processor.rs index 74fe04280a..24279f0968 100644 --- a/executor/src/witgen/processor.rs +++ b/executor/src/witgen/processor.rs @@ -4,6 +4,7 @@ use powdr_ast::analyzed::PolynomialType; use powdr_ast::analyzed::{AlgebraicExpression as Expression, AlgebraicReference, PolyID}; use powdr_number::{DegreeType, FieldElement}; +use rayon::iter::{ParallelBridge, ParallelIterator}; use crate::witgen::affine_expression::AlgebraicVariable; use crate::witgen::data_structures::mutable_state::MutableState; @@ -220,7 +221,7 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> Processor<'a, 'c, T, Q> { } pub fn process_queries(&mut self, row_index: usize) -> Result> { - let mut query_processor = QueryProcessor::new( + let query_processor = QueryProcessor::new( self.fixed_data, self.mutable_state.query_callback(), self.size, @@ -238,15 +239,33 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> Processor<'a, 'c, T, Q> { ); let mut updates = EvalValue::complete(vec![]); - for (i, fun) in self.parts.prover_functions.iter().enumerate() { - if !self.processed_prover_functions.has_run(row_index, i) { - let r = query_processor.process_prover_function(&row_pair, fun)?; + self.parts + .prover_functions + .iter() + .enumerate() + // Run all prover functions in parallel + .par_bridge() + .filter_map(|(i, fun)| { + 
if !self.processed_prover_functions.has_run(row_index, i) { + query_processor + .process_prover_function(&row_pair, fun) + .map(|result| Some((result, i))) + .transpose() + } else { + // Skip already processed functions + None + } + }) + // Fail if any of the prover functions failed + .collect::, EvalError>>()? + // Combine results + .into_iter() + .for_each(|(r, i)| { if r.is_complete() { updates.combine(r); self.processed_prover_functions.mark_as_run(row_index, i); } - } - } + }); for poly_id in &self.prover_query_witnesses { if let Some(r) = query_processor.process_query(&row_pair, poly_id) { diff --git a/executor/src/witgen/query_processor.rs b/executor/src/witgen/query_processor.rs index 1e0d383fe6..7847502bd1 100644 --- a/executor/src/witgen/query_processor.rs +++ b/executor/src/witgen/query_processor.rs @@ -33,9 +33,9 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> } } - pub fn process_prover_function<'c>( - &'c mut self, - rows: &'c RowPair<'c, 'a, T>, + pub fn process_prover_function( + &self, + rows: &RowPair<'_, 'a, T>, fun: &'a Expression, ) -> EvalResult<'a, T> { let arguments = vec![Arc::new(Value::Integer(BigInt::from(u64::from( @@ -77,7 +77,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> /// Panics if the column does not have a query attached. /// @returns None if the value for that column is already known. 
pub fn process_query( - &mut self, + &self, rows: &RowPair<'_, 'a, T>, poly_id: &PolyID, ) -> Option> { @@ -91,7 +91,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> } fn process_witness_query( - &mut self, + &self, query: &'a Expression, poly: &'a AlgebraicReference, rows: &RowPair<'_, 'a, T>, @@ -129,7 +129,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> } fn interpolate_query( - &mut self, + &self, query: &'a Expression, rows: &RowPair<'_, 'a, T>, ) -> Result { From 8c3f478ddc8240690a3110598ed33b09d21a8b4b Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Fri, 29 Nov 2024 18:41:55 +0100 Subject: [PATCH 12/57] `fingerprint()` performance improvements (#2175) Cherry-picked from #2174 When used on top of #2173, this accelerates second-stage witness generation for the main machine from ~35s to ~10s for the example mentioned in #2173. --- std/protocols/bus.asm | 22 +++++++++++++--------- std/protocols/fingerprint.asm | 14 ++++++++++---- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/std/protocols/bus.asm b/std/protocols/bus.asm index c0e1ccb1ac..5bf1be17b0 100644 --- a/std/protocols/bus.asm +++ b/std/protocols/bus.asm @@ -82,21 +82,25 @@ let bus_interaction: expr, expr[], expr -> () = constr |id, tuple, multiplicity| /// This is intended to be used as a hint in the extension field case; for the base case /// automatic witgen is smart enough to figure out the value of the accumulator. 
let compute_next_z: expr, expr, expr[], expr, Fp2, Fp2, Fp2 -> fe[] = query |is_first, id, tuple, multiplicity, acc, alpha, beta| { - // Implemented as: folded = (beta - fingerprint(id, tuple...)); - // `multiplicity / (beta - fingerprint(id, tuple...))` to `acc` - let folded_next = sub_ext(eval_ext(beta), fingerprint_with_id(eval(id'), array::eval(array::next(tuple)), alpha)); - let m_ext = from_base(multiplicity); - let m_ext_next = next_ext(m_ext); + let m_next = eval(multiplicity'); + let m_ext_next = from_base(m_next); let is_first_next = eval(is_first'); let current_acc = if is_first_next == 1 {from_base(0)} else {eval_ext(acc)}; // acc' = current_acc + multiplicity' / folded' - let res = add_ext( - current_acc, - mul_ext(eval_ext(m_ext_next), inv_ext(folded_next)) - ); + let res = if m_next == 0 { + current_acc + } else { + // Implemented as: folded = (beta - fingerprint(id, tuple...)); + // `multiplicity / (beta - fingerprint(id, tuple...))` to `acc` + let folded_next = sub_ext(eval_ext(beta), fingerprint_with_id(eval(id'), array::eval(array::next(tuple)), alpha)); + add_ext( + current_acc, + mul_ext(m_ext_next, inv_ext(folded_next)) + ) + }; unpack_ext_array(res) }; diff --git a/std/protocols/fingerprint.asm b/std/protocols/fingerprint.asm index c19bb6d634..d39e6611e9 100644 --- a/std/protocols/fingerprint.asm +++ b/std/protocols/fingerprint.asm @@ -11,15 +11,21 @@ use std::check::assert; /// Maps [x_1, x_2, ..., x_n] to its Read-Solomon fingerprint, using a challenge alpha: $\sum_{i=1}^n alpha**{(n - i)} * x_i$ /// To generate an expression that computes the fingerprint, use `fingerprint_inter` instead. /// Note that alpha is passed as an expressions, so that it is only evaluated if needed (i.e., if len(expr_array) > 1). 
-let fingerprint: fe[], Fp2 -> Fp2 = query |expr_array, alpha| if len(expr_array) == 1 {
+let fingerprint: fe[], Fp2 -> Fp2 = query |expr_array, alpha| if array::len(expr_array) == 1 {
+    // No need to evaluate `alpha` (which would be removed by the optimizer).
+    from_base(expr_array[0])
+} else {
+    fingerprint_impl(expr_array, eval_ext(alpha), len(expr_array))
+};
+
+let fingerprint_impl: fe[], Fp2, int -> Fp2 = query |expr_array, alpha, l| if l == 1 {
     // Base case
     from_base(expr_array[0])
 } else {
-    assert(len(expr_array) > 1, || "fingerprint requires at least one element");
     // Recursively compute the fingerprint as fingerprint(expr_array[:-1], alpha) * alpha + expr_array[-1]
-    let intermediate_fingerprint = fingerprint(array::sub_array(expr_array, 0, len(expr_array) - 1), alpha);
-    add_ext(mul_ext(eval_ext(alpha), intermediate_fingerprint), from_base(expr_array[len(expr_array) - 1]))
+    let intermediate_fingerprint = fingerprint_impl(expr_array, alpha, l - 1);
+    add_ext(mul_ext(alpha, intermediate_fingerprint), from_base(expr_array[l - 1]))
 };
 
 /// Like `fingerprint`, but "materializes" the intermediate results as intermediate columns.

From 2832ff108b0eb8e1bd7eab8307009841cb8ed06a Mon Sep 17 00:00:00 2001
From: Georg Wiese
Date: Fri, 29 Nov 2024 19:16:02 +0100
Subject: [PATCH 13/57] Fix second stage witgen (#2173)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Builds on #2169

With this PR, second-stage witness generation works for the bus used in
the RISC-V machine 🎉

This is an end-to-end test:

```bash
cargo run -r --bin powdr-rs compile riscv/tests/riscv_data/sum -o output --max-degree-log 15 --field gl
cargo run -r pil output/sum.asm -o output -f --field gl --prove-with mock --linker-mode bus -i 1,1,1
```

What's needed is two small changes to `VmProcessor`:
- The degree is now passed by the caller (`DynamicMachine` or
`SecondStageMachine`).
That way, `SecondStageMachine` can set it to the actual final size, instead of the maximum allowed degree. - I disabled loop detection for second-stage witness generation for now. --- executor/src/witgen/machines/dynamic_machine.rs | 2 ++ .../src/witgen/machines/second_stage_machine.rs | 14 +++++++++++++- executor/src/witgen/mod.rs | 9 ++++++++- executor/src/witgen/vm_processor.rs | 14 +++++++++++--- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/executor/src/witgen/machines/dynamic_machine.rs b/executor/src/witgen/machines/dynamic_machine.rs index 7c95996a0a..78cfecc422 100644 --- a/executor/src/witgen/machines/dynamic_machine.rs +++ b/executor/src/witgen/machines/dynamic_machine.rs @@ -230,6 +230,8 @@ impl<'a, T: FieldElement> DynamicMachine<'a, T> { &self.parts, SolverState::new(data, self.publics.clone()), mutable_state, + self.degree, + true, ); if let Some(outer_query) = outer_query { processor = processor.with_outer_query(outer_query); diff --git a/executor/src/witgen/machines/second_stage_machine.rs b/executor/src/witgen/machines/second_stage_machine.rs index 412bdb499c..c957eeb72e 100644 --- a/executor/src/witgen/machines/second_stage_machine.rs +++ b/executor/src/witgen/machines/second_stage_machine.rs @@ -1,3 +1,4 @@ +use itertools::Itertools; use powdr_ast::analyzed::Identity; use powdr_number::{DegreeType, FieldElement}; use std::collections::{BTreeMap, HashMap}; @@ -82,8 +83,17 @@ impl<'a, T: FieldElement> SecondStageMachine<'a, T> { parts.prover_functions, ); + let witness_sizes = fixed_data + .witness_cols + .values() + .filter_map(|w| w.external_values.as_ref()) + .map(|values| values.len()) + .unique() + .collect::>(); + let degree = witness_sizes.into_iter().exactly_one().unwrap() as DegreeType; + Self { - degree: parts.common_degree_range().max, + degree, name, fixed_data, parts, @@ -159,6 +169,8 @@ impl<'a, T: FieldElement> SecondStageMachine<'a, T> { &self.parts, SolverState::new(data, self.publics.clone()), mutable_state, + 
self.degree, + false, ); processor.run(true); let (updated_data, degree) = processor.finish(); diff --git a/executor/src/witgen/mod.rs b/executor/src/witgen/mod.rs index 1ce6d6c93b..e1b59edadf 100644 --- a/executor/src/witgen/mod.rs +++ b/executor/src/witgen/mod.rs @@ -570,7 +570,14 @@ impl<'a, T> FixedColumn<'a, T> { } pub fn values(&self, size: DegreeType) -> &[T] { - self.values.get_by_size(size).unwrap() + self.values.get_by_size(size).unwrap_or_else(|| { + panic!( + "Fixed column {} does not have a value for size {}. Available sizes: {:?}", + self.name, + size, + self.values.available_sizes() + ) + }) } pub fn values_max_size(&self) -> &[T] { diff --git a/executor/src/witgen/vm_processor.rs b/executor/src/witgen/vm_processor.rs index 65968eb4a9..adc44c1d23 100644 --- a/executor/src/witgen/vm_processor.rs +++ b/executor/src/witgen/vm_processor.rs @@ -68,6 +68,9 @@ pub struct VmProcessor<'a, 'c, T: FieldElement, Q: QueryCallback> { last_report_time: Instant, processor: Processor<'a, 'c, T, Q>, progress_bar: ProgressBar, + /// If true, we'll periodically check if we are in a loop. If yes, we'll add new rows by + /// copying the old ones and check the constraints. 
+ loop_detection: bool, } impl<'a, 'c, T: FieldElement, Q: QueryCallback> VmProcessor<'a, 'c, T, Q> { @@ -79,11 +82,11 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> VmProcessor<'a, 'c, T, Q> { parts: &'c MachineParts<'a, T>, mutable_data: SolverState<'a, T>, mutable_state: &'c MutableState<'a, T, Q>, + degree: DegreeType, + loop_detection: bool, ) -> Self { let degree_range = parts.common_degree_range(); - let degree = degree_range.max; - let (identities_with_next, identities_without_next): (Vec<_>, Vec<_>) = parts .identities .iter() @@ -118,6 +121,7 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> VmProcessor<'a, 'c, T, Q> { last_report_time: Instant::now(), processor, progress_bar, + loop_detection, } } @@ -181,7 +185,11 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> VmProcessor<'a, 'c, T, Q> { } // Check if we are in a loop. - if looping_period.is_none() && row_index % 100 == 0 && row_index > 0 { + if looping_period.is_none() + && row_index % 100 == 0 + && row_index > 0 + && self.loop_detection + { looping_period = self.rows_are_repeating(row_index); if let Some(p) = looping_period { log::log!( From 89969ad1f5cb822f611862518f7648dbc090d9ea Mon Sep 17 00:00:00 2001 From: Leo Date: Fri, 29 Nov 2024 19:21:03 +0100 Subject: [PATCH 14/57] Fix some tests (#2178) --- riscv/src/large_field/runtime.rs | 2 +- riscv/src/small_field/code_gen.rs | 4 ++-- riscv/src/small_field/runtime.rs | 4 ++-- riscv/tests/common/mod.rs | 24 ++++++++++++------------ riscv/tests/riscv.rs | 8 ++++---- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/riscv/src/large_field/runtime.rs b/riscv/src/large_field/runtime.rs index d2c6bff75a..6124ce851b 100644 --- a/riscv/src/large_field/runtime.rs +++ b/riscv/src/large_field/runtime.rs @@ -331,7 +331,7 @@ impl Runtime { "std::machines::large_field::arith::Arith", None, "arith", - vec![], + vec!["MIN_DEGREE", "MAIN_MAX_DEGREE"], [ format!( "instr affine_256 link ~> {};", diff --git a/riscv/src/small_field/code_gen.rs 
b/riscv/src/small_field/code_gen.rs index 48aefa7edc..4506a54401 100644 --- a/riscv/src/small_field/code_gen.rs +++ b/riscv/src/small_field/code_gen.rs @@ -376,7 +376,7 @@ fn preamble(field: KnownField, runtime: &Runtime, with_bootloader: bool) -> Stri // Jump to the address in register XL and store the return program counter in register WL. instr jump_dyn XL, WL link ~> (tmp1_h, tmp1_l) = regs.mload(0, XL, STEP) - link ~> regs.mstore(0, WL, STEP, tmp2_h, tmp2_l) + link ~> regs.mstore(0, WL, STEP + 3, tmp2_h, tmp2_l) // pc is capped at 24 bits, so for this instruction // we restrict the higher limbs to 1 byte link => byte.check(tmp1_h) @@ -568,7 +568,7 @@ fn preamble(field: KnownField, runtime: &Runtime, with_bootloader: bool) -> Stri // Stores 1 in register WL if val(XL) == val(YL), otherwise stores 0. instr is_not_equal XL, YL, WL link ~> (tmp1_h, tmp1_l) = regs.mload(0, XL, STEP) - link ~> (tmp2_h, tmp2_l) = regs.mload(0, YL, STEP) + link ~> (tmp2_h, tmp2_l) = regs.mload(0, YL, STEP + 1) link ~> (tmp3_h, tmp3_l) = add_sub.sub(tmp1_h, tmp1_l, tmp2_h, tmp2_l) link ~> regs.mstore(0, WL, STEP + 2, 0, 1 - XXIsZero) { diff --git a/riscv/src/small_field/runtime.rs b/riscv/src/small_field/runtime.rs index a67c3ecd83..809f6fa979 100644 --- a/riscv/src/small_field/runtime.rs +++ b/riscv/src/small_field/runtime.rs @@ -76,14 +76,14 @@ impl Runtime { [ r#"instr shl XL, YL, ZH, ZL, WL link ~> (tmp1_h, tmp1_l) = regs.mload(0, XL, STEP) - link ~> (tmp2_h, tmp2_l) = regs.mload(0, YL, STEP) + link ~> (tmp2_h, tmp2_l) = regs.mload(0, YL, STEP + 1) link ~> (tmp3_h, tmp3_l) = add_sub.add(tmp2_h, tmp2_l, ZH, ZL) link ~> (tmp4_l, tmp4_h) = shift.shl(tmp1_l, tmp1_h, tmp3_l) link ~> regs.mstore(0, WL, STEP + 3, tmp4_h, tmp4_l); "#, r#"instr shr XL, YL, ZH, ZL, WL link ~> (tmp1_h, tmp1_l) = regs.mload(0, XL, STEP) - link ~> (tmp2_h, tmp2_l) = regs.mload(0, YL, STEP) + link ~> (tmp2_h, tmp2_l) = regs.mload(0, YL, STEP + 1) link ~> (tmp3_h, tmp3_l) = add_sub.add(tmp2_h, tmp2_l, ZH, ZL) 
link ~> (tmp4_l, tmp4_h) = shift.shr(tmp1_l, tmp1_h, tmp3_l) link ~> regs.mstore(0, WL, STEP + 3, tmp4_h, tmp4_l); diff --git a/riscv/tests/common/mod.rs b/riscv/tests/common/mod.rs index ba33dac17b..be4f32bfa7 100644 --- a/riscv/tests/common/mod.rs +++ b/riscv/tests/common/mod.rs @@ -31,18 +31,6 @@ pub fn verify_riscv_asm_string = - unsafe { std::mem::transmute(pipeline.clone()) }; - run_pilcom_with_backend_variant(pipeline_gl, BackendVariant::Composite).unwrap(); - } - // Test with the fast RISCV executor. // TODO remove the guard once the executor is implemented for BB if T::known_field().unwrap() == KnownField::GoldilocksField { @@ -56,6 +44,18 @@ pub fn verify_riscv_asm_string = + unsafe { std::mem::transmute(pipeline.clone()) }; + run_pilcom_with_backend_variant(pipeline_gl, BackendVariant::Composite).unwrap(); + } + test_plonky3_pipeline::(pipeline.clone()); // verify executor generated witness diff --git a/riscv/tests/riscv.rs b/riscv/tests/riscv.rs index 7582ccc137..4b1dd216a9 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -495,7 +495,7 @@ fn features_with_options(options: CompilerOptions) { &from_elf, &[expected.into()], None, - true, + false, ); // "add_two" @@ -513,7 +513,7 @@ fn features_with_options(options: CompilerOptions) { &from_elf, &[expected.into()], None, - true, + false, ); // "add_two" and "add_three" @@ -531,7 +531,7 @@ fn features_with_options(options: CompilerOptions) { &from_elf, &[expected.into()], None, - true, + false, ); } @@ -615,12 +615,12 @@ fn many_chunks_memory() { } fn verify_riscv_crate(case: &str, inputs: &[u64], executor_witgen: bool) { - verify_riscv_crate_bb(case, inputs.iter().map(|&x| x.into()).collect()); verify_riscv_crate_gl( case, inputs.iter().map(|&x| x.into()).collect(), executor_witgen, ); + verify_riscv_crate_bb(case, inputs.iter().map(|&x| x.into()).collect()); } fn verify_riscv_crate_bb(case: &str, inputs: Vec) { From d0e49ceb3d0a1425ddd6e731f5150a922ef22192 Mon Sep 17 00:00:00 2001 From: Leo 
Date: Fri, 29 Nov 2024 20:44:15 +0100 Subject: [PATCH 15/57] install riscv assembler for nightly tests (#2179) --- .github/workflows/nightly-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml index 0e106c2156..7295bf4583 100644 --- a/.github/workflows/nightly-tests.yml +++ b/.github/workflows/nightly-tests.yml @@ -73,6 +73,8 @@ jobs: run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install riscv target run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install EStarkPolygon prover dependencies run: sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm - name: Install pilcom From ebea062b7f1a999c8944f87d59d0c479d8aba98e Mon Sep 17 00:00:00 2001 From: Leo Date: Fri, 29 Nov 2024 22:37:29 +0100 Subject: [PATCH 16/57] fix more tests (#2180) --- riscv/tests/riscv.rs | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/riscv/tests/riscv.rs b/riscv/tests/riscv.rs index 4b1dd216a9..12db658220 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -4,7 +4,7 @@ use common::{compile_riscv_asm_file, verify_riscv_asm_file, verify_riscv_asm_str use mktemp::Temp; use powdr_number::{BabyBearField, FieldElement, GoldilocksField, KnownField}; use powdr_pipeline::{ - test_util::{run_pilcom_with_backend_variant, BackendVariant}, + test_util::{run_pilcom_with_backend_variant, test_mock_backend, BackendVariant}, Pipeline, }; use powdr_riscv_executor::ProfilerOptions; @@ -237,36 +237,62 @@ fn function_pointer() { verify_riscv_crate(case, &[2734, 735, 1999], true); } +// Temporary function to run the mock prover for cases where +// we can't use P3 yet. 
+fn run_mock_prover_for_arith(case: &str) { + let temp_dir = Temp::new_dir().unwrap(); + let executable = powdr_riscv::compile_rust_crate_to_riscv( + &format!("tests/riscv_data/{case}/Cargo.toml"), + &temp_dir, + None, + ); + + let options = CompilerOptions::new( + KnownField::GoldilocksField, + RuntimeLibs::new().with_arith(), + false, + ); + let asm = powdr_riscv::elf::translate(&executable, options); + + let temp_dir = mktemp::Temp::new_dir().unwrap().release(); + let file_name = format!("{case}.asm"); + let pipeline = Pipeline::::default() + .with_output(temp_dir.to_path_buf(), false) + .from_asm_string(asm, Some(PathBuf::from(file_name))); + + test_mock_backend(pipeline); +} + #[test] #[ignore = "Too slow"] fn runtime_ec_double() { let case = "ec_double"; - let options = CompilerOptions::new_gl().with_arith(); - verify_riscv_crate_gl_with_options(case, vec![], options, false); + run_mock_prover_for_arith(case); + // TODO We can't use P3 yet for this test because of degree 4 constraints. } #[test] #[ignore = "Too slow"] fn runtime_ec_add() { let case = "ec_add"; - let options = CompilerOptions::new_gl().with_arith(); - verify_riscv_crate_gl_with_options(case, vec![], options, false); + run_mock_prover_for_arith(case); + // TODO We can't use P3 yet for this test because of degree 4 constraints. } #[test] #[ignore = "Too slow"] fn runtime_affine_256() { let case = "affine_256"; - let options = CompilerOptions::new_gl().with_arith(); - verify_riscv_crate_gl_with_options(case, vec![], options, false); + run_mock_prover_for_arith(case); + // TODO We can't use P3 yet for this test because of degree 4 constraints. } #[test] #[ignore = "Too slow"] fn runtime_modmul_256() { let case = "modmul_256"; - let options = CompilerOptions::new_gl().with_arith(); - verify_riscv_crate_gl_with_options(case, vec![], options, false); + run_mock_prover_for_arith(case); + // TODO We can't use P3 yet for this test because of degree 4 constraints. 
} /* From 4f4f09778c471101302ca25e77640a9705f653d2 Mon Sep 17 00:00:00 2001 From: Leo Date: Mon, 2 Dec 2024 13:15:25 +0100 Subject: [PATCH 17/57] do not keep temp test files (#2181) The nightly test passes on the server now in 1h42m, using the same exact command. However, when run on CI, it fails with the error below. ```console System.IO.IOException: No space left on device : '/home/runner/runners/2.321.0/_diag/Worker_20241130-025215-utc.log' ... ``` This is potentially caused by the `.release()` function which keeps the tmp files alive. --- riscv/tests/common/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv/tests/common/mod.rs b/riscv/tests/common/mod.rs index be4f32bfa7..a5ed2269a6 100644 --- a/riscv/tests/common/mod.rs +++ b/riscv/tests/common/mod.rs @@ -20,7 +20,7 @@ pub fn verify_riscv_asm_string, executor_witgen: bool, ) { - let temp_dir = mktemp::Temp::new_dir().unwrap().release(); + let temp_dir = mktemp::Temp::new_dir().unwrap(); let mut pipeline = Pipeline::default() .with_prover_inputs(inputs.to_vec()) From a2eb9ad504ddd3f3b142e80cbaa1682e4724300c Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Mon, 2 Dec 2024 15:10:31 +0100 Subject: [PATCH 18/57] Add `MockBackend` tests (#2182) Adds some (negative) tests for `MockBackend`. 
--- .../src/mock/connection_constraint_checker.rs | 2 +- pipeline/tests/mock_backend.rs | 65 +++++++++++++++++++ pipeline/tests/pil.rs | 1 - 3 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 pipeline/tests/mock_backend.rs diff --git a/backend/src/mock/connection_constraint_checker.rs b/backend/src/mock/connection_constraint_checker.rs index d79e60e4ec..6cb4c29088 100644 --- a/backend/src/mock/connection_constraint_checker.rs +++ b/backend/src/mock/connection_constraint_checker.rs @@ -391,7 +391,7 @@ fn fmt_subset_error( ) -> fmt::Result { writeln!( f, - " The following tuples appear in {machine2}, but not in {machine1}:" + " The following tuples appear in {machine1}, but not in {machine2}:" )?; for tuple in not_in_machine2.iter().take(MAX_TUPLES) { writeln!(f, " {tuple}")?; diff --git a/pipeline/tests/mock_backend.rs b/pipeline/tests/mock_backend.rs new file mode 100644 index 0000000000..0ce12a845b --- /dev/null +++ b/pipeline/tests/mock_backend.rs @@ -0,0 +1,65 @@ +use powdr_number::GoldilocksField; +use powdr_pipeline::test_util::{make_simple_prepared_pipeline, test_mock_backend}; + +fn col(name: &str, values: [u64; N]) -> (String, Vec) { + ( + name.to_string(), + values.iter().map(|&x| GoldilocksField::from(x)).collect(), + ) +} + +fn init_logger() { + env_logger::builder().is_test(true).init(); +} + +#[test] +#[should_panic(expected = "Constraint check failed")] +fn fibonacci_wrong_initialization() { + // Initializes y with 2 instead of 1 + // -> fails `ISLAST * (y' - 1) = 0;` in the last row + init_logger(); + let f = "pil/fibonacci.pil"; + let pipeline = make_simple_prepared_pipeline::(f); + let pipeline = pipeline.set_witness(vec![ + // This would be the correct witness: + // col("Fibonacci::x", [1, 1, 2, 3]), + // col("Fibonacci::y", [1, 2, 3, 5]), + // This satisfies the constraints, except the initialization of y: + col("Fibonacci::x", [1, 2, 3, 5]), + col("Fibonacci::y", [2, 3, 5, 8]), + ]); + test_mock_backend(pipeline); +} + +#[test] 
+#[should_panic(expected = "Constraint check failed")] +fn block_to_block_wrong_connection() { + // Within main_arith, the only constraint is `z = x + y` + // So, if we multiply all columns with a constant, the constraint + // should still be satisfied, but the connection argument should fail. + init_logger(); + let f = "asm/block_to_block.asm"; + let pipeline = make_simple_prepared_pipeline::(f); + + // Get the correct witness + let witness = pipeline.witness().unwrap(); + + // Multiply all values in main_arith with 42 + let witness = witness + .iter() + .map(|(name, values)| { + if name.starts_with("main_arith") { + let values = values + .iter() + .map(|x| *x * GoldilocksField::from(42)) + .collect(); + (name.clone(), values) + } else { + (name.clone(), values.clone()) + } + }) + .collect::>(); + + let pipeline = pipeline.set_witness(witness); + test_mock_backend(pipeline); +} diff --git a/pipeline/tests/pil.rs b/pipeline/tests/pil.rs index a54859d562..d5a683c0ee 100644 --- a/pipeline/tests/pil.rs +++ b/pipeline/tests/pil.rs @@ -1,4 +1,3 @@ -#[cfg(feature = "halo2")] use powdr_number::GoldilocksField; use powdr_pipeline::{ test_util::{ From d2e110f177757d24c6eedb7fbcf48e8b168858ac Mon Sep 17 00:00:00 2001 From: Leo Date: Mon, 2 Dec 2024 18:20:27 +0100 Subject: [PATCH 19/57] remove log init from test (#2185) this used to break nightly --- pipeline/tests/mock_backend.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/tests/mock_backend.rs b/pipeline/tests/mock_backend.rs index 0ce12a845b..97e6701042 100644 --- a/pipeline/tests/mock_backend.rs +++ b/pipeline/tests/mock_backend.rs @@ -9,7 +9,7 @@ fn col(name: &str, values: [u64; N]) -> (String, Vec Date: Tue, 3 Dec 2024 11:17:54 +0100 Subject: [PATCH 20/57] Revert "Parallelize calls to prover functions (#2176)" (#2188) This reverts commit d980c68f89386160e3ffd6b2ce4e62f26c045cdf from https://github.com/powdr-labs/powdr/pull/2176 --- executor/src/witgen/processor.rs | 31 
+++++--------------------- executor/src/witgen/query_processor.rs | 12 +++++----- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/executor/src/witgen/processor.rs b/executor/src/witgen/processor.rs index 24279f0968..74fe04280a 100644 --- a/executor/src/witgen/processor.rs +++ b/executor/src/witgen/processor.rs @@ -4,7 +4,6 @@ use powdr_ast::analyzed::PolynomialType; use powdr_ast::analyzed::{AlgebraicExpression as Expression, AlgebraicReference, PolyID}; use powdr_number::{DegreeType, FieldElement}; -use rayon::iter::{ParallelBridge, ParallelIterator}; use crate::witgen::affine_expression::AlgebraicVariable; use crate::witgen::data_structures::mutable_state::MutableState; @@ -221,7 +220,7 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> Processor<'a, 'c, T, Q> { } pub fn process_queries(&mut self, row_index: usize) -> Result> { - let query_processor = QueryProcessor::new( + let mut query_processor = QueryProcessor::new( self.fixed_data, self.mutable_state.query_callback(), self.size, @@ -239,33 +238,15 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> Processor<'a, 'c, T, Q> { ); let mut updates = EvalValue::complete(vec![]); - self.parts - .prover_functions - .iter() - .enumerate() - // Run all prover functions in parallel - .par_bridge() - .filter_map(|(i, fun)| { - if !self.processed_prover_functions.has_run(row_index, i) { - query_processor - .process_prover_function(&row_pair, fun) - .map(|result| Some((result, i))) - .transpose() - } else { - // Skip already processed functions - None - } - }) - // Fail if any of the prover functions failed - .collect::, EvalError>>()? 
- // Combine results - .into_iter() - .for_each(|(r, i)| { + for (i, fun) in self.parts.prover_functions.iter().enumerate() { + if !self.processed_prover_functions.has_run(row_index, i) { + let r = query_processor.process_prover_function(&row_pair, fun)?; if r.is_complete() { updates.combine(r); self.processed_prover_functions.mark_as_run(row_index, i); } - }); + } + } for poly_id in &self.prover_query_witnesses { if let Some(r) = query_processor.process_query(&row_pair, poly_id) { diff --git a/executor/src/witgen/query_processor.rs b/executor/src/witgen/query_processor.rs index 7847502bd1..1e0d383fe6 100644 --- a/executor/src/witgen/query_processor.rs +++ b/executor/src/witgen/query_processor.rs @@ -33,9 +33,9 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> } } - pub fn process_prover_function( - &self, - rows: &RowPair<'_, 'a, T>, + pub fn process_prover_function<'c>( + &'c mut self, + rows: &'c RowPair<'c, 'a, T>, fun: &'a Expression, ) -> EvalResult<'a, T> { let arguments = vec![Arc::new(Value::Integer(BigInt::from(u64::from( @@ -77,7 +77,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> /// Panics if the column does not have a query attached. /// @returns None if the value for that column is already known. 
pub fn process_query( - &self, + &mut self, rows: &RowPair<'_, 'a, T>, poly_id: &PolyID, ) -> Option> { @@ -91,7 +91,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> } fn process_witness_query( - &self, + &mut self, query: &'a Expression, poly: &'a AlgebraicReference, rows: &RowPair<'_, 'a, T>, @@ -129,7 +129,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback> } fn interpolate_query( - &self, + &mut self, query: &'a Expression, rows: &RowPair<'_, 'a, T>, ) -> Result { From b2c0812f8634be84ecd8fd4d10652cd2c3709ae2 Mon Sep 17 00:00:00 2001 From: Noisy <125606576+donatik27@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:51:53 +0100 Subject: [PATCH 21/57] Fix syntax errors in patterns.md documentation (#2186) ## Changes in `book/src/pil/patterns.md`: 1. Fixed array pattern syntax: - Old: `[a, b, c`] - New: `[a, b, c]` Removed extra backtick that was causing incorrect code formatting. - Ensuring proper Markdown rendering The fixes help prevent confusion for readers learning about PIL patterns and maintain documentation quality standards. --- book/src/pil/patterns.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book/src/pil/patterns.md b/book/src/pil/patterns.md index 2ee173b47d..d18f2a8a06 100644 --- a/book/src/pil/patterns.md +++ b/book/src/pil/patterns.md @@ -11,7 +11,7 @@ A pattern is built up in from the following components: - `-k` - for a literal number `k`, matches the exact negated number, either as an `int` or a `fe` - `"text"` - for a string literal, matches the exact string literal as a `string` - `(a, b, c)` - for a tuple, matches a tuple-typed value if all the components match -- `[a, b, c`] - for an array, matches array values of exactly the same length if all the components match +- `[a, b, c]` - for an array, matches array values of exactly the same length if all the components match - `[a, .., b, c]` - matches an array that has an initial segment of `a` and ends in `b, c`. 
The omitted part can be empty. - `X::Y(a, b)` - for an enum variant `X::Y`, matches that enum variant if all the enum fields match. @@ -74,4 +74,4 @@ The following patterns are refutable: - tuple patterns that have refutable components - array patterns that are not `[..]`. -Variable patterns and `_` are always irrefutable. \ No newline at end of file +Variable patterns and `_` are always irrefutable. From a124dc6265660356c325c4634bdb2993b0afb18c Mon Sep 17 00:00:00 2001 From: Leo Date: Tue, 3 Dec 2024 18:32:45 +0100 Subject: [PATCH 22/57] update CI ubuntu (#2187) --- .github/workflows/build-cache.yml | 4 ++-- .github/workflows/dead-links.yml | 2 +- .github/workflows/deploy-book.yml | 2 +- .github/workflows/nightly-tests.yml | 29 +++++++++++++++-------------- .github/workflows/pr-tests.yml | 18 +++++++++--------- riscv/benches/executor_benchmark.rs | 21 +-------------------- 6 files changed, 29 insertions(+), 47 deletions(-) diff --git a/.github/workflows/build-cache.yml b/.github/workflows/build-cache.yml index ba8f78e3cb..bde8459c9c 100644 --- a/.github/workflows/build-cache.yml +++ b/.github/workflows/build-cache.yml @@ -9,7 +9,7 @@ env: jobs: build: - runs-on: warp-ubuntu-2204-x64-4x + runs-on: warp-ubuntu-2404-x64-4x steps: - uses: actions/checkout@v4 @@ -18,7 +18,7 @@ jobs: ##### The block below is shared between cache build and PR build workflows ##### - name: Install EStarkPolygon prover dependencies - run: sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - name: Install Rust toolchain 1.81 (stable) diff --git 
a/.github/workflows/dead-links.yml b/.github/workflows/dead-links.yml index 72175f93d8..13d248eeda 100644 --- a/.github/workflows/dead-links.yml +++ b/.github/workflows/dead-links.yml @@ -2,7 +2,7 @@ name: Check markdown links on: [pull_request, merge_group] jobs: markdown-link-check: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 - uses: gaurav-nelson/github-action-markdown-link-check@v1 diff --git a/.github/workflows/deploy-book.yml b/.github/workflows/deploy-book.yml index 548e32dc8d..12e577e754 100644 --- a/.github/workflows/deploy-book.yml +++ b/.github/workflows/deploy-book.yml @@ -8,7 +8,7 @@ on: jobs: deploy: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 permissions: contents: write # To push a branch pull-requests: write # To create a PR from that branch diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml index 7295bf4583..fca02ec059 100644 --- a/.github/workflows/nightly-tests.yml +++ b/.github/workflows/nightly-tests.yml @@ -11,7 +11,7 @@ env: jobs: check_if_needs_running: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 outputs: status: ${{ steps.count.outputs.status }} @@ -43,7 +43,7 @@ jobs: args: '--all-targets' test_release: - runs-on: ubuntu-22.04 + runs-on: warp-ubuntu-2404-x64-4x needs: check_if_needs_running if: needs.check_if_needs_running.outputs.status > 0 @@ -51,14 +51,17 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: âš¡ Cache rust - uses: actions/cache@v4 + - name: âš¡ Restore rust cache + id: cache + uses: WarpBuilds/cache/restore@v1 with: path: | - ~/.cargo/registry - ~/.cargo/git - target - key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.toml') }} + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + Cargo.lock + key: ${{ runner.os }}-cargo-nightly-tests - name: âš¡ Cache nodejs uses: actions/cache@v4 with: @@ -74,9 +77,9 @@ jobs: - name: Install riscv target run: rustup target add 
riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install test dependencies - run: sudo apt-get install -y binutils-riscv64-unknown-elf lld + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install EStarkPolygon prover dependencies - run: sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc uuid-dev build-essential cmake pkg-config git - name: Install pilcom run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - name: Check without Halo2 @@ -84,8 +87,6 @@ jobs: - name: Build run: cargo build --all --release --all-features - name: Run tests - # Number threads is set to 1 because the runner does not have enough memeory for more. - run: PILCOM=$(pwd)/pilcom/ cargo test --all --release --verbose --all-features -- --include-ignored --nocapture --test-threads=1 + run: PILCOM=$(pwd)/pilcom/ cargo test --all --release --verbose --all-features -- --include-ignored --nocapture - name: Run benchmarks - run: cargo bench - working-directory: compiler + run: cargo bench --workspace --all-features diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index a3f5760b34..537ed488bd 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -20,7 +20,7 @@ env: jobs: build: - runs-on: warp-ubuntu-2204-x64-8x + runs-on: warp-ubuntu-2404-x64-8x steps: - uses: actions/checkout@v4 @@ -43,7 +43,7 @@ jobs: ##### The block below is shared between cache build and PR build workflows ##### - name: Install EStarkPolygon prover dependencies - run: sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) run: rustup 
toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - name: Install Rust toolchain 1.81 (stable) @@ -77,7 +77,7 @@ jobs: test_quick: needs: build - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: matrix: test: @@ -107,7 +107,7 @@ jobs: - name: Install riscv target run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install test dependencies - run: sudo apt-get install -y binutils-riscv64-unknown-elf lld + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install pilcom run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - uses: taiki-e/install-action@nextest @@ -118,7 +118,7 @@ jobs: POWDR_STD: ${{ github.workspace }}/std/ run_examples: - runs-on: warp-ubuntu-2204-x64-4x + runs-on: warp-ubuntu-2404-x64-4x steps: - uses: actions/checkout@v4 @@ -142,7 +142,7 @@ jobs: test_estark_polygon: needs: build - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 with: @@ -168,7 +168,7 @@ jobs: - name: Install pilcom run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - name: Install EStarkPolygon prover system dependency - run: sudo apt-get install -y nlohmann-json3-dev + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev - uses: taiki-e/install-action@nextest - name: Unpack EStarkPolygon built dependencies run: tar --zstd -xf pil-stark-prover-deps.tar.zst @@ -191,7 +191,7 @@ jobs: - "7" - "8" needs: build - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 @@ -210,7 +210,7 @@ jobs: - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu 
&& rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - name: Install test dependencies - run: sudo apt-get install -y binutils-riscv64-unknown-elf lld + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install nightly-2024-08-01 run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - name: Install std source diff --git a/riscv/benches/executor_benchmark.rs b/riscv/benches/executor_benchmark.rs index 593f2b861b..7d690c04ee 100644 --- a/riscv/benches/executor_benchmark.rs +++ b/riscv/benches/executor_benchmark.rs @@ -1,9 +1,7 @@ use ::powdr_pipeline::Pipeline; use powdr_number::GoldilocksField; -use powdr_riscv::{ - compile_rust_crate_to_riscv, continuations::bootloader::default_input, elf, CompilerOptions, -}; +use powdr_riscv::{compile_rust_crate_to_riscv, elf, CompilerOptions}; use criterion::{criterion_group, criterion_main, Criterion}; use mktemp::Temp; @@ -27,23 +25,6 @@ fn executor_benchmark(c: &mut Criterion) { group.bench_function("keccak", |b| { b.iter(|| pipeline.clone().compute_witness().unwrap()) }); - - // The first chunk of `many_chunks` with bootloader - let executable = - compile_rust_crate_to_riscv("./tests/riscv_data/many_chunks/Cargo.toml", &tmp_dir, None); - let options = options.with_continuations(); - let contents = elf::translate(&executable, options); - let mut pipeline = Pipeline::::default().from_asm_string(contents, None); - pipeline.compute_optimized_pil().unwrap(); - pipeline.compute_fixed_cols().unwrap(); - - let pipeline = pipeline.add_external_witness_values(vec![( - "main_bootloader_inputs::value".to_string(), - default_input(&[63, 64, 65]), - )]); - group.bench_function("many_chunks_chunk_0", |b| { - b.iter(|| pipeline.clone().compute_witness().unwrap()) - }); group.finish(); } From c9d9e3f4eac203a6d525218aa57f2261d9de4173 Mon Sep 17 00:00:00 2001 
From: Leo Date: Wed, 4 Dec 2024 10:33:54 +0100 Subject: [PATCH 23/57] Fix ci again (#2193) The new Ubuntu used in CI now requires new packages for estark. This is already in nightly but broke the normal cache last night --- .github/workflows/build-cache.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-cache.yml b/.github/workflows/build-cache.yml index bde8459c9c..e73491d428 100644 --- a/.github/workflows/build-cache.yml +++ b/.github/workflows/build-cache.yml @@ -18,7 +18,7 @@ jobs: ##### The block below is shared between cache build and PR build workflows ##### - name: Install EStarkPolygon prover dependencies - run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc uuid-dev build-essential cmake pkg-config git - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - name: Install Rust toolchain 1.81 (stable) From 06d7b6b22a934f3f995383492d9af67f41d405f5 Mon Sep 17 00:00:00 2001 From: Leandro Pacheco Date: Wed, 4 Dec 2024 06:34:59 -0300 Subject: [PATCH 24/57] improve demangle of symbol names in the profiler (#2192) --- riscv-executor/src/profiler.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/riscv-executor/src/profiler.rs b/riscv-executor/src/profiler.rs index e6ba310800..f9cae4bfc4 100644 --- a/riscv-executor/src/profiler.rs +++ b/riscv-executor/src/profiler.rs @@ -360,4 +360,11 @@ fn format_function_name(name: &str) -> String { } else { format!("{}", demangle(name)) } + // no sure why demangle doesn't properly demangle these + .replace("$LT$", "<") + 
.replace("$GT$", ">") + .replace("$RF$", "&") + .replace("$C$", ",") + .replace("$u20$", " ") + .replace("_dot__dot_", "::") } From b96f32ec6bf6eaa0eb073c366be648dd8313d9c8 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Wed, 4 Dec 2024 12:03:04 +0100 Subject: [PATCH 25/57] Lower constraint degree for 256-Bit arithmetic machine (#2110) This PR reduces the constraint degree of the 256-Bit arithmetic machines from 4 to 3, making it possible to create Plonky3 proofs. As explained in the comments, the current way to do it is not optimal, because it adds roughly 256 witness columns. There would be an alternative that only adds 5, but witgen doesn't currently work for that. This is a comparison for `test_data/std/arith256_memory_large_test.asm`: | Metric | `main` | `fix-arith` | |-------------------------------|--------|-------------| | Fixed columns (arith) | 32 | 32 | | Witness columns (arith) | 195 | 443 | | Number of constraints (arith) | 234 | 482 | | Max constraint degree | 4 | 3 | | Time witgen (BN254) | 12.18s | 22.17s | | Time witgen (GL) | 5.24s | 7.26s | | Halo2-Composite proof time | 0.91s | 1.65s | | Plonky3 proof time | N/A | 0.38s | --- pipeline/tests/powdr_std.rs | 10 ++------ std/machines/large_field/arith.asm | 25 +++++++++++++----- std/machines/large_field/arith256_memory.asm | 27 ++++++++++++++------ 3 files changed, 39 insertions(+), 23 deletions(-) diff --git a/pipeline/tests/powdr_std.rs b/pipeline/tests/powdr_std.rs index dd385488ed..97ed8a50e3 100644 --- a/pipeline/tests/powdr_std.rs +++ b/pipeline/tests/powdr_std.rs @@ -129,20 +129,14 @@ fn arith_small_test() { #[ignore = "Too slow"] fn arith_large_test() { let f = "std/arith_large_test.asm"; - let pipeline: Pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline); - // TODO We can't use P3 yet for this test because of degree 4 constraints. 
- //test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_gl(f, &[]); } #[test] #[ignore = "Too slow"] fn arith256_memory_large_test() { let f = "std/arith256_memory_large_test.asm"; - let pipeline: Pipeline = make_simple_prepared_pipeline(f); - test_mock_backend(pipeline); - // TODO We can't use P3 yet for this test because of degree 4 constraints. - //test_plonky3_with_backend_variant::(f, vec![], BackendVariant::Monolithic); + regular_test_gl(f, &[]); } #[test] diff --git a/std/machines/large_field/arith.asm b/std/machines/large_field/arith.asm index 21e58af807..e3d320600e 100644 --- a/std/machines/large_field/arith.asm +++ b/std/machines/large_field/arith.asm @@ -220,7 +220,15 @@ machine Arith with /// returns a(0) * b(0) + ... + a(n - 1) * b(n - 1) let dot_prod = |n, a, b| sum(n, |i| a(i) * b(i)); /// returns |n| a(0) * b(n) + ... + a(n) * b(0) - let product = |a, b| |n| dot_prod(n + 1, a, |i| b(n - i)); + let product = constr |a, b| constr |n| { + // TODO: To reduce the degree of the constraints, we materialize the intermediate result here. + // this introduces ~256 additional witness columns & constraints. + let product_res; + product_res = dot_prod(n + 1, a, |i| b(n - i)); + product_res + }; + // Same as `product`, but does not materialize the result. Use this to multiply by constants (like `p`). + let product_inline = |a, b| |n| dot_prod(n + 1, a, |i| b(n - i)); /// Converts array to function, extended by zeros. let array_as_fun: expr[] -> (int -> expr) = |arr| |i| if 0 <= i && i < array::len(arr) { arr[i] @@ -241,7 +249,7 @@ machine Arith with let q2f = array_as_fun(q2); // Defined for arguments from 0 to 31 (inclusive) - let eq0 = |nr| + let eq0 = constr |nr| product(x1f, y1f)(nr) + x2f(nr) - shift_right(y2f, 16)(nr) @@ -257,9 +265,9 @@ machine Arith with // The "- 4 * shift_right(p, 16)" effectively subtracts 4 * (p << 16 * 16) = 2 ** 258 * p // As a result, the term computes `(x - 2 ** 258) * p`. 
- let product_with_p = |x| |nr| product(p, x)(nr) - 4 * shift_right(p, 16)(nr); + let product_with_p = |x| |nr| product_inline(p, x)(nr) - 4 * shift_right(p, 16)(nr); - let eq1 = |nr| product(sf, x2f)(nr) - product(sf, x1f)(nr) - y2f(nr) + y1f(nr) + product_with_p(q0f)(nr); + let eq1 = constr |nr| product(sf, x2f)(nr) - product(sf, x1f)(nr) - y2f(nr) + y1f(nr) + product_with_p(q0f)(nr); /******* * @@ -267,7 +275,7 @@ machine Arith with * *******/ - let eq2 = |nr| 2 * product(sf, y1f)(nr) - 3 * product(x1f, x1f)(nr) + product_with_p(q0f)(nr); + let eq2 = constr |nr| 2 * product(sf, y1f)(nr) - 3 * product(x1f, x1f)(nr) + product_with_p(q0f)(nr); /******* * @@ -278,7 +286,7 @@ machine Arith with // If we're doing the ec_double operation (selEq[2] == 1), x2 is so far unconstrained and should be set to x1 array::new(16, |i| selEq[2] * (x1[i] - x2[i]) = 0); - let eq3 = |nr| product(sf, sf)(nr) - x1f(nr) - x2f(nr) - x3f(nr) + product_with_p(q1f)(nr); + let eq3 = constr |nr| product(sf, sf)(nr) - x1f(nr) - x2f(nr) - x3f(nr) + product_with_p(q1f)(nr); /******* @@ -287,7 +295,7 @@ machine Arith with * *******/ - let eq4 = |nr| product(sf, x1f)(nr) - product(sf, x3f)(nr) - y1f(nr) - y3f(nr) + product_with_p(q2f)(nr); + let eq4 = constr |nr| product(sf, x1f)(nr) - product(sf, x3f)(nr) - y1f(nr) - y3f(nr) + product_with_p(q2f)(nr); /******* @@ -333,6 +341,9 @@ machine Arith with * *******/ + // TODO: To reduce the degree of the constraints, these intermediate columns should be materialized. + // However, witgen doesn't work currently if we do, likely because for some operations, not all inputs are + // available. 
col eq0_sum = sum(32, |i| eq0(i) * CLK32[i]); col eq1_sum = sum(32, |i| eq1(i) * CLK32[i]); col eq2_sum = sum(32, |i| eq2(i) * CLK32[i]); diff --git a/std/machines/large_field/arith256_memory.asm b/std/machines/large_field/arith256_memory.asm index 660479903f..761d19e956 100644 --- a/std/machines/large_field/arith256_memory.asm +++ b/std/machines/large_field/arith256_memory.asm @@ -351,7 +351,15 @@ machine Arith256Memory(mem: Memory) with /// returns a(0) * b(0) + ... + a(n - 1) * b(n - 1) let dot_prod = |n, a, b| sum(n, |i| a(i) * b(i)); /// returns |n| a(0) * b(n) + ... + a(n) * b(0) - let product = |a, b| |n| dot_prod(n + 1, a, |i| b(n - i)); + let product = constr |a, b| constr |n| { + // TODO: To reduce the degree of the constraints, we materialize the intermediate result here. + // this introduces ~256 additional witness columns & constraints. + let product_res; + product_res = dot_prod(n + 1, a, |i| b(n - i)); + product_res + }; + // Same as `product`, but does not materialize the result. Use this to multiply by constants (like `p`). + let product_inline = |a, b| |n| dot_prod(n + 1, a, |i| b(n - i)); /// Converts array to function, extended by zeros. let array_as_fun: expr[] -> (int -> expr) = |arr| |i| if 0 <= i && i < array::len(arr) { arr[i] @@ -372,7 +380,7 @@ machine Arith256Memory(mem: Memory) with let q2f = array_as_fun(q2); // Defined for arguments from 0 to 31 (inclusive) - let eq0 = |nr| + let eq0 = constr |nr| product(x1f, y1f)(nr) + x2f(nr) - shift_right(y2f, 16)(nr) @@ -388,9 +396,9 @@ machine Arith256Memory(mem: Memory) with // The "- 4 * shift_right(p, 16)" effectively subtracts 4 * (p << 16 * 16) = 2 ** 258 * p // As a result, the term computes `(x - 2 ** 258) * p`. 
- let product_with_p = |x| |nr| product(p, x)(nr) - 4 * shift_right(p, 16)(nr); + let product_with_p = |x| |nr| product_inline(p, x)(nr) - 4 * shift_right(p, 16)(nr); - let eq1 = |nr| product(sf, x2f)(nr) - product(sf, x1f)(nr) - y2f(nr) + y1f(nr) + product_with_p(q0f)(nr); + let eq1 = constr |nr| product(sf, x2f)(nr) - product(sf, x1f)(nr) - y2f(nr) + y1f(nr) + product_with_p(q0f)(nr); /******* * @@ -398,7 +406,7 @@ machine Arith256Memory(mem: Memory) with * *******/ - let eq2 = |nr| 2 * product(sf, y1f)(nr) - 3 * product(x1f, x1f)(nr) + product_with_p(q0f)(nr); + let eq2 = constr |nr| 2 * product(sf, y1f)(nr) - 3 * product(x1f, x1f)(nr) + product_with_p(q0f)(nr); /******* * @@ -409,7 +417,7 @@ machine Arith256Memory(mem: Memory) with // If we're doing the ec_double operation, x2 is so far unconstrained and should be set to x1 array::new(16, |i| is_ec_double * (x1[i] - x2[i]) = 0); - let eq3 = |nr| product(sf, sf)(nr) - x1f(nr) - x2f(nr) - x3f(nr) + product_with_p(q1f)(nr); + let eq3 = constr |nr| product(sf, sf)(nr) - x1f(nr) - x2f(nr) - x3f(nr) + product_with_p(q1f)(nr); /******* @@ -418,7 +426,7 @@ machine Arith256Memory(mem: Memory) with * *******/ - let eq4 = |nr| product(sf, x1f)(nr) - product(sf, x3f)(nr) - y1f(nr) - y3f(nr) + product_with_p(q2f)(nr); + let eq4 = constr |nr| product(sf, x1f)(nr) - product(sf, x3f)(nr) - y1f(nr) - y3f(nr) + product_with_p(q2f)(nr); /******* @@ -471,7 +479,10 @@ machine Arith256Memory(mem: Memory) with * Putting everything together * *******/ - + + // TODO: To reduce the degree of the constraints, these intermediate columns should be materialized. + // However, witgen doesn't work currently if we do, likely because for some operations, not all inputs are + // available. 
col eq0_sum = sum(32, |i| eq0(i) * CLK32[i]); col eq1_sum = sum(32, |i| eq1(i) * CLK32[i]); col eq2_sum = sum(32, |i| eq2(i) * CLK32[i]); From 4c5caae8f63871a0c4e48eb008e9526de195993d Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Wed, 4 Dec 2024 12:24:10 +0100 Subject: [PATCH 26/57] Better error message if things go wrong in machine extraction (#2142) I tried to debug why the RISC-V machine doesn't work with `--linker-mode bus` yet, and this adds helpful error messages. --- .../src/witgen/machines/machine_extractor.rs | 36 +++++++++++++++---- executor/src/witgen/mod.rs | 24 +++++++++---- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/executor/src/witgen/machines/machine_extractor.rs b/executor/src/witgen/machines/machine_extractor.rs index 1da28230f0..5465dd0451 100644 --- a/executor/src/witgen/machines/machine_extractor.rs +++ b/executor/src/witgen/machines/machine_extractor.rs @@ -13,6 +13,7 @@ use super::fixed_lookup_machine::FixedLookup; use super::sorted_witness_machine::SortedWitnesses; use super::FixedData; use super::KnownMachine; +use super::Machine; use crate::witgen::machines::dynamic_machine::DynamicMachine; use crate::witgen::machines::second_stage_machine::SecondStageMachine; use crate::witgen::machines::Connection; @@ -179,8 +180,6 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> { prover_functions.iter().map(|&(_, pf)| pf).collect(), ); - log_extracted_machine(&machine_parts); - for (i, pf) in &prover_functions { if !extracted_prover_functions.insert(*i) { log::warn!("Prover function was assigned to multiple machines:\n{pf}"); @@ -317,9 +316,25 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> { } } -fn log_extracted_machine(parts: &MachineParts<'_, T>) { - log::trace!( - "\nExtracted a machine with the following witnesses:\n{}\n identities:\n{}\n connecting identities:\n{}\n and prover functions:\n{}", +fn extract_namespace(name: &str) -> &str { + name.split("::").next().unwrap() +} + +fn log_extracted_machine(name: &str, 
parts: &MachineParts<'_, T>) { + let namespaces = parts + .witnesses + .iter() + .map(|s| extract_namespace(parts.column_name(s))) + .collect::>(); + let exactly_one_namespace = namespaces.len() == 1; + let log_level = if exactly_one_namespace { + log::Level::Trace + } else { + log::Level::Warn + }; + log::log!( + log_level, + "\nExtracted a machine {name} with the following witnesses:\n{}\n identities:\n{}\n connecting identities:\n{}\n and prover functions:\n{}", parts.witnesses .iter() .map(|s|parts.column_name(s)) @@ -335,6 +350,10 @@ fn log_extracted_machine(parts: &MachineParts<'_, T>) { .iter() .format("\n") ); + if !exactly_one_namespace { + log::warn!("The witnesses of the machine are in different namespaces: {namespaces:?}"); + log::warn!("In theory, witgen ignores namespaces, but in practice, this often means that something has gone wrong with the machine extraction."); + } } fn suggest_machine_name(parts: &MachineParts<'_, T>) -> String { @@ -396,7 +415,7 @@ fn build_machine<'a, T: FieldElement>( machine_parts: MachineParts<'a, T>, name_with_type: impl Fn(&str) -> String, ) -> KnownMachine<'a, T> { - if let Some(machine) = + let machine = if let Some(machine) = SortedWitnesses::try_new(name_with_type("SortedWitness"), fixed_data, &machine_parts) { log::debug!("Detected machine: sorted witnesses / write-once memory"); @@ -453,7 +472,10 @@ fn build_machine<'a, T: FieldElement>( machine_parts.clone(), latch, )) - } + }; + + log_extracted_machine(machine.name(), &machine_parts); + machine } // This only discovers direct references in the expression diff --git a/executor/src/witgen/mod.rs b/executor/src/witgen/mod.rs index e1b59edadf..5640c38e8e 100644 --- a/executor/src/witgen/mod.rs +++ b/executor/src/witgen/mod.rs @@ -443,18 +443,28 @@ impl<'a, T: FieldElement> FixedData<'a, T> { fn common_degree_range<'b>(&self, ids: impl IntoIterator) -> DegreeRange { let ids: HashSet<_> = ids.into_iter().collect(); - self.all_poly_symbols() - .flat_map(|symbol| 
symbol.array_elements().map(|(_, id)| (id, symbol.degree))) - // only keep the ones matching our set - .filter_map(|(id, degree)| ids.contains(&id).then_some(degree)) - // get the common degree + // Iterator of (id, Option), with only the requested ids. + let filtered_ids_and_degrees = || { + self.all_poly_symbols() + .flat_map(|symbol| symbol.array_elements().map(|(_, id)| (id, symbol.degree))) + .filter_map(|(id, degree)| ids.contains(&id).then_some((id, degree))) + }; + + filtered_ids_and_degrees() + .map(|(_, degree_range)| degree_range) .unique() .exactly_one() - .unwrap_or_else(|_| panic!("expected all polynomials to have the same degree")) + .unwrap_or_else(|_| { + log::error!("The following columns have different degree ranges:"); + for (id, degree) in filtered_ids_and_degrees() { + log::error!(" {}: {:?}", self.column_name(&id), degree); + } + panic!("Expected all columns to have the same degree") + }) .unwrap() } - /// Returns whether all polynomials have the same static degree. + /// Returns whether all columns have the same static degree. fn is_monolithic(&self) -> bool { match self .all_poly_symbols() From 4e785d6de2c2f617b37a8687a50e2b37548facfc Mon Sep 17 00:00:00 2001 From: Leo Date: Wed, 4 Dec 2024 15:20:03 +0100 Subject: [PATCH 27/57] fix nightly again (#2195) This PR moves nightly back to GH with Ubuntu 24.04 and the old settings. 
The nightly test passed for this branch in here: https://github.com/powdr-labs/powdr/actions/runs/12154934614 --- .github/workflows/nightly-tests.yml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml index fca02ec059..f30cd1cd09 100644 --- a/.github/workflows/nightly-tests.yml +++ b/.github/workflows/nightly-tests.yml @@ -43,7 +43,7 @@ jobs: args: '--all-targets' test_release: - runs-on: warp-ubuntu-2404-x64-4x + runs-on: ubuntu-24.04 needs: check_if_needs_running if: needs.check_if_needs_running.outputs.status > 0 @@ -51,17 +51,14 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: âš¡ Restore rust cache - id: cache - uses: WarpBuilds/cache/restore@v1 + - name: âš¡ Cache rust + uses: actions/cache@v4 with: path: | - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - Cargo.lock - key: ${{ runner.os }}-cargo-nightly-tests + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.toml') }} - name: âš¡ Cache nodejs uses: actions/cache@v4 with: @@ -87,6 +84,7 @@ jobs: - name: Build run: cargo build --all --release --all-features - name: Run tests - run: PILCOM=$(pwd)/pilcom/ cargo test --all --release --verbose --all-features -- --include-ignored --nocapture + # Number threads is set to 1 because the runner does not have enough memeory for more. 
+ run: PILCOM=$(pwd)/pilcom/ cargo test --all --release --verbose --all-features -- --include-ignored --nocapture --test-threads=1 - name: Run benchmarks run: cargo bench --workspace --all-features From 6897f9ba1f16d466f3b536544d6c2c20c4368372 Mon Sep 17 00:00:00 2001 From: Leandro Pacheco Date: Wed, 4 Dec 2024 15:04:41 -0300 Subject: [PATCH 28/57] don't generate memory machine traces executing in `Fast` mode (#2196) these calls were missing the `ExecMode::Trace` check --- riscv-executor/src/lib.rs | 78 +++++++++++++-------------------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/riscv-executor/src/lib.rs b/riscv-executor/src/lib.rs index d7d9f64965..8cfa8f2ca3 100644 --- a/riscv-executor/src/lib.rs +++ b/riscv-executor/src/lib.rs @@ -699,28 +699,30 @@ mod builder { self.set_next_pc().and(Some(st_line)) } - pub(crate) fn set_mem(&mut self, addr: u32, val: u32) { + pub(crate) fn set_mem(&mut self, addr: u32, val: u32, step: u32, selector: u32) { if let ExecMode::Trace = self.mode { self.trace.mem_ops.push(MemOperation { row: self.trace.len, kind: MemOperationKind::Write, address: addr, }); + self.memory_machine.write(step, addr, val.into(), selector); } self.mem.insert(addr, val); } - pub(crate) fn get_mem(&mut self, addr: u32) -> u32 { + pub(crate) fn get_mem(&mut self, addr: u32, step: u32, selector: u32) -> u32 { + let val = *self.mem.get(&addr).unwrap_or(&0); if let ExecMode::Trace = self.mode { self.trace.mem_ops.push(MemOperation { row: self.trace.len, kind: MemOperationKind::Read, address: addr, }); + self.memory_machine.read(step, addr, val.into(), selector); } - - *self.mem.get(&addr).unwrap_or(&0) + val } pub(crate) fn set_reg_mem(&mut self, addr: u32, val: Elem) { @@ -1021,9 +1023,11 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { /// read register value, updating the register memory machine fn reg_read(&mut self, step_offset: u32, reg: u32, selector_idx: u32) -> Elem { let val = self.proc.get_reg_mem(reg); - self.proc - 
.regs_machine - .read(self.step + step_offset, reg, val, selector_idx); + if let ExecMode::Trace = self.mode { + self.proc + .regs_machine + .read(self.step + step_offset, reg, val, selector_idx); + } val } @@ -1141,10 +1145,7 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { ); let addr = addr as u32; - self.proc.set_mem(addr, value.u()); - self.proc - .memory_machine - .write(self.step + 3, addr, value, 1); + self.proc.set_mem(addr, value.u(), self.step + 3, 1); set_col!(tmp1_col, addr1); set_col!(tmp2_col, addr2); @@ -1167,7 +1168,9 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { let addr = addr1.bin() + offset; - let val = self.proc.get_mem(addr as u32 & 0xfffffffc); + let val = self + .proc + .get_mem(addr as u32 & 0xfffffffc, self.step + 1, 0); let rem = addr % 4; self.reg_write(2, write_addr1, val.into(), 3); @@ -1188,13 +1191,6 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { Elem::from_u32_as_fe(((v as u64 >> 32) & 1) as u32) ); - self.proc.memory_machine.read( - self.step + 1, - addr as u32 & 0xfffffffc, - val.into(), - 0, - ); - Vec::new() } // TODO: update to witness generation for continuations @@ -1892,23 +1888,9 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { let inputs = (0..12) .map(|i| { - let lo = self.proc.get_mem(input_ptr.u() + 8 * i); - let hi = self.proc.get_mem(input_ptr.u() + 8 * i + 4); - // memory reads of the poseidon machine - if let ExecMode::Trace = self.mode { - self.proc.memory_machine.read( - self.step, - input_ptr.u() + 8 * i, - lo.into(), - 2, - ); - self.proc.memory_machine.read( - self.step, - input_ptr.u() + 8 * i + 4, - hi.into(), - 3, - ); - } + // step/selector of memory reads from the poseidon machine + let lo = self.proc.get_mem(input_ptr.u() + 8 * i, self.step, 2); + let hi = self.proc.get_mem(input_ptr.u() + 8 * i + 4, self.step, 3); F::from(((hi as u64) << 32) | lo as u64) }) .collect::>(); @@ -1920,8 +1902,11 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { let v = 
v.to_integer().try_into_u64().unwrap(); let hi = (v >> 32) as u32; let lo = (v & 0xffffffff) as u32; - self.proc.set_mem(output_ptr.u() + 8 * i as u32, lo); - self.proc.set_mem(output_ptr.u() + 8 * i as u32 + 4, hi); + // step/selector of memory writes from the poseidon machine + self.proc + .set_mem(output_ptr.u() + 8 * i as u32, lo, self.step + 1, 4); + self.proc + .set_mem(output_ptr.u() + 8 * i as u32 + 4, hi, self.step + 1, 5); if let ExecMode::Trace = self.mode { // split gl of the poseidon machine self.proc.submachine("split_gl").add_operation( @@ -1930,19 +1915,6 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { Some(1), &[], ); - // memory writes of the poseidon machine - self.proc.memory_machine.write( - self.step + 1, - output_ptr.u() + 8 * i as u32, - lo.into(), - 4, - ); - self.proc.memory_machine.write( - self.step + 1, - output_ptr.u() + 8 * i as u32 + 4, - hi.into(), - 5, - ); } }); @@ -1982,7 +1954,7 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { assert_eq!(input_ptr % 4, 0); let inputs: [u64; 8] = (0..16) - .map(|i| self.proc.get_mem(input_ptr + i * 4)) + .map(|i| self.proc.get_mem(input_ptr + i * 4, 0, 0)) // TODO: step/selector for poseidon2 .chunks(2) .into_iter() .map(|mut chunk| { @@ -2001,7 +1973,7 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { let output_ptr = self.proc.get_reg_mem(args[1].u()).u(); assert_eq!(output_ptr % 4, 0); result.enumerate().for_each(|(i, v)| { - self.proc.set_mem(output_ptr + i as u32 * 4, v); + self.proc.set_mem(output_ptr + i as u32 * 4, v, 0, 0); // TODO: step/selector for poseidon2 }); vec![] From 60860b731782e4f0779fa6a9d6662d76a1163e40 Mon Sep 17 00:00:00 2001 From: Leo Date: Wed, 4 Dec 2024 21:57:53 +0100 Subject: [PATCH 29/57] Expose stdout/stderr in Session (#2189) This PR: - Makes explicit the notion that 0=stdin, 1=stdout, 2=stdout in the QueryCallback's "FS" - Exposes the outputs in Session - Removes printing to stdout and stderr in the callback itself. 
This is now the responsibility of the host if needed. - Adds the Fibonacci test with stdout to CI using the write mechanism The idea is that after this we should also expose the proof's publics and make a stream mechanism for inputs and outputs --- .github/workflows/pr-tests.yml | 10 +++++++++- examples/fibonacci/Cargo.toml | 4 +++- examples/fibonacci/guest/src/main.rs | 9 ++++++--- examples/fibonacci/src/main.rs | 2 +- pipeline/src/lib.rs | 22 +++++++++++++++------- pipeline/src/pipeline.rs | 5 +++-- powdr-test/examples/fibonacci.rs | 23 +++++++++++++++++++++++ powdr/src/lib.rs | 10 ++++++++++ riscv-executor/src/lib.rs | 14 ++++++++------ riscv-runtime/src/fmt.rs | 2 +- std/prelude.asm | 2 +- 11 files changed, 80 insertions(+), 23 deletions(-) create mode 100644 powdr-test/examples/fibonacci.rs diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 537ed488bd..b58a3778fb 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -137,8 +137,16 @@ jobs: key: ${{ runner.os }}-cargo-pr-tests - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu + - name: Install nightly + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Run examples - run: cargo run --example hello_world && cargo run --example sqrt_with_publics + run: cargo run --profile 
pr-tests --example hello_world && cargo run --profile pr-tests --example sqrt_with_publics && cargo run --profile pr-tests --example fibonacci test_estark_polygon: needs: build diff --git a/examples/fibonacci/Cargo.toml b/examples/fibonacci/Cargo.toml index 2ec2d494cd..8594722109 100644 --- a/examples/fibonacci/Cargo.toml +++ b/examples/fibonacci/Cargo.toml @@ -8,7 +8,9 @@ default = [] simd = ["powdr/plonky3-simd"] [dependencies] -powdr = { git = "https://github.com/powdr-labs/powdr", tag = "v0.1.2", features = ["plonky3"] } +powdr = { git = "https://github.com/powdr-labs/powdr", tag = "v0.1.2", features = [ + "plonky3", +] } serde = { version = "1.0", default-features = false, features = [ "alloc", "derive", diff --git a/examples/fibonacci/guest/src/main.rs b/examples/fibonacci/guest/src/main.rs index 18442e94a9..ffd15a725d 100644 --- a/examples/fibonacci/guest/src/main.rs +++ b/examples/fibonacci/guest/src/main.rs @@ -1,5 +1,5 @@ use powdr_riscv_runtime; -use powdr_riscv_runtime::io::read; +use powdr_riscv_runtime::io::{read, write}; fn fib(n: u32) -> u32 { if n <= 1 { @@ -9,6 +9,9 @@ fn fib(n: u32) -> u32 { } fn main() { - let n: u32 = read(1); - let _ = fib(n); + // Read input from stdin. + let n: u32 = read(0); + let r = fib(n); + // Write result to stdout. + write(1, r); } diff --git a/examples/fibonacci/src/main.rs b/examples/fibonacci/src/main.rs index e49de9b8e3..2da28fa1cb 100644 --- a/examples/fibonacci/src/main.rs +++ b/examples/fibonacci/src/main.rs @@ -10,7 +10,7 @@ fn main() { .chunk_size_log2(18) .build() // Compute Fibonacci of 21 in the guest. - .write(1, &n); + .write(0, &n); // Fast dry run to test execution. 
session.run(); diff --git a/pipeline/src/lib.rs b/pipeline/src/lib.rs index 7b8543fc5d..af51268068 100644 --- a/pipeline/src/lib.rs +++ b/pipeline/src/lib.rs @@ -34,6 +34,11 @@ impl HostContext { (ctx, cb) } + pub fn clear(&mut self) { + let mut fs = self.file_data.lock().unwrap(); + fs.clear(); + } + pub fn read(&self, fd: u32) -> Result { let fs = self.file_data.lock().unwrap(); if let Some(data) = fs.get(&fd) { @@ -58,8 +63,8 @@ impl HostContext { .map_err(|e| format!("Invalid char to print: {e}"))? as char; match fd { - // stdin, stdout and stderr are supported by the default callback - 0..=2 => return Err(format!("Unsupported file descriptor: {fd}")), + // stdin cannot be used for Output + 0 => return Err(format!("Unsupported file descriptor: {fd}")), _ => { let mut map = fs.lock().unwrap(); map.entry(fd).or_default().push(byte as u8); @@ -67,6 +72,10 @@ impl HostContext { } Ok(Some(0.into())) } + "Clear" => { + fs.lock().unwrap().clear(); + Ok(Some(0.into())) + } _ => Err(format!("Unsupported query: {query}")), } }) @@ -174,15 +183,14 @@ pub fn handle_simple_queries_callback<'a, T: FieldElement>() -> impl QueryCallba let fd = data[0] .parse::() .map_err(|e| format!("Invalid fd: {e}"))?; + if fd != 0 { + return Err("Debug print requires output fd 0".to_string()); + } let byte = data[1] .parse::() .map_err(|e| format!("Invalid char to print: {e}"))? as char; - match fd { - 1 => print!("{byte}"), - 2 => eprint!("{byte}"), - _ => return Err(format!("Unsupported file descriptor: {fd}")), - } + print!("{byte}"); Ok(Some(0.into())) } "Hint" => { diff --git a/pipeline/src/pipeline.rs b/pipeline/src/pipeline.rs index 8b81f667f9..af64b8a022 100644 --- a/pipeline/src/pipeline.rs +++ b/pipeline/src/pipeline.rs @@ -191,8 +191,7 @@ where arguments: Arguments::default(), host_context: ctx, } - // We add the basic callback functionalities - // to support PrintChar and Hint. + // We add the basic callback functionalities to support PrintChar and Hint. 
.add_query_callback(Arc::new(handle_simple_queries_callback())) .add_query_callback(cb) } @@ -974,6 +973,8 @@ impl Pipeline { return Ok(witness.clone()); } + self.host_context.clear(); + let pil = self.compute_optimized_pil()?; let fixed_cols = self.compute_fixed_cols()?; diff --git a/powdr-test/examples/fibonacci.rs b/powdr-test/examples/fibonacci.rs new file mode 100644 index 0000000000..9dbc498fc7 --- /dev/null +++ b/powdr-test/examples/fibonacci.rs @@ -0,0 +1,23 @@ +use powdr::Session; + +fn main() { + env_logger::init(); + + let n = 11; + let mut session = Session::builder() + .guest_path("./examples/fibonacci/guest") + .out_path("powdr-target") + .build() + .write(0, &n); + + // Fast dry run to test execution. + session.run(); + + let r: u32 = session.stdout(); + assert_eq!(r, 89); + + session.prove(); + + let r: u32 = session.stdout(); + assert_eq!(r, 89); +} diff --git a/powdr/src/lib.rs b/powdr/src/lib.rs index 816c38c033..0f49d5cdd9 100644 --- a/powdr/src/lib.rs +++ b/powdr/src/lib.rs @@ -191,6 +191,16 @@ impl Session { self.pipeline.export_verification_key(file).unwrap(); } + + pub fn stdout(&self) -> S { + let host = self.pipeline.host_context(); + host.read(1).unwrap() + } + + pub fn stderr(&self) -> S { + let host = self.pipeline.host_context(); + host.read(2).unwrap() + } } fn pil_file_path(asm_name: &Path) -> PathBuf { diff --git a/riscv-executor/src/lib.rs b/riscv-executor/src/lib.rs index 8cfa8f2ca3..3a887d2f0d 100644 --- a/riscv-executor/src/lib.rs +++ b/riscv-executor/src/lib.rs @@ -2289,7 +2289,7 @@ enum ExecMode { pub fn execute_fast( asm: &AnalysisASMFile, initial_memory: MemoryState, - inputs: &Callback, + prover_ctx: &Callback, bootloader_inputs: &[F], profiling: Option, ) -> usize { @@ -2299,7 +2299,7 @@ pub fn execute_fast( None, None, initial_memory, - inputs, + prover_ctx, bootloader_inputs, usize::MAX, ExecMode::Fast, @@ -2315,7 +2315,7 @@ pub fn execute( opt_pil: &Analyzed, fixed: FixedColumns, initial_memory: MemoryState, - inputs: 
&Callback, + prover_ctx: &Callback, bootloader_inputs: &[F], max_steps_to_execute: Option, profiling: Option, @@ -2326,7 +2326,7 @@ pub fn execute( Some(opt_pil), Some(fixed), initial_memory, - inputs, + prover_ctx, bootloader_inputs, max_steps_to_execute.unwrap_or(usize::MAX), ExecMode::Trace, @@ -2346,7 +2346,7 @@ fn execute_inner( opt_pil: Option<&Analyzed>, fixed: Option>, initial_memory: MemoryState, - inputs: &Callback, + prover_ctx: &Callback, bootloader_inputs: &[F], max_steps_to_execute: usize, mode: ExecMode, @@ -2408,10 +2408,12 @@ fn execute_inner( .map(|v| Elem::try_from_fe_as_bin(v).unwrap_or(Elem::Field(*v))) .collect(); + // We clear the QueryCallback's virtual FS before the execution. + (prover_ctx)("Clear").unwrap(); let mut e = Executor { proc, label_map, - inputs, + inputs: prover_ctx, bootloader_inputs, fixed: fixed.unwrap_or_default(), program_cols, diff --git a/riscv-runtime/src/fmt.rs b/riscv-runtime/src/fmt.rs index 495d36417b..bd696817f6 100644 --- a/riscv-runtime/src/fmt.rs +++ b/riscv-runtime/src/fmt.rs @@ -37,6 +37,6 @@ fn print_prover_char(c: u8) { let mut value = c as u32; #[allow(unused_assignments)] unsafe { - ecall!(Syscall::Output, lateout("a0") value, in("a0") 1, in("a1") value); + ecall!(Syscall::Output, lateout("a0") value, in("a0") 0, in("a1") value); } } diff --git a/std/prelude.asm b/std/prelude.asm index ff1ae56751..14621f7b3c 100644 --- a/std/prelude.asm +++ b/std/prelude.asm @@ -59,7 +59,7 @@ enum Query { /// Query a prover input (field element) by channel id and index. Input(int, int), /// Writes a field element (second argument) to an output channel (first argument). - /// Channel 1 is stdout, 2 is stderr. + /// It is the host's responsibility to give semantics to each channel. Output(int, fe), /// This value is not (additionally) constrained by the query. 
None, From 9560cc87976d034c05e960cdd209cb3450b99a76 Mon Sep 17 00:00:00 2001 From: Leo Date: Thu, 5 Dec 2024 14:17:53 +0100 Subject: [PATCH 30/57] expose publics in session (#2190) Depends on https://github.com/powdr-labs/powdr/pull/2189 This PR exposes the publics of a proof in Session and uses it to commit the result of Fibonacci in the example. --- examples/fibonacci/guest/src/main.rs | 3 +++ powdr-test/examples/fibonacci.rs | 9 +++++++++ powdr/src/lib.rs | 13 ++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/examples/fibonacci/guest/src/main.rs b/examples/fibonacci/guest/src/main.rs index ffd15a725d..f094c6f8ff 100644 --- a/examples/fibonacci/guest/src/main.rs +++ b/examples/fibonacci/guest/src/main.rs @@ -1,4 +1,5 @@ use powdr_riscv_runtime; +use powdr_riscv_runtime::commit; use powdr_riscv_runtime::io::{read, write}; fn fib(n: u32) -> u32 { @@ -14,4 +15,6 @@ fn main() { let r = fib(n); // Write result to stdout. write(1, r); + // Commit the result as a public. 
+ commit::commit(r); } diff --git a/powdr-test/examples/fibonacci.rs b/powdr-test/examples/fibonacci.rs index 9dbc498fc7..b3dd60f2f2 100644 --- a/powdr-test/examples/fibonacci.rs +++ b/powdr-test/examples/fibonacci.rs @@ -20,4 +20,13 @@ fn main() { let r: u32 = session.stdout(); assert_eq!(r, 89); + + let publics = session.publics(); + assert_eq!( + publics, + [ + 555233681, 1854640251, 3298928347, 2857173302, 2660189392, 1608424695, 543896544, + 3870154745 + ] + ); } diff --git a/powdr/src/lib.rs b/powdr/src/lib.rs index 0f49d5cdd9..9d91fd2c7d 100644 --- a/powdr/src/lib.rs +++ b/powdr/src/lib.rs @@ -12,8 +12,8 @@ pub use powdr_riscv_executor as riscv_executor; pub use powdr_pipeline::Pipeline; pub use powdr_number::Bn254Field; -pub use powdr_number::FieldElement; pub use powdr_number::GoldilocksField; +pub use powdr_number::{FieldElement, LargeInt}; use riscv::{CompilerOptions, RuntimeLibs}; @@ -192,6 +192,17 @@ impl Session { self.pipeline.export_verification_key(file).unwrap(); } + pub fn publics(&self) -> [u32; 8] { + let pubs: Vec = self + .pipeline + .publics() + .unwrap() + .iter() + .map(|(_, v)| v.unwrap().to_integer().try_into_u32().unwrap()) + .collect(); + pubs.try_into().expect("There should be exactly 8 publics") + } + pub fn stdout(&self) -> S { let host = self.pipeline.host_context(); host.read(1).unwrap() From b1bdec55b65a9155fc6df669c2e0fb34111747be Mon Sep 17 00:00:00 2001 From: Leo Date: Fri, 6 Dec 2024 12:08:56 +0100 Subject: [PATCH 31/57] fix i64 (#2202) Copied from the GL impl --- number/src/plonky3_macros.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/number/src/plonky3_macros.rs b/number/src/plonky3_macros.rs index cdcfd9f3a0..988fa09802 100644 --- a/number/src/plonky3_macros.rs +++ b/number/src/plonky3_macros.rs @@ -133,7 +133,17 @@ macro_rules! 
powdr_field_plonky3 { impl From for $name { fn from(n: i64) -> Self { - From::::from(n as u64) + Self::from(if n < 0 { + // If n < 0, then this is guaranteed to overflow since + // both arguments have their high bit set, so the result + // is in the canonical range. + Self::modulus() + .try_into_u64() + .unwrap() + .wrapping_add(n as u64) + } else { + n as u64 + }) } } From 58a5c03d49d501e9d48437e40b930d1e0abec3ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Zanitti?= Date: Fri, 6 Dec 2024 13:50:24 -0300 Subject: [PATCH 32/57] Display: SubmachineDeclaration args are not beign printed (#2204) Small PR to fix the fact that SubmachineDeclaration arguments were being omitted during display. --- ast/src/asm_analysis/display.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ast/src/asm_analysis/display.rs b/ast/src/asm_analysis/display.rs index ecf4e133f9..94be80048c 100644 --- a/ast/src/asm_analysis/display.rs +++ b/ast/src/asm_analysis/display.rs @@ -133,7 +133,15 @@ impl Display for LinkDefinition { impl Display for SubmachineDeclaration { fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{} {}", self.ty, self.name) + write!( + f, + "{} {}{}", + self.ty, + self.name, + (!self.args.is_empty()) + .then(|| format!("({})", self.args.iter().format(", "))) + .unwrap_or_default() + ) } } From 3b45173c4bd55a35921a11c0a8a6ffc9d9cd7c20 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Fri, 6 Dec 2024 18:21:42 +0100 Subject: [PATCH 33/57] Add `PhantomBusInteraction` (#2183) This PR adds a `Constr:: PhantomBusInteraction` variant. For now, it is ignored - if users want to use a bus, they need to express this in terms of phantom lookups / permutations as before this PR. I added a few `TODO(bus_interaction)` and opened #2184 to track support for phantom bus interactions. One use-case this could have before though is to trigger a "hand-written" witness generation for the bus, as discussed in the chat. 
--- ast/src/analyzed/display.rs | 11 ++++ ast/src/analyzed/mod.rs | 50 ++++++++++++++++--- .../json_exporter/expression_counter.rs | 4 +- backend/src/estark/json_exporter/mod.rs | 4 +- backend/src/halo2/circuit_builder.rs | 4 +- .../src/mock/connection_constraint_checker.rs | 2 + backend/src/stwo/circuit_builder.rs | 5 +- executor/src/witgen/global_constraints.rs | 5 ++ executor/src/witgen/identity_processor.rs | 2 + .../src/witgen/machines/machine_extractor.rs | 8 +++ .../src/witgen/machines/write_once_memory.rs | 9 +++- pil-analyzer/src/condenser.rs | 17 +++++-- pilopt/src/lib.rs | 21 ++++++-- plonky3/src/circuit_builder.rs | 4 +- std/prelude.asm | 11 +++- std/protocols/bus.asm | 4 ++ 16 files changed, 140 insertions(+), 21 deletions(-) diff --git a/ast/src/analyzed/display.rs b/ast/src/analyzed/display.rs index 446a099442..26afbf8b80 100644 --- a/ast/src/analyzed/display.rs +++ b/ast/src/analyzed/display.rs @@ -427,6 +427,17 @@ impl Display for ConnectIdentity { } } +impl Display for PhantomBusInteractionIdentity { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!( + f, + "Constr::PhantomBusInteraction({}, [{}]);", + self.multiplicity, + self.tuple.0.iter().map(ToString::to_string).format(", "), + ) + } +} + impl Display for Reference { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/ast/src/analyzed/mod.rs b/ast/src/analyzed/mod.rs index a0e9bc6e48..81e4031f26 100644 --- a/ast/src/analyzed/mod.rs +++ b/ast/src/analyzed/mod.rs @@ -949,7 +949,7 @@ impl Children> for SelectedExpressions { #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] pub struct PolynomialIdentity { - // The ID is globally unique among identitites. + // The ID is globally unique among identities. 
pub id: u64, pub source: SourceRef, pub expression: AlgebraicExpression, @@ -966,7 +966,7 @@ impl Children> for PolynomialIdentity { #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] pub struct LookupIdentity { - // The ID is globally unique among identitites. + // The ID is globally unique among identities. pub id: u64, pub source: SourceRef, pub left: SelectedExpressions, @@ -1017,7 +1017,7 @@ impl Children> for PhantomLookupIdentity { #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] pub struct PermutationIdentity { - // The ID is globally unique among identitites. + // The ID is globally unique among identities. pub id: u64, pub source: SourceRef, pub left: SelectedExpressions, @@ -1035,11 +1035,11 @@ impl Children> for PermutationIdentity { /// A witness generation helper for a permutation identity. /// -/// This identity is used as a replactement for a permutation identity which has been turned into challenge-based polynomial identities. +/// This identity is used as a replacement for a permutation identity which has been turned into challenge-based polynomial identities. /// This is ignored by the backend. #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] pub struct PhantomPermutationIdentity { - // The ID is globally unique among identitites. + // The ID is globally unique among identities. pub id: u64, pub source: SourceRef, pub left: SelectedExpressions, @@ -1057,7 +1057,7 @@ impl Children> for PhantomPermutationIdentity { #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] pub struct ConnectIdentity { - // The ID is globally unique among identitites. + // The ID is globally unique among identities. 
pub id: u64, pub source: SourceRef, pub left: Vec>, @@ -1073,6 +1073,36 @@ impl Children> for ConnectIdentity { } } +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema, PartialOrd, Ord)] +pub struct ExpressionList(pub Vec>); + +impl Children> for ExpressionList { + fn children_mut(&mut self) -> Box> + '_> { + Box::new(self.0.iter_mut()) + } + fn children(&self) -> Box> + '_> { + Box::new(self.0.iter()) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)] +pub struct PhantomBusInteractionIdentity { + // The ID is globally unique among identities. + pub id: u64, + pub source: SourceRef, + pub multiplicity: AlgebraicExpression, + pub tuple: ExpressionList, +} + +impl Children> for PhantomBusInteractionIdentity { + fn children_mut(&mut self) -> Box> + '_> { + Box::new(once(&mut self.multiplicity).chain(self.tuple.children_mut())) + } + fn children(&self) -> Box> + '_> { + Box::new(once(&self.multiplicity).chain(self.tuple.children())) + } +} + #[derive( Debug, PartialEq, @@ -1092,6 +1122,7 @@ pub enum Identity { Permutation(PermutationIdentity), PhantomPermutation(PhantomPermutationIdentity), Connect(ConnectIdentity), + PhantomBusInteraction(PhantomBusInteractionIdentity), } impl Identity { @@ -1111,6 +1142,7 @@ impl Identity { Identity::Permutation(i) => i.id, Identity::PhantomPermutation(i) => i.id, Identity::Connect(i) => i.id, + Identity::PhantomBusInteraction(i) => i.id, } } @@ -1122,6 +1154,7 @@ impl Identity { Identity::Permutation(_) => IdentityKind::Permutation, Identity::PhantomPermutation(_) => IdentityKind::PhantomPermutation, Identity::Connect(_) => IdentityKind::Connect, + Identity::PhantomBusInteraction(_) => IdentityKind::PhantomBusInteraction, } } } @@ -1135,6 +1168,7 @@ impl SourceReference for Identity { Identity::Permutation(i) => &i.source, Identity::PhantomPermutation(i) => &i.source, Identity::Connect(i) => &i.source, + Identity::PhantomBusInteraction(i) => &i.source, } } @@ -1146,6 +1180,7 @@ 
impl SourceReference for Identity { Identity::Permutation(i) => &mut i.source, Identity::PhantomPermutation(i) => &mut i.source, Identity::Connect(i) => &mut i.source, + Identity::PhantomBusInteraction(i) => &mut i.source, } } } @@ -1159,6 +1194,7 @@ impl Children> for Identity { Identity::Permutation(i) => i.children_mut(), Identity::PhantomPermutation(i) => i.children_mut(), Identity::Connect(i) => i.children_mut(), + Identity::PhantomBusInteraction(i) => i.children_mut(), } } @@ -1170,6 +1206,7 @@ impl Children> for Identity { Identity::Permutation(i) => i.children(), Identity::PhantomPermutation(i) => i.children(), Identity::Connect(i) => i.children(), + Identity::PhantomBusInteraction(i) => i.children(), } } } @@ -1184,6 +1221,7 @@ pub enum IdentityKind { Permutation, PhantomPermutation, Connect, + PhantomBusInteraction, } impl SelectedExpressions { diff --git a/backend/src/estark/json_exporter/expression_counter.rs b/backend/src/estark/json_exporter/expression_counter.rs index 27c022e9ef..06c384b154 100644 --- a/backend/src/estark/json_exporter/expression_counter.rs +++ b/backend/src/estark/json_exporter/expression_counter.rs @@ -60,7 +60,9 @@ impl ExpressionCounter for Identity { connect_identity.left.len() + connect_identity.right.len() } // phantom identities are not relevant in this context - Identity::PhantomLookup(..) | Identity::PhantomPermutation(..) => 0, + Identity::PhantomLookup(..) + | Identity::PhantomPermutation(..) + | Identity::PhantomBusInteraction(..) => 0, } } } diff --git a/backend/src/estark/json_exporter/mod.rs b/backend/src/estark/json_exporter/mod.rs index 67b70e3d03..cedc95e52b 100644 --- a/backend/src/estark/json_exporter/mod.rs +++ b/backend/src/estark/json_exporter/mod.rs @@ -135,7 +135,9 @@ pub fn export(analyzed: &Analyzed) -> PIL { line, }); } - Identity::PhantomLookup(..) | Identity::PhantomPermutation(..) => { + Identity::PhantomLookup(..) + | Identity::PhantomPermutation(..) + | Identity::PhantomBusInteraction(..) 
=> { // These are not relevant for the PIL } } diff --git a/backend/src/halo2/circuit_builder.rs b/backend/src/halo2/circuit_builder.rs index ffd871d5a8..a9cc08691a 100644 --- a/backend/src/halo2/circuit_builder.rs +++ b/backend/src/halo2/circuit_builder.rs @@ -320,7 +320,9 @@ impl<'a, T: FieldElement, F: PrimeField> Circuit for PowdrCi .collect() }); } - Identity::PhantomLookup(..) | Identity::PhantomPermutation(..) => { + Identity::PhantomLookup(..) + | Identity::PhantomPermutation(..) + | Identity::PhantomBusInteraction(..) => { // Phantom identities are only used in witness generation } } diff --git a/backend/src/mock/connection_constraint_checker.rs b/backend/src/mock/connection_constraint_checker.rs index 6cb4c29088..8259ff5642 100644 --- a/backend/src/mock/connection_constraint_checker.rs +++ b/backend/src/mock/connection_constraint_checker.rs @@ -68,6 +68,8 @@ impl Connection { | Identity::PhantomPermutation(PhantomPermutationIdentity { left, right, .. }) => { Ok((left.clone(), right.clone(), ConnectionKind::Permutation)) } + // TODO(bus_interaction) + Identity::PhantomBusInteraction(_) => Err(()), }?; // This connection is not localized yet: Its expression's PolyIDs point to the global PIL, not the local PIL. diff --git a/backend/src/stwo/circuit_builder.rs b/backend/src/stwo/circuit_builder.rs index 3b72ecf0b6..fe20a65071 100644 --- a/backend/src/stwo/circuit_builder.rs +++ b/backend/src/stwo/circuit_builder.rs @@ -108,8 +108,9 @@ impl FrameworkEval for PowdrEval { Identity::Permutation(..) => { unimplemented!("Permutation is not implemented in stwo yet") } - Identity::PhantomPermutation(..) => {} - Identity::PhantomLookup(..) => {} + Identity::PhantomPermutation(..) + | Identity::PhantomLookup(..) + | Identity::PhantomBusInteraction(..) 
=> {} } } eval diff --git a/executor/src/witgen/global_constraints.rs b/executor/src/witgen/global_constraints.rs index 59fa24e817..b82ef935df 100644 --- a/executor/src/witgen/global_constraints.rs +++ b/executor/src/witgen/global_constraints.rs @@ -332,6 +332,11 @@ fn propagate_constraints( // permutation identities are stronger than just range constraints, so we do nothing false } + Identity::PhantomBusInteraction(..) => { + // TODO(bus_interaction): If we can statically match sends & receives, we could extract + // range constraints from them. + false + } } } diff --git a/executor/src/witgen/identity_processor.rs b/executor/src/witgen/identity_processor.rs index 0a403c046b..bf01e9225e 100644 --- a/executor/src/witgen/identity_processor.rs +++ b/executor/src/witgen/identity_processor.rs @@ -55,6 +55,8 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback> IdentityProcessor<'a, 'c, T, // "Identity of kind {kind:?} is not supported by the identity processor." // ) } + // TODO(bus_interaction) + Identity::PhantomBusInteraction(..) => Ok(EvalValue::complete(Vec::new())), }; report_identity_solving(identity, &result); result diff --git a/executor/src/witgen/machines/machine_extractor.rs b/executor/src/witgen/machines/machine_extractor.rs index 5465dd0451..286fec6bd7 100644 --- a/executor/src/witgen/machines/machine_extractor.rs +++ b/executor/src/witgen/machines/machine_extractor.rs @@ -284,6 +284,8 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> { Identity::Connect(..) => { unimplemented!() } + // TODO(bus_interaction) + Identity::PhantomBusInteraction(..) 
=> {} }; } if witnesses.len() == count { @@ -312,6 +314,12 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> { } Identity::Polynomial(i) => self.fixed.polynomial_references(i), Identity::Connect(i) => self.fixed.polynomial_references(i), + Identity::PhantomBusInteraction(i) => self + .fixed + .polynomial_references(&i.tuple) + .into_iter() + .chain(self.fixed.polynomial_references(&i.multiplicity)) + .collect(), } } } diff --git a/executor/src/witgen/machines/write_once_memory.rs b/executor/src/witgen/machines/write_once_memory.rs index 60ab069416..268a5a2a24 100644 --- a/executor/src/witgen/machines/write_once_memory.rs +++ b/executor/src/witgen/machines/write_once_memory.rs @@ -3,7 +3,7 @@ use std::collections::{BTreeMap, HashMap}; use itertools::{Either, Itertools}; use num_traits::One; -use powdr_ast::analyzed::{PolyID, PolynomialType}; +use powdr_ast::analyzed::{Identity, PolyID, PolynomialType}; use powdr_number::{DegreeType, FieldElement}; use crate::witgen::data_structures::mutable_state::MutableState; @@ -49,7 +49,12 @@ impl<'a, T: FieldElement> WriteOnceMemory<'a, T> { fixed_data: &'a FixedData<'a, T>, parts: &MachineParts<'a, T>, ) -> Option { - if !parts.identities.is_empty() { + if parts + .identities + .iter() + // The only identity we'd expect is a PhantomBusInteraction + .any(|id| !matches!(id, Identity::PhantomBusInteraction(_))) + { return None; } diff --git a/pil-analyzer/src/condenser.rs b/pil-analyzer/src/condenser.rs index 2a92296a37..0262a66b13 100644 --- a/pil-analyzer/src/condenser.rs +++ b/pil-analyzer/src/condenser.rs @@ -11,9 +11,10 @@ use std::{ use powdr_ast::analyzed::{ AlgebraicExpression, AlgebraicReference, Analyzed, ConnectIdentity, DegreeRange, Expression, - FunctionValueDefinition, Identity, LookupIdentity, PermutationIdentity, PhantomLookupIdentity, - PhantomPermutationIdentity, PolyID, PolynomialIdentity, PolynomialType, PublicDeclaration, - SelectedExpressions, SolvedTraitImpls, StatementIdentifier, Symbol, SymbolKind, + 
ExpressionList, FunctionValueDefinition, Identity, LookupIdentity, PermutationIdentity, + PhantomBusInteractionIdentity, PhantomLookupIdentity, PhantomPermutationIdentity, PolyID, + PolynomialIdentity, PolynomialType, PublicDeclaration, SelectedExpressions, SolvedTraitImpls, + StatementIdentifier, Symbol, SymbolKind, }; use powdr_ast::parsed::{ asm::{AbsoluteSymbolPath, SymbolPath}, @@ -785,6 +786,16 @@ fn to_constraint( } .into() } + "PhantomBusInteraction" => PhantomBusInteractionIdentity { + id: counters.dispense_identity_id(), + source, + multiplicity: to_expr(&fields[0]), + tuple: ExpressionList(match fields[1].as_ref() { + Value::Array(fields) => fields.iter().map(|f| to_expr(f)).collect(), + _ => panic!("Expected array, got {:?}", fields[1]), + }), + } + .into(), _ => panic!("Expected constraint but got {constraint}"), } } diff --git a/pilopt/src/lib.rs b/pilopt/src/lib.rs index 28224aa75f..541c008d0e 100644 --- a/pilopt/src/lib.rs +++ b/pilopt/src/lib.rs @@ -556,6 +556,13 @@ fn remove_trivial_identities(pil_file: &mut Analyzed) { left.expressions.is_empty().then_some(index) } Identity::Connect(..) => None, + Identity::PhantomBusInteraction(id) => { + if id.tuple.0.is_empty() { + unreachable!("Unexpected empty bus interaction: {}", id); + } else { + None + } + } }) .collect(); pil_file.remove_identities(&to_remove); @@ -576,6 +583,7 @@ fn remove_duplicate_identities(pil_file: &mut Analyzed) { Identity::Permutation(..) => 3, Identity::PhantomPermutation(..) => 4, Identity::Connect(..) => 5, + Identity::PhantomBusInteraction(..) => 6, }; discriminant(self) @@ -635,6 +643,11 @@ fn remove_duplicate_identities(pil_file: &mut Analyzed) { left: c, right: d, .. }), ) => a.cmp(c).then_with(|| b.cmp(d)), + (Identity::PhantomBusInteraction(_), Identity::PhantomBusInteraction(_)) => { + unimplemented!( + "Bus interactions should have been removed before this point." 
+ ) + } _ => { unreachable!("Different identity types would have different discriminants.") } @@ -662,11 +675,13 @@ fn remove_duplicate_identities(pil_file: &mut Analyzed) { .identities .iter() .enumerate() - .filter_map(|(index, identity)| { - match identity_expressions.insert(CanonicalIdentity(identity)) { + .filter_map(|(index, identity)| match identity { + // Duplicate bus interactions should not be removed, because that changes the statement. + Identity::PhantomBusInteraction(_) => None, + _ => match identity_expressions.insert(CanonicalIdentity(identity)) { false => Some(index), true => None, - } + }, }) .collect(); pil_file.remove_identities(&to_remove); diff --git a/plonky3/src/circuit_builder.rs b/plonky3/src/circuit_builder.rs index 712a91863c..cf59a14341 100644 --- a/plonky3/src/circuit_builder.rs +++ b/plonky3/src/circuit_builder.rs @@ -459,7 +459,9 @@ where unimplemented!("Plonky3 does not support permutations") } Identity::Connect(..) => unimplemented!("Plonky3 does not support connections"), - Identity::PhantomPermutation(..) | Identity::PhantomLookup(..) => { + Identity::PhantomPermutation(_) + | Identity::PhantomLookup(_) + | Identity::PhantomBusInteraction(_) => { // phantom identities are only used in witgen } } diff --git a/std/prelude.asm b/std/prelude.asm index 14621f7b3c..d2b609db08 100644 --- a/std/prelude.asm +++ b/std/prelude.asm @@ -41,7 +41,16 @@ enum Constr { PhantomPermutation((Option, Option), (expr, expr)[]), /// A connection constraint (copy constraint), result of the "connect" operator. - Connection((expr, expr)[]) + Connection((expr, expr)[]), + + /// A "phantom" bus interaction, i.e., an annotation for witness generation. + /// The actual constraint should be enforced via other constraints. + /// Contains: + /// - An expression for the multiplicity. + /// - The tuple added to the bus. + /// WARNING: As of now, this annotation is largely ignored. 
When using the bus, + /// make sure that you also add phantom lookup / permutation constraints. + PhantomBusInteraction(expr, expr[]) } /// This is the result of the "$" operator. It can be used as the left and diff --git a/std/protocols/bus.asm b/std/protocols/bus.asm index 5bf1be17b0..0bf93d8ce7 100644 --- a/std/protocols/bus.asm +++ b/std/protocols/bus.asm @@ -30,6 +30,10 @@ let bus_interaction: expr, expr[], expr -> () = constr |id, tuple, multiplicity| std::check::assert(required_extension_size() <= 2, || "Invalid extension size"); + // Add phantom bus interaction + let full_tuple = [id] + tuple; + Constr::PhantomBusInteraction(multiplicity, full_tuple); + // Alpha is used to compress the LHS and RHS arrays. let alpha = fp2_from_array(array::new(required_extension_size(), |i| challenge(0, i + 1))); // Beta is used to update the accumulator. From 127dec572b44a9d98515f7316ba5e94c7b6f9359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Zanitti?= Date: Mon, 9 Dec 2024 07:03:41 -0300 Subject: [PATCH 34/57] powdr-asmopt: remove unused submachines, instructions, registers (#2143) Solves #682 --- Cargo.toml | 2 + asmopt/Cargo.toml | 14 ++ asmopt/src/lib.rs | 276 +++++++++++++++++++++ asmopt/tests/optimizer.rs | 360 ++++++++++++++++++++++++++++ ast/src/asm_analysis/mod.rs | 15 +- pilopt/src/lib.rs | 2 +- pilopt/src/referenced_symbols.rs | 257 +++++++++++++++++++- pipeline/Cargo.toml | 1 + pipeline/src/pipeline.rs | 28 ++- test_data/asm/book/declarations.asm | 3 +- 10 files changed, 944 insertions(+), 14 deletions(-) create mode 100644 asmopt/Cargo.toml create mode 100644 asmopt/src/lib.rs create mode 100644 asmopt/tests/optimizer.rs diff --git a/Cargo.toml b/Cargo.toml index fa798ea267..bdaa7d2c41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ members = [ "pilopt", "plonky3", "asm-to-pil", + "asmopt", "backend", "ast", "analysis", @@ -49,6 +50,7 @@ powdr-ast = { path = "./ast", version = "0.1.3" } powdr-asm-to-pil = { path = "./asm-to-pil", version = 
"0.1.3" } powdr-isa-utils = { path = "./isa-utils", version = "0.1.3" } powdr-analysis = { path = "./analysis", version = "0.1.3" } +powdr-asmopt = { path = "./asmopt", version = "0.1.3" } powdr-backend = { path = "./backend", version = "0.1.3" } powdr-backend-utils = { path = "./backend-utils", version = "0.1.3" } powdr-executor = { path = "./executor", version = "0.1.3" } diff --git a/asmopt/Cargo.toml b/asmopt/Cargo.toml new file mode 100644 index 0000000000..998c554f49 --- /dev/null +++ b/asmopt/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "powdr-asmopt" +version.workspace = true +edition.workspace = true +license.workspace = true +homepage.workspace = true +repository.workspace = true + +[dependencies] +powdr-ast.workspace = true +powdr-analysis.workspace = true +powdr-importer.workspace = true +powdr-pilopt.workspace = true +powdr-parser.workspace = true \ No newline at end of file diff --git a/asmopt/src/lib.rs b/asmopt/src/lib.rs new file mode 100644 index 0000000000..4b3ea135a8 --- /dev/null +++ b/asmopt/src/lib.rs @@ -0,0 +1,276 @@ +use std::collections::{HashMap, HashSet}; +use std::iter::once; + +use powdr_ast::parsed::asm::parse_absolute_path; +use powdr_ast::{ + asm_analysis::{AnalysisASMFile, Machine}, + parsed::{asm::AbsoluteSymbolPath, NamespacedPolynomialReference}, +}; +use powdr_pilopt::referenced_symbols::ReferencedSymbols; + +type Expression = powdr_ast::asm_analysis::Expression; + +const MAIN_MACHINE_STR: &str = "::Main"; +const PC_REGISTER: &str = "pc"; + +pub fn optimize(mut analyzed_asm: AnalysisASMFile) -> AnalysisASMFile { + // Optimizations assume the existence of a Main machine as an entry point. + // If it doesn't exist, return the ASM as-is to prevent removing all machines, + // which would break some examples. 
+ let main_machine_path = parse_absolute_path(MAIN_MACHINE_STR); + if analyzed_asm + .machines() + .all(|(path, _)| path != main_machine_path) + { + return analyzed_asm; + } + + asm_remove_unreferenced_machines(&mut analyzed_asm); + asm_remove_unused_machine_components(&mut analyzed_asm); + asm_remove_unreferenced_machines(&mut analyzed_asm); + + analyzed_asm +} + +/// Remove all machines that are not referenced in any other machine. +/// This function traverses the dependency graph starting from ::Main to identify all reachable machines. +fn asm_remove_unreferenced_machines(asm_file: &mut AnalysisASMFile) { + let deps = build_machine_dependencies(asm_file); + let all_machines = collect_all_dependent_machines(&deps, MAIN_MACHINE_STR) + .into_iter() + .collect::>(); + asm_file.modules.iter_mut().for_each(|(path, module)| { + let machines_in_module = machines_in_module(&all_machines, path); + module.retain_machines(machines_in_module); + }); +} + +/// Analyzes each machine and successively removes unnecessary components: +/// 1. Removes declarations of instructions that are never used. +/// 2. Removes instances of submachines that are never used, including those that became unused in the previous step. +/// 3. Removes unused registers. 
+fn asm_remove_unused_machine_components(asm_file: &mut AnalysisASMFile) { + for (_, machine) in asm_file.machines_mut() { + let submachine_to_decl: HashMap = machine + .submachines + .iter() + .map(|sub| (sub.name.clone(), sub.ty.to_string())) + .collect(); + + let symbols_in_callable: HashSet = machine_callable_body_symbols(machine).collect(); + + machine_remove_unused_instructions(machine, &symbols_in_callable); + machine_remove_unused_submachines(machine, &symbols_in_callable, &submachine_to_decl); + machine_remove_unused_registers(machine, &submachine_to_decl); + } +} + +fn machine_remove_unused_registers( + machine: &mut Machine, + submachine_to_decl: &HashMap, +) { + let used_symbols: HashSet<_> = once(PC_REGISTER.to_string()) + .chain(machine_callable_body_symbols(machine)) + .chain(machine_in_links(machine, submachine_to_decl)) + .chain(machine_instructions_symbols(machine)) + .chain(machine_links_symbols(machine)) + .collect(); + + machine + .registers + .retain(|reg| used_symbols.contains(®.name)); +} + +fn machine_remove_unused_submachines( + machine: &mut Machine, + symbols: &HashSet, + submachine_to_decl: &HashMap, +) { + let visited_submachines = machine + .instructions + .iter() + .filter(|ins| symbols.contains(&ins.name)) + .flat_map(|ins| { + ins.instruction + .links + .iter() + .filter_map(|link| submachine_to_decl.get(&link.link.instance)) + }) + .cloned(); + + let used_submachines: HashSet<_> = visited_submachines + .chain(machine_in_links(machine, submachine_to_decl)) + .chain(machine_in_args(machine, submachine_to_decl)) + .chain(symbols.iter().cloned()) + .collect(); + + machine + .submachines + .retain(|sub| used_submachines.contains(&sub.ty.to_string())); +} + +fn machine_remove_unused_instructions(machine: &mut Machine, symbols: &HashSet) { + machine + .instructions + .retain(|ins| symbols.contains(&ins.name)); +} + +/// Retrieves all machines defined within a specific module, relative to the given module path. 
+/// +/// This function filters the provided set of all machine paths to include only those machines +/// that are defined within the module specified by `path`. It then strips the module path prefix from each +/// machine path to return the machine names relative to that module. +fn machines_in_module( + all_machines: &HashSet, + path: &AbsoluteSymbolPath, +) -> HashSet { + let path_str = path.to_string(); + let path_prefix = if path_str == "::" { + "::".to_string() + } else { + format!("{}{}", path_str, "::") + }; + + all_machines + .iter() + .filter(|machine_path| machine_path.starts_with(&path_prefix)) + .map(|machine_path| { + machine_path + .strip_prefix(&path_prefix) + .unwrap_or(machine_path) + .to_string() + }) + .collect() +} + +/// Creates a mapping between machine names and sets of paths for their instantiated submachines. +fn build_machine_dependencies(asm_file: &AnalysisASMFile) -> HashMap> { + let mut dependencies = HashMap::new(); + + for (path, machine) in asm_file.machines() { + let submachine_to_decl: HashMap = machine + .submachines + .iter() + .map(|sub| (sub.name.clone(), sub.ty.to_string())) + .collect(); + + let submachine_names = dependencies_by_machine(machine, submachine_to_decl); + dependencies.insert(path.to_string(), submachine_names); + } + + dependencies +} + +/// This function analyzes a given `Machine` and gathers all the submachines it depends on. +/// Dependencies are collected from various components of the machine: +/// +/// 1. Instantiated Submachines: Submachines that are directly instantiated within the machine. +/// 2. Submachine Arguments: Submachines referenced in the arguments of the instantiated submachines. +/// 3. Parameters: Submachines specified in the machine's parameters. +/// 4. Links: Submachines that are used in links within the machine. 
+fn dependencies_by_machine( + machine: &Machine, + submachine_to_decl: HashMap, +) -> HashSet { + let submachine_names: HashSet = machine + .submachines + .iter() + .map(|sub| sub.ty.to_string()) + .chain(machine.submachines.iter().flat_map(|sub| { + sub.args.iter().filter_map(|expr| { + expr_to_ref(expr).and_then(|ref_name| submachine_to_decl.get(&ref_name).cloned()) + }) + })) + .chain( + machine + .params + .0 + .iter() + .map(|param| param.ty.as_ref().unwrap().to_string()), + ) + .chain( + machine + .links + .iter() + .filter_map(|ld| submachine_to_decl.get(&ld.to.instance)) + .cloned(), + ) + .collect(); + submachine_names +} + +fn expr_to_ref(expr: &Expression) -> Option { + match expr { + Expression::Reference(_, NamespacedPolynomialReference { path, .. }) => { + Some(path.to_string()) + } + Expression::PublicReference(_, pref) => Some(pref.clone()), + _ => None, + } +} + +fn collect_all_dependent_machines( + dependencies: &HashMap>, + start: &str, +) -> HashSet { + let mut result = HashSet::new(); + let mut to_visit = vec![start.to_string()]; + let mut visited = HashSet::new(); + + while let Some(machine) = to_visit.pop() { + if visited.insert(machine.clone()) { + result.insert(machine.clone()); + + if let Some(submachines) = dependencies.get(&machine) { + to_visit.extend(submachines.iter().cloned()); + } + } + } + + result +} + +fn machine_callable_body_symbols(machine: &Machine) -> impl Iterator + '_ { + machine.callable.function_definitions().flat_map(|def| { + def.symbols() + .map(|s| s.name.to_string()) + .collect::>() + }) +} + +fn machine_instructions_symbols(machine: &Machine) -> impl Iterator + '_ { + machine + .instructions + .iter() + .flat_map(|ins| ins.symbols().map(|s| s.name.to_string())) +} + +fn machine_links_symbols(machine: &Machine) -> impl Iterator + '_ { + machine + .links + .iter() + .flat_map(|ld| ld.symbols().map(|s| s.name.to_string())) +} + +fn machine_in_args<'a>( + machine: &'a Machine, + submachine_to_decl: &'a HashMap, +) -> 
impl Iterator + 'a { + machine + .submachines + .iter() + .flat_map(|sm| sm.args.iter().filter_map(expr_to_ref)) + .filter_map(|ref_name| submachine_to_decl.get(&ref_name)) + .cloned() +} + +fn machine_in_links<'a>( + machine: &'a Machine, + submachine_to_decl: &'a HashMap, +) -> impl Iterator + 'a { + machine + .links + .iter() + .filter_map(move |ld| submachine_to_decl.get(&ld.to.instance)) + .cloned() +} diff --git a/asmopt/tests/optimizer.rs b/asmopt/tests/optimizer.rs new file mode 100644 index 0000000000..48c98815f3 --- /dev/null +++ b/asmopt/tests/optimizer.rs @@ -0,0 +1,360 @@ +use powdr_analysis::analyze; +use powdr_asmopt::optimize; +use powdr_parser::parse_asm; + +#[test] +fn remove_unused_machine() { + let input = r#" + machine Main with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg A; + + instr assert_eq X, A { X = A } + + function main { + assert_eq 1, 1; + return; + } + } + + // This machine should be removed since it's never used + machine Unused with degree: 8 { + reg pc[@pc]; + col witness w; + w = w * w; + } + "#; + + let expectation = r#"machine Main with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg A; + instr assert_eq X, A{ X = A } + function main { + assert_eq 1, 1; + // END BATCH Unimplemented + return; + // END BATCH + } +} +"#; + + let parsed = parse_asm(None, input).unwrap(); + let analyzed = analyze(parsed).unwrap(); + let optimized = optimize(analyzed).to_string(); + assert_eq!(optimized, expectation); +} + +#[test] +fn remove_unused_instruction_and_machine() { + let input = r#" + machine Main with degree: 8 { + Helper helper; + + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + reg A; + + // This instruction is never used and should be removed + // which will also remove Helper machine since it's the only usage + instr unused X -> Y link ~> Z = helper.double(X); + instr assert_eq X, A { X = A } + + function main { + assert_eq 1, 1; + return; + } + } + + machine Helper with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + + function 
double x: field -> field { + return x + x; + } + } + "#; + + let expectation = r#"machine Main with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg A; + instr assert_eq X, A{ X = A } + function main { + assert_eq 1, 1; + // END BATCH Unimplemented + return; + // END BATCH + } +} +"#; + + let parsed = parse_asm(None, input).unwrap(); + let analyzed = analyze(parsed).unwrap(); + let optimized = optimize(analyzed).to_string(); + assert_eq!(optimized, expectation); +} + +#[test] +fn keep_machine_with_multiple_references() { + let input = r#" + machine Main with degree: 8 { + Helper helper; + + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + reg A; + + // Two different instructions using the same machine + instr double X -> Y link => Y = helper.double(X); + instr triple X -> Y link => Y = helper.triple(X); + + function main { + // Only using one instruction + A <== double(2); + return; + } + } + + machine Helper with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + + function double x: field -> field { return x + x; } + function triple x: field -> field { return x + x + x; } + } + "#; + + let expectation = r#"machine Main with degree: 8 { + ::Helper helper + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + reg A; + instr double X -> Y link => Y = helper.double(X){ } + function main { + A <=Y= double(2); + // END BATCH Unimplemented + return; + // END BATCH + } +} +machine Helper with degree: 8 { + reg pc[@pc]; + function double x: field -> field { + return x + x; + // END BATCH + } + function triple x: field -> field { + return x + x + x; + // END BATCH + } +} +"#; + + let parsed = parse_asm(None, input).unwrap(); + let analyzed = analyze(parsed).unwrap(); + let optimized = optimize(analyzed).to_string(); + assert_eq!(optimized, expectation); +} + +#[test] +fn keep_machine_parameters() { + let input = r#" + machine Main with degree: 8 { + Required required; + ParamMachine sub(required); + Unused unused; + + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + reg A; + + instr compute X -> Y 
link => Y = sub.compute(X); + + function main { + A <== compute(1); + return; + } + } + + machine ParamMachine(mem: Required) with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + + function compute x: field -> field { + return x + x; + } + } + + machine Required with + latch: latch, + operation_id: operation_id + { + operation compute<0> x -> y; + + col fixed latch = [1]*; + col witness operation_id; + col witness x; + col witness y; + + y = x + x; + } + + machine Unused with degree: 8 { + reg pc[@pc]; + col witness w; + w = w * w; + } + "#; + + let expectation = r#"machine Main with degree: 8 { + ::Required required + ::ParamMachine sub(required) + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + reg A; + instr compute X -> Y link => Y = sub.compute(X){ } + function main { + A <=Y= compute(1); + // END BATCH Unimplemented + return; + // END BATCH + } +} +machine ParamMachine with degree: 8 { + reg pc[@pc]; + function compute x: field -> field { + return x + x; + // END BATCH + } +} +machine Required with , latch: latch, operation_id: operation_id { + operation compute<0> x -> y; + pol constant latch = [1]*; + pol commit operation_id; + pol commit x; + pol commit y; + y = x + x; +} +"#; + + let parsed = parse_asm(None, input).unwrap(); + let analyzed = analyze(parsed).unwrap(); + let optimized = optimize(analyzed).to_string(); + assert_eq!(optimized, expectation); +} + +#[test] +fn remove_unused_registers() { + let input = r#" + machine Main with degree: 8 { + Helper helper; + reg pc[@pc]; + reg Y[<=]; + reg Z[<=]; + reg A; + + instr compute X -> A link => X = helper.compute(X); + + function main { + A <== compute(5); + return; + } + } + + machine Helper with degree: 8 { + reg pc[@pc]; + reg X[<=]; + reg Y[<=]; + + function compute x: field -> field { + return x + 1; + } + } + "#; + + let expectation = r#"machine Main with degree: 8 { + ::Helper helper + reg pc[@pc]; + reg A; + instr compute X -> A link => X = helper.compute(X){ } + function main { + A <=A= 
compute(5); + // END BATCH Unimplemented + return; + // END BATCH + } +} +machine Helper with degree: 8 { + reg pc[@pc]; + function compute x: field -> field { + return x + 1; + // END BATCH + } +} +"#; + + let parsed = parse_asm(None, input).unwrap(); + let analyzed = analyze(parsed).unwrap(); + let optimized = optimize(analyzed).to_string(); + assert_eq!(optimized, expectation); +} + +#[test] +fn keep_linked_submachine() { + let input = r#" + machine Main with degree: 8 { + Helper helper; + reg pc[@pc]; + reg X[<=]; + + link => X = helper.check(X); + + function main { + return; + } + } + + machine Helper with degree: 8 { + reg pc[@pc]; + + function check x: field -> field { + return x + x; + } + } + "#; + + let expectation = r#"machine Main with degree: 8 { + ::Helper helper + reg pc[@pc]; + reg X[<=]; + function main { + return; + // END BATCH + } + link => X = helper.check(X); +} +machine Helper with degree: 8 { + reg pc[@pc]; + function check x: field -> field { + return x + x; + // END BATCH + } +} +"#; + + let parsed = parse_asm(None, input).unwrap(); + let analyzed = analyze(parsed).unwrap(); + let optimized = optimize(analyzed).to_string(); + assert_eq!(optimized, expectation); +} diff --git a/ast/src/asm_analysis/mod.rs b/ast/src/asm_analysis/mod.rs index ada87eb8c9..df82e6bdaa 100644 --- a/ast/src/asm_analysis/mod.rs +++ b/ast/src/asm_analysis/mod.rs @@ -3,7 +3,7 @@ mod display; use std::{ collections::{ btree_map::{IntoIter, Iter, IterMut}, - BTreeMap, BTreeSet, + BTreeMap, BTreeSet, HashSet, }, iter::{once, repeat}, ops::ControlFlow, @@ -882,6 +882,19 @@ impl Module { self.ordering.push(StatementReference::Module(name)); } + /// Retains only the machines with the specified names. + /// Ordering is preserved. 
+ pub fn retain_machines(&mut self, names: HashSet) { + self.machines.retain(|key, _| names.contains(key)); + self.ordering.retain(|statement| { + if let StatementReference::MachineDeclaration(decl_name) = statement { + names.contains(decl_name) + } else { + true + } + }); + } + pub fn into_inner( self, ) -> ( diff --git a/pilopt/src/lib.rs b/pilopt/src/lib.rs index 541c008d0e..5dc63b9814 100644 --- a/pilopt/src/lib.rs +++ b/pilopt/src/lib.rs @@ -16,7 +16,7 @@ use powdr_ast::parsed::visitor::{AllChildren, Children, ExpressionVisitable}; use powdr_ast::parsed::Number; use powdr_number::{BigUint, FieldElement}; -mod referenced_symbols; +pub mod referenced_symbols; use referenced_symbols::{ReferencedSymbols, SymbolReference}; diff --git a/pilopt/src/referenced_symbols.rs b/pilopt/src/referenced_symbols.rs index b7ee422017..0f0bf45048 100644 --- a/pilopt/src/referenced_symbols.rs +++ b/pilopt/src/referenced_symbols.rs @@ -4,11 +4,20 @@ use powdr_ast::{ analyzed::{ Expression, FunctionValueDefinition, PolynomialReference, Reference, TypedExpression, }, + asm_analysis::{ + AssignmentStatement, Expression as ExpressionASM, FunctionBody, FunctionDefinitionRef, + FunctionStatement, FunctionSymbol, InstructionDefinitionStatement, InstructionStatement, + LinkDefinition, Return, + }, parsed::{ - asm::SymbolPath, + asm::{ + AssignmentRegister, CallableRef, Instruction, InstructionBody, LinkDeclaration, Param, + Params, SymbolPath, + }, types::Type, visitor::{AllChildren, Children}, - EnumDeclaration, StructDeclaration, TraitImplementation, TypeDeclaration, + EnumDeclaration, FunctionDefinition, NamespacedPolynomialReference, PilStatement, + StructDeclaration, TraitImplementation, TypeDeclaration, }, }; @@ -20,7 +29,7 @@ pub trait ReferencedSymbols { fn symbols(&self) -> Box> + '_>; } -#[derive(Clone, Hash, Ord, PartialOrd, Eq, PartialEq)] +#[derive(Clone, Hash, Ord, PartialOrd, Eq, PartialEq, Debug)] pub struct SymbolReference<'a> { pub name: Cow<'a, str>, pub type_args: 
Option<&'a Vec>, @@ -59,6 +68,15 @@ impl<'a> From<&'a PolynomialReference> for SymbolReference<'a> { } } +impl<'a> From<&'a NamespacedPolynomialReference> for SymbolReference<'a> { + fn from(poly: &'a NamespacedPolynomialReference) -> Self { + SymbolReference { + name: poly.path.to_string().into(), + type_args: None, + } + } +} + impl ReferencedSymbols for FunctionValueDefinition { fn symbols(&self) -> Box> + '_> { match self { @@ -85,7 +103,7 @@ impl ReferencedSymbols for FunctionValueDefinition { } } -impl ReferencedSymbols for TraitImplementation { +impl ReferencedSymbols for TraitImplementation { fn symbols(&self) -> Box> + '_> { Box::new( once(SymbolReference::from(&self.name)) @@ -95,7 +113,7 @@ impl ReferencedSymbols for TraitImplementation { } } -impl ReferencedSymbols for TypeDeclaration { +impl ReferencedSymbols for TypeDeclaration { fn symbols(&self) -> Box> + '_> { match self { TypeDeclaration::Enum(enum_decl) => enum_decl.symbols(), @@ -104,7 +122,7 @@ impl ReferencedSymbols for TypeDeclaration { } } -impl ReferencedSymbols for EnumDeclaration { +impl ReferencedSymbols for EnumDeclaration { fn symbols(&self) -> Box> + '_> { Box::new( self.variants @@ -116,7 +134,7 @@ impl ReferencedSymbols for EnumDeclaration { } } -impl ReferencedSymbols for StructDeclaration { +impl ReferencedSymbols for StructDeclaration { fn symbols(&self) -> Box> + '_> { Box::new(self.fields.iter().flat_map(|named| named.ty.symbols())) } @@ -149,8 +167,231 @@ fn symbols_in_expression( } } -impl ReferencedSymbols for Type { +fn symbols_in_expression_asm( + e: &ExpressionASM, +) -> Option> + '_>> { + match e { + ExpressionASM::PublicReference(_, name) => { + Some(Box::new(once(SymbolReference::from(name)))) + } + ExpressionASM::Reference(_, pr @ NamespacedPolynomialReference { type_args, .. 
}) => { + let type_iter = type_args + .iter() + .flat_map(|t| t.iter()) + .flat_map(|t| t.symbols()); + + Some(Box::new(type_iter.chain(once(SymbolReference::from(pr))))) + } + _ => None, + } +} + +impl ReferencedSymbols for Type { fn symbols(&self) -> Box> + '_> { Box::new(self.contained_named_types().map(SymbolReference::from)) } } + +impl ReferencedSymbols for InstructionDefinitionStatement { + fn symbols(&self) -> Box> + '_> { + Box::new(once(SymbolReference::from(&self.name)).chain(self.instruction.symbols())) + } +} + +impl ReferencedSymbols for Instruction { + fn symbols(&self) -> Box> + '_> { + Box::new( + self.links + .iter() + .flat_map(|l| l.symbols()) + .chain(self.body.symbols()), + ) + } +} + +impl ReferencedSymbols for Params { + fn symbols(&self) -> Box> + '_> { + Box::new( + self.inputs + .iter() + .flat_map(|p| p.symbols()) + .chain(self.outputs.iter().flat_map(|p| p.symbols())), + ) + } +} + +impl ReferencedSymbols for Param { + fn symbols(&self) -> Box> + '_> { + Box::new( + once(SymbolReference::from(&self.name)) + .chain(self.ty.as_ref().map(SymbolReference::from)), + ) + } +} + +impl ReferencedSymbols for LinkDeclaration { + fn symbols(&self) -> Box> + '_> { + Box::new(self.flag.symbols().chain(self.link.symbols())) + } +} + +impl ReferencedSymbols for CallableRef { + fn symbols(&self) -> Box> + '_> { + Box::new( + once(SymbolReference::from(&self.instance)) + .chain(once(SymbolReference::from(&self.callable))) + .chain(self.params.symbols()), + ) + } +} + +impl ReferencedSymbols for LinkDefinition { + fn symbols(&self) -> Box> + '_> { + Box::new( + self.link_flag + .symbols() + .chain(self.instr_flag.iter().flat_map(|f| f.symbols())) + .chain(self.to.symbols()), + ) + } +} + +impl ReferencedSymbols for FunctionDefinitionRef<'_> { + fn symbols(&self) -> Box> + '_> { + Box::new(once(SymbolReference::from(self.name)).chain(self.function.symbols())) + } +} + +impl ReferencedSymbols for FunctionSymbol { + fn symbols(&self) -> Box> + '_> { + 
Box::new(self.body.symbols().chain(self.params.symbols())) + } +} + +impl ReferencedSymbols for InstructionBody { + fn symbols(&self) -> Box> + '_> { + Box::new(self.0.iter().flat_map(|e| e.symbols())) + } +} + +impl ReferencedSymbols for PilStatement { + fn symbols(&self) -> Box> + '_> { + match self { + PilStatement::Include(_, _) => Box::new(std::iter::empty()), + PilStatement::Namespace(_, _, _) => Box::new(std::iter::empty()), + PilStatement::LetStatement(_, name, type_scheme, expression) => Box::new( + type_scheme + .iter() + .flat_map(|ts| ts.ty.symbols()) + .chain(expression.iter().flat_map(|e| e.symbols())) + .chain(once(SymbolReference::from(name))), + ), + PilStatement::PolynomialDefinition(_, polynomial_name, expression) => Box::new( + expression + .symbols() + .chain(std::iter::once(SymbolReference::from( + &polynomial_name.name, + ))), + ), + PilStatement::PublicDeclaration( + _, + _, + namespaced_polynomial_reference, + expression, + expression1, + ) => Box::new(Box::new( + once(SymbolReference::from(namespaced_polynomial_reference)) + .chain(expression.iter().flat_map(|e| e.symbols())) + .chain(expression1.symbols()), + )), + PilStatement::PolynomialConstantDefinition(_, _, function_definition) => { + function_definition.symbols() + } + PilStatement::PolynomialCommitDeclaration(_, _, _, function_definition) => { + Box::new(function_definition.iter().flat_map(|f| f.symbols())) + } + PilStatement::EnumDeclaration(_, enum_declaration) => enum_declaration.symbols(), + PilStatement::StructDeclaration(_, struct_declaration) => struct_declaration.symbols(), + PilStatement::TraitImplementation(_, trait_implementation) => { + trait_implementation.symbols() + } + PilStatement::TraitDeclaration(_, _) => Box::new(std::iter::empty()), + PilStatement::Expression(_, expression) => Box::new( + expression + .all_children() + .flat_map(symbols_in_expression_asm) + .flatten(), + ), + } + } +} + +impl ReferencedSymbols for FunctionDefinition { + fn symbols(&self) -> 
Box> + '_> { + match self { + FunctionDefinition::TypeDeclaration(type_declaration) => type_declaration.symbols(), + FunctionDefinition::Array(..) + | FunctionDefinition::Expression(..) + | FunctionDefinition::TraitDeclaration(..) => { + Box::new(self.children().flat_map(|e| e.symbols())) + } + } + } +} + +impl ReferencedSymbols for FunctionBody { + fn symbols(&self) -> Box> + '_> { + Box::new(self.statements.iter().flat_map(|e| e.symbols())) + } +} + +impl ReferencedSymbols for FunctionStatement { + fn symbols(&self) -> Box> + '_> { + match self { + FunctionStatement::Assignment(a) => a.symbols(), + FunctionStatement::Instruction(i) => i.symbols(), + FunctionStatement::Return(r) => r.symbols(), + _ => Box::new(std::iter::empty()), + } + } +} + +impl ReferencedSymbols for AssignmentStatement { + fn symbols(&self) -> Box> + '_> { + Box::new( + self.lhs_with_reg + .iter() + .flat_map(|(n, reg)| { + let name_ref = Some(SymbolReference::from(n)); + let reg_ref = match reg { + AssignmentRegister::Register(name) => Some(SymbolReference::from(name)), + AssignmentRegister::Wildcard => None, + }; + [name_ref, reg_ref].into_iter().flatten() + }) + .chain(self.rhs.as_ref().symbols()), + ) + } +} + +impl ReferencedSymbols for Return { + fn symbols(&self) -> Box> + '_> { + Box::new(self.values.iter().flat_map(|expr| expr.symbols())) + } +} + +impl ReferencedSymbols for InstructionStatement { + fn symbols(&self) -> Box> + '_> { + Box::new(once(SymbolReference::from(&self.instruction))) + } +} + +impl ReferencedSymbols for ExpressionASM { + fn symbols(&self) -> Box> + '_> { + Box::new( + self.all_children() + .flat_map(symbols_in_expression_asm) + .flatten(), + ) + } +} diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index e1c0e41603..45f85e1006 100644 --- a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -21,6 +21,7 @@ estark-starky-simd = ["powdr-backend/estark-starky-simd"] [dependencies] powdr-airgen.workspace = true powdr-analysis.workspace = true 
+powdr-asmopt.workspace = true powdr-asm-to-pil.workspace = true powdr-ast.workspace = true powdr-backend.workspace = true diff --git a/pipeline/src/pipeline.rs b/pipeline/src/pipeline.rs index af64b8a022..5cf852ba92 100644 --- a/pipeline/src/pipeline.rs +++ b/pipeline/src/pipeline.rs @@ -55,6 +55,8 @@ pub struct Artifacts { /// The analyzed .asm file: Assignment registers are inferred, instructions /// are batched and some properties are checked. analyzed_asm: Option, + /// The optimized version of the analyzed ASM file. + optimized_asm: Option, /// A machine collection that only contains constrained machines. constrained_machine_collection: Option, /// The airgen graph, i.e. a collection of constrained machines with resolved @@ -156,6 +158,7 @@ impl Clone for Artifacts { parsed_asm_file: self.parsed_asm_file.clone(), resolved_module_tree: self.resolved_module_tree.clone(), analyzed_asm: self.analyzed_asm.clone(), + optimized_asm: self.optimized_asm.clone(), constrained_machine_collection: self.constrained_machine_collection.clone(), linked_machine_graph: self.linked_machine_graph.clone(), parsed_pil_file: self.parsed_pil_file.clone(), @@ -786,14 +789,33 @@ impl Pipeline { Ok(self.artifact.analyzed_asm.as_ref().unwrap()) } + pub fn compute_optimized_asm(&mut self) -> Result<&AnalysisASMFile, Vec> { + if let Some(ref optimized_asm) = self.artifact.optimized_asm { + return Ok(optimized_asm); + } + + self.compute_analyzed_asm()?; + let analyzed_asm = self.artifact.analyzed_asm.take().unwrap(); + + self.log("Optimizing asm..."); + let optimized = powdr_asmopt::optimize(analyzed_asm); + self.artifact.optimized_asm = Some(optimized); + + Ok(self.artifact.optimized_asm.as_ref().unwrap()) + } + + pub fn optimized_asm(&self) -> Result<&AnalysisASMFile, Vec> { + Ok(self.artifact.optimized_asm.as_ref().unwrap()) + } + pub fn compute_constrained_machine_collection( &mut self, ) -> Result<&AnalysisASMFile, Vec> { if self.artifact.constrained_machine_collection.is_none() { 
self.artifact.constrained_machine_collection = Some({ - self.compute_analyzed_asm()?; - let analyzed_asm = self.artifact.analyzed_asm.take().unwrap(); - powdr_asm_to_pil::compile::(analyzed_asm) + self.compute_optimized_asm()?; + let optimized_asm = self.artifact.optimized_asm.take().unwrap(); + powdr_asm_to_pil::compile::(optimized_asm) }); } diff --git a/test_data/asm/book/declarations.asm b/test_data/asm/book/declarations.asm index 75593153b2..3710f19307 100644 --- a/test_data/asm/book/declarations.asm +++ b/test_data/asm/book/declarations.asm @@ -28,9 +28,10 @@ machine Main with degree: 4 { utils::constrain_incremented_by(x, 0); // We define an instruction that uses a complicated way to increment a register. - instr incr_a { A = utils::incremented(A) } + instr incr_a { A' = utils::incremented(A) } function main { + incr_a; return; } } \ No newline at end of file From ace0b1db92891b0a9a9d8fd4580aac91e9a7b895 Mon Sep 17 00:00:00 2001 From: Steve Wang Date: Mon, 9 Dec 2024 18:10:05 +0800 Subject: [PATCH 35/57] Convert Keccak non memory circuit endianness (#1960) Was little endian before, now converted to big endian to match with all other machines. Simple PR. 
--------- Co-authored-by: Leo Alt --- std/machines/hash/mod.asm | 1 - .../{hash => small_field}/keccakf16.asm | 55 ++++++++++++++++++- std/machines/small_field/mod.asm | 1 + test_data/std/keccakf16_test.asm | 16 +++--- 4 files changed, 62 insertions(+), 11 deletions(-) rename std/machines/{hash => small_field}/keccakf16.asm (84%) diff --git a/std/machines/hash/mod.asm b/std/machines/hash/mod.asm index 3b9a215595..0321642f13 100644 --- a/std/machines/hash/mod.asm +++ b/std/machines/hash/mod.asm @@ -5,6 +5,5 @@ mod poseidon_bb; mod poseidon2_common; mod poseidon2_bb; mod poseidon2_gl; -mod keccakf16; mod keccakf16_memory; mod keccakf32_memory; diff --git a/std/machines/hash/keccakf16.asm b/std/machines/small_field/keccakf16.asm similarity index 84% rename from std/machines/hash/keccakf16.asm rename to std/machines/small_field/keccakf16.asm index c8b2b8236b..141efdc231 100644 --- a/std/machines/hash/keccakf16.asm +++ b/std/machines/small_field/keccakf16.asm @@ -19,9 +19,60 @@ machine Keccakf16 with std::check::require_field_bits(16, || "The field modulus should be at least 2^16 - 1 to work in the keccakf16 machine."); - // Expects input of 25 64-bit numbers decomposed to 25 chunks of 4 16-bit little endian limbs. + // Expects input of 25 64-bit numbers decomposed to 25 chunks of 4 16-bit big endian limbs. Same for output. // The output is a_prime_prime_prime_0_0_limbs for the first 4 and a_prime_prime for the rest. 
- operation keccakf16<0> preimage[0], preimage[1], preimage[2], preimage[3], preimage[4], preimage[5], preimage[6], preimage[7], preimage[8], preimage[9], preimage[10], preimage[11], preimage[12], preimage[13], preimage[14], preimage[15], preimage[16], preimage[17], preimage[18], preimage[19], preimage[20], preimage[21], preimage[22], preimage[23], preimage[24], preimage[25], preimage[26], preimage[27], preimage[28], preimage[29], preimage[30], preimage[31], preimage[32], preimage[33], preimage[34], preimage[35], preimage[36], preimage[37], preimage[38], preimage[39], preimage[40], preimage[41], preimage[42], preimage[43], preimage[44], preimage[45], preimage[46], preimage[47], preimage[48], preimage[49], preimage[50], preimage[51], preimage[52], preimage[53], preimage[54], preimage[55], preimage[56], preimage[57], preimage[58], preimage[59], preimage[60], preimage[61], preimage[62], preimage[63], preimage[64], preimage[65], preimage[66], preimage[67], preimage[68], preimage[69], preimage[70], preimage[71], preimage[72], preimage[73], preimage[74], preimage[75], preimage[76], preimage[77], preimage[78], preimage[79], preimage[80], preimage[81], preimage[82], preimage[83], preimage[84], preimage[85], preimage[86], preimage[87], preimage[88], preimage[89], preimage[90], preimage[91], preimage[92], preimage[93], preimage[94], preimage[95], preimage[96], preimage[97], preimage[98], preimage[99] -> a_prime_prime_prime_0_0_limbs[0], a_prime_prime_prime_0_0_limbs[1], a_prime_prime_prime_0_0_limbs[2], a_prime_prime_prime_0_0_limbs[3], a_prime_prime[4], a_prime_prime[5], a_prime_prime[6], a_prime_prime[7], a_prime_prime[8], a_prime_prime[9], a_prime_prime[10], a_prime_prime[11], a_prime_prime[12], a_prime_prime[13], a_prime_prime[14], a_prime_prime[15], a_prime_prime[16], a_prime_prime[17], a_prime_prime[18], a_prime_prime[19], a_prime_prime[20], a_prime_prime[21], a_prime_prime[22], a_prime_prime[23], a_prime_prime[24], a_prime_prime[25], a_prime_prime[26], 
a_prime_prime[27], a_prime_prime[28], a_prime_prime[29], a_prime_prime[30], a_prime_prime[31], a_prime_prime[32], a_prime_prime[33], a_prime_prime[34], a_prime_prime[35], a_prime_prime[36], a_prime_prime[37], a_prime_prime[38], a_prime_prime[39], a_prime_prime[40], a_prime_prime[41], a_prime_prime[42], a_prime_prime[43], a_prime_prime[44], a_prime_prime[45], a_prime_prime[46], a_prime_prime[47], a_prime_prime[48], a_prime_prime[49], a_prime_prime[50], a_prime_prime[51], a_prime_prime[52], a_prime_prime[53], a_prime_prime[54], a_prime_prime[55], a_prime_prime[56], a_prime_prime[57], a_prime_prime[58], a_prime_prime[59], a_prime_prime[60], a_prime_prime[61], a_prime_prime[62], a_prime_prime[63], a_prime_prime[64], a_prime_prime[65], a_prime_prime[66], a_prime_prime[67], a_prime_prime[68], a_prime_prime[69], a_prime_prime[70], a_prime_prime[71], a_prime_prime[72], a_prime_prime[73], a_prime_prime[74], a_prime_prime[75], a_prime_prime[76], a_prime_prime[77], a_prime_prime[78], a_prime_prime[79], a_prime_prime[80], a_prime_prime[81], a_prime_prime[82], a_prime_prime[83], a_prime_prime[84], a_prime_prime[85], a_prime_prime[86], a_prime_prime[87], a_prime_prime[88], a_prime_prime[89], a_prime_prime[90], a_prime_prime[91], a_prime_prime[92], a_prime_prime[93], a_prime_prime[94], a_prime_prime[95], a_prime_prime[96], a_prime_prime[97], a_prime_prime[98], a_prime_prime[99]; + operation keccakf16<0> + preimage[3], preimage[2], preimage[1], preimage[0], + preimage[7], preimage[6], preimage[5], preimage[4], + preimage[11], preimage[10], preimage[9], preimage[8], + preimage[15], preimage[14], preimage[13], preimage[12], + preimage[19], preimage[18], preimage[17], preimage[16], + preimage[23], preimage[22], preimage[21], preimage[20], + preimage[27], preimage[26], preimage[25], preimage[24], + preimage[31], preimage[30], preimage[29], preimage[28], + preimage[35], preimage[34], preimage[33], preimage[32], + preimage[39], preimage[38], preimage[37], preimage[36], + preimage[43], 
preimage[42], preimage[41], preimage[40], + preimage[47], preimage[46], preimage[45], preimage[44], + preimage[51], preimage[50], preimage[49], preimage[48], + preimage[55], preimage[54], preimage[53], preimage[52], + preimage[59], preimage[58], preimage[57], preimage[56], + preimage[63], preimage[62], preimage[61], preimage[60], + preimage[67], preimage[66], preimage[65], preimage[64], + preimage[71], preimage[70], preimage[69], preimage[68], + preimage[75], preimage[74], preimage[73], preimage[72], + preimage[79], preimage[78], preimage[77], preimage[76], + preimage[83], preimage[82], preimage[81], preimage[80], + preimage[87], preimage[86], preimage[85], preimage[84], + preimage[91], preimage[90], preimage[89], preimage[88], + preimage[95], preimage[94], preimage[93], preimage[92], + preimage[99], preimage[98], preimage[97], preimage[96] + -> + a_prime_prime_prime_0_0_limbs[3], a_prime_prime_prime_0_0_limbs[2], a_prime_prime_prime_0_0_limbs[1], a_prime_prime_prime_0_0_limbs[0], + a_prime_prime[7], a_prime_prime[6], a_prime_prime[5], a_prime_prime[4], + a_prime_prime[11], a_prime_prime[10], a_prime_prime[9], a_prime_prime[8], + a_prime_prime[15], a_prime_prime[14], a_prime_prime[13], a_prime_prime[12], + a_prime_prime[19], a_prime_prime[18], a_prime_prime[17], a_prime_prime[16], + a_prime_prime[23], a_prime_prime[22], a_prime_prime[21], a_prime_prime[20], + a_prime_prime[27], a_prime_prime[26], a_prime_prime[25], a_prime_prime[24], + a_prime_prime[31], a_prime_prime[30], a_prime_prime[29], a_prime_prime[28], + a_prime_prime[35], a_prime_prime[34], a_prime_prime[33], a_prime_prime[32], + a_prime_prime[39], a_prime_prime[38], a_prime_prime[37], a_prime_prime[36], + a_prime_prime[43], a_prime_prime[42], a_prime_prime[41], a_prime_prime[40], + a_prime_prime[47], a_prime_prime[46], a_prime_prime[45], a_prime_prime[44], + a_prime_prime[51], a_prime_prime[50], a_prime_prime[49], a_prime_prime[48], + a_prime_prime[55], a_prime_prime[54], a_prime_prime[53], 
a_prime_prime[52], + a_prime_prime[59], a_prime_prime[58], a_prime_prime[57], a_prime_prime[56], + a_prime_prime[63], a_prime_prime[62], a_prime_prime[61], a_prime_prime[60], + a_prime_prime[67], a_prime_prime[66], a_prime_prime[65], a_prime_prime[64], + a_prime_prime[71], a_prime_prime[70], a_prime_prime[69], a_prime_prime[68], + a_prime_prime[75], a_prime_prime[74], a_prime_prime[73], a_prime_prime[72], + a_prime_prime[79], a_prime_prime[78], a_prime_prime[77], a_prime_prime[76], + a_prime_prime[83], a_prime_prime[82], a_prime_prime[81], a_prime_prime[80], + a_prime_prime[87], a_prime_prime[86], a_prime_prime[85], a_prime_prime[84], + a_prime_prime[91], a_prime_prime[90], a_prime_prime[89], a_prime_prime[88], + a_prime_prime[95], a_prime_prime[94], a_prime_prime[93], a_prime_prime[92], + a_prime_prime[99], a_prime_prime[98], a_prime_prime[97], a_prime_prime[96]; col witness operation_id; diff --git a/std/machines/small_field/mod.asm b/std/machines/small_field/mod.asm index bb71f682e6..a8ee054bc4 100644 --- a/std/machines/small_field/mod.asm +++ b/std/machines/small_field/mod.asm @@ -8,3 +8,4 @@ mod memory; mod pointer_arith; mod rotate; mod shift; +mod keccakf16; diff --git a/test_data/std/keccakf16_test.asm b/test_data/std/keccakf16_test.asm index 7b7b30a253..7b06b3a1c7 100644 --- a/test_data/std/keccakf16_test.asm +++ b/test_data/std/keccakf16_test.asm @@ -1,4 +1,4 @@ -use std::machines::hash::keccakf16::Keccakf16; +use std::machines::small_field::keccakf16::Keccakf16; let main_degree: int = 2**4; let keccak_degree: int = 2**6; @@ -319,14 +319,14 @@ machine Main with degree: main_degree { function main { // 0 for all 25 64-bit inputs except setting the second 64-bit input to 1. All 64-bit inputs in chunks of 4 16-bit little endian limbs. 
- A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40, A41, A42, A43, A44, A45, A46, A47, A48, A49, A50, A51, A52, A53, A54, A55, A56, A57, A58, A59, A60, A61, A62, A63, A64, A65, A66, A67, A68, A69, A70, A71, A72, A73, A74, A75, A76, A77, A78, A79, A80, A81, A82, A83, A84, A85, A86, A87, A88, A89, A90, A91, A92, A93, A94, A95, A96, A97, A98, A99 <== keccakf16(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40, A41, A42, A43, A44, A45, A46, A47, A48, A49, A50, A51, A52, A53, A54, A55, A56, A57, A58, A59, A60, A61, A62, A63, A64, A65, A66, A67, A68, A69, A70, A71, A72, A73, A74, A75, A76, A77, A78, A79, A80, A81, A82, A83, A84, A85, A86, A87, A88, A89, A90, A91, A92, A93, A94, A95, A96, A97, A98, A99 <== keccakf16(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); // Selectively checking a few registers only. 
- assert_eq A0, 0x405f; - assert_eq A3, 0xfdbb; - assert_eq A92, 0x8f6e; - assert_eq A95, 0x3e10; - assert_eq A96, 0xeb35; - assert_eq A99, 0xeac9; + assert_eq A3, 0x405f; + assert_eq A0, 0xfdbb; + assert_eq A95, 0x8f6e; + assert_eq A92, 0x3e10; + assert_eq A99, 0xeb35; + assert_eq A96, 0xeac9; return; } From f1a0c1feedbba5e2300391a03c09afa24bc776b6 Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 9 Dec 2024 11:51:42 +0100 Subject: [PATCH 36/57] Paged memory (#2205) Change memory machine to use pages instead of a "flat" map. --- .../double_sorted_witness_machine_32.rs | 76 +++++++++++++++++-- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/executor/src/witgen/machines/double_sorted_witness_machine_32.rs b/executor/src/witgen/machines/double_sorted_witness_machine_32.rs index 12901b6870..3423fafbd9 100644 --- a/executor/src/witgen/machines/double_sorted_witness_machine_32.rs +++ b/executor/src/witgen/machines/double_sorted_witness_machine_32.rs @@ -11,7 +11,7 @@ use crate::witgen::util::try_to_simple_poly; use crate::witgen::{EvalError, EvalResult, FixedData, QueryCallback}; use crate::witgen::{EvalValue, IncompleteCause}; -use powdr_number::{DegreeType, FieldElement}; +use powdr_number::{DegreeType, FieldElement, LargeInt}; use powdr_ast::analyzed::{DegreeRange, PolyID}; @@ -54,8 +54,8 @@ pub struct DoubleSortedWitnesses32<'a, T: FieldElement> { //witness_positions: HashMap, /// (addr, step) -> value trace: BTreeMap<(T, T), Operation>, - /// A map addr -> value, the current content of the memory. - data: BTreeMap, + /// The current contents of memory. 
+ data: PagedData, is_initialized: BTreeMap, namespace: String, name: String, @@ -419,7 +419,7 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { addr, value ); - self.data.insert(addr, value); + self.data.write(addr, value); self.trace .insert( (addr, step), @@ -432,14 +432,14 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { ) .is_none() } else { - let value = self.data.entry(addr).or_default(); + let value = self.data.read(addr); log::trace!( "Memory read: addr={:x}, step={step}, value={:x}", addr, value ); let ass = - (value_expr.clone() - (*value).into()).solve_with_range_constraints(caller_rows)?; + (value_expr.clone() - value.into()).solve_with_range_constraints(caller_rows)?; assignments.combine(ass); self.trace .insert( @@ -447,7 +447,7 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { Operation { is_normal_write, is_bootloader_write, - value: *value, + value, selector_id, }, ) @@ -466,3 +466,65 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { Ok(assignments) } } + +/// A paged key-value store. Addresses do not overlap, every address can store +/// a full field element. +struct PagedData { + /// All pages except the first. + pages: HashMap>, + /// The first page, to optimize for small memory addresses. + first_page: Vec, +} + +impl Default for PagedData { + fn default() -> Self { + Self { + pages: Default::default(), + first_page: Self::fresh_page(), + } + } +} + +impl PagedData { + /// Tuning parameters. + /// On the dev machine, only the combination of "PAGE_SIZE_LOG2 <= 8" and the introduction + /// of "page zero" gives a 2x improvement in the register machine (and 20% in regular + /// memory as well actually) relative to non-paged. + /// This should be continuously monitored. 
+ const PAGE_SIZE_LOG2: u64 = 8; + const PAGE_SIZE: u64 = (1 << Self::PAGE_SIZE_LOG2); + const PAGE_MASK: u64 = Self::PAGE_SIZE - 1; + + fn page_offset(addr: T) -> (u64, usize) { + let addr = addr.to_integer().try_into_u64().unwrap(); + ( + addr >> Self::PAGE_SIZE_LOG2, + (addr & Self::PAGE_MASK) as usize, + ) + } + + fn fresh_page() -> Vec { + vec![0.into(); Self::PAGE_SIZE as usize] + } + + pub fn write(&mut self, addr: T, value: T) { + let (page, offset) = Self::page_offset(addr); + if page == 0 { + self.first_page[offset] = value; + } else { + self.pages.entry(page).or_insert_with(Self::fresh_page)[offset] = value; + } + } + + pub fn read(&mut self, addr: T) -> T { + let (page, offset) = Self::page_offset(addr); + if page == 0 { + self.first_page[offset] + } else { + self.pages + .get(&page) + .map(|page| page[offset]) + .unwrap_or_default() + } + } +} From 8a3e33e07c7e2f180309499bfb36d20cc5f89a3c Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Mon, 9 Dec 2024 12:33:42 +0100 Subject: [PATCH 37/57] Add a simpler `ExpressionEvaluator` (#2194) This PR: - Renames the current `executor::witgen::ExpressionEvaluator` to `executor::witgen::evaluators::partial_expression_evaluator::PartialExpressionEvaluator` - It is used when solving and evaluates to a `AffineResult, T>`, which might still contain unknown variables. - Adds a new `ExpressionEvaluator` that simply evaluates to `T` - Changes `MockBackend` to use the new `ExpressionEvaluator` (previously wrapped what is now called the `PartialExpressionEvaluator`) As a result, the code in `MockBackend` can be simplified. Also, I'm building on this in #2191 for fast witness generation for the bus. 
--- .../src/mock/connection_constraint_checker.rs | 46 +++--- backend/src/mock/evaluator.rs | 65 --------- backend/src/mock/machine.rs | 34 ++--- backend/src/mock/mod.rs | 1 - .../src/mock/polynomial_constraint_checker.rs | 39 +++-- .../witgen/evaluators/expression_evaluator.rs | 133 ++++++++++++++++++ .../{ => evaluators}/fixed_evaluator.rs | 5 +- executor/src/witgen/evaluators/mod.rs | 5 + .../partial_expression_evaluator.rs} | 9 +- .../{ => evaluators}/symbolic_evaluator.rs | 8 +- .../symbolic_witness_evaluator.rs | 16 +-- executor/src/witgen/global_constraints.rs | 14 +- .../witgen/machines/sorted_witness_machine.rs | 11 +- executor/src/witgen/mod.rs | 7 +- executor/src/witgen/rows.rs | 7 +- 15 files changed, 233 insertions(+), 167 deletions(-) delete mode 100644 backend/src/mock/evaluator.rs create mode 100644 executor/src/witgen/evaluators/expression_evaluator.rs rename executor/src/witgen/{ => evaluators}/fixed_evaluator.rs (91%) create mode 100644 executor/src/witgen/evaluators/mod.rs rename executor/src/witgen/{expression_evaluator.rs => evaluators/partial_expression_evaluator.rs} (97%) rename executor/src/witgen/{ => evaluators}/symbolic_evaluator.rs (85%) rename executor/src/witgen/{ => evaluators}/symbolic_witness_evaluator.rs (87%) diff --git a/backend/src/mock/connection_constraint_checker.rs b/backend/src/mock/connection_constraint_checker.rs index 8259ff5642..901f32bad6 100644 --- a/backend/src/mock/connection_constraint_checker.rs +++ b/backend/src/mock/connection_constraint_checker.rs @@ -5,6 +5,7 @@ use std::ops::ControlFlow; use itertools::Itertools; use powdr_ast::analyzed::AlgebraicExpression; +use powdr_ast::analyzed::AlgebraicReference; use powdr_ast::analyzed::Analyzed; use powdr_ast::analyzed::{ Identity, LookupIdentity, PermutationIdentity, PhantomLookupIdentity, @@ -13,15 +14,12 @@ use powdr_ast::analyzed::{ use powdr_ast::parsed::visitor::ExpressionVisitable; use powdr_ast::parsed::visitor::VisitOrder; use 
powdr_backend_utils::referenced_namespaces_algebraic_expression; -use powdr_executor::witgen::ExpressionEvaluator; +use powdr_executor::witgen::evaluators::expression_evaluator::ExpressionEvaluator; +use powdr_executor::witgen::evaluators::expression_evaluator::TraceValues; use powdr_number::FieldElement; use rayon::iter::IntoParallelIterator; use rayon::iter::ParallelIterator; -use crate::mock::evaluator::evaluate_to_fe; - -use super::evaluator::EmptyVariables; -use super::evaluator::Variables; use super::machine::Machine; #[derive(PartialEq, Eq, Debug)] @@ -256,21 +254,19 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { Some(machine) => (0..machine.size) .into_par_iter() .filter_map(|row| { - let variables = Variables { - machine, - row, - challenges: self.challenges, - }; - let mut evaluator = - ExpressionEvaluator::new(&variables, &machine.intermediate_definitions); - let result = evaluate_to_fe(&mut evaluator, &selected_expressions.selector); + let mut evaluator = ExpressionEvaluator::new( + machine.trace_values.row(row), + &machine.intermediate_definitions, + self.challenges, + ); + let result = evaluator.evaluate(&selected_expressions.selector); assert!(result.is_zero() || result.is_one(), "Non-binary selector"); result.is_one().then(|| { let values = selected_expressions .expressions .iter() - .map(|expression| evaluate_to_fe(&mut evaluator, expression)) + .map(|expression| evaluator.evaluate(expression)) .collect::>(); Tuple { values, row } }) @@ -283,8 +279,13 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { None => { let empty_variables = EmptyVariables {}; let empty_definitions = BTreeMap::new(); - let mut evaluator = ExpressionEvaluator::new(empty_variables, &empty_definitions); - let selector_value = evaluate_to_fe(&mut evaluator, &selected_expressions.selector); + let empty_challenges = BTreeMap::new(); + let mut evaluator = ExpressionEvaluator::new( + empty_variables, + &empty_definitions, + &empty_challenges, + ); 
+ let selector_value = evaluator.evaluate(&selected_expressions.selector); match selector_value.to_degree() { // Selected expressions is of the form `0 $ [ ]` @@ -307,7 +308,7 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { let values = selected_expressions .expressions .iter() - .map(|expression| evaluate_to_fe(&mut evaluator, expression)) + .map(|expression| evaluator.evaluate(expression)) .collect::>(); vec![Tuple { values, row: 0 }] } @@ -318,6 +319,17 @@ impl<'a, F: FieldElement> ConnectionConstraintChecker<'a, F> { } } +struct EmptyVariables; + +impl TraceValues for EmptyVariables +where + T: FieldElement, +{ + fn get(&self, _reference: &AlgebraicReference) -> T { + panic!() + } +} + /// Converts a slice to a multi-set, represented as a map from elements to their count. fn to_multi_set(a: &[T]) -> BTreeMap<&T, usize> { a.iter() diff --git a/backend/src/mock/evaluator.rs b/backend/src/mock/evaluator.rs deleted file mode 100644 index 11da31ffaa..0000000000 --- a/backend/src/mock/evaluator.rs +++ /dev/null @@ -1,65 +0,0 @@ -use std::collections::BTreeMap; - -use powdr_ast::analyzed::{AlgebraicExpression, Challenge, PolynomialType}; -use powdr_executor::witgen::{ - AffineExpression, AffineResult, AlgebraicVariable, ExpressionEvaluator, SymbolicVariables, -}; -use powdr_number::FieldElement; - -use super::machine::Machine; - -pub struct Variables<'a, F> { - pub machine: &'a Machine<'a, F>, - pub row: usize, - pub challenges: &'a BTreeMap, -} - -impl<'a, F: FieldElement> Variables<'a, F> { - pub fn constant_value(&self, var: AlgebraicVariable) -> F { - match var { - AlgebraicVariable::Column(column) => match column.poly_id.ptype { - PolynomialType::Committed | PolynomialType::Constant => { - let column_values = self.machine.columns.get(&column.poly_id).unwrap(); - let row = (self.row + column.next as usize) % column_values.len(); - column_values[row] - } - PolynomialType::Intermediate => unreachable!( - "Intermediate polynomials should have been 
handled by ExpressionEvaluator" - ), - }, - AlgebraicVariable::Public(_) => todo!(), - } - } -} - -impl<'a, F: FieldElement> SymbolicVariables for &Variables<'a, F> { - fn value<'b>(&self, var: AlgebraicVariable<'b>) -> AffineResult, F> { - Ok(self.constant_value(var).into()) - } - - fn challenge<'b>(&self, challenge: &'b Challenge) -> AffineResult, F> { - Ok(self.challenges[&challenge.id].into()) - } -} - -pub struct EmptyVariables; - -impl SymbolicVariables for EmptyVariables -where - T: FieldElement, -{ - fn value<'a>(&self, _var: AlgebraicVariable<'a>) -> AffineResult, T> { - unimplemented!() - } -} - -pub fn evaluate_to_fe<'a, F: FieldElement, SV: SymbolicVariables>( - evaluator: &mut ExpressionEvaluator<'a, F, SV>, - expr: &'a AlgebraicExpression, -) -> F { - let result = evaluator.evaluate(expr).unwrap(); - match result { - AffineExpression::Constant(c) => c, - _ => unreachable!("Unexpected result: {:?}", result), - } -} diff --git a/backend/src/mock/machine.rs b/backend/src/mock/machine.rs index 1e634224b0..93742e58ba 100644 --- a/backend/src/mock/machine.rs +++ b/backend/src/mock/machine.rs @@ -1,16 +1,19 @@ use std::collections::BTreeMap; use itertools::Itertools; -use powdr_ast::analyzed::{AlgebraicExpression, AlgebraicReferenceThin, Analyzed, PolyID}; +use powdr_ast::analyzed::{AlgebraicExpression, AlgebraicReferenceThin, Analyzed}; use powdr_backend_utils::{machine_fixed_columns, machine_witness_columns}; -use powdr_executor::{constant_evaluator::VariablySizedColumn, witgen::WitgenCallback}; +use powdr_executor::{ + constant_evaluator::VariablySizedColumn, + witgen::{evaluators::expression_evaluator::OwnedTraceValues, WitgenCallback}, +}; use powdr_number::{DegreeType, FieldElement}; /// A collection of columns with self-contained constraints. 
pub struct Machine<'a, F> { pub machine_name: String, pub size: usize, - pub columns: BTreeMap>, + pub trace_values: OwnedTraceValues, pub pil: &'a Analyzed, pub intermediate_definitions: BTreeMap>, } @@ -46,31 +49,20 @@ impl<'a, F: FieldElement> Machine<'a, F> { let fixed = machine_fixed_columns(fixed, pil); let fixed = fixed.get(&(size as DegreeType)).unwrap(); + let fixed = fixed + .iter() + // TODO: Avoid clone? + .map(|(name, col)| (name.clone(), col.to_vec())) + .collect::>(); let intermediate_definitions = pil.intermediate_definitions(); - let mut columns_by_name = witness - .into_iter() - // TODO: Avoid clone? - .chain(fixed.iter().map(|(name, col)| (name.clone(), col.to_vec()))) - .collect::>(); - - let columns = pil - .committed_polys_in_source_order() - .chain(pil.constant_polys_in_source_order()) - .flat_map(|(symbol, _)| symbol.array_elements()) - .map(|(name, poly_id)| { - let column = columns_by_name - .remove(&name) - .unwrap_or_else(|| panic!("Missing column: {name}")); - (poly_id, column) - }) - .collect(); + let trace_values = OwnedTraceValues::new(pil, witness, fixed); Some(Self { machine_name, size, - columns, + trace_values, pil, intermediate_definitions, }) diff --git a/backend/src/mock/mod.rs b/backend/src/mock/mod.rs index 6b37fd1dd6..29e9c62d5e 100644 --- a/backend/src/mock/mod.rs +++ b/backend/src/mock/mod.rs @@ -20,7 +20,6 @@ use powdr_number::{DegreeType, FieldElement}; use crate::{Backend, BackendFactory, BackendOptions, Error, Proof}; mod connection_constraint_checker; -mod evaluator; mod machine; mod polynomial_constraint_checker; diff --git a/backend/src/mock/polynomial_constraint_checker.rs b/backend/src/mock/polynomial_constraint_checker.rs index 194f246c9b..82bd300266 100644 --- a/backend/src/mock/polynomial_constraint_checker.rs +++ b/backend/src/mock/polynomial_constraint_checker.rs @@ -1,15 +1,13 @@ use std::{collections::BTreeMap, fmt}; use powdr_ast::{ - analyzed::{Identity, PolynomialIdentity}, + 
analyzed::{AlgebraicExpression, Identity, PolynomialIdentity}, parsed::visitor::AllChildren, }; -use powdr_executor::witgen::{AffineExpression, AlgebraicVariable, ExpressionEvaluator}; +use powdr_executor::witgen::evaluators::expression_evaluator::ExpressionEvaluator; use powdr_number::FieldElement; use rayon::iter::{IntoParallelIterator, ParallelIterator}; -use crate::mock::evaluator::Variables; - use super::machine::Machine; pub struct PolynomialConstraintChecker<'a, F> { @@ -55,13 +53,11 @@ impl<'a, F: FieldElement> PolynomialConstraintChecker<'a, F> { row: usize, identities: &[&'a Identity], ) -> Vec> { - let variables = Variables { - machine: self.machine, - row, - challenges: self.challenges, - }; - let mut evaluator = - ExpressionEvaluator::new(&variables, &self.machine.intermediate_definitions); + let mut evaluator = ExpressionEvaluator::new( + self.machine.trace_values.row(row), + &self.machine.intermediate_definitions, + self.challenges, + ); identities .iter() .filter_map(|identity| { @@ -69,21 +65,22 @@ impl<'a, F: FieldElement> PolynomialConstraintChecker<'a, F> { Identity::Polynomial(polynomial_identity) => polynomial_identity, _ => unreachable!("Unexpected identity: {}", identity), }; - let result = evaluator.evaluate(&identity.expression).unwrap(); - let result = match result { - AffineExpression::Constant(c) => c, - _ => unreachable!("Unexpected result: {:?}", result), - }; + let result = evaluator.evaluate(&identity.expression); if result != F::zero() { - let used_variables = identity - .all_children() - .filter_map(|child| child.try_into().ok()); + let used_variables = identity.all_children().filter(|expr| match expr { + AlgebraicExpression::Reference(_) + | AlgebraicExpression::PublicReference(_) + | AlgebraicExpression::Challenge(_) => true, + AlgebraicExpression::Number(_) + | AlgebraicExpression::BinaryOperation(_) + | AlgebraicExpression::UnaryOperation(_) => false, + }); Some(FailingPolynomialConstraint { row, identity, assignments: 
used_variables - .map(|variable| (variable, variables.constant_value(variable))) + .map(|variable| (variable, evaluator.evaluate(variable))) .collect(), }) } else { @@ -97,7 +94,7 @@ impl<'a, F: FieldElement> PolynomialConstraintChecker<'a, F> { struct FailingPolynomialConstraint<'a, F> { row: usize, identity: &'a PolynomialIdentity, - assignments: BTreeMap, F>, + assignments: BTreeMap<&'a AlgebraicExpression, F>, } impl fmt::Display for FailingPolynomialConstraint<'_, F> { diff --git a/executor/src/witgen/evaluators/expression_evaluator.rs b/executor/src/witgen/evaluators/expression_evaluator.rs new file mode 100644 index 0000000000..d992749290 --- /dev/null +++ b/executor/src/witgen/evaluators/expression_evaluator.rs @@ -0,0 +1,133 @@ +use std::collections::BTreeMap; + +use powdr_ast::analyzed::{ + AlgebraicBinaryOperation, AlgebraicBinaryOperator, AlgebraicExpression as Expression, + AlgebraicReference, AlgebraicReferenceThin, AlgebraicUnaryOperation, AlgebraicUnaryOperator, + Analyzed, PolyID, PolynomialType, +}; +use powdr_number::FieldElement; + +/// Accessor for trace values. +pub trait TraceValues { + fn get(&self, poly_id: &AlgebraicReference) -> T; +} + +/// A simple container for trace values. +pub struct OwnedTraceValues { + pub values: BTreeMap>, +} + +/// A view into the trace values for a single row. 
+pub struct RowTraceValues<'a, T> { + trace: &'a OwnedTraceValues, + row: usize, +} + +impl OwnedTraceValues { + pub fn new( + pil: &Analyzed, + witness_columns: Vec<(String, Vec)>, + fixed_columns: Vec<(String, Vec)>, + ) -> Self { + let mut columns_by_name = witness_columns + .into_iter() + .chain(fixed_columns) + .collect::>(); + let values = pil + .committed_polys_in_source_order() + .chain(pil.constant_polys_in_source_order()) + .flat_map(|(symbol, _)| symbol.array_elements()) + .map(|(name, poly_id)| { + let column = columns_by_name + .remove(&name) + .unwrap_or_else(|| panic!("Missing column: {name}")); + (poly_id, column) + }) + .collect(); + Self { values } + } + + pub fn row(&self, row: usize) -> RowTraceValues { + RowTraceValues { trace: self, row } + } +} + +impl<'a, F: FieldElement> TraceValues for RowTraceValues<'a, F> { + fn get(&self, column: &AlgebraicReference) -> F { + match column.poly_id.ptype { + PolynomialType::Committed | PolynomialType::Constant => { + let column_values = self.trace.values.get(&column.poly_id).unwrap(); + let row = (self.row + column.next as usize) % column_values.len(); + column_values[row] + } + PolynomialType::Intermediate => unreachable!( + "Intermediate polynomials should have been handled by ExpressionEvaluator" + ), + } + } +} + +/// Evaluates an algebraic expression to a value. +pub struct ExpressionEvaluator<'a, T, SV> { + trace_values: SV, + intermediate_definitions: &'a BTreeMap>, + challenges: &'a BTreeMap, + /// Maps intermediate reference to their evaluation. Updated throughout the lifetime of the + /// ExpressionEvaluator. 
+    intermediates_cache: BTreeMap<AlgebraicReferenceThin, T>,
+}
+
+impl<'a, T, TV> ExpressionEvaluator<'a, T, TV>
+where
+    TV: TraceValues<T>,
+    T: FieldElement,
+{
+    pub fn new(
+        variables: TV,
+        intermediate_definitions: &'a BTreeMap<AlgebraicReferenceThin, Expression<T>>,
+        challenges: &'a BTreeMap<u64, T>,
+    ) -> Self {
+        Self {
+            trace_values: variables,
+            intermediate_definitions,
+            challenges,
+            intermediates_cache: Default::default(),
+        }
+    }
+
+    pub fn evaluate(&mut self, expr: &'a Expression<T>) -> T {
+        match expr {
+            Expression::Reference(reference) => match reference.poly_id.ptype {
+                PolynomialType::Committed => self.trace_values.get(reference),
+                PolynomialType::Constant => self.trace_values.get(reference),
+                PolynomialType::Intermediate => {
+                    let reference = reference.to_thin();
+                    let value = self.intermediates_cache.get(&reference).cloned();
+                    match value {
+                        Some(v) => v,
+                        None => {
+                            let definition = self.intermediate_definitions.get(&reference).unwrap();
+                            let result = self.evaluate(definition);
+                            self.intermediates_cache.insert(reference, result);
+                            result
+                        }
+                    }
+                }
+            },
+            Expression::PublicReference(_public) => unimplemented!(),
+            Expression::Number(n) => *n,
+            Expression::BinaryOperation(AlgebraicBinaryOperation { left, op, right }) => match op {
+                AlgebraicBinaryOperator::Add => self.evaluate(left) + self.evaluate(right),
+                AlgebraicBinaryOperator::Sub => self.evaluate(left) - self.evaluate(right),
+                AlgebraicBinaryOperator::Mul => self.evaluate(left) * self.evaluate(right),
+                AlgebraicBinaryOperator::Pow => {
+                    self.evaluate(left).pow(self.evaluate(right).to_integer())
+                }
+            },
+            Expression::UnaryOperation(AlgebraicUnaryOperation { op, expr }) => match op {
+                // Unary minus must negate the evaluated operand; returning
+                // `self.evaluate(expr)` unchanged would silently drop the sign.
+                AlgebraicUnaryOperator::Minus => -self.evaluate(expr),
+            },
+            Expression::Challenge(challenge) => self.challenges[&challenge.id],
+        }
+    }
+}
diff --git a/executor/src/witgen/fixed_evaluator.rs b/executor/src/witgen/evaluators/fixed_evaluator.rs
similarity index 91%
rename from executor/src/witgen/fixed_evaluator.rs
rename to 
executor/src/witgen/evaluators/fixed_evaluator.rs index d147a203a1..4802d19016 100644 --- a/executor/src/witgen/fixed_evaluator.rs +++ b/executor/src/witgen/evaluators/fixed_evaluator.rs @@ -1,8 +1,7 @@ -use super::affine_expression::{AffineResult, AlgebraicVariable}; -use super::expression_evaluator::SymbolicVariables; -use super::FixedData; use powdr_number::{DegreeType, FieldElement}; +use crate::witgen::{AffineResult, AlgebraicVariable, FixedData, SymbolicVariables}; + /// Evaluates only fixed columns on a specific row. pub struct FixedEvaluator<'a, T: FieldElement> { fixed_data: &'a FixedData<'a, T>, diff --git a/executor/src/witgen/evaluators/mod.rs b/executor/src/witgen/evaluators/mod.rs new file mode 100644 index 0000000000..7a34837256 --- /dev/null +++ b/executor/src/witgen/evaluators/mod.rs @@ -0,0 +1,5 @@ +pub mod expression_evaluator; +pub mod fixed_evaluator; +pub mod partial_expression_evaluator; +pub mod symbolic_evaluator; +pub mod symbolic_witness_evaluator; diff --git a/executor/src/witgen/expression_evaluator.rs b/executor/src/witgen/evaluators/partial_expression_evaluator.rs similarity index 97% rename from executor/src/witgen/expression_evaluator.rs rename to executor/src/witgen/evaluators/partial_expression_evaluator.rs index 908584cd04..96656d4286 100644 --- a/executor/src/witgen/expression_evaluator.rs +++ b/executor/src/witgen/evaluators/partial_expression_evaluator.rs @@ -8,10 +8,7 @@ use powdr_ast::analyzed::{ use powdr_number::FieldElement; -use super::{ - affine_expression::{AffineResult, AlgebraicVariable}, - IncompleteCause, -}; +use crate::witgen::{AffineResult, AlgebraicVariable, IncompleteCause}; pub trait SymbolicVariables { /// Value of a polynomial (fixed or witness) or public. 
@@ -24,7 +21,7 @@ pub trait SymbolicVariables { } } -pub struct ExpressionEvaluator<'a, T, SV> { +pub struct PartialExpressionEvaluator<'a, T, SV> { variables: SV, intermediate_definitions: &'a BTreeMap>, /// Maps intermediate reference to their evaluation. Updated throughout the lifetime of the @@ -32,7 +29,7 @@ pub struct ExpressionEvaluator<'a, T, SV> { intermediates_cache: BTreeMap, T>>, } -impl<'a, T, SV> ExpressionEvaluator<'a, T, SV> +impl<'a, T, SV> PartialExpressionEvaluator<'a, T, SV> where SV: SymbolicVariables, T: FieldElement, diff --git a/executor/src/witgen/symbolic_evaluator.rs b/executor/src/witgen/evaluators/symbolic_evaluator.rs similarity index 85% rename from executor/src/witgen/symbolic_evaluator.rs rename to executor/src/witgen/evaluators/symbolic_evaluator.rs index a7d79f48ab..e2e0f86592 100644 --- a/executor/src/witgen/symbolic_evaluator.rs +++ b/executor/src/witgen/evaluators/symbolic_evaluator.rs @@ -1,9 +1,9 @@ -use super::affine_expression::{AffineExpression, AffineResult, AlgebraicVariable}; -use super::expression_evaluator::SymbolicVariables; -use super::IncompleteCause; - use powdr_number::FieldElement; +use crate::witgen::{AffineExpression, AffineResult, AlgebraicVariable, IncompleteCause}; + +use super::partial_expression_evaluator::SymbolicVariables; + /// A purely symbolic evaluator, uses AlgebraicReference as keys /// and neither resolves fixed columns nor witness columns. 
#[derive(Clone, Default)] diff --git a/executor/src/witgen/symbolic_witness_evaluator.rs b/executor/src/witgen/evaluators/symbolic_witness_evaluator.rs similarity index 87% rename from executor/src/witgen/symbolic_witness_evaluator.rs rename to executor/src/witgen/evaluators/symbolic_witness_evaluator.rs index 7724eef122..fe665a3344 100644 --- a/executor/src/witgen/symbolic_witness_evaluator.rs +++ b/executor/src/witgen/evaluators/symbolic_witness_evaluator.rs @@ -1,11 +1,9 @@ use powdr_ast::analyzed::{Challenge, PolynomialType}; use powdr_number::{DegreeType, FieldElement}; -use super::{ - affine_expression::{AffineResult, AlgebraicVariable}, - expression_evaluator::SymbolicVariables, - FixedData, -}; +use crate::witgen::{AffineResult, AlgebraicVariable, FixedData}; + +use super::partial_expression_evaluator::SymbolicVariables; pub trait WitnessColumnEvaluator { /// Returns a symbolic or concrete value for the given witness column and next flag. @@ -14,9 +12,11 @@ pub trait WitnessColumnEvaluator { fn value<'b>(&self, poly: AlgebraicVariable<'b>) -> AffineResult, T>; } -/// An evaluator (to be used together with ExpressionEvaluator) that performs concrete -/// evaluation of all fixed columns but falls back to a generic WitnessColumnEvaluator -/// to evaluate the witness columns either symbolically or concretely. +/// An evaluator to be used together with ExpressionEvaluator. +/// +/// Performs concrete evaluation of all fixed columns but falls back to a +/// generic WitnessColumnEvaluator to evaluate the witness columns either +/// symbolically or concretely. 
pub struct SymbolicWitnessEvaluator<'a, T: FieldElement, WA: WitnessColumnEvaluator> { fixed_data: &'a FixedData<'a, T>, row: DegreeType, diff --git a/executor/src/witgen/global_constraints.rs b/executor/src/witgen/global_constraints.rs index b82ef935df..32fd6ef6af 100644 --- a/executor/src/witgen/global_constraints.rs +++ b/executor/src/witgen/global_constraints.rs @@ -16,10 +16,10 @@ use crate::witgen::data_structures::column_map::{FixedColumnMap, WitnessColumnMa use crate::Identity; use super::affine_expression::AlgebraicVariable; -use super::expression_evaluator::ExpressionEvaluator; +use super::evaluators::partial_expression_evaluator::PartialExpressionEvaluator; +use super::evaluators::symbolic_evaluator::SymbolicEvaluator; use super::machines::Connection; use super::range_constraints::RangeConstraint; -use super::symbolic_evaluator::SymbolicEvaluator; use super::util::try_to_simple_poly; use super::{Constraint, FixedData}; use powdr_ast::analyzed::AlgebraicExpression; @@ -363,7 +363,8 @@ fn is_binary_constraint( right, }) = expr { - let mut evaluator = ExpressionEvaluator::new(SymbolicEvaluator, intermediate_definitions); + let mut evaluator = + PartialExpressionEvaluator::new(SymbolicEvaluator, intermediate_definitions); let left_root = evaluator.evaluate(left).ok().and_then(|l| l.solve().ok())?; let right_root = evaluator .evaluate(right) @@ -397,9 +398,10 @@ fn try_transfer_constraints( return vec![]; } - let Some(aff_expr) = ExpressionEvaluator::new(SymbolicEvaluator, intermediate_definitions) - .evaluate(expr) - .ok() + let Some(aff_expr) = + PartialExpressionEvaluator::new(SymbolicEvaluator, intermediate_definitions) + .evaluate(expr) + .ok() else { return vec![]; }; diff --git a/executor/src/witgen/machines/sorted_witness_machine.rs b/executor/src/witgen/machines/sorted_witness_machine.rs index 56cb4e577a..ea8ee8ef2a 100644 --- a/executor/src/witgen/machines/sorted_witness_machine.rs +++ b/executor/src/witgen/machines/sorted_witness_machine.rs @@ 
-5,11 +5,10 @@ use super::{Connection, EvalResult, FixedData}; use super::{Machine, MachineParts}; use crate::witgen::affine_expression::AlgebraicVariable; use crate::witgen::data_structures::mutable_state::MutableState; +use crate::witgen::evaluators::fixed_evaluator::FixedEvaluator; +use crate::witgen::evaluators::partial_expression_evaluator::PartialExpressionEvaluator; +use crate::witgen::evaluators::symbolic_evaluator::SymbolicEvaluator; use crate::witgen::rows::RowPair; -use crate::witgen::{ - expression_evaluator::ExpressionEvaluator, fixed_evaluator::FixedEvaluator, - symbolic_evaluator::SymbolicEvaluator, -}; use crate::witgen::{EvalValue, IncompleteCause, QueryCallback}; use crate::Identity; use itertools::Itertools; @@ -138,7 +137,7 @@ fn check_identity( for row in 0..(degree as usize) { let fixed_evaluator = FixedEvaluator::new(fixed_data, row, degree); let mut ev = - ExpressionEvaluator::new(fixed_evaluator, &fixed_data.intermediate_definitions); + PartialExpressionEvaluator::new(fixed_evaluator, &fixed_data.intermediate_definitions); let degree = degree as usize; let nl = ev.evaluate(not_last).ok()?.constant_value()?; if (row == degree - 1 && !nl.is_zero()) || (row < degree - 1 && !nl.is_one()) { @@ -159,7 +158,7 @@ fn check_constraint( constraint: &Expression, ) -> Option { let sort_constraint = - match ExpressionEvaluator::new(SymbolicEvaluator, &fixed.intermediate_definitions) + match PartialExpressionEvaluator::new(SymbolicEvaluator, &fixed.intermediate_definitions) .evaluate(constraint) { Ok(c) => c, diff --git a/executor/src/witgen/mod.rs b/executor/src/witgen/mod.rs index 5640c38e8e..9579bc437e 100644 --- a/executor/src/witgen/mod.rs +++ b/executor/src/witgen/mod.rs @@ -29,8 +29,7 @@ pub(crate) mod analysis; mod block_processor; mod data_structures; mod eval_result; -mod expression_evaluator; -pub mod fixed_evaluator; +pub mod evaluators; mod global_constraints; mod identity_processor; mod machines; @@ -39,13 +38,11 @@ mod query_processor; mod 
range_constraints; mod rows; mod sequence_iterator; -pub mod symbolic_evaluator; -mod symbolic_witness_evaluator; mod util; mod vm_processor; pub use affine_expression::{AffineExpression, AffineResult, AlgebraicVariable}; -pub use expression_evaluator::{ExpressionEvaluator, SymbolicVariables}; +pub use evaluators::partial_expression_evaluator::{PartialExpressionEvaluator, SymbolicVariables}; static OUTER_CODE_NAME: &str = "witgen (outer code)"; static RANGE_CONSTRAINT_MULTIPLICITY_WITGEN: &str = "range constraint multiplicity witgen"; diff --git a/executor/src/witgen/rows.rs b/executor/src/witgen/rows.rs index e7a5b95f19..ec1272cf28 100644 --- a/executor/src/witgen/rows.rs +++ b/executor/src/witgen/rows.rs @@ -13,12 +13,11 @@ use crate::witgen::Constraint; use super::{ affine_expression::{AffineExpression, AffineResult, AlgebraicVariable}, data_structures::column_map::WitnessColumnMap, - expression_evaluator::ExpressionEvaluator, + evaluators::symbolic_witness_evaluator::{SymbolicWitnessEvaluator, WitnessColumnEvaluator}, global_constraints::RangeConstraintSet, machines::MachineParts, range_constraints::RangeConstraint, - symbolic_witness_evaluator::{SymbolicWitnessEvaluator, WitnessColumnEvaluator}, - FixedData, + FixedData, PartialExpressionEvaluator, }; /// A small wrapper around a row index, which knows the total number of rows. @@ -472,7 +471,7 @@ impl<'row, 'a, T: FieldElement> RowPair<'row, 'a, T> { // Note that because we instantiate a fresh evaluator here, we don't benefit from caching // of intermediate values across calls of `RowPair::evaluate`. In practice, we only call // it many times for the same RowPair though. 
- ExpressionEvaluator::new(variables, &self.fixed_data.intermediate_definitions) + PartialExpressionEvaluator::new(variables, &self.fixed_data.intermediate_definitions) .evaluate(expr) } } From dbc53c76a67bf4499f29fae09d39a3a496c807d2 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Mon, 9 Dec 2024 14:35:23 +0100 Subject: [PATCH 38/57] Hand-written bus witness generation (#2191) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on #2194 and #2183. This PR gives us (relatively) fast witness generation for the bus, by writing custom code instead of relying on the generic solver + prover functions: ``` $ cargo run -r --features plonky3 --bin powdr-rs compile riscv/tests/riscv_data/keccak-o output --max-degree-log 18 --field gl $ cargo run -r --features plonky3 pil output/$TEST.asm -o output -f --field gl --prove-with mock --linker-mode bus ... Running main machine for 262144 rows [00:00:05 (ETA: 00:00:05)] █████████░░░░░░░░░░░ 48% - 24283 rows/s, 3169k identities/s, 92% progress Found loop with period 1 starting at row 127900 [00:00:05 (ETA: 00:00:00)] ████████████████████ 100% - 151125 rows/s, 16170k identities/s, 100% progress Witness generation took 5.748081s Writing output/commits.bin. Backend setup for mock... Setup took 0.54769236s Generating later-stage witnesses took 0.29s Proof generation took 2.0383847s ``` On `main`, second-stage witgen for the main machine alone takes about 5 minutes. 
--- backend/src/mock/mod.rs | 14 +- executor/src/witgen/bus_accumulator/fp2.rs | 169 +++++++++++++++++ executor/src/witgen/bus_accumulator/mod.rs | 179 ++++++++++++++++++ .../witgen/evaluators/expression_evaluator.rs | 11 +- executor/src/witgen/mod.rs | 38 +++- 5 files changed, 395 insertions(+), 16 deletions(-) create mode 100644 executor/src/witgen/bus_accumulator/fp2.rs create mode 100644 executor/src/witgen/bus_accumulator/mod.rs diff --git a/backend/src/mock/mod.rs b/backend/src/mock/mod.rs index 29e9c62d5e..63e993c9da 100644 --- a/backend/src/mock/mod.rs +++ b/backend/src/mock/mod.rs @@ -16,6 +16,7 @@ use powdr_ast::{ }; use powdr_executor::{constant_evaluator::VariablySizedColumn, witgen::WitgenCallback}; use powdr_number::{DegreeType, FieldElement}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use crate::{Backend, BackendFactory, BackendOptions, Error, Proof}; @@ -103,12 +104,11 @@ impl Backend for MockBackend { }) .collect::>(); + let start = std::time::Instant::now(); let machines = self .machine_to_pil - // TODO: We should probably iterate in parallel, because Machine::try_new might generate - // later-stage witnesses, which is expensive. - // However, for now, doing it sequentially simplifies debugging. - .iter() + // Machine::try_new generates any second-stage witnesses, so better to do it in parallel. 
+ .par_iter() .filter_map(|(machine_name, pil)| { Machine::try_new( machine_name.clone(), @@ -121,6 +121,12 @@ impl Backend for MockBackend { }) .map(|machine| (machine.machine_name.clone(), machine)) .collect::>(); + if !challenges.is_empty() { + log::info!( + "Generating later-stage witnesses took {:.2}s", + start.elapsed().as_secs_f32() + ); + } let is_ok = machines.values().all(|machine| { !PolynomialConstraintChecker::new(machine, &challenges) diff --git a/executor/src/witgen/bus_accumulator/fp2.rs b/executor/src/witgen/bus_accumulator/fp2.rs new file mode 100644 index 0000000000..73e101bfce --- /dev/null +++ b/executor/src/witgen/bus_accumulator/fp2.rs @@ -0,0 +1,169 @@ +use std::{ + iter::Sum, + ops::{Add, Div, Mul, Sub}, +}; + +use num_traits::{One, Zero}; +use powdr_number::FieldElement; + +/// An implementation of Fp2, analogous to `std/math/fp2.asm`. + +/// An Fp2 element. The tuple (a, b) represents the polynomial a + b * X. +/// All computations are done modulo the irreducible polynomial X^2 - 11. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Fp2(pub T, pub T); + +impl Fp2 { + pub fn new(a: T, b: T) -> Self { + Fp2(a, b) + } +} + +impl Zero for Fp2 { + fn zero() -> Self { + Fp2(T::zero(), T::zero()) + } + + fn is_zero(&self) -> bool { + self.0.is_zero() && self.1.is_zero() + } +} + +impl One for Fp2 { + fn one() -> Self { + Fp2(T::one(), T::zero()) + } + + fn is_one(&self) -> bool { + self.0.is_one() && self.1.is_zero() + } +} + +impl From for Fp2 { + fn from(a: T) -> Self { + Fp2(a, T::zero()) + } +} + +impl Add for Fp2 { + type Output = Self; + + fn add(self, other: Self) -> Self { + Fp2(self.0 + other.0, self.1 + other.1) + } +} + +impl Sum for Fp2 { + fn sum>(iter: I) -> Self { + iter.fold(Self::zero(), Add::add) + } +} + +impl Sub for Fp2 { + type Output = Self; + + fn sub(self, other: Self) -> Self { + Fp2(self.0 - other.0, self.1 - other.1) + } +} + +impl Mul for Fp2 { + type Output = Self; + + fn mul(self, other: Self) -> Self { + Fp2( + self.0 * other.0 + self.1 * other.1 * T::from(11), + self.1 * other.0 + self.0 * other.1, + ) + } +} + +impl Mul for Fp2 { + type Output = Self; + + fn mul(self, other: T) -> Self { + Fp2(self.0 * other, self.1 * other) + } +} + +impl Fp2 { + pub fn inverse(self) -> Self { + let inv = T::from(1) / (self.0 * self.0 - self.1 * self.1 * T::from(11)); + Fp2(self.0 * inv, -self.1 * inv) + } +} + +impl Div for Fp2 { + type Output = Self; + + #[allow(clippy::suspicious_arithmetic_impl)] + fn div(self, other: Self) -> Self { + self * other.inverse() + } +} + +#[cfg(test)] +mod tests { + use powdr_number::GoldilocksField; + + use super::*; + + fn new(a: i64, b: i64) -> Fp2 { + Fp2(GoldilocksField::from(a), GoldilocksField::from(b)) + } + + fn from_base(x: i64) -> Fp2 { + GoldilocksField::from(x).into() + } + + #[test] + fn test_add() { + // Test adding 0 + assert_eq!(from_base(0) + from_base(0), from_base(0)); + assert_eq!(new(123, 1234) + from_base(0), new(123, 1234)); + assert_eq!(from_base(0) + new(123, 1234), 
new(123, 1234)); + + // Add arbitrary elements + assert_eq!(new(123, 1234) + new(567, 5678), new(690, 6912)); + assert_eq!(new(-1, -1) + new(3, 4), new(2, 3)); + } + + #[test] + fn test_sub() { + // Test subtracting 0 + assert_eq!(from_base(0) - from_base(0), from_base(0)); + assert_eq!(new(123, 1234) - from_base(0), new(123, 1234)); + + // Subtract arbitrary elements + assert_eq!(new(123, 1234) - new(567, 5678), new(123 - 567, 1234 - 5678)); + assert_eq!(new(-1, -1) - new(0x78000000, 1), new(-0x78000000 - 1, -2)); + } + + #[test] + fn test_mul() { + // Test multiplication by 1 + assert_eq!(from_base(1) * from_base(1), from_base(1)); + assert_eq!(new(123, 1234) * from_base(1), new(123, 1234)); + assert_eq!(from_base(1) * new(123, 1234), new(123, 1234)); + + // Test multiplication by 0 + assert_eq!(new(123, 1234) * from_base(0), from_base(0)); + assert_eq!(from_base(0) * new(123, 1234), from_base(0)); + + // Multiply arbitrary elements + assert_eq!(new(123, 1234) * new(567, 5678), new(77142913, 1398072)); + + // Multiplication with field overflow + assert_eq!(new(-1, -2) * new(-3, 4), new(3 - 11 * 8, 6 - 4)); + } + + #[test] + fn test_inverse() { + let test_elements = [from_base(1), new(123, 1234), new(-1, 500)]; + + for x in test_elements.iter() { + let mul_with_inverse = *x * x.inverse(); + assert_eq!(mul_with_inverse, from_base(1)); + } + } +} diff --git a/executor/src/witgen/bus_accumulator/mod.rs b/executor/src/witgen/bus_accumulator/mod.rs new file mode 100644 index 0000000000..9479383185 --- /dev/null +++ b/executor/src/witgen/bus_accumulator/mod.rs @@ -0,0 +1,179 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use fp2::Fp2; +use itertools::Itertools; +use num_traits::{One, Zero}; +use powdr_ast::analyzed::{Analyzed, Identity, PhantomBusInteractionIdentity}; +use powdr_executor_utils::VariablySizedColumn; +use powdr_number::{DegreeType, FieldElement}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; + +use 
crate::witgen::evaluators::expression_evaluator::ExpressionEvaluator; + +use super::evaluators::expression_evaluator::OwnedTraceValues; + +mod fp2; + +/// Witness generator for the second-stage bus accumulator. +pub struct BusAccumulatorGenerator<'a, T> { + pil: &'a Analyzed, + bus_interactions: Vec<&'a PhantomBusInteractionIdentity>, + trace_values: OwnedTraceValues, + powers_of_alpha: Vec>, + beta: Fp2, +} + +impl<'a, T: FieldElement> BusAccumulatorGenerator<'a, T> { + pub fn new( + pil: &'a Analyzed, + witness_columns: &'a [(String, Vec)], + fixed_columns: &'a [(String, VariablySizedColumn)], + challenges: BTreeMap, + ) -> Self { + let size = witness_columns.iter().next().unwrap().1.len() as DegreeType; + + // The provided PIL might only contain a subset of all fixed columns. + let fixed_column_names = pil + .constant_polys_in_source_order() + .flat_map(|(symbol, _)| symbol.array_elements()) + .map(|(name, _)| name.clone()) + .collect::>(); + + // Select the columns in the current PIL and select the right size. 
+ let fixed_columns = fixed_columns + .iter() + .filter(|(n, _)| fixed_column_names.contains(n)) + .map(|(n, v)| (n.clone(), v.get_by_size(size).unwrap())); + + let trace_values = OwnedTraceValues::new( + pil, + witness_columns.to_vec(), + fixed_columns + .map(|(name, values)| (name, values.to_vec())) + .collect(), + ); + + let bus_interactions = pil + .identities + .iter() + .filter_map(|identity| match identity { + Identity::PhantomBusInteraction(i) => Some(i), + _ => None, + }) + .collect::>(); + + let max_tuple_size = bus_interactions + .iter() + .map(|i| i.tuple.0.len()) + .max() + .unwrap(); + + let alpha = Fp2::new(challenges[&1], challenges[&2]); + let beta = Fp2::new(challenges[&3], challenges[&4]); + let powers_of_alpha = powers_of_alpha(alpha, max_tuple_size); + + Self { + pil, + bus_interactions, + trace_values, + powers_of_alpha, + beta, + } + } + + pub fn generate(&self) -> Vec<(String, Vec)> { + let accumulators = self + .bus_interactions + .par_iter() + .flat_map(|bus_interaction| { + let (acc1, acc2) = self.interaction_columns(bus_interaction); + let next1 = next(&acc1); + let next2 = next(&acc2); + + // We assume that the second-stage witness columns are in this order, + // for each bus interaction. 
+ [acc1, acc2, next1, next2] + }) + .collect::>(); + + self.pil + .committed_polys_in_source_order() + .filter(|(symbol, _)| symbol.stage == Some(1)) + .flat_map(|(symbol, _)| symbol.array_elements().map(|(name, _)| name)) + .zip_eq(accumulators) + .collect() + } + + fn interaction_columns( + &self, + bus_interaction: &PhantomBusInteractionIdentity, + ) -> (Vec, Vec) { + let intermediate_definitions = self.pil.intermediate_definitions(); + let empty_challenges = BTreeMap::new(); + + let size = self.trace_values.height(); + let mut acc1 = vec![T::zero(); size]; + let mut acc2 = vec![T::zero(); size]; + + for i in 0..size { + let mut evaluator = ExpressionEvaluator::new( + self.trace_values.row(i), + &intermediate_definitions, + &empty_challenges, + ); + let current_acc = if i == 0 { + Fp2::zero() + } else { + Fp2::new(acc1[i - 1], acc2[i - 1]) + }; + let multiplicity = evaluator.evaluate(&bus_interaction.multiplicity); + + let new_acc = match multiplicity.is_zero() { + true => current_acc, + false => { + let tuple = bus_interaction + .tuple + .0 + .iter() + .map(|r| evaluator.evaluate(r)) + .collect::>(); + + let fingerprint = self.beta - self.fingerprint(&tuple); + current_acc + fingerprint.inverse() * multiplicity + } + }; + + acc1[i] = new_acc.0; + acc2[i] = new_acc.1; + } + + (acc1, acc2) + } + + /// Fingerprints a tuples of field elements, using the pre-computed powers of alpha. + fn fingerprint(&self, tuple: &[T]) -> Fp2 { + tuple + .iter() + .zip_eq(self.powers_of_alpha.iter().take(tuple.len()).rev()) + .map(|(a, b)| (*b) * (*a)) + .sum() + } +} + +/// Given `alpha`, computes [1, alpha, alpha^2, ..., alpha^(n-1)]. +fn powers_of_alpha(alpha: Fp2, n: usize) -> Vec> { + (0..n) + .scan(Fp2::one(), |state, _| { + let result = *state; + *state = *state * alpha; + Some(result) + }) + .collect::>() +} + +/// Rotates a column to the left. 
+fn next(column: &[T]) -> Vec { + let mut result = column.to_vec(); + result.rotate_left(1); + result +} diff --git a/executor/src/witgen/evaluators/expression_evaluator.rs b/executor/src/witgen/evaluators/expression_evaluator.rs index d992749290..ce4b193f23 100644 --- a/executor/src/witgen/evaluators/expression_evaluator.rs +++ b/executor/src/witgen/evaluators/expression_evaluator.rs @@ -37,16 +37,19 @@ impl OwnedTraceValues { .committed_polys_in_source_order() .chain(pil.constant_polys_in_source_order()) .flat_map(|(symbol, _)| symbol.array_elements()) - .map(|(name, poly_id)| { - let column = columns_by_name + .filter_map(|(name, poly_id)| { + columns_by_name .remove(&name) - .unwrap_or_else(|| panic!("Missing column: {name}")); - (poly_id, column) + .map(|column| (poly_id, column)) }) .collect(); Self { values } } + pub fn height(&self) -> usize { + self.values.values().next().map(|v| v.len()).unwrap() + } + pub fn row(&self, row: usize) -> RowTraceValues { RowTraceValues { trace: self, row } } diff --git a/executor/src/witgen/mod.rs b/executor/src/witgen/mod.rs index 9579bc437e..61cfe6bd10 100644 --- a/executor/src/witgen/mod.rs +++ b/executor/src/witgen/mod.rs @@ -1,16 +1,17 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::sync::Arc; +use bus_accumulator::BusAccumulatorGenerator; use itertools::Itertools; use machines::machine_extractor::MachineExtractor; use powdr_ast::analyzed::{ AlgebraicExpression, AlgebraicReference, AlgebraicReferenceThin, Analyzed, DegreeRange, - Expression, FunctionValueDefinition, PolyID, PolynomialType, Symbol, SymbolKind, + Expression, FunctionValueDefinition, Identity, PolyID, PolynomialType, Symbol, SymbolKind, TypedExpression, }; use powdr_ast::parsed::visitor::{AllChildren, ExpressionVisitable}; use powdr_ast::parsed::{FunctionKind, LambdaExpression}; -use powdr_number::{DegreeType, FieldElement}; +use powdr_number::{DegreeType, FieldElement, KnownField}; use std::iter::once; use 
crate::constant_evaluator::VariablySizedColumn; @@ -27,6 +28,7 @@ use self::machines::profiling::{record_end, record_start, reset_and_print_profil mod affine_expression; pub(crate) mod analysis; mod block_processor; +mod bus_accumulator; mod data_structures; mod eval_result; pub mod evaluators; @@ -104,12 +106,32 @@ impl WitgenCallbackContext { challenges: BTreeMap, stage: u8, ) -> Vec<(String, Vec)> { - let size = current_witness.iter().next().unwrap().1.len() as DegreeType; - let fixed_col_values = self.select_fixed_columns(pil, size); - WitnessGenerator::new(pil, &fixed_col_values, &*self.query_callback) - .with_external_witness_values(current_witness) - .with_challenges(stage, challenges) - .generate() + let has_phantom_bus_sends = pil + .identities + .iter() + .any(|identity| matches!(identity, Identity::PhantomBusInteraction(_))); + + if has_phantom_bus_sends && T::known_field() == Some(KnownField::GoldilocksField) { + log::debug!("Using hand-written bus witgen."); + assert_eq!(stage, 1); + let bus_columns = BusAccumulatorGenerator::new( + pil, + current_witness, + &self.fixed_col_values, + challenges, + ) + .generate(); + + current_witness.iter().cloned().chain(bus_columns).collect() + } else { + log::debug!("Using automatic stage-1 witgen."); + let size = current_witness.iter().next().unwrap().1.len() as DegreeType; + let fixed_col_values = self.select_fixed_columns(pil, size); + WitnessGenerator::new(pil, &fixed_col_values, &*self.query_callback) + .with_external_witness_values(current_witness) + .with_challenges(stage, challenges) + .generate() + } } } From ee7cf29c8e89292cc6d2665ce3d2368a98755988 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Mon, 9 Dec 2024 14:58:02 +0100 Subject: [PATCH 39/57] Implement `process_lookup_direct` for KnownMachine & change interface slightly (#2206) Extracted out of #2071 --- .../witgen/machines/fixed_lookup_machine.rs | 10 ++--- executor/src/witgen/machines/mod.rs | 38 ++++++++++++++++++- 2 files changed, 41 
insertions(+), 7 deletions(-) diff --git a/executor/src/witgen/machines/fixed_lookup_machine.rs b/executor/src/witgen/machines/fixed_lookup_machine.rs index 885eda3d8d..ea1476369d 100644 --- a/executor/src/witgen/machines/fixed_lookup_machine.rs +++ b/executor/src/witgen/machines/fixed_lookup_machine.rs @@ -220,7 +220,7 @@ impl<'a, T: FieldElement> FixedLookup<'a, T> { // Split the left-hand-side into known input values and unknown output expressions. let mut data = vec![T::zero(); left.len()]; - let values = left + let mut values = left .iter() .zip(&mut data) .map(|(l, d)| { @@ -233,7 +233,7 @@ impl<'a, T: FieldElement> FixedLookup<'a, T> { }) .collect::>(); - if !self.process_lookup_direct(mutable_state, identity_id, values)? { + if !self.process_lookup_direct(mutable_state, identity_id, &mut values)? { // multiple matches, we stop and learnt nothing return Ok(EvalValue::incomplete( IncompleteCause::MultipleLookupMatches, @@ -357,7 +357,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for FixedLookup<'a, T> { &mut self, _mutable_state: &'b MutableState<'a, T, Q>, identity_id: u64, - values: Vec>, + values: &mut [LookupCell<'c, T>], ) -> Result> { let mut input_values = vec![]; @@ -407,14 +407,14 @@ impl<'a, T: FieldElement> Machine<'a, T> for FixedLookup<'a, T> { self.multiplicity_counter.increment_at_row(identity_id, row); values - .into_iter() + .iter_mut() .filter_map(|v| match v { LookupCell::Output(e) => Some(e), _ => None, }) .zip(output) .for_each(|(e, v)| { - *e = *v; + **e = *v; }); Ok(true) } diff --git a/executor/src/witgen/machines/mod.rs b/executor/src/witgen/machines/mod.rs index 3a4e3ebff0..169877bd0f 100644 --- a/executor/src/witgen/machines/mod.rs +++ b/executor/src/witgen/machines/mod.rs @@ -93,9 +93,9 @@ pub trait Machine<'a, T: FieldElement>: Send + Sync { &mut self, _mutable_state: &'b MutableState<'a, T, Q>, _identity_id: u64, - _values: Vec>, + _values: &mut [LookupCell<'c, T>], ) -> Result> { - unimplemented!("Direct lookup is not supported 
for this machine."); + unimplemented!("Direct lookup not supported machine {}.", self.name()) } /// Returns the final values of the witness columns. @@ -177,6 +177,40 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> { } } + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + mutable_state: &'b MutableState<'a, T, Q>, + identity_id: u64, + values: &mut [LookupCell<'c, T>], + ) -> Result> { + match self { + KnownMachine::SecondStageMachine(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::SortedWitnesses(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::DoubleSortedWitnesses16(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::DoubleSortedWitnesses32(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::WriteOnceMemory(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::BlockMachine(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::DynamicMachine(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + KnownMachine::FixedLookup(m) => { + m.process_lookup_direct(mutable_state, identity_id, values) + } + } + } + fn name(&self) -> &str { match self { KnownMachine::SecondStageMachine(m) => m.name(), From ad858a1d7d31fded704b1a231f3f7d4ec106ba12 Mon Sep 17 00:00:00 2001 From: Thibaut Schaeffer Date: Mon, 9 Dec 2024 15:51:20 +0100 Subject: [PATCH 40/57] Run `cargo bench` in PR CI tests and report result to PR and Github Pages (#2198) Run benchmarks in PRs, fail and warn on the PR if we got more than 20% slower, add benchmark results to https://docs.powdr.org/dev/bench/ --- .github/workflows/deploy-book.yml | 3 +- .github/workflows/nightly-tests.yml | 2 - .github/workflows/pr-tests.yml | 52 +++++++++++++++++++++++++ airgen/Cargo.toml | 3 ++ analysis/Cargo.toml | 3 ++ asm-to-pil/Cargo.toml | 5 
++- ast/Cargo.toml | 3 ++ backend-utils/Cargo.toml | 3 ++ backend/Cargo.toml | 3 ++ cargo-powdr/Cargo.toml | 1 + cli-rs/Cargo.toml | 1 + cli/Cargo.toml | 1 + executor-utils/Cargo.toml | 3 ++ executor/Cargo.toml | 3 ++ importer/Cargo.toml | 3 ++ isa-utils/Cargo.toml | 3 ++ jit-compiler/Cargo.toml | 3 ++ linker/Cargo.toml | 3 ++ number/Cargo.toml | 3 ++ parser-util/Cargo.toml | 3 ++ parser/Cargo.toml | 3 ++ pil-analyzer/Cargo.toml | 3 ++ pilopt/Cargo.toml | 3 ++ pipeline/Cargo.toml | 3 ++ pipeline/benches/evaluator_benchmark.rs | 1 + plonky3/Cargo.toml | 11 +++--- powdr-test/Cargo.toml | 3 ++ powdr/Cargo.toml | 3 ++ riscv-executor/Cargo.toml | 3 ++ riscv-syscalls/Cargo.toml | 3 ++ riscv/Cargo.toml | 3 ++ schemas/Cargo.toml | 12 +++++- 32 files changed, 143 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy-book.yml b/.github/workflows/deploy-book.yml index 12e577e754..4225c0cba5 100644 --- a/.github/workflows/deploy-book.yml +++ b/.github/workflows/deploy-book.yml @@ -36,7 +36,8 @@ jobs: cd gh-pages # Delete the ref to avoid keeping history. git update-ref -d refs/heads/gh-pages - rm -rf * + # Delete everything except the `dev` folder, as it contains benchmarks we should keep. + find . -mindepth 1 -maxdepth 1 ! -name "dev" -exec rm -rf {} + mv ../book/* . git add . git commit -m "Deploy $GITHUB_SHA to gh-pages" diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml index f30cd1cd09..55b6ef19d6 100644 --- a/.github/workflows/nightly-tests.yml +++ b/.github/workflows/nightly-tests.yml @@ -86,5 +86,3 @@ jobs: - name: Run tests # Number threads is set to 1 because the runner does not have enough memeory for more. 
run: PILCOM=$(pwd)/pilcom/ cargo test --all --release --verbose --all-features -- --include-ignored --nocapture --test-threads=1 - - name: Run benchmarks - run: cargo bench --workspace --all-features diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index b58a3778fb..9713ba1e4f 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -237,3 +237,55 @@ jobs: env: PILCOM: ${{ github.workspace }}/pilcom/ POWDR_STD: ${{ github.workspace }}/std/ + + bench: + needs: build + runs-on: warp-ubuntu-2404-x64-4x + permissions: + contents: write + deployments: write + pull-requests: write + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: âš¡ Restore rust cache + id: cache + uses: WarpBuilds/cache/restore@v1 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + Cargo.lock + key: ${{ runner.os }}-cargo-pr-tests + - name: Install Rust toolchain 1.81 + run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu + - name: Install nightly + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld + - name: Install EStarkPolygon prover dependencies + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc uuid-dev build-essential cmake pkg-config git + - name: Install pilcom + run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install + - name: Run benchmarks + # we add `|| exit 1` to make sure the step fails if `cargo bench` fails + run: 
cargo bench --workspace --all-features -- --output-format bencher | tee output.txt || exit 1 + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + name: Benchmarks + tool: 'cargo' + output-file-path: output.txt + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + alert-threshold: '120%' + comment-on-alert: true + summary-always: true diff --git a/airgen/Cargo.toml b/airgen/Cargo.toml index 993c1f67cb..5bec6ebc3b 100644 --- a/airgen/Cargo.toml +++ b/airgen/Cargo.toml @@ -18,3 +18,6 @@ itertools = "0.13" [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/analysis/Cargo.toml b/analysis/Cargo.toml index 97f2ee7f28..ba4aae0031 100644 --- a/analysis/Cargo.toml +++ b/analysis/Cargo.toml @@ -28,3 +28,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/asm-to-pil/Cargo.toml b/asm-to-pil/Cargo.toml index 176bed5f5b..232c7f84aa 100644 --- a/asm-to-pil/Cargo.toml +++ b/asm-to-pil/Cargo.toml @@ -22,4 +22,7 @@ powdr-analysis = { path = "../analysis" } powdr-importer = { path = "../importer" } [lints] -workspace = true \ No newline at end of file +workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 \ No newline at end of file diff --git a/ast/Cargo.toml b/ast/Cargo.toml index 8d752edceb..dd5b7f26b4 100644 --- a/ast/Cargo.toml +++ b/ast/Cargo.toml @@ -28,3 +28,6 @@ powdr-parser.workspace = true [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/backend-utils/Cargo.toml b/backend-utils/Cargo.toml index 2472ca3ebe..33e71f3c62 100644 --- a/backend-utils/Cargo.toml +++ b/backend-utils/Cargo.toml @@ -14,3 +14,6 @@ powdr-number.workspace = true powdr-executor-utils.workspace = true log = "0.4.22" itertools = "0.13.0" + +[lib] +bench = false # See 
https://github.com/bheisler/criterion.rs/issues/458 diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 2133610e41..9137f73492 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -89,3 +89,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/cargo-powdr/Cargo.toml b/cargo-powdr/Cargo.toml index fcff10988b..6d13817795 100644 --- a/cargo-powdr/Cargo.toml +++ b/cargo-powdr/Cargo.toml @@ -13,3 +13,4 @@ clap = { version = "^4.3", features = ["derive"] } [[bin]] name = "cargo-powdr" path = "src/main.rs" +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/cli-rs/Cargo.toml b/cli-rs/Cargo.toml index 912ab22167..97f7466e58 100644 --- a/cli-rs/Cargo.toml +++ b/cli-rs/Cargo.toml @@ -27,3 +27,4 @@ clap-markdown = "0.1.3" [[bin]] name = "powdr-rs" path = "src/main.rs" +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 097ee5b4d2..cc377f855b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -43,6 +43,7 @@ env_logger = "0.10.0" [[bin]] name = "powdr" path = "src/main.rs" +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 [lints.clippy] uninlined_format_args = "deny" diff --git a/executor-utils/Cargo.toml b/executor-utils/Cargo.toml index 99d74950bf..a97db4f980 100644 --- a/executor-utils/Cargo.toml +++ b/executor-utils/Cargo.toml @@ -11,3 +11,6 @@ powdr-number.workspace = true powdr-ast.workspace = true serde = { version = "1.0", default-features = false, features = ["alloc", "derive", "rc"] } + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 \ No newline at end of file diff --git a/executor/Cargo.toml b/executor/Cargo.toml index e585a2ce2f..3e9fd701cb 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -35,3 +35,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # 
See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/importer/Cargo.toml b/importer/Cargo.toml index b13651aad6..e8cfdff635 100644 --- a/importer/Cargo.toml +++ b/importer/Cargo.toml @@ -17,3 +17,6 @@ pretty_assertions = "1.4.0" [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/isa-utils/Cargo.toml b/isa-utils/Cargo.toml index 91d49bf52e..cbf03f1192 100644 --- a/isa-utils/Cargo.toml +++ b/isa-utils/Cargo.toml @@ -6,3 +6,6 @@ edition = { workspace = true } license = { workspace = true } homepage = { workspace = true } repository = { workspace = true } + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/jit-compiler/Cargo.toml b/jit-compiler/Cargo.toml index 5537543cea..de17708a7a 100644 --- a/jit-compiler/Cargo.toml +++ b/jit-compiler/Cargo.toml @@ -25,3 +25,6 @@ test-log = "0.2.12" [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/linker/Cargo.toml b/linker/Cargo.toml index c191bbbc43..ed883c3c23 100644 --- a/linker/Cargo.toml +++ b/linker/Cargo.toml @@ -26,3 +26,6 @@ powdr-parser.workspace = true [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/number/Cargo.toml b/number/Cargo.toml index ef6a4f6f44..e7af0da4fe 100644 --- a/number/Cargo.toml +++ b/number/Cargo.toml @@ -40,3 +40,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/parser-util/Cargo.toml b/parser-util/Cargo.toml index 644145365d..8e2adb33e8 100644 --- a/parser-util/Cargo.toml +++ b/parser-util/Cargo.toml @@ -25,3 +25,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 6c282c48f2..9995e3b629 
100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -33,3 +33,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 \ No newline at end of file diff --git a/pil-analyzer/Cargo.toml b/pil-analyzer/Cargo.toml index a1d2f39f80..1b09c0c743 100644 --- a/pil-analyzer/Cargo.toml +++ b/pil-analyzer/Cargo.toml @@ -27,3 +27,6 @@ development = ["env_logger"] [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/pilopt/Cargo.toml b/pilopt/Cargo.toml index 85c55c7ba4..d4e6a4f960 100644 --- a/pilopt/Cargo.toml +++ b/pilopt/Cargo.toml @@ -20,3 +20,6 @@ powdr-pil-analyzer.workspace = true [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index 45f85e1006..6549a1d4b1 100644 --- a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -64,3 +64,6 @@ harness = false [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/pipeline/benches/evaluator_benchmark.rs b/pipeline/benches/evaluator_benchmark.rs index 5f500c984e..72a194f03a 100644 --- a/pipeline/benches/evaluator_benchmark.rs +++ b/pipeline/benches/evaluator_benchmark.rs @@ -167,6 +167,7 @@ fn jit_benchmark(c: &mut Criterion) { }); }); } + group.finish(); } diff --git a/plonky3/Cargo.toml b/plonky3/Cargo.toml index 35b688d27d..fc44576cb4 100644 --- a/plonky3/Cargo.toml +++ b/plonky3/Cargo.toml @@ -6,12 +6,10 @@ license.workspace = true homepage.workspace = true repository.workspace = true -[lib] -name = "powdr_plonky3" - [[bin]] name = "gen_poseidon_consts" path = "src/bin/gen_poseidon_consts.rs" +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 [features] default = [] @@ -74,6 +72,7 @@ powdr-riscv-runtime = { path = "../riscv-runtime", features = [ "getrandom", "allow_fake_rand", ] 
} -indexmap = { version = "1.9.3", features = [ - "std", -] } +indexmap = { version = "1.9.3", features = ["std"] } + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/powdr-test/Cargo.toml b/powdr-test/Cargo.toml index 3a8fc49fa8..3f86616376 100644 --- a/powdr-test/Cargo.toml +++ b/powdr-test/Cargo.toml @@ -15,3 +15,6 @@ env_logger = "0.10.2" [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/powdr/Cargo.toml b/powdr/Cargo.toml index 471a188e18..7dd38b8bf0 100644 --- a/powdr/Cargo.toml +++ b/powdr/Cargo.toml @@ -43,3 +43,6 @@ estark-starky-simd = ["powdr-backend/estark-starky-simd", "powdr-pipeline/estark [lints.clippy] uninlined_format_args = "deny" + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/riscv-executor/Cargo.toml b/riscv-executor/Cargo.toml index a58e511b6f..293ed81ac0 100644 --- a/riscv-executor/Cargo.toml +++ b/riscv-executor/Cargo.toml @@ -28,3 +28,6 @@ inferno = "0.11.19" [lints.clippy] uninlined_format_args = "deny" + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/riscv-syscalls/Cargo.toml b/riscv-syscalls/Cargo.toml index bf6325f45a..5f84a11d1d 100644 --- a/riscv-syscalls/Cargo.toml +++ b/riscv-syscalls/Cargo.toml @@ -11,3 +11,6 @@ repository = { workspace = true } [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/riscv/Cargo.toml b/riscv/Cargo.toml index 595f8a300a..f297823512 100644 --- a/riscv/Cargo.toml +++ b/riscv/Cargo.toml @@ -67,3 +67,6 @@ harness = false [lints] workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 diff --git a/schemas/Cargo.toml b/schemas/Cargo.toml index 0b0db6b81f..dd9f3ca3b3 100644 --- a/schemas/Cargo.toml +++ b/schemas/Cargo.toml @@ -12,13 +12,21 @@ repository.workspace = true powdr-ast.workspace = true 
powdr-number.workspace = true -serde = { version = "1.0", default-features = false, features = ["alloc", "derive", "rc"] } -schemars = { version = "0.8.16", features = ["preserve_order"]} +serde = { version = "1.0", default-features = false, features = [ + "alloc", + "derive", + "rc", +] } +schemars = { version = "0.8.16", features = ["preserve_order"] } serde_cbor = "0.11.2" [[bin]] name = "powdr-schemas" path = "bin/main.rs" +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 [lints.clippy] uninlined_format_args = "deny" + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 From d98cf9f4868c970e428070ad59a68e770588dbb5 Mon Sep 17 00:00:00 2001 From: Leandro Pacheco Date: Mon, 9 Dec 2024 12:47:18 -0300 Subject: [PATCH 41/57] remove poseidon operation id (#2208) In cases where the operation id was defined as a witness column, it seems the optimizer didn't remove them. So this can save a witness column (and lookup parameter) in some cases (e.g., poseidon machines) --- riscv-executor/src/submachines.rs | 1 - std/machines/binary.asm | 4 +--- std/machines/hash/keccakf16_memory.asm | 5 +---- std/machines/hash/keccakf32_memory.asm | 5 +---- std/machines/hash/poseidon2_bb.asm | 4 +--- std/machines/hash/poseidon2_gl.asm | 4 +--- std/machines/hash/poseidon_bb.asm | 5 +---- std/machines/hash/poseidon_bn254.asm | 5 +---- std/machines/hash/poseidon_gl.asm | 5 +---- std/machines/hash/poseidon_gl_memory.asm | 5 +---- std/machines/large_field/rotate.asm | 4 +--- std/machines/large_field/shift.asm | 4 +--- std/machines/range.asm | 24 ++++++------------------ std/machines/small_field/keccakf16.asm | 5 +---- std/machines/small_field/rotate.asm | 4 +--- std/machines/small_field/shift.asm | 4 +--- std/machines/split/mod.asm | 4 +--- 17 files changed, 21 insertions(+), 71 deletions(-) diff --git a/riscv-executor/src/submachines.rs b/riscv-executor/src/submachines.rs index a5aa96d85b..8a5c8e7f4f 100644 --- 
a/riscv-executor/src/submachines.rs +++ b/riscv-executor/src/submachines.rs @@ -726,7 +726,6 @@ impl SubmachineKind for PoseidonGlMachine { trace.set_current_row(STATE_COLS[i], Elem::Field(state[i])); } // these are the same in the whole block - trace.set_current_block(Self::BLOCK_SIZE, "operation_id", 0.into()); trace.set_current_block(Self::BLOCK_SIZE, "time_step", time_step); trace.set_current_block(Self::BLOCK_SIZE, "input_addr", input_addr); trace.set_current_block(Self::BLOCK_SIZE, "output_addr", output_addr); diff --git a/std/machines/binary.asm b/std/machines/binary.asm index 1b02bcd32e..7ad99e6aa9 100644 --- a/std/machines/binary.asm +++ b/std/machines/binary.asm @@ -4,13 +4,11 @@ use std::utils::cross_product; // Binary for single bytes using an exhaustive table machine ByteBinary with latch: latch, - operation_id: operation_id, degree: 262144 { - operation run<0> P_operation, P_A, P_B -> P_C; + operation run P_operation, P_A, P_B -> P_C; col fixed latch = [1]*; - col fixed operation_id = [0]*; let bit_counts = [256, 256, 3]; let min_degree = std::array::product(bit_counts); diff --git a/std/machines/hash/keccakf16_memory.asm b/std/machines/hash/keccakf16_memory.asm index b393ca8b67..52a7d62fdd 100644 --- a/std/machines/hash/keccakf16_memory.asm +++ b/std/machines/hash/keccakf16_memory.asm @@ -14,7 +14,6 @@ use std::machines::small_field::add_sub::AddSub; machine Keccakf16Memory(mem: Memory, add_sub: AddSub) with latch: final_step, - operation_id: operation_id, call_selectors: sel, { /* @@ -44,7 +43,7 @@ machine Keccakf16Memory(mem: Memory, add_sub: AddSub) with Though note that input address need to be first copied from the last row to the first row. */ - operation keccakf16_memory<0> input_addr_h, input_addr_l, output_addr_h, output_addr_l, time_step ->; + operation keccakf16_memory input_addr_h, input_addr_l, output_addr_h, output_addr_l, time_step ->; // Get an intermediate column that indicates that we're in an // actual block, not a default block. 
Its value is constant @@ -297,8 +296,6 @@ machine Keccakf16Memory(mem: Memory, add_sub: AddSub) with std::check::require_field_bits(16, || "The field modulus should be at least 2^16 - 1 to work in the keccakf16 machine."); - col witness operation_id; - let NUM_ROUNDS: int = 24; // pub struct KeccakCols { diff --git a/std/machines/hash/keccakf32_memory.asm b/std/machines/hash/keccakf32_memory.asm index 8b166b9777..09149915ed 100644 --- a/std/machines/hash/keccakf32_memory.asm +++ b/std/machines/hash/keccakf32_memory.asm @@ -13,7 +13,6 @@ use std::machines::large_field::memory::Memory; machine Keccakf32Memory(mem: Memory) with latch: final_step, - operation_id: operation_id, call_selectors: sel, { /* @@ -37,7 +36,7 @@ machine Keccakf32Memory(mem: Memory) with Though note that input address need to be first copied from the last row to the first row. */ - operation keccakf32_memory<0> input_addr, output_addr, time_step ->; + operation keccakf32_memory input_addr, output_addr, time_step ->; // Get an intermediate column that indicates that we're in an // actual block, not a default block. Its value is constant @@ -179,8 +178,6 @@ machine Keccakf32Memory(mem: Memory) with std::check::require_field_bits(32, || "The field modulus should be at least 2^32 - 1 to work in the keccakf32 machine."); - col witness operation_id; - let NUM_ROUNDS: int = 24; // pub struct KeccakCols { diff --git a/std/machines/hash/poseidon2_bb.asm b/std/machines/hash/poseidon2_bb.asm index ce6eddf6b0..9aafb30174 100644 --- a/std/machines/hash/poseidon2_bb.asm +++ b/std/machines/hash/poseidon2_bb.asm @@ -16,7 +16,6 @@ use super::poseidon2_common::poseidon2; // it can be used as a compression function for building a Merkle tree. 
machine Poseidon2BB(mem: Memory, split_BB: SplitBB) with latch: latch, - operation_id: operation_id, // Allow this machine to be connected via a permutation call_selectors: sel, { @@ -30,13 +29,12 @@ machine Poseidon2BB(mem: Memory, split_BB: SplitBB) with // Similarly, the output data is written to memory at the provided pointer. // // Reads happen at the provided time step; writes happen at the next time step. - operation poseidon2_permutation<0> + operation poseidon2_permutation input_addr_high[0], input_addr_low[0], output_addr_high[0], output_addr_low[0], time_step ->; let latch = 1; - let operation_id; let time_step; diff --git a/std/machines/hash/poseidon2_gl.asm b/std/machines/hash/poseidon2_gl.asm index 7a4f3ebdbc..b5c79716c4 100644 --- a/std/machines/hash/poseidon2_gl.asm +++ b/std/machines/hash/poseidon2_gl.asm @@ -19,7 +19,6 @@ use super::poseidon2_common::poseidon2; // state size of 8 field elements instead of 12, matching Plonky3's implementation. machine Poseidon2GL(mem: Memory, split_GL: SplitGL) with latch: latch, - operation_id: operation_id, // Allow this machine to be connected via a permutation call_selectors: sel, { @@ -33,13 +32,12 @@ machine Poseidon2GL(mem: Memory, split_GL: SplitGL) with // Similarly, the output data is written to memory at the provided pointer. // // Reads happen at the provided time step; writes happen at the next time step. - operation poseidon2_permutation<0> + operation poseidon2_permutation input_addr, output_addr, time_step ->; let latch = 1; - let operation_id; let time_step; diff --git a/std/machines/hash/poseidon_bb.asm b/std/machines/hash/poseidon_bb.asm index a8c1e9f6ae..296a7a08c1 100644 --- a/std/machines/hash/poseidon_bb.asm +++ b/std/machines/hash/poseidon_bb.asm @@ -23,7 +23,6 @@ use std::machines::split::split_bb::SplitBB; // with memory directly to fetch its inputs and write its outputs. 
machine PoseidonBB(mem: Memory, split_bb: SplitBB) with latch: CLK_0, - operation_id: operation_id, // Allow this machine to be connected via a permutation call_selectors: sel, { @@ -39,13 +38,11 @@ machine PoseidonBB(mem: Memory, split_bb: SplitBB) with // (in canonical form). // // Reads happen at the provided time step; writes happen at the next time step. - operation poseidon_permutation<0> + operation poseidon_permutation input_addr_high, input_addr_low, output_addr_high, output_addr_low, time_step ->; - let operation_id; - // Number of field elements in the state let STATE_SIZE: int = 16; // Number of output elements diff --git a/std/machines/hash/poseidon_bn254.asm b/std/machines/hash/poseidon_bn254.asm index 82b6971f31..2ce4aa3762 100644 --- a/std/machines/hash/poseidon_bn254.asm +++ b/std/machines/hash/poseidon_bn254.asm @@ -4,7 +4,6 @@ use std::utils::unchanged_until; // Implements the Poseidon permutation for the BN254 curve. machine PoseidonBN254 with latch: FIRSTBLOCK, - operation_id: operation_id, // Allow this machine to be connected via a permutation call_selectors: sel, { @@ -14,9 +13,7 @@ machine PoseidonBN254 with // When the hash function is used only once, the capacity element should be // set to a constant, where different constants can be used to define different // hash functions. - operation poseidon_permutation<0> state[0], state[1], state[2] -> output[0]; - - let operation_id; + operation poseidon_permutation state[0], state[1], state[2] -> output[0]; // Using parameters from https://eprint.iacr.org/2019/458.pdf // See https://extgit.iaik.tugraz.at/krypto/hadeshash/-/blob/master/code/poseidonperm_x5_254_3.sage diff --git a/std/machines/hash/poseidon_gl.asm b/std/machines/hash/poseidon_gl.asm index 827d31f893..2414cc6fec 100644 --- a/std/machines/hash/poseidon_gl.asm +++ b/std/machines/hash/poseidon_gl.asm @@ -4,7 +4,6 @@ use std::utils::unchanged_until; // Implements the Poseidon permutation for the Goldilocks field. 
machine PoseidonGL with latch: FIRSTBLOCK, - operation_id: operation_id, // Allow this machine to be connected via a permutation call_selectors: sel, { @@ -14,9 +13,7 @@ machine PoseidonGL with // When the hash function is used only once, the capacity elements should be // set to constants, where different constants can be used to define different // hash functions. - operation poseidon_permutation<0> state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state[8], state[9], state[10], state[11] -> output[0], output[1], output[2], output[3]; - - let operation_id; + operation poseidon_permutation state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state[8], state[9], state[10], state[11] -> output[0], output[1], output[2], output[3]; // Ported from: // - https://github.com/0xPolygonHermez/zkevm-proverjs/blob/main/pil/poseidong.pil diff --git a/std/machines/hash/poseidon_gl_memory.asm b/std/machines/hash/poseidon_gl_memory.asm index f03ef07bca..2deb278776 100644 --- a/std/machines/hash/poseidon_gl_memory.asm +++ b/std/machines/hash/poseidon_gl_memory.asm @@ -23,7 +23,6 @@ use std::machines::split::split_gl::SplitGL; // - 1 to split the current output into low and high words machine PoseidonGLMemory(mem: Memory, split_gl: SplitGL) with latch: CLK_0, - operation_id: operation_id, // Allow this machine to be connected via a permutation call_selectors: sel, { @@ -40,9 +39,7 @@ machine PoseidonGLMemory(mem: Memory, split_gl: SplitGL) with // 8 32-Bit machine words representing 4 field elements in little-endian format // (in canonical form). // Reads happen at the provided time step; writes happen at the next time step. 
- operation poseidon_permutation<0> input_addr, output_addr, time_step ->; - - let operation_id; + operation poseidon_permutation input_addr, output_addr, time_step ->; // Ported from: // - https://github.com/0xPolygonHermez/zkevm-proverjs/blob/main/pil/poseidong.pil diff --git a/std/machines/large_field/rotate.asm b/std/machines/large_field/rotate.asm index 550db0e7a3..77bf3edfbf 100644 --- a/std/machines/large_field/rotate.asm +++ b/std/machines/large_field/rotate.asm @@ -7,13 +7,11 @@ use std::check::require_field_bits; /// We can rotate by at most 31 bits machine ByteRotate with latch: latch, - operation_id: operation_id, degree: 65536 { - operation run<0> P_operation, P_A, P_B, P_ROW -> P_C; + operation run P_operation, P_A, P_B, P_ROW -> P_C; col fixed latch = [1]*; - col fixed operation_id = [0]*; let bit_counts = [256, 32, 4, 2]; let min_degree = std::array::product(bit_counts); std::check::assert(std::prover::min_degree() >= std::array::product(bit_counts), || "The rotate machine needs at least 65536 rows to work."); diff --git a/std/machines/large_field/shift.asm b/std/machines/large_field/shift.asm index efd2d9c482..c8a70188e2 100644 --- a/std/machines/large_field/shift.asm +++ b/std/machines/large_field/shift.asm @@ -9,13 +9,11 @@ use std::check::require_field_bits; // TODO this way, we cannot prove anything that shifts by more than 31 bits. 
machine ByteShift with latch: latch, - operation_id: operation_id, degree: 65536 { - operation run<0> P_operation, P_A, P_B, P_ROW -> P_C; + operation run P_operation, P_A, P_B, P_ROW -> P_C; col fixed latch = [1]*; - col fixed operation_id = [0]*; let bit_counts = [256, 32, 4, 2]; let min_degree = std::array::product(bit_counts); diff --git a/std/machines/range.asm b/std/machines/range.asm index baf154d700..0580796c9e 100644 --- a/std/machines/range.asm +++ b/std/machines/range.asm @@ -1,71 +1,59 @@ machine Byte with latch: latch, - operation_id: operation_id, degree: 256 { - operation check<0> BYTE -> ; + operation check BYTE -> ; let BYTE: col = |i| i & 0xff; col fixed latch = [1]*; - col fixed operation_id = [0]*; } machine Byte2 with latch: latch, - operation_id: operation_id, degree: 65536 { - operation check<0> BYTE2 -> ; + operation check BYTE2 -> ; let BYTE2: col = |i| i & 0xffff; col fixed latch = [1]*; - col fixed operation_id = [0]*; } machine Bit2 with latch: latch, - operation_id: operation_id, degree: 4 { - operation check<0> BIT2 -> ; + operation check BIT2 -> ; let BIT2: col = |i| i % 4; col fixed latch = [1]*; - col fixed operation_id = [0]*; } machine Bit6 with latch: latch, - operation_id: operation_id, degree: 64 { - operation check<0> BIT6 -> ; + operation check BIT6 -> ; let BIT6: col = |i| i % 64; col fixed latch = [1]*; - col fixed operation_id = [0]*; } machine Bit7 with latch: latch, - operation_id: operation_id, degree: 128 { - operation check<0> BIT7 -> ; + operation check BIT7 -> ; let BIT7: col = |i| i % 128; col fixed latch = [1]*; - col fixed operation_id = [0]*; } machine Bit12 with latch: latch, - operation_id: operation_id, degree: 4096 { - operation check<0> BIT12 -> ; + operation check BIT12 -> ; let BIT12: col = |i| i % (2**12); let latch = 1; - col fixed operation_id = [0]*; } diff --git a/std/machines/small_field/keccakf16.asm b/std/machines/small_field/keccakf16.asm index 141efdc231..887a9d5799 100644 --- 
a/std/machines/small_field/keccakf16.asm +++ b/std/machines/small_field/keccakf16.asm @@ -12,7 +12,6 @@ use std::prover::provide_value; machine Keccakf16 with latch: final_step, - operation_id: operation_id, call_selectors: sel, { // Adapted from Plonky3 implementation of Keccak: https://github.com/Plonky3/Plonky3/tree/main/keccak-air/src @@ -21,7 +20,7 @@ machine Keccakf16 with // Expects input of 25 64-bit numbers decomposed to 25 chunks of 4 16-bit big endian limbs. Same for output. // The output is a_prime_prime_prime_0_0_limbs for the first 4 and a_prime_prime for the rest. - operation keccakf16<0> + operation keccakf16 preimage[3], preimage[2], preimage[1], preimage[0], preimage[7], preimage[6], preimage[5], preimage[4], preimage[11], preimage[10], preimage[9], preimage[8], @@ -74,8 +73,6 @@ machine Keccakf16 with a_prime_prime[95], a_prime_prime[94], a_prime_prime[93], a_prime_prime[92], a_prime_prime[99], a_prime_prime[98], a_prime_prime[97], a_prime_prime[96]; - col witness operation_id; - let NUM_ROUNDS: int = 24; // pub struct KeccakCols { diff --git a/std/machines/small_field/rotate.asm b/std/machines/small_field/rotate.asm index d73281d621..44dfcf8661 100644 --- a/std/machines/small_field/rotate.asm +++ b/std/machines/small_field/rotate.asm @@ -7,13 +7,11 @@ use std::check::require_field_bits; /// We can rotate by at most 31 bits machine ByteRotate with latch: latch, - operation_id: operation_id, degree: 65536 { // P_C0 and P_C1 are both 16 bit limbs of P_C, where P_C0 is the less significant limb. 
- operation run<0> P_operation, P_A, P_B, P_ROW -> P_C0, P_C1; + operation run P_operation, P_A, P_B, P_ROW -> P_C0, P_C1; col fixed latch = [1]*; - col fixed operation_id = [0]*; require_field_bits(16, || "The field modulus should be at least 2^16 - 1 to work in the rotate machine."); diff --git a/std/machines/small_field/shift.asm b/std/machines/small_field/shift.asm index 617d30de04..8016be500e 100644 --- a/std/machines/small_field/shift.asm +++ b/std/machines/small_field/shift.asm @@ -9,14 +9,12 @@ use std::check::require_field_bits; // TODO this way, we cannot prove anything that shifts by more than 31 bits. machine ByteShift with latch: latch, - operation_id: operation_id, degree: 65536 { // P_CLow and P_CHi are both 16 bit limbs of P_C, where P_CLow is the less significant limb. - operation run<0> P_operation, P_A, P_B, P_ROW -> P_CLow, P_CHi; + operation run P_operation, P_A, P_B, P_ROW -> P_CLow, P_CHi; col fixed latch = [1]*; - col fixed operation_id = [0]*; let bit_counts = [256, 32, 4, 2]; let min_degree = std::array::product(bit_counts); diff --git a/std/machines/split/mod.asm b/std/machines/split/mod.asm index bfd89c2c71..3dc4232c6e 100644 --- a/std/machines/split/mod.asm +++ b/std/machines/split/mod.asm @@ -7,7 +7,6 @@ use std::utils::cross_product; // Byte comparison block machine machine ByteCompare with latch: latch, - operation_id: operation_id, degree: 65536 { let inputs = cross_product([256, 256]); @@ -18,8 +17,7 @@ machine ByteCompare with col fixed P_LT(i) { if a(i) < b(i) { 1 } else { 0 } }; col fixed P_GT(i) { if a(i) > b(i) { 1 } else { 0 } }; - operation run<0> P_A, P_B -> P_LT, P_GT; + operation run P_A, P_B -> P_LT, P_GT; col fixed latch = [1]*; - col fixed operation_id = [0]*; } From 576bdd714d1ca8345a0de50dc7bff45dd53dcaa2 Mon Sep 17 00:00:00 2001 From: Thibaut Schaeffer Date: Mon, 9 Dec 2024 16:47:54 +0100 Subject: [PATCH 42/57] Disallow iteration over hash types (#2167) Iterating over hash types introduces non-determinism. 
Ban generally and require turning off the lint locally when it's fine to do so. This [lint](https://rust-lang.github.io/rust-clippy/master/index.html#/iter_over_hash_type) only applies to loops. --- Cargo.toml | 5 +++-- ast/src/analyzed/mod.rs | 2 +- executor/src/witgen/machines/block_machine.rs | 2 ++ executor/src/witgen/machines/sorted_witness_machine.rs | 2 ++ pil-analyzer/src/call_graph.rs | 8 ++++---- pil-analyzer/src/condenser.rs | 6 ++++++ pil-analyzer/src/pil_analyzer.rs | 2 ++ pil-analyzer/src/structural_checks.rs | 2 ++ pil-analyzer/src/type_inference.rs | 4 ++++ pil-analyzer/src/type_unifier.rs | 2 ++ pilopt/src/lib.rs | 2 ++ 11 files changed, 30 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bdaa7d2c41..42871beb57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ members = [ "executor-utils", ] -exclude = [ "riscv-runtime" ] +exclude = ["riscv-runtime"] [workspace.package] version = "0.1.3" @@ -86,4 +86,5 @@ debug = true [workspace.lints.clippy] print_stdout = "deny" -uninlined_format_args = "deny" \ No newline at end of file +uninlined_format_args = "deny" +iter_over_hash_type = "deny" diff --git a/ast/src/analyzed/mod.rs b/ast/src/analyzed/mod.rs index 81e4031f26..a2291b3009 100644 --- a/ast/src/analyzed/mod.rs +++ b/ast/src/analyzed/mod.rs @@ -648,7 +648,7 @@ pub fn type_from_definition( /// given a list of type arguments. 
#[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct SolvedTraitImpls { - impls: HashMap, ImplData>>, + impls: BTreeMap, ImplData>>, } #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] diff --git a/executor/src/witgen/machines/block_machine.rs b/executor/src/witgen/machines/block_machine.rs index fa5a887276..614d9f00e8 100644 --- a/executor/src/witgen/machines/block_machine.rs +++ b/executor/src/witgen/machines/block_machine.rs @@ -320,6 +320,8 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> { /// _operation_id_no_change = ((1 - _block_enforcer_last_step) * (1 - )); /// This function fixes this exception by setting _operation_id_no_change to 0. fn handle_last_row(&self, data: &mut HashMap>) { + #[allow(clippy::iter_over_hash_type)] + // This is deterministic because there is no shared state. for (poly_id, col) in data.iter_mut() { if self .parts diff --git a/executor/src/witgen/machines/sorted_witness_machine.rs b/executor/src/witgen/machines/sorted_witness_machine.rs index ea8ee8ef2a..c2993123bb 100644 --- a/executor/src/witgen/machines/sorted_witness_machine.rs +++ b/executor/src/witgen/machines/sorted_witness_machine.rs @@ -228,6 +228,8 @@ impl<'a, T: FieldElement> Machine<'a, T> for SortedWitnesses<'a, T> { } result.insert(self.fixed_data.column_name(&self.key_col).to_string(), keys); + #[allow(clippy::iter_over_hash_type)] + // TODO: Is this deterministic? 
for (col, &i) in &self.witness_positions { let mut col_values = values .iter_mut() diff --git a/pil-analyzer/src/call_graph.rs b/pil-analyzer/src/call_graph.rs index a5a59ffcf4..70330bc826 100644 --- a/pil-analyzer/src/call_graph.rs +++ b/pil-analyzer/src/call_graph.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use powdr_ast::{ analyzed::{Expression, Reference}, @@ -22,7 +22,7 @@ pub fn sort_called_first<'a, I: Iterator fn topo_sort_visit<'a, 'b>( name: &'a str, - graph: &'b HashMap<&'a str, HashSet<&'a str>>, + graph: &'b BTreeMap<&'a str, BTreeSet<&'a str>>, visited: &'b mut HashSet<&'a str>, result: &'b mut Vec, ) { @@ -39,10 +39,10 @@ fn topo_sort_visit<'a, 'b>( fn call_graph<'a, I: Iterator)>>( symbols: I, -) -> HashMap<&'a str, HashSet<&'a str>> { +) -> BTreeMap<&'a str, BTreeSet<&'a str>> { symbols .map(|(name, expr)| { - let mut called: HashSet<&str> = HashSet::new(); + let mut called: BTreeSet<&str> = BTreeSet::new(); if let Some(e) = expr { e.all_children().for_each(|e| { if let Expression::Reference(_, Reference::Poly(r)) = e { diff --git a/pil-analyzer/src/condenser.rs b/pil-analyzer/src/condenser.rs index 0262a66b13..b25ed0b9d8 100644 --- a/pil-analyzer/src/condenser.rs +++ b/pil-analyzer/src/condenser.rs @@ -147,6 +147,8 @@ pub fn condense( }) .collect::>(); + #[allow(clippy::iter_over_hash_type)] + // TODO: is this deterministic? for (name, value) in condenser.extract_new_column_values() { if new_values.insert(name.clone(), value).is_some() { panic!("Column {name} already has a hint set, but tried to add another one.",) @@ -162,9 +164,13 @@ pub fn condense( .collect(); definitions.retain(|name, _| !intermediate_columns.contains_key(name)); + #[allow(clippy::iter_over_hash_type)] + // This is deterministic because insertion order does not matter. 
for symbol in new_columns { definitions.insert(symbol.absolute_name.clone(), (symbol, None)); } + #[allow(clippy::iter_over_hash_type)] + // This is deterministic because definitions can be updated in any order. for (name, new_value) in new_values { if let Some((_, value)) = definitions.get_mut(&name) { if !value.is_none() { diff --git a/pil-analyzer/src/pil_analyzer.rs b/pil-analyzer/src/pil_analyzer.rs index 39d1118758..7dbf0ca100 100644 --- a/pil-analyzer/src/pil_analyzer.rs +++ b/pil-analyzer/src/pil_analyzer.rs @@ -204,6 +204,8 @@ impl PILAnalyzer { /// Check that query and constr functions are used in the correct contexts. pub fn side_effect_check(&self) -> Result<(), Vec> { let mut errors = vec![]; + #[allow(clippy::iter_over_hash_type)] + // TODO: This is not deterministic to the extent that the errors are added in arbitrary order. Source order would be better. for (symbol, value) in self.definitions.values() { let Some(value) = value else { continue }; let context = match symbol.kind { diff --git a/pil-analyzer/src/structural_checks.rs b/pil-analyzer/src/structural_checks.rs index bc327750af..b50d0947ac 100644 --- a/pil-analyzer/src/structural_checks.rs +++ b/pil-analyzer/src/structural_checks.rs @@ -85,6 +85,8 @@ fn check_struct_declarations( definitions: &HashMap)>, ) -> Vec { let mut errors = Vec::new(); + #[allow(clippy::iter_over_hash_type)] + // TODO: This is not deterministic, because the errors are inserted in arbitrary order. Source order would be better. for (symbol, def) in definitions.values() { let Some(FunctionValueDefinition::TypeDeclaration(TypeDeclaration::Struct(struct_decl))) = def diff --git a/pil-analyzer/src/type_inference.rs b/pil-analyzer/src/type_inference.rs index 82ee2c8a47..4b99b5c6c1 100644 --- a/pil-analyzer/src/type_inference.rs +++ b/pil-analyzer/src/type_inference.rs @@ -168,6 +168,8 @@ impl TypeChecker { // Now we check for all symbols that are not declared as a type scheme that they // can resolve to a concrete type. 
+ #[allow(clippy::iter_over_hash_type)] + // TODO: This is not deterministic, because it returns the first error in an arbitrary order. Source order would be better. for (name, (source_ref, declared_type)) in &self.declared_types { if declared_type.vars.is_empty() { // It is not a type scheme, see if we were able to derive a concrete type. @@ -234,6 +236,8 @@ impl TypeChecker { // Add builtin schemes if they are not already there and also remove them from the definitions // (because we ignore the defined value). + #[allow(clippy::iter_over_hash_type)] + // This is deterministic, because the order does not matter. for (name, scheme) in builtin_schemes() { self.declared_types .entry(name.clone()) diff --git a/pil-analyzer/src/type_unifier.rs b/pil-analyzer/src/type_unifier.rs index 1d7da5a4d0..cb4a1b9af4 100644 --- a/pil-analyzer/src/type_unifier.rs +++ b/pil-analyzer/src/type_unifier.rs @@ -172,6 +172,8 @@ impl Unifier { )); } + #[allow(clippy::iter_over_hash_type)] + // TODO: Is this deterministic? 
for bound in self.type_var_bounds(&type_var) { self.ensure_bound(&ty, bound)?; } diff --git a/pilopt/src/lib.rs b/pilopt/src/lib.rs index 5dc63b9814..5db5a81aaf 100644 --- a/pilopt/src/lib.rs +++ b/pilopt/src/lib.rs @@ -126,6 +126,7 @@ fn build_poly_id_to_definition_name_lookup( pil_file: &Analyzed, ) -> BTreeMap { let mut poly_id_to_definition_name = BTreeMap::new(); + #[allow(clippy::iter_over_hash_type)] for (name, (symbol, _)) in &pil_file.definitions { if matches!(symbol.kind, SymbolKind::Poly(_)) { symbol.array_elements().for_each(|(_, id)| { @@ -133,6 +134,7 @@ fn build_poly_id_to_definition_name_lookup( }); } } + #[allow(clippy::iter_over_hash_type)] for (name, (symbol, _)) in &pil_file.intermediate_columns { symbol.array_elements().for_each(|(_, id)| { poly_id_to_definition_name.insert(id, name); From e286da46f7470e0c1a9c354557449151762de156 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Mon, 9 Dec 2024 19:12:33 +0100 Subject: [PATCH 43/57] Bus: Remove `acc_next` + materialize folded tuple (#2201) This PR: - Removes the `acc_next` columns, which were only needed because of a limitation of prover functions. The prover function that existed is now removed entirely, because we use the hand written witgen anyway, see #2191. - Also this PR materializes the folded tuple. This lowers the degree of the constraints if the tuples being sent have a degree > 1. It also enables next references in the tuple being sent. As a result, we can now generate Plonky3 proofs with a bus! ```bash cargo run -r --features plonky3 --bin powdr-rs compile riscv/tests/riscv_data/keccak -o output --max-degree-log 18 --field gl cargo run -r --features plonky3 pil output/keccak.asm -o output -f --field gl --prove-with plonky3 --linker-mode bus ``` The proof generation takes 8.32s (of which 394ms are spent on generating the second-stage witness). This compares to 2.07s proof time without a bus. 
--- executor/src/witgen/bus_accumulator/mod.rs | 45 ++++++++-------------- std/protocols/bus.asm | 44 ++++++++++++--------- 2 files changed, 42 insertions(+), 47 deletions(-) diff --git a/executor/src/witgen/bus_accumulator/mod.rs b/executor/src/witgen/bus_accumulator/mod.rs index 9479383185..76e1c6f38e 100644 --- a/executor/src/witgen/bus_accumulator/mod.rs +++ b/executor/src/witgen/bus_accumulator/mod.rs @@ -85,15 +85,7 @@ impl<'a, T: FieldElement> BusAccumulatorGenerator<'a, T> { let accumulators = self .bus_interactions .par_iter() - .flat_map(|bus_interaction| { - let (acc1, acc2) = self.interaction_columns(bus_interaction); - let next1 = next(&acc1); - let next2 = next(&acc2); - - // We assume that the second-stage witness columns are in this order, - // for each bus interaction. - [acc1, acc2, next1, next2] - }) + .flat_map(|bus_interaction| self.interaction_columns(bus_interaction)) .collect::>(); self.pil @@ -107,11 +99,13 @@ impl<'a, T: FieldElement> BusAccumulatorGenerator<'a, T> { fn interaction_columns( &self, bus_interaction: &PhantomBusInteractionIdentity, - ) -> (Vec, Vec) { + ) -> Vec> { let intermediate_definitions = self.pil.intermediate_definitions(); let empty_challenges = BTreeMap::new(); let size = self.trace_values.height(); + let mut folded1 = vec![T::zero(); size]; + let mut folded2 = vec![T::zero(); size]; let mut acc1 = vec![T::zero(); size]; let mut acc2 = vec![T::zero(); size]; @@ -128,26 +122,26 @@ impl<'a, T: FieldElement> BusAccumulatorGenerator<'a, T> { }; let multiplicity = evaluator.evaluate(&bus_interaction.multiplicity); + let tuple = bus_interaction + .tuple + .0 + .iter() + .map(|r| evaluator.evaluate(r)) + .collect::>(); + let folded = self.beta - self.fingerprint(&tuple); + let new_acc = match multiplicity.is_zero() { true => current_acc, - false => { - let tuple = bus_interaction - .tuple - .0 - .iter() - .map(|r| evaluator.evaluate(r)) - .collect::>(); - - let fingerprint = self.beta - self.fingerprint(&tuple); - 
current_acc + fingerprint.inverse() * multiplicity - } + false => current_acc + folded.inverse() * multiplicity, }; + folded1[i] = folded.0; + folded2[i] = folded.1; acc1[i] = new_acc.0; acc2[i] = new_acc.1; } - (acc1, acc2) + vec![folded1, folded2, acc1, acc2] } /// Fingerprints a tuples of field elements, using the pre-computed powers of alpha. @@ -170,10 +164,3 @@ fn powers_of_alpha(alpha: Fp2, n: usize) -> Vec> { }) .collect::>() } - -/// Rotates a column to the left. -fn next(column: &[T]) -> Vec { - let mut result = column.to_vec(); - result.rotate_left(1); - result -} diff --git a/std/protocols/bus.asm b/std/protocols/bus.asm index 0bf93d8ce7..66fdaab295 100644 --- a/std/protocols/bus.asm +++ b/std/protocols/bus.asm @@ -16,6 +16,9 @@ use std::protocols::fingerprint::fingerprint_with_id; use std::protocols::fingerprint::fingerprint_with_id_inter; use std::math::fp2::required_extension_size; use std::prover::eval; +use std::field::known_field; +use std::field::KnownField; +use std::check::panic; /// Sends the tuple (id, tuple...) to the bus by adding /// `multiplicity / (beta - fingerprint(id, tuple...))` to `acc` @@ -40,7 +43,29 @@ let bus_interaction: expr, expr[], expr -> () = constr |id, tuple, multiplicity| let beta = fp2_from_array(array::new(required_extension_size(), |i| challenge(0, i + 3))); // Implemented as: folded = (beta - fingerprint(id, tuple...)); - let folded = sub_ext(beta, fingerprint_with_id_inter(id, tuple, alpha)); + let folded = match known_field() { + Option::Some(KnownField::Goldilocks) => { + // Materialized as a witness column for two reasons: + // - It makes sure the constraint degree is independent of the input tuple. + // - We can access folded', even if the tuple contains next references. + // Note that if all expressions are degree-1 and there is no next reference, + // this is wasteful, but we can't check that here. 
+ let folded = fp2_from_array( + array::new(required_extension_size(), + |i| std::prover::new_witness_col_at_stage("folded", 1)) + ); + constrain_eq_ext(folded, sub_ext(beta, fingerprint_with_id_inter(id, tuple, alpha))); + folded + }, + // The case above triggers our hand-written witness generation, but on Bn254, we'd not be + // on the extension field and use the automatic witness generation. + // However, it does not work with a materialized folded tuple. At the same time, Halo2 + // (the only prover that supports BN254) does not have a hard degree bound. So, we can + // in-line the expression here. + Option::Some(KnownField::BN254) => sub_ext(beta, fingerprint_with_id_inter(id, tuple, alpha)), + _ => panic("Unexpected field!") + }; + let folded_next = next_ext(folded); let m_ext = from_base(multiplicity); @@ -62,23 +87,6 @@ let bus_interaction: expr, expr[], expr -> () = constr |id, tuple, multiplicity| ); constrain_eq_ext(update_expr, from_base(0)); - - // In the extension field, we need a prover function for the accumulator. - if needs_extension() { - // TODO: Helper columns, because we can't access the previous row in hints - let acc_next_col = std::array::map(acc, |_| std::prover::new_witness_col_at_stage("acc_next", 1)); - query |i| { - let _ = std::array::zip( - acc_next_col, - compute_next_z(is_first, id, tuple, multiplicity, acc_ext, alpha, beta), - |acc_next, hint_val| std::prover::provide_value(acc_next, i, hint_val) - ); - }; - std::array::zip(acc, acc_next_col, |acc_col, acc_next| { - acc_col' = acc_next - }); - } else { - } }; /// Compute acc' = acc * (1 - is_first') + multiplicity' / fingerprint(id, tuple...), From d7170e13049919c0e5f28d07d5c559f0da047975 Mon Sep 17 00:00:00 2001 From: Thibaut Schaeffer Date: Mon, 9 Dec 2024 23:40:52 +0100 Subject: [PATCH 44/57] Fix CI bench (#2211) All cargo.toml must have this `bench = false`. This will be caught in pr tests in the future. 
--- asmopt/Cargo.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/asmopt/Cargo.toml b/asmopt/Cargo.toml index 998c554f49..3aa43774f4 100644 --- a/asmopt/Cargo.toml +++ b/asmopt/Cargo.toml @@ -11,4 +11,7 @@ powdr-ast.workspace = true powdr-analysis.workspace = true powdr-importer.workspace = true powdr-pilopt.workspace = true -powdr-parser.workspace = true \ No newline at end of file +powdr-parser.workspace = true + +[lib] +bench = false # See https://github.com/bheisler/criterion.rs/issues/458 From 8450f6a1ceddec193358c6413662f7fa78d13fb7 Mon Sep 17 00:00:00 2001 From: Thibaut Schaeffer Date: Tue, 10 Dec 2024 12:17:05 +0100 Subject: [PATCH 45/57] Fix book deploy (#2215) Avoid deleting `.git` etc --- .github/workflows/deploy-book.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-book.yml b/.github/workflows/deploy-book.yml index 4225c0cba5..cff7abd638 100644 --- a/.github/workflows/deploy-book.yml +++ b/.github/workflows/deploy-book.yml @@ -37,7 +37,7 @@ jobs: # Delete the ref to avoid keeping history. git update-ref -d refs/heads/gh-pages # Delete everything except the `dev` folder, as it contains benchmarks we should keep. - find . -mindepth 1 -maxdepth 1 ! -name "dev" -exec rm -rf {} + + find . -mindepth 1 -maxdepth 1 ! -name "dev" ! -name ".*" -exec rm -rf {} + mv ../book/* . git add . git commit -m "Deploy $GITHUB_SHA to gh-pages" From c40099e8b97b38646517cc7f5a685d4da7c025e0 Mon Sep 17 00:00:00 2001 From: Leo Date: Tue, 10 Dec 2024 12:52:53 +0100 Subject: [PATCH 46/57] fix udeps and add to pr-tests (#2214) This PR fixes udeps again and finally adds it to the pr-tests so it fails in a PR already. 
--- .github/workflows/pr-tests.yml | 472 +++++++++++++++++---------------- asmopt/Cargo.toml | 1 - 2 files changed, 246 insertions(+), 227 deletions(-) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 9713ba1e4f..eb4774b30a 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -23,57 +23,57 @@ jobs: runs-on: warp-ubuntu-2404-x64-8x steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: âš¡ Restore rust cache - id: cache - uses: WarpBuilds/cache/restore@v1 - with: - path: | - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - Cargo.lock - key: ${{ runner.os }}-cargo-pr-tests - - name: Date of the restored cache - run: cat target/cache-build-date.txt - continue-on-error: true + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: âš¡ Restore rust cache + id: cache + uses: WarpBuilds/cache/restore@v1 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + Cargo.lock + key: ${{ runner.os }}-cargo-pr-tests + - name: Date of the restored cache + run: cat target/cache-build-date.txt + continue-on-error: true - ##### The block below is shared between cache build and PR build workflows ##### - - name: Install EStarkPolygon prover dependencies - run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm - - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) - run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - - name: Install Rust toolchain 1.81 (stable) - run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu - - name: Set cargo to perform shallow clones - run: echo "CARGO_NET_GIT_FETCH_WITH_CLI=true" >> $GITHUB_ENV - - name: Cargo check with 
Rust 1.81 (default features) - run: cargo +1.81-x86_64-unknown-linux-gnu check --all-targets - - name: Lint no default features - run: cargo clippy --all --all-targets --no-default-features --profile pr-tests --verbose -- -D warnings - - name: Lint all features - run: cargo clippy --all --all-targets --all-features --profile pr-tests --verbose -- -D warnings - - name: Format - run: cargo fmt --all --check --verbose - - name: Build - run: cargo build --all-targets --all --all-features --profile pr-tests --verbose - ############################################################################### + ##### The block below is shared between cache build and PR build workflows ##### + - name: Install EStarkPolygon prover dependencies + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm + - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) + run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu + - name: Install Rust toolchain 1.81 (stable) + run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu + - name: Set cargo to perform shallow clones + run: echo "CARGO_NET_GIT_FETCH_WITH_CLI=true" >> $GITHUB_ENV + - name: Cargo check with Rust 1.81 (default features) + run: cargo +1.81-x86_64-unknown-linux-gnu check --all-targets + - name: Lint no default features + run: cargo clippy --all --all-targets --no-default-features --profile pr-tests --verbose -- -D warnings + - name: Lint all features + run: cargo clippy --all --all-targets --all-features --profile pr-tests --verbose -- -D warnings + - name: Format + run: cargo fmt --all --check --verbose + - name: Build + run: cargo build --all-targets --all --all-features --profile pr-tests --verbose + ############################################################################### 
- - uses: taiki-e/install-action@nextest - - name: Archive EStarkPolygon prover built dependencies - run: tar --zstd -cf pil-stark-prover-deps.tar.zst target/pr-tests/build/pil-stark-prover-*/out - - name: Create tests archive - run: cargo nextest archive --archive-file tests.tar.zst --cargo-profile pr-tests --workspace --all-features - - name: Upload build artifacts - uses: actions/upload-artifact@v4 - with: - name: tests_archive - path: | - tests.tar.zst - pil-stark-prover-deps.tar.zst + - uses: taiki-e/install-action@nextest + - name: Archive EStarkPolygon prover built dependencies + run: tar --zstd -cf pil-stark-prover-deps.tar.zst target/pr-tests/build/pil-stark-prover-*/out + - name: Create tests archive + run: cargo nextest archive --archive-file tests.tar.zst --cargo-profile pr-tests --workspace --all-features + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: tests_archive + path: | + tests.tar.zst + pil-stark-prover-deps.tar.zst test_quick: needs: build @@ -81,162 +81,162 @@ jobs: strategy: matrix: test: - - "1" - - "2" + - "1" + - "2" steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: tests_archive - - name: âš¡ Cache nodejs - uses: actions/cache@v4 - with: - path: | - ~/pilcom/node_modules - key: ${{ runner.os }}-pilcom-node-modules - - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) - run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - - name: Install nightly-2024-08-01 - run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install std source - run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install riscv target 
- run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install test dependencies - run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - - name: Install pilcom - run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - - uses: taiki-e/install-action@nextest - - name: Run default tests - run: cargo nextest run --archive-file tests.tar.zst --workspace-remap . --verbose --partition count:"${{ matrix.test }}"/2 - env: - PILCOM: ${{ github.workspace }}/pilcom/ - POWDR_STD: ${{ github.workspace }}/std/ + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: tests_archive + - name: âš¡ Cache nodejs + uses: actions/cache@v4 + with: + path: | + ~/pilcom/node_modules + key: ${{ runner.os }}-pilcom-node-modules + - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) + run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu + - name: Install nightly-2024-08-01 + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld + - name: Install pilcom + run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install + - uses: taiki-e/install-action@nextest + - name: Run default tests + run: cargo nextest run --archive-file 
tests.tar.zst --workspace-remap . --verbose --partition count:"${{ matrix.test }}"/2 + env: + PILCOM: ${{ github.workspace }}/pilcom/ + POWDR_STD: ${{ github.workspace }}/std/ run_examples: runs-on: warp-ubuntu-2404-x64-4x steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: âš¡ Restore rust cache - id: cache - uses: WarpBuilds/cache/restore@v1 - with: - path: | - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - Cargo.lock - key: ${{ runner.os }}-cargo-pr-tests - - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) - run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - - name: Install nightly - run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install std source - run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install riscv target - run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install test dependencies - run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - - name: Run examples - run: cargo run --profile pr-tests --example hello_world && cargo run --profile pr-tests --example sqrt_with_publics && cargo run --profile pr-tests --example fibonacci + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: âš¡ Restore rust cache + id: cache + uses: WarpBuilds/cache/restore@v1 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + Cargo.lock + key: ${{ runner.os }}-cargo-pr-tests + - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) + run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup 
component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu + - name: Install nightly + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld + - name: Run examples + run: cargo run --profile pr-tests --example hello_world && cargo run --profile pr-tests --example sqrt_with_publics && cargo run --profile pr-tests --example fibonacci test_estark_polygon: needs: build runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: tests_archive - - name: âš¡ Cache nodejs - uses: actions/cache@v4 - with: - path: | - ~/pilcom/node_modules - key: ${{ runner.os }}-pilcom-node-modules - - name: Install Rust toolchain nightly-2024-09-21(with clippy and rustfmt) - run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - - name: Install nightly-2024-08-01 - run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install std source - run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install riscv target - run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install pilcom - run: git clone 
https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - - name: Install EStarkPolygon prover system dependency - run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev - - uses: taiki-e/install-action@nextest - - name: Unpack EStarkPolygon built dependencies - run: tar --zstd -xf pil-stark-prover-deps.tar.zst - - name: Run EStark Polygon test - run: cargo nextest run --archive-file tests.tar.zst --workspace-remap . --verbose --run-ignored=ignored-only --no-capture -E "test(=vec_median_estark_polygon)" - env: - PILCOM: ${{ github.workspace }}/pilcom/ - POWDR_STD: ${{ github.workspace }}/std/ + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: tests_archive + - name: âš¡ Cache nodejs + uses: actions/cache@v4 + with: + path: | + ~/pilcom/node_modules + key: ${{ runner.os }}-pilcom-node-modules + - name: Install Rust toolchain nightly-2024-09-21(with clippy and rustfmt) + run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu + - name: Install nightly-2024-08-01 + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install pilcom + run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install + - name: Install EStarkPolygon prover system dependency + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev + - uses: taiki-e/install-action@nextest + - name: Unpack EStarkPolygon built dependencies + run: tar --zstd -xf 
pil-stark-prover-deps.tar.zst + - name: Run EStark Polygon test + run: cargo nextest run --archive-file tests.tar.zst --workspace-remap . --verbose --run-ignored=ignored-only --no-capture -E "test(=vec_median_estark_polygon)" + env: + PILCOM: ${{ github.workspace }}/pilcom/ + POWDR_STD: ${{ github.workspace }}/std/ test_slow: strategy: matrix: test: - - "1" - - "2" - - "3" - - "4" - - "5" - - "6" - - "7" - - "8" + - "1" + - "2" + - "3" + - "4" + - "5" + - "6" + - "7" + - "8" needs: build runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Download build artifacts - uses: actions/download-artifact@v4 - with: - name: tests_archive - - name: âš¡ Cache nodejs - uses: actions/cache@v4 - with: - path: | - ~/pilcom/node_modules - key: ${{ runner.os }}-pilcom-node-modules - - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) - run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu - - name: Install test dependencies - run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - - name: Install nightly-2024-08-01 - run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install std source - run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install riscv target - run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install pilcom - run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - - uses: taiki-e/install-action@nextest - - name: Run slow tests - # Number threads is set to 2 because the runner does not have enough memory for more. 
- run: | - NIGHTLY_TESTS=$(cat .github/workflows/nightly_tests_list.txt) - cargo nextest run --archive-file tests.tar.zst --workspace-remap . --verbose --run-ignored=ignored-only -E "!($NIGHTLY_TESTS)" --test-threads 2 --partition hash:"${{ matrix.test }}"/8 - shell: bash - env: - PILCOM: ${{ github.workspace }}/pilcom/ - POWDR_STD: ${{ github.workspace }}/std/ + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + name: tests_archive + - name: âš¡ Cache nodejs + uses: actions/cache@v4 + with: + path: | + ~/pilcom/node_modules + key: ${{ runner.os }}-pilcom-node-modules + - name: Install Rust toolchain nightly-2024-09-21 (with clippy and rustfmt) + run: rustup toolchain install nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add clippy --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu && rustup component add rustfmt --toolchain nightly-2024-09-21-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld + - name: Install nightly-2024-08-01 + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install pilcom + run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install + - uses: taiki-e/install-action@nextest + - name: Run slow tests + # Number threads is set to 2 because the runner does not have enough memory for more. + run: | + NIGHTLY_TESTS=$(cat .github/workflows/nightly_tests_list.txt) + cargo nextest run --archive-file tests.tar.zst --workspace-remap . 
--verbose --run-ignored=ignored-only -E "!($NIGHTLY_TESTS)" --test-threads 2 --partition hash:"${{ matrix.test }}"/8 + shell: bash + env: + PILCOM: ${{ github.workspace }}/pilcom/ + POWDR_STD: ${{ github.workspace }}/std/ bench: needs: build @@ -247,45 +247,65 @@ jobs: pull-requests: write steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - name: âš¡ Restore rust cache - id: cache - uses: WarpBuilds/cache/restore@v1 - with: - path: | - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - Cargo.lock - key: ${{ runner.os }}-cargo-pr-tests - - name: Install Rust toolchain 1.81 - run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu - - name: Install nightly - run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install std source - run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install riscv target - run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu - - name: Install test dependencies - run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - - name: Install EStarkPolygon prover dependencies - run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc uuid-dev build-essential cmake pkg-config git - - name: Install pilcom - run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install - - name: Run benchmarks - # we add `|| exit 1` to make sure the step fails if `cargo bench` fails - run: cargo bench --workspace --all-features -- --output-format bencher | tee output.txt || exit 1 - - name: Store benchmark result - uses: benchmark-action/github-action-benchmark@v1 - with: - name: Benchmarks - tool: 'cargo' - output-file-path: output.txt - github-token: ${{ secrets.GITHUB_TOKEN }} - auto-push: true - alert-threshold: '120%' - 
comment-on-alert: true - summary-always: true + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: âš¡ Restore rust cache + id: cache + uses: WarpBuilds/cache/restore@v1 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + Cargo.lock + key: ${{ runner.os }}-cargo-pr-tests + - name: Install Rust toolchain 1.81 + run: rustup toolchain install 1.81-x86_64-unknown-linux-gnu + - name: Install nightly + run: rustup toolchain install nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install std source + run: rustup component add rust-src --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install riscv target + run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2024-08-01-x86_64-unknown-linux-gnu + - name: Install test dependencies + run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld + - name: Install EStarkPolygon prover dependencies + run: sudo apt-get update && sudo apt-get install -y nlohmann-json3-dev libpqxx-dev nasm libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc uuid-dev build-essential cmake pkg-config git + - name: Install pilcom + run: git clone https://github.com/0xPolygonHermez/pilcom.git && cd pilcom && npm install + - name: Run benchmarks + # we add `|| exit 1` to make sure the step fails if `cargo bench` fails + run: cargo bench --workspace --all-features -- --output-format bencher | tee output.txt || exit 1 + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + name: Benchmarks + tool: "cargo" + output-file-path: output.txt + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + alert-threshold: "120%" + comment-on-alert: true + summary-always: true + + udeps: + needs: build + runs-on: ubuntu-22.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install nightly toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly 
+ override: true + + - name: Run cargo-udeps + uses: aig787/cargo-udeps-action@v1 + with: + version: "latest" + args: "--all-targets" diff --git a/asmopt/Cargo.toml b/asmopt/Cargo.toml index 3aa43774f4..57067c6eae 100644 --- a/asmopt/Cargo.toml +++ b/asmopt/Cargo.toml @@ -9,7 +9,6 @@ repository.workspace = true [dependencies] powdr-ast.workspace = true powdr-analysis.workspace = true -powdr-importer.workspace = true powdr-pilopt.workspace = true powdr-parser.workspace = true From 0a12ffe1884b9fa39585bba18618baf2d26b166f Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 10 Dec 2024 14:28:44 +0100 Subject: [PATCH 47/57] Remove dependencies on build (#2217) These runs make no use of the artefacts created in build and do a full re-build, so we might as well run them from the start. --- .github/workflows/pr-tests.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index eb4774b30a..4c2a7f153c 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -239,7 +239,6 @@ jobs: POWDR_STD: ${{ github.workspace }}/std/ bench: - needs: build runs-on: warp-ubuntu-2404-x64-4x permissions: contents: write @@ -291,7 +290,6 @@ jobs: summary-always: true udeps: - needs: build runs-on: ubuntu-22.04 steps: From 670802aaee125602269e7ef0e0f5422119598e69 Mon Sep 17 00:00:00 2001 From: Thibaut Schaeffer Date: Tue, 10 Dec 2024 15:09:38 +0100 Subject: [PATCH 48/57] Fix book deploy again (#2218) Turns out `git worktree` does not track the remote branch, so the benchmarks are not there. Revert to what we did before, and explicitly restore the benchmarks from the remote `gh-pages` branch. 
--- .github/workflows/deploy-book.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-book.yml b/.github/workflows/deploy-book.yml index cff7abd638..29ed1a93f5 100644 --- a/.github/workflows/deploy-book.yml +++ b/.github/workflows/deploy-book.yml @@ -36,9 +36,10 @@ jobs: cd gh-pages # Delete the ref to avoid keeping history. git update-ref -d refs/heads/gh-pages - # Delete everything except the `dev` folder, as it contains benchmarks we should keep. - find . -mindepth 1 -maxdepth 1 ! -name "dev" ! -name ".*" -exec rm -rf {} + + rm -rf * mv ../book/* . + # restore the benchmark directory + git restore --source=origin/gh-pages -- dev git add . git commit -m "Deploy $GITHUB_SHA to gh-pages" git push --force --set-upstream origin gh-pages From 11c3c7023c8febdbb55cafcbd76003bd9f93ac58 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Tue, 10 Dec 2024 15:50:14 +0100 Subject: [PATCH 49/57] Fix Plonky3 proofs for removed machines (#2221) We removed them in the proof, but still ran second-stage witgen for it. --- backend/src/plonky3/stark.rs | 16 +++++++++++----- plonky3/src/prover.rs | 19 ++++++++----------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/backend/src/plonky3/stark.rs b/backend/src/plonky3/stark.rs index b6ec7422fc..7e0d0b679e 100644 --- a/backend/src/plonky3/stark.rs +++ b/backend/src/plonky3/stark.rs @@ -211,11 +211,17 @@ where let mut witness_by_machine = self .split .iter() - .map(|(machine, (pil, _))| { - ( - machine.clone(), - machine_witness_columns(witness, pil, machine), - ) + .filter_map(|(machine, (pil, _))| { + let witness_columns = machine_witness_columns(witness, pil, machine); + if witness_columns[0].1.is_empty() { + // Empty machines can be removed entirely. 
+ None + } else { + Some(( + machine.clone(), + machine_witness_columns(witness, pil, machine), + )) + } }) .collect::>(); diff --git a/plonky3/src/prover.rs b/plonky3/src/prover.rs index f0258a8d50..84558e1768 100644 --- a/plonky3/src/prover.rs +++ b/plonky3/src/prover.rs @@ -393,6 +393,9 @@ where } } +/// Prove a program execution. +/// Note that `witness_by_machine` might not have all the machines, empty ones are expected +/// to be removed already. #[instrument(skip_all)] #[allow(clippy::multiple_bound_locations)] // cfg not supported in where clauses? pub fn prove( @@ -405,18 +408,12 @@ where ProverData: Send, Commitment: Send, { - let (tables, stage_0): (BTreeMap<_, _>, BTreeMap<_, _>) = program - .split + let (tables, stage_0): (BTreeMap<_, _>, BTreeMap<_, _>) = witness_by_machine .iter() - .filter_map(|(name, (_, constraint_system))| { - let columns = witness_by_machine.get(name).unwrap(); + .map(|(name, columns)| { + let constraint_system = &program.split.get(name).unwrap().1; let degree = columns[0].1.len(); - if degree == 0 { - // If a machine has no rows, remove it entirely. - return None; - } - let table = Table { air: PowdrTable::new(constraint_system), degree, @@ -433,7 +430,7 @@ where ); } - Some(( + ( (name.clone(), table), ( name.clone(), @@ -455,7 +452,7 @@ where .collect(), }, ), - )) + ) }) .unzip(); From 0180542559db8ea6811c2b57d5b2ca292ea7ea69 Mon Sep 17 00:00:00 2001 From: Georg Wiese Date: Tue, 10 Dec 2024 17:44:15 +0100 Subject: [PATCH 50/57] Refactorings around `process_lookup_direct` (#2209) This PR refactors a few things: - `process_lookup_direct` no longer has a default implementation. Eventually, we want all machines to implement it, so I figured it would be better to explicitly panic in each machine. - Refactored the implementation of `FixedLookupMachine::process_plookup`, pulling some stuff out into a new `CallerData` struct. 
This is similar to what @chriseth has done on [`call_jit_from_block`](https://github.com/powdr-labs/powdr/compare/main...call_jit_from_block), see the comment below. - As a first test, I implemented `process_lookup_direct` for the "large"-field memory machine (and `process_plookup` by wrapping `process_lookup_direct`) --- .../src/witgen/data_structures/caller_data.rs | 73 +++++++++ executor/src/witgen/data_structures/mod.rs | 1 + executor/src/witgen/machines/block_machine.rs | 13 +- .../double_sorted_witness_machine_16.rs | 11 +- .../double_sorted_witness_machine_32.rs | 142 ++++++++++-------- .../src/witgen/machines/dynamic_machine.rs | 15 +- .../witgen/machines/fixed_lookup_machine.rs | 64 ++------ executor/src/witgen/machines/mod.rs | 10 +- .../witgen/machines/second_stage_machine.rs | 13 +- .../witgen/machines/sorted_witness_machine.rs | 13 +- .../src/witgen/machines/write_once_memory.rs | 11 +- executor/src/witgen/processor.rs | 2 +- 12 files changed, 239 insertions(+), 129 deletions(-) create mode 100644 executor/src/witgen/data_structures/caller_data.rs diff --git a/executor/src/witgen/data_structures/caller_data.rs b/executor/src/witgen/data_structures/caller_data.rs new file mode 100644 index 0000000000..bba32f747c --- /dev/null +++ b/executor/src/witgen/data_structures/caller_data.rs @@ -0,0 +1,73 @@ +use itertools::Itertools; +use powdr_number::FieldElement; + +use crate::witgen::{ + machines::LookupCell, + processor::{Left, OuterQuery}, + EvalError, EvalResult, EvalValue, +}; + +/// A representation of the caller's data. +/// +/// Useful for implementing [Machine::process_plookup] in terms of [Machine::process_lookup_direct]. +pub struct CallerData<'a, 'b, T> { + /// The raw data of the caller. Unknown values should be ignored. + data: Vec, + /// The affine expressions of the caller. + left: &'b Left<'a, T>, +} + +impl<'a, 'b, T: FieldElement> From<&'b OuterQuery<'a, '_, T>> for CallerData<'a, 'b, T> { + /// Builds a `CallerData` from an `OuterQuery`. 
+ fn from(outer_query: &'b OuterQuery<'a, '_, T>) -> Self { + let data = outer_query + .left + .iter() + .map(|l| l.constant_value().unwrap_or_default()) + .collect(); + Self { + data, + left: &outer_query.left, + } + } +} + +impl<'a, 'b, T: FieldElement> CallerData<'a, 'b, T> { + /// Returns the data as a list of `LookupCell`s, as expected by `Machine::process_lookup_direct`. + pub fn as_lookup_cells(&mut self) -> Vec> { + self.data + .iter_mut() + .zip_eq(self.left.iter()) + .map(|(value, left)| match left.constant_value().is_some() { + true => LookupCell::Input(value), + false => LookupCell::Output(value), + }) + .collect() + } +} + +impl<'a, 'b, T: FieldElement> From> for EvalResult<'a, T> { + /// Turns the result of a direct lookup into an `EvalResult`, as used by `Machine::process_plookup`. + /// + /// Note that this function assumes that the lookup was successful and complete. + fn from(data: CallerData<'a, 'b, T>) -> EvalResult<'a, T> { + let mut result = EvalValue::complete(vec![]); + for (l, v) in data.left.iter().zip_eq(data.data.iter()) { + if !l.is_constant() { + let evaluated = l.clone() - (*v).into(); + match evaluated.solve() { + Ok(constraints) => { + result.combine(constraints); + } + Err(_) => { + // Fail the whole lookup + return Err(EvalError::ConstraintUnsatisfiable(format!( + "Constraint is invalid ({l} != {v}).", + ))); + } + } + } + } + Ok(result) + } +} diff --git a/executor/src/witgen/data_structures/mod.rs b/executor/src/witgen/data_structures/mod.rs index a3b49e6dfc..b376530b73 100644 --- a/executor/src/witgen/data_structures/mod.rs +++ b/executor/src/witgen/data_structures/mod.rs @@ -1,3 +1,4 @@ +pub mod caller_data; pub mod column_map; pub mod copy_constraints; pub mod finalizable_data; diff --git a/executor/src/witgen/machines/block_machine.rs b/executor/src/witgen/machines/block_machine.rs index 614d9f00e8..ab2a25b23b 100644 --- a/executor/src/witgen/machines/block_machine.rs +++ b/executor/src/witgen/machines/block_machine.rs @@ 
-2,7 +2,9 @@ use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; use std::iter::{self}; -use super::{compute_size_and_log, ConnectionKind, EvalResult, FixedData, MachineParts}; +use super::{ + compute_size_and_log, ConnectionKind, EvalResult, FixedData, LookupCell, MachineParts, +}; use crate::witgen::affine_expression::AlgebraicVariable; use crate::witgen::analysis::detect_connection_type_and_block_size; @@ -139,6 +141,15 @@ impl<'a, T: FieldElement> Machine<'a, T> for BlockMachine<'a, T> { self.parts.connections.keys().copied().collect() } + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + _identity_id: u64, + _values: &mut [LookupCell<'c, T>], + ) -> Result> { + unimplemented!("Direct lookup not supported by machine {}.", self.name()) + } + fn process_plookup<'b, Q: QueryCallback>( &mut self, mutable_state: &'b MutableState<'a, T, Q>, diff --git a/executor/src/witgen/machines/double_sorted_witness_machine_16.rs b/executor/src/witgen/machines/double_sorted_witness_machine_16.rs index a2c6a5e3f1..4feea2e510 100644 --- a/executor/src/witgen/machines/double_sorted_witness_machine_16.rs +++ b/executor/src/witgen/machines/double_sorted_witness_machine_16.rs @@ -3,7 +3,7 @@ use std::iter::once; use itertools::Itertools; -use super::{ConnectionKind, Machine, MachineParts}; +use super::{ConnectionKind, LookupCell, Machine, MachineParts}; use crate::witgen::data_structures::mutable_state::MutableState; use crate::witgen::machines::compute_size_and_log; use crate::witgen::rows::RowPair; @@ -214,6 +214,15 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses16<'a, T> { } impl<'a, T: FieldElement> Machine<'a, T> for DoubleSortedWitnesses16<'a, T> { + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + _identity_id: u64, + _values: &mut [LookupCell<'c, T>], + ) -> Result> { + unimplemented!("Direct lookup not supported by machine {}.", 
self.name()) + } + fn identity_ids(&self) -> Vec { self.selector_ids.keys().cloned().collect() } diff --git a/executor/src/witgen/machines/double_sorted_witness_machine_32.rs b/executor/src/witgen/machines/double_sorted_witness_machine_32.rs index 3423fafbd9..e542541050 100644 --- a/executor/src/witgen/machines/double_sorted_witness_machine_32.rs +++ b/executor/src/witgen/machines/double_sorted_witness_machine_32.rs @@ -3,13 +3,14 @@ use std::iter::once; use itertools::Itertools; -use super::{Machine, MachineParts}; +use super::{LookupCell, Machine, MachineParts}; +use crate::witgen::data_structures::caller_data::CallerData; use crate::witgen::data_structures::mutable_state::MutableState; use crate::witgen::machines::compute_size_and_log; +use crate::witgen::processor::OuterQuery; use crate::witgen::rows::RowPair; use crate::witgen::util::try_to_simple_poly; -use crate::witgen::{EvalError, EvalResult, FixedData, QueryCallback}; -use crate::witgen::{EvalValue, IncompleteCause}; +use crate::witgen::{EvalError, EvalResult, EvalValue, FixedData, IncompleteCause, QueryCallback}; use powdr_number::{DegreeType, FieldElement, LargeInt}; @@ -184,6 +185,15 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { } impl<'a, T: FieldElement> Machine<'a, T> for DoubleSortedWitnesses32<'a, T> { + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + identity_id: u64, + values: &mut [LookupCell<'c, T>], + ) -> Result> { + self.process_plookup_internal(identity_id, values) + } + fn identity_ids(&self) -> Vec { self.selector_ids.keys().cloned().collect() } @@ -194,11 +204,33 @@ impl<'a, T: FieldElement> Machine<'a, T> for DoubleSortedWitnesses32<'a, T> { fn process_plookup>( &mut self, - _mutable_state: &MutableState<'a, T, Q>, + mutable_state: &MutableState<'a, T, Q>, identity_id: u64, caller_rows: &RowPair<'_, 'a, T>, ) -> EvalResult<'a, T> { - self.process_plookup_internal(identity_id, caller_rows) + let connection 
= self.parts.connections[&identity_id]; + let outer_query = OuterQuery::new(caller_rows, connection); + let mut data = CallerData::from(&outer_query); + if self.process_lookup_direct(mutable_state, identity_id, &mut data.as_lookup_cells())? { + Ok(EvalResult::from(data)?.report_side_effect()) + } else { + // One of the required arguments was not set, find out which: + let data = data.as_lookup_cells(); + Ok(EvalValue::incomplete( + IncompleteCause::NonConstantRequiredArgument( + match (&data[0], &data[1], &data[2], &data[3]) { + (LookupCell::Output(_), _, _, _) => "operation_id", + (_, LookupCell::Output(_), _, _) => "m_addr", + (_, _, LookupCell::Output(_), _) => "m_step", + // Note that for the mload operation, we'd expect this to be an output. + // But since process_lookup_direct returned false and all other arguments are set, + // we must have been in the mstore operation, in which case the value is required. + (_, _, _, LookupCell::Output(_)) => "m_value", + _ => unreachable!(), + }, + ), + )) + } } fn take_witness_col_values<'b, Q: QueryCallback>( @@ -343,8 +375,8 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { fn process_plookup_internal( &mut self, identity_id: u64, - caller_rows: &RowPair<'_, 'a, T>, - ) -> EvalResult<'a, T> { + values: &mut [LookupCell<'_, T>], + ) -> Result> { // We blindly assume the lookup is of the form // OP { operation_id, ADDR, STEP, X } is { operation_id, m_addr, m_step, m_value } // Where: @@ -352,66 +384,39 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> { // - operation_id == 1: Write // - operation_id == 2: Bootloader write - let args = self.parts.connections[&identity_id] - .left - .expressions - .iter() - .map(|e| caller_rows.evaluate(e).unwrap()) - .collect::>(); - - let operation_id = match args[0].constant_value() { - Some(v) => v, - None => { - return Ok(EvalValue::incomplete( - IncompleteCause::NonConstantRequiredArgument("operation_id"), - )) - } + let operation_id = match values[0] { + 
LookupCell::Input(v) => v, + LookupCell::Output(_) => return Ok(false), }; + let addr = match values[1] { + LookupCell::Input(v) => v, + LookupCell::Output(_) => return Ok(false), + }; + let step = match values[2] { + LookupCell::Input(v) => v, + LookupCell::Output(_) => return Ok(false), + }; + let value_ptr = &mut values[3]; let selector_id = *self.selector_ids.get(&identity_id).unwrap(); - let is_normal_write = operation_id == T::from(OPERATION_ID_WRITE); - let is_bootloader_write = operation_id == T::from(OPERATION_ID_BOOTLOADER_WRITE); + let is_normal_write = operation_id == &T::from(OPERATION_ID_WRITE); + let is_bootloader_write = operation_id == &T::from(OPERATION_ID_BOOTLOADER_WRITE); let is_write = is_bootloader_write || is_normal_write; - let addr = match args[1].constant_value() { - Some(v) => v, - None => { - return Ok(EvalValue::incomplete( - IncompleteCause::NonConstantRequiredArgument("m_addr"), - )) - } - }; if self.has_bootloader_write_column { - let is_initialized = self.is_initialized.get(&addr).cloned().unwrap_or_default(); + let is_initialized = self.is_initialized.get(addr).cloned().unwrap_or_default(); if !is_initialized && !is_bootloader_write { panic!("Memory address {addr:x} must be initialized with a bootloader write",); } - self.is_initialized.insert(addr, true); + self.is_initialized.insert(*addr, true); } - let step = args[2] - .constant_value() - .ok_or_else(|| format!("Step must be known but is: {}", args[2]))?; - - let value_expr = &args[3]; - - log::trace!( - "Query addr={:x}, step={step}, write: {is_write}, value: {}", - addr.to_arbitrary_integer(), - value_expr - ); - // TODO this does not check any of the failure modes - let mut assignments = EvalValue::complete(vec![]); - let has_side_effect = if is_write { - let value = match value_expr.constant_value() { - Some(v) => v, - None => { - return Ok(EvalValue::incomplete( - IncompleteCause::NonConstantRequiredArgument("m_value"), - )) - } + let added_memory_access = if is_write { + 
let value = match value_ptr {
+                        LookupCell::Input(v) => *v,
+                        LookupCell::Output(_) => return Ok(false),
                 };
 
                 log::trace!(
@@ -419,31 +424,39 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> {
                     addr,
                     value
                 );
-                self.data.write(addr, value);
+                self.data.write(*addr, *value);
                 self.trace
                     .insert(
-                        (addr, step),
+                        (*addr, *step),
                         Operation {
                             is_normal_write,
                             is_bootloader_write,
-                            value,
+                            value: *value,
                             selector_id,
                         },
                     )
                     .is_none()
             } else {
-                let value = self.data.read(addr);
+                let value = self.data.read(*addr);
                 log::trace!(
                     "Memory read: addr={:x}, step={step}, value={:x}",
                     addr,
                     value
                 );
-                let ass =
-                    (value_expr.clone() - value.into()).solve_with_range_constraints(caller_rows)?;
-                assignments.combine(ass);
+                match value_ptr {
+                    LookupCell::Input(v) => {
+                        if *v != &value {
+                            return Err(EvalError::ConstraintUnsatisfiable(format!(
+                                "{v} != {value} (address 0x{addr:x}, time step {step})"
+                            )));
+                        }
+                    }
+                    LookupCell::Output(v) => **v = value,
+                };
+
                 self.trace
                     .insert(
-                        (addr, step),
+                        (*addr, *step),
                         Operation {
                             is_normal_write,
                             is_bootloader_write,
@@ -454,16 +467,15 @@ impl<'a, T: FieldElement> DoubleSortedWitnesses32<'a, T> {
                     .is_none()
                 };
                 assert!(
-                    has_side_effect,
+                    added_memory_access,
                     "Already had a memory access for address 0x{addr:x} and time step {step}!" 
); - assignments = assignments.report_side_effect(); if self.trace.len() > (self.degree as usize) { return Err(EvalError::RowsExhausted(self.name.clone())); } - Ok(assignments) + Ok(true) } } diff --git a/executor/src/witgen/machines/dynamic_machine.rs b/executor/src/witgen/machines/dynamic_machine.rs index 78cfecc422..d41fccf86e 100644 --- a/executor/src/witgen/machines/dynamic_machine.rs +++ b/executor/src/witgen/machines/dynamic_machine.rs @@ -11,7 +11,11 @@ use crate::witgen::processor::{OuterQuery, SolverState}; use crate::witgen::rows::{Row, RowIndex, RowPair}; use crate::witgen::sequence_iterator::{DefaultSequenceIterator, ProcessingSequenceIterator}; use crate::witgen::vm_processor::VmProcessor; -use crate::witgen::{AlgebraicVariable, EvalResult, EvalValue, FixedData, QueryCallback}; +use crate::witgen::{ + AlgebraicVariable, EvalError, EvalResult, EvalValue, FixedData, QueryCallback, +}; + +use super::LookupCell; struct ProcessResult<'a, T: FieldElement> { eval_value: EvalValue, T>, @@ -31,6 +35,15 @@ pub struct DynamicMachine<'a, T: FieldElement> { } impl<'a, T: FieldElement> Machine<'a, T> for DynamicMachine<'a, T> { + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + _identity_id: u64, + _values: &mut [LookupCell<'c, T>], + ) -> Result> { + unimplemented!("Direct lookup not supported by machine {}.", self.name()) + } + fn identity_ids(&self) -> Vec { self.parts.identity_ids() } diff --git a/executor/src/witgen/machines/fixed_lookup_machine.rs b/executor/src/witgen/machines/fixed_lookup_machine.rs index ea1476369d..9f6c2f8790 100644 --- a/executor/src/witgen/machines/fixed_lookup_machine.rs +++ b/executor/src/witgen/machines/fixed_lookup_machine.rs @@ -9,6 +9,7 @@ use powdr_ast::analyzed::{AlgebraicReference, PolynomialType}; use powdr_number::{DegreeType, FieldElement}; use crate::witgen::affine_expression::{AffineExpression, AlgebraicVariable}; +use 
crate::witgen::data_structures::caller_data::CallerData; use crate::witgen::data_structures::multiplicity_counter::MultiplicityCounter; use crate::witgen::data_structures::mutable_state::MutableState; use crate::witgen::global_constraints::{GlobalConstraints, RangeConstraintSet}; @@ -198,68 +199,37 @@ impl<'a, T: FieldElement> FixedLookup<'a, T> { } } - fn process_plookup_internal<'b, Q: QueryCallback>( + fn process_plookup_internal>( &mut self, - mutable_state: &'b MutableState<'a, T, Q>, + mutable_state: &MutableState<'a, T, Q>, identity_id: u64, rows: &RowPair<'_, 'a, T>, - left: &[AffineExpression, T>], + outer_query: OuterQuery<'a, '_, T>, mut right: Peekable>, ) -> EvalResult<'a, T> { - if left.len() == 1 - && !left.first().unwrap().is_constant() + if outer_query.left.len() == 1 + && !outer_query.left.first().unwrap().is_constant() && right.peek().unwrap().poly_id.ptype == PolynomialType::Constant { // Lookup of the form "c $ [ X ] in [ B ]". Might be a conditional range check. return self.process_range_check( rows, - left.first().unwrap(), + outer_query.left.first().unwrap(), AlgebraicVariable::Column(right.peek().unwrap()), ); } // Split the left-hand-side into known input values and unknown output expressions. - let mut data = vec![T::zero(); left.len()]; - let mut values = left - .iter() - .zip(&mut data) - .map(|(l, d)| { - if let Some(value) = l.constant_value() { - *d = value; - LookupCell::Input(d) - } else { - LookupCell::Output(d) - } - }) - .collect::>(); + let mut values = CallerData::from(&outer_query); - if !self.process_lookup_direct(mutable_state, identity_id, &mut values)? { + if !self.process_lookup_direct(mutable_state, identity_id, &mut values.as_lookup_cells())? 
{ // multiple matches, we stop and learnt nothing return Ok(EvalValue::incomplete( IncompleteCause::MultipleLookupMatches, )); }; - let mut result = EvalValue::complete(vec![]); - for (l, v) in left.iter().zip(data) { - if !l.is_constant() { - let evaluated = l.clone() - v.into(); - // TODO we could use bit constraints here - match evaluated.solve() { - Ok(constraints) => { - result.combine(constraints); - } - Err(_) => { - // Fail the whole lookup - return Err(EvalError::ConstraintUnsatisfiable(format!( - "Constraint is invalid ({l} != {v}).", - ))); - } - } - } - } - - Ok(result) + values.into() } fn process_range_check( @@ -327,11 +297,11 @@ impl<'a, T: FieldElement> Machine<'a, T> for FixedLookup<'a, T> { "FixedLookup" } - fn process_plookup<'b, Q: crate::witgen::QueryCallback>( + fn process_plookup>( &mut self, - mutable_state: &'b MutableState<'a, T, Q>, + mutable_state: &MutableState<'a, T, Q>, identity_id: u64, - caller_rows: &'b RowPair<'b, 'a, T>, + caller_rows: &RowPair<'_, 'a, T>, ) -> EvalResult<'a, T> { let identity = self.connections[&identity_id]; let right = identity.right; @@ -344,13 +314,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for FixedLookup<'a, T> { .peekable(); let outer_query = OuterQuery::new(caller_rows, identity); - self.process_plookup_internal( - mutable_state, - identity_id, - caller_rows, - &outer_query.left, - right, - ) + self.process_plookup_internal(mutable_state, identity_id, caller_rows, outer_query, right) } fn process_lookup_direct<'b, 'c, Q: QueryCallback>( diff --git a/executor/src/witgen/machines/mod.rs b/executor/src/witgen/machines/mod.rs index 169877bd0f..d632e7a6e1 100644 --- a/executor/src/witgen/machines/mod.rs +++ b/executor/src/witgen/machines/mod.rs @@ -91,12 +91,10 @@ pub trait Machine<'a, T: FieldElement>: Send + Sync { /// An error is always unrecoverable. 
fn process_lookup_direct<'b, 'c, Q: QueryCallback>( &mut self, - _mutable_state: &'b MutableState<'a, T, Q>, - _identity_id: u64, - _values: &mut [LookupCell<'c, T>], - ) -> Result> { - unimplemented!("Direct lookup not supported machine {}.", self.name()) - } + mutable_state: &'b MutableState<'a, T, Q>, + identity_id: u64, + values: &mut [LookupCell<'c, T>], + ) -> Result>; /// Returns the final values of the witness columns. fn take_witness_col_values<'b, Q: QueryCallback>( diff --git a/executor/src/witgen/machines/second_stage_machine.rs b/executor/src/witgen/machines/second_stage_machine.rs index c957eeb72e..103a54ed4d 100644 --- a/executor/src/witgen/machines/second_stage_machine.rs +++ b/executor/src/witgen/machines/second_stage_machine.rs @@ -11,7 +11,9 @@ use crate::witgen::processor::SolverState; use crate::witgen::rows::{Row, RowIndex, RowPair}; use crate::witgen::sequence_iterator::{DefaultSequenceIterator, ProcessingSequenceIterator}; use crate::witgen::vm_processor::VmProcessor; -use crate::witgen::{EvalResult, FixedData, QueryCallback}; +use crate::witgen::{EvalError, EvalResult, FixedData, QueryCallback}; + +use super::LookupCell; /// A machine responsible for second-phase witness generation. /// For example, this might generate the witnesses for a bus accumulator or LogUp argument. 
@@ -25,6 +27,15 @@ pub struct SecondStageMachine<'a, T: FieldElement> { } impl<'a, T: FieldElement> Machine<'a, T> for SecondStageMachine<'a, T> { + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + _identity_id: u64, + _values: &mut [LookupCell<'c, T>], + ) -> Result> { + unimplemented!("Direct lookup not supported by machine {}.", self.name()) + } + fn identity_ids(&self) -> Vec { Vec::new() } diff --git a/executor/src/witgen/machines/sorted_witness_machine.rs b/executor/src/witgen/machines/sorted_witness_machine.rs index c2993123bb..f2379186ed 100644 --- a/executor/src/witgen/machines/sorted_witness_machine.rs +++ b/executor/src/witgen/machines/sorted_witness_machine.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, HashMap}; use super::super::affine_expression::AffineExpression; -use super::{Connection, EvalResult, FixedData}; +use super::{Connection, EvalResult, FixedData, LookupCell}; use super::{Machine, MachineParts}; use crate::witgen::affine_expression::AlgebraicVariable; use crate::witgen::data_structures::mutable_state::MutableState; @@ -9,7 +9,7 @@ use crate::witgen::evaluators::fixed_evaluator::FixedEvaluator; use crate::witgen::evaluators::partial_expression_evaluator::PartialExpressionEvaluator; use crate::witgen::evaluators::symbolic_evaluator::SymbolicEvaluator; use crate::witgen::rows::RowPair; -use crate::witgen::{EvalValue, IncompleteCause, QueryCallback}; +use crate::witgen::{EvalError, EvalValue, IncompleteCause, QueryCallback}; use crate::Identity; use itertools::Itertools; use num_traits::One; @@ -195,6 +195,15 @@ fn check_constraint( } impl<'a, T: FieldElement> Machine<'a, T> for SortedWitnesses<'a, T> { + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + _identity_id: u64, + _values: &mut [LookupCell<'c, T>], + ) -> Result> { + unimplemented!("Direct lookup not supported by machine {}.", self.name()) + } + fn 
identity_ids(&self) -> Vec { self.rhs_references.keys().cloned().collect() } diff --git a/executor/src/witgen/machines/write_once_memory.rs b/executor/src/witgen/machines/write_once_memory.rs index 268a5a2a24..6e489ed857 100644 --- a/executor/src/witgen/machines/write_once_memory.rs +++ b/executor/src/witgen/machines/write_once_memory.rs @@ -13,7 +13,7 @@ use crate::witgen::{ QueryCallback, }; -use super::{Connection, Machine, MachineParts}; +use super::{Connection, LookupCell, Machine, MachineParts}; /// A memory machine with a fixed address space, and each address can only have one /// value during the lifetime of the program. @@ -242,6 +242,15 @@ impl<'a, T: FieldElement> WriteOnceMemory<'a, T> { } impl<'a, T: FieldElement> Machine<'a, T> for WriteOnceMemory<'a, T> { + fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + &mut self, + _mutable_state: &'b MutableState<'a, T, Q>, + _identity_id: u64, + _values: &mut [LookupCell<'c, T>], + ) -> Result> { + unimplemented!("Direct lookup not supported by machine {}.", self.name()) + } + fn identity_ids(&self) -> Vec { self.connections.keys().copied().collect() } diff --git a/executor/src/witgen/processor.rs b/executor/src/witgen/processor.rs index 74fe04280a..9ded01b88a 100644 --- a/executor/src/witgen/processor.rs +++ b/executor/src/witgen/processor.rs @@ -23,7 +23,7 @@ use super::{ Constraints, EvalError, EvalValue, IncompleteCause, QueryCallback, }; -type Left<'a, T> = Vec, T>>; +pub type Left<'a, T> = Vec, T>>; /// The data mutated by the processor pub(crate) struct SolverState<'a, T: FieldElement> { From 4f1aa4a5f3879e974f8f668b728a8f7eb656b528 Mon Sep 17 00:00:00 2001 From: Leo Date: Wed, 11 Dec 2024 14:52:34 +0100 Subject: [PATCH 51/57] Use arith-memory in RISCV (#2199) --- powdr/src/lib.rs | 17 +- riscv-executor/src/lib.rs | 193 ++- riscv-runtime/src/arith.rs | 126 +- riscv-runtime/src/ec.rs | 100 +- riscv/src/large_field/runtime.rs | 163 +-- riscv/tests/riscv.rs | 44 +- 
riscv/tests/riscv_data/affine_256/src/main.rs | 62 +- riscv/tests/riscv_data/ec_add/src/main.rs | 1198 ++++++----------- riscv/tests/riscv_data/ec_double/src/main.rs | 802 ++++------- std/machines/large_field/arith256_memory.asm | 18 +- 10 files changed, 1034 insertions(+), 1689 deletions(-) diff --git a/powdr/src/lib.rs b/powdr/src/lib.rs index 9d91fd2c7d..71bd9974a6 100644 --- a/powdr/src/lib.rs +++ b/powdr/src/lib.rs @@ -26,6 +26,7 @@ use std::time::Instant; pub struct SessionBuilder { guest_path: String, out_path: String, + asm_file: Option, chunk_size_log2: Option, precompiles: RuntimeLibs, } @@ -47,14 +48,20 @@ const DEFAULT_MIN_MAX_DEGREE_LOG: u8 = 18; impl SessionBuilder { /// Builds a session with the given parameters. pub fn build(self) -> Session { - Session { - pipeline: pipeline_from_guest( + let pipeline = match self.asm_file { + Some(asm_file) => Pipeline::::default() + .from_asm_file(asm_file.into()) + .with_output(Path::new(&self.out_path).to_path_buf(), true), + None => pipeline_from_guest( &self.guest_path, Path::new(&self.out_path), DEFAULT_MIN_DEGREE_LOG, self.chunk_size_log2.unwrap_or(DEFAULT_MAX_DEGREE_LOG), self.precompiles, ), + }; + Session { + pipeline, out_path: self.out_path, } .with_backend(powdr_backend::BackendType::Plonky3) @@ -72,6 +79,12 @@ impl SessionBuilder { self } + /// Re-use a previously compiled guest program. + pub fn asm_file(mut self, asm_file: &str) -> Self { + self.asm_file = Some(asm_file.into()); + self + } + /// Set the chunk size, represented by its log2. /// Example: for a chunk size of 2^20, set chunk_size_log2 to 20. 
/// If the execution trace is longer than the 2^chunk_size_log2, diff --git a/riscv-executor/src/lib.rs b/riscv-executor/src/lib.rs index 3a887d2f0d..e8e53bb455 100644 --- a/riscv-executor/src/lib.rs +++ b/riscv-executor/src/lib.rs @@ -1865,9 +1865,9 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { let reg1 = args[0].u(); let reg2 = args[1].u(); let input_ptr = self.reg_read(0, reg1, 0); - assert_eq!(input_ptr.u() % 4, 0); + assert!(is_multiple_of_4(input_ptr.u())); let output_ptr = self.reg_read(1, reg2, 1); - assert_eq!(output_ptr.u() % 4, 0); + assert!(is_multiple_of_4(output_ptr.u())); set_col!(tmp1_col, input_ptr); set_col!(tmp2_col, output_ptr); @@ -1951,7 +1951,7 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { } "poseidon2_gl" => { let input_ptr = self.proc.get_reg_mem(args[0].u()).u(); - assert_eq!(input_ptr % 4, 0); + assert!(is_multiple_of_4(input_ptr)); let inputs: [u64; 8] = (0..16) .map(|i| self.proc.get_mem(input_ptr + i * 4, 0, 0)) // TODO: step/selector for poseidon2 @@ -1971,7 +1971,7 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { .flat_map(|v| vec![(v & 0xffffffff) as u32, (v >> 32) as u32]); let output_ptr = self.proc.get_reg_mem(args[1].u()).u(); - assert_eq!(output_ptr % 4, 0); + assert!(is_multiple_of_4(output_ptr)); result.enumerate().for_each(|(i, v)| { self.proc.set_mem(output_ptr + i as u32 * 4, v, 0, 0); // TODO: step/selector for poseidon2 }); @@ -1979,96 +1979,149 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { vec![] } "affine_256" => { - assert!(args.is_empty()); - // take input from registers - let x1 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i)).into_fe()) + // a * b + c = d + let input_ptr_a = self.proc.get_reg_mem(args[0].u()).u(); + assert!(is_multiple_of_4(input_ptr_a)); + let input_ptr_b = self.proc.get_reg_mem(args[1].u()).u(); + assert!(is_multiple_of_4(input_ptr_b)); + let input_ptr_c = self.proc.get_reg_mem(args[2].u()).u(); + assert!(is_multiple_of_4(input_ptr_c)); + let output_ptr_d = 
self.proc.get_reg_mem(args[3].u()).u(); + assert!(is_multiple_of_4(output_ptr_d)); + + let a = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let y1 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 8)).into_fe()) + let b = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_b + i * 4, 0, 0))) .collect::>(); - let x2 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 16)).into_fe()) + let c = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_c + i * 4, 0, 0))) .collect::>(); - let result = arith::affine_256(&x1, &y1, &x2); - // store result in registers - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i), Elem::Field(result.0[i])) + let result = arith::affine_256(&a, &b, &c); + + result.0.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_d + i as u32 * 4, + v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i + 8), Elem::Field(result.1[i])) + result.1.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_d + (result.0.len() as u32 * 4) + (i as u32 * 4), + v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); vec![] } "mod_256" => { - assert!(args.is_empty()); - // take input from registers - let y2 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i)).into_fe()) + // a mod b = c + let input_ptr_a = self.proc.get_reg_mem(args[0].u()).u(); + assert!(is_multiple_of_4(input_ptr_a)); + let input_ptr_b = self.proc.get_reg_mem(args[1].u()).u(); + assert!(is_multiple_of_4(input_ptr_b)); + let output_ptr_c = self.proc.get_reg_mem(args[2].u()).u(); + assert!(is_multiple_of_4(output_ptr_c)); + + let ah = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let y3 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 8)).into_fe()) + let al = (8..16) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let x1 = (0..8) - 
.map(|i| self.proc.get_reg(®ister_by_idx(i + 16)).into_fe()) + let b = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_b + i * 4, 0, 0))) .collect::>(); - let result = arith::mod_256(&y2, &y3, &x1); - // store result in registers - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i), Elem::Field(result[i])) + let result = arith::mod_256(&ah, &al, &b); + + result.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_c + i as u32 * 4, + v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); + vec![] } "ec_add" => { - assert!(args.is_empty()); - // take input from registers - let x1 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i)).into_fe()) + // a + b = c + let input_ptr_a = self.proc.get_reg_mem(args[0].u()).u(); + assert!(is_multiple_of_4(input_ptr_a)); + let input_ptr_b = self.proc.get_reg_mem(args[1].u()).u(); + assert!(is_multiple_of_4(input_ptr_b)); + let output_ptr_c = self.proc.get_reg_mem(args[2].u()).u(); + assert!(is_multiple_of_4(output_ptr_c)); + + let ax = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let y1 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 8)).into_fe()) + let ay = (8..16) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let x2 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 16)).into_fe()) + let bx = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_b + i * 4, 0, 0))) .collect::>(); - let y2 = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 24)).into_fe()) + let by = (8..16) + .map(|i| F::from(self.proc.get_mem(input_ptr_b + i * 4, 0, 0))) .collect::>(); - let result = arith::ec_add(&x1, &y1, &x2, &y2); - // store result in registers - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i), Elem::Field(result.0[i])) + + let result = arith::ec_add(&ax, &ay, &bx, &by); + result.0.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_c + i as u32 * 4, + 
v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i + 8), Elem::Field(result.1[i])) + result.1.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_c + (result.0.len() as u32 * 4) + (i as u32 * 4), + v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); vec![] } "ec_double" => { - assert!(args.is_empty()); - // take input from registers - let x = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i)).into_fe()) + // a * 2 = b + let input_ptr_a = self.proc.get_reg_mem(args[0].u()).u(); + assert!(is_multiple_of_4(input_ptr_a)); + let output_ptr_b = self.proc.get_reg_mem(args[1].u()).u(); + assert!(is_multiple_of_4(output_ptr_b)); + + let ax = (0..8) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let y = (0..8) - .map(|i| self.proc.get_reg(®ister_by_idx(i + 8)).into_fe()) + let ay = (8..16) + .map(|i| F::from(self.proc.get_mem(input_ptr_a + i * 4, 0, 0))) .collect::>(); - let result = arith::ec_double(&x, &y); - // store result in registers - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i), Elem::Field(result.0[i])) + + let result = arith::ec_double(&ax, &ay); + result.0.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_b + i as u32 * 4, + v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); - (0..8).for_each(|i| { - self.proc - .set_reg(®ister_by_idx(i + 8), Elem::Field(result.1[i])) + result.1.iter().enumerate().for_each(|(i, &v)| { + self.proc.set_mem( + output_ptr_b + (result.0.len() as u32 * 4) + (i as u32 * 4), + v.to_integer().try_into_u32().unwrap(), + 1, + 1, + ); }); vec![] @@ -2334,12 +2387,6 @@ pub fn execute( ) } -/// FIXME: copied from `riscv/runtime.rs` instead of adding dependency. -/// Helper function for register names used in submachine instruction params. 
-fn register_by_idx(idx: usize) -> String { - format!("xtra{idx}") -} - #[allow(clippy::too_many_arguments)] fn execute_inner( asm: &AnalysisASMFile, @@ -2629,3 +2676,7 @@ pub fn write_executor_csv>( &columns[..], ); } + +fn is_multiple_of_4(n: u32) -> bool { + n % 4 == 0 +} diff --git a/riscv-runtime/src/arith.rs b/riscv-runtime/src/arith.rs index b3f5d0ca38..281ad071fa 100644 --- a/riscv-runtime/src/arith.rs +++ b/riscv-runtime/src/arith.rs @@ -11,6 +11,16 @@ pub(crate) fn be_to_u32(from: &[u8; 32], to: &mut [u32; 8]) { } } +/// convert two big-endian u8 arrays to u32 array (arith machine format) +pub(crate) fn bes_to_u32(from1: &[u8; 32], from2: &[u8; 32], to: &mut [u32; 16]) { + for (i, chunk) in from1.chunks_exact(4).rev().enumerate() { + to[i] = u32::from_be_bytes(chunk.try_into().unwrap()); + } + for (i, chunk) in from2.chunks_exact(4).rev().enumerate() { + to[i + 8] = u32::from_be_bytes(chunk.try_into().unwrap()); + } +} + /// convert u32 array (arith machine format) to big-endian u8 array pub(crate) fn u32_to_be(from: &[u32; 8], to: &mut [u8; 32]) { for (i, n) in from.iter().rev().enumerate() { @@ -19,52 +29,70 @@ pub(crate) fn u32_to_be(from: &[u32; 8], to: &mut [u8; 32]) { } } -/// Calculate `a*b + c = hi*2**256 + lo` for 256 bit values (as u8 big-endian arrays). -/// Returns `(hi, lo)`. -pub fn affine_256_u8_be(mut a: [u8; 32], mut b: [u8; 32], c: [u8; 32]) -> ([u8; 32], [u8; 32]) { +/// convert u32 array (arith machine format) to big-endian u16 array +pub(crate) fn u32x16_to_be(from: &[u32; 16], to: &mut [u8; 64]) { + for (i, n) in from.iter().rev().enumerate() { + let bytes = n.to_be_bytes(); + to[i * 4..i * 4 + 4].copy_from_slice(&bytes) + } +} + +/// Calculate `a*b + c = d` for 256 bit values (as u8 big-endian arrays). +/// Returns `d`, a 512-bit value. 
+pub fn affine_256_u8_be(a: [u8; 32], b: [u8; 32], c: [u8; 32]) -> [u8; 64] { let mut a1: [u32; 8] = Default::default(); let mut b1: [u32; 8] = Default::default(); let mut c1: [u32; 8] = Default::default(); + let mut res: [u32; 16] = Default::default(); be_to_u32(&a, &mut a1); be_to_u32(&b, &mut b1); be_to_u32(&c, &mut c1); + // the Affine256 syscall does x13 = x10 * x11 + x12 + // where x10, x11, x12 are 256-bit values + // and x13 is a 512-bit value. unsafe { - ecall!(Syscall::Affine256, - in("a0") a1.as_mut_ptr(), - in("a1") b1.as_mut_ptr(), - in("a2") c1.as_ptr()); + ecall!( + Syscall::Affine256, + in("a0") a1.as_ptr(), + in("a1") b1.as_ptr(), + in("a2") c1.as_ptr(), + in("a3") res.as_mut_ptr()); } - u32_to_be(&a1, &mut a); - u32_to_be(&b1, &mut b); - (a, b) + let mut res_u8: [u8; 64] = [0u8; 64]; + u32x16_to_be(&res, &mut res_u8); + res_u8 } -/// Calculate `a*b + c = hi*2**256 + lo` for 256 bit values (as u8 little-endian arrays). -/// Returns `(hi, lo)`. -pub fn affine_256_u8_le(mut a: [u8; 32], mut b: [u8; 32], c: [u8; 32]) -> ([u8; 32], [u8; 32]) { +/// Calculate `a*b + c = d` for 256 bit values (as u8 little-endian arrays). +/// Returns `d`, a 512-bit value. +pub fn affine_256_u8_le(a: [u8; 32], b: [u8; 32], c: [u8; 32]) -> [u8; 64] { + let mut res: [u8; 64] = [0u8; 64]; unsafe { ecall!(Syscall::Affine256, - in("a0") a.as_mut_ptr(), - in("a1") b.as_mut_ptr(), - in("a2") c.as_ptr()); + in("a0") a.as_ptr(), + in("a1") b.as_ptr(), + in("a2") c.as_ptr(), + in("a3") res.as_mut_ptr()); } - (a, b) + res } -/// Calculate `a*b + c = hi*2**256 + lo` for 256 bit values (as u32 little-endian arrays). -/// Returns `(hi, lo)`. -pub fn affine_256_u32_le(mut a: [u32; 8], mut b: [u32; 8], c: [u32; 8]) -> ([u32; 8], [u32; 8]) { +/// Calculate `a*b + c = d` for 256 bit values (as u32 little-endian arrays). +/// Returns `d`, a 512-bit value. 
+pub fn affine_256_u32_le(mut a: [u32; 8], mut b: [u32; 8], c: [u32; 8]) -> [u32; 16] { + let mut res: [u32; 16] = Default::default(); unsafe { ecall!(Syscall::Affine256, in("a0") a.as_mut_ptr(), in("a1") b.as_mut_ptr(), - in("a2") c.as_ptr()); + in("a2") c.as_ptr(), + in("a3") res.as_mut_ptr()); } - (a, b) + res } /// Calculate `(a*b) % m = r` for 256 bit values (as u8 big-endian arrays). @@ -73,23 +101,24 @@ pub fn modmul_256_u8_be(mut a: [u8; 32], b: [u8; 32], m: [u8; 32]) -> [u8; 32] { let mut a1: [u32; 8] = Default::default(); let mut b1: [u32; 8] = Default::default(); let mut m1: [u32; 8] = Default::default(); + let mut aff_res: [u32; 16] = Default::default(); be_to_u32(&a, &mut a1); be_to_u32(&b, &mut b1); be_to_u32(&m, &mut m1); unsafe { - // First compute the two halves of the result a*b. - // Results are stored in place in a and b. + // First compute a*b in 512 bits. ecall!(Syscall::Affine256, - in("a0") a1.as_mut_ptr(), - in("a1") b1.as_mut_ptr(), - in("a2") [0u32; 8].as_ptr()); + in("a0") a1.as_ptr(), + in("a1") b1.as_ptr(), + in("a2") [0u32; 8].as_ptr(), + in("a3") aff_res.as_mut_ptr()); // Next compute the remainder, stored in place in a. ecall!(Syscall::Mod256, - in("a0") a1.as_mut_ptr(), - in("a1") b1.as_ptr(), - in("a2") m1.as_ptr()); + in("a0") aff_res.as_ptr(), + in("a1") m1.as_ptr(), + in("a2") a1.as_ptr()); } u32_to_be(&a1, &mut a); @@ -98,19 +127,21 @@ pub fn modmul_256_u8_be(mut a: [u8; 32], b: [u8; 32], m: [u8; 32]) -> [u8; 32] { /// Calculate `(a*b) % m = r` for 256 bit values (as u8 little-endian arrays). /// Returns `r`. -pub fn modmul_256_u8_le(mut a: [u8; 32], mut b: [u8; 32], m: [u8; 32]) -> [u8; 32] { +pub fn modmul_256_u8_le(mut a: [u8; 32], b: [u8; 32], m: [u8; 32]) -> [u8; 32] { + let mut aff_res: [u8; 64] = [0u8; 64]; unsafe { - // First compute the two halves of the result a*b. - // Results are stored in place in a and b. + // First compute a*b in 512 bits. 
ecall!(Syscall::Affine256, - in("a0") a.as_mut_ptr(), - in("a1") b.as_mut_ptr(), - in("a2") [0u32; 8].as_ptr()); + in("a0") a.as_ptr(), + in("a1") b.as_ptr(), + in("a2") [0u32; 8].as_ptr(), + in("a3") aff_res.as_mut_ptr()); // Next compute the remainder, stored in place in a. ecall!(Syscall::Mod256, - in("a0") a.as_mut_ptr(), + in("a0") aff_res.as_ptr(), in("a1") b.as_ptr(), - in("a2") m.as_ptr()); + in("a2") m.as_ptr(), + in("a3") a.as_mut_ptr()); } a @@ -118,19 +149,20 @@ pub fn modmul_256_u8_le(mut a: [u8; 32], mut b: [u8; 32], m: [u8; 32]) -> [u8; 3 /// Calculate `(a*b) % m = r` for 256 bit values (as u32 little-endian arrays). /// Returns `r`. -pub fn modmul_256_u32_le(mut a: [u32; 8], mut b: [u32; 8], m: [u32; 8]) -> [u32; 8] { +pub fn modmul_256_u32_le(mut a: [u32; 8], b: [u32; 8], m: [u32; 8]) -> [u32; 8] { + let mut aff_res: [u32; 16] = Default::default(); unsafe { - // First compute the two halves of the result a*b. - // Results are stored in place in a and b. + // First compute a*b in 512 bits. ecall!(Syscall::Affine256, - in("a0") a.as_mut_ptr(), - in("a1") b.as_mut_ptr(), - in("a2") [0u32; 8].as_ptr()); + in("a0") a.as_ptr(), + in("a1") b.as_ptr(), + in("a2") [0u32; 8].as_ptr(), + in("a3") aff_res.as_mut_ptr()); // Next compute the remainder, stored in place in a. ecall!(Syscall::Mod256, - in("a0") a.as_mut_ptr(), - in("a1") b.as_ptr(), - in("a2") m.as_ptr()); + in("a0") aff_res.as_ptr(), + in("a1") m.as_ptr(), + in("a2") a.as_mut_ptr()); } a diff --git a/riscv-runtime/src/ec.rs b/riscv-runtime/src/ec.rs index 13a8faa141..79ee6e02f0 100644 --- a/riscv-runtime/src/ec.rs +++ b/riscv-runtime/src/ec.rs @@ -1,110 +1,82 @@ use core::arch::asm; -use crate::arith::{be_to_u32, u32_to_be}; +use crate::arith::{bes_to_u32, u32x16_to_be}; use powdr_riscv_syscalls::Syscall; /// Add two k256 ec points. Coordinates are big-endian u8 arrays. 
-pub fn add_u8_be( - mut ax: [u8; 32], - mut ay: [u8; 32], - bx: [u8; 32], - by: [u8; 32], -) -> ([u8; 32], [u8; 32]) { - let mut ax1: [u32; 8] = Default::default(); - let mut ay1: [u32; 8] = Default::default(); - let mut bx1: [u32; 8] = Default::default(); - let mut by1: [u32; 8] = Default::default(); +pub fn add_u8_be(ax: [u8; 32], ay: [u8; 32], bx: [u8; 32], by: [u8; 32]) -> [u8; 64] { + let mut a1: [u32; 16] = Default::default(); + let mut b1: [u32; 16] = Default::default(); - be_to_u32(&ax, &mut ax1); - be_to_u32(&ay, &mut ay1); - be_to_u32(&bx, &mut bx1); - be_to_u32(&by, &mut by1); + bes_to_u32(&ax, &ay, &mut a1); + bes_to_u32(&bx, &by, &mut b1); unsafe { ecall!(Syscall::EcAdd, - in("a0") ax1.as_mut_ptr(), - in("a1") ay1.as_mut_ptr(), - in("a2") bx1.as_ptr(), - in("a3") by1.as_ptr()); + in("a0") a1.as_mut_ptr(), + in("a1") b1.as_mut_ptr(), + in("a2") a1.as_mut_ptr()); } - u32_to_be(&ax1, &mut ax); - u32_to_be(&ay1, &mut ay); - - (ax, ay) + let mut res = [0u8; 64]; + u32x16_to_be(&a1, &mut res); + res } /// Add two k256 ec points. Coordinates are little-endian u8 arrays. -pub fn add_u8_le( - mut ax: [u8; 32], - mut ay: [u8; 32], - bx: [u8; 32], - by: [u8; 32], -) -> ([u8; 32], [u8; 32]) { +pub fn add_u8_le(mut a: [u8; 64], b: [u8; 64]) -> [u8; 64] { unsafe { ecall!(Syscall::EcAdd, - in("a0") ax.as_mut_ptr(), - in("a1") ay.as_mut_ptr(), - in("a2") bx.as_ptr(), - in("a3") by.as_ptr()); + in("a0") a.as_mut_ptr(), + in("a1") b.as_ptr(), + in("a2") a.as_mut_ptr()); } - (ax, ay) + a } /// Add two k256 ec points. Coordinates are little-endian u32 arrays. 
-pub fn add_u32_le( - mut ax: [u32; 8], - mut ay: [u32; 8], - bx: [u32; 8], - by: [u32; 8], -) -> ([u32; 8], [u32; 8]) { +pub fn add_u32_le(mut a: [u32; 16], b: [u32; 16]) -> [u32; 16] { unsafe { ecall!(Syscall::EcAdd, - in("a0") ax.as_mut_ptr(), - in("a1") ay.as_mut_ptr(), - in("a2") bx.as_ptr(), - in("a3") by.as_ptr()); + in("a0") a.as_mut_ptr(), + in("a1") b.as_ptr(), + in("a2") a.as_mut_ptr()); } - (ax, ay) + a } /// Double a k256 ec point. Coordinates are big-endian u8 arrays. -pub fn double_u8_be(mut x: [u8; 32], mut y: [u8; 32]) -> ([u8; 32], [u8; 32]) { - let mut x1: [u32; 8] = Default::default(); - let mut y1: [u32; 8] = Default::default(); - - be_to_u32(&x, &mut x1); - be_to_u32(&y, &mut y1); +pub fn double_u8_be(x: [u8; 32], y: [u8; 32]) -> [u8; 64] { + let mut res = [0u32; 16]; + bes_to_u32(&x, &y, &mut res); unsafe { ecall!(Syscall::EcDouble, - in("a0") x1.as_mut_ptr(), - in("a1") y1.as_mut_ptr()); + in("a0") res.as_mut_ptr(), + in("a1") res.as_mut_ptr()); } - u32_to_be(&x1, &mut x); - u32_to_be(&y1, &mut y); - - (x, y) + let mut res_u8 = [0u8; 64]; + u32x16_to_be(&res, &mut res_u8); + res_u8 } /// Double a k256 ec point. Coordinates are little-endian u8 arrays. -pub fn double_u8_le(mut x: [u8; 32], mut y: [u8; 32]) -> ([u8; 32], [u8; 32]) { +pub fn double_u8_le(mut x: [u8; 64]) -> [u8; 64] { unsafe { ecall!(Syscall::EcDouble, in("a0") x.as_mut_ptr(), - in("a1") y.as_mut_ptr()); + in("a1") x.as_mut_ptr()); } - - (x, y) + x } /// Double a k256 ec point. Coordinates are little-endian u32 arrays. 
-pub fn double_u32_le(mut x: [u32; 8], mut y: [u32; 8]) -> ([u32; 8], [u32; 8]) { +pub fn double_u32_le(mut x: [u32; 16]) -> [u32; 16] { unsafe { ecall!(Syscall::EcDouble, in("a0") x.as_mut_ptr(), - in("a1") y.as_mut_ptr()); + in("a1") x.as_mut_ptr()); } - (x, y) + x } diff --git a/riscv/src/large_field/runtime.rs b/riscv/src/large_field/runtime.rs index 6124ce851b..d9ada363bb 100644 --- a/riscv/src/large_field/runtime.rs +++ b/riscv/src/large_field/runtime.rs @@ -328,97 +328,49 @@ impl Runtime { fn with_arith(mut self) -> Self { self.add_submachine( - "std::machines::large_field::arith::Arith", + "std::machines::large_field::arith256_memory::Arith256Memory", None, "arith", - vec!["MIN_DEGREE", "MAIN_MAX_DEGREE"], + vec!["memory", "MIN_DEGREE", "MAIN_MAX_DEGREE"], [ - format!( - "instr affine_256 link ~> {};", - instr_link("arith.affine_256", 24, 16) - ), - format!( - "instr ec_add link ~> {};", - instr_link("arith.ec_add", 32, 16) - ), - format!( - "instr ec_double link ~> {};", - instr_link("arith.ec_double", 16, 16) - ), - format!( - "instr mod_256 link ~> {};", - instr_link("arith.mod_256", 24, 8) - ), + r#"instr affine_256 X, Y, Z, W + link ~> tmp1_col = regs.mload(X, STEP) + link ~> tmp2_col = regs.mload(Y, STEP) + link ~> tmp3_col = regs.mload(Z, STEP) + link ~> tmp4_col = regs.mload(W, STEP) + link ~> arith.affine_256(STEP, tmp1_col, tmp2_col, tmp3_col, tmp4_col); + "#, + r#"instr ec_add X, Y, W + link ~> tmp1_col = regs.mload(X, STEP) + link ~> tmp2_col = regs.mload(Y, STEP) + link ~> tmp4_col = regs.mload(W, STEP) + link ~> arith.ec_add(STEP, tmp1_col, tmp2_col, tmp4_col); + "#, + r#"instr ec_double X, W + link ~> tmp1_col = regs.mload(X, STEP) + link ~> tmp4_col = regs.mload(W, STEP) + link ~> arith.ec_double(STEP, tmp1_col, tmp4_col); + "#, + r#"instr mod_256 X, Y, W + link ~> tmp1_col = regs.mload(X, STEP) + link ~> tmp2_col = regs.mload(Y, STEP) + link ~> tmp4_col = regs.mload(W, STEP) + link ~> arith.mod_256(STEP, tmp1_col, tmp2_col, tmp4_col); + "#, 
], - 32, + 0, ); - // The affine_256 syscall takes as input the addresses of x1, y1 and x2. - let affine256 = - // Load x1 in 0..8 - (0..8).flat_map(|i| load_word(10, i as u32 *4 , ®(i))) - // Load y1 in 8..16 - .chain((0..8).flat_map(|i| load_word(11, i as u32 *4 , ®(i + 8)))) - // Load x2 in 16..24 - .chain((0..8).flat_map(|i| load_word(12, i as u32 *4 , ®(i + 16)))) - // Call instruction - .chain(std::iter::once("affine_256;".to_string())) - // Store result y2 in x1's memory - .chain((0..8).flat_map(|i| store_word(10, i as u32 *4 , ®(i)))) - // Store result y3 in y1's memory - .chain((0..8).flat_map(|i| store_word(11, i as u32 *4 , ®(i + 8)))); - + let affine256 = std::iter::once("affine_256 10, 11, 12, 13;".to_string()); self.add_syscall(Syscall::Affine256, affine256); - // The mod_256 syscall takes as input the addresses of y2, y3, and x1. - let mod256 = - // Load y2 in 0..8 - (0..8).flat_map(|i| load_word(10, i as u32 *4 , ®(i))) - // Load y3 in 8..16 - .chain((0..8).flat_map(|i| load_word(11, i as u32 *4 , ®(i + 8)))) - // Load x1 in 16..24 - .chain((0..8).flat_map(|i| load_word(12, i as u32 *4 , ®(i + 16)))) - // Call instruction - .chain(std::iter::once("mod_256;".to_string())) - // Store result x2 in y2's memory - .chain((0..8).flat_map(|i| store_word(10, i as u32 *4 , ®(i)))); - + let mod256 = std::iter::once("mod_256 10, 11, 12;".to_string()); self.add_syscall(Syscall::Mod256, mod256); - // The ec_add syscall takes as input the four addresses of x1, y1, x2, y2. 
- let ec_add = - // Load x1 in 0..8 - (0..8).flat_map(|i| load_word(10, i as u32 * 4, ®(i))) - // Load y1 in 8..16 - .chain((0..8).flat_map(|i| load_word(11, i as u32 * 4, ®(i + 8)))) - // Load x2 in 16..24 - .chain((0..8).flat_map(|i| load_word(12, i as u32 * 4, ®(i + 16)))) - // Load y2 in 24..32 - .chain((0..8).flat_map(|i| load_word(13, i as u32 * 4, ®(i + 24)))) - // Call instruction - .chain(std::iter::once("ec_add;".to_string())) - // Save result x3 in x1 - .chain((0..8).flat_map(|i| store_word(10, i as u32 * 4, ®(i)))) - // Save result y3 in y1 - .chain((0..8).flat_map(|i| store_word(11, i as u32 * 4, ®(i + 8)))); - + let ec_add = std::iter::once("ec_add 10, 11, 12;".to_string()); self.add_syscall(Syscall::EcAdd, ec_add); - // The ec_double syscall takes as input the addresses of x and y in x10 and x11 respectively. - // We load x and y from memory into registers 0..8 and registers 8..16 respectively. - // We then store the result from those registers into the same addresses (x10 and x11). 
- let ec_double = - // Load x in 0..8 - (0..8).flat_map(|i| load_word(10, i as u32 * 4, ®(i))) - // Load y in 8..16 - .chain((0..8).flat_map(|i| load_word(11, i as u32 * 4, ®(i + 8)))) - // Call instruction - .chain(std::iter::once("ec_double;".to_string())) - // Store result in x - .chain((0..8).flat_map(|i| store_word(10, i as u32 * 4, ®(i)))) - // Store result in y - .chain((0..8).flat_map(|i| store_word(11, i as u32 * 4, ®(i + 8)))); - + let ec_double = std::iter::once("ec_double 10, 11;".to_string()); self.add_syscall(Syscall::EcDouble, ec_double); self @@ -490,56 +442,3 @@ impl Runtime { self.syscalls.get(syscall_name) } } - -/// Helper function for register names used in instruction params -fn reg(idx: usize) -> String { - format!("{EXTRA_REG_PREFIX}{idx}") -} - -/// Helper function to generate instr link for large number input/output registers -fn instr_link(call: &str, inputs: usize, outputs: usize) -> String { - format!( - "{}{}({})", - if outputs > 0 { - format!( - "({}) = ", - (0..outputs).map(|i| format!("{}'", reg(i))).join(", ") - ) - } else { - "".to_string() - }, - call, - (0..inputs).map(reg).join(", ") - ) -} - -/// Load word from addr+offset into register -fn load_word(addr_reg_id: u32, offset: u32, reg: &str) -> [String; 2] { - let tmp1 = Register::from("tmp1"); - let tmp2 = Register::from("tmp2"); - [ - format!( - "mload {addr_reg_id}, {offset}, {}, {};", - tmp1.addr(), - tmp2.addr() - ), - format!("{reg} <=X= get_reg({});", tmp1.addr()), - ] -} - -/// Store word from register into addr+offset -fn store_word(addr_reg_id: u32, offset: u32, reg: &str) -> [String; 3] { - let tmp1 = Register::from("tmp1"); - let tmp2 = Register::from("tmp2"); - [ - // split_gl ensures we store a 32-bit value - format!("set_reg {}, {reg};", tmp1.addr()), - format!( - "split_gl {}, {}, {};", - tmp1.addr(), - tmp1.addr(), - tmp2.addr() - ), - format!("mstore {addr_reg_id}, 0, {offset}, {};", tmp1.addr()), - ] -} diff --git a/riscv/tests/riscv.rs 
b/riscv/tests/riscv.rs index 12db658220..30712a8352 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -4,7 +4,7 @@ use common::{compile_riscv_asm_file, verify_riscv_asm_file, verify_riscv_asm_str use mktemp::Temp; use powdr_number::{BabyBearField, FieldElement, GoldilocksField, KnownField}; use powdr_pipeline::{ - test_util::{run_pilcom_with_backend_variant, test_mock_backend, BackendVariant}, + test_util::{run_pilcom_with_backend_variant, BackendVariant}, Pipeline, }; use powdr_riscv_executor::ProfilerOptions; @@ -237,62 +237,36 @@ fn function_pointer() { verify_riscv_crate(case, &[2734, 735, 1999], true); } -// Temporary function to run the mock prover for cases where -// we can't use P3 yet. -fn run_mock_prover_for_arith(case: &str) { - let temp_dir = Temp::new_dir().unwrap(); - let executable = powdr_riscv::compile_rust_crate_to_riscv( - &format!("tests/riscv_data/{case}/Cargo.toml"), - &temp_dir, - None, - ); - - let options = CompilerOptions::new( - KnownField::GoldilocksField, - RuntimeLibs::new().with_arith(), - false, - ); - let asm = powdr_riscv::elf::translate(&executable, options); - - let temp_dir = mktemp::Temp::new_dir().unwrap().release(); - let file_name = format!("{case}.asm"); - let pipeline = Pipeline::::default() - .with_output(temp_dir.to_path_buf(), false) - .from_asm_string(asm, Some(PathBuf::from(file_name))); - - test_mock_backend(pipeline); -} - #[test] #[ignore = "Too slow"] fn runtime_ec_double() { let case = "ec_double"; - run_mock_prover_for_arith(case); - // TODO We can't use P3 yet for this test because of degree 4 constraints. + let options = CompilerOptions::new_gl().with_arith(); + verify_riscv_crate_gl_with_options(case, Default::default(), options, false); } #[test] #[ignore = "Too slow"] fn runtime_ec_add() { let case = "ec_add"; - run_mock_prover_for_arith(case); - // TODO We can't use P3 yet for this test because of degree 4 constraints. 
+ let options = CompilerOptions::new_gl().with_arith(); + verify_riscv_crate_gl_with_options(case, Default::default(), options, false); } #[test] #[ignore = "Too slow"] fn runtime_affine_256() { let case = "affine_256"; - run_mock_prover_for_arith(case); - // TODO We can't use P3 yet for this test because of degree 4 constraints. + let options = CompilerOptions::new_gl().with_arith(); + verify_riscv_crate_gl_with_options(case, Default::default(), options, false); } #[test] #[ignore = "Too slow"] fn runtime_modmul_256() { let case = "modmul_256"; - run_mock_prover_for_arith(case); - // TODO We can't use P3 yet for this test because of degree 4 constraints. + let options = CompilerOptions::new_gl().with_arith(); + verify_riscv_crate_gl_with_options(case, Default::default(), options, false); } /* diff --git a/riscv/tests/riscv_data/affine_256/src/main.rs b/riscv/tests/riscv_data/affine_256/src/main.rs index a1682eb4d7..5a8b1c77e7 100644 --- a/riscv/tests/riscv_data/affine_256/src/main.rs +++ b/riscv/tests/riscv_data/affine_256/src/main.rs @@ -20,71 +20,61 @@ pub fn main() { 0xaaaaaaaa, 0xbbbbbbbb, 0xbbbbbbbb, 0xaaaaaaaa, 0xaaaaaaaa, 0xbbbbbbbb, 0xbbbbbbbb, 0xaaaaaaaa, ]; - let hi = [ + let d = [ 0x9be02469, 0xf258bf25, 0x38e38e38, 0xe6f8091a, 0x740da740, 0x579be024, 0x091a2b3c, - 0x00000000, - ]; - let lo = [ - 0x33333333, 0xa1907f6e, 0xca8641fd, 0x369d0369, 0x907f6e5d, 0x60b60b60, 0x0da740da, - 0x1fdb9753, + 0x00000000, 0x33333333, 0xa1907f6e, 0xca8641fd, 0x369d0369, 0x907f6e5d, 0x60b60b60, + 0x0da740da, 0x1fdb9753, ]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + assert_eq!(affine_256(a, b, c), d); // same as above but using the big endian api let a = hex!("0000000011111111222222223333333344444444555555556666666677777777"); let b = hex!("8888888899999999aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffff"); let c = hex!("aaaaaaaabbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbaaaaaaaa"); - let hi = 
hex!("00000000091a2b3c579be024740da740e6f8091a38e38e38f258bf259be02469"); - let lo = hex!("1fdb97530da740da60b60b60907f6e5d369d0369ca8641fda1907f6e33333333"); - assert_eq!(affine_256_u8_be(a, b, c), (hi, lo)); + let d = hex!("1fdb97530da740da60b60b60907f6e5d369d0369ca8641fda1907f6e3333333300000000091a2b3c579be024740da740e6f8091a38e38e38f258bf259be02469"); + assert_eq!(affine_256_u8_be(a, b, c), d); // 2 * 3 + 5 = 11 let a = [2, 0, 0, 0, 0, 0, 0, 0]; let b = [3, 0, 0, 0, 0, 0, 0, 0]; let c = [5, 0, 0, 0, 0, 0, 0, 0]; - let hi = [0, 0, 0, 0, 0, 0, 0, 0]; - let lo = [11, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + let d = [0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0]; + assert_eq!(affine_256(a, b, c), d); // 256 * 256 + 1 = 65537 let a = [256, 0, 0, 0, 0, 0, 0, 0]; let b = [256, 0, 0, 0, 0, 0, 0, 0]; let c = [1, 0, 0, 0, 0, 0, 0, 0]; - let hi = [0, 0, 0, 0, 0, 0, 0, 0]; - let lo = [65537, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + let d = [0, 0, 0, 0, 0, 0, 0, 0, 65537, 0, 0, 0, 0, 0, 0, 0]; + assert_eq!(affine_256(a, b, c), d); // 3000 * 2000 + 5000 = 6005000 let a = [3000, 0, 0, 0, 0, 0, 0, 0]; let b = [2000, 0, 0, 0, 0, 0, 0, 0]; let c = [5000, 0, 0, 0, 0, 0, 0, 0]; - let hi = [0, 0, 0, 0, 0, 0, 0, 0]; - let lo = [6005000, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + let d = [0, 0, 0, 0, 0, 0, 0, 0, 6005000, 0, 0, 0, 0, 0, 0, 0]; + assert_eq!(affine_256(a, b, c), d); // 3000000 * 2000000 + 5000000 = 6000005000000 let a = [3000000, 0, 0, 0, 0, 0, 0, 0]; let b = [2000000, 0, 0, 0, 0, 0, 0, 0]; let c = [5000000, 0, 0, 0, 0, 0, 0, 0]; - let hi = [0, 0, 0, 0, 0, 0, 0, 0]; - let lo = [0xfc2aab40, 0x574, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + let d = [0, 0, 0, 0, 0, 0, 0, 0, 0xfc2aab40, 0x574, 0, 0, 0, 0, 0, 0]; + assert_eq!(affine_256(a, b, c), d); // 3000 * 0 + 5000 = 5000 let a = [3000, 0, 0, 0, 0, 0, 0, 0]; let b = [0, 0, 0, 0, 0, 0, 0, 0]; let c = [5000, 0, 0, 0, 
0, 0, 0, 0]; - let hi = [0, 0, 0, 0, 0, 0, 0, 0]; - let lo = [5000, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + let d = [0, 0, 0, 0, 0, 0, 0, 0, 5000, 0, 0, 0, 0, 0, 0, 0]; + assert_eq!(affine_256(a, b, c), d); // 2**255 * 2 + 0 = 2 ** 256 let a = [0, 0, 0, 0, 0, 0, 0, 0x80000000]; let b = [2, 0, 0, 0, 0, 0, 0, 0]; let c = [0, 0, 0, 0, 0, 0, 0, 0]; - let hi = [1, 0, 0, 0, 0, 0, 0, 0]; - let lo = [0, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + let d = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + assert_eq!(affine_256(a, b, c), d); // (2**256 - 1) * (2**256 - 1) + (2**256 - 1) = 2 ** 256 * 115792089237316195423570985008687907853269984665640564039457584007913129639935 // = 2 ** 256 * 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff @@ -100,12 +90,11 @@ pub fn main() { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, ]; - let hi = [ + let d = [ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, - 0xffffffff, + 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, ]; - let lo = [0, 0, 0, 0, 0, 0, 0, 0]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + assert_eq!(affine_256(a, b, c), d); // (2**256 - 1) * 1 + (2**256 - 1) = 2 ** 256 + 115792089237316195423570985008687907853269984665640564039457584007913129639934 // = 2 ** 256 + 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff @@ -118,10 +107,9 @@ pub fn main() { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, ]; - let hi = [1, 0, 0, 0, 0, 0, 0, 0]; - let lo = [ - 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, - 0xffffffff, + let d = [ + 1, 0, 0, 0, 0, 0, 0, 0, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff, 0xffffffff, ]; - assert_eq!(affine_256(a, b, c), (hi, lo)); + assert_eq!(affine_256(a, b, c), d); } diff --git 
a/riscv/tests/riscv_data/ec_add/src/main.rs b/riscv/tests/riscv_data/ec_add/src/main.rs index f2daeb1f1f..91f7110713 100644 --- a/riscv/tests/riscv_data/ec_add/src/main.rs +++ b/riscv/tests/riscv_data/ec_add/src/main.rs @@ -8,1148 +8,788 @@ use powdr_riscv_runtime::ec::add_u8_be; #[no_mangle] pub fn main() { - let x1 = [ + let p1 = [ 0x16f81798, 0x59f2815b, 0x2dce28d9, 0x029bfcdb, 0xce870b07, 0x55a06295, 0xf9dcbbac, - 0x79be667e, + 0x79be667e, 0xfb10d4b8, 0x9c47d08f, 0xa6855419, 0xfd17b448, 0x0e1108a8, 0x5da4fbfc, + 0x26a3c465, 0x483ada77, ]; - let y1 = [ - 0xfb10d4b8, 0x9c47d08f, 0xa6855419, 0xfd17b448, 0x0e1108a8, 0x5da4fbfc, 0x26a3c465, - 0x483ada77, - ]; - let x2 = [ + let p2 = [ 0x5c709ee5, 0xabac09b9, 0x8cef3ca7, 0x5c778e4b, 0x95c07cd8, 0x3045406e, 0x41ed7d6d, - 0xc6047f94, - ]; - let y2 = [ - 0x50cfe52a, 0x236431a9, 0x3266d0e1, 0xf7f63265, 0x466ceaee, 0xa3c58419, 0xa63dc339, - 0x1ae168fe, + 0xc6047f94, 0x50cfe52a, 0x236431a9, 0x3266d0e1, 0xf7f63265, 0x466ceaee, 0xa3c58419, + 0xa63dc339, 0x1ae168fe, ]; - let x3 = [ + let p3 = [ 0xbce036f9, 0x8601f113, 0x836f99b0, 0xb531c845, 0xf89d5229, 0x49344f85, 0x9258c310, - 0xf9308a01, + 0xf9308a01, 0x84b8e672, 0x6cb9fd75, 0x34c2231b, 0x6500a999, 0x2a37f356, 0x0fe337e6, + 0x632de814, 0x388f7b0f, ]; - let y3 = [ - 0x84b8e672, 0x6cb9fd75, 0x34c2231b, 0x6500a999, 0x2a37f356, 0x0fe337e6, 0x632de814, - 0x388f7b0f, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); // same as above but using the big endian api let x1 = hex!("79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798"); let y1 = hex!("483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8"); let x2 = hex!("c6047f9441ed7d6d3045406e95c07cd85c778e4b8cef3ca7abac09b95c709ee5"); let y2 = hex!("1ae168fea63dc339a3c58419466ceaeef7f632653266d0e1236431a950cfe52a"); - let x3 = hex!("f9308a019258c31049344f85f89d5229b531c845836f99b08601f113bce036f9"); - let y3 = 
hex!("388f7b0f632de8140fe337e62a37f3566500a99934c2231b6cb9fd7584b8e672"); - assert_eq!(add_u8_be(x1, y1, x2, y2), (x3, y3)); + let p3 = hex!("388f7b0f632de8140fe337e62a37f3566500a99934c2231b6cb9fd7584b8e672f9308a019258c31049344f85f89d5229b531c845836f99b08601f113bce036f9"); + assert_eq!(add_u8_be(x1, y1, x2, y2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x60297556, 0x2f057a14, 0x8568a18b, 0x82f6472f, 0x355235d3, 0x20453a14, 0x755eeea4, - 0xfff97bd5, - ]; - let y1 = [ - 0xb075f297, 0x3c870c36, 0x518fe4a0, 0xde80f0f6, 0x7f45c560, 0xf3be9601, 0xacfbb620, - 0xae12777a, + 0xfff97bd5, 0xb075f297, 0x3c870c36, 0x518fe4a0, 0xde80f0f6, 0x7f45c560, 0xf3be9601, + 0xacfbb620, 0xae12777a, ]; - let x2 = [ + let p2 = [ 0x6c953fa9, 0x4d05956d, 0xf0b8c3db, 0x28ab2629, 0x4bd18c06, 0x3a5f485d, 0xaaab9323, - 0xa49ed10e, + 0xa49ed10e, 0x46fb4c72, 0x67b2bd22, 0x968e181b, 0x5ae87534, 0xa0dfddfb, 0xe03476c0, + 0x660f5398, 0xcc72b894, ]; - let y2 = [ - 0x46fb4c72, 0x67b2bd22, 0x968e181b, 0x5ae87534, 0xa0dfddfb, 0xe03476c0, 0x660f5398, - 0xcc72b894, - ]; - let x3 = [ + let p3 = [ 0x23510ef9, 0xbb3af611, 0xf22c19c7, 0x87d4c3cb, 0x53cecf40, 0xd11823c1, 0xdc6f9ec5, - 0xe12026ef, - ]; - let y3 = [ - 0xb058f360, 0x96451fbb, 0x20e5efc6, 0xdcda7f72, 0xe03ad137, 0xf367ef20, 0x5a51eadc, - 0x30a930ea, + 0xe12026ef, 0xb058f360, 0x96451fbb, 0x20e5efc6, 0xdcda7f72, 0xe03ad137, 0xf367ef20, + 0x5a51eadc, 0x30a930ea, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x70afe85a, 0xc5b0f470, 0x9620095b, 0x687cf441, 0x4d734633, 0x15c38f00, 0x48e7561b, - 0xd01115d5, + 0xd01115d5, 0xf4062327, 0x6b051b13, 0xd9a86d52, 0x79238c5d, 0xe17bd815, 0xa8b64537, + 0xc815e0d7, 0xa9f34ffd, ]; - let y1 = [ - 0xf4062327, 0x6b051b13, 0xd9a86d52, 0x79238c5d, 0xe17bd815, 0xa8b64537, 0xc815e0d7, - 0xa9f34ffd, - ]; - let x2 = [ + let p2 = [ 0x23510ef9, 0xbb3af611, 0xf22c19c7, 0x87d4c3cb, 0x53cecf40, 
0xd11823c1, 0xdc6f9ec5, - 0xe12026ef, - ]; - let y2 = [ - 0xb058f360, 0x96451fbb, 0x20e5efc6, 0xdcda7f72, 0xe03ad137, 0xf367ef20, 0x5a51eadc, - 0x30a930ea, + 0xe12026ef, 0xb058f360, 0x96451fbb, 0x20e5efc6, 0xdcda7f72, 0xe03ad137, 0xf367ef20, + 0x5a51eadc, 0x30a930ea, ]; - let x3 = [ + let p3 = [ 0xaee8b7f4, 0xbfd2ecb4, 0xe21c31ef, 0x22c7841e, 0x2da82592, 0x7d356af0, 0x64eab6f5, - 0xd5ae772d, + 0xd5ae772d, 0x454d87bc, 0x18cf6cf2, 0xe498d098, 0x8d035085, 0x121f031d, 0x43b132ac, + 0x313a838a, 0x10231a1d, ]; - let y3 = [ - 0x454d87bc, 0x18cf6cf2, 0xe498d098, 0x8d035085, 0x121f031d, 0x43b132ac, 0x313a838a, - 0x10231a1d, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xb202e6ce, 0x502bda8, 0x9d62b794, 0x68321543, 0x61ba8b09, 0x8ac09c91, 0x413d33d4, - 0xfe72c435, - ]; - let y1 = [ - 0xcf58c5bf, 0x978ed2fb, 0x6b4a9d22, 0x1dc88e3, 0x9d729981, 0xd3ab47e0, 0x7ff24a68, - 0x6851de06, + 0xfe72c435, 0xcf58c5bf, 0x978ed2fb, 0x6b4a9d22, 0x1dc88e3, 0x9d729981, 0xd3ab47e0, + 0x7ff24a68, 0x6851de06, ]; - let x2 = [ + let p2 = [ 0xaee8b7f4, 0xbfd2ecb4, 0xe21c31ef, 0x22c7841e, 0x2da82592, 0x7d356af0, 0x64eab6f5, - 0xd5ae772d, + 0xd5ae772d, 0x454d87bc, 0x18cf6cf2, 0xe498d098, 0x8d035085, 0x121f031d, 0x43b132ac, + 0x313a838a, 0x10231a1d, ]; - let y2 = [ - 0x454d87bc, 0x18cf6cf2, 0xe498d098, 0x8d035085, 0x121f031d, 0x43b132ac, 0x313a838a, - 0x10231a1d, - ]; - let x3 = [ + let p3 = [ 0x29f01588, 0x47a519de, 0x80b7983e, 0xe3a17625, 0xc7a542b9, 0x2c03e1b0, 0xd5ba06de, - 0xf952de32, - ]; - let y3 = [ - 0x96452adf, 0xcc4f6b5a, 0xa0593002, 0x1ebbb2bb, 0x49fb7433, 0xee209edb, 0x9e3a6edb, - 0x23c02d3, + 0xf952de32, 0x96452adf, 0xcc4f6b5a, 0xa0593002, 0x1ebbb2bb, 0x49fb7433, 0xee209edb, + 0x9e3a6edb, 0x23c02d3, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x1118e5c3, 0x9bd870aa, 0x452bebc1, 
0xfc579b27, 0xf4e65b4b, 0xb441656e, 0x9645307d, - 0x6eca335d, + 0x6eca335d, 0x5a08668, 0x498a2f78, 0x3bf8ec34, 0x3a496a3a, 0x74b875a0, 0x592f5790, + 0x7a7a0710, 0xd50123b5, ]; - let y1 = [ - 0x5a08668, 0x498a2f78, 0x3bf8ec34, 0x3a496a3a, 0x74b875a0, 0x592f5790, 0x7a7a0710, - 0xd50123b5, - ]; - let x2 = [ + let p2 = [ 0x29f01588, 0x47a519de, 0x80b7983e, 0xe3a17625, 0xc7a542b9, 0x2c03e1b0, 0xd5ba06de, - 0xf952de32, - ]; - let y2 = [ - 0x96452adf, 0xcc4f6b5a, 0xa0593002, 0x1ebbb2bb, 0x49fb7433, 0xee209edb, 0x9e3a6edb, - 0x23c02d3, + 0xf952de32, 0x96452adf, 0xcc4f6b5a, 0xa0593002, 0x1ebbb2bb, 0x49fb7433, 0xee209edb, + 0x9e3a6edb, 0x23c02d3, ]; - let x3 = [ + let p3 = [ 0x170508d7, 0x5137b67a, 0xfec94132, 0xacb18631, 0xca0ddf6b, 0xa9f0b9c1, 0x59982afe, - 0x19d6a989, + 0x19d6a989, 0x311ed142, 0xacf14bf8, 0x9cdeddfc, 0xa372ada, 0x47016a3d, 0xe73aacf, + 0xa468f4db, 0x221a2b52, ]; - let y3 = [ - 0x311ed142, 0xacf14bf8, 0x9cdeddfc, 0xa372ada, 0x47016a3d, 0xe73aacf, 0xa468f4db, - 0x221a2b52, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x7f8cb0e3, 0x43933aca, 0xe1efe3a4, 0xa22eb53f, 0x4b2eb72e, 0x8fa64e04, 0x74456d8f, - 0x3f0e80e5, - ]; - let y1 = [ - 0xea5f404f, 0xcb0289e2, 0xa65b53a4, 0x9501253a, 0x485d01b3, 0xe90b9c08, 0x296cbc91, - 0xcb66d7d7, + 0x3f0e80e5, 0xea5f404f, 0xcb0289e2, 0xa65b53a4, 0x9501253a, 0x485d01b3, 0xe90b9c08, + 0x296cbc91, 0xcb66d7d7, ]; - let x2 = [ + let p2 = [ 0x170508d7, 0x5137b67a, 0xfec94132, 0xacb18631, 0xca0ddf6b, 0xa9f0b9c1, 0x59982afe, - 0x19d6a989, + 0x19d6a989, 0x311ed142, 0xacf14bf8, 0x9cdeddfc, 0xa372ada, 0x47016a3d, 0xe73aacf, + 0xa468f4db, 0x221a2b52, ]; - let y2 = [ - 0x311ed142, 0xacf14bf8, 0x9cdeddfc, 0xa372ada, 0x47016a3d, 0xe73aacf, 0xa468f4db, - 0x221a2b52, - ]; - let x3 = [ + let p3 = [ 0xb4774055, 0x3beba8b, 0x1e5354e0, 0x390e80f2, 0x84fe6b81, 0x7da29ade, 0x873a0892, - 0x9e60108, - ]; - let y3 = [ - 0xe91c37b4, 0x1f4b0a9e, 
0xa0f5c9df, 0xf8b46cf3, 0xa2e33e67, 0xdd6c49ac, 0x8cc23a00, - 0xb16a1638, + 0x9e60108, 0xe91c37b4, 0x1f4b0a9e, 0xa0f5c9df, 0xf8b46cf3, 0xa2e33e67, 0xdd6c49ac, + 0x8cc23a00, 0xb16a1638, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x33ce1752, 0xc7b750f7, 0xd7cd204e, 0xe783c797, 0xd99c9aea, 0x812ddf64, 0xd01dc635, - 0xd7a0da58, + 0xd7a0da58, 0x762cef4, 0xbbc02738, 0xc062b742, 0xbe040a8, 0x40e28465, 0xf6f29283, + 0x68008032, 0x912770e0, ]; - let y1 = [ - 0x762cef4, 0xbbc02738, 0xc062b742, 0xbe040a8, 0x40e28465, 0xf6f29283, 0x68008032, - 0x912770e0, - ]; - let x2 = [ + let p2 = [ 0xb4774055, 0x3beba8b, 0x1e5354e0, 0x390e80f2, 0x84fe6b81, 0x7da29ade, 0x873a0892, - 0x9e60108, - ]; - let y2 = [ - 0xe91c37b4, 0x1f4b0a9e, 0xa0f5c9df, 0xf8b46cf3, 0xa2e33e67, 0xdd6c49ac, 0x8cc23a00, - 0xb16a1638, + 0x9e60108, 0xe91c37b4, 0x1f4b0a9e, 0xa0f5c9df, 0xf8b46cf3, 0xa2e33e67, 0xdd6c49ac, + 0x8cc23a00, 0xb16a1638, ]; - let x3 = [ + let p3 = [ 0xb2993603, 0x84fd71bd, 0x35e25ff7, 0x1077370c, 0x5867e485, 0x9edfb935, 0xd0b48228, - 0x2de4cdc, + 0x2de4cdc, 0xf3c18392, 0xd7b138d4, 0xa44f7f11, 0x1bd09b1d, 0x87477348, 0x53396a92, + 0xc1c97199, 0x18b5caf6, ]; - let y3 = [ - 0xf3c18392, 0xd7b138d4, 0xa44f7f11, 0x1bd09b1d, 0x87477348, 0x53396a92, 0xc1c97199, - 0x18b5caf6, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xb5476085, 0xa908b701, 0x96eb9f84, 0xb5714e77, 0xa78ed1af, 0x10d3aad6, 0x7a08cd3e, - 0x3443a706, - ]; - let y1 = [ - 0x8b8f52d8, 0x6d3484bd, 0xd0c2b67f, 0x18a4b27, 0x8c7e1da9, 0x4f6e8c4b, 0x829b6f85, - 0x661a7a5f, + 0x3443a706, 0x8b8f52d8, 0x6d3484bd, 0xd0c2b67f, 0x18a4b27, 0x8c7e1da9, 0x4f6e8c4b, + 0x829b6f85, 0x661a7a5f, ]; - let x2 = [ + let p2 = [ 0xb2993603, 0x84fd71bd, 0x35e25ff7, 0x1077370c, 0x5867e485, 0x9edfb935, 0xd0b48228, - 0x2de4cdc, + 0x2de4cdc, 0xf3c18392, 
0xd7b138d4, 0xa44f7f11, 0x1bd09b1d, 0x87477348, 0x53396a92, + 0xc1c97199, 0x18b5caf6, ]; - let y2 = [ - 0xf3c18392, 0xd7b138d4, 0xa44f7f11, 0x1bd09b1d, 0x87477348, 0x53396a92, 0xc1c97199, - 0x18b5caf6, - ]; - let x3 = [ + let p3 = [ 0x694478f1, 0xb4fb31d0, 0x639a1309, 0xe3af2921, 0x8069c19c, 0xcea090c4, 0xdb4efc49, - 0x2ce2bd56, - ]; - let y3 = [ - 0xc94a43b7, 0xdb966e56, 0xa8a289c1, 0x166b72, 0xe9560522, 0x78291fe1, 0x3d95a2e2, 0xb56a711, + 0x2ce2bd56, 0xc94a43b7, 0xdb966e56, 0xa8a289c1, 0x166b72, 0xe9560522, 0x78291fe1, + 0x3d95a2e2, 0xb56a711, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xe57e8dfa, 0xfcfc0cb9, 0xa3c7e184, 0x9809191, 0xaca98ca0, 0xd9a30f8, 0xf0799c4c, - 0x8262cf2f, + 0x8262cf2f, 0xfbac376a, 0x35cff8d8, 0x2b14c478, 0x57b6ed33, 0xc5b34f34, 0x66fee22e, + 0x9109e4e, 0x83fd95e2, ]; - let y1 = [ - 0xfbac376a, 0x35cff8d8, 0x2b14c478, 0x57b6ed33, 0xc5b34f34, 0x66fee22e, 0x9109e4e, - 0x83fd95e2, - ]; - let x2 = [ + let p2 = [ 0x694478f1, 0xb4fb31d0, 0x639a1309, 0xe3af2921, 0x8069c19c, 0xcea090c4, 0xdb4efc49, - 0x2ce2bd56, - ]; - let y2 = [ - 0xc94a43b7, 0xdb966e56, 0xa8a289c1, 0x166b72, 0xe9560522, 0x78291fe1, 0x3d95a2e2, 0xb56a711, + 0x2ce2bd56, 0xc94a43b7, 0xdb966e56, 0xa8a289c1, 0x166b72, 0xe9560522, 0x78291fe1, + 0x3d95a2e2, 0xb56a711, ]; - let x3 = [ + let p3 = [ 0x44ad3367, 0x9aeb9669, 0x637f77d7, 0xc3a1a0e7, 0x6964096c, 0xdf790607, 0x3e8509c2, - 0xab7527e3, + 0xab7527e3, 0xba33e3e9, 0xc844b48d, 0xda415aa3, 0xc572928e, 0xa95cf18e, 0x4778ec33, + 0xfa8b39, 0xfac5ff0c, ]; - let y3 = [ - 0xba33e3e9, 0xc844b48d, 0xda415aa3, 0xc572928e, 0xa95cf18e, 0x4778ec33, 0xfa8b39, - 0xfac5ff0c, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x7c70620c, 0xd17cc1f2, 0xabc288d9, 0x4998c4be, 0x2b671780, 0xc60dd31a, 0x8d2c236d, - 0x1653a8a4, - ]; - let y1 = [ - 
0x315b32cd, 0x6ca2e81d, 0xdfd3dc52, 0x12af748, 0x4efa701c, 0xeafa9947, 0x35af7f7a, - 0x3382909, + 0x1653a8a4, 0x315b32cd, 0x6ca2e81d, 0xdfd3dc52, 0x12af748, 0x4efa701c, 0xeafa9947, + 0x35af7f7a, 0x3382909, ]; - let x2 = [ + let p2 = [ 0x44ad3367, 0x9aeb9669, 0x637f77d7, 0xc3a1a0e7, 0x6964096c, 0xdf790607, 0x3e8509c2, - 0xab7527e3, + 0xab7527e3, 0xba33e3e9, 0xc844b48d, 0xda415aa3, 0xc572928e, 0xa95cf18e, 0x4778ec33, + 0xfa8b39, 0xfac5ff0c, ]; - let y2 = [ - 0xba33e3e9, 0xc844b48d, 0xda415aa3, 0xc572928e, 0xa95cf18e, 0x4778ec33, 0xfa8b39, - 0xfac5ff0c, - ]; - let x3 = [ + let p3 = [ 0x344d1571, 0xfbc3a3ed, 0x6d037843, 0xd86e1c94, 0xb24f4644, 0x3c6685fd, 0xb14dbaa6, - 0xd4632e2b, - ]; - let y3 = [ - 0xb26ac915, 0x57c30d45, 0xa60c041f, 0xaff15cc2, 0x76a49ec0, 0x2e672992, 0xed49e170, - 0x5b7067, + 0xd4632e2b, 0xb26ac915, 0x57c30d45, 0xa60c041f, 0xaff15cc2, 0x76a49ec0, 0x2e672992, + 0xed49e170, 0x5b7067, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xe71dabcd, 0x47d42ba6, 0x89e5cb4f, 0x54d3fe49, 0x60b5373f, 0x6098ae32, 0x6b63f43c, - 0xd49ee4fb, - ]; - let y1 = [ - 0x16603c2, 0xe66a90cf, 0x12ff7031, 0x129c5093, 0xa61bf356, 0xd7c87ea7, 0x9a5490d, 0x531e392, + 0xd49ee4fb, 0x16603c2, 0xe66a90cf, 0x12ff7031, 0x129c5093, 0xa61bf356, 0xd7c87ea7, + 0x9a5490d, 0x531e392, ]; - let x2 = [ + let p2 = [ 0x344d1571, 0xfbc3a3ed, 0x6d037843, 0xd86e1c94, 0xb24f4644, 0x3c6685fd, 0xb14dbaa6, - 0xd4632e2b, + 0xd4632e2b, 0xb26ac915, 0x57c30d45, 0xa60c041f, 0xaff15cc2, 0x76a49ec0, 0x2e672992, + 0xed49e170, 0x5b7067, ]; - let y2 = [ - 0xb26ac915, 0x57c30d45, 0xa60c041f, 0xaff15cc2, 0x76a49ec0, 0x2e672992, 0xed49e170, - 0x5b7067, - ]; - let x3 = [ + let p3 = [ 0xdbe47240, 0xf2ee698, 0xb9575b37, 0xe2d2cfe2, 0x4a09b9d5, 0xbfe560fa, 0xaf3c4f5c, - 0xb311519d, - ]; - let y3 = [ - 0x66c734da, 0x1147627c, 0xa4e7d38c, 0x41bf546d, 0xa86dd053, 0xbd7ee596, 0x65390183, - 0xe8608078, + 0xb311519d, 
0x66c734da, 0x1147627c, 0xa4e7d38c, 0x41bf546d, 0xa86dd053, 0xbd7ee596, + 0x65390183, 0xe8608078, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xc8c828a, 0x53f30ab9, 0xc96ae41f, 0x132eb242, 0x17e81c75, 0xe44a0d8, 0xa4149e75, - 0x5f94851c, + 0x5f94851c, 0x37344d80, 0xbfeb0a3f, 0x4fc68b04, 0x8c66df75, 0x8882f35e, 0xe5f0797d, + 0xafa1fee8, 0x26b8c3b8, ]; - let y1 = [ - 0x37344d80, 0xbfeb0a3f, 0x4fc68b04, 0x8c66df75, 0x8882f35e, 0xe5f0797d, 0xafa1fee8, - 0x26b8c3b8, - ]; - let x2 = [ + let p2 = [ 0xdbe47240, 0xf2ee698, 0xb9575b37, 0xe2d2cfe2, 0x4a09b9d5, 0xbfe560fa, 0xaf3c4f5c, - 0xb311519d, - ]; - let y2 = [ - 0x66c734da, 0x1147627c, 0xa4e7d38c, 0x41bf546d, 0xa86dd053, 0xbd7ee596, 0x65390183, - 0xe8608078, + 0xb311519d, 0x66c734da, 0x1147627c, 0xa4e7d38c, 0x41bf546d, 0xa86dd053, 0xbd7ee596, + 0x65390183, 0xe8608078, ]; - let x3 = [ + let p3 = [ 0x7950604f, 0x3904da0a, 0x8d3a8c08, 0x7f74a2e8, 0x5270445, 0x70f70734, 0x3ca5ebf9, - 0xa4da524a, + 0xa4da524a, 0xe01e4087, 0x6f2bfded, 0x85e258c9, 0xb7498e0b, 0x51ab50dd, 0x1cb690a9, + 0xae9bf4bc, 0x3bc500fb, ]; - let y3 = [ - 0xe01e4087, 0x6f2bfded, 0x85e258c9, 0xb7498e0b, 0x51ab50dd, 0x1cb690a9, 0xae9bf4bc, - 0x3bc500fb, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xc5041216, 0x65b7f8f1, 0x842b836a, 0x3f7335f6, 0xdc2fed52, 0x128b59ef, 0x21f7acf4, - 0xda75317b, - ]; - let y1 = [ - 0x6e708572, 0xdaed3298, 0xe77aceda, 0xe9aac07a, 0x342d7fc6, 0xdf19e21b, 0xbf72d5f0, - 0x73f8a046, + 0xda75317b, 0x6e708572, 0xdaed3298, 0xe77aceda, 0xe9aac07a, 0x342d7fc6, 0xdf19e21b, + 0xbf72d5f0, 0x73f8a046, ]; - let x2 = [ + let p2 = [ 0x7950604f, 0x3904da0a, 0x8d3a8c08, 0x7f74a2e8, 0x5270445, 0x70f70734, 0x3ca5ebf9, - 0xa4da524a, + 0xa4da524a, 0xe01e4087, 0x6f2bfded, 0x85e258c9, 0xb7498e0b, 0x51ab50dd, 0x1cb690a9, + 0xae9bf4bc, 0x3bc500fb, 
]; - let y2 = [ - 0xe01e4087, 0x6f2bfded, 0x85e258c9, 0xb7498e0b, 0x51ab50dd, 0x1cb690a9, 0xae9bf4bc, - 0x3bc500fb, - ]; - let x3 = [ + let p3 = [ 0x84b81f7f, 0xf0507d87, 0xa12969d7, 0xee9f7214, 0x1d049f95, 0xddb232b2, 0x640741b1, - 0x448316e7, - ]; - let y3 = [ - 0x5e2540f7, 0x3630d948, 0xb351e0f, 0x43ce853a, 0x803089f2, 0x692ec20e, 0x29d5008f, - 0xf4bf3660, + 0x448316e7, 0x5e2540f7, 0x3630d948, 0xb351e0f, 0x43ce853a, 0x803089f2, 0x692ec20e, + 0x29d5008f, 0xf4bf3660, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x3c62bac0, 0x9505324f, 0x51f0ab06, 0x19150ddf, 0xc3e8b70e, 0x1364b7d2, 0x23f469c, - 0x9530f0f9, + 0x9530f0f9, 0x7618e309, 0x478abda9, 0x2f1fdc68, 0xe25b3285, 0x59b333e0, 0x34dd2f7f, + 0x8f9f21e2, 0x8f3c305a, ]; - let y1 = [ - 0x7618e309, 0x478abda9, 0x2f1fdc68, 0xe25b3285, 0x59b333e0, 0x34dd2f7f, 0x8f9f21e2, - 0x8f3c305a, - ]; - let x2 = [ + let p2 = [ 0x84b81f7f, 0xf0507d87, 0xa12969d7, 0xee9f7214, 0x1d049f95, 0xddb232b2, 0x640741b1, - 0x448316e7, - ]; - let y2 = [ - 0x5e2540f7, 0x3630d948, 0xb351e0f, 0x43ce853a, 0x803089f2, 0x692ec20e, 0x29d5008f, - 0xf4bf3660, + 0x448316e7, 0x5e2540f7, 0x3630d948, 0xb351e0f, 0x43ce853a, 0x803089f2, 0x692ec20e, + 0x29d5008f, 0xf4bf3660, ]; - let x3 = [ + let p3 = [ 0xc37466d3, 0x1258ebbc, 0x79fac61e, 0x5911c69b, 0x594aefa8, 0x6a450d16, 0xb5da30ac, - 0xebee8e11, + 0xebee8e11, 0xaf04f45f, 0x99500512, 0xde6da3c0, 0xd8a47486, 0x8facdfd6, 0xab96ea50, + 0xcc61f51d, 0x5c523787, ]; - let y3 = [ - 0xaf04f45f, 0x99500512, 0xde6da3c0, 0xd8a47486, 0x8facdfd6, 0xab96ea50, 0xcc61f51d, - 0x5c523787, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xdc3c9c8f, 0x6704385, 0x3e4367b2, 0xf2816fee, 0xaaa332b0, 0x6f09ff43, 0xbe4298fd, - 0x67be02dc, - ]; - let y1 = [ - 0x593652d9, 0x55384998, 0xb88c2be, 0xcd993bf6, 0x8291693, 0xa2c945b6, 
0x3e4def84, - 0x7a9b55a7, + 0x67be02dc, 0x593652d9, 0x55384998, 0xb88c2be, 0xcd993bf6, 0x8291693, 0xa2c945b6, + 0x3e4def84, 0x7a9b55a7, ]; - let x2 = [ + let p2 = [ 0xc37466d3, 0x1258ebbc, 0x79fac61e, 0x5911c69b, 0x594aefa8, 0x6a450d16, 0xb5da30ac, - 0xebee8e11, + 0xebee8e11, 0xaf04f45f, 0x99500512, 0xde6da3c0, 0xd8a47486, 0x8facdfd6, 0xab96ea50, + 0xcc61f51d, 0x5c523787, ]; - let y2 = [ - 0xaf04f45f, 0x99500512, 0xde6da3c0, 0xd8a47486, 0x8facdfd6, 0xab96ea50, 0xcc61f51d, - 0x5c523787, - ]; - let x3 = [ + let p3 = [ 0xa0aae439, 0x4f4c07cf, 0x89c4c360, 0x6f99cc22, 0x11576d96, 0x536674ab, 0xaa9cf13d, - 0x9d9c34cc, - ]; - let y3 = [ - 0xb57534d9, 0xab01d669, 0xa1e9a5a7, 0xcd33ec53, 0x421d0474, 0x399ae585, 0x40a53d7d, - 0xd9c7c978, + 0x9d9c34cc, 0xb57534d9, 0xab01d669, 0xa1e9a5a7, 0xcd33ec53, 0x421d0474, 0x399ae585, + 0x40a53d7d, 0xd9c7c978, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x10aaa33a, 0x11f9bcbe, 0xc17b9ca5, 0x8c92dd29, 0xbc571836, 0xdf569013, 0xf4ef876a, - 0x893b2492, + 0x893b2492, 0xd1af3445, 0x67b80b8a, 0x13ceeb42, 0xa439e8a2, 0x66507f32, 0xf413a007, + 0x72d1c89e, 0xcdb152b6, ]; - let y1 = [ - 0xd1af3445, 0x67b80b8a, 0x13ceeb42, 0xa439e8a2, 0x66507f32, 0xf413a007, 0x72d1c89e, - 0xcdb152b6, - ]; - let x2 = [ + let p2 = [ 0xa0aae439, 0x4f4c07cf, 0x89c4c360, 0x6f99cc22, 0x11576d96, 0x536674ab, 0xaa9cf13d, - 0x9d9c34cc, - ]; - let y2 = [ - 0xb57534d9, 0xab01d669, 0xa1e9a5a7, 0xcd33ec53, 0x421d0474, 0x399ae585, 0x40a53d7d, - 0xd9c7c978, + 0x9d9c34cc, 0xb57534d9, 0xab01d669, 0xa1e9a5a7, 0xcd33ec53, 0x421d0474, 0x399ae585, + 0x40a53d7d, 0xd9c7c978, ]; - let x3 = [ - 0x61dec56, 0xc3d986b6, 0x3fa64819, 0xa346dbb9, 0x5106c8a, 0xcf195055, 0x89c4b6c0, 0x4d83a5c, + let p3 = [ + 0x61dec56, 0xc3d986b6, 0x3fa64819, 0xa346dbb9, 0x5106c8a, 0xcf195055, 0x89c4b6c0, + 0x4d83a5c, 0xde60d2c4, 0x28c617d, 0xbc06b39, 0x3ed8a31f, 0x85df1f33, 0xb8f6d515, + 0x5a0b2233, 0xcf911e0f, 
]; - let y3 = [ - 0xde60d2c4, 0x28c617d, 0xbc06b39, 0x3ed8a31f, 0x85df1f33, 0xb8f6d515, 0x5a0b2233, - 0xcf911e0f, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xf6e55dc8, 0x4b891216, 0xeaca0439, 0x6ff95ab6, 0xc0509442, 0xba84a440, 0x90c5ffb2, - 0x44314047, - ]; - let y1 = [ - 0xdbe323b3, 0x31d944ae, 0x9eaa2e50, 0xa66a29b7, 0x5642fed7, 0xfe99837f, 0xe65366f8, - 0x96b0c142, + 0x44314047, 0xdbe323b3, 0x31d944ae, 0x9eaa2e50, 0xa66a29b7, 0x5642fed7, 0xfe99837f, + 0xe65366f8, 0x96b0c142, ]; - let x2 = [ - 0x61dec56, 0xc3d986b6, 0x3fa64819, 0xa346dbb9, 0x5106c8a, 0xcf195055, 0x89c4b6c0, 0x4d83a5c, + let p2 = [ + 0x61dec56, 0xc3d986b6, 0x3fa64819, 0xa346dbb9, 0x5106c8a, 0xcf195055, 0x89c4b6c0, + 0x4d83a5c, 0xde60d2c4, 0x28c617d, 0xbc06b39, 0x3ed8a31f, 0x85df1f33, 0xb8f6d515, + 0x5a0b2233, 0xcf911e0f, ]; - let y2 = [ - 0xde60d2c4, 0x28c617d, 0xbc06b39, 0x3ed8a31f, 0x85df1f33, 0xb8f6d515, 0x5a0b2233, - 0xcf911e0f, - ]; - let x3 = [ + let p3 = [ 0x11b05bca, 0x2a7f5d14, 0x3949a197, 0xb9db55f0, 0xb966688, 0x13962410, 0xbbc05a67, - 0x6ed24224, - ]; - let y3 = [ - 0x8e241ae8, 0x38555bc9, 0xdc0cc527, 0x55bb4406, 0xd4c00fb5, 0x447a0f71, 0xf36480ac, - 0x5160ba0, + 0x6ed24224, 0x8e241ae8, 0x38555bc9, 0xdc0cc527, 0x55bb4406, 0xd4c00fb5, 0x447a0f71, + 0xf36480ac, 0x5160ba0, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x33f0e9aa, 0x3eb5e196, 0xb11bd34b, 0x68112776, 0xd58138d2, 0xb7924ae0, 0x575f26ad, - 0xe5380fe8, + 0xe5380fe8, 0x4082720f, 0xc4ba4136, 0xf468318e, 0x6fb94e5d, 0x924c8e01, 0x5b691363, + 0x9087b41d, 0xb97fd873, ]; - let y1 = [ - 0x4082720f, 0xc4ba4136, 0xf468318e, 0x6fb94e5d, 0x924c8e01, 0x5b691363, 0x9087b41d, - 0xb97fd873, - ]; - let x2 = [ + let p2 = [ 0x11b05bca, 0x2a7f5d14, 0x3949a197, 0xb9db55f0, 0xb966688, 0x13962410, 0xbbc05a67, - 0x6ed24224, - ]; - let y2 = [ 
- 0x8e241ae8, 0x38555bc9, 0xdc0cc527, 0x55bb4406, 0xd4c00fb5, 0x447a0f71, 0xf36480ac, - 0x5160ba0, + 0x6ed24224, 0x8e241ae8, 0x38555bc9, 0xdc0cc527, 0x55bb4406, 0xd4c00fb5, 0x447a0f71, + 0xf36480ac, 0x5160ba0, ]; - let x3 = [ + let p3 = [ 0x6522cef6, 0xf3e9afc9, 0x3f587dda, 0xac84eab3, 0xa8c2e75d, 0x14466cd4, 0x17b57b58, - 0x6d663b77, + 0x6d663b77, 0x4564c5a4, 0x827c89b6, 0xe66d0671, 0xff99027c, 0xa69fd33b, 0x4325889a, + 0xa57f3c05, 0xd69cf941, ]; - let y3 = [ - 0x4564c5a4, 0x827c89b6, 0xe66d0671, 0xff99027c, 0xa69fd33b, 0x4325889a, 0xa57f3c05, - 0xd69cf941, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xeebc61d6, 0x1aed361b, 0xd9ff42de, 0x8a8fd3a7, 0x5d6b1f51, 0xc395f0d1, 0xa3ed9af0, - 0x939ff3e4, - ]; - let y1 = [ - 0xa3f5cb70, 0xe75ea466, 0xb78c7f82, 0x980bf26e, 0xef016c04, 0x9d46fc4e, 0x8b7a90e, - 0xdeab3bcf, + 0x939ff3e4, 0xa3f5cb70, 0xe75ea466, 0xb78c7f82, 0x980bf26e, 0xef016c04, 0x9d46fc4e, + 0x8b7a90e, 0xdeab3bcf, ]; - let x2 = [ + let p2 = [ 0x6522cef6, 0xf3e9afc9, 0x3f587dda, 0xac84eab3, 0xa8c2e75d, 0x14466cd4, 0x17b57b58, - 0x6d663b77, + 0x6d663b77, 0x4564c5a4, 0x827c89b6, 0xe66d0671, 0xff99027c, 0xa69fd33b, 0x4325889a, + 0xa57f3c05, 0xd69cf941, ]; - let y2 = [ - 0x4564c5a4, 0x827c89b6, 0xe66d0671, 0xff99027c, 0xa69fd33b, 0x4325889a, 0xa57f3c05, - 0xd69cf941, + let p3 = [ + 0xc75826ca, 0x64c7ca6, 0xc829e086, 0xdc2329c, 0xff69f2ec, 0xa840f259, 0x40689eac, + 0xf80118d, 0x49b394f1, 0x49420a27, 0xaecd9f53, 0xc5f848b0, 0x8658a660, 0x1e5fa185, + 0x9fd2f732, 0xb518b863, ]; - let x3 = [ - 0xc75826ca, 0x64c7ca6, 0xc829e086, 0xdc2329c, 0xff69f2ec, 0xa840f259, 0x40689eac, 0xf80118d, - ]; - let y3 = [ - 0x49b394f1, 0x49420a27, 0xaecd9f53, 0xc5f848b0, 0x8658a660, 0x1e5fa185, 0x9fd2f732, - 0xb518b863, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xc497e0df, 
0x16e134d, 0xecf76f53, 0x4c3bb436, 0xfe6029a0, 0x7858785, 0xae383293, - 0xfdc63e52, - ]; - let y1 = [ - 0xdb9eb19f, 0xf0604449, 0xbf35d9d5, 0x7bbeb22f, 0x8ae2e8b8, 0xe3df7142, 0xacebbb52, - 0x292dad67, - ]; - let x2 = [ - 0xc75826ca, 0x64c7ca6, 0xc829e086, 0xdc2329c, 0xff69f2ec, 0xa840f259, 0x40689eac, 0xf80118d, + 0xfdc63e52, 0xdb9eb19f, 0xf0604449, 0xbf35d9d5, 0x7bbeb22f, 0x8ae2e8b8, 0xe3df7142, + 0xacebbb52, 0x292dad67, ]; - let y2 = [ - 0x49b394f1, 0x49420a27, 0xaecd9f53, 0xc5f848b0, 0x8658a660, 0x1e5fa185, 0x9fd2f732, - 0xb518b863, + let p2 = [ + 0xc75826ca, 0x64c7ca6, 0xc829e086, 0xdc2329c, 0xff69f2ec, 0xa840f259, 0x40689eac, + 0xf80118d, 0x49b394f1, 0x49420a27, 0xaecd9f53, 0xc5f848b0, 0x8658a660, 0x1e5fa185, + 0x9fd2f732, 0xb518b863, ]; - let x3 = [ + let p3 = [ 0x5d52bd3b, 0xd260a04e, 0xe527dc75, 0x41a7866d, 0xba1eb327, 0x1cc02fa9, 0xf290ba01, - 0x1d931895, + 0x1d931895, 0x54a028f0, 0x8a9692c7, 0xdd90d86a, 0xd007d5eb, 0x8ef4581a, 0xcb5c9f55, + 0xfd528566, 0x17e3d9dd, ]; - let y3 = [ - 0x54a028f0, 0x8a9692c7, 0xdd90d86a, 0xd007d5eb, 0x8ef4581a, 0xcb5c9f55, 0xfd528566, - 0x17e3d9dd, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xf55812dd, 0xa0a2a582, 0x552d30e2, 0x3d446723, 0xc058f78e, 0xb6abed6, 0x92ff352f, - 0x7029bd7a, - ]; - let y1 = [ - 0x1a2d2927, 0x721cc66b, 0x43b2c73c, 0x47dae842, 0xe30683ac, 0x7dd6544a, 0xfde8b3d2, - 0xb0eefada, + 0x7029bd7a, 0x1a2d2927, 0x721cc66b, 0x43b2c73c, 0x47dae842, 0xe30683ac, 0x7dd6544a, + 0xfde8b3d2, 0xb0eefada, ]; - let x2 = [ + let p2 = [ 0x5d52bd3b, 0xd260a04e, 0xe527dc75, 0x41a7866d, 0xba1eb327, 0x1cc02fa9, 0xf290ba01, - 0x1d931895, - ]; - let y2 = [ - 0x54a028f0, 0x8a9692c7, 0xdd90d86a, 0xd007d5eb, 0x8ef4581a, 0xcb5c9f55, 0xfd528566, - 0x17e3d9dd, + 0x1d931895, 0x54a028f0, 0x8a9692c7, 0xdd90d86a, 0xd007d5eb, 0x8ef4581a, 0xcb5c9f55, + 0xfd528566, 0x17e3d9dd, ]; - let x3 = [ + let p3 = [ 0xb054c95a, 0xbcd8f87c, 
0xb25e5d80, 0x459c9fcc, 0x1674b8f, 0x516609ce, 0x7cc748c, - 0x2f6c48fd, + 0x2f6c48fd, 0x9bef73d9, 0x89dc0133, 0x19da7c6, 0xd2d4e81d, 0xada47bd1, 0xc91bfe10, + 0x4a947582, 0x6633d51b, ]; - let y3 = [ - 0x9bef73d9, 0x89dc0133, 0x19da7c6, 0xd2d4e81d, 0xada47bd1, 0xc91bfe10, 0x4a947582, - 0x6633d51b, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xb181fdc2, 0xdcdabff9, 0x5cc62364, 0xdd2f62bb, 0x18a34e7e, 0x4aa264b8, 0xf47e6e47, - 0xf42c102a, - ]; - let y1 = [ - 0xa485d7fd, 0x81f00093, 0x9a2acf26, 0x4c15502d, 0xb86fe22a, 0x78fad05c, 0x6cfe806c, - 0x57503ab4, + 0xf42c102a, 0xa485d7fd, 0x81f00093, 0x9a2acf26, 0x4c15502d, 0xb86fe22a, 0x78fad05c, + 0x6cfe806c, 0x57503ab4, ]; - let x2 = [ + let p2 = [ 0xb054c95a, 0xbcd8f87c, 0xb25e5d80, 0x459c9fcc, 0x1674b8f, 0x516609ce, 0x7cc748c, - 0x2f6c48fd, + 0x2f6c48fd, 0x9bef73d9, 0x89dc0133, 0x19da7c6, 0xd2d4e81d, 0xada47bd1, 0xc91bfe10, + 0x4a947582, 0x6633d51b, ]; - let y2 = [ - 0x9bef73d9, 0x89dc0133, 0x19da7c6, 0xd2d4e81d, 0xada47bd1, 0xc91bfe10, 0x4a947582, - 0x6633d51b, - ]; - let x3 = [ + let p3 = [ 0x7cf01352, 0x3fca8944, 0xc3b91a98, 0x47e08381, 0xc5409be0, 0x48662b2e, 0x51b3b0dd, - 0x331795e5, - ]; - let y3 = [ - 0x87069ca1, 0xb6fba74, 0xa10b410a, 0x866f98f, 0x5f2a5ed2, 0xa0afcbbe, 0x3f7674bd, - 0x515daa7f, + 0x331795e5, 0x87069ca1, 0xb6fba74, 0xa10b410a, 0x866f98f, 0x5f2a5ed2, 0xa0afcbbe, + 0x3f7674bd, 0x515daa7f, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xeedd7dd6, 0x3866d47d, 0x65e1968c, 0x49376fe2, 0xee7cfdec, 0xca5a7840, 0x24c7524b, - 0x32cfcf6a, + 0x32cfcf6a, 0xfe08e330, 0x25fd44ae, 0x349a08b, 0x7a0d8cd2, 0x409f561e, 0x6208096a, + 0x976a7748, 0x21846a34, ]; - let y1 = [ - 0xfe08e330, 0x25fd44ae, 0x349a08b, 0x7a0d8cd2, 0x409f561e, 0x6208096a, 0x976a7748, - 0x21846a34, - ]; - let x2 = [ + let p2 = [ 0x7cf01352, 
0x3fca8944, 0xc3b91a98, 0x47e08381, 0xc5409be0, 0x48662b2e, 0x51b3b0dd, - 0x331795e5, - ]; - let y2 = [ - 0x87069ca1, 0xb6fba74, 0xa10b410a, 0x866f98f, 0x5f2a5ed2, 0xa0afcbbe, 0x3f7674bd, - 0x515daa7f, + 0x331795e5, 0x87069ca1, 0xb6fba74, 0xa10b410a, 0x866f98f, 0x5f2a5ed2, 0xa0afcbbe, + 0x3f7674bd, 0x515daa7f, ]; - let x3 = [ - 0x472a1fd3, 0x3020c90, 0x8e1aca3, 0xc31a79e1, 0x633ce07, 0x73ea1256, 0xef361199, 0x63f74113, + let p3 = [ + 0x472a1fd3, 0x3020c90, 0x8e1aca3, 0xc31a79e1, 0x633ce07, 0x73ea1256, 0xef361199, + 0x63f74113, 0x48050037, 0x1b081bdc, 0x8371f934, 0xd3405d6b, 0x3b8c2882, 0xdf0fd90a, + 0x730bdfce, 0x83b14db6, ]; - let y3 = [ - 0x48050037, 0x1b081bdc, 0x8371f934, 0xd3405d6b, 0x3b8c2882, 0xdf0fd90a, 0x730bdfce, - 0x83b14db6, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x21231d11, 0xce674831, 0x3c2aaad7, 0x22ab36c6, 0xc777c398, 0x33d1155c, 0x8b9388e4, - 0x3514d41e, - ]; - let y1 = [ - 0xe3855df5, 0x53d6fb40, 0xaf79ebe, 0x9384f31d, 0x56839eff, 0xef44d11e, 0x16017eb8, - 0x89a83250, + 0x3514d41e, 0xe3855df5, 0x53d6fb40, 0xaf79ebe, 0x9384f31d, 0x56839eff, 0xef44d11e, + 0x16017eb8, 0x89a83250, ]; - let x2 = [ - 0x472a1fd3, 0x3020c90, 0x8e1aca3, 0xc31a79e1, 0x633ce07, 0x73ea1256, 0xef361199, 0x63f74113, + let p2 = [ + 0x472a1fd3, 0x3020c90, 0x8e1aca3, 0xc31a79e1, 0x633ce07, 0x73ea1256, 0xef361199, + 0x63f74113, 0x48050037, 0x1b081bdc, 0x8371f934, 0xd3405d6b, 0x3b8c2882, 0xdf0fd90a, + 0x730bdfce, 0x83b14db6, ]; - let y2 = [ - 0x48050037, 0x1b081bdc, 0x8371f934, 0xd3405d6b, 0x3b8c2882, 0xdf0fd90a, 0x730bdfce, - 0x83b14db6, - ]; - let x3 = [ + let p3 = [ 0xa38fb89e, 0x72d6446a, 0xe51dd73e, 0x15410f1, 0xb5a38c2f, 0x99eade1f, 0x3f79e108, - 0xfb202e0f, - ]; - let y3 = [ - 0xcde0a70a, 0x5999a198, 0xb48c34b6, 0x343ea4ea, 0xcdb51927, 0x89aafe43, 0x4580257c, - 0x5f924734, + 0xfb202e0f, 0xcde0a70a, 0x5999a198, 0xb48c34b6, 0x343ea4ea, 0xcdb51927, 0x89aafe43, + 
0x4580257c, 0x5f924734, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x80633cb1, 0x2567e09e, 0x69d02113, 0x575a224b, 0x12181fcb, 0xc62732, 0x17aacad4, - 0x6dde9cf3, + 0x6dde9cf3, 0x67ce6b34, 0x57dd49aa, 0xcf859ef3, 0x80b27fda, 0xa1ba66a8, 0x5c99ef86, + 0xa707e41d, 0x9188fbe7, ]; - let y1 = [ - 0x67ce6b34, 0x57dd49aa, 0xcf859ef3, 0x80b27fda, 0xa1ba66a8, 0x5c99ef86, 0xa707e41d, - 0x9188fbe7, - ]; - let x2 = [ + let p2 = [ 0xa38fb89e, 0x72d6446a, 0xe51dd73e, 0x15410f1, 0xb5a38c2f, 0x99eade1f, 0x3f79e108, - 0xfb202e0f, - ]; - let y2 = [ - 0xcde0a70a, 0x5999a198, 0xb48c34b6, 0x343ea4ea, 0xcdb51927, 0x89aafe43, 0x4580257c, - 0x5f924734, + 0xfb202e0f, 0xcde0a70a, 0x5999a198, 0xb48c34b6, 0x343ea4ea, 0xcdb51927, 0x89aafe43, + 0x4580257c, 0x5f924734, ]; - let x3 = [ + let p3 = [ 0x4e1fdadb, 0x30998607, 0x50dd64ce, 0xfdf652c1, 0xbc4a599, 0x122bc1a, 0xf1769ea5, - 0x51a4c0ac, + 0x51a4c0ac, 0x521044ef, 0xadff4507, 0xfb522d06, 0x6f255923, 0x8cba892a, 0xe3517e53, + 0x8c01d153, 0x77c8d346, ]; - let y3 = [ - 0x521044ef, 0xadff4507, 0xfb522d06, 0x6f255923, 0x8cba892a, 0xe3517e53, 0x8c01d153, - 0x77c8d346, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x44e5467d, 0x4d0bd76a, 0x19bbface, 0x40908ab8, 0xec970e9, 0x2c21f62e, 0xfc69a122, - 0x97d064f0, - ]; - let y1 = [ - 0x1e9cb3fa, 0x797300fd, 0x54f17ccd, 0xda5fb3b8, 0xa850861f, 0x3f7c66f, 0xd33402cc, - 0x89974f2e, + 0x97d064f0, 0x1e9cb3fa, 0x797300fd, 0x54f17ccd, 0xda5fb3b8, 0xa850861f, 0x3f7c66f, + 0xd33402cc, 0x89974f2e, ]; - let x2 = [ + let p2 = [ 0x4e1fdadb, 0x30998607, 0x50dd64ce, 0xfdf652c1, 0xbc4a599, 0x122bc1a, 0xf1769ea5, - 0x51a4c0ac, + 0x51a4c0ac, 0x521044ef, 0xadff4507, 0xfb522d06, 0x6f255923, 0x8cba892a, 0xe3517e53, + 0x8c01d153, 0x77c8d346, ]; - let y2 = [ - 0x521044ef, 0xadff4507, 0xfb522d06, 0x6f255923, 0x8cba892a, 
0xe3517e53, 0x8c01d153, - 0x77c8d346, - ]; - let x3 = [ + let p3 = [ 0x926341d9, 0xffcca36d, 0x20156334, 0x47b40a76, 0xe290d534, 0x19612e4a, 0xbb0b867, - 0xbca7069b, - ]; - let y3 = [ - 0xbb0b77b, 0xc0d93775, 0x6a1fc9b3, 0x6204875d, 0x70b64966, 0x982754ac, 0x1566660, - 0x5a7df87d, + 0xbca7069b, 0xbb0b77b, 0xc0d93775, 0x6a1fc9b3, 0x6204875d, 0x70b64966, 0x982754ac, + 0x1566660, 0x5a7df87d, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x13613bec, 0xcca81cb9, 0x101cfe67, 0x8bb5fc9d, 0xc74f972a, 0xedf1b33d, 0xc93937bd, - 0x2dcfcab8, + 0x2dcfcab8, 0x9a039215, 0x3e730924, 0xd33f5f38, 0x3732cfba, 0xd6f6c6f4, 0x65f088b7, + 0x9474a412, 0x46dbc4dd, ]; - let y1 = [ - 0x9a039215, 0x3e730924, 0xd33f5f38, 0x3732cfba, 0xd6f6c6f4, 0x65f088b7, 0x9474a412, - 0x46dbc4dd, - ]; - let x2 = [ + let p2 = [ 0x926341d9, 0xffcca36d, 0x20156334, 0x47b40a76, 0xe290d534, 0x19612e4a, 0xbb0b867, - 0xbca7069b, - ]; - let y2 = [ - 0xbb0b77b, 0xc0d93775, 0x6a1fc9b3, 0x6204875d, 0x70b64966, 0x982754ac, 0x1566660, - 0x5a7df87d, + 0xbca7069b, 0xbb0b77b, 0xc0d93775, 0x6a1fc9b3, 0x6204875d, 0x70b64966, 0x982754ac, + 0x1566660, 0x5a7df87d, ]; - let x3 = [ + let p3 = [ 0x2bbfca49, 0x712b0408, 0xd867f353, 0x62f95f3d, 0x41a92b20, 0xd1ea0c49, 0x7ac6f3aa, - 0xdeb21645, + 0xdeb21645, 0xe16061a8, 0xcc4c43b3, 0xf6aa30a4, 0xb75e791b, 0x870bf702, 0x2ff3ef86, + 0x5c58abfc, 0xe07e6c7e, ]; - let y3 = [ - 0xe16061a8, 0xcc4c43b3, 0xf6aa30a4, 0xb75e791b, 0x870bf702, 0x2ff3ef86, 0x5c58abfc, - 0xe07e6c7e, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x47fb9e1a, 0x17cd1708, 0xde2a3296, 0x7fe74b74, 0xbbab0e76, 0xf1a02bc9, 0xa48ec5a8, - 0x1bec414a, - ]; - let y1 = [ - 0x749c0443, 0x57f6e117, 0xe8c9796e, 0x681385da, 0x30c54b0f, 0x8a79bc57, 0x70126667, - 0xe3586704, + 0x1bec414a, 0x749c0443, 0x57f6e117, 0xe8c9796e, 
0x681385da, 0x30c54b0f, 0x8a79bc57, + 0x70126667, 0xe3586704, ]; - let x2 = [ + let p2 = [ 0x2bbfca49, 0x712b0408, 0xd867f353, 0x62f95f3d, 0x41a92b20, 0xd1ea0c49, 0x7ac6f3aa, - 0xdeb21645, + 0xdeb21645, 0xe16061a8, 0xcc4c43b3, 0xf6aa30a4, 0xb75e791b, 0x870bf702, 0x2ff3ef86, + 0x5c58abfc, 0xe07e6c7e, ]; - let y2 = [ - 0xe16061a8, 0xcc4c43b3, 0xf6aa30a4, 0xb75e791b, 0x870bf702, 0x2ff3ef86, 0x5c58abfc, - 0xe07e6c7e, - ]; - let x3 = [ + let p3 = [ 0x58a42d36, 0x8d906bf7, 0xb8d381b8, 0xd367e918, 0x4606faae, 0x17469bda, 0xdc7a255b, - 0xeb6e3277, - ]; - let y3 = [ - 0x993cc451, 0x56a4320a, 0x892949a2, 0x7abe2059, 0xa0c8d1ff, 0xe30a6394, 0x80e9e03d, - 0x615fa4f7, + 0xeb6e3277, 0x993cc451, 0x56a4320a, 0x892949a2, 0x7abe2059, 0xa0c8d1ff, 0xe30a6394, + 0x80e9e03d, 0x615fa4f7, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xbb7ceceb, 0xf3f678ff, 0x8897faf0, 0x73a59f93, 0x6f6e6814, 0x36ffb812, 0x4276d450, - 0x437a8620, + 0x437a8620, 0x56c181e1, 0x7363bcc3, 0xdc8f9782, 0x87220fcf, 0x99d297ff, 0x69b8feb6, + 0x3eeac32f, 0xb916ba1, ]; - let y1 = [ - 0x56c181e1, 0x7363bcc3, 0xdc8f9782, 0x87220fcf, 0x99d297ff, 0x69b8feb6, 0x3eeac32f, - 0xb916ba1, - ]; - let x2 = [ + let p2 = [ 0x58a42d36, 0x8d906bf7, 0xb8d381b8, 0xd367e918, 0x4606faae, 0x17469bda, 0xdc7a255b, - 0xeb6e3277, - ]; - let y2 = [ - 0x993cc451, 0x56a4320a, 0x892949a2, 0x7abe2059, 0xa0c8d1ff, 0xe30a6394, 0x80e9e03d, - 0x615fa4f7, + 0xeb6e3277, 0x993cc451, 0x56a4320a, 0x892949a2, 0x7abe2059, 0xa0c8d1ff, 0xe30a6394, + 0x80e9e03d, 0x615fa4f7, ]; - let x3 = [ + let p3 = [ 0xc1a7674e, 0xc0497aa4, 0x27b9af61, 0x813645a4, 0xc1c691a3, 0x3be4aee1, 0xa5a4164c, - 0x955e83c9, + 0x955e83c9, 0xd6ae5ea4, 0x71ae0ae5, 0xcc9834ac, 0xbe2ecf82, 0x5ef04324, 0x753b98ff, + 0x93b1d494, 0xc87400d2, ]; - let y3 = [ - 0xd6ae5ea4, 0x71ae0ae5, 0xcc9834ac, 0xbe2ecf82, 0x5ef04324, 0x753b98ff, 0x93b1d494, - 0xc87400d2, - ]; - assert_eq!(ec_add(x1, y1, x2, 
y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xdcbf00eb, 0x4c9d9d87, 0xc18d0227, 0x41b4e98b, 0xa1a30bc2, 0x49be16f6, 0x96ead4dc, - 0xb89070ae, - ]; - let y1 = [ - 0x1b0e664e, 0x1b7f1bcd, 0xb6b96a67, 0xcb0d8b06, 0xc1c4a766, 0x472294e4, 0xc8a2d88f, - 0x6f24c8c2, + 0xb89070ae, 0x1b0e664e, 0x1b7f1bcd, 0xb6b96a67, 0xcb0d8b06, 0xc1c4a766, 0x472294e4, + 0xc8a2d88f, 0x6f24c8c2, ]; - let x2 = [ + let p2 = [ 0xc1a7674e, 0xc0497aa4, 0x27b9af61, 0x813645a4, 0xc1c691a3, 0x3be4aee1, 0xa5a4164c, - 0x955e83c9, + 0x955e83c9, 0xd6ae5ea4, 0x71ae0ae5, 0xcc9834ac, 0xbe2ecf82, 0x5ef04324, 0x753b98ff, + 0x93b1d494, 0xc87400d2, ]; - let y2 = [ - 0xd6ae5ea4, 0x71ae0ae5, 0xcc9834ac, 0xbe2ecf82, 0x5ef04324, 0x753b98ff, 0x93b1d494, - 0xc87400d2, - ]; - let x3 = [ + let p3 = [ 0x47f62b82, 0x47e3fb06, 0xc2414cdc, 0x4d64416a, 0x9b263f25, 0x122b078f, 0xc89072f7, - 0xcd9aa0b, - ]; - let y3 = [ - 0x14ca295f, 0x30910574, 0x19149bd4, 0x25b0ad0, 0xd8b34884, 0x9a61a4e0, 0xb15c4d94, - 0xc510e04c, + 0xcd9aa0b, 0x14ca295f, 0x30910574, 0x19149bd4, 0x25b0ad0, 0xd8b34884, 0x9a61a4e0, + 0xb15c4d94, 0xc510e04c, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xb6fbe7b2, 0xb9d6ff9a, 0x458d65a3, 0x5eadedc1, 0xb2a88460, 0xf336bbb1, 0x9cb441f8, - 0x26488766, + 0x26488766, 0x21bc2a34, 0x932a78bc, 0x6a0eb603, 0x5638d981, 0xd02ddf18, 0x8f2f2dca, + 0xb2014498, 0x9e15dab4, ]; - let y1 = [ - 0x21bc2a34, 0x932a78bc, 0x6a0eb603, 0x5638d981, 0xd02ddf18, 0x8f2f2dca, 0xb2014498, - 0x9e15dab4, - ]; - let x2 = [ + let p2 = [ 0x47f62b82, 0x47e3fb06, 0xc2414cdc, 0x4d64416a, 0x9b263f25, 0x122b078f, 0xc89072f7, - 0xcd9aa0b, - ]; - let y2 = [ - 0x14ca295f, 0x30910574, 0x19149bd4, 0x25b0ad0, 0xd8b34884, 0x9a61a4e0, 0xb15c4d94, - 0xc510e04c, + 0xcd9aa0b, 0x14ca295f, 0x30910574, 0x19149bd4, 0x25b0ad0, 0xd8b34884, 0x9a61a4e0, + 0xb15c4d94, 0xc510e04c, ]; - let x3 = [ + 
let p3 = [ 0xb0263efb, 0xcc80645c, 0x7316d99, 0x4d5abacd, 0xb542ffec, 0xc03d886c, 0x5bc3b48b, - 0xb9ad22c1, - ]; - let y3 = [ - 0xc37062b9, 0x474ad42a, 0x3e498d1a, 0x109e16c7, 0x9444c97b, 0x49968c1c, 0xbb4f8cca, - 0xeb85392, + 0xb9ad22c1, 0xc37062b9, 0x474ad42a, 0x3e498d1a, 0x109e16c7, 0x9444c97b, 0x49968c1c, + 0xbb4f8cca, 0xeb85392, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x2b038315, 0x9690d306, 0x69310e6f, 0x9cacc433, 0x9794b862, 0x1e4680e3, 0x56771222, - 0xaba55687, + 0xaba55687, 0xae25fc0a, 0xf9a003f9, 0xd8b63338, 0x3fbfb532, 0x25130d6f, 0x63d570f6, + 0xaa365edb, 0xa0e75d87, ]; - let y1 = [ - 0xae25fc0a, 0xf9a003f9, 0xd8b63338, 0x3fbfb532, 0x25130d6f, 0x63d570f6, 0xaa365edb, - 0xa0e75d87, - ]; - let x2 = [ + let p2 = [ 0xb0263efb, 0xcc80645c, 0x7316d99, 0x4d5abacd, 0xb542ffec, 0xc03d886c, 0x5bc3b48b, - 0xb9ad22c1, - ]; - let y2 = [ - 0xc37062b9, 0x474ad42a, 0x3e498d1a, 0x109e16c7, 0x9444c97b, 0x49968c1c, 0xbb4f8cca, - 0xeb85392, + 0xb9ad22c1, 0xc37062b9, 0x474ad42a, 0x3e498d1a, 0x109e16c7, 0x9444c97b, 0x49968c1c, + 0xbb4f8cca, 0xeb85392, ]; - let x3 = [ + let p3 = [ 0x16e97240, 0xb56720fb, 0x25349558, 0x15ab0093, 0xf2363793, 0x1f147d1a, 0x76551f7, - 0x81e874bb, + 0x81e874bb, 0x1ceb8018, 0x3937014e, 0x6c600999, 0xaf1ce7cf, 0xfaeb8246, 0x2032d276, + 0x4664bb0, 0xc2791f04, ]; - let y3 = [ - 0x1ceb8018, 0x3937014e, 0x6c600999, 0xaf1ce7cf, 0xfaeb8246, 0x2032d276, 0x4664bb0, - 0xc2791f04, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x95bc15b4, 0x9cb9a134, 0x465a2ee6, 0x9275028e, 0xced7ca8d, 0xed858ee9, 0x51eeadc9, - 0x10e90e2e, - ]; - let y1 = [ - 0x58aa258d, 0x34ebe609, 0x2bb6a88, 0x4ca58963, 0x16ad1f75, 0x4d57a8c6, 0x80d5e042, - 0xc68a3703, + 0x10e90e2e, 0x58aa258d, 0x34ebe609, 0x2bb6a88, 0x4ca58963, 0x16ad1f75, 0x4d57a8c6, + 0x80d5e042, 0xc68a3703, ]; - 
let x2 = [ + let p2 = [ 0x16e97240, 0xb56720fb, 0x25349558, 0x15ab0093, 0xf2363793, 0x1f147d1a, 0x76551f7, - 0x81e874bb, + 0x81e874bb, 0x1ceb8018, 0x3937014e, 0x6c600999, 0xaf1ce7cf, 0xfaeb8246, 0x2032d276, + 0x4664bb0, 0xc2791f04, ]; - let y2 = [ - 0x1ceb8018, 0x3937014e, 0x6c600999, 0xaf1ce7cf, 0xfaeb8246, 0x2032d276, 0x4664bb0, - 0xc2791f04, - ]; - let x3 = [ + let p3 = [ 0xf94b2dfb, 0x8add44b4, 0x33978c7a, 0x5d5d4dd3, 0xd75d0b54, 0x61ca58e9, 0x97c539fd, - 0xe0a6cdb7, - ]; - let y3 = [ - 0xed63d567, 0x7446491b, 0xe5c5e6d3, 0x8055cb06, 0xd0165eb0, 0xae321a97, 0x2dc8eb3f, - 0x8d1484c4, + 0xe0a6cdb7, 0xed63d567, 0x7446491b, 0xe5c5e6d3, 0x8055cb06, 0xd0165eb0, 0xae321a97, + 0x2dc8eb3f, 0x8d1484c4, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ - 0x7a1c0a80, 0xf62abc8, 0xc65a9c74, 0x4d625158, 0x2ff9c3, 0xb17c9be7, 0xa614cca5, 0xb6b15a68, + let p1 = [ + 0x7a1c0a80, 0xf62abc8, 0xc65a9c74, 0x4d625158, 0x2ff9c3, 0xb17c9be7, 0xa614cca5, + 0xb6b15a68, 0x41ce0a03, 0xb6cd0110, 0x82e16ee, 0x9c9a12b3, 0xef6536d4, 0xa54e223e, + 0xd6cdb61e, 0xfae62e14, ]; - let y1 = [ - 0x41ce0a03, 0xb6cd0110, 0x82e16ee, 0x9c9a12b3, 0xef6536d4, 0xa54e223e, 0xd6cdb61e, - 0xfae62e14, - ]; - let x2 = [ + let p2 = [ 0xf94b2dfb, 0x8add44b4, 0x33978c7a, 0x5d5d4dd3, 0xd75d0b54, 0x61ca58e9, 0x97c539fd, - 0xe0a6cdb7, - ]; - let y2 = [ - 0xed63d567, 0x7446491b, 0xe5c5e6d3, 0x8055cb06, 0xd0165eb0, 0xae321a97, 0x2dc8eb3f, - 0x8d1484c4, + 0xe0a6cdb7, 0xed63d567, 0x7446491b, 0xe5c5e6d3, 0x8055cb06, 0xd0165eb0, 0xae321a97, + 0x2dc8eb3f, 0x8d1484c4, ]; - let x3 = [ + let p3 = [ 0xe6409a8e, 0xa14dcacb, 0x2639e842, 0xece0189b, 0x1414cac6, 0x3979374e, 0xed7d382d, - 0x1e3faaf7, + 0x1e3faaf7, 0xae6d0176, 0x8c62c805, 0x97c1e2e6, 0xa6e353ea, 0x70d1f1d5, 0xbd387a7, + 0x7aa6fb10, 0x2eff9414, ]; - let y3 = [ - 0xae6d0176, 0x8c62c805, 0x97c1e2e6, 0xa6e353ea, 0x70d1f1d5, 0xbd387a7, 0x7aa6fb10, - 0x2eff9414, - ]; - assert_eq!(ec_add(x1, 
y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x92b062d4, 0xa7caa50a, 0x9bb6a141, 0x7a5ce7e5, 0x83ea227a, 0x6fb1712, 0x3256eaca, - 0x35963ea4, - ]; - let y1 = [ - 0xbbb25302, 0xa10aa4d1, 0x64de59b1, 0xd04082b9, 0xf9c08a96, 0xbfcce196, 0x4951e5c9, - 0xf65be145, + 0x35963ea4, 0xbbb25302, 0xa10aa4d1, 0x64de59b1, 0xd04082b9, 0xf9c08a96, 0xbfcce196, + 0x4951e5c9, 0xf65be145, ]; - let x2 = [ + let p2 = [ 0xe6409a8e, 0xa14dcacb, 0x2639e842, 0xece0189b, 0x1414cac6, 0x3979374e, 0xed7d382d, - 0x1e3faaf7, + 0x1e3faaf7, 0xae6d0176, 0x8c62c805, 0x97c1e2e6, 0xa6e353ea, 0x70d1f1d5, 0xbd387a7, + 0x7aa6fb10, 0x2eff9414, ]; - let y2 = [ - 0xae6d0176, 0x8c62c805, 0x97c1e2e6, 0xa6e353ea, 0x70d1f1d5, 0xbd387a7, 0x7aa6fb10, - 0x2eff9414, - ]; - let x3 = [ + let p3 = [ 0x2548781a, 0xe605a68, 0x1ceff047, 0xf0d2b94d, 0x45e90176, 0x3b3b64d5, 0x15169a11, - 0x37e15dc4, - ]; - let y3 = [ - 0x18c6306a, 0xea220b3a, 0x17dc3bc2, 0x1efec53b, 0xa03a580, 0xc329cc5b, 0x1d3d12f0, - 0x4e6dfbf5, + 0x37e15dc4, 0x18c6306a, 0xea220b3a, 0x17dc3bc2, 0x1efec53b, 0xa03a580, 0xc329cc5b, + 0x1d3d12f0, 0x4e6dfbf5, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x1d33fd27, 0xfa0bf5c5, 0xb646cc62, 0x445f573d, 0xda82361b, 0xd022388e, 0x2263e84c, - 0x9ed73f09, + 0x9ed73f09, 0x2716c458, 0x5972b2de, 0xb2e44934, 0x94a823e5, 0x42467254, 0xee75b4f3, + 0xebb1eeea, 0xb6318967, ]; - let y1 = [ - 0x2716c458, 0x5972b2de, 0xb2e44934, 0x94a823e5, 0x42467254, 0xee75b4f3, 0xebb1eeea, - 0xb6318967, - ]; - let x2 = [ + let p2 = [ 0x2548781a, 0xe605a68, 0x1ceff047, 0xf0d2b94d, 0x45e90176, 0x3b3b64d5, 0x15169a11, - 0x37e15dc4, - ]; - let y2 = [ - 0x18c6306a, 0xea220b3a, 0x17dc3bc2, 0x1efec53b, 0xa03a580, 0xc329cc5b, 0x1d3d12f0, - 0x4e6dfbf5, + 0x37e15dc4, 0x18c6306a, 0xea220b3a, 0x17dc3bc2, 0x1efec53b, 0xa03a580, 0xc329cc5b, + 0x1d3d12f0, 0x4e6dfbf5, ]; - let x3 
= [ + let p3 = [ 0xdae7b802, 0xecfda59, 0x8e518ce7, 0xe5ab4c5, 0x7cd74c07, 0x2c98b4ea, 0x216c600f, - 0x328a2f7a, - ]; - let y3 = [ - 0xd9b7d882, 0x86610a8e, 0x29924aee, 0x9754734c, 0x6e285105, 0x607959f5, 0x34212010, - 0xb9d97615, + 0x328a2f7a, 0xd9b7d882, 0x86610a8e, 0x29924aee, 0x9754734c, 0x6e285105, 0x607959f5, + 0x34212010, 0xb9d97615, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xef028d83, 0x579623ae, 0xba743961, 0x6195926d, 0x15de69db, 0x6a5abe5a, 0xe3c785ec, - 0xa7ebf7c4, + 0xa7ebf7c4, 0x99d0bed1, 0x9640392b, 0x4b053919, 0x47a38927, 0x7044804b, 0xcfd9c737, + 0xbfe362d5, 0x6205152f, ]; - let y1 = [ - 0x99d0bed1, 0x9640392b, 0x4b053919, 0x47a38927, 0x7044804b, 0xcfd9c737, 0xbfe362d5, - 0x6205152f, - ]; - let x2 = [ + let p2 = [ 0xdae7b802, 0xecfda59, 0x8e518ce7, 0xe5ab4c5, 0x7cd74c07, 0x2c98b4ea, 0x216c600f, - 0x328a2f7a, - ]; - let y2 = [ - 0xd9b7d882, 0x86610a8e, 0x29924aee, 0x9754734c, 0x6e285105, 0x607959f5, 0x34212010, - 0xb9d97615, + 0x328a2f7a, 0xd9b7d882, 0x86610a8e, 0x29924aee, 0x9754734c, 0x6e285105, 0x607959f5, + 0x34212010, 0xb9d97615, ]; - let x3 = [ + let p3 = [ 0x7498cf74, 0xbb2d0fe6, 0x2d4edb8, 0x35fa8af, 0xfe19c40f, 0x1eef3c75, 0xac797f07, - 0xad59a910, + 0xad59a910, 0xffaadfdd, 0x84d70ced, 0xa3defb74, 0xdeee57b6, 0x5624675d, 0xa5225083, + 0x9035d182, 0xd758026d, ]; - let y3 = [ - 0xffaadfdd, 0x84d70ced, 0xa3defb74, 0xdeee57b6, 0x5624675d, 0xa5225083, 0x9035d182, - 0xd758026d, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x7bb61ee5, 0xf2884413, 0xfb1f0c13, 0xda4f04e2, 0x8974ae6e, 0x662638cd, 0xcc8721b8, - 0xd4933230, - ]; - let y1 = [ - 0xe5d694a8, 0x662da4d0, 0x5a438ddc, 0x1ad12c8c, 0x1ecafb5e, 0xedcc5e9d, 0xf51a9d23, - 0x21c09ab, + 0xd4933230, 0xe5d694a8, 0x662da4d0, 0x5a438ddc, 0x1ad12c8c, 0x1ecafb5e, 0xedcc5e9d, + 0xf51a9d23, 
0x21c09ab, ]; - let x2 = [ + let p2 = [ 0x7498cf74, 0xbb2d0fe6, 0x2d4edb8, 0x35fa8af, 0xfe19c40f, 0x1eef3c75, 0xac797f07, - 0xad59a910, + 0xad59a910, 0xffaadfdd, 0x84d70ced, 0xa3defb74, 0xdeee57b6, 0x5624675d, 0xa5225083, + 0x9035d182, 0xd758026d, ]; - let y2 = [ - 0xffaadfdd, 0x84d70ced, 0xa3defb74, 0xdeee57b6, 0x5624675d, 0xa5225083, 0x9035d182, - 0xd758026d, - ]; - let x3 = [ + let p3 = [ 0x39cf7518, 0x5eee87ed, 0x7312f938, 0x519c909, 0x17773d04, 0x3f3fe6bc, 0x94420f9a, - 0x99966667, - ]; - let y3 = [ - 0xbeb56284, 0x7cb2d8ef, 0xfc349071, 0xa97b338a, 0xdbaa9dc0, 0x23089a10, 0x76ecf29, - 0x6986934a, + 0x99966667, 0xbeb56284, 0x7cb2d8ef, 0xfc349071, 0xa97b338a, 0xdbaa9dc0, 0x23089a10, + 0x76ecf29, 0x6986934a, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0xec04554a, 0x530ddcbc, 0x4688cffe, 0xaadcffbb, 0x7a10a2ec, 0x474652c2, 0x9873d1a0, - 0x896f37c8, + 0x896f37c8, 0x929138df, 0xd68f9fe1, 0xacc417dc, 0xe6085b61, 0x4e811bf1, 0xda622bb0, + 0x224ac4ac, 0x380423e7, ]; - let y1 = [ - 0x929138df, 0xd68f9fe1, 0xacc417dc, 0xe6085b61, 0x4e811bf1, 0xda622bb0, 0x224ac4ac, - 0x380423e7, - ]; - let x2 = [ + let p2 = [ 0x39cf7518, 0x5eee87ed, 0x7312f938, 0x519c909, 0x17773d04, 0x3f3fe6bc, 0x94420f9a, - 0x99966667, - ]; - let y2 = [ - 0xbeb56284, 0x7cb2d8ef, 0xfc349071, 0xa97b338a, 0xdbaa9dc0, 0x23089a10, 0x76ecf29, - 0x6986934a, + 0x99966667, 0xbeb56284, 0x7cb2d8ef, 0xfc349071, 0xa97b338a, 0xdbaa9dc0, 0x23089a10, + 0x76ecf29, 0x6986934a, ]; - let x3 = [ + let p3 = [ 0xca77ca75, 0x6ffe52cb, 0x8f7ef3dd, 0x8b554eb8, 0x3b86af90, 0x387c6679, 0xc837de69, - 0xc7ce6fa9, + 0xc7ce6fa9, 0xd8ce2d85, 0x216c037b, 0x8f97bcee, 0xfb1f33db, 0xb4898bdf, 0x7eb02451, + 0xc41130a7, 0x1d6668e2, ]; - let y3 = [ - 0xd8ce2d85, 0x216c037b, 0x8f97bcee, 0xfb1f33db, 0xb4898bdf, 0x7eb02451, 0xc41130a7, - 0x1d6668e2, - ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); 
//////////////////////////////////// - let x1 = [ + let p1 = [ 0xb43fca26, 0x84077d1a, 0xa3bc2367, 0x7dfb841d, 0xbf3578a2, 0xca6c209d, 0x774b6d6c, - 0x11b3b97f, - ]; - let y1 = [ - 0x5b679d58, 0xd3b27eaf, 0x4b9f9d42, 0x3bae231c, 0x2f36d3bb, 0x8cd5650c, 0xae600c50, - 0x65331f9f, + 0x11b3b97f, 0x5b679d58, 0xd3b27eaf, 0x4b9f9d42, 0x3bae231c, 0x2f36d3bb, 0x8cd5650c, + 0xae600c50, 0x65331f9f, ]; - let x2 = [ + let p2 = [ 0xca77ca75, 0x6ffe52cb, 0x8f7ef3dd, 0x8b554eb8, 0x3b86af90, 0x387c6679, 0xc837de69, - 0xc7ce6fa9, + 0xc7ce6fa9, 0xd8ce2d85, 0x216c037b, 0x8f97bcee, 0xfb1f33db, 0xb4898bdf, 0x7eb02451, + 0xc41130a7, 0x1d6668e2, ]; - let y2 = [ - 0xd8ce2d85, 0x216c037b, 0x8f97bcee, 0xfb1f33db, 0xb4898bdf, 0x7eb02451, 0xc41130a7, - 0x1d6668e2, - ]; - let x3 = [ + let p3 = [ 0xbf92c52f, 0x49bbb218, 0x58dc039d, 0x4fc6d734, 0x8ad8179f, 0xdd9068ac, 0xdc747673, - 0x213995c9, - ]; - let y3 = [ - 0x84240dd, 0xed98f6fa, 0x3ecbf6a4, 0x907e4ae8, 0x7d55c35a, 0x2d8acdec, 0xddf22625, - 0x458f0e6b, + 0x213995c9, 0x84240dd, 0xed98f6fa, 0x3ecbf6a4, 0x907e4ae8, 0x7d55c35a, 0x2d8acdec, + 0xddf22625, 0x458f0e6b, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); //////////////////////////////////// - let x1 = [ + let p1 = [ 0x48dfd587, 0x79361bb, 0xc9b02656, 0x5ec4ba38, 0x2cf5a12d, 0x34867aaa, 0xacf4508b, - 0x5084b41b, + 0x5084b41b, 0x91470e89, 0x6e79e97f, 0x6891f560, 0x5db6f560, 0x55292747, 0x619aa6c8, + 0x1d980d31, 0x34a9631a, ]; - let y1 = [ - 0x91470e89, 0x6e79e97f, 0x6891f560, 0x5db6f560, 0x55292747, 0x619aa6c8, 0x1d980d31, - 0x34a9631a, - ]; - let x2 = [ + let p2 = [ 0xbf92c52f, 0x49bbb218, 0x58dc039d, 0x4fc6d734, 0x8ad8179f, 0xdd9068ac, 0xdc747673, - 0x213995c9, - ]; - let y2 = [ - 0x84240dd, 0xed98f6fa, 0x3ecbf6a4, 0x907e4ae8, 0x7d55c35a, 0x2d8acdec, 0xddf22625, - 0x458f0e6b, + 0x213995c9, 0x84240dd, 0xed98f6fa, 0x3ecbf6a4, 0x907e4ae8, 0x7d55c35a, 0x2d8acdec, + 0xddf22625, 0x458f0e6b, ]; - let x3 = [ + let p3 = [ 0xed91aaf, 0x96eb71b9, 0xe5e3ee7d, 
0x7d30103b, 0xa207dafc, 0x501b2c67, 0x237542f4, - 0x1159abd4, - ]; - let y3 = [ - 0x6a255906, 0xb1976404, 0x3c3747d4, 0xebd4f608, 0x6bd3bbf, 0x40d90c4b, 0x23adb6fd, - 0xb5196132, + 0x1159abd4, 0x6a255906, 0xb1976404, 0x3c3747d4, 0xebd4f608, 0x6bd3bbf, 0x40d90c4b, + 0x23adb6fd, 0xb5196132, ]; - assert_eq!(ec_add(x1, y1, x2, y2), (x3, y3)); + assert_eq!(ec_add(p1, p2), p3); } diff --git a/riscv/tests/riscv_data/ec_double/src/main.rs b/riscv/tests/riscv_data/ec_double/src/main.rs index 0f3927d00f..bc191a5589 100644 --- a/riscv/tests/riscv_data/ec_double/src/main.rs +++ b/riscv/tests/riscv_data/ec_double/src/main.rs @@ -8,804 +8,572 @@ use powdr_riscv_runtime::ec::double_u8_be; #[no_mangle] pub fn main() { - let x1 = [ + let p1 = [ + // x 0x60297556, 0x2f057a14, 0x8568a18b, 0x82f6472f, 0x355235d3, 0x20453a14, 0x755eeea4, - 0xfff97bd5, - ]; - let y1 = [ + 0xfff97bd5, // y 0xb075f297, 0x3c870c36, 0x518fe4a0, 0xde80f0f6, 0x7f45c560, 0xf3be9601, 0xacfbb620, 0xae12777a, ]; - let x2 = [ + let p2 = [ + // x 0x70afe85a, 0xc5b0f470, 0x9620095b, 0x687cf441, 0x4d734633, 0x15c38f00, 0x48e7561b, - 0xd01115d5, - ]; - let y2 = [ + 0xd01115d5, // y 0xf4062327, 0x6b051b13, 0xd9a86d52, 0x79238c5d, 0xe17bd815, 0xa8b64537, 0xc815e0d7, 0xa9f34ffd, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + + assert_eq!(ec_double(p1), p2); // same as above but using the big endian api let x1 = hex!("fff97bd5755eeea420453a14355235d382f6472f8568a18b2f057a1460297556"); let y1 = hex!("ae12777aacfbb620f3be96017f45c560de80f0f6518fe4a03c870c36b075f297"); - let x2 = hex!("d01115d548e7561b15c38f004d734633687cf4419620095bc5b0f47070afe85a"); - let y2 = hex!("a9f34ffdc815e0d7a8b64537e17bd81579238c5dd9a86d526b051b13f4062327"); - assert_eq!(double_u8_be(x1, y1), (x2, y2)); + let p2 = hex!("a9f34ffdc815e0d7a8b64537e17bd81579238c5dd9a86d526b051b13f4062327d01115d548e7561b15c38f004d734633687cf4419620095bc5b0f47070afe85a"); + assert_eq!(double_u8_be(x1, y1), p2); 
/////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x70afe85a, 0xc5b0f470, 0x9620095b, 0x687cf441, 0x4d734633, 0x15c38f00, 0x48e7561b, - 0xd01115d5, - ]; - let y1 = [ - 0xf4062327, 0x6b051b13, 0xd9a86d52, 0x79238c5d, 0xe17bd815, 0xa8b64537, 0xc815e0d7, - 0xa9f34ffd, + 0xd01115d5, 0xf4062327, 0x6b051b13, 0xd9a86d52, 0x79238c5d, 0xe17bd815, 0xa8b64537, + 0xc815e0d7, 0xa9f34ffd, ]; - let x2 = [ + let p2 = [ 0xb202e6ce, 0x502bda8, 0x9d62b794, 0x68321543, 0x61ba8b09, 0x8ac09c91, 0x413d33d4, - 0xfe72c435, + 0xfe72c435, 0xcf58c5bf, 0x978ed2fb, 0x6b4a9d22, 0x1dc88e3, 0x9d729981, 0xd3ab47e0, + 0x7ff24a68, 0x6851de06, ]; - let y2 = [ - 0xcf58c5bf, 0x978ed2fb, 0x6b4a9d22, 0x1dc88e3, 0x9d729981, 0xd3ab47e0, 0x7ff24a68, - 0x6851de06, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xb202e6ce, 0x502bda8, 0x9d62b794, 0x68321543, 0x61ba8b09, 0x8ac09c91, 0x413d33d4, - 0xfe72c435, - ]; - let y1 = [ - 0xcf58c5bf, 0x978ed2fb, 0x6b4a9d22, 0x1dc88e3, 0x9d729981, 0xd3ab47e0, 0x7ff24a68, - 0x6851de06, + 0xfe72c435, 0xcf58c5bf, 0x978ed2fb, 0x6b4a9d22, 0x1dc88e3, 0x9d729981, 0xd3ab47e0, + 0x7ff24a68, 0x6851de06, ]; - let x2 = [ + let p2 = [ 0x1118e5c3, 0x9bd870aa, 0x452bebc1, 0xfc579b27, 0xf4e65b4b, 0xb441656e, 0x9645307d, - 0x6eca335d, - ]; - let y2 = [ - 0x5a08668, 0x498a2f78, 0x3bf8ec34, 0x3a496a3a, 0x74b875a0, 0x592f5790, 0x7a7a0710, - 0xd50123b5, + 0x6eca335d, 0x5a08668, 0x498a2f78, 0x3bf8ec34, 0x3a496a3a, 0x74b875a0, 0x592f5790, + 0x7a7a0710, 0xd50123b5, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x1118e5c3, 0x9bd870aa, 0x452bebc1, 0xfc579b27, 0xf4e65b4b, 0xb441656e, 0x9645307d, - 0x6eca335d, + 0x6eca335d, 0x5a08668, 0x498a2f78, 0x3bf8ec34, 0x3a496a3a, 0x74b875a0, 0x592f5790, + 0x7a7a0710, 0xd50123b5, 
]; - let y1 = [ - 0x5a08668, 0x498a2f78, 0x3bf8ec34, 0x3a496a3a, 0x74b875a0, 0x592f5790, 0x7a7a0710, - 0xd50123b5, - ]; - let x2 = [ + let p2 = [ 0x7f8cb0e3, 0x43933aca, 0xe1efe3a4, 0xa22eb53f, 0x4b2eb72e, 0x8fa64e04, 0x74456d8f, - 0x3f0e80e5, - ]; - let y2 = [ - 0xea5f404f, 0xcb0289e2, 0xa65b53a4, 0x9501253a, 0x485d01b3, 0xe90b9c08, 0x296cbc91, - 0xcb66d7d7, + 0x3f0e80e5, 0xea5f404f, 0xcb0289e2, 0xa65b53a4, 0x9501253a, 0x485d01b3, 0xe90b9c08, + 0x296cbc91, 0xcb66d7d7, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x7f8cb0e3, 0x43933aca, 0xe1efe3a4, 0xa22eb53f, 0x4b2eb72e, 0x8fa64e04, 0x74456d8f, - 0x3f0e80e5, + 0x3f0e80e5, 0xea5f404f, 0xcb0289e2, 0xa65b53a4, 0x9501253a, 0x485d01b3, 0xe90b9c08, + 0x296cbc91, 0xcb66d7d7, ]; - let y1 = [ - 0xea5f404f, 0xcb0289e2, 0xa65b53a4, 0x9501253a, 0x485d01b3, 0xe90b9c08, 0x296cbc91, - 0xcb66d7d7, - ]; - let x2 = [ + let p2 = [ 0x33ce1752, 0xc7b750f7, 0xd7cd204e, 0xe783c797, 0xd99c9aea, 0x812ddf64, 0xd01dc635, - 0xd7a0da58, - ]; - let y2 = [ - 0x762cef4, 0xbbc02738, 0xc062b742, 0xbe040a8, 0x40e28465, 0xf6f29283, 0x68008032, - 0x912770e0, + 0xd7a0da58, 0x762cef4, 0xbbc02738, 0xc062b742, 0xbe040a8, 0x40e28465, 0xf6f29283, + 0x68008032, 0x912770e0, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x33ce1752, 0xc7b750f7, 0xd7cd204e, 0xe783c797, 0xd99c9aea, 0x812ddf64, 0xd01dc635, - 0xd7a0da58, - ]; - let y1 = [ - 0x762cef4, 0xbbc02738, 0xc062b742, 0xbe040a8, 0x40e28465, 0xf6f29283, 0x68008032, - 0x912770e0, + 0xd7a0da58, 0x762cef4, 0xbbc02738, 0xc062b742, 0xbe040a8, 0x40e28465, 0xf6f29283, + 0x68008032, 0x912770e0, ]; - let x2 = [ + let p2 = [ 0xb5476085, 0xa908b701, 0x96eb9f84, 0xb5714e77, 0xa78ed1af, 0x10d3aad6, 0x7a08cd3e, - 0x3443a706, + 0x3443a706, 0x8b8f52d8, 0x6d3484bd, 0xd0c2b67f, 
0x18a4b27, 0x8c7e1da9, 0x4f6e8c4b, + 0x829b6f85, 0x661a7a5f, ]; - let y2 = [ - 0x8b8f52d8, 0x6d3484bd, 0xd0c2b67f, 0x18a4b27, 0x8c7e1da9, 0x4f6e8c4b, 0x829b6f85, - 0x661a7a5f, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xb5476085, 0xa908b701, 0x96eb9f84, 0xb5714e77, 0xa78ed1af, 0x10d3aad6, 0x7a08cd3e, - 0x3443a706, - ]; - let y1 = [ - 0x8b8f52d8, 0x6d3484bd, 0xd0c2b67f, 0x18a4b27, 0x8c7e1da9, 0x4f6e8c4b, 0x829b6f85, - 0x661a7a5f, + 0x3443a706, 0x8b8f52d8, 0x6d3484bd, 0xd0c2b67f, 0x18a4b27, 0x8c7e1da9, 0x4f6e8c4b, + 0x829b6f85, 0x661a7a5f, ]; - let x2 = [ + let p2 = [ 0xe57e8dfa, 0xfcfc0cb9, 0xa3c7e184, 0x9809191, 0xaca98ca0, 0xd9a30f8, 0xf0799c4c, - 0x8262cf2f, + 0x8262cf2f, 0xfbac376a, 0x35cff8d8, 0x2b14c478, 0x57b6ed33, 0xc5b34f34, 0x66fee22e, + 0x9109e4e, 0x83fd95e2, ]; - let y2 = [ - 0xfbac376a, 0x35cff8d8, 0x2b14c478, 0x57b6ed33, 0xc5b34f34, 0x66fee22e, 0x9109e4e, - 0x83fd95e2, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xe57e8dfa, 0xfcfc0cb9, 0xa3c7e184, 0x9809191, 0xaca98ca0, 0xd9a30f8, 0xf0799c4c, - 0x8262cf2f, - ]; - let y1 = [ - 0xfbac376a, 0x35cff8d8, 0x2b14c478, 0x57b6ed33, 0xc5b34f34, 0x66fee22e, 0x9109e4e, - 0x83fd95e2, + 0x8262cf2f, 0xfbac376a, 0x35cff8d8, 0x2b14c478, 0x57b6ed33, 0xc5b34f34, 0x66fee22e, + 0x9109e4e, 0x83fd95e2, ]; - let x2 = [ + let p2 = [ 0x7c70620c, 0xd17cc1f2, 0xabc288d9, 0x4998c4be, 0x2b671780, 0xc60dd31a, 0x8d2c236d, - 0x1653a8a4, - ]; - let y2 = [ - 0x315b32cd, 0x6ca2e81d, 0xdfd3dc52, 0x12af748, 0x4efa701c, 0xeafa9947, 0x35af7f7a, - 0x3382909, + 0x1653a8a4, 0x315b32cd, 0x6ca2e81d, 0xdfd3dc52, 0x12af748, 0x4efa701c, 0xeafa9947, + 0x35af7f7a, 0x3382909, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); 
/////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x7c70620c, 0xd17cc1f2, 0xabc288d9, 0x4998c4be, 0x2b671780, 0xc60dd31a, 0x8d2c236d, - 0x1653a8a4, + 0x1653a8a4, 0x315b32cd, 0x6ca2e81d, 0xdfd3dc52, 0x12af748, 0x4efa701c, 0xeafa9947, + 0x35af7f7a, 0x3382909, ]; - let y1 = [ - 0x315b32cd, 0x6ca2e81d, 0xdfd3dc52, 0x12af748, 0x4efa701c, 0xeafa9947, 0x35af7f7a, - 0x3382909, - ]; - let x2 = [ + let p2 = [ 0xe71dabcd, 0x47d42ba6, 0x89e5cb4f, 0x54d3fe49, 0x60b5373f, 0x6098ae32, 0x6b63f43c, - 0xd49ee4fb, - ]; - let y2 = [ - 0x16603c2, 0xe66a90cf, 0x12ff7031, 0x129c5093, 0xa61bf356, 0xd7c87ea7, 0x9a5490d, 0x531e392, + 0xd49ee4fb, 0x16603c2, 0xe66a90cf, 0x12ff7031, 0x129c5093, 0xa61bf356, 0xd7c87ea7, + 0x9a5490d, 0x531e392, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xe71dabcd, 0x47d42ba6, 0x89e5cb4f, 0x54d3fe49, 0x60b5373f, 0x6098ae32, 0x6b63f43c, - 0xd49ee4fb, + 0xd49ee4fb, 0x16603c2, 0xe66a90cf, 0x12ff7031, 0x129c5093, 0xa61bf356, 0xd7c87ea7, + 0x9a5490d, 0x531e392, ]; - let y1 = [ - 0x16603c2, 0xe66a90cf, 0x12ff7031, 0x129c5093, 0xa61bf356, 0xd7c87ea7, 0x9a5490d, 0x531e392, - ]; - let x2 = [ + let p2 = [ 0xc8c828a, 0x53f30ab9, 0xc96ae41f, 0x132eb242, 0x17e81c75, 0xe44a0d8, 0xa4149e75, - 0x5f94851c, - ]; - let y2 = [ - 0x37344d80, 0xbfeb0a3f, 0x4fc68b04, 0x8c66df75, 0x8882f35e, 0xe5f0797d, 0xafa1fee8, - 0x26b8c3b8, + 0x5f94851c, 0x37344d80, 0xbfeb0a3f, 0x4fc68b04, 0x8c66df75, 0x8882f35e, 0xe5f0797d, + 0xafa1fee8, 0x26b8c3b8, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xc8c828a, 0x53f30ab9, 0xc96ae41f, 0x132eb242, 0x17e81c75, 0xe44a0d8, 0xa4149e75, - 0x5f94851c, - ]; - let y1 = [ - 0x37344d80, 0xbfeb0a3f, 0x4fc68b04, 0x8c66df75, 0x8882f35e, 0xe5f0797d, 0xafa1fee8, - 0x26b8c3b8, + 
0x5f94851c, 0x37344d80, 0xbfeb0a3f, 0x4fc68b04, 0x8c66df75, 0x8882f35e, 0xe5f0797d, + 0xafa1fee8, 0x26b8c3b8, ]; - let x2 = [ + let p2 = [ 0xc5041216, 0x65b7f8f1, 0x842b836a, 0x3f7335f6, 0xdc2fed52, 0x128b59ef, 0x21f7acf4, - 0xda75317b, + 0xda75317b, 0x6e708572, 0xdaed3298, 0xe77aceda, 0xe9aac07a, 0x342d7fc6, 0xdf19e21b, + 0xbf72d5f0, 0x73f8a046, ]; - let y2 = [ - 0x6e708572, 0xdaed3298, 0xe77aceda, 0xe9aac07a, 0x342d7fc6, 0xdf19e21b, 0xbf72d5f0, - 0x73f8a046, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xc5041216, 0x65b7f8f1, 0x842b836a, 0x3f7335f6, 0xdc2fed52, 0x128b59ef, 0x21f7acf4, - 0xda75317b, - ]; - let y1 = [ - 0x6e708572, 0xdaed3298, 0xe77aceda, 0xe9aac07a, 0x342d7fc6, 0xdf19e21b, 0xbf72d5f0, - 0x73f8a046, + 0xda75317b, 0x6e708572, 0xdaed3298, 0xe77aceda, 0xe9aac07a, 0x342d7fc6, 0xdf19e21b, + 0xbf72d5f0, 0x73f8a046, ]; - let x2 = [ + let p2 = [ 0x3c62bac0, 0x9505324f, 0x51f0ab06, 0x19150ddf, 0xc3e8b70e, 0x1364b7d2, 0x23f469c, - 0x9530f0f9, - ]; - let y2 = [ - 0x7618e309, 0x478abda9, 0x2f1fdc68, 0xe25b3285, 0x59b333e0, 0x34dd2f7f, 0x8f9f21e2, - 0x8f3c305a, + 0x9530f0f9, 0x7618e309, 0x478abda9, 0x2f1fdc68, 0xe25b3285, 0x59b333e0, 0x34dd2f7f, + 0x8f9f21e2, 0x8f3c305a, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x3c62bac0, 0x9505324f, 0x51f0ab06, 0x19150ddf, 0xc3e8b70e, 0x1364b7d2, 0x23f469c, - 0x9530f0f9, + 0x9530f0f9, 0x7618e309, 0x478abda9, 0x2f1fdc68, 0xe25b3285, 0x59b333e0, 0x34dd2f7f, + 0x8f9f21e2, 0x8f3c305a, ]; - let y1 = [ - 0x7618e309, 0x478abda9, 0x2f1fdc68, 0xe25b3285, 0x59b333e0, 0x34dd2f7f, 0x8f9f21e2, - 0x8f3c305a, - ]; - let x2 = [ + let p2 = [ 0xdc3c9c8f, 0x6704385, 0x3e4367b2, 0xf2816fee, 0xaaa332b0, 0x6f09ff43, 0xbe4298fd, - 0x67be02dc, - ]; - let y2 = [ - 0x593652d9, 0x55384998, 0xb88c2be, 
0xcd993bf6, 0x8291693, 0xa2c945b6, 0x3e4def84, - 0x7a9b55a7, + 0x67be02dc, 0x593652d9, 0x55384998, 0xb88c2be, 0xcd993bf6, 0x8291693, 0xa2c945b6, + 0x3e4def84, 0x7a9b55a7, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xdc3c9c8f, 0x6704385, 0x3e4367b2, 0xf2816fee, 0xaaa332b0, 0x6f09ff43, 0xbe4298fd, - 0x67be02dc, - ]; - let y1 = [ - 0x593652d9, 0x55384998, 0xb88c2be, 0xcd993bf6, 0x8291693, 0xa2c945b6, 0x3e4def84, - 0x7a9b55a7, + 0x67be02dc, 0x593652d9, 0x55384998, 0xb88c2be, 0xcd993bf6, 0x8291693, 0xa2c945b6, + 0x3e4def84, 0x7a9b55a7, ]; - let x2 = [ + let p2 = [ 0x10aaa33a, 0x11f9bcbe, 0xc17b9ca5, 0x8c92dd29, 0xbc571836, 0xdf569013, 0xf4ef876a, - 0x893b2492, + 0x893b2492, 0xd1af3445, 0x67b80b8a, 0x13ceeb42, 0xa439e8a2, 0x66507f32, 0xf413a007, + 0x72d1c89e, 0xcdb152b6, ]; - let y2 = [ - 0xd1af3445, 0x67b80b8a, 0x13ceeb42, 0xa439e8a2, 0x66507f32, 0xf413a007, 0x72d1c89e, - 0xcdb152b6, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x10aaa33a, 0x11f9bcbe, 0xc17b9ca5, 0x8c92dd29, 0xbc571836, 0xdf569013, 0xf4ef876a, - 0x893b2492, - ]; - let y1 = [ - 0xd1af3445, 0x67b80b8a, 0x13ceeb42, 0xa439e8a2, 0x66507f32, 0xf413a007, 0x72d1c89e, - 0xcdb152b6, + 0x893b2492, 0xd1af3445, 0x67b80b8a, 0x13ceeb42, 0xa439e8a2, 0x66507f32, 0xf413a007, + 0x72d1c89e, 0xcdb152b6, ]; - let x2 = [ + let p2 = [ 0xf6e55dc8, 0x4b891216, 0xeaca0439, 0x6ff95ab6, 0xc0509442, 0xba84a440, 0x90c5ffb2, - 0x44314047, + 0x44314047, 0xdbe323b3, 0x31d944ae, 0x9eaa2e50, 0xa66a29b7, 0x5642fed7, 0xfe99837f, + 0xe65366f8, 0x96b0c142, ]; - let y2 = [ - 0xdbe323b3, 0x31d944ae, 0x9eaa2e50, 0xa66a29b7, 0x5642fed7, 0xfe99837f, 0xe65366f8, - 0x96b0c142, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); 
/////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xf6e55dc8, 0x4b891216, 0xeaca0439, 0x6ff95ab6, 0xc0509442, 0xba84a440, 0x90c5ffb2, - 0x44314047, - ]; - let y1 = [ - 0xdbe323b3, 0x31d944ae, 0x9eaa2e50, 0xa66a29b7, 0x5642fed7, 0xfe99837f, 0xe65366f8, - 0x96b0c142, + 0x44314047, 0xdbe323b3, 0x31d944ae, 0x9eaa2e50, 0xa66a29b7, 0x5642fed7, 0xfe99837f, + 0xe65366f8, 0x96b0c142, ]; - let x2 = [ + let p2 = [ 0x33f0e9aa, 0x3eb5e196, 0xb11bd34b, 0x68112776, 0xd58138d2, 0xb7924ae0, 0x575f26ad, - 0xe5380fe8, - ]; - let y2 = [ - 0x4082720f, 0xc4ba4136, 0xf468318e, 0x6fb94e5d, 0x924c8e01, 0x5b691363, 0x9087b41d, - 0xb97fd873, + 0xe5380fe8, 0x4082720f, 0xc4ba4136, 0xf468318e, 0x6fb94e5d, 0x924c8e01, 0x5b691363, + 0x9087b41d, 0xb97fd873, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x33f0e9aa, 0x3eb5e196, 0xb11bd34b, 0x68112776, 0xd58138d2, 0xb7924ae0, 0x575f26ad, - 0xe5380fe8, + 0xe5380fe8, 0x4082720f, 0xc4ba4136, 0xf468318e, 0x6fb94e5d, 0x924c8e01, 0x5b691363, + 0x9087b41d, 0xb97fd873, ]; - let y1 = [ - 0x4082720f, 0xc4ba4136, 0xf468318e, 0x6fb94e5d, 0x924c8e01, 0x5b691363, 0x9087b41d, - 0xb97fd873, - ]; - let x2 = [ + let p2 = [ 0xeebc61d6, 0x1aed361b, 0xd9ff42de, 0x8a8fd3a7, 0x5d6b1f51, 0xc395f0d1, 0xa3ed9af0, - 0x939ff3e4, - ]; - let y2 = [ - 0xa3f5cb70, 0xe75ea466, 0xb78c7f82, 0x980bf26e, 0xef016c04, 0x9d46fc4e, 0x8b7a90e, - 0xdeab3bcf, + 0x939ff3e4, 0xa3f5cb70, 0xe75ea466, 0xb78c7f82, 0x980bf26e, 0xef016c04, 0x9d46fc4e, + 0x8b7a90e, 0xdeab3bcf, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xeebc61d6, 0x1aed361b, 0xd9ff42de, 0x8a8fd3a7, 0x5d6b1f51, 0xc395f0d1, 0xa3ed9af0, - 0x939ff3e4, + 0x939ff3e4, 0xa3f5cb70, 0xe75ea466, 0xb78c7f82, 0x980bf26e, 0xef016c04, 0x9d46fc4e, + 0x8b7a90e, 
0xdeab3bcf, ]; - let y1 = [ - 0xa3f5cb70, 0xe75ea466, 0xb78c7f82, 0x980bf26e, 0xef016c04, 0x9d46fc4e, 0x8b7a90e, - 0xdeab3bcf, - ]; - let x2 = [ + let p2 = [ 0xc497e0df, 0x16e134d, 0xecf76f53, 0x4c3bb436, 0xfe6029a0, 0x7858785, 0xae383293, - 0xfdc63e52, - ]; - let y2 = [ - 0xdb9eb19f, 0xf0604449, 0xbf35d9d5, 0x7bbeb22f, 0x8ae2e8b8, 0xe3df7142, 0xacebbb52, - 0x292dad67, + 0xfdc63e52, 0xdb9eb19f, 0xf0604449, 0xbf35d9d5, 0x7bbeb22f, 0x8ae2e8b8, 0xe3df7142, + 0xacebbb52, 0x292dad67, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xc497e0df, 0x16e134d, 0xecf76f53, 0x4c3bb436, 0xfe6029a0, 0x7858785, 0xae383293, - 0xfdc63e52, - ]; - let y1 = [ - 0xdb9eb19f, 0xf0604449, 0xbf35d9d5, 0x7bbeb22f, 0x8ae2e8b8, 0xe3df7142, 0xacebbb52, - 0x292dad67, + 0xfdc63e52, 0xdb9eb19f, 0xf0604449, 0xbf35d9d5, 0x7bbeb22f, 0x8ae2e8b8, 0xe3df7142, + 0xacebbb52, 0x292dad67, ]; - let x2 = [ + let p2 = [ 0xf55812dd, 0xa0a2a582, 0x552d30e2, 0x3d446723, 0xc058f78e, 0xb6abed6, 0x92ff352f, - 0x7029bd7a, + 0x7029bd7a, 0x1a2d2927, 0x721cc66b, 0x43b2c73c, 0x47dae842, 0xe30683ac, 0x7dd6544a, + 0xfde8b3d2, 0xb0eefada, ]; - let y2 = [ - 0x1a2d2927, 0x721cc66b, 0x43b2c73c, 0x47dae842, 0xe30683ac, 0x7dd6544a, 0xfde8b3d2, - 0xb0eefada, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xf55812dd, 0xa0a2a582, 0x552d30e2, 0x3d446723, 0xc058f78e, 0xb6abed6, 0x92ff352f, - 0x7029bd7a, - ]; - let y1 = [ - 0x1a2d2927, 0x721cc66b, 0x43b2c73c, 0x47dae842, 0xe30683ac, 0x7dd6544a, 0xfde8b3d2, - 0xb0eefada, + 0x7029bd7a, 0x1a2d2927, 0x721cc66b, 0x43b2c73c, 0x47dae842, 0xe30683ac, 0x7dd6544a, + 0xfde8b3d2, 0xb0eefada, ]; - let x2 = [ + let p2 = [ 0xb181fdc2, 0xdcdabff9, 0x5cc62364, 0xdd2f62bb, 0x18a34e7e, 0x4aa264b8, 0xf47e6e47, - 0xf42c102a, + 0xf42c102a, 0xa485d7fd, 0x81f00093, 
0x9a2acf26, 0x4c15502d, 0xb86fe22a, 0x78fad05c, + 0x6cfe806c, 0x57503ab4, ]; - let y2 = [ - 0xa485d7fd, 0x81f00093, 0x9a2acf26, 0x4c15502d, 0xb86fe22a, 0x78fad05c, 0x6cfe806c, - 0x57503ab4, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xb181fdc2, 0xdcdabff9, 0x5cc62364, 0xdd2f62bb, 0x18a34e7e, 0x4aa264b8, 0xf47e6e47, - 0xf42c102a, - ]; - let y1 = [ - 0xa485d7fd, 0x81f00093, 0x9a2acf26, 0x4c15502d, 0xb86fe22a, 0x78fad05c, 0x6cfe806c, - 0x57503ab4, + 0xf42c102a, 0xa485d7fd, 0x81f00093, 0x9a2acf26, 0x4c15502d, 0xb86fe22a, 0x78fad05c, + 0x6cfe806c, 0x57503ab4, ]; - let x2 = [ + let p2 = [ 0xeedd7dd6, 0x3866d47d, 0x65e1968c, 0x49376fe2, 0xee7cfdec, 0xca5a7840, 0x24c7524b, - 0x32cfcf6a, - ]; - let y2 = [ - 0xfe08e330, 0x25fd44ae, 0x349a08b, 0x7a0d8cd2, 0x409f561e, 0x6208096a, 0x976a7748, - 0x21846a34, + 0x32cfcf6a, 0xfe08e330, 0x25fd44ae, 0x349a08b, 0x7a0d8cd2, 0x409f561e, 0x6208096a, + 0x976a7748, 0x21846a34, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xeedd7dd6, 0x3866d47d, 0x65e1968c, 0x49376fe2, 0xee7cfdec, 0xca5a7840, 0x24c7524b, - 0x32cfcf6a, + 0x32cfcf6a, 0xfe08e330, 0x25fd44ae, 0x349a08b, 0x7a0d8cd2, 0x409f561e, 0x6208096a, + 0x976a7748, 0x21846a34, ]; - let y1 = [ - 0xfe08e330, 0x25fd44ae, 0x349a08b, 0x7a0d8cd2, 0x409f561e, 0x6208096a, 0x976a7748, - 0x21846a34, - ]; - let x2 = [ + let p2 = [ 0x21231d11, 0xce674831, 0x3c2aaad7, 0x22ab36c6, 0xc777c398, 0x33d1155c, 0x8b9388e4, - 0x3514d41e, - ]; - let y2 = [ - 0xe3855df5, 0x53d6fb40, 0xaf79ebe, 0x9384f31d, 0x56839eff, 0xef44d11e, 0x16017eb8, - 0x89a83250, + 0x3514d41e, 0xe3855df5, 0x53d6fb40, 0xaf79ebe, 0x9384f31d, 0x56839eff, 0xef44d11e, + 0x16017eb8, 0x89a83250, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); 
/////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x21231d11, 0xce674831, 0x3c2aaad7, 0x22ab36c6, 0xc777c398, 0x33d1155c, 0x8b9388e4, - 0x3514d41e, - ]; - let y1 = [ - 0xe3855df5, 0x53d6fb40, 0xaf79ebe, 0x9384f31d, 0x56839eff, 0xef44d11e, 0x16017eb8, - 0x89a83250, + 0x3514d41e, 0xe3855df5, 0x53d6fb40, 0xaf79ebe, 0x9384f31d, 0x56839eff, 0xef44d11e, + 0x16017eb8, 0x89a83250, ]; - let x2 = [ + let p2 = [ 0x80633cb1, 0x2567e09e, 0x69d02113, 0x575a224b, 0x12181fcb, 0xc62732, 0x17aacad4, - 0x6dde9cf3, + 0x6dde9cf3, 0x67ce6b34, 0x57dd49aa, 0xcf859ef3, 0x80b27fda, 0xa1ba66a8, 0x5c99ef86, + 0xa707e41d, 0x9188fbe7, ]; - let y2 = [ - 0x67ce6b34, 0x57dd49aa, 0xcf859ef3, 0x80b27fda, 0xa1ba66a8, 0x5c99ef86, 0xa707e41d, - 0x9188fbe7, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x80633cb1, 0x2567e09e, 0x69d02113, 0x575a224b, 0x12181fcb, 0xc62732, 0x17aacad4, - 0x6dde9cf3, - ]; - let y1 = [ - 0x67ce6b34, 0x57dd49aa, 0xcf859ef3, 0x80b27fda, 0xa1ba66a8, 0x5c99ef86, 0xa707e41d, - 0x9188fbe7, + 0x6dde9cf3, 0x67ce6b34, 0x57dd49aa, 0xcf859ef3, 0x80b27fda, 0xa1ba66a8, 0x5c99ef86, + 0xa707e41d, 0x9188fbe7, ]; - let x2 = [ + let p2 = [ 0x44e5467d, 0x4d0bd76a, 0x19bbface, 0x40908ab8, 0xec970e9, 0x2c21f62e, 0xfc69a122, - 0x97d064f0, - ]; - let y2 = [ - 0x1e9cb3fa, 0x797300fd, 0x54f17ccd, 0xda5fb3b8, 0xa850861f, 0x3f7c66f, 0xd33402cc, - 0x89974f2e, + 0x97d064f0, 0x1e9cb3fa, 0x797300fd, 0x54f17ccd, 0xda5fb3b8, 0xa850861f, 0x3f7c66f, + 0xd33402cc, 0x89974f2e, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x44e5467d, 0x4d0bd76a, 0x19bbface, 0x40908ab8, 0xec970e9, 0x2c21f62e, 0xfc69a122, - 0x97d064f0, + 0x97d064f0, 0x1e9cb3fa, 0x797300fd, 0x54f17ccd, 0xda5fb3b8, 0xa850861f, 0x3f7c66f, + 0xd33402cc, 0x89974f2e, ]; 
- let y1 = [ - 0x1e9cb3fa, 0x797300fd, 0x54f17ccd, 0xda5fb3b8, 0xa850861f, 0x3f7c66f, 0xd33402cc, - 0x89974f2e, - ]; - let x2 = [ + let p2 = [ 0x13613bec, 0xcca81cb9, 0x101cfe67, 0x8bb5fc9d, 0xc74f972a, 0xedf1b33d, 0xc93937bd, - 0x2dcfcab8, - ]; - let y2 = [ - 0x9a039215, 0x3e730924, 0xd33f5f38, 0x3732cfba, 0xd6f6c6f4, 0x65f088b7, 0x9474a412, - 0x46dbc4dd, + 0x2dcfcab8, 0x9a039215, 0x3e730924, 0xd33f5f38, 0x3732cfba, 0xd6f6c6f4, 0x65f088b7, + 0x9474a412, 0x46dbc4dd, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x13613bec, 0xcca81cb9, 0x101cfe67, 0x8bb5fc9d, 0xc74f972a, 0xedf1b33d, 0xc93937bd, - 0x2dcfcab8, + 0x2dcfcab8, 0x9a039215, 0x3e730924, 0xd33f5f38, 0x3732cfba, 0xd6f6c6f4, 0x65f088b7, + 0x9474a412, 0x46dbc4dd, ]; - let y1 = [ - 0x9a039215, 0x3e730924, 0xd33f5f38, 0x3732cfba, 0xd6f6c6f4, 0x65f088b7, 0x9474a412, - 0x46dbc4dd, - ]; - let x2 = [ + let p2 = [ 0x47fb9e1a, 0x17cd1708, 0xde2a3296, 0x7fe74b74, 0xbbab0e76, 0xf1a02bc9, 0xa48ec5a8, - 0x1bec414a, - ]; - let y2 = [ - 0x749c0443, 0x57f6e117, 0xe8c9796e, 0x681385da, 0x30c54b0f, 0x8a79bc57, 0x70126667, - 0xe3586704, + 0x1bec414a, 0x749c0443, 0x57f6e117, 0xe8c9796e, 0x681385da, 0x30c54b0f, 0x8a79bc57, + 0x70126667, 0xe3586704, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x47fb9e1a, 0x17cd1708, 0xde2a3296, 0x7fe74b74, 0xbbab0e76, 0xf1a02bc9, 0xa48ec5a8, - 0x1bec414a, - ]; - let y1 = [ - 0x749c0443, 0x57f6e117, 0xe8c9796e, 0x681385da, 0x30c54b0f, 0x8a79bc57, 0x70126667, - 0xe3586704, + 0x1bec414a, 0x749c0443, 0x57f6e117, 0xe8c9796e, 0x681385da, 0x30c54b0f, 0x8a79bc57, + 0x70126667, 0xe3586704, ]; - let x2 = [ + let p2 = [ 0xbb7ceceb, 0xf3f678ff, 0x8897faf0, 0x73a59f93, 0x6f6e6814, 0x36ffb812, 0x4276d450, - 0x437a8620, + 0x437a8620, 0x56c181e1, 0x7363bcc3, 0xdc8f9782, 
0x87220fcf, 0x99d297ff, 0x69b8feb6, + 0x3eeac32f, 0xb916ba1, ]; - let y2 = [ - 0x56c181e1, 0x7363bcc3, 0xdc8f9782, 0x87220fcf, 0x99d297ff, 0x69b8feb6, 0x3eeac32f, - 0xb916ba1, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xbb7ceceb, 0xf3f678ff, 0x8897faf0, 0x73a59f93, 0x6f6e6814, 0x36ffb812, 0x4276d450, - 0x437a8620, - ]; - let y1 = [ - 0x56c181e1, 0x7363bcc3, 0xdc8f9782, 0x87220fcf, 0x99d297ff, 0x69b8feb6, 0x3eeac32f, - 0xb916ba1, + 0x437a8620, 0x56c181e1, 0x7363bcc3, 0xdc8f9782, 0x87220fcf, 0x99d297ff, 0x69b8feb6, + 0x3eeac32f, 0xb916ba1, ]; - let x2 = [ + let p2 = [ 0xdcbf00eb, 0x4c9d9d87, 0xc18d0227, 0x41b4e98b, 0xa1a30bc2, 0x49be16f6, 0x96ead4dc, - 0xb89070ae, + 0xb89070ae, 0x1b0e664e, 0x1b7f1bcd, 0xb6b96a67, 0xcb0d8b06, 0xc1c4a766, 0x472294e4, + 0xc8a2d88f, 0x6f24c8c2, ]; - let y2 = [ - 0x1b0e664e, 0x1b7f1bcd, 0xb6b96a67, 0xcb0d8b06, 0xc1c4a766, 0x472294e4, 0xc8a2d88f, - 0x6f24c8c2, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xdcbf00eb, 0x4c9d9d87, 0xc18d0227, 0x41b4e98b, 0xa1a30bc2, 0x49be16f6, 0x96ead4dc, - 0xb89070ae, - ]; - let y1 = [ - 0x1b0e664e, 0x1b7f1bcd, 0xb6b96a67, 0xcb0d8b06, 0xc1c4a766, 0x472294e4, 0xc8a2d88f, - 0x6f24c8c2, + 0xb89070ae, 0x1b0e664e, 0x1b7f1bcd, 0xb6b96a67, 0xcb0d8b06, 0xc1c4a766, 0x472294e4, + 0xc8a2d88f, 0x6f24c8c2, ]; - let x2 = [ + let p2 = [ 0xb6fbe7b2, 0xb9d6ff9a, 0x458d65a3, 0x5eadedc1, 0xb2a88460, 0xf336bbb1, 0x9cb441f8, - 0x26488766, - ]; - let y2 = [ - 0x21bc2a34, 0x932a78bc, 0x6a0eb603, 0x5638d981, 0xd02ddf18, 0x8f2f2dca, 0xb2014498, - 0x9e15dab4, + 0x26488766, 0x21bc2a34, 0x932a78bc, 0x6a0eb603, 0x5638d981, 0xd02ddf18, 0x8f2f2dca, + 0xb2014498, 0x9e15dab4, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); 
/////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xb6fbe7b2, 0xb9d6ff9a, 0x458d65a3, 0x5eadedc1, 0xb2a88460, 0xf336bbb1, 0x9cb441f8, - 0x26488766, + 0x26488766, 0x21bc2a34, 0x932a78bc, 0x6a0eb603, 0x5638d981, 0xd02ddf18, 0x8f2f2dca, + 0xb2014498, 0x9e15dab4, ]; - let y1 = [ - 0x21bc2a34, 0x932a78bc, 0x6a0eb603, 0x5638d981, 0xd02ddf18, 0x8f2f2dca, 0xb2014498, - 0x9e15dab4, - ]; - let x2 = [ + let p2 = [ 0x2b038315, 0x9690d306, 0x69310e6f, 0x9cacc433, 0x9794b862, 0x1e4680e3, 0x56771222, - 0xaba55687, - ]; - let y2 = [ - 0xae25fc0a, 0xf9a003f9, 0xd8b63338, 0x3fbfb532, 0x25130d6f, 0x63d570f6, 0xaa365edb, - 0xa0e75d87, + 0xaba55687, 0xae25fc0a, 0xf9a003f9, 0xd8b63338, 0x3fbfb532, 0x25130d6f, 0x63d570f6, + 0xaa365edb, 0xa0e75d87, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x2b038315, 0x9690d306, 0x69310e6f, 0x9cacc433, 0x9794b862, 0x1e4680e3, 0x56771222, - 0xaba55687, + 0xaba55687, 0xae25fc0a, 0xf9a003f9, 0xd8b63338, 0x3fbfb532, 0x25130d6f, 0x63d570f6, + 0xaa365edb, 0xa0e75d87, ]; - let y1 = [ - 0xae25fc0a, 0xf9a003f9, 0xd8b63338, 0x3fbfb532, 0x25130d6f, 0x63d570f6, 0xaa365edb, - 0xa0e75d87, - ]; - let x2 = [ + let p2 = [ 0x95bc15b4, 0x9cb9a134, 0x465a2ee6, 0x9275028e, 0xced7ca8d, 0xed858ee9, 0x51eeadc9, - 0x10e90e2e, - ]; - let y2 = [ - 0x58aa258d, 0x34ebe609, 0x2bb6a88, 0x4ca58963, 0x16ad1f75, 0x4d57a8c6, 0x80d5e042, - 0xc68a3703, + 0x10e90e2e, 0x58aa258d, 0x34ebe609, 0x2bb6a88, 0x4ca58963, 0x16ad1f75, 0x4d57a8c6, + 0x80d5e042, 0xc68a3703, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x95bc15b4, 0x9cb9a134, 0x465a2ee6, 0x9275028e, 0xced7ca8d, 0xed858ee9, 0x51eeadc9, - 0x10e90e2e, - ]; - let y1 = [ - 0x58aa258d, 0x34ebe609, 0x2bb6a88, 0x4ca58963, 0x16ad1f75, 0x4d57a8c6, 0x80d5e042, - 
0xc68a3703, + 0x10e90e2e, 0x58aa258d, 0x34ebe609, 0x2bb6a88, 0x4ca58963, 0x16ad1f75, 0x4d57a8c6, + 0x80d5e042, 0xc68a3703, ]; - let x2 = [ - 0x7a1c0a80, 0xf62abc8, 0xc65a9c74, 0x4d625158, 0x2ff9c3, 0xb17c9be7, 0xa614cca5, 0xb6b15a68, + let p2 = [ + 0x7a1c0a80, 0xf62abc8, 0xc65a9c74, 0x4d625158, 0x2ff9c3, 0xb17c9be7, 0xa614cca5, + 0xb6b15a68, 0x41ce0a03, 0xb6cd0110, 0x82e16ee, 0x9c9a12b3, 0xef6536d4, 0xa54e223e, + 0xd6cdb61e, 0xfae62e14, ]; - let y2 = [ - 0x41ce0a03, 0xb6cd0110, 0x82e16ee, 0x9c9a12b3, 0xef6536d4, 0xa54e223e, 0xd6cdb61e, - 0xfae62e14, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ - 0x7a1c0a80, 0xf62abc8, 0xc65a9c74, 0x4d625158, 0x2ff9c3, 0xb17c9be7, 0xa614cca5, 0xb6b15a68, - ]; - let y1 = [ - 0x41ce0a03, 0xb6cd0110, 0x82e16ee, 0x9c9a12b3, 0xef6536d4, 0xa54e223e, 0xd6cdb61e, - 0xfae62e14, + let p1 = [ + 0x7a1c0a80, 0xf62abc8, 0xc65a9c74, 0x4d625158, 0x2ff9c3, 0xb17c9be7, 0xa614cca5, + 0xb6b15a68, 0x41ce0a03, 0xb6cd0110, 0x82e16ee, 0x9c9a12b3, 0xef6536d4, 0xa54e223e, + 0xd6cdb61e, 0xfae62e14, ]; - let x2 = [ + let p2 = [ 0x92b062d4, 0xa7caa50a, 0x9bb6a141, 0x7a5ce7e5, 0x83ea227a, 0x6fb1712, 0x3256eaca, - 0x35963ea4, + 0x35963ea4, 0xbbb25302, 0xa10aa4d1, 0x64de59b1, 0xd04082b9, 0xf9c08a96, 0xbfcce196, + 0x4951e5c9, 0xf65be145, ]; - let y2 = [ - 0xbbb25302, 0xa10aa4d1, 0x64de59b1, 0xd04082b9, 0xf9c08a96, 0xbfcce196, 0x4951e5c9, - 0xf65be145, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x92b062d4, 0xa7caa50a, 0x9bb6a141, 0x7a5ce7e5, 0x83ea227a, 0x6fb1712, 0x3256eaca, - 0x35963ea4, - ]; - let y1 = [ - 0xbbb25302, 0xa10aa4d1, 0x64de59b1, 0xd04082b9, 0xf9c08a96, 0xbfcce196, 0x4951e5c9, - 0xf65be145, + 0x35963ea4, 0xbbb25302, 0xa10aa4d1, 0x64de59b1, 0xd04082b9, 0xf9c08a96, 0xbfcce196, + 0x4951e5c9, 0xf65be145, ]; - let x2 = 
[ + let p2 = [ 0x1d33fd27, 0xfa0bf5c5, 0xb646cc62, 0x445f573d, 0xda82361b, 0xd022388e, 0x2263e84c, - 0x9ed73f09, - ]; - let y2 = [ - 0x2716c458, 0x5972b2de, 0xb2e44934, 0x94a823e5, 0x42467254, 0xee75b4f3, 0xebb1eeea, - 0xb6318967, + 0x9ed73f09, 0x2716c458, 0x5972b2de, 0xb2e44934, 0x94a823e5, 0x42467254, 0xee75b4f3, + 0xebb1eeea, 0xb6318967, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x1d33fd27, 0xfa0bf5c5, 0xb646cc62, 0x445f573d, 0xda82361b, 0xd022388e, 0x2263e84c, - 0x9ed73f09, + 0x9ed73f09, 0x2716c458, 0x5972b2de, 0xb2e44934, 0x94a823e5, 0x42467254, 0xee75b4f3, + 0xebb1eeea, 0xb6318967, ]; - let y1 = [ - 0x2716c458, 0x5972b2de, 0xb2e44934, 0x94a823e5, 0x42467254, 0xee75b4f3, 0xebb1eeea, - 0xb6318967, - ]; - let x2 = [ + let p2 = [ 0xef028d83, 0x579623ae, 0xba743961, 0x6195926d, 0x15de69db, 0x6a5abe5a, 0xe3c785ec, - 0xa7ebf7c4, - ]; - let y2 = [ - 0x99d0bed1, 0x9640392b, 0x4b053919, 0x47a38927, 0x7044804b, 0xcfd9c737, 0xbfe362d5, - 0x6205152f, + 0xa7ebf7c4, 0x99d0bed1, 0x9640392b, 0x4b053919, 0x47a38927, 0x7044804b, 0xcfd9c737, + 0xbfe362d5, 0x6205152f, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xef028d83, 0x579623ae, 0xba743961, 0x6195926d, 0x15de69db, 0x6a5abe5a, 0xe3c785ec, - 0xa7ebf7c4, + 0xa7ebf7c4, 0x99d0bed1, 0x9640392b, 0x4b053919, 0x47a38927, 0x7044804b, 0xcfd9c737, + 0xbfe362d5, 0x6205152f, ]; - let y1 = [ - 0x99d0bed1, 0x9640392b, 0x4b053919, 0x47a38927, 0x7044804b, 0xcfd9c737, 0xbfe362d5, - 0x6205152f, - ]; - let x2 = [ + let p2 = [ 0x7bb61ee5, 0xf2884413, 0xfb1f0c13, 0xda4f04e2, 0x8974ae6e, 0x662638cd, 0xcc8721b8, - 0xd4933230, - ]; - let y2 = [ - 0xe5d694a8, 0x662da4d0, 0x5a438ddc, 0x1ad12c8c, 0x1ecafb5e, 0xedcc5e9d, 0xf51a9d23, - 0x21c09ab, + 0xd4933230, 0xe5d694a8, 0x662da4d0, 0x5a438ddc, 0x1ad12c8c, 
0x1ecafb5e, 0xedcc5e9d, + 0xf51a9d23, 0x21c09ab, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x7bb61ee5, 0xf2884413, 0xfb1f0c13, 0xda4f04e2, 0x8974ae6e, 0x662638cd, 0xcc8721b8, - 0xd4933230, - ]; - let y1 = [ - 0xe5d694a8, 0x662da4d0, 0x5a438ddc, 0x1ad12c8c, 0x1ecafb5e, 0xedcc5e9d, 0xf51a9d23, - 0x21c09ab, + 0xd4933230, 0xe5d694a8, 0x662da4d0, 0x5a438ddc, 0x1ad12c8c, 0x1ecafb5e, 0xedcc5e9d, + 0xf51a9d23, 0x21c09ab, ]; - let x2 = [ + let p2 = [ 0xec04554a, 0x530ddcbc, 0x4688cffe, 0xaadcffbb, 0x7a10a2ec, 0x474652c2, 0x9873d1a0, - 0x896f37c8, + 0x896f37c8, 0x929138df, 0xd68f9fe1, 0xacc417dc, 0xe6085b61, 0x4e811bf1, 0xda622bb0, + 0x224ac4ac, 0x380423e7, ]; - let y2 = [ - 0x929138df, 0xd68f9fe1, 0xacc417dc, 0xe6085b61, 0x4e811bf1, 0xda622bb0, 0x224ac4ac, - 0x380423e7, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xec04554a, 0x530ddcbc, 0x4688cffe, 0xaadcffbb, 0x7a10a2ec, 0x474652c2, 0x9873d1a0, - 0x896f37c8, - ]; - let y1 = [ - 0x929138df, 0xd68f9fe1, 0xacc417dc, 0xe6085b61, 0x4e811bf1, 0xda622bb0, 0x224ac4ac, - 0x380423e7, + 0x896f37c8, 0x929138df, 0xd68f9fe1, 0xacc417dc, 0xe6085b61, 0x4e811bf1, 0xda622bb0, + 0x224ac4ac, 0x380423e7, ]; - let x2 = [ + let p2 = [ 0xb43fca26, 0x84077d1a, 0xa3bc2367, 0x7dfb841d, 0xbf3578a2, 0xca6c209d, 0x774b6d6c, - 0x11b3b97f, + 0x11b3b97f, 0x5b679d58, 0xd3b27eaf, 0x4b9f9d42, 0x3bae231c, 0x2f36d3bb, 0x8cd5650c, + 0xae600c50, 0x65331f9f, ]; - let y2 = [ - 0x5b679d58, 0xd3b27eaf, 0x4b9f9d42, 0x3bae231c, 0x2f36d3bb, 0x8cd5650c, 0xae600c50, - 0x65331f9f, - ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0xb43fca26, 0x84077d1a, 0xa3bc2367, 0x7dfb841d, 0xbf3578a2, 0xca6c209d, 
0x774b6d6c, - 0x11b3b97f, - ]; - let y1 = [ - 0x5b679d58, 0xd3b27eaf, 0x4b9f9d42, 0x3bae231c, 0x2f36d3bb, 0x8cd5650c, 0xae600c50, - 0x65331f9f, + 0x11b3b97f, 0x5b679d58, 0xd3b27eaf, 0x4b9f9d42, 0x3bae231c, 0x2f36d3bb, 0x8cd5650c, + 0xae600c50, 0x65331f9f, ]; - let x2 = [ + let p2 = [ 0x48dfd587, 0x79361bb, 0xc9b02656, 0x5ec4ba38, 0x2cf5a12d, 0x34867aaa, 0xacf4508b, - 0x5084b41b, - ]; - let y2 = [ - 0x91470e89, 0x6e79e97f, 0x6891f560, 0x5db6f560, 0x55292747, 0x619aa6c8, 0x1d980d31, - 0x34a9631a, + 0x5084b41b, 0x91470e89, 0x6e79e97f, 0x6891f560, 0x5db6f560, 0x55292747, 0x619aa6c8, + 0x1d980d31, 0x34a9631a, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); /////////////////////////////////////////////////////////////// - let x1 = [ + let p1 = [ 0x48dfd587, 0x79361bb, 0xc9b02656, 0x5ec4ba38, 0x2cf5a12d, 0x34867aaa, 0xacf4508b, - 0x5084b41b, + 0x5084b41b, 0x91470e89, 0x6e79e97f, 0x6891f560, 0x5db6f560, 0x55292747, 0x619aa6c8, + 0x1d980d31, 0x34a9631a, ]; - let y1 = [ - 0x91470e89, 0x6e79e97f, 0x6891f560, 0x5db6f560, 0x55292747, 0x619aa6c8, 0x1d980d31, - 0x34a9631a, - ]; - let x2 = [ + let p2 = [ 0x6c953fa9, 0x4d05956d, 0xf0b8c3db, 0x28ab2629, 0x4bd18c06, 0x3a5f485d, 0xaaab9323, - 0xa49ed10e, - ]; - let y2 = [ - 0x46fb4c72, 0x67b2bd22, 0x968e181b, 0x5ae87534, 0xa0dfddfb, 0xe03476c0, 0x660f5398, - 0xcc72b894, + 0xa49ed10e, 0x46fb4c72, 0x67b2bd22, 0x968e181b, 0x5ae87534, 0xa0dfddfb, 0xe03476c0, + 0x660f5398, 0xcc72b894, ]; - assert_eq!(ec_double(x1, y1), (x2, y2)); + assert_eq!(ec_double(p1), p2); } diff --git a/std/machines/large_field/arith256_memory.asm b/std/machines/large_field/arith256_memory.asm index 761d19e956..4b47c6b6e2 100644 --- a/std/machines/large_field/arith256_memory.asm +++ b/std/machines/large_field/arith256_memory.asm @@ -313,12 +313,20 @@ machine Arith256Memory(mem: Memory) with * *****/ - link => byte2.check(sum(16, |i| x1[i] * CLK32[i]) + sum(16, |i| y1[i] * CLK32[16 + i])); - link => byte2.check(sum(16, |i| x2[i] * 
CLK32[i]) + sum(16, |i| y2[i] * CLK32[16 + i])); - link => byte2.check(sum(16, |i| x3[i] * CLK32[i]) + sum(16, |i| y3[i] * CLK32[16 + i])); + // The sums were extracted out of the checks because of a bug + // in the bus linker code that prepends the constraints with the current namespace. + // TODO Revert when that's fixed. + let range_arg1 = sum(16, |i| x1[i] * CLK32[i]) + sum(16, |i| y1[i] * CLK32[16 + i]); + link => byte2.check(range_arg1); + let range_arg2 = sum(16, |i| x2[i] * CLK32[i]) + sum(16, |i| y2[i] * CLK32[16 + i]); + link => byte2.check(range_arg2); + let range_arg3 = sum(16, |i| x3[i] * CLK32[i]) + sum(16, |i| y3[i] * CLK32[16 + i]); + link => byte2.check(range_arg3); // Note that for q0-q2, we only range-constrain the first 15 limbs here - link => byte2.check(sum(16, |i| s[i] * CLK32[i]) + sum(15, |i| q0[i] * CLK32[16 + i])); - link => byte2.check(sum(15, |i| q1[i] * CLK32[i]) + sum(15, |i| q2[i] * CLK32[16 + i])); + let range_arg4 = sum(15, |i| s[i] * CLK32[i]) + sum(15, |i| q0[i] * CLK32[16 + i]); + link => byte2.check(range_arg4); + let range_arg5 = sum(15, |i| q1[i] * CLK32[i]) + sum(15, |i| q2[i] * CLK32[16 + i]); + link => byte2.check(range_arg5); // The most significant limbs of q0-q2 are constrained to be 32 bits // In Polygon's version they are 19 bits, but that requires increasing the minimum degree From 56d43c48b94d1e57b3504da5191c4a5dcb4dcc7e Mon Sep 17 00:00:00 2001 From: t Date: Wed, 11 Dec 2024 20:48:04 +0600 Subject: [PATCH 52/57] cargo-powdr gitignore add on template (#2213) https://github.com/powdr-labs/powdr/issues/2210 --- cargo-powdr/src/main.rs | 2 ++ cargo-powdr/template/.gitignore.template | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 cargo-powdr/template/.gitignore.template diff --git a/cargo-powdr/src/main.rs b/cargo-powdr/src/main.rs index add160db28..1db31ad8f9 100644 --- a/cargo-powdr/src/main.rs +++ b/cargo-powdr/src/main.rs @@ -35,6 +35,7 @@ const HOST_MAIN_TEMPLATE: &str = 
include_str!("../template/src/main.rs"); const HOST_README_TEMPLATE: &str = include_str!("../template/README.md"); const GUEST_CARGO_TOML_TEMPLATE: &str = include_str!("../template/guest/Cargo.toml.template"); const GUEST_MAIN_TEMPLATE: &str = include_str!("../template/guest/src/main.rs"); +const GITIGNORE: &str = include_str!("../template/.gitignore.template"); fn main() -> Result<(), io::Error> { let args = Cli::parse(); @@ -90,6 +91,7 @@ fn new_project(project_name: String, guest_name: String) -> Result<(), Error> { HOST_TOOLCHAIN_TEMPLATE, )?; create_file(&project_dir.join("README.md"), HOST_README_TEMPLATE)?; + create_file(&project_dir.join(".gitignore"), GITIGNORE)?; create_file(&src_dir.join("main.rs"), HOST_MAIN_TEMPLATE)?; create_file(&guest_dir.join("Cargo.toml"), GUEST_CARGO_TOML_TEMPLATE)?; create_file(&guest_src_dir.join("main.rs"), GUEST_MAIN_TEMPLATE)?; diff --git a/cargo-powdr/template/.gitignore.template b/cargo-powdr/template/.gitignore.template new file mode 100644 index 0000000000..eff815bb4b --- /dev/null +++ b/cargo-powdr/template/.gitignore.template @@ -0,0 +1,4 @@ +target +Cargo.lock +guest/Cargo.lock +powdr-target \ No newline at end of file From 598352a23d7f791124541788d8de5f637124b9d8 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 11 Dec 2024 18:06:18 +0100 Subject: [PATCH 53/57] Expressions and solving routines. (#2212) This module is an equivalent of the existing affine_expression.rs, but for compile-time execution on symbolic values instead of run-time execution on concrete values. Using the operators defined on that type, you can build a SymbolicAffineExpression from a polynomial identity and then use `.solve()` to try to solve for one unknown variable. The result (instead of a concrete assignment as in affine_expression.rs) is a SymbolicExpression, i.e. a complex expression involving variables (assumed to have a concrete value known at run time), constants and certain operators on them.
The idea is that SymbolicAffineExpression is used on polynomial identities in turn and solving for one cell after the other in the trace. The resulting SymbolicExpression can be translated to rust or pil. --- .../witgen/jit/affine_symbolic_expression.rs | 562 ++++++++++++++++++ executor/src/witgen/jit/mod.rs | 2 + .../src/witgen/jit/symbolic_expression.rs | 332 +++++++++++ executor/src/witgen/mod.rs | 1 + executor/src/witgen/range_constraints.rs | 11 + 5 files changed, 908 insertions(+) create mode 100644 executor/src/witgen/jit/affine_symbolic_expression.rs create mode 100644 executor/src/witgen/jit/mod.rs create mode 100644 executor/src/witgen/jit/symbolic_expression.rs diff --git a/executor/src/witgen/jit/affine_symbolic_expression.rs b/executor/src/witgen/jit/affine_symbolic_expression.rs new file mode 100644 index 0000000000..afdf683bd7 --- /dev/null +++ b/executor/src/witgen/jit/affine_symbolic_expression.rs @@ -0,0 +1,562 @@ +use std::{ + collections::BTreeMap, + fmt::{self, Display, Formatter}, + ops::{Add, Mul, Neg, Sub}, +}; + +use itertools::Itertools; +use num_traits::Zero; +use powdr_number::FieldElement; + +use crate::witgen::EvalError; + +use super::{super::range_constraints::RangeConstraint, symbolic_expression::SymbolicExpression}; + +/// The effect of solving a symbolic equation. +pub enum Effect { + /// Variable can be assigned a value. + Assignment(V, SymbolicExpression), + /// We learnt a new range constraint on variable. + RangeConstraint(V, RangeConstraint), + /// A run-time assertion. If this fails, we have conflicting constraints. + Assertion(Assertion), +} + +/// A run-time assertion. If this fails, we have conflicting constraints. +pub struct Assertion { + pub lhs: SymbolicExpression, + pub rhs: SymbolicExpression, + /// If this is true, we assert that both sides are equal. + /// Otherwise, we assert that they are different. 
+ pub expected_equal: bool, +} + +impl Assertion { + pub fn assert_is_zero(condition: SymbolicExpression) -> Effect { + Self::assert_eq(condition, SymbolicExpression::from(T::from(0))) + } + pub fn assert_is_nonzero(condition: SymbolicExpression) -> Effect { + Self::assert_neq(condition, SymbolicExpression::from(T::from(0))) + } + pub fn assert_eq(lhs: SymbolicExpression, rhs: SymbolicExpression) -> Effect { + Effect::Assertion(Assertion { + lhs, + rhs, + expected_equal: true, + }) + } + pub fn assert_neq( + lhs: SymbolicExpression, + rhs: SymbolicExpression, + ) -> Effect { + Effect::Assertion(Assertion { + lhs, + rhs, + expected_equal: false, + }) + } +} + +/// Represents an expression `a_1 * x_1 + ... + a_k * x_k + offset`, +/// where the `a_i` and `offset` are symbolic expressions, i.e. values known at run-time +/// (which can still include variables or symbols, which are only known at run-time), +/// and the `x_i` are variables that are unknown at this point. +/// It also stores range constraints for all unknown variables. +#[derive(Debug, Clone)] +pub struct AffineSymbolicExpression { + coefficients: BTreeMap>, + offset: SymbolicExpression, + range_constraints: BTreeMap>, +} + +/// Display for affine symbolic expressions, for informational purposes only. 
+impl Display for AffineSymbolicExpression { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if self.coefficients.is_empty() { + write!(f, "{}", self.offset) + } else { + write!( + f, + "{}", + self.coefficients + .iter() + .map(|(var, coeff)| if coeff.is_known_one() { + var.to_string() + } else if coeff.is_known_minus_one() { + format!("-{var}") + } else { + format!("{coeff} * {var}") + }) + .join(" + ") + )?; + if !self.offset.is_known_zero() { + write!(f, " + {}", self.offset)?; + } + Ok(()) + } + } +} + +impl From> for AffineSymbolicExpression { + fn from(k: SymbolicExpression) -> Self { + AffineSymbolicExpression { + coefficients: Default::default(), + offset: k, + range_constraints: Default::default(), + } + } +} + +impl From for AffineSymbolicExpression { + fn from(k: T) -> Self { + SymbolicExpression::from(k).into() + } +} + +impl AffineSymbolicExpression { + pub fn from_known_symbol(symbol: V, rc: Option>) -> Self { + SymbolicExpression::from_symbol(symbol, rc).into() + } + pub fn from_unknown_variable(var: V, rc: Option>) -> Self { + AffineSymbolicExpression { + coefficients: [(var.clone(), T::from(1).into())].into_iter().collect(), + offset: SymbolicExpression::from(T::from(0)), + range_constraints: rc.into_iter().map(|rc| (var.clone(), rc)).collect(), + } + } + + /// If this expression does not contain unknown variables, returns the symbolic expression. + pub fn try_to_known(&self) -> Option<&SymbolicExpression> { + if self.coefficients.is_empty() { + Some(&self.offset) + } else { + None + } + } + + /// Tries to multiply this expression with another one. + /// Returns `None` if the result would be quadratic, i.e. + /// if both expressions contain unknown variables. 
+ pub fn try_mul(&self, other: &Self) -> Option { + if let Some(multiplier) = other.try_to_known() { + Some(self.clone() * multiplier) + } else { + self.try_to_known() + .map(|multiplier| other.clone() * multiplier) + } + } + + /// Solves the equation `self = 0` and returns how to compute the solution. + /// The solution can contain assignments to multiple variables. + /// If no way to solve the equation has been found, returns the empty vector. + /// If the equation is known to be unsolvable, returns an error. + pub fn solve(&self) -> Result>, EvalError> { + Ok(match self.coefficients.len() { + 0 => { + if self.offset.is_known_nonzero() { + return Err(EvalError::ConstraintUnsatisfiable(self.to_string())); + } else { + vec![] + } + } + 1 => { + let (var, coeff) = self.coefficients.iter().next().unwrap(); + // Solve "coeff * X + self.offset = 0" by division. + assert!( + !coeff.is_known_zero(), + "Zero coefficient has not been removed." + ); + if coeff.is_known_nonzero() { + // In this case, we can always compute a solution. + let value = self.offset.field_div(&-coeff); + vec![Effect::Assignment(var.clone(), value)] + } else if self.offset.is_known_nonzero() { + // If the offset is not zero, then the coefficient must be non-zero, + // otherwise the constraint is violated. + let value = self.offset.field_div(&-coeff); + vec![ + Assertion::assert_is_nonzero(coeff.clone()), + Effect::Assignment(var.clone(), value), + ] + } else { + // In this case, we could have an equation of the form + // 0 * X = 0, which is valid and generates no information about X. + vec![] + } + } + _ => { + let r = self.solve_bit_decomposition(); + if !r.is_empty() { + r + } else { + let negated = -self; + let r = negated.solve_bit_decomposition(); + if !r.is_empty() { + r + } else { + self.transfer_constraints() + .into_iter() + .chain(negated.transfer_constraints()) + .collect() + } + } + } + }) + } + + /// Tries to solve a bit-decomposition equation.
+ fn solve_bit_decomposition(&self) -> Vec> { + // All the coefficients need to be known numbers and the + // variables need to be range-constrained. + let constrained_coefficients = self + .coefficients + .iter() + .map(|(var, coeff)| { + let c = coeff.try_to_number()?; + let rc = self.range_constraints.get(var)?; + Some((var.clone(), c, rc)) + }) + .collect::>>(); + let Some(constrained_coefficients) = constrained_coefficients else { + return vec![]; + }; + + // Check if they are mutually exclusive and compute assignments. + let mut covered_bits: ::Integer = 0.into(); + let mut effects = vec![]; + for (var, coeff, constraint) in constrained_coefficients { + let mask = *constraint.multiple(coeff).mask(); + if !(mask & covered_bits).is_zero() { + // Overlapping range constraints. + return vec![]; + } else { + covered_bits |= mask; + } + let masked = -&self.offset & T::from(mask).into(); + effects.push(Effect::Assignment( + var.clone(), + masked.integer_div(&coeff.into()), + )); + } + + if covered_bits >= T::modulus() { + return vec![]; + } + + // We need to assert that the masks cover the offset, + // otherwise the equation is not solvable. + // We assert offset & !masks == 0 <=> offset == offset | masks. + // We use the latter since we cannot properly bit-negate inside the field. + effects.push(Assertion::assert_eq( + self.offset.clone(), + &self.offset | &T::from(covered_bits).into(), + )); + + effects + } + + fn transfer_constraints(&self) -> Vec> { + // We are looking for X = a * Y + b * Z + ... or -X = a * Y + b * Z + ... + // where X is least constrained. + + let Some((solve_for, solve_for_coefficient)) = self + .coefficients + .iter() + .filter(|(_var, coeff)| coeff.is_known_one() || coeff.is_known_minus_one()) + .max_by_key(|(var, _c)| { + // Sort so that we get the least constrained variable. 
+ self.range_constraints + .get(var) + .map(|c| c.range_width()) + .unwrap_or_else(|| T::modulus()) + }) + else { + return vec![]; + }; + + // This only works if the coefficients are all known. + let Some(summands) = self + .coefficients + .iter() + .filter(|(var, _)| *var != solve_for) + .map(|(var, coeff)| { + let coeff = coeff.try_to_number()?; + let rc = self.range_constraints.get(var)?; + Some(rc.multiple(coeff)) + }) + .chain(std::iter::once(self.offset.range_constraint())) + .collect::>>() + else { + return vec![]; + }; + let Some(constraint) = summands.into_iter().reduce(|c1, c2| c1.combine_sum(&c2)) else { + return vec![]; + }; + let constraint = if solve_for_coefficient.is_known_one() { + -constraint + } else { + constraint + }; + vec![Effect::RangeConstraint(solve_for.clone(), constraint)] + } +} + +impl Add for &AffineSymbolicExpression { + type Output = AffineSymbolicExpression; + + fn add(self, rhs: Self) -> Self::Output { + let mut coefficients = self.coefficients.clone(); + let mut range_constraints = self.range_constraints.clone(); + for (var, coeff) in &rhs.coefficients { + coefficients + .entry(var.clone()) + .and_modify(|f| *f = &*f + coeff) + .or_insert_with(|| coeff.clone()); + if let Some(range_right) = rhs.range_constraints.get(var) { + range_constraints + .entry(var.clone()) + .and_modify(|rc| *rc = rc.conjunction(range_right)) + .or_insert_with(|| range_right.clone()); + } + } + coefficients.retain(|_, f| !f.is_known_zero()); + let offset = &self.offset + &rhs.offset; + AffineSymbolicExpression { + coefficients, + offset, + range_constraints, + } + } +} + +impl Add for AffineSymbolicExpression { + type Output = AffineSymbolicExpression; + + fn add(self, rhs: Self) -> Self::Output { + &self + &rhs + } +} + +impl Sub for &AffineSymbolicExpression { + type Output = AffineSymbolicExpression; + + fn sub(self, rhs: Self) -> Self::Output { + self + &-rhs + } +} + +impl Sub for AffineSymbolicExpression { + type Output = AffineSymbolicExpression; + 
+ fn sub(self, rhs: Self) -> Self::Output { + &self - &rhs + } +} + +impl Neg for &AffineSymbolicExpression { + type Output = AffineSymbolicExpression; + + fn neg(self) -> Self::Output { + AffineSymbolicExpression { + coefficients: self + .coefficients + .iter() + .map(|(var, coeff)| (var.clone(), -coeff)) + .collect(), + offset: -&self.offset, + range_constraints: self.range_constraints.clone(), + } + } +} + +impl Neg for AffineSymbolicExpression { + type Output = AffineSymbolicExpression; + + fn neg(self) -> Self::Output { + -&self + } +} + +/// Multiply by known symbolic expression. +impl Mul<&SymbolicExpression> + for AffineSymbolicExpression +{ + type Output = AffineSymbolicExpression; + + fn mul(mut self, rhs: &SymbolicExpression) -> Self::Output { + for coeff in self.coefficients.values_mut() { + *coeff = &*coeff * rhs; + } + self.offset = &self.offset * rhs; + self + } +} + +#[cfg(test)] +mod test { + use powdr_number::GoldilocksField; + + use super::*; + + type Ase = AffineSymbolicExpression; + + fn from_number(x: i32) -> Ase { + GoldilocksField::from(x).into() + } + + fn mul(a: &Ase, b: &Ase) -> Ase { + a.try_mul(b).unwrap() + } + + #[test] + fn unsolvable() { + let r = from_number(10).solve(); + assert!(r.is_err()); + } + + #[test] + fn unsolvable_with_vars() { + let x = &Ase::from_known_symbol("X", None); + let y = &Ase::from_known_symbol("Y", None); + let constr = x + y - from_number(10); + // We cannot solve it but also cannot know it is unsolvable. + assert!(constr.solve().unwrap().is_empty()); + // But if we know the values, we can be sure there is a conflict. 
+ assert!(from_number(10).solve().is_err()); + } + + #[test] + fn solvable_without_vars() { + let constr = &from_number(0); + assert!(constr.solve().unwrap().is_empty()); + } + + #[test] + fn solve_simple_eq() { + let y = Ase::from_known_symbol("y", None); + let x = Ase::from_unknown_variable("X", None); + // 2 * X + 7 * y - 10 = 0 + let two = from_number(2); + let seven = from_number(7); + let ten = from_number(10); + let constr = mul(&two, &x) + mul(&seven, &y) - ten; + let effects = constr.solve().unwrap(); + assert_eq!(effects.len(), 1); + let Effect::Assignment(var, expr) = &effects[0] else { + panic!("Expected assignment"); + }; + assert_eq!(var.to_string(), "X"); + assert_eq!(expr.to_string(), "(((7 * y) + -10) / -2)"); + } + + #[test] + fn solve_div_by_range_constrained_var() { + let y = Ase::from_known_symbol("y", None); + let z = Ase::from_known_symbol("z", None); + let x = Ase::from_unknown_variable("X", None); + // z * X + 7 * y - 10 = 0 + let seven = from_number(7); + let ten = from_number(10); + let constr = mul(&z, &x) + mul(&seven, &y) - ten.clone(); + // If we do not range-constrain z, we cannot solve since we don't know if it might be zero. 
+ let effects = constr.solve().unwrap(); + assert_eq!(effects.len(), 0); + let z = + Ase::from_known_symbol("z", Some(RangeConstraint::from_range(10.into(), 20.into()))); + let constr = mul(&z, &x) + mul(&seven, &y) - ten; + let effects = constr.solve().unwrap(); + let Effect::Assignment(var, expr) = &effects[0] else { + panic!("Expected assignment"); + }; + assert_eq!(var.to_string(), "X"); + assert_eq!(expr.to_string(), "(((7 * y) + -10) / -z)"); + } + + #[test] + fn solve_bit_decomposition() { + let rc = Some(RangeConstraint::from_mask(0xffu32)); + // First try without range constrain on a + let a = Ase::from_unknown_variable("a", None); + let b = Ase::from_unknown_variable("b", rc.clone()); + let c = Ase::from_unknown_variable("c", rc.clone()); + let z = Ase::from_known_symbol("Z", None); + // a * 0x100 + b * 0x10000 + c * 0x1000000 + 10 + Z = 0 + let ten = from_number(10); + let constr = mul(&a, &from_number(0x100)) + + mul(&b, &from_number(0x10000)) + + mul(&c, &from_number(0x1000000)) + + ten.clone() + + z.clone(); + // Without range constraints, this is not solvable. + assert!(constr.solve().unwrap().is_empty()); + // Now add the range constraint on a, it should be solvable. 
+ let a = Ase::from_unknown_variable("a", rc.clone()); + let constr = mul(&a, &from_number(0x100)) + + mul(&b, &from_number(0x10000)) + + mul(&c, &from_number(0x1000000)) + + ten.clone() + + z; + let effects = constr + .solve() + .unwrap() + .into_iter() + .map(|effect| match effect { + Effect::Assignment(v, expr) => format!("{v} = {expr};\n"), + Effect::Assertion(Assertion { + lhs, + rhs, + expected_equal, + }) => { + format!( + "assert {lhs} {} {rhs};\n", + if expected_equal { "==" } else { "!=" } + ) + } + _ => panic!(), + }) + .format("") + .to_string(); + assert_eq!( + effects, + "a = ((-(10 + Z) & 65280) // 256); +b = ((-(10 + Z) & 16711680) // 65536); +c = ((-(10 + Z) & 4278190080) // 16777216); +assert (10 + Z) == ((10 + Z) | 4294967040); +" + ); + } + + #[test] + fn solve_constraint_transfer() { + let rc = Some(RangeConstraint::from_mask(0xffu32)); + let a = Ase::from_unknown_variable("a", rc.clone()); + let b = Ase::from_unknown_variable("b", rc.clone()); + let c = Ase::from_unknown_variable("c", rc.clone()); + let z = Ase::from_unknown_variable("Z", None); + // a * 0x100 + b * 0x10000 + c * 0x1000000 + 10 - Z = 0 + let ten = from_number(10); + let constr = mul(&a, &from_number(0x100)) + + mul(&b, &from_number(0x10000)) + + mul(&c, &from_number(0x1000000)) + + ten + - z; + let effects = constr + .solve() + .unwrap() + .into_iter() + .map(|effect| match effect { + Effect::RangeConstraint(v, rc) => format!("{v}: {rc};\n"), + _ => panic!(), + }) + .format("") + .to_string(); + // It appears twice because we solve the positive and the negated equation. + // Note that the negated version has a different bit mask. 
+ assert_eq!( + effects, + "Z: [10, 4294967050] & 0xffffff0a; +Z: [10, 4294967050] & 0xffffffff; +" + ); + } +} diff --git a/executor/src/witgen/jit/mod.rs b/executor/src/witgen/jit/mod.rs new file mode 100644 index 0000000000..a6d9dabf39 --- /dev/null +++ b/executor/src/witgen/jit/mod.rs @@ -0,0 +1,2 @@ +mod affine_symbolic_expression; +mod symbolic_expression; diff --git a/executor/src/witgen/jit/symbolic_expression.rs b/executor/src/witgen/jit/symbolic_expression.rs new file mode 100644 index 0000000000..8f3ddd470f --- /dev/null +++ b/executor/src/witgen/jit/symbolic_expression.rs @@ -0,0 +1,332 @@ +use std::{ + fmt::{self, Display, Formatter}, + ops::{Add, BitAnd, BitOr, Mul, Neg}, + rc::Rc, +}; + +use powdr_number::FieldElement; + +use crate::witgen::range_constraints::RangeConstraint; + +/// A value that is known at run-time, defined through a complex expression +/// involving known cells or variables and compile-time constants. +/// Each of the sub-expressions can have its own range constraint. +#[derive(Debug, Clone)] +pub enum SymbolicExpression { + /// A concrete constant value known at compile time. + Concrete(T), + /// A symbolic value known at run-time, referencing a cell, + /// an input, a local variable or whatever it is used for. + Symbol(S, Option>), + BinaryOperation( + Rc, + BinaryOperator, + Rc, + Option>, + ), + UnaryOperation(UnaryOperator, Rc, Option>), +} + +#[derive(Debug, Clone)] +pub enum BinaryOperator { + Add, + Sub, + Mul, + /// Finite field division. + Div, + /// Integer division, i.e. convert field elements to unsigned integer and divide. 
+ IntegerDiv, + BitAnd, + BitOr, +} + +#[derive(Debug, Clone)] +pub enum UnaryOperator { + Neg, +} + +impl SymbolicExpression { + pub fn from_symbol(symbol: S, rc: Option>) -> Self { + SymbolicExpression::Symbol(symbol, rc) + } + + pub fn is_known_zero(&self) -> bool { + self.try_to_number().map_or(false, |n| n.is_zero()) + } + + pub fn is_known_one(&self) -> bool { + self.try_to_number().map_or(false, |n| n.is_one()) + } + + pub fn is_known_minus_one(&self) -> bool { + self.try_to_number().map_or(false, |n| n == -T::from(1)) + } + + pub fn is_known_nonzero(&self) -> bool { + // Only checking range constraint is enough since if this is a known + // fixed value, we will get a range constraint with just a single value. + if let Some(rc) = self.range_constraint() { + !rc.allows_value(0.into()) + } else { + // unknown + false + } + } + + pub fn range_constraint(&self) -> Option> { + match self { + SymbolicExpression::Concrete(v) => Some(RangeConstraint::from_value(*v)), + SymbolicExpression::Symbol(.., rc) + | SymbolicExpression::BinaryOperation(.., rc) + | SymbolicExpression::UnaryOperation(.., rc) => rc.clone(), + } + } + + pub fn try_to_number(&self) -> Option { + match self { + SymbolicExpression::Concrete(n) => Some(*n), + SymbolicExpression::Symbol(..) + | SymbolicExpression::BinaryOperation(..) + | SymbolicExpression::UnaryOperation(..) => None, + } + } +} + +/// Display for symbolic expressions, for informational purposes only.
+impl Display for SymbolicExpression { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + SymbolicExpression::Concrete(n) => { + if n.is_in_lower_half() { + write!(f, "{n}") + } else { + write!(f, "-{}", -*n) + } + } + SymbolicExpression::Symbol(name, _) => write!(f, "{name}"), + SymbolicExpression::BinaryOperation(lhs, op, rhs, _) => { + write!(f, "({lhs} {op} {rhs})") + } + SymbolicExpression::UnaryOperation(op, expr, _) => write!(f, "{op}{expr}"), + } + } +} + +impl Display for BinaryOperator { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + BinaryOperator::Add => write!(f, "+"), + BinaryOperator::Sub => write!(f, "-"), + BinaryOperator::Mul => write!(f, "*"), + BinaryOperator::Div => write!(f, "/"), + BinaryOperator::IntegerDiv => write!(f, "//"), + BinaryOperator::BitAnd => write!(f, "&"), + BinaryOperator::BitOr => write!(f, "|"), + } + } +} + +impl Display for UnaryOperator { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + UnaryOperator::Neg => write!(f, "-"), + } + } +} + +impl From for SymbolicExpression { + fn from(n: T) -> Self { + SymbolicExpression::Concrete(n) + } +} + +impl Add for &SymbolicExpression { + type Output = SymbolicExpression; + + fn add(self, rhs: Self) -> Self::Output { + if self.is_known_zero() { + return rhs.clone(); + } + if rhs.is_known_zero() { + return self.clone(); + } + match (self, rhs) { + (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) => { + SymbolicExpression::Concrete(*a + *b) + } + _ => SymbolicExpression::BinaryOperation( + Rc::new(self.clone()), + BinaryOperator::Add, + Rc::new(rhs.clone()), + self.range_constraint() + .zip(rhs.range_constraint()) + .map(|(a, b)| a.combine_sum(&b)), + ), + } + } +} + +impl Add for SymbolicExpression { + type Output = SymbolicExpression; + fn add(self, rhs: Self) -> Self::Output { + &self + &rhs + } +} + +impl Neg for &SymbolicExpression { + type Output = SymbolicExpression; + + fn neg(self) -> 
Self::Output { + match self { + SymbolicExpression::Concrete(n) => SymbolicExpression::Concrete(-*n), + SymbolicExpression::UnaryOperation(UnaryOperator::Neg, expr, _) => { + expr.as_ref().clone() + } + _ => SymbolicExpression::UnaryOperation( + UnaryOperator::Neg, + Rc::new(self.clone()), + self.range_constraint().map(|rc| rc.multiple(-T::from(1))), + ), + } + } +} + +impl Neg for SymbolicExpression { + type Output = SymbolicExpression; + fn neg(self) -> Self::Output { + -&self + } +} + +impl Mul for &SymbolicExpression { + type Output = SymbolicExpression; + + fn mul(self, rhs: Self) -> Self::Output { + if let (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) = (self, rhs) { + SymbolicExpression::Concrete(*a * *b) + } else if self.is_known_zero() || rhs.is_known_zero() { + SymbolicExpression::Concrete(T::from(0)) + } else if self.is_known_one() { + rhs.clone() + } else if rhs.is_known_one() { + self.clone() + } else if self.is_known_minus_one() { + -rhs + } else if rhs.is_known_minus_one() { + -self + } else { + SymbolicExpression::BinaryOperation( + Rc::new(self.clone()), + BinaryOperator::Mul, + Rc::new(rhs.clone()), + None, + ) + } + } +} + +impl Mul for SymbolicExpression { + type Output = SymbolicExpression; + fn mul(self, rhs: Self) -> Self { + &self * &rhs + } +} + +impl SymbolicExpression { + /// Field element division. See `integer_div` for integer division. + /// If you use this, you must ensure that the divisor is not zero. + pub fn field_div(&self, rhs: &Self) -> Self { + if let (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) = (self, rhs) { + assert!(b != &T::from(0)); + SymbolicExpression::Concrete(*a / *b) + } else if self.is_known_zero() { + SymbolicExpression::Concrete(T::from(0)) + } else if rhs.is_known_one() { + self.clone() + } else if rhs.is_known_minus_one() { + -self + } else { + // TODO other simplifications like `-x / -y => x / y`, `-x / concrete => x / -concrete`, etc. 
+ SymbolicExpression::BinaryOperation( + Rc::new(self.clone()), + BinaryOperator::Div, + Rc::new(rhs.clone()), + None, + ) + } + } + + /// Integer division, i.e. convert field elements to unsigned integer and divide. + pub fn integer_div(&self, rhs: &Self) -> Self { + if rhs.is_known_one() { + self.clone() + } else { + SymbolicExpression::BinaryOperation( + Rc::new(self.clone()), + BinaryOperator::IntegerDiv, + Rc::new(rhs.clone()), + None, + ) + } + } +} + +impl BitAnd for &SymbolicExpression { + type Output = SymbolicExpression; + + fn bitand(self, rhs: Self) -> Self::Output { + if let (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) = (self, rhs) { + SymbolicExpression::Concrete(T::from(a.to_integer() & b.to_integer())) + } else if self.is_known_zero() || rhs.is_known_zero() { + SymbolicExpression::Concrete(T::from(0)) + } else { + SymbolicExpression::BinaryOperation( + Rc::new(self.clone()), + BinaryOperator::BitAnd, + Rc::new(rhs.clone()), + self.range_constraint() + .zip(rhs.range_constraint()) + .map(|(a, b)| a.conjunction(&b)), + ) + } + } +} + +impl BitAnd for SymbolicExpression { + type Output = SymbolicExpression; + + fn bitand(self, rhs: Self) -> Self::Output { + &self & &rhs + } +} + +impl BitOr for &SymbolicExpression { + type Output = SymbolicExpression; + + fn bitor(self, rhs: Self) -> Self::Output { + if let (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) = (self, rhs) { + let v = a.to_integer() | b.to_integer(); + assert!(v < T::modulus()); + SymbolicExpression::Concrete(T::from(v)) + } else if self.is_known_zero() { + rhs.clone() + } else if rhs.is_known_zero() { + self.clone() + } else { + SymbolicExpression::BinaryOperation( + Rc::new(self.clone()), + BinaryOperator::BitOr, + Rc::new(rhs.clone()), + None, + ) + } + } +} + +impl BitOr for SymbolicExpression { + type Output = SymbolicExpression; + + fn bitor(self, rhs: Self) -> Self::Output { + &self | &rhs + } +} diff --git a/executor/src/witgen/mod.rs 
b/executor/src/witgen/mod.rs index 61cfe6bd10..8b5c3f52af 100644 --- a/executor/src/witgen/mod.rs +++ b/executor/src/witgen/mod.rs @@ -34,6 +34,7 @@ mod eval_result; pub mod evaluators; mod global_constraints; mod identity_processor; +mod jit; mod machines; mod processor; mod query_processor; diff --git a/executor/src/witgen/range_constraints.rs b/executor/src/witgen/range_constraints.rs index 8e6d9748a8..cdd76a76a0 100644 --- a/executor/src/witgen/range_constraints.rs +++ b/executor/src/witgen/range_constraints.rs @@ -82,6 +82,17 @@ impl RangeConstraint { range_width(self.min, self.max) } + /// Returns true if `v` is an allowed value for this range constraint. + pub fn allows_value(&self, v: T) -> bool { + let in_range = if self.min <= self.max { + self.min <= v && v <= self.max + } else { + v <= self.min || self.max <= v + }; + let in_mask = v.to_integer() & self.mask == v.to_integer(); + in_range && in_mask + } + /// The range constraint of the sum of two expressions. pub fn combine_sum(&self, other: &Self) -> Self { // TODO we could use "add_with_carry" to see if this created an overflow. From ad409564caae4aa407abf10170783ce16ce27534 Mon Sep 17 00:00:00 2001 From: ShuangWu121 <47602565+ShuangWu121@users.noreply.github.com> Date: Wed, 11 Dec 2024 18:45:46 +0100 Subject: [PATCH 54/57] Add support for constant column in STWO backend (#2112) # Add Support for Constant Columns in Stwo Backend Stwo's recent development introduced new APIs to support constant/pre-processed columns. However, the `dev` branch of Stwo is still using an older nightly toolchain, which is incompatible with Powdr. Currently, Stwo has two open PRs [PR1](https://github.com/starkware-libs/stwo/pull/847), [PR2](https://github.com/starkware-libs/stwo/pull/871) aimed at updating the toolchain to nightly `11-06`. Previously, our Stwo backend dependency relied on one of these PRs' branches. However, this branch is no longer updated with the latest commits from their `dev` branch. 
These new commits are required to support constant columns. ### Temporary Solution To address this issue temporarily: - I moved Stwo's dependency to my fork of Stwo, where the branch is updated with both the latest `dev` branch commits and the newer toolchain. --- ### Tasks in this PR 1. **Add APIs to support constant columns**: 2. **Update test cases**: - Modify and add test cases to validate constant column support. --- --------- Co-authored-by: Thibaut Schaeffer --- backend/Cargo.toml | 2 +- backend/src/stwo/circuit_builder.rs | 166 +++++++++--- backend/src/stwo/mod.rs | 31 ++- backend/src/stwo/proof.rs | 126 +++++++++ backend/src/stwo/prover.rs | 310 +++++++++++++++++++---- pipeline/src/test_util.rs | 22 ++ pipeline/tests/pil.rs | 50 +++- test_data/pil/fibo_no_publics.pil | 13 + test_data/pil/fixed_with_incremental.pil | 13 + test_data/pil/fixed_with_next.pil | 14 + test_data/pil/incremental_one.pil | 11 + 11 files changed, 658 insertions(+), 100 deletions(-) create mode 100644 backend/src/stwo/proof.rs create mode 100644 test_data/pil/fibo_no_publics.pil create mode 100644 test_data/pil/fixed_with_incremental.pil create mode 100644 test_data/pil/fixed_with_next.pil create mode 100644 test_data/pil/incremental_one.pil diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 9137f73492..d4fd6768f3 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -63,7 +63,7 @@ p3-commit = { git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf2 p3-matrix = { git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf28e7359dd2c577447886463e6124f0", optional = true } p3-uni-stark = { git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf28e7359dd2c577447886463e6124f0", optional = true } # TODO: Change this to main branch when the `andrew/dev/update-toolchain` branch is merged,the main branch is using "nightly-2024-01-04", not compatiable with plonky3 -stwo-prover = { git = "https://github.com/starkware-libs/stwo.git", optional = true, rev 
= "e6d10bc107c11cce54bb4aa152c3afa2e15e92c1" } +stwo-prover = { git = "https://github.com/ShuangWu121/stwo.git", optional = true, rev = "564a4ddcde376ba0ae78da4d86ea5ad7338ef6fe",features = ["parallel"] } strum = { version = "0.24.1", features = ["derive"] } log = "0.4.17" diff --git a/backend/src/stwo/circuit_builder.rs b/backend/src/stwo/circuit_builder.rs index fe20a65071..e7c07943ae 100644 --- a/backend/src/stwo/circuit_builder.rs +++ b/backend/src/stwo/circuit_builder.rs @@ -1,58 +1,68 @@ use num_traits::Zero; +use powdr_ast::parsed::visitor::AllChildren; +use std::collections::HashSet; use std::fmt::Debug; use std::ops::{Add, AddAssign, Mul, Neg, Sub}; +use std::sync::Arc; extern crate alloc; -use alloc::{collections::btree_map::BTreeMap, string::String, vec::Vec}; +use alloc::collections::btree_map::BTreeMap; use powdr_ast::analyzed::{ - AlgebraicBinaryOperation, AlgebraicBinaryOperator, AlgebraicExpression, Analyzed, Identity, + AlgebraicBinaryOperation, AlgebraicBinaryOperator, AlgebraicExpression, AlgebraicReference, + Analyzed, Identity, }; use powdr_number::{FieldElement, LargeInt}; -use std::sync::Arc; use powdr_ast::analyzed::{ AlgebraicUnaryOperation, AlgebraicUnaryOperator, PolyID, PolynomialType, }; -use stwo_prover::constraint_framework::{EvalAtRow, FrameworkComponent, FrameworkEval}; -use stwo_prover::core::backend::ColumnOps; +use stwo_prover::constraint_framework::preprocessed_columns::PreprocessedColumn; +use stwo_prover::constraint_framework::{ + EvalAtRow, FrameworkComponent, FrameworkEval, ORIGINAL_TRACE_IDX, +}; +use stwo_prover::core::backend::{Column, ColumnOps}; use stwo_prover::core::fields::m31::{BaseField, M31}; use stwo_prover::core::fields::{ExtensionOf, FieldExpOps, FieldOps}; -use stwo_prover::core::poly::circle::{CanonicCoset, CircleEvaluation}; +use stwo_prover::core::poly::circle::{CircleDomain, CircleEvaluation}; use stwo_prover::core::poly::BitReversedOrder; -use stwo_prover::core::ColumnVec; +use 
stwo_prover::core::utils::{bit_reverse_index, coset_index_to_circle_domain_index}; pub type PowdrComponent<'a, F> = FrameworkComponent>; -pub(crate) fn gen_stwo_circuit_trace( - witness: &[(String, Vec)], -) -> ColumnVec> +pub fn gen_stwo_circle_column( + domain: CircleDomain, + slice: &[T], +) -> CircleEvaluation where - T: FieldElement, //only Merenne31Field is supported, checked in runtime - B: FieldOps + ColumnOps, // Ensure B implements FieldOps for M31 + T: FieldElement, + B: FieldOps + ColumnOps, + F: ExtensionOf, { assert!( - witness - .iter() - .all(|(_name, vec)| vec.len() == witness[0].1.len()), - "All Vec in witness must have the same length. Mismatch found!" + slice.len().ilog2() == domain.size().ilog2(), + "column size must be equal to domain size" ); - let domain = CanonicCoset::new(witness[0].1.len().ilog2()).circle_domain(); - witness - .iter() - .map(|(_name, values)| { - let values = values - .iter() - .map(|v| v.try_into_i32().unwrap().into()) - .collect(); - CircleEvaluation::new(domain, values) - }) - .collect() + let mut column: >::Column = + >::Column::zeros(slice.len()); + slice.iter().enumerate().for_each(|(i, v)| { + column.set( + bit_reverse_index( + coset_index_to_circle_domain_index(i, slice.len().ilog2()), + slice.len().ilog2(), + ), + v.try_into_i32().unwrap().into(), + ); + }); + + CircleEvaluation::new(domain, column) } pub struct PowdrEval { analyzed: Arc>, witness_columns: BTreeMap, + constant_shifted: BTreeMap, + constant_columns: BTreeMap, } impl PowdrEval { @@ -64,9 +74,28 @@ impl PowdrEval { .map(|(index, (_, id))| (id, index)) .collect(); + let constant_with_next_list = get_constant_with_next_list(&analyzed); + + let constant_shifted: BTreeMap = analyzed + .definitions_in_source_order(PolynomialType::Constant) + .flat_map(|(symbol, _)| symbol.array_elements()) + .enumerate() + .filter(|(_, (name, _))| constant_with_next_list.contains(name)) + .map(|(index, (_, id))| (id, index)) + .collect(); + + let constant_columns: 
BTreeMap = analyzed + .definitions_in_source_order(PolynomialType::Constant) + .flat_map(|(symbol, _)| symbol.array_elements()) + .enumerate() + .map(|(index, (_, id))| (id, index)) + .collect(); + Self { analyzed, witness_columns, + constant_shifted, + constant_columns, } } } @@ -80,14 +109,46 @@ impl FrameworkEval for PowdrEval { } fn evaluate(&self, mut eval: E) -> E { assert!( - self.analyzed.constant_count() == 0 && self.analyzed.publics_count() == 0, - "Error: Expected no fixed columns nor public inputs, as they are not supported yet.", + self.analyzed.publics_count() == 0, + "Error: Expected no public inputs, as they are not supported yet.", ); let witness_eval: BTreeMap::F; 2]> = self .witness_columns .keys() - .map(|poly_id| (*poly_id, eval.next_interaction_mask(0, [0, 1]))) + .map(|poly_id| { + ( + *poly_id, + eval.next_interaction_mask(ORIGINAL_TRACE_IDX, [0, 1]), + ) + }) + .collect(); + + let constant_eval: BTreeMap<_, _> = self + .constant_columns + .keys() + .enumerate() + .map(|(i, poly_id)| { + ( + *poly_id, + // PreprocessedColumn::Plonk(i) is unused argument in get_preprocessed_column + eval.get_preprocessed_column(PreprocessedColumn::Plonk(i)), + ) + }) + .collect(); + + let constant_shifted_eval: BTreeMap<_, _> = self + .constant_shifted + .keys() + .enumerate() + .map(|(i, poly_id)| { + ( + *poly_id, + eval.get_preprocessed_column(PreprocessedColumn::Plonk( + i + constant_eval.len(), + )), + ) + }) .collect(); for id in self @@ -96,7 +157,12 @@ impl FrameworkEval for PowdrEval { { match id { Identity::Polynomial(identity) => { - let expr = to_stwo_expression(&identity.expression, &witness_eval); + let expr = to_stwo_expression( + &identity.expression, + &witness_eval, + &constant_shifted_eval, + &constant_eval, + ); eval.add_constraint(expr); } Identity::Connect(..) 
=> { @@ -120,6 +186,8 @@ impl FrameworkEval for PowdrEval { fn to_stwo_expression( expr: &AlgebraicExpression, witness_eval: &BTreeMap, + constant_shifted_eval: &BTreeMap, + constant_eval: &BTreeMap, ) -> F where F: FieldExpOps @@ -145,9 +213,10 @@ where false => witness_eval[&poly_id][0].clone(), true => witness_eval[&poly_id][1].clone(), }, - PolynomialType::Constant => { - unimplemented!("Constant polynomials are not supported in stwo yet") - } + PolynomialType::Constant => match r.next { + false => constant_eval[&poly_id].clone(), + true => constant_shifted_eval[&poly_id].clone(), + }, PolynomialType::Intermediate => { unimplemented!("Intermediate polynomials are not supported in stwo yet") } @@ -163,15 +232,17 @@ where right, }) => match **right { AlgebraicExpression::Number(n) => { - let left = to_stwo_expression(left, witness_eval); + let left = + to_stwo_expression(left, witness_eval, constant_shifted_eval, constant_eval); (0u32..n.to_integer().try_into_u32().unwrap()) .fold(F::one(), |acc, _| acc * left.clone()) } _ => unimplemented!("pow with non-constant exponent"), }, AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { left, op, right }) => { - let left = to_stwo_expression(left, witness_eval); - let right = to_stwo_expression(right, witness_eval); + let left = to_stwo_expression(left, witness_eval, constant_shifted_eval, constant_eval); + let right = + to_stwo_expression(right, witness_eval, constant_shifted_eval, constant_eval); match op { Add => left + right, @@ -181,7 +252,7 @@ where } } AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { op, expr }) => { - let expr = to_stwo_expression(expr, witness_eval); + let expr = to_stwo_expression(expr, witness_eval, constant_shifted_eval, constant_eval); match op { AlgebraicUnaryOperator::Minus => -expr, @@ -192,3 +263,22 @@ where } } } + +// This function creates a list of the names of the constant polynomials that have next references constraint +pub fn 
get_constant_with_next_list(analyzed: &Analyzed) -> HashSet<&String> { + let mut constant_with_next_list: HashSet<&String> = HashSet::new(); + analyzed.all_children().for_each(|e| { + if let AlgebraicExpression::Reference(AlgebraicReference { + name, + poly_id, + next, + }) = e + { + if matches!(poly_id.ptype, PolynomialType::Constant) && *next { + // add the name of the constant polynomial to the list + constant_with_next_list.insert(name); + } + }; + }); + constant_with_next_list +} diff --git a/backend/src/stwo/mod.rs b/backend/src/stwo/mod.rs index 794b66401d..14630dfc08 100644 --- a/backend/src/stwo/mod.rs +++ b/backend/src/stwo/mod.rs @@ -8,7 +8,7 @@ use crate::{ field_filter::generalize_factory, Backend, BackendFactory, BackendOptions, Error, Proof, }; use powdr_ast::analyzed::Analyzed; -use powdr_executor::constant_evaluator::{get_uniquely_sized_cloned, VariablySizedColumn}; +use powdr_executor::constant_evaluator::VariablySizedColumn; use powdr_executor::witgen::WitgenCallback; use powdr_number::{FieldElement, Mersenne31Field}; use prover::StwoProver; @@ -17,13 +17,12 @@ use stwo_prover::core::channel::{Blake2sChannel, Channel, MerkleChannel}; use stwo_prover::core::vcs::blake2_merkle::Blake2sMerkleChannel; mod circuit_builder; +mod proof; mod prover; -#[allow(dead_code)] struct RestrictedFactory; impl BackendFactory for RestrictedFactory { - #[allow(unreachable_code)] #[allow(unused_variables)] fn create( &self, @@ -39,14 +38,24 @@ impl BackendFactory for RestrictedFactory { if proving_key.is_some() { return Err(Error::BackendError("Proving key unused".to_string())); } + if pil.degrees().len() > 1 { return Err(Error::NoVariableDegreeAvailable); } - let fixed = Arc::new( - get_uniquely_sized_cloned(&fixed).map_err(|_| Error::NoVariableDegreeAvailable)?, - ); - let stwo: Box> = + + let mut stwo: Box> = Box::new(StwoProver::new(pil, fixed)?); + + match (proving_key, verification_key) { + (Some(pk), Some(vk)) => { + stwo.set_proving_key(pk); + 
//stwo.set_verifying_key(vk); + } + _ => { + stwo.setup(); + } + } + Ok(stwo) } } @@ -68,7 +77,7 @@ where Ok(self.verify(proof, instances)?) } - #[allow(unreachable_code)] + #[allow(unused_variables)] fn prove( &self, @@ -81,8 +90,8 @@ where } Ok(StwoProver::prove(self, witness)?) } - #[allow(unused_variables)] - fn export_verification_key(&self, output: &mut dyn io::Write) -> Result<(), Error> { - unimplemented!() + fn export_proving_key(&self, output: &mut dyn io::Write) -> Result<(), Error> { + self.export_proving_key(output) + .map_err(|e| Error::BackendError(e.to_string())) } } diff --git a/backend/src/stwo/proof.rs b/backend/src/stwo/proof.rs new file mode 100644 index 0000000000..8177511f97 --- /dev/null +++ b/backend/src/stwo/proof.rs @@ -0,0 +1,126 @@ +use serde::Deserialize; +use serde::Serialize; +use std::collections::BTreeMap; +use stwo_prover::core::backend::Backend; +use stwo_prover::core::backend::Column; +use stwo_prover::core::backend::ColumnOps; +use stwo_prover::core::fields::m31::BaseField; +use stwo_prover::core::fields::m31::M31; +use stwo_prover::core::poly::circle::{CanonicCoset, CircleEvaluation}; +use stwo_prover::core::poly::BitReversedOrder; +use stwo_prover::core::ColumnVec; + +/// For each possible size, the commitment and prover data +pub type TableProvingKeyCollection = BTreeMap>; + +impl From for TableProvingKeyCollection { + fn from(serializable: SerializableTableProvingKeyCollection) -> Self { + let constant_trace_circle_domain_collection = serializable + .constant_trace_circle_domain_collection + .into_iter() + .map(|(size, table_provingkey)| { + let domain = CanonicCoset::new(size.ilog2()).circle_domain(); + let constant_trace_circle_domain = table_provingkey + .into_values() + .map(|values| { + let mut column: >::Column = + >::Column::zeros(values.len()); + values.iter().enumerate().for_each(|(i, v)| { + column.set(i, *v); + }); + + CircleEvaluation::::new(domain, column) + }) + .collect::>(); + + ( + size, + TableProvingKey { 
+ constant_trace_circle_domain, + }, + ) + }) + .collect::>(); + + constant_trace_circle_domain_collection + } +} + +#[derive(Debug, Clone)] +pub struct TableProvingKey { + pub constant_trace_circle_domain: ColumnVec>, +} + +#[derive(Debug, Clone)] +pub struct StarkProvingKey { + pub preprocessed: Option>>, +} + +impl From for StarkProvingKey { + fn from(serializable: SerializableStarkProvingKey) -> Self { + let preprocessed = serializable.preprocessed.map(|map| { + map.into_iter() + .map(|(namespace, table_provingkey_collection)| { + ( + namespace, + TableProvingKeyCollection::::from(table_provingkey_collection), + ) + }) + .collect::>() + }); + + StarkProvingKey { preprocessed } + } +} + +#[derive(Serialize, Deserialize)] +pub struct SerializableTableProvingKeyCollection { + constant_trace_circle_domain_collection: BTreeMap>>, +} + +impl From> for SerializableTableProvingKeyCollection { + fn from(table_provingkey_collection: TableProvingKeyCollection) -> Self { + let mut constant_trace_circle_domain_collection = BTreeMap::new(); + + table_provingkey_collection + .iter() + .for_each(|(&size, trable_provingkey)| { + let mut values: BTreeMap> = BTreeMap::new(); + let log_size = size.ilog2(); + trable_provingkey + .constant_trace_circle_domain + .iter() + .for_each(|circle_eval| { + values.insert(log_size as usize, circle_eval.values.to_cpu().to_vec()); + }); + + constant_trace_circle_domain_collection.insert(size, values); + }); + + Self { + constant_trace_circle_domain_collection, + } + } +} + +#[derive(Serialize, Deserialize)] +pub struct SerializableStarkProvingKey { + preprocessed: Option>, +} + +impl From> for SerializableStarkProvingKey { + fn from(stark_proving_key: StarkProvingKey) -> Self { + let preprocessed = stark_proving_key.preprocessed.map(|map| { + map.into_iter() + .map(|(namespace, table_provingkey_collection)| { + ( + namespace, + SerializableTableProvingKeyCollection::from(table_provingkey_collection), + ) + }) + .collect::>() + }); + + Self { 
preprocessed } + } +} diff --git a/backend/src/stwo/prover.rs b/backend/src/stwo/prover.rs index ab79e93b03..8bdb954b8e 100644 --- a/backend/src/stwo/prover.rs +++ b/backend/src/stwo/prover.rs @@ -1,44 +1,74 @@ use powdr_ast::analyzed::Analyzed; +use powdr_backend_utils::machine_fixed_columns; +use powdr_executor::constant_evaluator::VariablySizedColumn; +use powdr_number::FieldElement; use serde::de::DeserializeOwned; use serde::ser::Serialize; -use std::io; +use std::collections::BTreeMap; use std::marker::PhantomData; use std::sync::Arc; +use std::{fmt, io}; -use crate::stwo::circuit_builder::{gen_stwo_circuit_trace, PowdrComponent, PowdrEval}; +use crate::stwo::circuit_builder::{ + gen_stwo_circle_column, get_constant_with_next_list, PowdrComponent, PowdrEval, +}; +use crate::stwo::proof::{ + SerializableStarkProvingKey, StarkProvingKey, TableProvingKey, TableProvingKeyCollection, +}; -use stwo_prover::constraint_framework::TraceLocationAllocator; +use stwo_prover::constraint_framework::{ + TraceLocationAllocator, ORIGINAL_TRACE_IDX, PREPROCESSED_TRACE_IDX, +}; use stwo_prover::core::prover::StarkProof; -use powdr_number::FieldElement; use stwo_prover::core::air::{Component, ComponentProver}; use stwo_prover::core::backend::{Backend, BackendForChannel}; use stwo_prover::core::channel::{Channel, MerkleChannel}; -use stwo_prover::core::fields::m31::M31; +use stwo_prover::core::fields::m31::{BaseField, M31}; use stwo_prover::core::fri::FriConfig; use stwo_prover::core::pcs::{CommitmentSchemeProver, CommitmentSchemeVerifier, PcsConfig}; -use stwo_prover::core::poly::circle::CanonicCoset; +use stwo_prover::core::poly::circle::{CanonicCoset, CircleDomain, CircleEvaluation}; +use stwo_prover::core::poly::twiddles::TwiddleTree; +use stwo_prover::core::poly::BitReversedOrder; +use stwo_prover::core::ColumnVec; const FRI_LOG_BLOWUP: usize = 1; const FRI_NUM_QUERIES: usize = 100; const FRI_PROOF_OF_WORK_BITS: usize = 16; const LOG_LAST_LAYER_DEGREE_BOUND: usize = 0; -pub 
struct StwoProver { +pub enum KeyExportError { + NoProvingKey, + //NoVerificationKey, +} + +impl fmt::Display for KeyExportError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NoProvingKey => write!(f, "No proving key set"), + // Self::NoVerificationKey => write!(f, "No verification key set"), + } + } +} + +pub struct StwoProver + Send, MC: MerkleChannel, C: Channel> { pub analyzed: Arc>, - _fixed: Arc)>>, - /// Proving key placeholder - _proving_key: Option<()>, + /// The split analyzed PIL + split: BTreeMap>, + /// The value of the fixed columns + pub fixed: Arc)>>, + + /// Proving key + proving_key: StarkProvingKey, /// Verifying key placeholder _verifying_key: Option<()>, _channel_marker: PhantomData, - _backend_marker: PhantomData, _merkle_channel_marker: PhantomData, } impl<'a, F: FieldElement, B, MC, C> StwoProver where - B: Backend + Send + BackendForChannel, // Ensure B implements BackendForChannel + B: Backend + Send + BackendForChannel, MC: MerkleChannel + Send, C: Channel + Send, MC::H: DeserializeOwned + Serialize, @@ -46,50 +76,231 @@ where { pub fn new( analyzed: Arc>, - _fixed: Arc)>>, + fixed: Arc)>>, ) -> Result { + let split: BTreeMap> = powdr_backend_utils::split_pil(&analyzed) + .into_iter() + .collect(); + Ok(Self { analyzed, - _fixed, - _proving_key: None, + split, + fixed, + proving_key: StarkProvingKey { preprocessed: None }, _verifying_key: None, _channel_marker: PhantomData, - _backend_marker: PhantomData, _merkle_channel_marker: PhantomData, }) } + + pub fn set_proving_key(&mut self, rdr: &mut dyn std::io::Read) { + let serializable_key: SerializableStarkProvingKey = bincode::deserialize_from(rdr).unwrap(); + self.proving_key = StarkProvingKey::from(serializable_key); + } + + pub fn export_proving_key( + &self, + writer: &mut dyn std::io::Write, + ) -> Result<(), KeyExportError> { + let pk = SerializableStarkProvingKey::from(self.proving_key.clone()); + self.proving_key + .preprocessed + .as_ref() + 
.ok_or(KeyExportError::NoProvingKey)?; + bincode::serialize_into(writer, &pk).unwrap(); + Ok(()) + } + + pub fn setup(&mut self) { + // machines with varying sizes are not supported yet, and it is checked in backendfactory create function. + //TODO: support machines with varying sizes + let domain_map: BTreeMap = self + .analyzed + .degrees() + .iter() + .map(|size| { + ( + (size.ilog2() as usize), + CanonicCoset::new(size.ilog2()).circle_domain(), + ) + }) + .collect(); + + let preprocessed: BTreeMap> = self + .split + .iter() + .filter_map(|(namespace, pil)| { + // if we have no fixed columns, we don't need to commit to anything. + if pil.constant_count() == 0 { + None + } else { + let fixed_columns = machine_fixed_columns(&self.fixed, pil); + + Some(( + namespace.to_string(), + pil.committed_polys_in_source_order() + .find_map(|(s, _)| s.degree) + .unwrap() + .iter() + .map(|size| { + let mut constant_trace: ColumnVec< + CircleEvaluation, + > = fixed_columns + .values() + .flat_map(|vec| { + vec.iter().map(|(_name, values)| { + gen_stwo_circle_column::( + *domain_map + .get(&(values.len().ilog2() as usize)) + .unwrap(), + values, + ) + }) + }) + .collect(); + + let constant_with_next_list = get_constant_with_next_list(pil); + + let constant_shifted_trace: ColumnVec< + CircleEvaluation, + > = fixed_columns + .values() + .flat_map(|vec| { + vec.iter() + .filter(|(name, _)| { + constant_with_next_list.contains(name) + }) + .map(|(_, values)| { + let mut rotated_values = values.to_vec(); + rotated_values.rotate_left(1); + gen_stwo_circle_column::( + *domain_map + .get(&(values.len().ilog2() as usize)) + .unwrap(), + &rotated_values, + ) + }) + }) + .collect(); + + constant_trace.extend(constant_shifted_trace); + + ( + size as usize, + TableProvingKey { + constant_trace_circle_domain: constant_trace, + }, + ) + }) + .collect(), + )) + } + }) + .collect(); + let proving_key = StarkProvingKey { + preprocessed: Some(preprocessed), + }; + self.proving_key = proving_key; 
+ } + pub fn prove(&self, witness: &[(String, Vec)]) -> Result, String> { - let config = get_config(); - // twiddles are used for FFT, they are computed in a bigger group than the eval domain. - // the eval domain is the half coset G_{2n} + - // twiddles are computed in half coset G_{4n} + , double the size of eval doamin. - let twiddles = B::precompute_twiddles( - CanonicCoset::new(self.analyzed.degree().ilog2() + 1 + FRI_LOG_BLOWUP as u32) - .circle_domain() - .half_coset, + assert!( + witness + .iter() + .all(|(_name, vec)| vec.len() == witness[0].1.len()), + "All Vec in witness must have the same length. Mismatch found!" ); - // Setup protocol. - let mut prover_channel = ::C::default(); - let commitment_scheme = &mut CommitmentSchemeProver::::new(config, &twiddles); + let config = get_config(); + let domain_map: BTreeMap = self + .analyzed + .degrees() + .iter() + .map(|size| { + ( + (size.ilog2() as usize), + CanonicCoset::new(size.ilog2()).circle_domain(), + ) + }) + .collect(); + let twiddles_map: BTreeMap> = self + .split + .values() + .flat_map(|pil| { + // Precompute twiddles for all sizes in the PIL + pil.committed_polys_in_source_order() + .flat_map(|(s, _)| { + s.degree.iter().flat_map(|range| { + range + .iter() + .filter(|&size| size.is_power_of_two()) + .map(|size| { + let twiddles = B::precompute_twiddles( + CanonicCoset::new(size.ilog2() + 1 + FRI_LOG_BLOWUP as u32) + .circle_domain() + .half_coset, + ); + (size as usize, twiddles) + }) + .collect::>() + }) + }) + .collect::>() + }) + .collect(); + // only the first one is used, machines with varying sizes are not supported yet, and it is checked in backendfactory create function. 
+ let prover_channel = &mut ::C::default(); + let mut commitment_scheme = + CommitmentSchemeProver::<'_, B, MC>::new(config, twiddles_map.iter().next().unwrap().1); + + let mut tree_builder = commitment_scheme.tree_builder(); + + //commit to the constant and shifted constant polynomials + if let Some((_, table_proving_key)) = + self.proving_key + .preprocessed + .as_ref() + .and_then(|preprocessed| { + preprocessed + .iter() + .find_map(|(_, table_collection)| table_collection.iter().next()) + }) + { + tree_builder.extend_evals(table_proving_key.constant_trace_circle_domain.clone()); + } else { + tree_builder.extend_evals([]); + } + tree_builder.commit(prover_channel); - let trace = gen_stwo_circuit_trace::(witness); + let trace: ColumnVec> = witness + .iter() + .map(|(_name, values)| { + gen_stwo_circle_column::( + *domain_map.get(&(values.len().ilog2() as usize)).unwrap(), + values, + ) + }) + .collect(); let mut tree_builder = commitment_scheme.tree_builder(); tree_builder.extend_evals(trace); - tree_builder.commit(&mut prover_channel); + tree_builder.commit(prover_channel); let component = PowdrComponent::new( &mut TraceLocationAllocator::default(), PowdrEval::new(self.analyzed.clone()), ); - let proof = stwo_prover::core::prover::prove::( + let proof_result = stwo_prover::core::prover::prove::( &[&component], - &mut prover_channel, - commitment_scheme, - ) - .unwrap(); + prover_channel, + &mut commitment_scheme, + ); + + let proof = match proof_result { + Ok(value) => value, + Err(e) => return Err(e.to_string()), // Propagate the error instead of panicking + }; Ok(bincode::serialize(&proof).unwrap()) } @@ -105,8 +316,8 @@ where let proof: StarkProof = bincode::deserialize(proof).map_err(|e| format!("Failed to deserialize proof: {e}"))?; - let mut verifier_channel = ::C::default(); - let mut commitment_scheme = CommitmentSchemeVerifier::::new(config); + let verifier_channel = &mut ::C::default(); + let commitment_scheme = &mut 
CommitmentSchemeVerifier::::new(config); //Constraints that are to be proved let component = PowdrComponent::new( @@ -115,21 +326,22 @@ where ); // Retrieve the expected column sizes in each commitment interaction, from the AIR. - // TODO: When constant columns are supported, there will be more than one sizes and proof.commitments - // size[0] is for constant columns, size[1] is for witness columns, size[2] is for lookup columns - // pass size[1] for witness columns now is not doable due to this branch is outdated for the new feature of constant columns - // it will throw errors. + // the sizes include the degrees of the constant, witness, native lookups. Native lookups are not used yet. let sizes = component.trace_log_degree_bounds(); - assert_eq!(sizes.len(), 1); - commitment_scheme.commit(proof.commitments[0], &sizes[0], &mut verifier_channel); - stwo_prover::core::prover::verify( - &[&component], - &mut verifier_channel, - &mut commitment_scheme, - proof, - ) - .map_err(|e| e.to_string()) + commitment_scheme.commit( + proof.commitments[PREPROCESSED_TRACE_IDX], + &sizes[PREPROCESSED_TRACE_IDX], + verifier_channel, + ); + commitment_scheme.commit( + proof.commitments[ORIGINAL_TRACE_IDX], + &sizes[ORIGINAL_TRACE_IDX], + verifier_channel, + ); + + stwo_prover::core::prover::verify(&[&component], verifier_channel, commitment_scheme, proof) + .map_err(|e| e.to_string()) } } diff --git a/pipeline/src/test_util.rs b/pipeline/src/test_util.rs index a058728657..6770b7ceb3 100644 --- a/pipeline/src/test_util.rs +++ b/pipeline/src/test_util.rs @@ -610,6 +610,28 @@ pub fn test_stwo(file_name: &str, inputs: Vec) { .collect(); pipeline.verify(&proof, &[publics]).unwrap(); } +#[cfg(feature = "stwo")] +pub fn assert_proofs_fail_for_invalid_witnesses_stwo( + file_name: &str, + witness: &[(String, Vec)], +) { + let pipeline = Pipeline::::default() + .from_file(resolve_test_file(file_name)) + .set_witness(convert_witness(witness)); + + assert!(pipeline + .clone() + 
.with_backend(powdr_backend::BackendType::Stwo, None) + .compute_proof() + .is_err()); +} + +#[cfg(not(feature = "stwo"))] +pub fn assert_proofs_fail_for_invalid_witnesses_stwo( + _file_name: &str, + _witness: &[(String, Vec)], +) { +} #[cfg(not(feature = "stwo"))] pub fn test_stwo(_file_name: &str, _inputs: Vec) {} diff --git a/pipeline/tests/pil.rs b/pipeline/tests/pil.rs index d5a683c0ee..20ce509a2c 100644 --- a/pipeline/tests/pil.rs +++ b/pipeline/tests/pil.rs @@ -3,7 +3,8 @@ use powdr_pipeline::{ test_util::{ assert_proofs_fail_for_invalid_witnesses, assert_proofs_fail_for_invalid_witnesses_estark, assert_proofs_fail_for_invalid_witnesses_mock, - assert_proofs_fail_for_invalid_witnesses_pilcom, make_prepared_pipeline, + assert_proofs_fail_for_invalid_witnesses_pilcom, + assert_proofs_fail_for_invalid_witnesses_stwo, make_prepared_pipeline, make_simple_prepared_pipeline, regular_test_all_fields, regular_test_gl, test_halo2_with_backend_variant, test_mock_backend, test_pilcom, test_stwo, BackendVariant, }, @@ -276,6 +277,53 @@ fn stwo_add_and_equal() { let f = "pil/add_and_equal.pil"; test_stwo(f, Default::default()); } + +#[test] +fn stwo_fibonacci() { + let f = "pil/fibo_no_publics.pil"; + test_stwo(f, Default::default()); +} + +#[test] +fn stwo_fixed_columns() { + let f = "pil/fixed_columns.pil"; + test_stwo(f, Default::default()); +} + +#[test] +fn stwo_incremental_one() { + let f = "pil/incremental_one.pil"; + test_stwo(f, Default::default()); +} + +#[test] +fn stwo_constant_next_test() { + let f = "pil/fixed_with_incremental.pil"; + test_stwo(f, Default::default()); +} + +#[test] +fn fibonacci_invalid_witness_stwo() { + let f = "pil/fibo_no_publics.pil"; + + // Changed one value and then continued. 
+ // The following constraint should fail in row 1: + // (1-ISLAST) * (x' - y) = 0; + let witness = vec![ + ("Fibonacci::x".to_string(), vec![1, 1, 10, 3]), + ("Fibonacci::y".to_string(), vec![1, 2, 3, 13]), + ]; + assert_proofs_fail_for_invalid_witnesses_stwo(f, &witness); + + // All constraints are valid, except the initial row. + // The following constraint should fail in row 3: + // ISLAST * (y' - 1) = 0; + let witness = vec![ + ("Fibonacci::x".to_string(), vec![1, 2, 3, 5]), + ("Fibonacci::y".to_string(), vec![2, 3, 5, 8]), + ]; + assert_proofs_fail_for_invalid_witnesses_stwo(f, &witness); +} #[test] fn simple_div() { let f = "pil/simple_div.pil"; diff --git a/test_data/pil/fibo_no_publics.pil b/test_data/pil/fibo_no_publics.pil new file mode 100644 index 0000000000..94c674b863 --- /dev/null +++ b/test_data/pil/fibo_no_publics.pil @@ -0,0 +1,13 @@ +let N = 4; + +// This uses the alternative nomenclature as well. + +namespace Fibonacci(N); + col fixed ISLAST(i) { if i == N - 1 { 1 } else { 0 } }; + col witness x, y; + + ISLAST * (y' - 1) = 0; + ISLAST * (x' - 1) = 0; + + (1-ISLAST) * (x' - y) = 0; + (1-ISLAST) * (y' - (x + y)) = 0; \ No newline at end of file diff --git a/test_data/pil/fixed_with_incremental.pil b/test_data/pil/fixed_with_incremental.pil new file mode 100644 index 0000000000..dfd46c0f55 --- /dev/null +++ b/test_data/pil/fixed_with_incremental.pil @@ -0,0 +1,13 @@ +let N = 32; + +// This uses the alternative nomenclature as well. 
+ +namespace Incremental(N); + col fixed ISLAST(i) { if i == N - 1 { 1 } else { 0 } }; + col witness x ; + col fixed INCREMENTAL(i) {i+1}; + ISLAST * (x' - 1) = 0; + ISLAST * (INCREMENTAL' - 1) = 0; + + (1-ISLAST) * (x' - x-1) = 0; + (1-ISLAST) * (INCREMENTAL' - INCREMENTAL-1) = 0; \ No newline at end of file diff --git a/test_data/pil/fixed_with_next.pil b/test_data/pil/fixed_with_next.pil new file mode 100644 index 0000000000..786876d57f --- /dev/null +++ b/test_data/pil/fixed_with_next.pil @@ -0,0 +1,14 @@ +let N = 32; + +// This uses the alternative nomenclature as well. + +namespace Incremental(N); + col fixed ISLAST(i) { if i == N - 1 { 1 } else { 0 } }; + col fixed INCREMENT(i) { i + 1 }; + col witness x, y; + + ISLAST * (x' - 1) = 0; + ISLAST * (INCREMENT' - 1) = 0; + + (1-ISLAST) * (x' - x-1) = 0; + (1-ISLAST) * (INCREMENT' - INCREMENT - 1) = 0; \ No newline at end of file diff --git a/test_data/pil/incremental_one.pil b/test_data/pil/incremental_one.pil new file mode 100644 index 0000000000..ccda39ccb4 --- /dev/null +++ b/test_data/pil/incremental_one.pil @@ -0,0 +1,11 @@ +let N = 32; + +// This uses the alternative nomenclature as well. + +namespace Incremental(N); + col fixed ISLAST(i) { if i == N - 1 { 1 } else { 0 } }; + col witness x, y; + + ISLAST * (x' - 1) = 0; + + (1-ISLAST) * (x' - x-1) = 0; \ No newline at end of file From 2aa7f8388cbea891bf23e3e09ce302f430710847 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 12 Dec 2024 12:58:23 +0100 Subject: [PATCH 55/57] Prepare to call jit from block machine. (#2098) This PR performs preliminary preparations in the block machine so that it will be able to JIT-compile and evaluate lookups into this machine given a certain combination of "known inputs". 
--------- Co-authored-by: Georg Wiese --- executor/Cargo.toml | 1 + .../data_structures/finalizable_data.rs | 121 ++++++++++++++++-- executor/src/witgen/jit/jit_processor.rs | 63 +++++++++ executor/src/witgen/jit/mod.rs | 1 + executor/src/witgen/machines/block_machine.rs | 56 ++++++-- .../witgen/machines/fixed_lookup_machine.rs | 6 +- executor/src/witgen/processor.rs | 50 +++++++- executor/src/witgen/rows.rs | 21 +++ 8 files changed, 296 insertions(+), 23 deletions(-) create mode 100644 executor/src/witgen/jit/jit_processor.rs diff --git a/executor/Cargo.toml b/executor/Cargo.toml index 3e9fd701cb..a7d0738c0d 100644 --- a/executor/Cargo.toml +++ b/executor/Cargo.toml @@ -15,6 +15,7 @@ powdr-parser-util.workspace = true powdr-pil-analyzer.workspace = true powdr-jit-compiler.workspace = true +auto_enums = "0.8.5" itertools = "0.13" log = { version = "0.4.17" } rayon = "1.7.0" diff --git a/executor/src/witgen/data_structures/finalizable_data.rs b/executor/src/witgen/data_structures/finalizable_data.rs index d8603d7b34..ed5d530325 100644 --- a/executor/src/witgen/data_structures/finalizable_data.rs +++ b/executor/src/witgen/data_structures/finalizable_data.rs @@ -3,6 +3,7 @@ use std::{ ops::{Index, IndexMut}, }; +use auto_enums::auto_enum; use bit_vec::BitVec; use itertools::Itertools; use powdr_ast::analyzed::{PolyID, PolynomialType}; @@ -13,7 +14,7 @@ use crate::witgen::rows::Row; /// Sequence of rows of field elements, stored in a compact form. /// Optimized for contiguous column IDs, but works with any combination. #[derive(Clone)] -struct CompactData { +pub struct CompactData { /// The ID of the first column used in the table. first_column_id: u64, /// The length of a row in the table. @@ -26,7 +27,7 @@ struct CompactData { impl CompactData { /// Creates a new empty compact data storage. 
- fn new(column_ids: &[PolyID]) -> Self { + pub fn new(column_ids: &[PolyID]) -> Self { let col_id_range = column_ids.iter().map(|id| id.id).minmax(); let (first_column_id, last_column_id) = col_id_range.into_option().unwrap(); Self { @@ -37,28 +38,28 @@ impl CompactData { } } - fn is_empty(&self) -> bool { + pub fn is_empty(&self) -> bool { self.data.is_empty() } /// Returns the number of stored rows. - fn len(&self) -> usize { + pub fn len(&self) -> usize { self.data.len() / self.column_count } /// Truncates the data to `len` rows. - fn truncate(&mut self, len: usize) { + pub fn truncate(&mut self, len: usize) { self.data.truncate(len * self.column_count); self.known_cells.truncate(len * self.column_count); } - fn clear(&mut self) { + pub fn clear(&mut self) { self.data.clear(); self.known_cells.clear(); } /// Appends a non-finalized row to the data, turning it into a finalized row. - fn push(&mut self, row: Row) { + pub fn push(&mut self, row: Row) { self.data.reserve(self.column_count); self.known_cells.reserve(self.column_count); for col_id in self.first_column_id..(self.first_column_id + self.column_count as u64) { @@ -75,11 +76,69 @@ impl CompactData { } } - fn get(&self, row: usize, col: u64) -> (T, bool) { + pub fn append_new_rows(&mut self, count: usize) { + self.data + .resize(self.data.len() + count * self.column_count, T::zero()); + self.known_cells.grow(count * self.column_count, false); + } + + fn index(&self, row: usize, col: u64) -> usize { let col = col - self.first_column_id; - let idx = row * self.column_count + col as usize; + row * self.column_count + col as usize + } + + pub fn get(&self, row: usize, col: u64) -> (T, bool) { + let idx = self.index(row, col); (self.data[idx], self.known_cells[idx]) } + + pub fn set(&mut self, row: usize, col: u64, value: T) { + let idx = self.index(row, col); + assert!(!self.known_cells[idx] || self.data[idx] == value); + self.data[idx] = value; + self.known_cells.set(idx, true); + } + + pub fn 
known_values_in_row(&self, row: usize) -> impl Iterator { + (0..self.column_count).filter_map(move |i| { + let col = self.first_column_id + i as u64; + let idx = self.index(row, col); + self.known_cells[idx].then(|| { + let col_id = self.first_column_id + i as u64; + (col_id, &self.data[idx]) + }) + }) + } +} + +/// A mutable reference into CompactData that is meant to be used +/// only for a certain block of rows, starting from row index zero. +/// It allows negative row indices as well. +pub struct CompactDataRef<'a, T> { + data: &'a mut CompactData, + row_offset: usize, +} + +impl<'a, T: FieldElement> CompactDataRef<'a, T> { + /// Creates a new reference to the data, supplying the offset of the row + /// that is supposed to be "row zero". + pub fn new(data: &'a mut CompactData, row_offset: usize) -> Self { + Self { data, row_offset } + } + + pub fn get(&self, row: i32, col: u32) -> T { + let (v, known) = self.data.get(self.inner_row(row), col as u64); + assert!(known); + v + } + + pub fn set(&mut self, row: i32, col: u32, value: T) { + self.data.set(self.inner_row(row), col as u64, value); + } + + fn inner_row(&self, row: i32) -> usize { + (row + self.row_offset as i32) as usize + } } /// A data structure that stores witness data. @@ -215,6 +274,38 @@ impl FinalizableData { } } + /// Returns an iterator over the values known in that row together with the PolyIDs. 
+ #[auto_enum(Iterator)] + pub fn known_values_in_row(&self, row: usize) -> impl Iterator + '_ { + match self.location_of_row(row) { + Location::PreFinalized(local) => { + let row = &self.pre_finalized_data[local]; + self.column_ids + .iter() + .filter_map(move |id| row.value(id).map(|v| (*id, v))) + } + Location::Finalized(local) => { + self.finalized_data + .known_values_in_row(local) + .map(|(id, v)| { + ( + PolyID { + id, + ptype: PolynomialType::Committed, + }, + *v, + ) + }) + } + Location::PostFinalized(local) => { + let row = &self.post_finalized_data[local]; + self.column_ids + .iter() + .filter_map(move |id| row.value(id).map(|v| (*id, v))) + } + } + } + pub fn last(&self) -> Option<&Row> { match self.location_of_last_row()? { Location::PreFinalized(local) => self.pre_finalized_data.get(local), @@ -283,6 +374,18 @@ impl FinalizableData { } } + /// Appends a given amount of new finalized rows set to zero and "unknown". + /// Returns a `CompactDataRef` that is built so that its "row zero" is the + /// first newly appended row. + /// + /// Panics if there are any non-finalized rows at the end. + pub fn append_new_finalized_rows(&mut self, count: usize) -> CompactDataRef<'_, T> { + assert!(self.post_finalized_data.is_empty()); + let row_zero = self.finalized_data.len(); + self.finalized_data.append_new_rows(count); + CompactDataRef::new(&mut self.finalized_data, row_zero) + } + /// Takes all data out of the [FinalizableData] and returns it as a list of columns. 
/// Columns are represented as a tuple of: /// - A list of values diff --git a/executor/src/witgen/jit/jit_processor.rs b/executor/src/witgen/jit/jit_processor.rs new file mode 100644 index 0000000000..dc87bd9a4e --- /dev/null +++ b/executor/src/witgen/jit/jit_processor.rs @@ -0,0 +1,63 @@ +use bit_vec::BitVec; +use powdr_number::FieldElement; + +use crate::witgen::{ + data_structures::finalizable_data::CompactDataRef, + machines::{LookupCell, MachineParts}, + util::try_to_simple_poly, + EvalError, FixedData, MutableState, QueryCallback, +}; + +pub struct JitProcessor<'a, T: FieldElement> { + _fixed_data: &'a FixedData<'a, T>, + parts: MachineParts<'a, T>, + _block_size: usize, + latch_row: usize, +} + +impl<'a, T: FieldElement> JitProcessor<'a, T> { + pub fn new( + fixed_data: &'a FixedData<'a, T>, + parts: MachineParts<'a, T>, + block_size: usize, + latch_row: usize, + ) -> Self { + JitProcessor { + _fixed_data: fixed_data, + parts, + _block_size: block_size, + latch_row, + } + } + + pub fn can_answer_lookup(&self, _identity_id: u64, _known_inputs: &BitVec) -> bool { + // TODO call the JIT compiler here. + false + } + + pub fn process_lookup_direct<'c, 'd, Q: QueryCallback>( + &self, + _mutable_state: &MutableState<'a, T, Q>, + connection_id: u64, + values: Vec>, + mut data: CompactDataRef<'d, T>, + ) -> Result> { + // Transfer inputs. + let right = self.parts.connections[&connection_id].right; + for (e, v) in right.expressions.iter().zip(&values) { + match v { + LookupCell::Input(&v) => { + let col = try_to_simple_poly(e).unwrap(); + data.set(self.latch_row as i32, col.poly_id.id as u32, v); + } + LookupCell::Output(_) => {} + } + } + + // Just some code here to avoid "unused" warnings. + // This code will not be called as long as `can_answer_lookup` returns false. 
+ data.get(self.latch_row as i32, 0); + + unimplemented!(); + } +} diff --git a/executor/src/witgen/jit/mod.rs b/executor/src/witgen/jit/mod.rs index a6d9dabf39..e7633eec65 100644 --- a/executor/src/witgen/jit/mod.rs +++ b/executor/src/witgen/jit/mod.rs @@ -1,2 +1,3 @@ mod affine_symbolic_expression; +pub(crate) mod jit_processor; mod symbolic_expression; diff --git a/executor/src/witgen/machines/block_machine.rs b/executor/src/witgen/machines/block_machine.rs index ab2a25b23b..a893b783b6 100644 --- a/executor/src/witgen/machines/block_machine.rs +++ b/executor/src/witgen/machines/block_machine.rs @@ -12,6 +12,7 @@ use crate::witgen::block_processor::BlockProcessor; use crate::witgen::data_structures::finalizable_data::FinalizableData; use crate::witgen::data_structures::multiplicity_counter::MultiplicityCounter; use crate::witgen::data_structures::mutable_state::MutableState; +use crate::witgen::jit::jit_processor::JitProcessor; use crate::witgen::processor::{OuterQuery, Processor, SolverState}; use crate::witgen::rows::{Row, RowIndex, RowPair}; use crate::witgen::sequence_iterator::{ @@ -72,6 +73,9 @@ pub struct BlockMachine<'a, T: FieldElement> { /// Cache that states the order in which to evaluate identities /// to make progress most quickly. processing_sequence_cache: ProcessingSequenceCache, + /// The JIT processor for this machine, i.e. the component that tries to generate + /// witgen code based on which elements of the connection are known. 
+ jit_processor: JitProcessor<'a, T>, name: String, multiplicity_counter: MultiplicityCounter, } @@ -132,6 +136,7 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> { latch_row, parts.identities.len(), ), + jit_processor: JitProcessor::new(fixed_data, parts.clone(), block_size, latch_row), }) } } @@ -356,12 +361,6 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> { RowIndex::from_i64(self.rows() as i64 - 1, self.degree) } - fn get_row(&self, row: RowIndex) -> &Row { - // The first block is a dummy block corresponding to rows (-block_size, 0), - // so we have to add the block size to the row index. - &self.data[(row + self.block_size).into()] - } - fn process_plookup_internal<'b, Q: QueryCallback>( &mut self, mutable_state: &MutableState<'a, T, Q>, @@ -372,8 +371,18 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> { log::trace!("Start processing block machine '{}'", self.name()); log::trace!("Left values of lookup:"); - for l in &outer_query.left { - log::trace!(" {}", l); + if log::log_enabled!(log::Level::Trace) { + for l in &outer_query.left { + log::trace!(" {}", l); + } + } + + let known_inputs = outer_query.left.iter().map(|e| e.is_constant()).collect(); + if self + .jit_processor + .can_answer_lookup(identity_id, &known_inputs) + { + return self.process_lookup_via_jit(mutable_state, identity_id, outer_query); } // TODO this assumes we are always using the same lookup for this machine. 
@@ -431,6 +440,35 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> { } } + fn process_lookup_via_jit<'b, Q: QueryCallback>( + &mut self, + mutable_state: &MutableState<'a, T, Q>, + identity_id: u64, + outer_query: OuterQuery<'a, 'b, T>, + ) -> EvalResult<'a, T> { + let mut input_output_data = vec![T::zero(); outer_query.left.len()]; + let values = outer_query.prepare_for_direct_lookup(&mut input_output_data); + + assert!( + (self.rows() + self.block_size as DegreeType) < self.degree, + "Block machine is full (this should have been checked before)" + ); + self.data + .finalize_range(self.first_in_progress_row..self.data.len()); + self.first_in_progress_row = self.data.len() + self.block_size; + //TODO can we properly access the last row of the dummy block? + let data = self.data.append_new_finalized_rows(self.block_size); + + let success = + self.jit_processor + .process_lookup_direct(mutable_state, identity_id, values, data)?; + assert!(success); + + Ok(outer_query + .direct_lookup_to_eval_result(input_output_data)? 
+ .report_side_effect()) + } + fn process<'b, Q: QueryCallback>( &self, mutable_state: &MutableState<'a, T, Q>, @@ -481,7 +519,7 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> { new_block .get_mut(0) .unwrap() - .merge_with(self.get_row(self.last_row_index())) + .merge_with_values(self.data.known_values_in_row(self.data.len() - 1)) .map_err(|_| { EvalError::Generic( "Block machine overwrites existing value with different value!".to_string(), diff --git a/executor/src/witgen/machines/fixed_lookup_machine.rs b/executor/src/witgen/machines/fixed_lookup_machine.rs index 9f6c2f8790..91c54c3818 100644 --- a/executor/src/witgen/machines/fixed_lookup_machine.rs +++ b/executor/src/witgen/machines/fixed_lookup_machine.rs @@ -234,7 +234,7 @@ impl<'a, T: FieldElement> FixedLookup<'a, T> { fn process_range_check( &self, - rows: &RowPair<'_, 'a, T>, + rows: &RowPair<'_, '_, T>, lhs: &AffineExpression, T>, rhs: AlgebraicVariable<'a>, ) -> EvalResult<'a, T> { @@ -317,9 +317,9 @@ impl<'a, T: FieldElement> Machine<'a, T> for FixedLookup<'a, T> { self.process_plookup_internal(mutable_state, identity_id, caller_rows, outer_query, right) } - fn process_lookup_direct<'b, 'c, Q: QueryCallback>( + fn process_lookup_direct<'c, Q: QueryCallback>( &mut self, - _mutable_state: &'b MutableState<'a, T, Q>, + _mutable_state: &MutableState<'a, T, Q>, identity_id: u64, values: &mut [LookupCell<'c, T>], ) -> Result> { diff --git a/executor/src/witgen/processor.rs b/executor/src/witgen/processor.rs index 9ded01b88a..e1791a696c 100644 --- a/executor/src/witgen/processor.rs +++ b/executor/src/witgen/processor.rs @@ -1,5 +1,6 @@ use std::collections::BTreeMap; +use itertools::Itertools; use powdr_ast::analyzed::PolynomialType; use powdr_ast::analyzed::{AlgebraicExpression as Expression, AlgebraicReference, PolyID}; @@ -10,8 +11,7 @@ use crate::witgen::data_structures::mutable_state::MutableState; use crate::witgen::{query_processor::QueryProcessor, util::try_to_simple_poly, Constraint}; use 
crate::Identity;
 
-use super::machines::{Connection, MachineParts};
-use super::FixedData;
+use super::machines::{Connection, LookupCell, MachineParts};
 use super::{
     affine_expression::AffineExpression,
     data_structures::{
@@ -22,6 +22,7 @@ use super::{
     rows::{Row, RowIndex, RowPair, RowUpdater, UnknownStrategy},
     Constraints, EvalError, EvalValue, IncompleteCause, QueryCallback,
 };
+use super::{EvalResult, FixedData};
 
 pub type Left<'a, T> = Vec, T>>;
 
@@ -76,6 +77,51 @@ impl<'a, 'b, T: FieldElement> OuterQuery<'a, 'b, T> {
     pub fn is_complete(&self) -> bool {
         self.left.iter().all(|l| l.is_constant())
     }
+
+    /// Helper function to convert an `OuterQuery` into a list of `LookupCell`s
+    /// to be used by `Machine::process_lookup_direct`.
+    pub fn prepare_for_direct_lookup<'c>(
+        &self,
+        input_output_data: &'c mut [T],
+    ) -> Vec> {
+        self.left
+            .iter()
+            .zip_eq(input_output_data.iter_mut())
+            .map(|(l, d)| {
+                if let Some(value) = l.constant_value() {
+                    *d = value;
+                    LookupCell::Input(d)
+                } else {
+                    LookupCell::Output(d)
+                }
+            })
+            .collect::>()
+    }
+
+    /// Helper function to turn the result of a direct lookup into an `EvalResult`,
+    /// as used by `Machine::process_plookup`.
+    ///
+    /// Note that this function assumes that the lookup was successful and complete.
+ pub fn direct_lookup_to_eval_result(&self, input_output_data: Vec) -> EvalResult<'a, T> { + let mut result = EvalValue::complete(vec![]); + for (l, v) in self.left.iter().zip(input_output_data) { + if !l.is_constant() { + let evaluated = l.clone() - v.into(); + match evaluated.solve() { + Ok(constraints) => { + result.combine(constraints); + } + Err(_) => { + // Fail the whole lookup + return Err(EvalError::ConstraintUnsatisfiable(format!( + "Constraint is invalid ({l} != {v}).", + ))); + } + } + } + } + Ok(result) + } } pub struct IdentityResult { diff --git a/executor/src/witgen/rows.rs b/executor/src/witgen/rows.rs index ec1272cf28..9ac3255e36 100644 --- a/executor/src/witgen/rows.rs +++ b/executor/src/witgen/rows.rs @@ -224,6 +224,27 @@ impl Row { Ok(()) } + /// Merges a list of known values into the current row. + /// Returns an error if there is a conflict. + pub fn merge_with_values( + &mut self, + values: impl Iterator, + ) -> Result<(), ()> { + let stored = self.values.clone(); + + for (poly_id, value) in values { + let v = &mut self.values[&poly_id]; + if let CellValue::Known(stored_value) = v { + if *stored_value != value { + self.values = stored; + return Err(()); + } + } + *v = CellValue::Known(value); + } + Ok(()) + } + pub fn value_is_known(&self, poly_id: &PolyID) -> bool { self.values[poly_id].is_known() } From e3c4c858f017c57c4f856cb32a4dfbb5fd1e1fbf Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 12 Dec 2024 17:12:30 +0100 Subject: [PATCH 56/57] Witgen inference. (#2219) This PR adds a component that can derive assignments and other code on identities and multiple rows. It keeps track of which cells in the trace are already known and which not. The way to access fixed rows is abstracted because it does not have a concept of an absolute row. While this might work for block machines with cyclic fixed columns, it does not work in the general case. 
What it does not do: - have a sequence of which identities to consider on which rows - a mechanism that determines when it is finished --------- Co-authored-by: Georg Wiese --- .../witgen/jit/affine_symbolic_expression.rs | 150 ++++-- executor/src/witgen/jit/cell.rs | 59 +++ executor/src/witgen/jit/mod.rs | 4 +- .../src/witgen/jit/symbolic_expression.rs | 6 +- executor/src/witgen/jit/witgen_inference.rs | 501 ++++++++++++++++++ executor/src/witgen/range_constraints.rs | 8 + 6 files changed, 673 insertions(+), 55 deletions(-) create mode 100644 executor/src/witgen/jit/cell.rs create mode 100644 executor/src/witgen/jit/witgen_inference.rs diff --git a/executor/src/witgen/jit/affine_symbolic_expression.rs b/executor/src/witgen/jit/affine_symbolic_expression.rs index afdf683bd7..d4b3c93571 100644 --- a/executor/src/witgen/jit/affine_symbolic_expression.rs +++ b/executor/src/witgen/jit/affine_symbolic_expression.rs @@ -20,6 +20,8 @@ pub enum Effect { RangeConstraint(V, RangeConstraint), /// A run-time assertion. If this fails, we have conflicting constraints. Assertion(Assertion), + /// a call to a different machine. + MachineCall(u64, Vec>), } /// A run-time assertion. If this fails, we have conflicting constraints. @@ -57,6 +59,32 @@ impl Assertion { } } +pub enum MachineCallArgument { + Known(SymbolicExpression), + Unknown(AffineSymbolicExpression), +} + +#[derive(Default)] +pub struct ProcessResult { + pub effects: Vec>, + pub complete: bool, +} + +impl ProcessResult { + pub fn empty() -> Self { + Self { + effects: vec![], + complete: false, + } + } + pub fn complete(effects: Vec>) -> Self { + Self { + effects, + complete: true, + } + } +} + /// Represents an expression `a_1 * x_1 + ... + a_k * x_k + offset`, /// where the `a_i` and `offset` are symbolic expressions, i.e. 
values known at run-time /// (which can still include variables or symbols, which are only known at run-time), @@ -134,6 +162,15 @@ impl AffineSymbolicExpression { } } + /// If this expression contains a single unknown variable, returns it. + pub fn single_unknown_variable(&self) -> Option<&V> { + if self.coefficients.len() == 1 { + self.coefficients.keys().next() + } else { + None + } + } + /// Tries to multiply this expression with another one. /// Returns `None` if the result would be quadratic, i.e. /// if both expressions contain unknown variables. @@ -148,15 +185,17 @@ impl AffineSymbolicExpression { /// Solves the equation `self = 0` and returns how to compute the solution. /// The solution can contain assignments to multiple variables. - /// If no way to solve the equation has been found, returns the empty vector. + /// If no way to solve the equation (and no way to derive new range + /// constraints) has been found, but it still contains + /// unknown variables, returns an empty, incomplete result. /// If the equation is known to be unsolvable, returns an error. - pub fn solve(&self) -> Result>, EvalError> { + pub fn solve(&self) -> Result, EvalError> { Ok(match self.coefficients.len() { 0 => { if self.offset.is_known_nonzero() { return Err(EvalError::ConstraintUnsatisfiable(self.to_string())); } else { - vec![] + ProcessResult::complete(vec![]) } } 1 => { @@ -169,35 +208,40 @@ impl AffineSymbolicExpression { if coeff.is_known_nonzero() { // In this case, we can always compute a solution. let value = self.offset.field_div(&-coeff); - vec![Effect::Assignment(var.clone(), value)] + ProcessResult::complete(vec![Effect::Assignment(var.clone(), value)]) } else if self.offset.is_known_nonzero() { // If the offset is not zero, then the coefficient must be non-zero, // otherwise the constraint is violated. 
let value = self.offset.field_div(&-coeff); - vec![ + ProcessResult::complete(vec![ Assertion::assert_is_nonzero(coeff.clone()), Effect::Assignment(var.clone(), value), - ] + ]) } else { // In this case, we could have an equation of the form // 0 * X = 0, which is valid and generates no information about X. - vec![] + ProcessResult::empty() } } _ => { let r = self.solve_bit_decomposition(); - if !r.is_empty() { + if r.complete { r } else { let negated = -self; let r = negated.solve_bit_decomposition(); - if !r.is_empty() { + if r.complete { r } else { - self.transfer_constraints() + let effects = self + .transfer_constraints() .into_iter() .chain(negated.transfer_constraints()) - .collect() + .collect(); + ProcessResult { + effects, + complete: false, + } } } } @@ -205,7 +249,7 @@ impl AffineSymbolicExpression { } /// Tries to solve a bit-decomposition equation. - fn solve_bit_decomposition(&self) -> Vec> { + fn solve_bit_decomposition(&self) -> ProcessResult { // All the coefficients need to be known numbers and the // variables need to be range-constrained. let constrained_coefficients = self @@ -218,7 +262,7 @@ impl AffineSymbolicExpression { }) .collect::>>(); let Some(constrained_coefficients) = constrained_coefficients else { - return vec![]; + return ProcessResult::empty(); }; // Check if they are mutually exclusive and compute assignments. @@ -228,7 +272,7 @@ impl AffineSymbolicExpression { let mask = *constraint.multiple(coeff).mask(); if !(mask & covered_bits).is_zero() { // Overlapping range constraints. - return vec![]; + return ProcessResult::empty(); } else { covered_bits |= mask; } @@ -240,26 +284,26 @@ impl AffineSymbolicExpression { } if covered_bits >= T::modulus() { - return vec![]; + return ProcessResult::empty(); } - // We need to assert that the masks cover the offset, + // We need to assert that the masks cover "-offset", // otherwise the equation is not solvable. - // We assert offset & !masks == 0 <=> offset == offset | masks. 
+ // We assert -offset & !masks == 0 <=> -offset == -offset | masks. // We use the latter since we cannot properly bit-negate inside the field. effects.push(Assertion::assert_eq( - self.offset.clone(), - &self.offset | &T::from(covered_bits).into(), + -&self.offset, + -&self.offset | T::from(covered_bits).into(), )); - effects + ProcessResult::complete(effects) } - fn transfer_constraints(&self) -> Vec> { + fn transfer_constraints(&self) -> Option> { // We are looking for X = a * Y + b * Z + ... or -X = a * Y + b * Z + ... // where X is least constrained. - let Some((solve_for, solve_for_coefficient)) = self + let (solve_for, solve_for_coefficient) = self .coefficients .iter() .filter(|(_var, coeff)| coeff.is_known_one() || coeff.is_known_minus_one()) @@ -269,13 +313,10 @@ impl AffineSymbolicExpression { .get(var) .map(|c| c.range_width()) .unwrap_or_else(|| T::modulus()) - }) - else { - return vec![]; - }; + })?; // This only works if the coefficients are all known. - let Some(summands) = self + let summands = self .coefficients .iter() .filter(|(var, _)| *var != solve_for) @@ -285,19 +326,14 @@ impl AffineSymbolicExpression { Some(rc.multiple(coeff)) }) .chain(std::iter::once(self.offset.range_constraint())) - .collect::>>() - else { - return vec![]; - }; - let Some(constraint) = summands.into_iter().reduce(|c1, c2| c1.combine_sum(&c2)) else { - return vec![]; - }; + .collect::>>()?; + let constraint = summands.into_iter().reduce(|c1, c2| c1.combine_sum(&c2))?; let constraint = if solve_for_coefficient.is_known_one() { -constraint } else { constraint }; - vec![Effect::RangeConstraint(solve_for.clone(), constraint)] + Some(Effect::RangeConstraint(solve_for.clone(), constraint)) } } @@ -394,6 +430,8 @@ impl Mul<&SymbolicExpression> #[cfg(test)] mod test { + use pretty_assertions::assert_eq; + use powdr_number::GoldilocksField; use super::*; @@ -419,8 +457,9 @@ mod test { let x = &Ase::from_known_symbol("X", None); let y = &Ase::from_known_symbol("Y", None); let 
constr = x + y - from_number(10); - // We cannot solve it but also cannot know it is unsolvable. - assert!(constr.solve().unwrap().is_empty()); + // We cannot solve it, but we can also not learn anything new from it. + let result = constr.solve().unwrap(); + assert!(result.complete && result.effects.is_empty()); // But if we know the values, we can be sure there is a conflict. assert!(from_number(10).solve().is_err()); } @@ -428,7 +467,8 @@ mod test { #[test] fn solvable_without_vars() { let constr = &from_number(0); - assert!(constr.solve().unwrap().is_empty()); + let result = constr.solve().unwrap(); + assert!(result.complete && result.effects.is_empty()); } #[test] @@ -440,9 +480,10 @@ mod test { let seven = from_number(7); let ten = from_number(10); let constr = mul(&two, &x) + mul(&seven, &y) - ten; - let effects = constr.solve().unwrap(); - assert_eq!(effects.len(), 1); - let Effect::Assignment(var, expr) = &effects[0] else { + let result = constr.solve().unwrap(); + assert!(result.complete); + assert_eq!(result.effects.len(), 1); + let Effect::Assignment(var, expr) = &result.effects[0] else { panic!("Expected assignment"); }; assert_eq!(var.to_string(), "X"); @@ -459,12 +500,14 @@ mod test { let ten = from_number(10); let constr = mul(&z, &x) + mul(&seven, &y) - ten.clone(); // If we do not range-constrain z, we cannot solve since we don't know if it might be zero. 
- let effects = constr.solve().unwrap(); - assert_eq!(effects.len(), 0); + let result = constr.solve().unwrap(); + assert!(!result.complete && result.effects.is_empty()); let z = Ase::from_known_symbol("z", Some(RangeConstraint::from_range(10.into(), 20.into()))); let constr = mul(&z, &x) + mul(&seven, &y) - ten; - let effects = constr.solve().unwrap(); + let result = constr.solve().unwrap(); + assert!(result.complete); + let effects = result.effects; let Effect::Assignment(var, expr) = &effects[0] else { panic!("Expected assignment"); }; @@ -488,7 +531,8 @@ mod test { + ten.clone() + z.clone(); // Without range constraints, this is not solvable. - assert!(constr.solve().unwrap().is_empty()); + let result = constr.solve().unwrap(); + assert!(!result.complete && result.effects.is_empty()); // Now add the range constraint on a, it should be solvable. let a = Ase::from_unknown_variable("a", rc.clone()); let constr = mul(&a, &from_number(0x100)) @@ -496,9 +540,10 @@ mod test { + mul(&c, &from_number(0x1000000)) + ten.clone() + z; - let effects = constr - .solve() - .unwrap() + let result = constr.solve().unwrap(); + assert!(result.complete); + let effects = result + .effects .into_iter() .map(|effect| match effect { Effect::Assignment(v, expr) => format!("{v} = {expr};\n"), @@ -521,7 +566,7 @@ mod test { "a = ((-(10 + Z) & 65280) // 256); b = ((-(10 + Z) & 16711680) // 65536); c = ((-(10 + Z) & 4278190080) // 16777216); -assert (10 + Z) == ((10 + Z) | 4294967040); +assert -(10 + Z) == (-(10 + Z) | 4294967040); " ); } @@ -540,9 +585,10 @@ assert (10 + Z) == ((10 + Z) | 4294967040); + mul(&c, &from_number(0x1000000)) + ten - z; - let effects = constr - .solve() - .unwrap() + let result = constr.solve().unwrap(); + assert!(!result.complete); + let effects = result + .effects .into_iter() .map(|effect| match effect { Effect::RangeConstraint(v, rc) => format!("{v}: {rc};\n"), diff --git a/executor/src/witgen/jit/cell.rs b/executor/src/witgen/jit/cell.rs new file mode 100644 
index 0000000000..090e277048 --- /dev/null +++ b/executor/src/witgen/jit/cell.rs @@ -0,0 +1,59 @@ +use std::{ + fmt::{self, Display, Formatter}, + hash::{Hash, Hasher}, +}; + +use powdr_ast::analyzed::AlgebraicReference; + +/// The identifier of a witness cell in the trace table. +/// The `row_offset` is relative to a certain "zero row" defined +/// by the component that uses this data structure. +#[derive(Debug, Clone, Eq)] +pub struct Cell { + /// Name of the column, used only for display purposes. + pub column_name: String, + pub id: u64, + pub row_offset: i32, +} + +impl Hash for Cell { + fn hash(&self, state: &mut H) { + self.id.hash(state); + self.row_offset.hash(state); + } +} + +impl PartialEq for Cell { + fn eq(&self, other: &Self) -> bool { + self.id == other.id && self.row_offset == other.row_offset + } +} + +impl Ord for Cell { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + (self.id, self.row_offset).cmp(&(other.id, other.row_offset)) + } +} + +impl PartialOrd for Cell { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Cell { + pub fn from_reference(r: &AlgebraicReference, row_offset: i32) -> Self { + assert!(r.is_witness()); + Self { + column_name: r.name.clone(), + id: r.poly_id.id, + row_offset: r.next as i32 + row_offset, + } + } +} + +impl Display for Cell { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}[{}]", self.column_name, self.row_offset) + } +} diff --git a/executor/src/witgen/jit/mod.rs b/executor/src/witgen/jit/mod.rs index e7633eec65..57940a97f0 100644 --- a/executor/src/witgen/jit/mod.rs +++ b/executor/src/witgen/jit/mod.rs @@ -1,3 +1,5 @@ -mod affine_symbolic_expression; +pub(crate) mod affine_symbolic_expression; +mod cell; pub(crate) mod jit_processor; mod symbolic_expression; +pub(crate) mod witgen_inference; diff --git a/executor/src/witgen/jit/symbolic_expression.rs b/executor/src/witgen/jit/symbolic_expression.rs index 8f3ddd470f..900135c7b3 100644 --- 
a/executor/src/witgen/jit/symbolic_expression.rs +++ b/executor/src/witgen/jit/symbolic_expression.rs @@ -286,7 +286,7 @@ impl BitAnd for &SymbolicExpression { Rc::new(rhs.clone()), self.range_constraint() .zip(rhs.range_constraint()) - .map(|(a, b)| a.conjunction(&b)), + .map(|(a, b)| RangeConstraint::from_mask(*a.mask() & *b.mask())), ) } } @@ -317,7 +317,9 @@ impl BitOr for &SymbolicExpression { Rc::new(self.clone()), BinaryOperator::BitOr, Rc::new(rhs.clone()), - None, + self.range_constraint() + .zip(rhs.range_constraint()) + .map(|(a, b)| RangeConstraint::from_mask(*a.mask() | *b.mask())), ) } } diff --git a/executor/src/witgen/jit/witgen_inference.rs b/executor/src/witgen/jit/witgen_inference.rs new file mode 100644 index 0000000000..f921bbcd31 --- /dev/null +++ b/executor/src/witgen/jit/witgen_inference.rs @@ -0,0 +1,501 @@ +#![allow(unused)] +use std::collections::{HashMap, HashSet}; + +use itertools::Itertools; +use powdr_ast::analyzed::{ + AlgebraicBinaryOperation, AlgebraicBinaryOperator, AlgebraicExpression as Expression, + AlgebraicReference, AlgebraicUnaryOperation, AlgebraicUnaryOperator, Identity, LookupIdentity, + PermutationIdentity, PhantomLookupIdentity, PhantomPermutationIdentity, PolyID, + PolynomialIdentity, PolynomialType, SelectedExpressions, +}; +use powdr_number::FieldElement; + +use crate::witgen::{ + global_constraints::RangeConstraintSet, jit::affine_symbolic_expression::MachineCallArgument, +}; + +use super::{ + super::{range_constraints::RangeConstraint, FixedData}, + affine_symbolic_expression::{AffineSymbolicExpression, Effect, ProcessResult}, + cell::Cell, +}; + +/// This component can generate code that solves identities. +/// It needs a driver that tells it which identities to process on which rows. 
+pub struct WitgenInference<'a, T: FieldElement, FixedEval: FixedEvaluator> { + fixed_data: &'a FixedData<'a, T>, + fixed_evaluator: FixedEval, + derived_range_constraints: HashMap>, + known_cells: HashSet, + code: Vec>, +} + +impl<'a, T: FieldElement, FixedEval: FixedEvaluator> WitgenInference<'a, T, FixedEval> { + pub fn new( + fixed_data: &'a FixedData<'a, T>, + fixed_evaluator: FixedEval, + known_cells: impl IntoIterator, + ) -> Self { + Self { + fixed_data, + fixed_evaluator, + derived_range_constraints: Default::default(), + known_cells: known_cells.into_iter().collect(), + code: Default::default(), + } + } + + pub fn code(self) -> Vec> { + self.code + } + + /// Process an identity on a certain row. + /// Returns true if this identity/row pair was fully processed and + /// should not be considered again. + pub fn process_identity(&mut self, id: &Identity, row_offset: i32) -> bool { + let result = match id { + Identity::Polynomial(PolynomialIdentity { expression, .. }) => { + self.process_polynomial_identity(expression, row_offset) + } + Identity::Lookup(LookupIdentity { + id, left, right, .. + }) + | Identity::Permutation(PermutationIdentity { + id, left, right, .. + }) + | Identity::PhantomPermutation(PhantomPermutationIdentity { + id, left, right, .. + }) + | Identity::PhantomLookup(PhantomLookupIdentity { + id, left, right, .. + }) => self.process_lookup(*id, left, right, row_offset), + Identity::PhantomBusInteraction(_) => { + // TODO(bus_interaction) Once we have a concept of "can_be_answered", bus interactions + // should be as easy as lookups. + ProcessResult::empty() + } + Identity::Connect(_) => ProcessResult::empty(), + }; + self.ingest_effects(result.effects); + result.complete + } + + fn process_polynomial_identity( + &self, + expression: &'a Expression, + offset: i32, + ) -> ProcessResult { + if let Some(r) = self.evaluate(expression, offset) { + // TODO propagate or report error properly. 
+ // If solve returns an error, it means that the constraint is conflicting. + // In the future, we might run this in a runtime-conditional, so an error + // could just mean that this case cannot happen in practice. + r.solve().unwrap() + } else { + ProcessResult::empty() + } + } + + fn process_lookup( + &self, + lookup_id: u64, + left: &SelectedExpressions, + right: &SelectedExpressions, + offset: i32, + ) -> ProcessResult { + // TODO: In the future, call the 'mutable state' to check if the + // lookup can always be answered. + + // If the RHS is fully fixed columns... + if right.expressions.iter().all(|e| match e { + Expression::Reference(r) => r.is_fixed(), + Expression::Number(_) => true, + _ => false, + }) { + // and the selector is known to be 1... + if self + .evaluate(&left.selector, offset) + .and_then(|s| s.try_to_known().map(|k| k.is_known_one())) + == Some(true) + { + if let Some(lhs) = left + .expressions + .iter() + .map(|e| self.evaluate(e, offset)) + .collect::>>() + { + // and all except one expression is known on the LHS. + let unknown = lhs + .iter() + .filter(|e| e.try_to_known().is_none()) + .collect_vec(); + if unknown.len() == 1 && unknown[0].single_unknown_variable().is_some() { + let effects = vec![Effect::MachineCall( + lookup_id, + lhs.into_iter() + .map(|e| { + if let Some(val) = e.try_to_known() { + MachineCallArgument::Known(val.clone()) + } else { + MachineCallArgument::Unknown(e) + } + }) + .collect(), + )]; + return ProcessResult::complete(effects); + } + } + } + } + ProcessResult::empty() + } + + fn ingest_effects(&mut self, effects: Vec>) { + for e in effects { + match &e { + Effect::Assignment(cell, assignment) => { + self.known_cells.insert(cell.clone()); + if let Some(rc) = assignment.range_constraint() { + // If the cell was determined to be a constant, we add this + // as a range constraint, so we can use it in future evaluations. 
+ self.add_range_constraint(cell.clone(), rc); + } + self.code.push(e); + } + Effect::RangeConstraint(cell, rc) => { + self.add_range_constraint(cell.clone(), rc.clone()); + } + Effect::MachineCall(_, arguments) => { + for arg in arguments { + if let MachineCallArgument::Unknown(expr) = arg { + let cell = expr.single_unknown_variable().unwrap(); + self.known_cells.insert(cell.clone()); + } + } + self.code.push(e); + } + Effect::Assertion(_) => self.code.push(e), + } + } + } + + fn add_range_constraint(&mut self, cell: Cell, rc: RangeConstraint) { + let rc = self + .range_constraint(cell.clone()) + .map_or(rc.clone(), |existing_rc| existing_rc.conjunction(&rc)); + if !self.known_cells.contains(&cell) { + if let Some(v) = rc.try_to_single_value() { + // Special case: Cell is fixed to a constant by range constraints only. + self.known_cells.insert(cell.clone()); + self.code.push(Effect::Assignment(cell.clone(), v.into())); + } + } + self.derived_range_constraints.insert(cell.clone(), rc); + } + + fn evaluate( + &self, + expr: &Expression, + offset: i32, + ) -> Option> { + Some(match expr { + Expression::Reference(r) => { + if r.is_fixed() { + self.fixed_evaluator.evaluate(r, offset)?.into() + } else { + let cell = Cell::from_reference(r, offset); + // If a cell is known and has a compile-time constant value, + // that value is stored in the range constraints. + let rc = self.range_constraint(cell.clone()); + if let Some(val) = rc.as_ref().and_then(|rc| rc.try_to_single_value()) { + val.into() + } else if self.known_cells.contains(&cell) { + AffineSymbolicExpression::from_known_symbol(cell, rc) + } else { + AffineSymbolicExpression::from_unknown_variable(cell, rc) + } + } + } + Expression::PublicReference(_) | Expression::Challenge(_) => { + // TODO we need to introduce a variable type for those. 
+ return None; + } + Expression::Number(n) => (*n).into(), + Expression::BinaryOperation(op) => self.evaluate_binary_operation(op, offset)?, + Expression::UnaryOperation(op) => self.evaluate_unary_operation(op, offset)?, + }) + } + + fn evaluate_binary_operation( + &self, + op: &AlgebraicBinaryOperation, + offset: i32, + ) -> Option> { + let left = self.evaluate(&op.left, offset)?; + let right = self.evaluate(&op.right, offset)?; + match op.op { + AlgebraicBinaryOperator::Add => Some(&left + &right), + AlgebraicBinaryOperator::Sub => Some(&left - &right), + AlgebraicBinaryOperator::Mul => left.try_mul(&right), + AlgebraicBinaryOperator::Pow => { + let result = left + .try_to_known()? + .try_to_number()? + .pow(right.try_to_known()?.try_to_number()?.to_integer()); + Some(AffineSymbolicExpression::from(result)) + } + } + } + + fn evaluate_unary_operation( + &self, + op: &AlgebraicUnaryOperation, + offset: i32, + ) -> Option> { + let expr = self.evaluate(&op.expr, offset)?; + match op.op { + AlgebraicUnaryOperator::Minus => Some(-&expr), + } + } + + /// Returns the current best-known range constraint on the given cell + /// combining global range constraints and newly derived local range constraints. 
+ fn range_constraint(&self, cell: Cell) -> Option> { + self.fixed_data + .global_range_constraints + .range_constraint(&AlgebraicReference { + name: Default::default(), + poly_id: PolyID { + id: cell.id, + ptype: PolynomialType::Committed, + }, + next: false, + }) + .iter() + .chain(self.derived_range_constraints.get(&cell)) + .cloned() + .reduce(|gc, rc| gc.conjunction(&rc)) + } +} + +pub trait FixedEvaluator { + fn evaluate(&self, _var: &AlgebraicReference, _row_offset: i32) -> Option { + None + } +} + +#[cfg(test)] +mod test { + + use pretty_assertions::assert_eq; + + use powdr_ast::analyzed::Analyzed; + use powdr_number::GoldilocksField; + + use crate::{ + constant_evaluator, + witgen::{global_constraints, jit::affine_symbolic_expression::Assertion, FixedData}, + }; + + use super::*; + + fn format_code(effects: &[Effect]) -> String { + effects + .iter() + .map(|effect| match effect { + Effect::Assignment(v, expr) => format!("{v} = {expr};"), + Effect::Assertion(Assertion { + lhs, + rhs, + expected_equal, + }) => { + format!( + "assert {lhs} {} {rhs};", + if *expected_equal { "==" } else { "!=" } + ) + } + Effect::MachineCall(id, args) => { + format!( + "lookup({id}, [{}]);", + args.iter() + .map(|arg| match arg { + MachineCallArgument::Known(k) => format!("Known({k})"), + MachineCallArgument::Unknown(u) => format!("Unknown({u})"), + }) + .join(", ") + ) + } + Effect::RangeConstraint(..) 
=> { + panic!("Range constraints should not be part of the code.") + } + }) + .join("\n") + } + + struct FixedEvaluatorForFixedData<'a>(&'a FixedData<'a, GoldilocksField>); + impl<'a> FixedEvaluator for FixedEvaluatorForFixedData<'a> { + fn evaluate(&self, var: &AlgebraicReference, row_offset: i32) -> Option { + assert!(var.is_fixed()); + let values = self.0.fixed_cols[&var.poly_id].values_max_size(); + let row = (row_offset as usize + var.next as usize) % values.len(); + Some(values[row]) + } + } + + fn solve_on_rows( + input: &str, + rows: &[i32], + known_cells: Vec<(&str, i32)>, + expected_complete: Option, + ) -> String { + let analyzed: Analyzed = + powdr_pil_analyzer::analyze_string(input).unwrap(); + let fixed_col_vals = constant_evaluator::generate(&analyzed); + let fixed_data = FixedData::new(&analyzed, &fixed_col_vals, &[], Default::default(), 0); + let (fixed_data, retained_identities) = + global_constraints::set_global_constraints(fixed_data, &analyzed.identities); + let known_cells = known_cells.iter().map(|(name, row_offset)| { + let id = fixed_data.try_column_by_name(name).unwrap().id; + Cell { + column_name: name.to_string(), + id, + row_offset: *row_offset, + } + }); + + let ref_eval = FixedEvaluatorForFixedData(&fixed_data); + let mut witgen = WitgenInference::new(&fixed_data, ref_eval, known_cells); + let mut complete = HashSet::new(); + let mut counter = 0; + let expected_complete = expected_complete.unwrap_or(retained_identities.len() * rows.len()); + while complete.len() != expected_complete { + counter += 1; + for row in rows { + for id in retained_identities.iter() { + if !complete.contains(&(id.id(), *row)) && witgen.process_identity(id, *row) { + complete.insert((id.id(), *row)); + } + } + } + assert!(counter < 10000, "Solving took more than 10000 rounds."); + } + format_code(&witgen.code()) + } + + #[test] + fn simple_polynomial_solving() { + let input = "let X; let Y; let Z; X = 1; Y = X + 1; Z * Y = X + 10;"; + let code = 
solve_on_rows(input, &[0], vec![], None); + assert_eq!(code, "X[0] = 1;\nY[0] = 2;\nZ[0] = -9223372034707292155;"); + } + + #[test] + fn fib() { + let input = "let X; let Y; X' = Y; Y' = X + Y;"; + let code = solve_on_rows(input, &[0, 1], vec![("X", 0), ("Y", 0)], None); + assert_eq!( + code, + "X[1] = Y[0];\nY[1] = (X[0] + Y[0]);\nX[2] = Y[1];\nY[2] = (X[1] + Y[1]);" + ); + } + + #[test] + fn fib_with_fixed() { + let input = " + namespace Fib(8); + col fixed FIRST = [1] + [0]*; + let x; + let y; + FIRST * (y - 1) = 0; + FIRST * (x - 1) = 0; + // This works in this test because we do not implement wrapping properly in this test. + x' - y = 0; + y' - (x + y) = 0; + "; + let code = solve_on_rows(input, &[0, 1, 2, 3], vec![], None); + assert_eq!( + code, + "Fib::y[0] = 1; +Fib::x[0] = 1; +Fib::x[1] = 1; +Fib::y[1] = 2; +Fib::x[2] = 2; +Fib::y[2] = 3; +Fib::x[3] = 3; +Fib::y[3] = 5; +Fib::x[4] = 5; +Fib::y[4] = 8;" + ); + } + + #[test] + fn xor() { + let input = " +namespace Xor(256 * 256); + let latch: col = |i| { if (i % 4) == 3 { 1 } else { 0 } }; + let FACTOR: col = |i| { 1 << (((i + 1) % 4) * 8) }; + + let a: int -> int = |i| i % 256; + let b: int -> int = |i| (i / 256) % 256; + let P_A: col = a; + let P_B: col = b; + let P_C: col = |i| a(i) ^ b(i); + + let A_byte; + let B_byte; + let C_byte; + + [ A_byte, B_byte, C_byte ] in [ P_A, P_B, P_C ]; + + let A; + let B; + let C; + + A' = A * (1 - latch) + A_byte * FACTOR; + B' = B * (1 - latch) + B_byte * FACTOR; + C' = C * (1 - latch) + C_byte * FACTOR; +"; + let code = solve_on_rows( + input, + // Use the second block to avoid wrap-around. + &[3, 4, 5, 6, 7], + vec![ + ("Xor::A", 7), + ("Xor::C", 7), // We solve it in reverse, just for fun. 
+ ], + Some(16), + ); + assert_eq!( + code, + "\ +Xor::A_byte[6] = ((Xor::A[7] & 4278190080) // 16777216); +Xor::A[6] = (Xor::A[7] & 16777215); +assert Xor::A[7] == (Xor::A[7] | 4294967295); +Xor::C_byte[6] = ((Xor::C[7] & 4278190080) // 16777216); +Xor::C[6] = (Xor::C[7] & 16777215); +assert Xor::C[7] == (Xor::C[7] | 4294967295); +Xor::A_byte[5] = ((Xor::A[6] & 16711680) // 65536); +Xor::A[5] = (Xor::A[6] & 65535); +assert Xor::A[6] == (Xor::A[6] | 16777215); +Xor::C_byte[5] = ((Xor::C[6] & 16711680) // 65536); +Xor::C[5] = (Xor::C[6] & 65535); +assert Xor::C[6] == (Xor::C[6] | 16777215); +lookup(0, [Known(Xor::A_byte[6]), Unknown(Xor::B_byte[6]), Known(Xor::C_byte[6])]); +Xor::A_byte[4] = ((Xor::A[5] & 65280) // 256); +Xor::A[4] = (Xor::A[5] & 255); +assert Xor::A[5] == (Xor::A[5] | 65535); +Xor::C_byte[4] = ((Xor::C[5] & 65280) // 256); +Xor::C[4] = (Xor::C[5] & 255); +assert Xor::C[5] == (Xor::C[5] | 65535); +lookup(0, [Known(Xor::A_byte[5]), Unknown(Xor::B_byte[5]), Known(Xor::C_byte[5])]); +Xor::A_byte[3] = Xor::A[4]; +Xor::C_byte[3] = Xor::C[4]; +lookup(0, [Known(Xor::A_byte[4]), Unknown(Xor::B_byte[4]), Known(Xor::C_byte[4])]); +lookup(0, [Known(Xor::A_byte[3]), Unknown(Xor::B_byte[3]), Known(Xor::C_byte[3])]); +Xor::B[4] = Xor::B_byte[3]; +Xor::B[5] = (Xor::B[4] + (Xor::B_byte[4] * 256)); +Xor::B[6] = (Xor::B[5] + (Xor::B_byte[5] * 65536)); +Xor::B[7] = (Xor::B[6] + (Xor::B_byte[6] * 16777216));" + ); + } +} diff --git a/executor/src/witgen/range_constraints.rs b/executor/src/witgen/range_constraints.rs index cdd76a76a0..55c3ed394f 100644 --- a/executor/src/witgen/range_constraints.rs +++ b/executor/src/witgen/range_constraints.rs @@ -168,6 +168,14 @@ impl RangeConstraint { mask: mask.unwrap_or_else(|| Self::from_range(min, max).mask), } } + + pub fn try_to_single_value(&self) -> Option { + if self.min == self.max { + Some(self.min) + } else { + None + } + } } /// The number of elements in an (inclusive) min/max range. 
From 12aca0e136cb39db408e47f9e67f5e50251e265c Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 12 Dec 2024 18:56:54 +0100 Subject: [PATCH 57/57] Various preparatory changes. (#2228) --- .../witgen/data_structures/mutable_state.rs | 38 +++++++++++++------ executor/src/witgen/machines/mod.rs | 14 +++++++ jit-compiler/src/compiler.rs | 4 +- jit-compiler/src/lib.rs | 2 +- number/src/goldilocks.rs | 4 ++ number/src/macros.rs | 4 ++ number/src/plonky3_macros.rs | 5 +++ number/src/traits.rs | 6 +++ 8 files changed, 63 insertions(+), 14 deletions(-) diff --git a/executor/src/witgen/data_structures/mutable_state.rs b/executor/src/witgen/data_structures/mutable_state.rs index 384329eb32..904a73e1e3 100644 --- a/executor/src/witgen/data_structures/mutable_state.rs +++ b/executor/src/witgen/data_structures/mutable_state.rs @@ -1,12 +1,12 @@ use std::{ - cell::RefCell, + cell::{RefCell, RefMut}, collections::{BTreeMap, HashMap}, }; use powdr_number::FieldElement; use crate::witgen::{ - machines::{KnownMachine, Machine}, + machines::{KnownMachine, LookupCell, Machine}, rows::RowPair, EvalError, EvalResult, QueryCallback, }; @@ -52,19 +52,35 @@ impl<'a, T: FieldElement, Q: QueryCallback> MutableState<'a, T, Q> { /// Call the machine responsible for the right-hand-side of an identity given its ID /// and the row pair of the caller. pub fn call(&self, identity_id: u64, caller_rows: &RowPair<'_, 'a, T>) -> EvalResult<'a, T> { + self.responsible_machine(identity_id)? + .process_plookup_timed(self, identity_id, caller_rows) + } + + /// Call the machine responsible for the right-hand-side of an identity given its ID, + /// use the direct interface. + #[allow(unused)] + pub fn call_direct( + &self, + identity_id: u64, + values: &mut [LookupCell<'_, T>], + ) -> Result> { + self.responsible_machine(identity_id)? 
+ .process_lookup_direct_timed(self, identity_id, values) + } + + fn responsible_machine( + &self, + identity_id: u64, + ) -> Result>, EvalError> { let machine_index = *self .identity_to_machine_index .get(&identity_id) .unwrap_or_else(|| panic!("No executor machine matched identity ID: {identity_id}")); - - self.machines[machine_index] - .try_borrow_mut() - .map_err(|_| { - EvalError::RecursiveMachineCalls(format!( - "Detected when processing identity with ID {identity_id}" - )) - })? - .process_plookup_timed(self, identity_id, caller_rows) + self.machines[machine_index].try_borrow_mut().map_err(|_| { + EvalError::RecursiveMachineCalls(format!( + "Detected when processing identity with ID {identity_id}" + )) + }) } /// Extracts the witness column values from the machines. diff --git a/executor/src/witgen/machines/mod.rs b/executor/src/witgen/machines/mod.rs index d632e7a6e1..9f1ed714f2 100644 --- a/executor/src/witgen/machines/mod.rs +++ b/executor/src/witgen/machines/mod.rs @@ -66,6 +66,19 @@ pub trait Machine<'a, T: FieldElement>: Send + Sync { result } + /// Like 'process_lookup_direct', but also records the time spent in this machine. + fn process_lookup_direct_timed<'b, 'c, Q: QueryCallback>( + &mut self, + mutable_state: &'b MutableState<'a, T, Q>, + identity_id: u64, + values: &mut [LookupCell<'c, T>], + ) -> Result> { + record_start(self.name()); + let result = self.process_lookup_direct(mutable_state, identity_id, values); + record_end(self.name()); + result + } + /// Returns a unique name for this machine. fn name(&self) -> &str; @@ -106,6 +119,7 @@ pub trait Machine<'a, T: FieldElement>: Send + Sync { fn identity_ids(&self) -> Vec; } +#[repr(C)] pub enum LookupCell<'a, T> { /// Value is known (i.e. 
an input) Input(&'a T), diff --git a/jit-compiler/src/compiler.rs b/jit-compiler/src/compiler.rs index a5fbb4348a..e6f73db9e8 100644 --- a/jit-compiler/src/compiler.rs +++ b/jit-compiler/src/compiler.rs @@ -218,10 +218,10 @@ pub fn call_cargo(code: &str) -> Result { if log::log_enabled!(log::Level::Debug) { let stderr = from_utf8(&out.stderr).unwrap_or("UTF-8 error in error message."); return Err(format!( - "Rust compiler error when JIT-compiling. Will use evaluator for all symbols. Error message:\n{stderr}." + "Rust compiler error when JIT-compiling. Will use interpreter instead. Error message:\n{stderr}." )); } else { - return Err("Rust compiler error when JIT-compiling. Will use evaluator for all symbols. Set log level to DEBUG for reason.".to_string()); + return Err("Rust compiler error when JIT-compiling. Will use interpreter instead. Set log level to DEBUG for reason.".to_string()); } } let extension = if cfg!(target_os = "windows") { diff --git a/jit-compiler/src/lib.rs b/jit-compiler/src/lib.rs index 1e331d487a..d0fa4c13d8 100644 --- a/jit-compiler/src/lib.rs +++ b/jit-compiler/src/lib.rs @@ -73,7 +73,7 @@ pub fn compile( let successful_hash = successful_symbol_names.iter().collect::>(); // TODO this should be changed back to Info after the introduction of the ToCol trait. log::debug!( - "Unable to generate code during JIT-compilation for the following symbols. Will use evaluator instead.\n{}", + "Unable to generate code during JIT-compilation for the following symbols. 
Will use interpreter instead.\n{}", requested_symbols .iter() .filter(|&sym| !successful_hash.contains(sym)) diff --git a/number/src/goldilocks.rs b/number/src/goldilocks.rs index 375dae1fc1..3503b71b83 100644 --- a/number/src/goldilocks.rs +++ b/number/src/goldilocks.rs @@ -398,6 +398,10 @@ impl FieldElement for GoldilocksField { // Undo the shift Some(v.wrapping_sub(SHIFT as u32) as i32) } + + fn has_direct_repr() -> bool { + true + } } impl LowerHex for GoldilocksField { diff --git a/number/src/macros.rs b/number/src/macros.rs index d50d49fbbd..b8c9877337 100644 --- a/number/src/macros.rs +++ b/number/src/macros.rs @@ -400,6 +400,10 @@ macro_rules! powdr_field { // Undo the shift Some(v.wrapping_sub(SHIFT as u32) as i32) } + + fn has_direct_repr() -> bool { + false + } } impl From<$ark_type> for $name { diff --git a/number/src/plonky3_macros.rs b/number/src/plonky3_macros.rs index 988fa09802..9bd09af63f 100644 --- a/number/src/plonky3_macros.rs +++ b/number/src/plonky3_macros.rs @@ -117,6 +117,11 @@ macro_rules! powdr_field_plonky3 { fn try_into_i32(&self) -> Option { Some(self.to_canonical_u32() as i32) } + + fn has_direct_repr() -> bool { + // No direct repr, because 'mod' is not always applied. + false + } } impl LowerHex for $name { diff --git a/number/src/traits.rs b/number/src/traits.rs index d15672480b..55809d1ad1 100644 --- a/number/src/traits.rs +++ b/number/src/traits.rs @@ -185,6 +185,12 @@ pub trait FieldElement: /// As conventional, negative values are in relation to 0 in the field. /// Returns None if out of the range [0 - 2^31, 2^31). fn try_into_i32(&self) -> Option; + + /// Returns `true` if values of this type are directly stored as their integer + /// value (i.e. not in montgomery representation and there are also no + /// additional fields), i.e. the `to_integer` function can be implemented as + /// a mem::transmute operation on pointers. + fn has_direct_repr() -> bool; } #[cfg(test)]