Skip to content

Commit

Permalink
refactor: do G2 calculations after G1 calculations (filecoin-project#272
Browse files Browse the repository at this point in the history
)

Splitting the work into distinct G1 and G2 phases prepares for future
changes (when separate kernels are used for G1 and G2). This change doesn't
harm performance: on 32GiB sectors, the variation between runs is larger
than the difference between running with and without this change.

For micro benchmarks the `groth16::proof::test_with_bls12_381::serialization`
test was run. Below are the average prover times. Each test was always
run twice in a row, and the better of the two runs was used.

|    Version    | #GPUs |   CUDA    |  OpenCL   |
| :------------ | ----: | --------: | --------: |
| prior         |     1 | 167.665ms | 379.428ms |
| this change   |     1 | 165.597ms | 374.507ms |
| prior         |     4 | 197.071ms | 1155.59ms |
| this change   |     4 | 197.899ms | 1080.51ms |

The command the tests were run with:

    RUST_LOG=info cargo test --release --features opencl groth16::proof::test_with_bls12_381::serialization 2>&1|grep 'prover time'|awk '/ms$/ { print substr($6, 1, length($6) - 2); next } /s$/ { print substr($6, 1, length($6) - 1) * 1000 }'|awk '{ total += $1; count++ } END { print total/count }'

For running on a single GPU, `CUDA_VISIBLE_DEVICES=0` was set. For
CUDA vs. OpenCL, the `--features` flag was set to either `cuda` or
`opencl`.
  • Loading branch information
vmx authored Jun 15, 2022
1 parent 1ad35f0 commit 5bdd144
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 67 deletions.
2 changes: 2 additions & 0 deletions src/groth16/proof.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ mod test_with_bls12_381 {

#[test]
fn serialization() {
env_logger::try_init().ok();

struct MySillyCircuit<Scalar: PrimeField> {
a: Option<Scalar>,
b: Option<Scalar>,
Expand Down
171 changes: 104 additions & 67 deletions src/groth16/prover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,91 +415,128 @@ where
}
});

debug!("get_a b_g1 b_g2");
debug!("get a b_g1");
let (a_inputs_source, a_aux_source) = params_a.unwrap()?;
let (b_g2_inputs_source, b_g2_aux_source) = params_b_g2.unwrap()?;
let params_b_g1_opt = params_b_g1.transpose()?;

debug!("multiexp a b_g1 b_g2");
let inputs = provers
.into_iter()
.zip(input_assignments.iter())
.zip(aux_assignments.iter())
.map(|((prover, input_assignment), aux_assignment)| {
let a_inputs = multiexp(
worker,
a_inputs_source.clone(),
FullDensity,
input_assignment.clone(),
&mut multiexp_kern,
);

let a_aux = multiexp(
worker,
a_aux_source.clone(),
Arc::new(prover.a_aux_density),
aux_assignment.clone(),
&mut multiexp_kern,
);
let b_input_density = Arc::new(prover.b_input_density);
let b_aux_density = Arc::new(prover.b_aux_density);

let b_g1_inputs_aux_opt =
params_b_g1_opt
.as_ref()
.map(|(b_g1_inputs_source, b_g1_aux_source)| {
(
multiexp(
worker,
b_g1_inputs_source.clone(),
b_input_density.clone(),
input_assignment.clone(),
&mut multiexp_kern,
),
multiexp(
worker,
b_g1_aux_source.clone(),
b_aux_density.clone(),
aux_assignment.clone(),
&mut multiexp_kern,
),
)
});

let b_g2_inputs = multiexp(
worker,
b_g2_inputs_source.clone(),
b_input_density,
input_assignment.clone(),
&mut multiexp_kern,
);
let b_g2_aux = multiexp(
worker,
b_g2_aux_source.clone(),
b_aux_density,
aux_assignment.clone(),
&mut multiexp_kern,
);

(a_inputs, a_aux, b_g1_inputs_aux_opt, b_g2_inputs, b_g2_aux)
let densities = provers
.iter_mut()
.map(|prover| {
let a_aux_density = std::mem::take(&mut prover.a_aux_density);
let b_input_density = std::mem::take(&mut prover.b_input_density);
let b_aux_density = std::mem::take(&mut prover.b_aux_density);
(
Arc::new(a_aux_density),
Arc::new(b_input_density),
Arc::new(b_aux_density),
)
})
.collect::<Vec<_>>();
drop(multiexp_kern);
drop(provers);

debug!("multiexp a b_g1");
let inputs_g1 = input_assignments
.iter()
.zip(aux_assignments.iter())
.zip(densities.iter())
.map(
|(
(input_assignment, aux_assignment),
(a_aux_density, b_input_density, b_aux_density),
)| {
let a_inputs = multiexp(
worker,
a_inputs_source.clone(),
FullDensity,
input_assignment.clone(),
&mut multiexp_kern,
);

let a_aux = multiexp(
worker,
a_aux_source.clone(),
a_aux_density.clone(),
aux_assignment.clone(),
&mut multiexp_kern,
);

let b_g1_inputs_aux_opt =
params_b_g1_opt
.as_ref()
.map(|(b_g1_inputs_source, b_g1_aux_source)| {
(
multiexp(
worker,
b_g1_inputs_source.clone(),
b_input_density.clone(),
input_assignment.clone(),
&mut multiexp_kern,
),
multiexp(
worker,
b_g1_aux_source.clone(),
b_aux_density.clone(),
aux_assignment.clone(),
&mut multiexp_kern,
),
)
});

(a_inputs, a_aux, b_g1_inputs_aux_opt)
},
)
.collect::<Vec<_>>();
drop(a_inputs_source);
drop(a_aux_source);
drop(params_b_g1_opt);

debug!("get b_g2");
let (b_g2_inputs_source, b_g2_aux_source) = params_b_g2.unwrap()?;

debug!("multiexp b_g2");
let inputs_g2 = input_assignments
.iter()
.zip(aux_assignments.iter())
.zip(densities.iter())
.map(
|((input_assignment, aux_assignment), (_, b_input_density, b_aux_density))| {
let b_g2_inputs = multiexp(
worker,
b_g2_inputs_source.clone(),
b_input_density.clone(),
input_assignment.clone(),
&mut multiexp_kern,
);
let b_g2_aux = multiexp(
worker,
b_g2_aux_source.clone(),
b_aux_density.clone(),
aux_assignment.clone(),
&mut multiexp_kern,
);

(b_g2_inputs, b_g2_aux)
},
)
.collect::<Vec<_>>();
drop(multiexp_kern);
drop(densities);
drop(b_g2_inputs_source);
drop(b_g2_aux_source);

debug!("proofs");
let proofs = h_s
.into_iter()
.zip(l_s.into_iter())
.zip(inputs.into_iter())
.zip(inputs_g1.into_iter())
.zip(inputs_g2.into_iter())
.zip(r_s.into_iter())
.zip(s_s.into_iter())
.map(
|((((h, l), (a_inputs, a_aux, b_g1_inputs_aux_opt, b_g2_inputs, b_g2_aux)), r), s)| {
|(
((((h, l), (a_inputs, a_aux, b_g1_inputs_aux_opt)), (b_g2_inputs, b_g2_aux)), r),
s,
)| {
if (vk.delta_g1.is_identity() | vk.delta_g2.is_identity()).into() {
// If this element is zero, someone is trying to perform a
// subversion-CRS attack.
Expand Down

0 comments on commit 5bdd144

Please sign in to comment.