Skip to content

Commit

Permalink
fix: generate OpenCL source at compile time (filecoin-project#28)
Browse files Browse the repository at this point in the history
The OpenCL kernel source could be generated at run-time, but with this
change it makes it more similar to the CUDA version, where we need to
compile the kernel at compile-time. This is to improve the symmetry
between those features and should also simplify the code for future
changes.
  • Loading branch information
vmx authored Jul 7, 2022
1 parent cc48fd2 commit 1e24689
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 113 deletions.
214 changes: 130 additions & 84 deletions ec-gpu-gen/build.rs
Original file line number Diff line number Diff line change
@@ -1,106 +1,152 @@
/// The build script is needed to compile the CUDA kernel.
///
/// It will compile the kernel at compile time if the `fft` and/or the `multiexp` features are
/// enabled.
/// enabled. It also generates the OpenCL source code at compile time.
#[cfg(all(
any(feature = "fft", feature = "multiexp"),
not(feature = "cargo-clippy")
))]
#[path = "src/source.rs"]
mod source;

/// The build script is use to generate the CUDA kernel and OpenCL source at compile-time, if the
/// if the `fft` and/or the `multiexp` features are enabled.
#[cfg(all(
feature = "cuda",
any(feature = "fft", feature = "multiexp"),
not(feature = "cargo-clippy")
))]
fn main() {
#[cfg(feature = "cuda")]
kernel::generate_cuda();
#[cfg(feature = "opencl")]
kernel::generate_opencl();
}

// This is a hack for the case when we run Clippy while we don't generate any GPU kernel. For
// Clippy we don't need a proper source or properly compiled kernel, but just some arbitrary bytes.
#[cfg(not(all(
any(feature = "fft", feature = "multiexp"),
not(feature = "cargo-clippy")
)))]
fn main() {
println!("cargo:rustc-env=CUDA_KERNEL_FATBIN=../build.rs");
println!("cargo:rustc-env=OPENCL_KERNEL_SOURCE=../build.rs");
}

// Put the code into a module, so that we need to repeat the feature flags less often.
#[cfg(all(
any(feature = "fft", feature = "multiexp"),
not(feature = "cargo-clippy")
))]
mod kernel {
use std::path::PathBuf;
use std::process::Command;
use std::{env, fs};

use blstrs::Bls12;
use sha2::{Digest, Sha256};

#[path = "src/source.rs"]
mod source;

// This is a hack for the case when the documentation is built on docs.rs. For the
// documentation we don't need a properly compiled kernel, but just some arbitrary bytes.
if env::var("DOCS_RS").is_ok() {
println!("cargo:rustc-env=CUDA_KERNEL_FATBIN=../build.rs");
return;
}
#[cfg(feature = "cuda")]
pub(crate) fn generate_cuda() {
use sha2::{Digest, Sha256};

let kernel_source = source::gen_source::<Bls12, source::Limb32>();

let out_dir = env::var("OUT_DIR").expect("OUT_DIR was not set.");

// Make it possible to override the default options. Though the source and output file is
// always set automatically.
let mut nvcc = match env::var("EC_GPU_CUDA_NVCC_ARGS") {
Ok(args) => execute::command(format!("nvcc {}", args)),
Err(_) => {
let mut command = Command::new("nvcc");
command
.arg("--optimize=6")
// Compile with as many threads as CPUs are available.
.arg("--threads=0")
.arg("--fatbin")
.arg("--gpu-architecture=sm_86")
.arg("--generate-code=arch=compute_86,code=sm_86")
.arg("--generate-code=arch=compute_80,code=sm_80")
.arg("--generate-code=arch=compute_75,code=sm_75");
command
// This is a hack for the case when the documentation is built on docs.rs. For the
// documentation we don't need a properly compiled kernel, but just some arbitrary bytes.
if env::var("DOCS_RS").is_ok() {
println!("cargo:rustc-env=CUDA_KERNEL_FATBIN=../build.rs");
return;
}
};

// Hash the source and and the compile flags. Use that as the filename, so that the kernel is
// only rebuilt if any of them change.
let mut hasher = Sha256::new();
hasher.update(kernel_source.as_bytes());
hasher.update(&format!("{:?}", &nvcc));
let kernel_digest = hex::encode(hasher.finalize());

let source_path: PathBuf = [&out_dir, &format!("{}.cu", &kernel_digest)]
.iter()
.collect();
let fatbin_path: PathBuf = [&out_dir, &format!("{}.fatbin", &kernel_digest)]
.iter()
.collect();

fs::write(&source_path, &kernel_source).unwrap_or_else(|_| {
panic!(
"Cannot write kernel source at {}.",
source_path.to_str().unwrap()
)
});

// Only compile if the output doesn't exist yet.
if !fatbin_path.as_path().exists() {
let status = nvcc
.arg("--output-file")
.arg(&fatbin_path)
.arg(&source_path)
.status()
.expect("Cannot run nvcc. Install the NVIDIA toolkit or disable the `cuda` feature.");

if !status.success() {

let kernel_source = crate::source::gen_source::<Bls12, crate::source::Limb32>();
let out_dir = env::var("OUT_DIR").expect("OUT_DIR was not set.");

// Make it possible to override the default options. Though the source and output file is
// always set automatically.
let mut nvcc = match env::var("EC_GPU_CUDA_NVCC_ARGS") {
Ok(args) => execute::command(format!("nvcc {}", args)),
Err(_) => {
let mut command = std::process::Command::new("nvcc");
command
.arg("--optimize=6")
// Compile with as many threads as CPUs are available.
.arg("--threads=0")
.arg("--fatbin")
.arg("--gpu-architecture=sm_86")
.arg("--generate-code=arch=compute_86,code=sm_86")
.arg("--generate-code=arch=compute_80,code=sm_80")
.arg("--generate-code=arch=compute_75,code=sm_75");
command
}
};

// Hash the source and the compile flags. Use that as the filename, so that the kernel is only
// rebuilt if any of them change.
let mut hasher = Sha256::new();
hasher.update(kernel_source.as_bytes());
hasher.update(&format!("{:?}", &nvcc));
let kernel_digest = hex::encode(hasher.finalize());

let source_path: PathBuf = [&out_dir, &format!("{}.cu", &kernel_digest)]
.iter()
.collect();
let fatbin_path: PathBuf = [&out_dir, &format!("{}.fatbin", &kernel_digest)]
.iter()
.collect();

fs::write(&source_path, &kernel_source).unwrap_or_else(|_| {
panic!(
"nvcc failed. See the kernel source at {}",
"Cannot write kernel source at {}.",
source_path.to_str().unwrap()
);
)
});

// Only compile if the output doesn't exist yet.
if !fatbin_path.as_path().exists() {
let status = nvcc
.arg("--output-file")
.arg(&fatbin_path)
.arg(&source_path)
.status()
.expect(
"Cannot run nvcc. Install the NVIDIA toolkit or disable the `cuda` feature.",
);

if !status.success() {
panic!(
"nvcc failed. See the kernel source at {}",
source_path.to_str().unwrap()
);
}
}

// The idea to put the path to the farbin into a compile-time env variable is from
// https://github.com/LutzCle/fast-interconnects-demo/blob/b80ea8e04825167f486ab8ac1b5d67cf7dd51d2c/rust-demo/build.rs
println!(
"cargo:rustc-env=CUDA_KERNEL_FATBIN={}",
fatbin_path.to_str().unwrap()
);
}

// The idea to put the path to the farbin into a compile-time env variable is from
// https://github.com/LutzCle/fast-interconnects-demo/blob/b80ea8e04825167f486ab8ac1b5d67cf7dd51d2c/rust-demo/build.rs
println!(
"cargo:rustc-env=CUDA_KERNEL_FATBIN={}",
fatbin_path.to_str().unwrap()
);
}
#[cfg(feature = "opencl")]
pub(crate) fn generate_opencl() {
let kernel_source = crate::source::gen_source::<Bls12, crate::source::Limb64>();
let out_dir = env::var("OUT_DIR").expect("OUT_DIR was not set.");

#[cfg(not(all(
feature = "cuda",
any(feature = "fft", feature = "multiexp"),
not(feature = "cargo-clippy")
)))]
fn main() {
// This is a hack for the case when the `cuda` and `cargo-clippy` features are enabled. For
// Clippy we don't need a properly compiled kernel, but just some arbitrary bytes.
println!("cargo:rustc-env=CUDA_KERNEL_FATBIN=../build.rs");
// Generating the kernel source is cheap, hence use a fixed name and override it on every
// build.
let source_path: PathBuf = [&out_dir, "kernel.cl"].iter().collect();

fs::write(&source_path, &kernel_source).unwrap_or_else(|_| {
panic!(
"Cannot write kernel source at {}.",
source_path.to_str().unwrap()
)
});

// For OpenCL we only need the kernel source, it is compiled at runtime.
#[cfg(feature = "opencl")]
println!(
"cargo:rustc-env=OPENCL_KERNEL_SOURCE={}",
source_path.to_str().unwrap()
);
}
}
10 changes: 3 additions & 7 deletions ec-gpu-gen/src/fft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ use ec_gpu::GpuEngine;
use ff::Field;
use log::{error, info};
use pairing::Engine;
use rust_gpu_tools::{program_closures, Device, LocalBuffer, Program, Vendor};
use rust_gpu_tools::{program_closures, Device, LocalBuffer, Program};

use crate::threadpool::THREAD_POOL;
use crate::{
error::{EcError, EcResult},
program, Limb32, Limb64,
program,
};

const LOG2_MAX_ELEMENTS: usize = 32; // At most 2^32 elements is supported.
Expand Down Expand Up @@ -40,11 +40,7 @@ impl<'a, E: Engine + GpuEngine> SingleFftKernel<'a, E> {
device: &Device,
maybe_abort: Option<&'a (dyn Fn() -> bool + Send + Sync)>,
) -> EcResult<Self> {
let source = match device.vendor() {
Vendor::Nvidia => crate::gen_source::<E, Limb32>(),
_ => crate::gen_source::<E, Limb64>(),
};
let program = program::program::<E>(device, &source)?;
let program = program::program(device)?;

Ok(SingleFftKernel {
program,
Expand Down
9 changes: 2 additions & 7 deletions ec-gpu-gen/src/multiexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@ use ff::PrimeField;
use group::{prime::PrimeCurveAffine, Group};
use log::{error, info};
use pairing::Engine;
use rust_gpu_tools::{program_closures, Device, Program, Vendor};
use rust_gpu_tools::{program_closures, Device, Program};
use yastl::Scope;

use crate::{
error::{EcError, EcResult},
program,
threadpool::Worker,
Limb32, Limb64,
};

/// On the GPU, the exponents are split into windows, this is the maximum number of such windows.
Expand Down Expand Up @@ -113,11 +112,7 @@ where
let work_units = work_units(compute_units, compute_capability);
let chunk_size = calc_chunk_size::<E>(mem, work_units);

let source = match device.vendor() {
Vendor::Nvidia => crate::gen_source::<E, Limb32>(),
_ => crate::gen_source::<E, Limb64>(),
};
let program = program::program::<E>(device, &source)?;
let program = program::program(device)?;

Ok(SingleMultiexpKernel {
program,
Expand Down
19 changes: 4 additions & 15 deletions ec-gpu-gen/src/program.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use std::env;

use ec_gpu::GpuEngine;
use log::info;
use pairing::Engine;
#[cfg(feature = "cuda")]
use rust_gpu_tools::cuda;
#[cfg(feature = "opencl")]
Expand Down Expand Up @@ -49,23 +47,13 @@ fn select_framework(default_framework: Framework) -> EcResult<Framework> {
///
/// If the device supports CUDA, then CUDA is used, else OpenCL. You can force a selection with
/// the environment variable `EC_GPU_FRAMEWORK`, which can be set either to `cuda` or `opencl`.
pub fn program<E>(device: &Device, source: &str) -> EcResult<Program>
where
E: Engine + GpuEngine,
{
pub fn program(device: &Device) -> EcResult<Program> {
let framework = select_framework(device.framework())?;
program_use_framework::<E>(device, source, &framework)
program_use_framework(device, &framework)
}

/// Returns the program for the specified [`rust_gpu_tools::Framework`].
pub fn program_use_framework<E>(
device: &Device,
#[allow(unused_variables)] source: &str,
framework: &Framework,
) -> EcResult<Program>
where
E: Engine + GpuEngine,
{
pub fn program_use_framework(device: &Device, framework: &Framework) -> EcResult<Program> {
match framework {
#[cfg(feature = "cuda")]
Framework::Cuda => {
Expand All @@ -78,6 +66,7 @@ where
#[cfg(feature = "opencl")]
Framework::Opencl => {
info!("Using kernel on OpenCL.");
let source = include_str!(env!("OPENCL_KERNEL_SOURCE"));
let opencl_device = device.opencl_device().ok_or(GPUError::DeviceNotFound)?;
let program = opencl::Program::from_opencl(opencl_device, source)?;
Ok(Program::Opencl(program))
Expand Down

0 comments on commit 1e24689

Please sign in to comment.