Skip to content

Commit

Permalink
[Diffusion] Add C++ dpm solver (PaddlePaddle#714)
Browse files Browse the repository at this point in the history
* Add BetaForAlphaBar, ConvertModelOutput, SetTimesteps, and constructor for DPMSolverMultistepScheduler

* tmp

* Add DPMSolverFirstOrderUpdate

* Add ScaleModelInput

* Add MultiStepDPMSolverSecondOrderUpdate

* add MultiStepDPMSolverThirdOrderUpdate

* Add Step

* Add FASTDEPLOY_DECL

* Add AddNoise

* Fix operator

* update

* Fix DPMSolverMultistepScheduler

* Upgrade Slice

* Fix DPMSolverFirstOrderUpdate

* remove FASTDEPLOY_DECL

* Add config for dpm solver
  • Loading branch information
joey12300 authored Nov 30, 2022
1 parent 3f8ed9b commit d95094c
Show file tree
Hide file tree
Showing 14 changed files with 675 additions and 11 deletions.
27 changes: 27 additions & 0 deletions examples/multimodal/stable_diffusion/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PROJECT(main C CXX)
CMAKE_MINIMUM_REQUIRED(VERSION 3.10)

# FASTDEPLOY_INSTALL_DIR is a path, not a boolean, so declare it as a cached
# PATH variable. option() only declares ON/OFF switches and would silently
# default the value to OFF instead of holding a directory.
set(FASTDEPLOY_INSTALL_DIR "" CACHE PATH "Path of downloaded fastdeploy sdk.")
if(NOT FASTDEPLOY_INSTALL_DIR)
  message(FATAL_ERROR
          "Please set -DFASTDEPLOY_INSTALL_DIR to the fastdeploy sdk path.")
endif()

set(THIRD_LIBS "")
# Imports the FastDeploy variables used below (FASTDEPLOY_INCS, FASTDEPLOY_LIBS).
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

include_directories(${FASTDEPLOY_INCS})

# Every .cc under this example directory is part of the executable.
file(GLOB_RECURSE ALL_SRCS ${PROJECT_SOURCE_DIR}/*.cc)

add_executable(main ${ALL_SRCS})
target_link_libraries(main ${FASTDEPLOY_LIBS} ${THIRD_LIBS})

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "./scheduler.h"
#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {

// Multistep DPM-Solver scheduler for diffusion model sampling. The API
// mirrors the Python diffusers DPMSolverMultistepScheduler; the algorithm
// itself lives in the corresponding .cc file (not shown here).
class DPMSolverMultistepScheduler : public Scheduler {
 public:
  // Defaults mirror the diffusers scheduler. NOTE(review): presumably a
  // non-empty `trained_betas` overrides the beta_start/beta_end schedule —
  // confirm against the .cc implementation.
  DPMSolverMultistepScheduler(int num_train_timesteps = 1000,
                              float beta_start = 0.0001, float beta_end = 0.02,
                              const std::string& beta_schedule = "linear",
                              const std::vector<float>& trained_betas = {},
                              int solver_order = 2, bool predict_epsilon = true,
                              bool thresholding = false,
                              float dynamic_thresholding_ratio = 0.995,
                              float sample_max_value = 1.0,
                              const std::string& algorithm_type = "dpmsolver++",
                              const std::string& solver_type = "midpoint",
                              bool lower_order_final = true);
  // Writes a beta schedule of length `num_diffusion_timesteps` into `out`,
  // with each beta clipped to `max_beta` (by the parameter names).
  void BetaForAlphaBar(FDTensor* out, int num_diffusion_timesteps,
                       float max_beta = 0.999);
  // Converts the raw model output at `timestep` into the representation the
  // configured algorithm_type expects, writing the result to `out`.
  void ConvertModelOutput(const FDTensor& model_output, int timestep,
                          const FDTensor& sample, FDTensor* out);
  // First-order DPM-Solver update from `timestep` to `prev_timestep`.
  void DPMSolverFirstOrderUpdate(const FDTensor& model_output, int timestep,
                                 int prev_timestep, const FDTensor& sample,
                                 FDTensor* out);
  // Second-order multistep update; takes the list of recent model outputs
  // with their timesteps.
  void MultiStepDPMSolverSecondOrderUpdate(
      const std::vector<FDTensor>& model_output_list,
      const std::vector<int>& timestep_list, int prev_timestep,
      const FDTensor& sample, FDTensor* out);
  // Third-order multistep update; same inputs as the second-order variant.
  void MultiStepDPMSolverThirdOrderUpdate(
      const std::vector<FDTensor>& model_output_list,
      const std::vector<int>& timestep_list, int prev_timestep,
      const FDTensor& sample, FDTensor* out);
  // Scheduler interface overrides (see scheduler.h).
  void SetTimesteps(int num_inference_steps) override;
  void Step(const FDTensor& model_output, int timestep, const FDTensor& sample,
            FDTensor* prev_sample) override;
  void ScaleModelInput(const FDTensor& sample, FDTensor* out,
                       const std::vector<FDTensor>& timesteps = {}) override;
  void AddNoise(const FDTensor& original_samples, const FDTensor& noise,
                const FDTensor& timesteps, FDTensor* out) override;
  // Mirrors the constructor arguments (same names, trailing underscore).
  struct Config {
    int num_train_timesteps_;
    float beta_start_;
    float beta_end_;
    std::string beta_schedule_;
    int solver_order_;
    bool predict_epsilon_;
    bool thresholding_;
    float dynamic_thresholding_ratio_;
    float sample_max_value_;
    std::string algorithm_type_;
    std::string solver_type_;
    bool lower_order_final_;
  } config;

 private:
  FDTensor betas_;           // beta schedule tensor
  FDTensor alphas_;          // presumably 1 - betas_ — confirm in .cc
  FDTensor alphas_cumprod_;  // presumably cumprod of alphas_ — confirm in .cc
  FDTensor alpha_t_;
  FDTensor sigma_t_;
  FDTensor lambda_t_;
  int num_inference_steps_;
  FDTensor timesteps_;       // timesteps selected by SetTimesteps
  int lower_order_nums_;
  // Recent model outputs kept for the multistep (2nd/3rd order) updates.
  std::vector<FDTensor> model_outputs_;
};

} // namespace fastdeploy
35 changes: 35 additions & 0 deletions examples/multimodal/stable_diffusion/cpp/main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dpm_solver_multistep_scheduler.h"
#include <iostream>

// Smoke test: constructs a DPMSolverMultistepScheduler with the stable
// diffusion style configuration used by this example, then exits.
int main() {
  // Name every hyper-parameter instead of relying on positional comments.
  const int num_train_timesteps = 1000;
  const float beta_start = 0.00085;
  const float beta_end = 0.012;
  const std::string beta_schedule = "scaled_linear";
  const std::vector<float> trained_betas = {};
  const int solver_order = 2;
  const bool predict_epsilon = true;
  const bool thresholding = false;
  const float dynamic_thresholding_ratio = 0.995;
  const float sample_max_value = 1.0;
  const std::string algorithm_type = "dpmsolver++";
  const std::string solver_type = "midpoint";
  const bool lower_order_final = true;

  fastdeploy::DPMSolverMultistepScheduler dpm(
      num_train_timesteps, beta_start, beta_end, beta_schedule, trained_betas,
      solver_order, predict_epsilon, thresholding, dynamic_thresholding_ratio,
      sample_max_value, algorithm_type, solver_type, lower_order_final);

  return 0;
}
31 changes: 31 additions & 0 deletions examples/multimodal/stable_diffusion/cpp/scheduler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {

// Abstract interface implemented by every diffusion scheduler.
//
// Fixes vs. the original: the pure-virtual methods were private (the default
// access for `class`), making the interface uncallable through a Scheduler
// reference/pointer, and the base had no virtual destructor, so deleting a
// concrete scheduler through `Scheduler*` was undefined behavior.
class Scheduler {
 public:
  // Virtual destructor: safe deletion of derived schedulers via base pointer.
  virtual ~Scheduler() = default;
  // Sets the discrete timesteps used at inference time.
  virtual void SetTimesteps(int num_inference_steps) = 0;
  // Computes the previous sample from `model_output` at `timestep`,
  // writing it to `prev_sample`.
  virtual void Step(const FDTensor& model_output, int timestep,
                    const FDTensor& sample, FDTensor* prev_sample) = 0;
  // Scales the model input `sample` into `out` as the scheduler requires.
  virtual void ScaleModelInput(const FDTensor& sample, FDTensor* out,
                               const std::vector<FDTensor>& timesteps = {}) = 0;
  // Combines `original_samples` with `noise` according to `timesteps`,
  // writing the result to `out`.
  virtual void AddNoise(const FDTensor& original_samples, const FDTensor& noise,
                        const FDTensor& timesteps, FDTensor* out) = 0;
};

} // namespace fastdeploy
4 changes: 1 addition & 3 deletions fastdeploy/core/fd_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/fd_scalar.h"
#include "fastdeploy/core/float16.h"
#include "fastdeploy/utils/utils.h"

Expand Down Expand Up @@ -81,8 +80,7 @@ const void* FDTensor::CpuData() const {

void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
const FDDataType& data_type, void* data_buffer,
const Device& new_device,
int new_device_id) {
const Device& new_device, int new_device_id) {
dtype = data_type;
shape.assign(new_shape.begin(), new_shape.end());
external_data_ptr = data_buffer;
Expand Down
3 changes: 1 addition & 2 deletions fastdeploy/core/fd_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@
#include <vector>

#include "fastdeploy/core/allocate.h"
#include "fastdeploy/core/fd_scalar.h"
#include "fastdeploy/core/fd_type.h"

namespace fastdeploy {

struct Scalar;

struct FASTDEPLOY_DECL FDTensor {
// std::vector<int8_t> data;
void* buffer_ = nullptr;
Expand Down
7 changes: 4 additions & 3 deletions fastdeploy/function/clip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ void ClipKernel(const FDTensor& x, double min, double max, FDTensor* out) {
"max should be greater than or equal to min. But received min = %f, "
"max = %f",
static_cast<float>(min_), static_cast<float>(max_));

out->Allocate(x.Shape(), x.Dtype());
FDTensor tmp;
tmp.Allocate(x.Shape(), x.Dtype());
const T* x_data = reinterpret_cast<const T*>(x.Data());

int64_t numel = x.Numel();
T* out_data = reinterpret_cast<T*>(out->Data());
T* out_data = reinterpret_cast<T*>(tmp.Data());

std::transform(x_data, x_data + numel, out_data, ClipFunctor<T>(min_, max_));
*out = std::move(tmp);
}

void Clip(const FDTensor& x, double min, double max, FDTensor* out) {
Expand Down
21 changes: 21 additions & 0 deletions fastdeploy/function/elementwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,25 @@ FDTensor operator/(const FDTensor& x, const FDTensor& y) {
return out;
}

// Explicitly instantiates the FDTensor-vs-scalar operator templates declared
// in elementwise.h for every supported arithmetic scalar type, so each
// combination gets a definition emitted in this translation unit.
#define INSTANTIATE_OPERATOR(operation_type) \
template FDTensor operator operation_type(const FDTensor& x, bool y); \
template FDTensor operator operation_type(const FDTensor& x, uint8_t y); \
template FDTensor operator operation_type(const FDTensor& x, int16_t y); \
template FDTensor operator operation_type(const FDTensor& x, int y); \
template FDTensor operator operation_type(const FDTensor& x, int64_t y); \
template FDTensor operator operation_type(const FDTensor& x, float y); \
template FDTensor operator operation_type(const FDTensor& x, double y); \
template FDTensor operator operation_type(bool x, const FDTensor& y); \
template FDTensor operator operation_type(uint8_t x, const FDTensor& y); \
template FDTensor operator operation_type(int16_t x, const FDTensor& y); \
template FDTensor operator operation_type(int x, const FDTensor& y); \
template FDTensor operator operation_type(int64_t x, const FDTensor& y); \
template FDTensor operator operation_type(float x, const FDTensor& y); \
template FDTensor operator operation_type(double x, const FDTensor& y)

// Instantiate the scalar overloads for the four element-wise operators.
INSTANTIATE_OPERATOR(+);
INSTANTIATE_OPERATOR(-);
INSTANTIATE_OPERATOR(*);
INSTANTIATE_OPERATOR(/);

} // namespace fastdeploy
34 changes: 34 additions & 0 deletions fastdeploy/function/elementwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

#pragma once

#include "fastdeploy/core/fd_scalar.h"
#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {

namespace function {

/** Excute the add operation for input FDTensors. *out = x + y.
Expand Down Expand Up @@ -62,10 +64,42 @@ FASTDEPLOY_DECL void Maximum(const FDTensor& x, const FDTensor& y,

// Element-wise binary operators on FDTensor. The tensor/tensor overloads are
// defined out of line; the templates below wrap an arithmetic scalar in an
// FDTensor built from Scalar and forward to the tensor/tensor overload,
// enabling expressions such as `1.0f - x` or `x * 2`.
FASTDEPLOY_DECL FDTensor operator+(const FDTensor& x, const FDTensor& y);

// tensor + scalar
template <typename T> FDTensor operator+(const FDTensor& x, T y) {
  return x + FDTensor(Scalar(y));
}

// scalar + tensor
template <typename T> FDTensor operator+(T x, const FDTensor& y) {
  return FDTensor(Scalar(x)) + y;
}

FASTDEPLOY_DECL FDTensor operator-(const FDTensor& x, const FDTensor& y);

// tensor - scalar
template <typename T> FDTensor operator-(const FDTensor& x, T y) {
  return x - FDTensor(Scalar(y));
}

// scalar - tensor
template <typename T> FDTensor operator-(T x, const FDTensor& y) {
  return FDTensor(Scalar(x)) - y;
}

FASTDEPLOY_DECL FDTensor operator*(const FDTensor& x, const FDTensor& y);

// tensor * scalar
template <typename T> FDTensor operator*(const FDTensor& x, T y) {
  return x * FDTensor(Scalar(y));
}

// scalar * tensor
template <typename T> FDTensor operator*(T x, const FDTensor& y) {
  return FDTensor(Scalar(x)) * y;
}

FASTDEPLOY_DECL FDTensor operator/(const FDTensor& x, const FDTensor& y);

// tensor / scalar
template <typename T> FDTensor operator/(const FDTensor& x, T y) {
  return x / FDTensor(Scalar(y));
}

// scalar / tensor
template <typename T> FDTensor operator/(T x, const FDTensor& y) {
  return FDTensor(Scalar(x)) / y;
}

} // namespace fastdeploy
8 changes: 5 additions & 3 deletions fastdeploy/function/elementwise_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,12 @@ void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y,
GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
y_dims_array.data(), out_dims_array.data(), max_dim,
axis);
z->Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
FDTensor tmp;
tmp.Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
CommonForwardBroadcastCPU<Functor, T, OutType>(
x, y, z, x_dims_array.data(), y_dims_array.data(), out_dims_array.data(),
max_dim, func, is_xsize_larger);
x, y, &tmp, x_dims_array.data(), y_dims_array.data(),
out_dims_array.data(), max_dim, func, is_xsize_larger);
*z = std::move(tmp);
}

template <typename Functor, typename T, typename OutType = T>
Expand Down
15 changes: 15 additions & 0 deletions fastdeploy/function/slice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,5 +163,20 @@ void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
}));
}

void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
const std::vector<int64_t>& index, FDTensor* out) {
std::vector<int64_t> ends = index;
for (int i = 0; i < ends.size(); ++i) {
ends[i] += 1;
}
Slice(x, axes, index, ends, out);
for (int i = 0; i < axes.size(); ++i) {
if (out->Shape().size() <= 1) {
break;
}
out->Squeeze(axes[i]);
}
}

} // namespace function
} // namespace fastdeploy
3 changes: 3 additions & 0 deletions fastdeploy/function/slice.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,8 @@ FASTDEPLOY_DECL void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
const std::vector<int64_t>& starts,
const std::vector<int64_t>& ends, FDTensor* out);

FASTDEPLOY_DECL void Slice(const FDTensor& x, const std::vector<int64_t>& axes,
const std::vector<int64_t>& index, FDTensor* out);

} // namespace function
} // namespace fastdeploy
18 changes: 18 additions & 0 deletions tests/function/test_elementwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,15 @@ TEST(fastdeploy, check_same_dim) {
check_shape(z.shape, {2, 3, 4});
check_data(reinterpret_cast<const float*>(z.Data()), maximum_result.data(),
maximum_result.size());

x = 1.0f - x;
sub_result = {0.157138, 0.353809, 0.862595, 0.885693, 0.340074, 0.464184,
0.257084, 0.154395, 0.787718, 0.700299, 0.137829, 0.591059,
0.873153, 0.843381, 0.571159, 0.152347, 0.754137, 0.330954,
0.121117, 0.323741, 0.333547, 0.67477, 0.586061, 0.165859};
check_shape(x.shape, {2, 3, 4});
check_data(reinterpret_cast<const float*>(x.Data()), sub_result.data(),
sub_result.size());
}

TEST(fastdeploy, check_broadcast_dim1) {
Expand Down Expand Up @@ -498,6 +507,15 @@ TEST(fastdeploy, mixed_operation) {
check_shape(output.shape, {2, 3, 4});
check_data(reinterpret_cast<const float*>(output.Data()), result.data(),
result.size());

result = {2.854443, 1.87709, 1.585621, 1.012709, 0.332781, 0.998346,
0.228024, 2.140475, 0.246941, 0.301517, 1.575438, 0.595582,
-0.410393, -0.163718, -0.405571, 0.58563, -0.177035, 0.263035,
0.075725, 0.591098, 0.156365, -0.106078, -0.475957, 0.626429};
output = a + b * c / d - e;
check_shape(output.shape, {2, 3, 4});
check_data(reinterpret_cast<const float*>(output.Data()), result.data(),
result.size());
}

} // namespace function
Expand Down

0 comments on commit d95094c

Please sign in to comment.