Skip to content

Commit

Permalink
[Pten]Support parse kernel key by multi-inputs (PaddlePaddle#37517)
Browse files Browse the repository at this point in the history
* Support parse kernel key by multi-inputs

* optimize code according to reviewer
  • Loading branch information
YuanRisheng authored Nov 26, 2021
1 parent 097e098 commit a0b895c
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 4 deletions.
85 changes: 85 additions & 0 deletions paddle/pten/api/lib/data_type_set.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <ostream>

#include "paddle/pten/api/ext/exception.h"
#include "paddle/pten/common/data_type.h"
namespace paddle {
namespace experimental {

/* DataTypeSet stores a set of DataType values as the bits of a 64-bit mask.
 *
 * A defined dtype occupies bit (static_cast<uint8_t>(dtype) - 1), and
 * DataType::UNDEFINED maps to the empty set. All set operations return a new
 * DataTypeSet and never modify their operands. */
class DataTypeSet final {
 public:
  // Creates an empty set.
  constexpr DataTypeSet() : bitset_(0) {}

  // Creates a set holding only `dtype`; UNDEFINED yields an empty set.
  // NOTE(review): the shift assumes every defined DataType has an underlying
  // value in [1, 64] -- confirm against data_type.h.
  explicit constexpr DataTypeSet(DataType dtype)
      : bitset_(dtype == DataType::UNDEFINED
                    ? 0
                    : 1ULL << (static_cast<uint8_t>(dtype) - 1)) {}

  // Returns the raw bit mask backing this set.
  constexpr uint64_t bitset() const { return bitset_; }

  // Returns true when `dtype` is a member of this set.
  // `dtype` must not be UNDEFINED; PD_CHECK rejects it.
  bool Has(DataType dtype) const {
    PD_CHECK(dtype != DataType::UNDEFINED,
             "Data type argument can't be UNDEFINED.");
    return static_cast<bool>(bitset_ & DataTypeSet(dtype).bitset());
  }

  // Returns true when no dtype is stored.
  constexpr bool IsEmpty() const { return bitset_ == 0; }

  // Union: dtypes present in either set.
  constexpr DataTypeSet operator|(const DataTypeSet& other) const {
    return DataTypeSet(bitset_ | other.bitset());
  }
  // Intersection: dtypes present in both sets.
  constexpr DataTypeSet operator&(const DataTypeSet& other) const {
    return DataTypeSet(bitset_ & other.bitset());
  }
  // Difference: dtypes present in this set but not in `other`.
  constexpr DataTypeSet operator-(const DataTypeSet& other) const {
    return DataTypeSet(bitset_ & ~other.bitset());
  }
  // Symmetric difference: dtypes present in exactly one of the two sets.
  constexpr DataTypeSet operator^(const DataTypeSet& other) const {
    return DataTypeSet(bitset_ ^ other.bitset());
  }

  constexpr bool operator==(const DataTypeSet& other) const {
    return bitset_ == other.bitset();
  }
  // Provided explicitly because operator!= is not derived from operator==
  // before C++20.
  constexpr bool operator!=(const DataTypeSet& other) const {
    return !(*this == other);
  }

 private:
  // Builds a set directly from a raw mask; only used by the set operators.
  explicit constexpr DataTypeSet(uint64_t bitset) : bitset_(bitset) {}

  uint64_t bitset_;
};

// Picks the dtype a kernel must use so that no input in `dtype_set` loses
// precision. Only complex promotion is implemented for now:
//   - COMPLEX128 present                    -> COMPLEX128
//   - COMPLEX64 present together with FLOAT64 -> COMPLEX128
//   - COMPLEX64 present without FLOAT64       -> COMPLEX64
//   - no complex dtype present                -> DataType::UNDEFINED,
//     i.e. no promotion is requested.
inline DataType PromoteTypes(const DataTypeSet& dtype_set) {
  // The widest complex type always wins.
  if (dtype_set.Has(DataType::COMPLEX128)) {
    return DataType::COMPLEX128;
  }

  // Without any complex input there is nothing to promote.
  if (!dtype_set.Has(DataType::COMPLEX64)) {
    return DataType::UNDEFINED;
  }

  // COMPLEX64 mixed with FLOAT64 needs double-precision components.
  return dtype_set.Has(DataType::FLOAT64) ? DataType::COMPLEX128
                                          : DataType::COMPLEX64;
}

} // namespace experimental
} // namespace paddle
9 changes: 9 additions & 0 deletions paddle/pten/api/lib/kernel_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ limitations under the License. */

#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/api/lib/backend_set.h"
#include "paddle/pten/api/lib/data_type_set.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/layout.h"

Expand Down Expand Up @@ -88,6 +89,9 @@ struct ArgsIterator {

struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
KernelKeySet key_set;
// dtype_set caches the dtypes of all inputs seen so far and is used for
// dtype promotion
DataTypeSet dtype_set{DataType::UNDEFINED};

// TODO(chenweihang): deal with multiple diff input Tensors
// TODO(chenweihang): add global device guard method to set backend
Expand All @@ -96,6 +100,11 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
// TODO(chenweihang): select multi layout and dtype
key_set.layout = x.layout();
key_set.dtype = x.type();
dtype_set = dtype_set | DataTypeSet(x.dtype());
auto promote_result = PromoteTypes(dtype_set);
if (promote_result != DataType::UNDEFINED) {
key_set.dtype = promote_result;
}
}

void operator()(const std::vector<Tensor>& x) {
Expand Down
8 changes: 4 additions & 4 deletions paddle/pten/api/lib/math.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ PD_DLL_DECL Tensor mean(const Tensor& x) {

PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_add", kernel_key);
Expand Down Expand Up @@ -105,7 +105,7 @@ PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y) {

PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_sub", kernel_key);
Expand Down Expand Up @@ -140,7 +140,7 @@ PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y) {

PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_div", kernel_key);
Expand Down Expand Up @@ -175,7 +175,7 @@ PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y) {

PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key_set = ParseKernelKeyByInputArgs(x, y);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_mul", kernel_key);
Expand Down

0 comments on commit a0b895c

Please sign in to comment.