
Commit

No commit message
xiaying committed Jun 27, 2022
1 parent 043c586 commit 2d13d6a
Showing 23 changed files with 13,557 additions and 13,309 deletions.
README.md (2 changes: 1 addition & 1 deletion)
@@ -110,7 +110,7 @@ Group #1 (Full): 23329087

Group #2 (Full): 23350225

-Group #3: https://h5.dingtalk.com/circle/healthCheckin.html?dtaction=os&corpId=ding2c1d5c85a81030b9a483726330e8af54&574b2bb2-c53a-4=497bad6b-25a5-4&cbdbhh=qwertyuiop
+Group #3: https://h5.dingtalk.com/circle/healthCheckin.html?dtaction=os&corpId=ding8989a1d6ae6ef130b177420cc0e366ea&f0c81=1b93a&cbdbhh=qwertyuiop

## License
Apache 2.0
README_CN.md (8 changes: 8 additions & 0 deletions)
@@ -105,6 +105,14 @@ The input to MNN (an AI inference model) is a directed acyclic graph (DAG), in which each
- MNN-CV: an image-processing algorithm library similar to OpenCV, with the core compute implemented on MNN
- MNN-Train: MNN's training module, supporting training on every platform

+## Community and Feedback
+DingTalk groups:

+- DingTalk group 1: 23329087
+- DingTalk group 2: 23350225
+- DingTalk group 3: https://h5.dingtalk.com/circle/healthCheckin.html?dtaction=os&corpId=ding8989a1d6ae6ef130b177420cc0e366ea&f0c81=1b93a&cbdbhh=qwertyuiop

## License
Apache 2.0
express/module/Module.cpp (19 changes: 11 additions & 8 deletions)
@@ -297,6 +297,9 @@ static Module* loadInternal(const std::vector<std::string>& inputs, const std::v
#endif // MNN_INTERNAL_ENABLED

std::shared_ptr<Module::Info> info(new Module::Info);
+if (net->extraInfo() && net->extraInfo()->version()) {
+    info->version = net->extraInfo()->version()->str();
+}
auto rtMgr = _rtMgr;
Module::Config defaultConfig;
if (nullptr == config) {
@@ -308,14 +311,14 @@ static Module* loadInternal(const std::vector<std::string>& inputs, const std::v
sche_config.backendConfig = config->backend->config;
rtMgr.reset(Executor::RuntimeManager::createRuntimeManager(sche_config));
}
+info->inputNames = inputs;
+info->outputNames = outputs;
if ((!inputs.empty()) && (!outputs.empty())) {
_loadInputs(info.get(), inputs, net);
info->runTimeManager = rtMgr;
std::shared_ptr<Module> m(PipelineModule::load(inputs, outputs, buffer, length, rtMgr, config));
return new NetModule(m, info);
}
-std::vector<std::string> newInputs = inputs;
-std::vector<std::string> newOutputs = outputs;
std::set<int> inputIdx, outputIdx, realInput, realOutput;
for (int i=0; i< net->oplists()->size(); ++i) {
auto op = net->oplists()->GetAs<Op>(i);
@@ -338,18 +341,18 @@ static Module* loadInternal(const std::vector<std::string>& inputs, const std::v
}
}
std::set_difference(outputIdx.begin(), outputIdx.end(), inputIdx.begin(), inputIdx.end(), std::inserter(realOutput, realOutput.begin()));
-if (newInputs.empty()) {
+if (info->inputNames.empty()) {
for (auto index : realInput) {
-newInputs.emplace_back(net->tensorName()->GetAsString(index)->str());
+info->inputNames.emplace_back(net->tensorName()->GetAsString(index)->str());
}
}
-if (newOutputs.empty()) {
+if (info->outputNames.empty()) {
for (auto index : realOutput) {
-newOutputs.emplace_back(net->tensorName()->GetAsString(index)->str());
+info->outputNames.emplace_back(net->tensorName()->GetAsString(index)->str());
}
}
-std::shared_ptr<Module> m(PipelineModule::load(newInputs, newOutputs, buffer, length, rtMgr, config));
-_loadInputs(info.get(), newInputs, net);
+std::shared_ptr<Module> m(PipelineModule::load(info->inputNames, info->outputNames, buffer, length, rtMgr, config));
+_loadInputs(info.get(), info->inputNames, net);
info->runTimeManager = rtMgr;
return new NetModule(m, info);
}
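When the caller supplies no input or output names, the loader above derives them from the graph itself: a tensor index that some op produces but no op ever consumes is a real graph output, which is exactly what the `std::set_difference` call computes. A minimal standalone sketch of that set arithmetic (the toy indices are illustrative, not from a real model):

```cpp
#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>

int main() {
    // Toy tensor indices: which tensors any op reads, and which any op writes.
    std::set<int> inputIdx  = {0, 1, 2};     // consumed by some op
    std::set<int> outputIdx = {1, 2, 3, 4};  // produced by some op
    std::set<int> realOutput;
    // Produced but never consumed => a real output of the whole graph.
    std::set_difference(outputIdx.begin(), outputIdx.end(),
                        inputIdx.begin(), inputIdx.end(),
                        std::inserter(realOutput, realOutput.begin()));
    for (int idx : realOutput) {
        std::cout << idx << "\n";  // prints 3 and 4
    }
    return 0;
}
```

The detected names now land directly in `info->inputNames` / `info->outputNames`, so the same `Info` object records the effective I/O order whether or not the caller supplied names; that is the point of replacing the local `newInputs`/`newOutputs` vectors.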
include/MNN/expr/Module.hpp (9 changes: 8 additions & 1 deletion)
@@ -81,10 +81,17 @@ class MNN_PUBLIC Module {

struct Info {
// Input info loaded from the model
+// If the i-th input has no info, it will be nullptr
std::vector<Variable::Info> inputs;
// The Module's defaultFormat, NCHW or NHWC
Dimensionformat defaultFormat;
// Runtime Info
std::shared_ptr<MNN::Express::Executor::RuntimeManager> runTimeManager;
+// Input names, in order
+std::vector<std::string> inputNames;
+// Output names, in order
+std::vector<std::string> outputNames;
+// The version of MNNConvert that built the module
+std::string version;
};
const Info* getInfo() const;
class CloneContext {
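A sketch of how calling code might read the new fields, assuming the usual `Module::load(inputs, outputs, fileName)` entry point and a converted model file named `model.mnn`; passing empty name lists exercises the auto-detection path in Module.cpp above:

```cpp
#include <MNN/expr/Module.hpp>
#include <iostream>
#include <memory>

int main() {
    using MNN::Express::Module;
    // Empty name lists: let the loader derive the real inputs/outputs.
    std::unique_ptr<Module> net(Module::load({}, {}, "model.mnn"));
    if (nullptr == net) {
        return 1;
    }
    const Module::Info* info = net->getInfo();
    std::cout << "converter version: " << info->version << "\n";
    for (const auto& name : info->inputNames) {
        std::cout << "input:  " << name << "\n";
    }
    for (const auto& name : info->outputNames) {
        std::cout << "output: " << name << "\n";
    }
    return 0;
}
```

Note that `version` stays empty for models whose converter did not write `extraInfo`, since the loader only fills it when `net->extraInfo()->version()` is present.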
project/android/build_32.sh (1 change: 0 additions & 1 deletion)
@@ -9,7 +9,6 @@ cmake ../../../ \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=false \
-DMNN_USE_SSE=OFF \
--DMNN_SUPPORT_BF16=OFF \
-DMNN_BUILD_TEST=ON \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. -DNATIVE_INCLUDE_OUTPUT=. $1 $2 $3
source/backend/cpu/arm/arm32/bf16/MNNReluWithSlopeChannelBF16.S (105 changes: 105 additions & 0 deletions)
@@ -0,0 +1,105 @@
//
// MNNReluWithSlopeChannelBF16.S
// MNN
//
// Created by MNN on 2022/06/23.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __arm__
#ifndef __aarch64__
#include "MNNAsmGlobal.h"

.text
.align 5

asm_function MNNReluWithSlopeChannelBF16
//void MNNReluWithSlopeChannelBF16(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad)

//Auto Load:
//r0:dst, r1:src, r2:slope, r3:sizeQuad

//Load from sp
//r4:depthQuad

push {r4,r5, lr}
ldr r4, [sp, #12]

cmp r4, #0
beq PReluEnd
cmp r3, #0
beq PReluEnd


PReluZLoop:
vld1.32 {d30}, [r2]!
vshll.s16 q15, d30, #16
mov r5, r3
cmp r5, #3
ble PReluL1

PReluL4Loop:
vld1.32 {q1}, [r1]!
vshll.s16 q0, d2, #16
vshll.s16 q1, d3, #16

vcle.f32 q12, q0, #0
vcle.f32 q13, q1, #0

vld1.32 {q3}, [r1]!
vshll.s16 q2, d6, #16
vshll.s16 q3, d7, #16

vmul.f32 q8, q0, q15
vmul.f32 q9, q1, q15
vbit.32 q0, q8, q12
vbit.32 q1, q9, q13

vmul.f32 q8, q2, q15
vmul.f32 q9, q3, q15

vshrn.i32 d0, q0, #16
vshrn.i32 d1, q1, #16

vst1.32 {q0}, [r0]!

vcle.f32 q12, q2, #0
vcle.f32 q13, q3, #0
vbit.32 q2, q8, q12
vbit.32 q3, q9, q13
vshrn.i32 d4, q2, #16
vshrn.i32 d5, q3, #16

vst1.32 {q2}, [r0]!
sub r5, r5, #4
cmp r5, #4
bge PReluL4Loop

PReluL1:
cmp r5, #0

beq PReluL1End

PReluL1Loop:
vld1.32 {d0}, [r1]!
vshll.s16 q0, d0, #16
vcle.f32 q2, q0, #0
vmul.f32 q1, q0, q15
vbit.32 q0, q1, q2
vshrn.i32 d0, q0, #16
vst1.32 {q0}, [r0]!
subs r5, r5, #1
bne PReluL1Loop

PReluL1End:

subs r4, r4, #1
bne PReluZLoop


PReluEnd:

pop {r4, r5, pc}

#endif
#endif
source/backend/cpu/arm/arm64/bf16/MNNReluWithSlopeChannelBF16.S (97 changes: 97 additions & 0 deletions)
@@ -0,0 +1,97 @@
//
// MNNReluWithSlopeChannelBF16.S
// MNN
//
// Created by MNN on 2022/06/23.
// Copyright © 2018, Alibaba Group Holding Limited
//

#ifdef __aarch64__
#include "MNNAsmGlobal.h"

.text
.align 5

asm_function MNNReluWithSlopeChannelBF16
//void MNNReluWithSlopeChannelBF16(float* dst, const float* src, const float* slope, size_t sizeQuad, size_t depthQuad)

//Auto Load:
//x0:dst, x1:src, x2:slope, x3:sizeQuad, x4:depthQuad


cmp x4, #0
beq PReluEnd
cmp x3, #0
beq PReluEnd


PReluZLoop:
ld1 {v23.4h}, [x2], #8
shll v23.4s, v23.4h, #16
mov x5, x3
cmp x5, #3
ble PReluL1

PReluL4Loop:
ld1 {v0.4h, v1.4h}, [x1], #16
shll v0.4s, v0.4h, #16
shll v1.4s, v1.4h, #16

fcmle v20.4s, v0.4s, #0
fcmle v21.4s, v1.4s, #0

ld1 {v2.4h, v3.4h}, [x1], #16
shll v2.4s, v2.4h, #16
shll v3.4s, v3.4h, #16

fmul v16.4s, v0.4s, v23.4s
fmul v17.4s, v1.4s, v23.4s
bit v0.16b, v16.16b, v20.16b
bit v1.16b, v17.16b, v21.16b

fmul v16.4s, v2.4s, v23.4s
fmul v17.4s, v3.4s, v23.4s
shrn v0.4h, v0.4s, #16
shrn v1.4h, v1.4s, #16


st1 {v0.4h, v1.4h}, [x0], #16

fcmle v20.4s, v2.4s, #0
fcmle v21.4s, v3.4s, #0
bit v2.16b, v16.16b, v20.16b
bit v3.16b, v17.16b, v21.16b
shrn v2.4h, v2.4s, #16
shrn v3.4h, v3.4s, #16

st1 {v2.4h, v3.4h}, [x0], #16
sub x5, x5, #4
cmp x5, #4
bge PReluL4Loop

PReluL1:
cmp x5, #0

beq PReluL1End

PReluL1Loop:
ld1 {v0.4h}, [x1], #8
shll v0.4s, v0.4h, #16
fcmle v2.4s, v0.4s, #0
fmul v1.4s, v0.4s, v23.4s
bit v0.16b, v1.16b, v2.16b
shrn v0.4h, v0.4s, #16
st1 {v0.4h}, [x0], #8
subs x5, x5, #1
bne PReluL1Loop

PReluL1End:

subs x4, x4, #1
bne PReluZLoop


PReluEnd:

ret
#endif
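Both kernels above rely on bf16 being the top half of an IEEE-754 float: widen each lane with a left shift by 16 (`vshll`/`shll`), evaluate PRelu in fp32, build a "less than or equal to zero" mask (`vcle`/`fcmle`) and bit-insert the slope product where the mask is set (`vbit`/`bit`), then narrow back with a truncating right shift (`vshrn`/`shrn`). A portable C++ sketch of the same per-element math follows; the helper names are illustrative, not MNN API:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// bf16 <-> fp32 via the same 16-bit shifts the assembly uses.
static float bf16_to_fp32(uint16_t h) {
    uint32_t bits = (uint32_t)h << 16;  // shll/vshll #16
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

static uint16_t fp32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return (uint16_t)(bits >> 16);      // shrn/vshrn #16, truncating
}

// PRelu over 4-wide channel packs: dst = src <= 0 ? src * slope : src.
void prelu_bf16_reference(uint16_t* dst, const uint16_t* src,
                          const uint16_t* slope,
                          size_t sizeQuad, size_t depthQuad) {
    for (size_t z = 0; z < depthQuad; ++z) {
        for (size_t i = 0; i < sizeQuad; ++i) {
            for (size_t j = 0; j < 4; ++j) {
                float v = bf16_to_fp32(src[(z * sizeQuad + i) * 4 + j]);
                float s = bf16_to_fp32(slope[z * 4 + j]);
                // fcmle builds the mask; bit keeps v * s only where v <= 0.
                dst[(z * sizeQuad + i) * 4 + j] =
                    fp32_to_bf16(v <= 0.f ? v * s : v);
            }
        }
    }
}
```

The assembly keeps the select branch-free; the ternary here is just the readable scalar equivalent of the mask-and-insert pair.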
source/backend/cpu/bf16/BF16Functions.cpp (6 changes: 5 additions & 1 deletion)
@@ -20,6 +20,9 @@
#include "BF16Unary.hpp"
using BFVec4 = MNN::Math::VecHalf<4>;
using Vec4 = MNN::Math::Vec<float, 4>;
extern "C" {
void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* slopeO, size_t sizeQuad, size_t depthQuad);
}
namespace MNN {
// just for reference BF16 converting of c++ code, not for arm or sse.
inline int16_t MNNFP32ToBF16(float fp32Value) {
@@ -139,6 +142,7 @@ void MNNAxByClampBroadcastUnitBF16(float* CF, const float* AF, const float* BF,
}
}
}
+#ifndef MNN_USE_NEON
void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* slopeO, size_t sizeQuad, size_t depthQuad) {
auto slope = (const int16_t*)slopeO;
auto dst = (int16_t*)dstO;
@@ -163,7 +167,7 @@ void MNNReluWithSlopeChannelBF16(float* dstO, const float* srcO, const float* sl
}
}
}
-
+#endif

#if !defined(MNN_USE_SSE) && !defined(MNN_USE_NEON)
void MNNPackC4ForMatMul_A_BF16(float* destOrigin, float const** sourceGroup, const int32_t* info, const int32_t* el) {
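The reference converter `MNNFP32ToBF16` above keeps only the high 16 bits of a float: the sign bit, all 8 exponent bits, and the top 7 mantissa bits. A standalone round-trip sketch (illustrative values, not MNN code) of how much precision that truncation leaves:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    float x = 1.2345678f;
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    uint16_t bf = (uint16_t)(bits >> 16);   // sign + 8 exponent + 7 mantissa bits
    uint32_t widened = (uint32_t)bf << 16;  // widen back to a float bit pattern
    float y;
    std::memcpy(&y, &widened, sizeof(y));
    std::printf("%.7f -> %.7f\n", x, y);    // 1.2345678 -> 1.2343750
    return 0;
}
```

Because bf16 keeps the full float exponent range, the trade-off is purely in mantissa precision (roughly 2 to 3 significant decimal digits), which is why the PRelu kernels can safely do their arithmetic in widened fp32 and truncate on the way out.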