Skip to content

Commit

Permalink
Fix MSVC build (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
Menooker authored Apr 12, 2024
1 parent 67f803c commit e19caf2
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 82 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ message(STATUS ${pybind11_INCLUDE_DIRS})
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fvisibility=hidden -fvisibility-inlines-hidden -mavx2 -mfma -pthread")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4200 /wd4305")
endif()


Expand Down
2 changes: 1 addition & 1 deletion KunQuant/passes/CodegenCpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def codegen_cpp(f: Function, input_name_to_idx: Dict[str, int], inputs: List[Tup
scope.scope.append(_CppSingleLine(scope, f"auto v{idx} = {thename}({rhs}, v{inp[0]});"))
elif isinstance(op, SetInfOrNanToValue):
thename = op.__class__.__name__
scope.scope.append(_CppSingleLine(scope, f"auto v{idx} = {thename}(v{inp[0]}, {op.attrs['value']});"))
scope.scope.append(_CppSingleLine(scope, f"auto v{idx} = {thename}(v{inp[0]}, {_value_to_float(op, elem_type)});"))
elif isinstance(op, BinaryElementwiseOp):
thename = op.__class__.__name__
scope.scope.append(_CppSingleLine(scope, f"auto v{idx} = {thename}(v{inp[0]}, v{inp[1]});"))
Expand Down
11 changes: 8 additions & 3 deletions cpp/Kun/Base.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#pragma once

#include <immintrin.h>

#include <cstddef>
#ifdef __cplusplus
namespace kun {
struct Context;
using f32x8 = __m256;
static constexpr size_t time_stride = 8;
} // namespace kun
#endif
Expand All @@ -14,10 +12,17 @@ static constexpr size_t time_stride = 8;
#define KUN_EXPORT extern "C" __declspec(dllexport)
#ifdef KUN_CORE_LIB
#define KUN_API __declspec(dllexport)
#define KUN_TEMPLATE_EXPORT KUN_API
#else
#define KUN_API __declspec(dllimport)
#define KUN_TEMPLATE_EXPORT
#endif
#define KUN_TEMPLATE_ARG
#else
#define KUN_API __attribute__((visibility("default")))
#define KUN_EXPORT KUN_API
#define KUN_TEMPLATE_EXPORT KUN_API
// g++ has a strange behavior: it needs T to be
// exported if we want to export func<T>
#define KUN_TEMPLATE_ARG KUN_API
#endif
27 changes: 12 additions & 15 deletions cpp/Kun/LayoutMappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
namespace kun {
namespace ops {

template<typename T, size_t simd_len>
struct KUN_API MapperSTs {
static const T *getInput(Buffer *b, BufferInfo *info,
size_t num_stock) {
template <typename T, size_t simd_len>
struct KUN_TEMPLATE_ARG MapperSTs {
static const T *getInput(Buffer *b, BufferInfo *info, size_t num_stock) {
return b->getPtr<T>();
}
static T *getOutput(Buffer *b, BufferInfo *info, size_t num_stock,
size_t simd_len2) {
size_t simd_len2) {
return b->getPtr<T>();
}
static size_t call(size_t stockid, size_t t, size_t num_time,
Expand All @@ -23,14 +22,13 @@ struct KUN_API MapperSTs {
}
};

template<typename T, size_t simd_len>
struct KUN_API MapperTS {
static const T *getInput(Buffer *b, BufferInfo *info,
size_t num_stock) {
template <typename T, size_t simd_len>
struct KUN_TEMPLATE_ARG MapperTS {
static const T *getInput(Buffer *b, BufferInfo *info, size_t num_stock) {
return b->getPtr<T>();
}
static T *getOutput(Buffer *b, BufferInfo *info, size_t num_stock,
size_t simd_len2) {
size_t simd_len2) {
return b->getPtr<T>();
}
static size_t call(size_t stockid, size_t t, size_t num_time,
Expand All @@ -39,14 +37,13 @@ struct KUN_API MapperTS {
}
};

template<typename T, size_t simd_len>
struct KUN_API MapperSTREAM {
static const T *getInput(Buffer *b, BufferInfo *info,
size_t num_stock) {
template <typename T, size_t simd_len>
struct KUN_TEMPLATE_ARG MapperSTREAM {
static const T *getInput(Buffer *b, BufferInfo *info, size_t num_stock) {
return b->stream_buf->getCurrentBufferPtr(num_stock, info->window);
}
static T *getOutput(Buffer *b, BufferInfo *info, size_t num_stock,
size_t simd_len2) {
size_t simd_len2) {
return b->stream_buf->pushData(num_stock, info->window, simd_len);
}
static size_t call(size_t stockid, size_t t, size_t num_time,
Expand Down
25 changes: 10 additions & 15 deletions cpp/Kun/Rank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,16 @@
namespace kun {
namespace ops {

template void RankStocks<MapperSTs<float, 8>, MapperSTs<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void RankStocks<MapperSTs<float, 8>, MapperTS<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void RankStocks<MapperTS<float, 8>, MapperTS<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void RankStocks<MapperTS<float, 8>, MapperSTs<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void RankStocks<MapperSTREAM<float, 8>, MapperSTREAM<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
#define DEF_INSTANCE(...) \
template KUN_TEMPLATE_EXPORT void RankStocks<__VA_ARGS__>( \
RuntimeStage * stage, size_t time_idx, size_t __total_time, \
size_t __start, size_t __length);

DEF_INSTANCE(MapperSTs<float, 8>, MapperSTs<float, 8>)
DEF_INSTANCE(MapperSTs<float, 8>, MapperTS<float, 8>)
DEF_INSTANCE(MapperTS<float, 8>, MapperTS<float, 8>)
DEF_INSTANCE(MapperTS<float, 8>, MapperSTs<float, 8>)
DEF_INSTANCE(MapperSTREAM<float, 8>, MapperSTREAM<float, 8>)

} // namespace ops
} // namespace kun
15 changes: 8 additions & 7 deletions cpp/Kun/Rank.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
namespace kun {
namespace ops {
template <typename INPUT, typename OUTPUT>
void KUN_API RankStocks(RuntimeStage *stage, size_t time_idx,
size_t __total_time, size_t __start, size_t __length) {
void KUN_TEMPLATE_EXPORT RankStocks(RuntimeStage *stage, size_t time_idx,
size_t __total_time, size_t __start,
size_t __length) {
auto num_stocks = stage->ctx->stock_count;
auto &inbuf = stage->ctx->buffers[stage->stage->in_buffers[0]->id];
auto in_num_time = inbuf.num_time;
Expand All @@ -23,7 +24,7 @@ void KUN_API RankStocks(RuntimeStage *stage, size_t time_idx,
auto outinfo = stage->stage->out_buffers[0];
auto simd_len = stage->ctx->simd_len;
T *output = OUTPUT::getOutput(&stage->ctx->buffers[outinfo->id], outinfo,
num_stocks, simd_len);
num_stocks, simd_len);
auto time_end =
std::min(__start + (time_idx + 1) * time_stride, __start + __length);
std::vector<T> data;
Expand All @@ -32,7 +33,7 @@ void KUN_API RankStocks(RuntimeStage *stage, size_t time_idx,
for (size_t i = 0; i < num_stocks; i++) {
auto S = i / simd_len;
T in = input[INPUT::call(i, t - in_base_time, in_num_time,
num_stocks, simd_len)];
num_stocks, simd_len)];
if (!std::isnan(in)) {
data.push_back(in);
}
Expand All @@ -41,7 +42,7 @@ void KUN_API RankStocks(RuntimeStage *stage, size_t time_idx,
for (size_t i = 0; i < num_stocks; i++) {
auto S = i / simd_len;
T in = input[INPUT::call(i, t - in_base_time, in_num_time,
num_stocks, simd_len)];
num_stocks, simd_len)];
T out;
if (!std::isnan(in)) {
auto pos = std::equal_range(data.begin(), data.end(), in);
Expand All @@ -51,13 +52,13 @@ void KUN_API RankStocks(RuntimeStage *stage, size_t time_idx,
} else {
out = NAN;
}
output[OUTPUT::call(i, t - __start, __length, num_stocks, simd_len)] = out;
output[OUTPUT::call(i, t - __start, __length, num_stocks,
simd_len)] = out;
}
data.clear();
}
}


extern template void RankStocks<MapperSTs<float, 8>, MapperSTs<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
Expand Down
24 changes: 17 additions & 7 deletions cpp/Kun/RunGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,23 @@ KUN_API void runGraph(std::shared_ptr<Executor> exec, const Module *m,
size_t num_stocks, size_t total_time, size_t cur_time,
size_t length);

struct KUN_API StreamContext {
struct Deleter {
struct AlignedPtr {
void* ptr;
#if CHECKED_PTR
size_t size;
Deleter(size_t size) : size{size} {}
size_t size;
#endif
void operator()(char *b);
};
std::vector<std::unique_ptr<char[], Deleter>> buffers;
char* get() const noexcept {
return (char*)ptr;
}
AlignedPtr(void* ptr, size_t size) noexcept;
AlignedPtr(AlignedPtr&& other) noexcept;
AlignedPtr& operator=(AlignedPtr&& other) noexcept;
void release() noexcept;
~AlignedPtr();
};

struct KUN_API StreamContext {
std::vector<AlignedPtr> buffers;
Context ctx;
const Module *m;
StreamContext(std::shared_ptr<Executor> exec, const Module *m,
Expand All @@ -33,6 +41,8 @@ struct KUN_API StreamContext {
// position register.
void pushData(size_t handle, const float *data);
void run();
StreamContext(const StreamContext&) = delete;
StreamContext& operator=(const StreamContext&) = delete;
~StreamContext();
};

Expand Down
49 changes: 40 additions & 9 deletions cpp/Kun/Runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ static const uint64_t VERSION = 0x64100002;
void Buffer::alloc(size_t count, size_t use_count, size_t elem_size) {
if (!ptr) {
ptr = (float *)kunAlignedAlloc(32, count * elem_size);
refcount = use_count;
refcount = (int)use_count;
#if CHECKED_PTR
size = count * elem_size;
#endif
Expand Down Expand Up @@ -127,7 +127,7 @@ bool RuntimeStage::doJob() {
}

static size_t getSizeofDtype(Datatype dtype) {
if (dtype==Datatype::Double) {
if (dtype == Datatype::Double) {
return sizeof(double);
}
return sizeof(float);
Expand Down Expand Up @@ -218,11 +218,45 @@ void runGraph(std::shared_ptr<Executor> exec, const Module *m,
exec->runUntilDone();
}

void StreamContext::Deleter::operator()(char *b) { kunAlignedFree(b); }
AlignedPtr::AlignedPtr(void *ptr, size_t size) noexcept {
this->ptr = ptr;
#if CHECKED_PTR
this->size = size;
#endif
}
AlignedPtr::AlignedPtr(AlignedPtr &&other) noexcept {
ptr = other.ptr;
other.ptr = nullptr;
#if CHECKED_PTR
size = other.size;
#endif
}

// Frees the held buffer via kunAlignedFree() and resets the pointer to null.
// Calling it on an empty (null) AlignedPtr does nothing, so it is safe to
// call repeatedly.
// Note: unlike std::unique_ptr::release(), this does not hand the pointer
// back to the caller - the buffer is freed here.
void AlignedPtr::release() noexcept {
    if (ptr == nullptr) {
        return;
    }
    kunAlignedFree(ptr);
    ptr = nullptr;
}

// Move assignment: frees the buffer currently held (if any), then takes over
// the one held by `other`, leaving `other` with a null pointer.
// Self-assignment is detected and leaves the object untouched.
AlignedPtr &AlignedPtr::operator=(AlignedPtr &&other) noexcept {
    if (this != &other) {
        // Drop our own buffer first so it is not leaked when overwritten.
        release();
        ptr = other.ptr;
        other.ptr = nullptr;
#if CHECKED_PTR
        size = other.size;
#endif
    }
    return *this;
}

// Destructor: frees the held buffer, if any, by delegating to release().
AlignedPtr::~AlignedPtr() {
    release();
}

template <typename T>
char *StreamBuffer<T>::make(size_t stock_count, size_t window_size,
size_t simd_len) {
size_t simd_len) {
auto ret = kunAlignedAlloc(
32, StreamBuffer::getBufferSize(stock_count, window_size, simd_len));
auto buf = (StreamBuffer *)ret;
Expand Down Expand Up @@ -260,11 +294,8 @@ StreamContext::StreamContext(std::shared_ptr<Executor> exec, const Module *m,
auto &buf = m->buffers[i];
buffers.emplace_back(
StreamBuffer<float>::make(num_stocks, buf.window, m->blocking_len),
StreamContext::Deleter {
#if CHECKED_PTR
StreamBuffer<float>::getBufferSize(num_stocks, buf.window)
#endif
});
StreamBuffer<float>::getBufferSize(num_stocks, buf.window,
m->blocking_len));
rtlbuffers.emplace_back((float *)buffers.back().get(), 1);
}
ctx.buffers = std::move(rtlbuffers);
Expand Down
25 changes: 10 additions & 15 deletions cpp/Kun/Scale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,16 @@

namespace kun {
namespace ops {
template void ScaleStocks<MapperSTs<float, 8>, MapperSTs<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void ScaleStocks<MapperSTs<float, 8>, MapperTS<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void ScaleStocks<MapperTS<float, 8>, MapperTS<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void ScaleStocks<MapperTS<float, 8>, MapperSTs<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
template void ScaleStocks<MapperSTREAM<float, 8>, MapperSTREAM<float, 8>>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
#define DEF_INSTANCE(...) \
template KUN_TEMPLATE_EXPORT void ScaleStocks<__VA_ARGS__>( \
RuntimeStage * stage, size_t time_idx, size_t __total_time, \
size_t __start, size_t __length);

DEF_INSTANCE(MapperSTs<float, 8>, MapperSTs<float, 8>)
DEF_INSTANCE(MapperSTs<float, 8>, MapperTS<float, 8>)
DEF_INSTANCE(MapperTS<float, 8>, MapperTS<float, 8>)
DEF_INSTANCE(MapperTS<float, 8>, MapperSTs<float, 8>)
DEF_INSTANCE(MapperSTREAM<float, 8>, MapperSTREAM<float, 8>)

} // namespace ops
} // namespace kun
Loading

0 comments on commit e19caf2

Please sign in to comment.