Skip to content

Commit

Permalink
Merge branch 'ztxz16:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
wildkid1024 authored Jun 14, 2023
2 parents e356a23 + 4d99a41 commit 20c36bd
Show file tree
Hide file tree
Showing 21 changed files with 3,437 additions and 1,732 deletions.
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ endif()
include_directories(include)

message(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/chatglm.cpp src/moss.cpp src/vicuna.cpp)
set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/devices/cpu/cpudevice.cpp src/executor.cpp src/chatglm.cpp src/moss.cpp src/vicuna.cpp)

if (USE_CUDA)
enable_language(CUDA)
add_compile_definitions(USE_CUDA)
set(FASTLLM_CUDA_SOURCES src/fastllm-cuda.cu)
set(FASTLLM_CUDA_SOURCES src/fastllm-cuda.cu src/devices/cuda/cudadevice.cpp)
set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} cublas)
#set(CMAKE_CUDA_ARCHITECTURES "70")
endif()

add_library(fastllm OBJECT
Expand All @@ -41,4 +42,7 @@ add_executable(quant tools/quant.cpp)
target_link_libraries(quant fastllm)

add_executable(webui example/webui/webui.cpp)
target_link_libraries(webui fastllm)
target_link_libraries(webui fastllm)

add_executable(benchmark example/benchmark/benchmark.cpp)
target_link_libraries(benchmark fastllm)
171 changes: 171 additions & 0 deletions example/benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
//
// Created by huangyuyang on 6/9/23.
//

#include "factoryllm.h"
#include "utils.h"
#include "fstream"

// Model factory plus one lazily-created instance per supported architecture;
// initLLMConf() loads weights into whichever one `modeltype` selects.
static factoryllm fllm;
static int modeltype = 0;   // selected model kind: 0 chatglm, 1 moss, 2 vicuna
static char* modelpath = NULL;   // never read in this file; presumably kept for parity with the chat example
static fastllm::basellm* chatGlm = fllm.createllm(LLM_TYPE_CHATGLM);
static fastllm::basellm* moss = fllm.createllm(LLM_TYPE_MOSS);
static fastllm::basellm* vicuna = fllm.createllm(LLM_TYPE_VICUNA);
static int sRound = 0;      // not referenced in this benchmark
static std::string history; // not referenced in this benchmark

// Maps the --model name accepted on the command line to its numeric id.
std::map <std::string, int> modelDict = {
    {"chatglm", 0}, {"moss", 1}, {"vicuna", 2}
};

// Command-line configuration for the benchmark driver (filled by ParseArgs).
struct BenchmarkConfig {
    int model = LLM_TYPE_CHATGLM;             // model type: 0 chatglm, 1 moss, 2 vicuna
    std::string path = "chatglm-6b-int4.bin"; // path to the model weights file
    int threads = 4;                          // number of inference threads
    int limit = -1;                           // output-token limit; < 0 means unlimited
    int batch = -1;                           // batch size; -1 means use the input file's line count
    std::string file;                         // input file, one prompt per line
    std::string output;                       // output file; results go to stdout if unset
};

// Print command-line help. Keep this list in sync with the flags ParseArgs accepts.
void Usage() {
    std::cout << "Usage:" << std::endl;
    std::cout << "[-h|--help]: 显示帮助" << std::endl;
    std::cout << "<-m|--model> <args>: 模型类型,默认为0, 可以设置为0(chatglm),1(moss),2(vicuna)" << std::endl;
    std::cout << "<-p|--path> <args>: 模型文件的路径" << std::endl;
    std::cout << "<-t|--threads> <args>: 使用的线程数量" << std::endl;
    std::cout << "<-l|--limit> <args>: 输出token数限制" << std::endl;
    std::cout << "<-b|--batch> <args>: batch数" << std::endl;
    std::cout << "<-f|--file> <args>: 输入文件,文件中每行一个prompt,如果行数不足batch则用之前的prompt补充" << std::endl;
    // Fix: ParseArgs accepts -o/--output, but it was missing from the help text.
    std::cout << "<-o|--output> <args>: 输出文件,如果不设定则输出到屏幕" << std::endl;
}

// Parse argv into `config`.
// Exits 0 after printing help for -h/--help; exits -1 on an unknown flag or
// when a value-taking flag appears without its value.
// Fix: the original read sargv[i + 1] / sargv[++i] with no bounds check, so a
// trailing flag (e.g. "./benchmark -p") indexed past the end of the vector.
void ParseArgs(int argc, char **argv, BenchmarkConfig &config) {
    std::vector <std::string> sargv;
    for (int i = 0; i < argc; i++) {
        sargv.push_back(std::string(argv[i]));
    }
    for (int i = 1; i < argc; i++) {
        if (sargv[i] == "-h" || sargv[i] == "--help") {
            Usage();
            exit(0);
        } else if (i + 1 >= argc) {
            // Every remaining recognized flag consumes a value; a trailing
            // flag (recognized or not) is an error either way.
            Usage();
            exit(-1);
        } else if (sargv[i] == "-m" || sargv[i] == "--model") {
            if (modelDict.find(sargv[i + 1]) != modelDict.end()) {
                config.model = modelDict[sargv[++i]];
            } else {
                config.model = atoi(sargv[++i].c_str());
            }
        } else if (sargv[i] == "-p" || sargv[i] == "--path") {
            config.path = sargv[++i];
        } else if (sargv[i] == "-t" || sargv[i] == "--threads") {
            config.threads = atoi(sargv[++i].c_str());
        } else if (sargv[i] == "-l" || sargv[i] == "--limit") {
            config.limit = atoi(sargv[++i].c_str());
        } else if (sargv[i] == "-b" || sargv[i] == "--batch") {
            config.batch = atoi(sargv[++i].c_str());
        } else if (sargv[i] == "-f" || sargv[i] == "--file") {
            config.file = sargv[++i];
        } else if (sargv[i] == "-o" || sargv[i] == "--output") {
            config.output = sargv[++i];
        } else {
            Usage();
            exit(-1);
        }
    }
}

// Configure the thread pool and load the selected model's weights.
// model: 0 chatglm (also warmed up), 1 moss, 2 vicuna. Always returns 0.
int initLLMConf(int model, const char* modelPath, int threads) {
    fastllm::SetThreads(threads);
    modeltype = model;
    switch (modeltype) {
        case 0:
            chatGlm->LoadFromFile(modelPath);
            chatGlm->WarmUp();
            break;
        case 1:
            moss->LoadFromFile(modelPath);
            break;
        case 2:
            vicuna->LoadFromFile(modelPath);
            break;
        default:
            break;
    }
    return 0;
}


// Destroy every model instance and reset the globals.
// `delete` on a null pointer is a no-op, so no guards are needed.
void uninitLLM()
{
    delete chatGlm;
    chatGlm = NULL;
    delete moss;
    moss = NULL;
    delete vicuna;
    vicuna = NULL;
}

// Benchmark driver: load a model, run a batch of prompts through
// ResponseBatch, and report token throughput.
int main(int argc, char **argv) {
    BenchmarkConfig config;
    ParseArgs(argc, argv, config);
    initLLMConf(config.model, config.path.c_str(), config.threads);
    chatGlm->output_token_limit = config.limit;

    // Collect prompts: one per non-empty line of the input file, or a single default.
    std::vector <std::string> inputs;
    if (config.file != "") {
        std::ifstream finputs(config.file, std::ios::in);
        if (!finputs.is_open()) {
            // Fix: the original silently ran with zero prompts if the file was missing.
            printf("cannot open input file: %s\n", config.file.c_str());
            return -1;
        }
        std::string input;
        while (std::getline(finputs, input) && input != "") {
            inputs.push_back(input);
        }
    } else {
        inputs.push_back("Hello!");
    }

    // batch == -1 means "use the input line count". Fix: the original compared
    // inputs.size() (size_t) against batch (int) directly, so batch == -1
    // converted to SIZE_MAX and the pad loop ran until memory exhaustion.
    if (config.batch > 0) {
        while ((int) inputs.size() < config.batch) {
            inputs.push_back(inputs[rand() % inputs.size()]);
        }
        if ((int) inputs.size() > config.batch) {
            inputs.resize(config.batch);
        }
    }

    std::vector <std::string> outputs;
    static int tokens = 0; // static so the captureless callback below can update it
    auto st = std::chrono::system_clock::now();
    chatGlm->ResponseBatch(inputs, outputs, [](int index, std::vector <std::string> &contents) {
        if (index != -1) {
            // Count one token per stream that produced output this step.
            for (size_t i = 0; i < contents.size(); i++) {
                tokens += (contents[i].size() > 0);
            }
        }
    });
    float spend = fastllm::GetSpan(st, std::chrono::system_clock::now());

    // Report each (prompt, reply) pair, to the output file when one was given.
    // Fix: the original fprintf'd through an unchecked fopen result.
    FILE *fo = stdout;
    if (config.output != "") {
        fo = fopen(config.output.c_str(), "w");
        if (fo == NULL) {
            printf("cannot open output file: %s, printing to stdout\n", config.output.c_str());
            fo = stdout;
        }
    }
    for (size_t i = 0; i < outputs.size(); i++) {
        fprintf(fo, "[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
    }
    if (fo != stdout) {
        fclose(fo);
    }

    printf("batch: %d\n", (int) inputs.size());
    // Guard the division so an instantaneous run doesn't print inf/nan.
    printf("output %d tokens\nuse %f s\nspeed = %f tokens / s\n",
           tokens, spend, spend > 0 ? tokens / spend : 0.0f);
    return 0;
}
1 change: 1 addition & 0 deletions example/benchmark/prompts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello
6 changes: 4 additions & 2 deletions include/basellm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include "fastllm.h"

typedef void(*RuntimeResult) (int index, const char* content); //实时生成的内容回调 index: 0开始回复,-1本次回复结束
typedef void(*RuntimeResultBatch) (int index, std::vector <const char*> contents); //实时生成的内容回调 index: 0开始回复,-1本次回复结束
typedef void(*RuntimeResultBatch) (int index, std::vector <std::string> &contents); //实时生成的内容回调 index: 0开始回复,-1本次回复结束

namespace fastllm {
class basellm {
Expand All @@ -22,7 +22,7 @@ namespace fastllm {

virtual void ResponseBatch(const std::vector <std::string> &inputs,
std::vector <std::string> &outputs,
RuntimeResult retCb) {} // 批量根据给出的内容回复
RuntimeResultBatch retCb = nullptr) {} // 批量根据给出的内容回复

virtual void SaveLowBitModel(const std::string &fileName, int bit) {}; // 存储成量化模型

Expand All @@ -32,6 +32,8 @@ namespace fastllm {

virtual void CausalMask(Data &data, int start) {}; // 因果mask

int output_token_limit = -1;

int embed_dim = 4096;
int num_attention_heads = 32;
int head_dim = embed_dim / num_attention_heads;
Expand Down
11 changes: 8 additions & 3 deletions include/chatglm.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,23 @@ namespace fastllm {
const Data &positionIds,
std::vector <std::pair <Data, Data> > &pastKeyValues);

std::vector <int> ForwardBatch(
int batch,
const Data &inputIds,
const Data &attentionMask,
const Data &positionIds,
std::vector <std::pair <Data, Data> > &pastKeyValues);

virtual std::string Response(const std::string& input, RuntimeResult retCb); // 根据给出的内容回复

virtual void ResponseBatch(const std::vector <std::string> &inputs,
std::vector <std::string> &outputs,
RuntimeResult retCb);
RuntimeResultBatch retCb);

virtual void SaveLowBitModel(const std::string &fileName, int bit); // 存储成量化模型

virtual void WarmUp(); // 预热
private:
virtual void RotatePosition2D(Data &data, const Data &positionIds); // 二维位置编码

virtual void CausalMask(Data &data, int start) {}; // 因果mask?
};
}
Expand Down
56 changes: 56 additions & 0 deletions include/device.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//
// Created by huangyuyang on 6/13/23.
//

#ifndef FASTLLM_DEVICE_H
#define FASTLLM_DEVICE_H

#include "fastllm.h"

namespace fastllm {
    // Named-parameter dictionaries passed to every operator entry point.
    typedef std::map <std::string, Data*> DataDict;
    typedef std::map <std::string, float> FloatDict;
    typedef std::map <std::string, int> IntDict;

    // Base class for a single operator implementation hosted by a device.
    class BaseOperator {
    public:
        // Whether this operator can execute `opType` with the given tensors and parameters.
        virtual bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);

        // Shape inference: derive output shapes for `opType` from the inputs.
        virtual void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);

        // Execute `opType`. Pure virtual: every concrete operator must implement it.
        virtual void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams) = 0;
    };

    // Base class for a compute device (e.g. CPU, CUDA) that owns memory and a
    // table of operators. Op dispatch methods delegate to the entry in `ops`.
    class BaseDevice {
    public:
        virtual bool Malloc (void **ret, size_t size) = 0; // allocate `size` bytes on this device
        virtual bool Malloc (void **ret, Data &data); // allocate space sized for `data`'s dims

        virtual bool Free(void *ret) = 0; // release memory previously returned by Malloc

        virtual bool CopyDataToCPU(void *dst, void *src, size_t size) = 0; // copy device `src` to CPU `dst`
        virtual bool CopyDataToCPU(Data &data); // move `data` from this device to the CPU

        virtual bool CopyDataFromCPU(void *dst, void *src, size_t size) = 0; // copy CPU `src` to device `dst`
        virtual bool CopyDataFromCPU(Data &data); // move `data` from the CPU to this device

        // Whether this device has an operator able to run `opType` with these arguments.
        virtual bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);

        // Shape inference for `opType` on this device.
        virtual void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);

        // Execute `opType` on this device.
        virtual void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams);

        std::string deviceType;        // device category, e.g. "cpu" or "cuda"
        std::string deviceName;        // human-readable device name
        std::vector <int> deviceIds;   // physical device indices this instance covers

        std::map <std::string, BaseOperator*> ops; // opType -> operator implementation
    };
}

#endif //FASTLLM_DEVICE_H
Loading

0 comments on commit 20c36bd

Please sign in to comment.