From 02dfd5b8c3802c894a5dd299ff358ce037e97a78 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 2 Nov 2022 18:31:18 +0200 Subject: [PATCH] whisper : fix extra memory usage after recent processor changes Had increased the memory buffer to the size of the model and forgot to bring it down. --- whisper.cpp | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 7f2b49b893c..b230d0c0ce3 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -133,11 +133,19 @@ static const std::map> g_lang = { static const size_t MB = 1024*1024; static const std::map MEM_REQ_MODEL = { - { MODEL_TINY, 86ull*MB }, - { MODEL_BASE, 165ull*MB }, - { MODEL_SMALL, 540ull*MB }, - { MODEL_MEDIUM, 1650ull*MB }, - { MODEL_LARGE, 3260ull*MB }, + { MODEL_TINY, 74ull*MB }, + { MODEL_BASE, 142ull*MB }, + { MODEL_SMALL, 466ull*MB }, + { MODEL_MEDIUM, 1464ull*MB }, + { MODEL_LARGE, 2952ull*MB }, +}; + +static const std::map MEM_REQ_MEMORY = { + { MODEL_TINY, 12ull*MB }, + { MODEL_BASE, 24ull*MB }, + { MODEL_SMALL, 70ull*MB }, + { MODEL_MEDIUM, 184ull*MB }, + { MODEL_LARGE, 306ull*MB }, }; static const std::map MEM_REQ_ENCODE = { @@ -498,7 +506,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) { wctx.buf_model = new std::vector(); wctx.buf_model->resize(MEM_REQ_MODEL.at(model.type)); - wctx.buf_memory.resize(std::max(MEM_REQ_MODEL.at(model.type), MEM_REQ_MODEL.at(model.type))); // TODO: TMP !!! + wctx.buf_memory.resize(MEM_REQ_MEMORY.at(model.type)); wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type))); wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type))); @@ -722,20 +730,6 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) { } } - // create the ggml memory context - { - struct ggml_init_params params = { - .mem_size = wctx.buf_memory.size(), - .mem_buffer = wctx.buf_memory.data(), - }; - - model.ctx_mem = ggml_init(params); - if (!model.ctx_mem) { - fprintf(stderr, "%s: ggml_init() failed\n", __func__); - return false; - } - } - // prepare memory for the weights { auto & ctx = model.ctx; @@ -932,6 +926,20 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) { } } + // create the ggml memory context + { + struct ggml_init_params params = { + .mem_size = wctx.buf_memory.size(), + .mem_buffer = wctx.buf_memory.data(), + }; + + model.ctx_mem = ggml_init(params); + if (!model.ctx_mem) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + // key + value memory { auto & ctx = model.ctx_mem;