Skip to content

Commit

Permalink
llama : decide to disable Vulkan before loading tensors
Browse files: browse the repository at this point in the history
  • Loading branch information
cebtenzzre committed Oct 6, 2023
1 parent 7b8f00f commit 411bebd
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions llama.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -2405,7 +2405,7 @@ static bool llama_model_load(
llama_model & model,
int n_ctx,
int n_batch,
int n_gpu_layers,
int * n_gpu_layers,
int main_gpu,
const float * tensor_split,
const bool mul_mat_q,
Expand Down Expand Up @@ -2436,8 +2436,23 @@ static bool llama_model_load(
return true;
}

#ifdef GGML_USE_KOMPUTE
if (ggml_vk_has_device() && *n_gpu_layers > 0 && (
!(model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON)
|| !(
model.ftype == LLAMA_FTYPE_ALL_F32 ||
model.ftype == LLAMA_FTYPE_MOSTLY_F16 ||
model.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
model.ftype == LLAMA_FTYPE_MOSTLY_Q4_1
)
)) {
// disable Vulkan due to unsupported model architecture or quantization type
*n_gpu_layers = 0;
}
#endif

llm_load_tensors(
*ml, model, n_batch, n_gpu_layers,
*ml, model, n_batch, *n_gpu_layers,
main_gpu, tensor_split, mul_mat_q, low_vram, memory_type,
use_mlock, progress_callback, progress_callback_user_data);
} catch (const std::exception & err) {
Expand Down Expand Up @@ -6360,7 +6375,7 @@ struct llama_model * llama_load_model_from_file(
};
}

if (!llama_model_load(path_model, *model, params.n_ctx, params.n_batch, params.n_gpu_layers,
if (!llama_model_load(path_model, *model, params.n_ctx, params.n_batch, &params.n_gpu_layers,
params.main_gpu, params.tensor_split, params.mul_mat_q, params.rope_freq_base, params.rope_freq_scale,
params.low_vram, memory_type, params.use_mmap, params.use_mlock, params.vocab_only,
params.progress_callback, params.progress_callback_user_data)) {
Expand Down Expand Up @@ -6508,12 +6523,7 @@ struct llama_context * llama_new_context_with_model(
#undef LLAMA_METAL_CHECK_BUF
}
#elif defined(GGML_USE_KOMPUTE)
if (ggml_vk_has_device() && params.n_gpu_layers > 0
&& (model->arch == LLM_ARCH_LLAMA || model->arch == LLM_ARCH_FALCON)
&& (model->ftype == LLAMA_FTYPE_ALL_F32
|| model->ftype == LLAMA_FTYPE_MOSTLY_F16
|| model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0
|| model->ftype == LLAMA_FTYPE_MOSTLY_Q4_1)) {
if (ggml_vk_has_device() && params.n_gpu_layers > 0) {
// this allocates all Vulkan resources and memory buffers
ctx->ctx_kompute = ggml_vk_init();

Expand Down

0 comments on commit 411bebd

Please sign in to comment.