
falcon h2d + reenable vulkan
apage43 authored and cebtenzzre committed Oct 5, 2023
1 parent 7d4ecef commit 7a27f88
Showing 1 changed file with 20 additions and 3 deletions.
llama.cpp (23 changes: 20 additions & 3 deletions)
@@ -3828,6 +3828,9 @@ static struct ggml_cgraph * llm_build_falcon(
 
     struct ggml_tensor * cur;
     struct ggml_tensor * inpL;
+#if defined(GGML_USE_KOMPUTE)
+    struct ggml_tensor * toDeviceTensor = nullptr;
+#endif
 
     if (batch.token) {
         struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
@@ -3837,7 +3840,9 @@ static struct ggml_cgraph * llm_build_falcon(
             memcpy(inp_tokens->data, batch.token, n_tokens*ggml_element_size(inp_tokens));
         }
         ggml_set_name(inp_tokens, "inp_tokens");
-
+#if defined(GGML_USE_KOMPUTE)
+        toDeviceTensor = inp_tokens;
+#endif
         inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
     } else {
 #ifdef GGML_USE_MPI
@@ -3850,6 +3855,9 @@ static struct ggml_cgraph * llm_build_falcon(
         if (!ggml_allocr_is_measure(lctx.alloc)) {
             memcpy(inpL->data, batch.embd, n_tokens * n_embd * ggml_element_size(inpL));
         }
+#if defined(GGML_USE_KOMPUTE)
+        toDeviceTensor = inpL;
+#endif
     }
 
     const int i_gpu_start = n_layer - n_gpu_layers;
@@ -4142,6 +4150,16 @@ static struct ggml_cgraph * llm_build_falcon(
 
     ggml_free(ctx0);
 
+#if defined(GGML_USE_KOMPUTE)
+    if (lctx.ctx_kompute) {
+        if (!ggml_vk_has_h2d_all(lctx.ctx_kompute)) {
+            ggml_vk_h2d_all(lctx.ctx_kompute);
+        } else {
+            ggml_vk_h2d_tensor(lctx.ctx_kompute, toDeviceTensor);
+        }
+    }
+#endif
+
     return gf;
 }
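The hunks above implement a simple host-to-device (h2d) policy for the Kompute/Vulkan path: toDeviceTensor records whichever tensor receives fresh host data for this evaluation (inp_tokens for token input, inpL for raw embeddings), and once the graph is built that data is pushed to the device. A commented restatement of the logic — an editor's gloss on the diff, assuming the fork's ggml-kompute API exactly as shown (ggml_vk_has_h2d_all, ggml_vk_h2d_all, ggml_vk_h2d_tensor):

    #if defined(GGML_USE_KOMPUTE)
        if (lctx.ctx_kompute) {
            if (!ggml_vk_has_h2d_all(lctx.ctx_kompute)) {
                // First evaluation: nothing has been uploaded yet, so copy all
                // tensors (weights and inputs) to the Vulkan device in one pass.
                ggml_vk_h2d_all(lctx.ctx_kompute);
            } else {
                // Later evaluations: weights are already resident on the device;
                // only the tensor holding this batch's input needs a re-upload.
                ggml_vk_h2d_tensor(lctx.ctx_kompute, toDeviceTensor);
            }
        }
    #endif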

@@ -7442,9 +7460,8 @@ struct llama_context * llama_new_context_with_model(
 #undef LLAMA_METAL_CHECK_BUF
     }
 #elif defined(GGML_USE_KOMPUTE)
-    // TODO(cebtenzzre): we need to check the type of each tensor because Q8_0 is not currently supported
     if (ggml_vk_has_device() && model->n_gpu_layers > 0
-        && model->arch == LLM_ARCH_LLAMA
+        && (model->arch == LLM_ARCH_LLAMA || model->arch == LLM_ARCH_FALCON)
         && (model->ftype == LLAMA_FTYPE_ALL_F32
             || model->ftype == LLAMA_FTYPE_MOSTLY_F16
             || model->ftype == LLAMA_FTYPE_MOSTLY_Q4_0
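With this final hunk, the Kompute (Vulkan) backend is selected when a device is present, model->n_gpu_layers > 0, the architecture is LLaMA or (newly) Falcon, and the model file type is among those the backend handles — the visible condition covers F32, F16, and Q4_0 and continues past the end of the hunk. The removed TODO had flagged that Q8_0 tensors are not supported, which is the apparent reason for the file-type whitelist.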