ggml : sync with llama.cpp
- int64_t number of elements
- remove mlock
- expose quantization functions
- expose ggml_object
- add ggml_view_3d()
- multi-thread ggml_rope()
- fix ggml_cpy()
- add ggml_init_params.no_alloc
- fix ggml_mul_mat() backward
ggerganov committed Apr 10, 2023
1 parent 42cbb07 commit 3ac8072
Showing 18 changed files with 1,086 additions and 716 deletions.
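
Before the per-file diffs, a minimal sketch (not part of the commit) of two of the API changes called out in the message: the new no_alloc field of ggml_init_params and ggml_view_3d(). The ggml_view_3d() argument order (sizes ne0..ne2, then strides nb1 and nb2, then a byte offset) is assumed from the ggml headers of this era:

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024, // 16 MB scratch buffer
        .mem_buffer = NULL,         // let ggml allocate the buffer itself
        .no_alloc   = false,        // true would create tensor metadata only,
                                    // without reserving memory for the data
    };

    struct ggml_context * ctx = ggml_init(params);

    // tensor dimensions are now int64_t ("int64_t number of elements")
    struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 3, 2);

    // view of the first 2 rows of each 4x3 plane; strides come from the
    // parent tensor and the offset is in bytes (argument layout assumed)
    struct ggml_tensor * v = ggml_view_3d(ctx, a, 4, 2, 2, a->nb[1], a->nb[2], 0);
    (void) v;

    ggml_free(ctx);
    return 0;
}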
12 changes: 8 additions & 4 deletions examples/gpt-2/main.cpp
@@ -199,6 +199,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
     struct ggml_init_params params = {
         .mem_size   = ctx_size,
         .mem_buffer = NULL,
+        .no_alloc   = false,
     };
 
     model.ctx = ggml_init(params);
@@ -315,9 +316,11 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
         }
 
         int32_t nelements = 1;
-        int32_t ne[2] = { 1, 1 };
+        int64_t ne[2] = { 1, 1 };
         for (int i = 0; i < n_dims; ++i) {
-            fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
+            int32_t ne_cur;
+            fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
+            ne[i] = ne_cur;
             nelements *= ne[i];
         }

@@ -336,14 +339,14 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
         }
 
         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
                     __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
             return false;
         }
 
         if (0) {
             static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
-            printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
+            printf("%24s - [%5lld, %5lld], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
         }
 
         size_t bpe = 0;
@@ -432,6 +435,7 @@ bool gpt2_eval(
     struct ggml_init_params params = {
         .mem_size   = buf_size,
         .mem_buffer = buf,
+        .no_alloc   = false,
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
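
The loader change above reads each dimension from disk as int32_t and widens it into an int64_t array before multiplying, so the file format stays unchanged while in-memory element counts can exceed 32-bit range. A standalone sketch of the same idea (the read_dims helper is hypothetical, not part of the commit):

#include <cstdint>
#include <fstream>

// hypothetical helper mirroring the loader change: dimensions stay 32-bit
// on disk, but are widened to 64-bit before the element count is computed
static int64_t read_dims(std::ifstream & fin, int n_dims, int64_t ne[2]) {
    int64_t nelements = 1;
    ne[0] = ne[1] = 1;
    for (int i = 0; i < n_dims; ++i) {
        int32_t ne_cur;
        fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
        ne[i] = ne_cur;     // widen before multiplying
        nelements *= ne[i]; // 64-bit product
    }
    return nelements;
}

One portability note on the new format strings: %lld matches int64_t where long long is the 64-bit type, but on LP64 platforms int64_t is long, so the strictly portable spelling would be PRId64 from <cinttypes>.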
2 changes: 1 addition & 1 deletion examples/gpt-2/quantize.cpp
@@ -291,7 +291,7 @@ int main(int argc, char ** argv) {
 
     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL };
+        struct ggml_init_params params = { 0, NULL, false };
        struct ggml_context * ctx = ggml_init(params);
        ggml_free(ctx);
    }
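
The quantize examples only pick up the new third field of ggml_init_params, but per the commit message the quantization routines themselves are now exposed from ggml. A hedged sketch of calling one of them; the signature (source floats, destination buffer, element count n, row width k, histogram output) is assumed from the ggml header of this era:

#include "ggml.h"

#include <cstdint>
#include <vector>

int main(void) {
    const int n = 64; // total elements; assumed to be a multiple of the block size
    std::vector<float>   src(n, 0.5f);
    std::vector<uint8_t> dst(n);      // generous upper bound for the q4_0 output
    int64_t hist[16] = {0};           // per-bucket counts filled by the call

    // assumed to return the number of bytes written into dst
    size_t n_bytes = ggml_quantize_q4_0(src.data(), dst.data(), n, n, hist);
    (void) n_bytes;

    return 0;
}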
16 changes: 10 additions & 6 deletions examples/gpt-j/main.cpp
@@ -198,6 +198,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
     struct ggml_init_params params = {
         .mem_size   = ctx_size,
         .mem_buffer = NULL,
+        .no_alloc   = false,
     };
 
     model.ctx = ggml_init(params);
@@ -310,10 +311,12 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
             break;
         }
 
-        int32_t nelements = 1;
-        int32_t ne[2] = { 1, 1 };
+        int64_t nelements = 1;
+        int64_t ne[2] = { 1, 1 };
         for (int i = 0; i < n_dims; ++i) {
-            fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
+            int32_t ne_cur;
+            fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
+            ne[i] = ne_cur;
             nelements *= ne[i];
         }
 
@@ -332,14 +335,14 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
         }
 
         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
                     __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
             return false;
         }
 
         if (0) {
             static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
-            printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
+            printf("%24s - [%5lld, %5lld], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
         }
 
         size_t bpe = 0;
@@ -357,7 +360,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
         };
 
         if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
-            fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %llu\n",
                     __func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
             return false;
         }
@@ -431,6 +434,7 @@ bool gptj_eval(
     struct ggml_init_params params = {
         .mem_size   = buf_size,
         .mem_buffer = buf,
+        .no_alloc   = false,
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
2 changes: 1 addition & 1 deletion examples/gpt-j/quantize.cpp
@@ -292,7 +292,7 @@ int main(int argc, char ** argv) {
 
     // needed to initialize f16 tables
    {
-        struct ggml_init_params params = { 0, NULL };
+        struct ggml_init_params params = { 0, NULL, false };
        struct ggml_context * ctx = ggml_init(params);
        ggml_free(ctx);
    }
61 changes: 40 additions & 21 deletions examples/whisper/main.cpp
@@ -8,6 +8,7 @@
 #include <string>
 #include <thread>
 #include <vector>
+#include <cstring>
 
 // Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
 // Lowest is red, middle is yellow, highest is green.
@@ -371,6 +372,39 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
     return true;
 }
 
+char *escape_double_quotes(const char *str) {
+    if (str == NULL) {
+        return NULL;
+    }
+
+    size_t escaped_length = strlen(str) + 1;
+
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        if (str[i] == '"') {
+            escaped_length++;
+        }
+    }
+
+    char *escaped = (char *)calloc(escaped_length, 1); // pre-zeroed
+    if (escaped == NULL) {
+        return NULL;
+    }
+
+    size_t pos = 0;
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        if (str[i] == '"') {
+            escaped[pos++] = '\\';
+            escaped[pos++] = '"';
+        } else {
+            escaped[pos++] = str[i];
+        }
+    }
+
+    // no need to set zero due to calloc() being used prior
+
+    return escaped;
+}
+
 bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
     std::ofstream fout(fname);
     int indent = 0;
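
A quick illustration of the helper added above (illustration only, not from the commit). The caller owns the returned buffer, which is why the value_s lambda in the next hunk frees it after writing:

#include <cstdio>
#include <cstdlib>

char *escape_double_quotes(const char *str); // the helper from the hunk above

int main(void) {
    char * s = escape_double_quotes("say \"hello\"");
    if (s != NULL) {
        printf("%s\n", s); // prints: say \"hello\"
        free(s);           // caller owns the escaped copy
    }
    return 0;
}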
@@ -414,7 +448,9 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
 
     auto value_s = [&](const char *name, const char *val, bool end = false) {
         start_value(name);
-        fout << "\"" << val << (end ? "\"\n" : "\",\n");
+        char * val_escaped = escape_double_quotes(val);
+        fout << "\"" << val_escaped << (end ? "\"\n" : "\",\n");
+        free(val_escaped);
     };
 
     auto end_value = [&](bool end = false) {
@@ -455,7 +491,7 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
                 value_i("ctx", whisper_model_n_text_ctx(ctx));
                 value_i("state", whisper_model_n_text_state(ctx));
                 value_i("head", whisper_model_n_text_head(ctx));
-                value_i("leyer", whisper_model_n_text_layer(ctx), true);
+                value_i("layer", whisper_model_n_text_layer(ctx), true);
             end_obj();
             value_i("mels", whisper_model_n_mels(ctx));
             value_i("f16", whisper_model_f16(ctx), true);
@@ -477,7 +513,7 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
         const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
 
         start_obj();
-            start_obj("timestanps");
+            start_obj("timestamps");
                 value_s("from", to_timestamp(t0, true).c_str());
                 value_s("to", to_timestamp(t1, true).c_str(), true);
             end_obj();
@@ -639,22 +675,6 @@ int main(int argc, char ** argv) {
         return 3;
     }
 
-    // initial prompt
-    std::vector<whisper_token> prompt_tokens;
-
-    if (!params.prompt.empty()) {
-        prompt_tokens.resize(1024);
-        prompt_tokens.resize(whisper_tokenize(ctx, params.prompt.c_str(), prompt_tokens.data(), prompt_tokens.size()));
-
-        fprintf(stderr, "\n");
-        fprintf(stderr, "initial prompt: '%s'\n", params.prompt.c_str());
-        fprintf(stderr, "initial tokens: [ ");
-        for (int i = 0; i < (int) prompt_tokens.size(); ++i) {
-            fprintf(stderr, "%d ", prompt_tokens[i]);
-        }
-        fprintf(stderr, "]\n");
-    }
-
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
         const auto fname_inp = params.fname_inp[f];
         const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
@@ -718,8 +738,7 @@
 
         wparams.speed_up = params.speed_up;
 
-        wparams.prompt_tokens   = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
-        wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size();
+        wparams.initial_prompt = params.prompt.c_str();
 
         wparams.greedy.best_of        = params.best_of;
         wparams.beam_search.beam_size = params.beam_size;
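
With this sync the manual tokenization block removed above is no longer needed: the raw prompt string is handed to whisper and tokenized inside the library. A minimal sketch of the simplified call site, using the field and function names shown in the diff (the prompt string must stay alive until whisper_full() runs):

#include "whisper.h"

// sketch: the prompt is now passed as plain text instead of pre-tokenized ids
static struct whisper_full_params make_params(const char * prompt) {
    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    wparams.initial_prompt = prompt; // tokenization now happens inside the library

    // replaces the removed pair:
    //   wparams.prompt_tokens   = prompt_tokens.data();
    //   wparams.prompt_n_tokens = prompt_tokens.size();

    return wparams;
}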
2 changes: 1 addition & 1 deletion examples/whisper/quantize.cpp
@@ -334,7 +334,7 @@ int main(int argc, char ** argv) {
 
     // needed to initialize f16 tables
    {
-        struct ggml_init_params params = { 0, NULL };
+        struct ggml_init_params params = { 0, NULL, false };
        struct ggml_context * ctx = ggml_init(params);
        ggml_free(ctx);
    }