forked from nomic-ai/gpt4all
Commit: showing 20 changed files with 673 additions and 389 deletions.
New file (the BERT backend header; its include guards suggest bert.h):
@@ -0,0 +1,44 @@
#ifndef BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of bert.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef BERT_H
#define BERT_H

#include <string>
#include <functional>
#include <vector>
#include <memory>
#include "llmodel.h"

struct BertPrivate;
class Bert : public LLModel {
public:
    Bert();
    ~Bert();

    bool supportsEmbedding() const override { return true; }
    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath) override;
    size_t stateSize() const override;
    size_t saveState(uint8_t *dest) const override;
    size_t restoreState(const uint8_t *src) override;
    void setThreadCount(int32_t n_threads) override;
    int32_t threadCount() const override;

    std::vector<float> embedding(const std::string &text) override;

private:
    std::unique_ptr<BertPrivate> d_ptr;

protected:
    std::vector<Token> tokenize(PromptContext &, const std::string &) const override;
    Token sampleToken(PromptContext &ctx) const override;
    std::string tokenToString(Token) const override;
    bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
    int32_t contextLength() const override;
    const std::vector<Token> &endTokens() const override;
};

#endif // BERT_H
(Diffs for several other changed files are not rendered in this view.)
New file (convert-h5-to-ggml.py, per its usage string):
@@ -0,0 +1,102 @@
import sys
import struct
import json
import torch
import numpy as np

from transformers import AutoModel, AutoTokenizer

# dir-model is required; use-f32 is optional, so only one argument is mandatory
if len(sys.argv) < 2:
    print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)

# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"

with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
    encoder = json.load(f)  # note: not used below; the vocab comes from vocab.txt

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

with open(dir_model + "/vocab.txt", "r", encoding="utf-8") as f:
    vocab = f.readlines()

# possible data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)
    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"

tokenizer = AutoTokenizer.from_pretrained(dir_model)
model = AutoModel.from_pretrained(dir_model, low_cpu_mem_usage=True)
print(model)

print(tokenizer.encode('I believe the meaning of life is'))

list_vars = model.state_dict()
for name in list_vars.keys():
    print(name, list_vars[name].shape, list_vars[name].dtype)

fout = open(fname_out, "wb")

print(hparams)

fout.write(struct.pack("i", 0x62657274))  # magic: "bert" in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["max_position_embeddings"]))
fout.write(struct.pack("i", hparams["hidden_size"]))
fout.write(struct.pack("i", hparams["intermediate_size"]))
fout.write(struct.pack("i", hparams["num_attention_heads"]))
fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", ftype))

for i in range(hparams["vocab_size"]):
    text = vocab[i][:-1]  # strip the trailing newline
    data = bytes(text, 'utf-8')
    fout.write(struct.pack("i", len(data)))
    fout.write(data)

for name in list_vars.keys():
    data = list_vars[name].squeeze().numpy()
    if name in ['embeddings.position_ids', 'pooler.dense.weight', 'pooler.dense.bias']:
        continue
    print("Processing variable: " + name + " with shape: ", data.shape)

    n_dims = len(data.shape)

    # ftype == 0 -> float32, ftype == 1 -> float16
    if ftype == 1 and name[-7:] == ".weight" and n_dims == 2:
        print("  Converting to float16")
        data = data.astype(np.float16)
        l_type = 1
    else:
        l_type = 0

    # header: number of dims, length of the name, tensor dtype
    name_bytes = name.encode('utf-8')  # renamed from `str`, which shadowed the builtin
    fout.write(struct.pack("iii", n_dims, len(name_bytes), l_type))
    for i in range(n_dims):
        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
    fout.write(name_bytes)

    # data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
Changed file (the Python bindings' __init__.py, judging by the imports):

@@ -1,2 +1,2 @@
-from .gpt4all import GPT4All # noqa
+from .gpt4all import GPT4All, embed # noqa
 from .pyllmodel import LLModel # noqa
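The new export suggests a module-level embed() convenience wrapper over the backend's embedding() override. Its signature is not shown in this diff, so the sketch below is an assumption: it takes a string and returns a list of floats, mirroring the C++ declaration std::vector<float> embedding(const std::string &text) above.

from gpt4all import embed

# Assumed signature: embed(text: str) -> list[float]. The vector length
# depends on the loaded BERT model's hidden size.
text = "The quick brown fox jumps over the lazy dog"
vector = embed(text)
print(len(vector), vector[:5])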