Skip to content

Commit

Permalink
catch when model vocab size is set correctly (ollama#6714)
Browse files Browse the repository at this point in the history
  • Loading branch information
pdevine authored Sep 10, 2024
1 parent bb6a086 commit 84b84ce
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,18 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
return err
}

if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
vocabSize := int(p.VocabSize)
switch {
case vocabSize > len(t.Vocabulary.Tokens):
slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
for i := range vocabSize - len(t.Vocabulary.Tokens) {
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
}
} else {
case vocabSize < len(t.Vocabulary.Tokens):
return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize)
default:
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
}

Expand Down

0 comments on commit 84b84ce

Please sign in to comment.