example-tokenizer.rkt
#lang racket
(require "llama.rkt")
(require ffi/unsafe
         ffi/unsafe/define)
#| INITIALIZE |#
; Use a complete path: passing a bare path string to llama-load-model-from-file
; does not work when this file is run from DrRacket.
(define model-path
  (path->complete-path "model.gguf"))
#;(define model-path (path->complete-path "t5-v1_1-xxl-encoder-Q5_K_M.gguf"))
#;(define model-path (path->complete-path "gpt2.Q4_K_M.gguf"))
#;(define model-path
    (path->complete-path
     "/Users/zostaw/projects/ai/models/Meta-Llama-3-8B-Instruct.Q5_K_M.gguf"))
; Load the model with default parameters, then create a context on it.
(define model-params
  (llama-model-default-params))
(define model
  (llama-load-model-from-file model-path model-params))
(define ctx-params
  (llama-context-default-params))
(define ctx
  (llama-new-context-with-model model ctx-params))
; Whether this model wants a BOS token prepended when tokenizing;
; passed to the tokenizer below as its add-special flag.
(define add-special
  (llama-add-bos-token model))
; pooling-type and n-seq-max are only queried for reference here;
; max-tokens is handed to the tokenizer below.
(define pooling-type
  (llama-pooling-type ctx))
(define max-tokens 100)
(define n-seq-max
  (llama-n-seq-max ctx))
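; Optional guard (a sketch, kept datum-commented like the alternative model
; paths above): depending on how llama.rkt declares the return type, a failed
; load may surface as #f here; if so, stop with a readable error instead of
; crashing later inside the FFI.
#;(unless model
    (error 'example-tokenizer "could not load model from ~a" model-path))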
#| TOKENIZE |#
; tokenizer and token-to-piecer (from llama.rkt) build closures over the
; model: tokenize turns a string into tokens, and token-to-piece turns a
; token id back into its text piece.
(define tokenize
  (tokenizer model
             max-tokens
             add-special
             #f))
(define token-to-piece
  (token-to-piecer model
                   1
                   #t))
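; Quick sanity-check sketch (datum-commented): the closures can be exercised
; directly, e.g. tokenize any string and decode its first token back to text.
#;(let-values ([(toks n) (tokenize "hello")])
    (printf "~a token(s); first piece: ~a\n"
            n
            (token-to-piece (ptr-ref toks _llama_token 0))))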
; Tokenize the input text: tokenize returns the token buffer and its length.
(define text "This is some random text to tokenize")
(define-values (tokens tokens-len) (tokenize text))
; Display each token: its index, integer id, and decoded text piece.
(define (print-tokens tokens tokens-len)
  (displayln "\nTokens:")
  (for ([i (in-range tokens-len)])
    (define token (ptr-ref tokens _llama_token i))
    (define piece (token-to-piece token))
    (displayln
     (format " token ~a: ~a, piece: \"~a\"" i token piece)))
  (newline))
(print-tokens tokens tokens-len)
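; Round-trip sketch: rebuild a string by concatenating the decoded pieces.
; This assumes the pieces concatenate back to (roughly) the input text; exact
; whitespace handling depends on the model's tokenizer, and a BOS token, if
; one was added, typically decodes to an empty piece. ~a coerces each piece
; to a string in case the binding returns bytes.
(define reconstructed
  (apply string-append
         (for/list ([i (in-range tokens-len)])
           (~a (token-to-piece (ptr-ref tokens _llama_token i))))))
(displayln (format "Reconstructed: \"~a\"" reconstructed))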
#| DEALLOCATE |#
; Release the native resources: free the context first, then the model.
(llama-free-context ctx)
(llama-free-model model)