chat api endpoint (ollama#1392)
BruceMacD authored Dec 5, 2023
1 parent 00d0661 commit 195e3d9
Showing 9 changed files with 551 additions and 133 deletions.
11 changes: 11 additions & 0 deletions README.md
@@ -214,6 +214,17 @@ curl http://localhost:11434/api/generate -d '{
}'
```

Or send a chat message:

```
curl http://localhost:11434/api/chat -d '{
"model": "mistral",
"messages": [
{ "role": "user", "content": "why is the sky blue?" }
]
}'
```

See the [API documentation](./docs/api.md) for all endpoints.

## Community Integrations
13 changes: 13 additions & 0 deletions api/client.go
@@ -221,6 +221,19 @@ func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn Generate
})
}

type ChatResponseFunc func(ChatResponse) error

func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error {
return c.stream(ctx, http.MethodPost, "/api/chat", req, func(bts []byte) error {
var resp ChatResponse
if err := json.Unmarshal(bts, &resp); err != nil {
return err
}

return fn(resp)
})
}

type PullProgressFunc func(ProgressResponse) error

func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error {
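Taken together, `Chat` streams `ChatResponse` values into the callback until a final response arrives with `Done` set. A minimal usage sketch, not part of this diff — it assumes the module path `github.com/jmorganca/ollama/api` and the existing `ClientFromEnvironment` constructor:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api" // assumed module path; check go.mod
)

func main() {
	// assumed constructor: reads OLLAMA_HOST to locate the server
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model: "llama2",
		Messages: []api.Message{
			{Role: "user", Content: "why is the sky blue?"},
		},
	}

	// the callback fires once per streamed ChatResponse until Done is true
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		if resp.Message != nil {
			fmt.Print(resp.Message.Content)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println()
}
```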
70 changes: 49 additions & 21 deletions api/types.go
@@ -44,6 +44,39 @@ type GenerateRequest struct {
Options map[string]interface{} `json:"options"`
}

type ChatRequest struct {
Model string `json:"model"`
Messages []Message `json:"messages"`
Stream *bool `json:"stream,omitempty"`
Format string `json:"format"`

Options map[string]interface{} `json:"options"`
}

type Message struct {
Role string `json:"role"` // one of ["system", "user", "assistant"]
Content string `json:"content"`
}

type ChatResponse struct {
Model string `json:"model"`
CreatedAt time.Time `json:"created_at"`
Message *Message `json:"message,omitempty"`

Done bool `json:"done"`

Metrics
}

type Metrics struct {
TotalDuration time.Duration `json:"total_duration,omitempty"`
LoadDuration time.Duration `json:"load_duration,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
EvalCount int `json:"eval_count,omitempty"`
EvalDuration time.Duration `json:"eval_duration,omitempty"`
}

// Options specified in GenerateRequest; if you add a new option here, add it to the API docs also
type Options struct {
Runner
@@ -173,39 +206,34 @@ type GenerateResponse struct {
Done bool `json:"done"`
Context []int `json:"context,omitempty"`

-	TotalDuration      time.Duration `json:"total_duration,omitempty"`
-	LoadDuration       time.Duration `json:"load_duration,omitempty"`
-	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
-	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
-	EvalCount          int           `json:"eval_count,omitempty"`
-	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
+	Metrics
 }

-func (r *GenerateResponse) Summary() {
-	if r.TotalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "total duration: %v\n", r.TotalDuration)
+func (m *Metrics) Summary() {
+	if m.TotalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "total duration: %v\n", m.TotalDuration)
 	}

-	if r.LoadDuration > 0 {
-		fmt.Fprintf(os.Stderr, "load duration: %v\n", r.LoadDuration)
+	if m.LoadDuration > 0 {
+		fmt.Fprintf(os.Stderr, "load duration: %v\n", m.LoadDuration)
 	}

-	if r.PromptEvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", r.PromptEvalCount)
+	if m.PromptEvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval count: %d token(s)\n", m.PromptEvalCount)
 	}

-	if r.PromptEvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", r.PromptEvalDuration)
-		fmt.Fprintf(os.Stderr, "prompt eval rate: %.2f tokens/s\n", float64(r.PromptEvalCount)/r.PromptEvalDuration.Seconds())
+	if m.PromptEvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
+		fmt.Fprintf(os.Stderr, "prompt eval rate: %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
 	}

-	if r.EvalCount > 0 {
-		fmt.Fprintf(os.Stderr, "eval count: %d token(s)\n", r.EvalCount)
+	if m.EvalCount > 0 {
+		fmt.Fprintf(os.Stderr, "eval count: %d token(s)\n", m.EvalCount)
 	}

-	if r.EvalDuration > 0 {
-		fmt.Fprintf(os.Stderr, "eval duration: %s\n", r.EvalDuration)
-		fmt.Fprintf(os.Stderr, "eval rate: %.2f tokens/s\n", float64(r.EvalCount)/r.EvalDuration.Seconds())
+	if m.EvalDuration > 0 {
+		fmt.Fprintf(os.Stderr, "eval duration: %s\n", m.EvalDuration)
+		fmt.Fprintf(os.Stderr, "eval rate: %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
 	}
 }

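With `Metrics` embedded in both `GenerateResponse` and `ChatResponse`, the relocated `Summary()` is shared by both endpoints. A sketch of a non-streaming call that exercises the `Stream *bool` field — a hypothetical helper reusing `client` from the sketch above; that a single callback carries the complete response when `stream` is `false` is an assumption about the server:

```go
// nonStreamingChat is a sketch, not part of this diff
func nonStreamingChat(ctx context.Context, client *api.Client) error {
	stream := false
	req := &api.ChatRequest{
		Model:  "llama2",
		Stream: &stream, // *bool so nil can mean "server default" (streaming on)
		Messages: []api.Message{
			{Role: "user", Content: "hello"},
		},
	}

	return client.Chat(ctx, req, func(resp api.ChatResponse) error {
		if resp.Message != nil {
			fmt.Println(resp.Message.Content)
		}
		if resp.Done {
			// Metrics is embedded in ChatResponse, so the shared
			// Summary() prints timing information to stderr
			resp.Summary()
		}
		return nil
	})
}
```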
143 changes: 136 additions & 7 deletions docs/api.md
@@ -24,15 +24,15 @@ All durations are returned in nanoseconds.

### Streaming responses

-Certain endpoints stream responses as JSON objects delineated with the newline (`\n`) character.
+Certain endpoints stream responses as JSON objects.

## Generate a completion

```shell
POST /api/generate
```

-Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses. The final response object will include statistics and additional data from the request.
+Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.

### Parameters

@@ -47,7 +47,7 @@ Advanced parameters (optional):
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
-- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
+- `raw`: if `true` no formatting will be applied to the prompt. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API.

### JSON mode

@@ -57,7 +57,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur
### Examples

-#### Request
+#### Request (Prompt)

```shell
curl http://localhost:11434/api/generate -d '{
@@ -114,6 +114,8 @@ To calculate how fast the response is generated in tokens per second (token/s),

#### Request (No streaming)

A response can be received in one reply when streaming is off.

```shell
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
@@ -144,9 +146,9 @@ If `stream` is set to `false`, the response will be a single JSON object:
}
```

-#### Request (Raw mode)
+#### Request (Raw Mode)

-In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
+In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting.

```shell
curl http://localhost:11434/api/generate -d '{
@@ -164,6 +166,7 @@ curl http://localhost:11434/api/generate -d '{
"model": "mistral",
"created_at": "2023-11-03T15:36:02.583064Z",
"response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
"context": [1, 2, 3],
"done": true,
"total_duration": 14648695333,
"load_duration": 3302671417,
@@ -275,7 +278,6 @@ curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
@@ -288,6 +290,133 @@ curl http://localhost:11434/api/generate -d '{
}
```

## Send Chat Messages

```shell
POST /api/chat
```

Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.

### Parameters

- `model`: (required) the [model name](#model-names)
- `messages`: the messages of the chat; this can be used to keep a chat memory

Advanced parameters (optional):

- `format`: the format to return a response in. Currently the only accepted value is `json`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects

### Examples

#### Request

Send a chat message with a streaming response.

```shell
curl http://localhost:11434/api/chat -d '{
"model": "llama2",
"messages": [
{
"role": "user",
"content": "why is the sky blue?"
}
]
}'
```

#### Response

A stream of JSON objects is returned:

```json
{
"model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"message": {
"role": "assisant",
"content": "The"
},
"done": false
}
```

Final response:

```json
{
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
"sample_count": 114,
"sample_duration": 81442000,
"prompt_eval_count": 46,
"prompt_eval_duration": 1160282000,
"eval_count": 113,
"eval_duration": 1325948000
}
```
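For clients not using the Go package, a sketch of consuming this stream directly over HTTP; it assumes each JSON object arrives on its own line, which is how the streaming endpoints delimit their output:

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

type message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatChunk struct {
	Message *message `json:"message"`
	Done    bool     `json:"done"`
}

func main() {
	body := []byte(`{"model":"llama2","messages":[{"role":"user","content":"why is the sky blue?"}]}`)
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// read one JSON object per line until the final object reports done
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		var chunk chatChunk
		if err := json.Unmarshal(scanner.Bytes(), &chunk); err != nil {
			log.Fatal(err)
		}
		if chunk.Message != nil {
			fmt.Print(chunk.Message.Content)
		}
		if chunk.Done {
			fmt.Println()
			break
		}
	}
}
```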

#### Request (With History)

Send a chat message with a conversation history.

```shell
curl http://localhost:11434/api/chat -d '{
"model": "llama2",
"messages": [
{
"role": "user",
"content": "why is the sky blue?"
},
{
"role": "assistant",
"content": "due to rayleigh scattering."
},
{
"role": "user",
"content": "how is that different than mie scattering?"
}
]
}'
```

#### Response

A stream of JSON objects is returned:

```json
{
"model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"message": {
"role": "assisant",
"content": "The"
},
"done": false
}
```

Final response:

```json
{
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
"sample_count": 114,
"sample_duration": 81442000,
"prompt_eval_count": 46,
"prompt_eval_duration": 1160282000,
"eval_count": 113,
"eval_duration": 1325948000
}
```
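To keep a conversation going programmatically, accumulate the streamed assistant reply and append it to `messages` before the next turn. A fragment continuing the Go sketches above (`client` and `ctx` as before; `strings` and `log` imported):

```go
history := []api.Message{
	{Role: "user", Content: "why is the sky blue?"},
}

// collect the assistant's streamed reply into one message
var reply strings.Builder
err := client.Chat(ctx, &api.ChatRequest{Model: "llama2", Messages: history},
	func(resp api.ChatResponse) error {
		if resp.Message != nil {
			reply.WriteString(resp.Message.Content)
		}
		return nil
	})
if err != nil {
	log.Fatal(err)
}

// feed the whole exchange back in on the next turn
history = append(history,
	api.Message{Role: "assistant", Content: reply.String()},
	api.Message{Role: "user", Content: "how is that different than mie scattering?"},
)
```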

## Create a Model

```shell
POST /api/create
```