Commit

re-janhq#143: use OpenAI decoder and mutate final result from client (janhq#164)

* chore: use OpenAI parser

* chore: access host's services

* chore: take out llm service - GGUF model for the latest llama.cpp support
louis-jan authored Sep 12, 2023
1 parent 83d2e34 commit 6aae985
Showing 6 changed files with 87 additions and 49 deletions.
conf/sample.env_web-client (3 changes: 2 additions & 1 deletion)
@@ -5,7 +5,8 @@ NEXT_PUBLIC_DOWNLOAD_APP_IOS=#
 NEXT_PUBLIC_DOWNLOAD_APP_ANDROID=#
 NEXT_PUBLIC_GRAPHQL_ENGINE_URL=http://localhost:8080/v1/graphql
 NEXT_PUBLIC_GRAPHQL_ENGINE_WEB_SOCKET_URL=ws://localhost:8080/v1/graphql
-NEXT_PUBLIC_OPENAPI_ENDPOINT=http://localhost:8000/v1/completions
+OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1
+OPENAPI_KEY=openapikey
 KEYCLOAK_CLIENT_ID=hasura
 KEYCLOAK_CLIENT_SECRET=oMtCPAV7diKpE564SBspgKj4HqlKM4Hy
 AUTH_ISSUER=http://localhost:8088/realms/$KEYCLOAK_CLIENT_ID
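Note the scope change: the endpoint is no longer exposed to the browser as a `NEXT_PUBLIC_*` variable. The completion request now goes through the new server-side route `web-client/app/api/openai/route.ts` (added below), so the endpoint URL and API key stay on the server. `host.docker.internal` points the containerized web-client at an inference server running on the host machine rather than at a service inside the compose network.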
docker-compose.yml (14 changes: 0 additions & 14 deletions)
@@ -112,20 +112,6 @@ services:
       jan_community:
         ipv4_address: 172.20.0.15

-  llm:
-    image: ghcr.io/abetlen/llama-cpp-python@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4
-    volumes:
-      - ./jan-inference/llm/models:/models
-    ports:
-      - 8000:8000
-    environment:
-      MODEL: /models/${LLM_MODEL_FILE}
-      PYTHONUNBUFFERED: 1
-    restart: on-failure
-    networks:
-      jan_community:
-        ipv4_address: 172.20.0.18
-
 networks:
   jan_community:
     driver: bridge
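With the `llm` service removed from the compose file, the stack assumes an OpenAI-compatible server (for example `llama-cpp-python` serving the GGUF model that `run.sh` downloads) is already listening on the host at port 8000, which is what `OPENAPI_ENDPOINT=http://host.docker.internal:8000/v1` reaches. A minimal smoke-test sketch for that assumption, runnable from the web-client's environment (the file name and the `/models` probe are assumptions, not part of this commit):

```ts
// check-llm.ts (hypothetical): verify the host-side OpenAI-compatible server is up.
// Assumes OPENAPI_ENDPOINT and OPENAPI_KEY are set as in conf/sample.env_web-client.
const base = process.env.OPENAPI_ENDPOINT ?? "http://host.docker.internal:8000/v1";

async function checkLlmServer(): Promise<void> {
  // llama-cpp-python's server mirrors OpenAI's API, so /models should list
  // the loaded model when the server is reachable.
  const res = await fetch(`${base}/models`, {
    headers: { Authorization: `Bearer ${process.env.OPENAPI_KEY ?? ""}` },
  });
  if (!res.ok) throw new Error(`LLM server not reachable: HTTP ${res.status}`);
  console.log(await res.json());
}

checkLlmServer().catch((e) => {
  console.error(e);
  process.exit(1);
});
```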
run.sh (4 changes: 2 additions & 2 deletions)
@@ -124,10 +124,10 @@ progress 'cp -f sample.env .env' "Prepare .env file" $((step++))
 ###

 ### Download Model
-if [ -f "jan-inference/llm/models/llama-2-7b-chat.ggmlv3.q4_1.bin" ]; then
+if [ -f "jan-inference/llm/models/llama-2-7b.Q4_K_S.gguf" ]; then
   progress '' "Llama model - Installed" $((step++))
 else
-  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_1.bin -P jan-inference/llm/models' "Download Llama model" $((step++))
+  progress 'wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_S.gguf -P jan-inference/llm/models' "Download Llama model" $((step++))
 fi
 ###
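The model swap here tracks llama.cpp's file-format migration: GGML files such as `llama-2-7b-chat.ggmlv3.q4_1.bin` are no longer loadable by current llama.cpp builds, which expect GGUF (hence the commit note about "the latest llama.cpp support"). Note that the replacement is the base `Llama-2-7B` quantization rather than the chat-tuned variant.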
web-client/app/_components/ChatBody/index.tsx (3 changes: 3 additions & 0 deletions)
@@ -128,6 +128,7 @@ export const ChatBody: React.FC<Props> = observer(({ onPromptSelected }) => {
   const renderItem = (
     index: number,
     {
+      id,
       messageType,
       senderAvatarUrl,
       senderName,
@@ -172,9 +173,11 @@ const renderItem = (
       ) : (
         <StreamTextMessage
           key={index}
+          id={id}
           avatarUrl={senderAvatarUrl ?? "/icons/app_icon.svg"}
           senderName={senderName}
           createdAt={createdAt}
+          text={text}
         />
       );
     default:
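Threading `id` and `text` through `renderItem` into `StreamTextMessage` gives each streamed message a handle on its own record in the conversation; the component below uses the `id` to recognize whether it is the latest message and to persist the finished stream.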
web-client/app/_components/StreamTextMessage/index.tsx (86 changes: 54 additions & 32 deletions)
@@ -1,9 +1,16 @@
-import React from "react";
+import React, { useEffect } from "react";
 import { displayDate } from "@/_utils/datetime";
 import { useStore } from "@/_models/RootStore";
-import { StreamingText, StreamingTextURL, useTextBuffer } from "nextjs-openai";
-import { MessageSenderType } from "@/_models/ChatMessage";
+import { StreamingText, useTextBuffer } from "nextjs-openai";
+import { MessageSenderType, MessageStatus } from "@/_models/ChatMessage";
 import { Role } from "@/_models/History";
+import { useMutation } from "@apollo/client";
+import { OpenAI } from "openai-streams";
+import {
+  UpdateMessageDocument,
+  UpdateMessageMutation,
+  UpdateMessageMutationVariables,
+} from "@/graphql";

 type Props = {
   id?: string;
@@ -14,50 +21,67 @@ type Props = {
 };

 const StreamTextMessage: React.FC<Props> = ({
+  id,
   senderName,
   createdAt,
   avatarUrl = "",
 }) => {
   const [data, setData] = React.useState<any | undefined>();
   const { historyStore } = useStore();
   const conversation = historyStore?.getActiveConversation();
+  const [updateMessage] = useMutation<UpdateMessageMutation>(
+    UpdateMessageDocument
+  );

   React.useEffect(() => {
-    const messages = conversation?.chatMessages.slice(-5).map((e) => ({
-      role:
-        e.messageSenderType === MessageSenderType.User
-          ? Role.User
-          : Role.Assistant,
-      content: e.text,
-    }));
+    if (
+      !conversation ||
+      conversation.chatMessages.findIndex((e) => e.id === id) !==
+        conversation.chatMessages.length - 1
+    ) {
+      return;
+    }
+    const messages = conversation?.chatMessages
+      .slice(-10)
+      .filter((e) => e.id !== id)
+      .map((e) => ({
+        role:
+          e.messageSenderType === MessageSenderType.User
+            ? Role.User
+            : Role.Assistant,
+        content: e.text,
+      }));
     setData({
       messages,
       stream: true,
       model: "gpt-3.5-turbo",
       max_tokens: 500,
     });
   }, [conversation]);

-  const { buffer, refresh, cancel } = useTextBuffer({
-    url: `${process.env.NEXT_PUBLIC_OPENAPI_ENDPOINT}`,
-    throttle: 100,
+  const { buffer, done } = useTextBuffer({
+    url: `api/openai`,
     data,
-
-    options: {
-      headers: {
-        "Content-Type": "application/json",
-      },
-    },
   });

-  const parsedBuffer = (buffer: String) => {
-    try {
-      const json = buffer.replace("data: ", "");
-      return JSON.parse(json).choices[0].text;
-    } catch (e) {
-      return "";
-    }
-  };
+  useEffect(() => {
+    if (done) {
+      // mutate result
+      const variables: UpdateMessageMutationVariables = {
+        id: id,
+        data: {
+          content: buffer.join(""),
+          status: MessageStatus.Ready,
+        },
+      };
+      updateMessage({
+        variables,
+      });
+    }
+  }, [done]);
+
+  useEffect(() => {
+    if (buffer.length > 0 && conversation?.isWaitingForModelResponse) {
+      historyStore.finishActiveConversationWaiting();
+    }
+  }, [buffer]);

   return data ? (
     <div className="flex items-start gap-2">
@@ -78,9 +102,7 @@ const StreamTextMessage: React.FC<Props> = ({
         </div>
       </div>
       <div className="leading-[20px] whitespace-break-spaces text-[14px] font-normal dark:text-[#d1d5db]">
-        <StreamingText
-          buffer={buffer.map((b) => parsedBuffer(b))}
-        ></StreamingText>
+        <StreamingText buffer={buffer} fade={100} />
       </div>
     </div>
   </div>
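Two behavioral changes are worth spelling out. First, the component no longer talks to the completions endpoint directly: `useTextBuffer` now posts to the internal `api/openai` route, and because that route (next file) decodes the upstream stream before forwarding it, each buffer entry is already plain text; that is why the `parsedBuffer` helper, which unwrapped raw `data: {...}` SSE frames on the client, could be deleted. Second, once `done` flips, the full response (`buffer.join("")`) is written back through the `updateMessage` GraphQL mutation with `MessageStatus.Ready`; this is the "mutate final result from client" part of the commit title.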
web-client/app/api/openai/route.ts (26 changes: 26 additions & 0 deletions)
@@ -0,0 +1,26 @@
+import { OpenAI } from "openai-streams";
+
+export async function POST(req: Request) {
+  const { messages } = await req.json();
+  if (!messages) {
+    return new Response(null, {
+      status: 400,
+      statusText: "Did not include `messages` parameter",
+    });
+  }
+  const completionsStream = await OpenAI(
+    "chat",
+    {
+      model: "gpt-3.5-turbo",
+      stream: true,
+      messages,
+      max_tokens: 500,
+    },
+    {
+      apiBase: process.env.OPENAPI_ENDPOINT,
+      apiKey: process.env.OPENAPI_KEY,
+    }
+  );
+
+  return new Response(completionsStream);
+}
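The route takes the trimmed message history from the client, forwards it to the OpenAI-compatible backend named by `OPENAPI_ENDPOINT`, and returns the stream as the response body. `openai-streams` decodes the upstream SSE frames (by default yielding just the token text, which the client-side rendering above relies on). A minimal sketch of consuming the route without `nextjs-openai`, to illustrate the contract (a standalone example, not part of the commit):

```ts
// Hypothetical standalone consumer of POST /api/openai; the app itself
// uses useTextBuffer from nextjs-openai for the same job.
async function streamCompletion(
  messages: { role: "user" | "assistant"; content: string }[]
): Promise<string> {
  const res = await fetch("/api/openai", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages }),
  });
  if (!res.ok || !res.body) throw new Error(`stream failed: HTTP ${res.status}`);

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let text = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    text += decoder.decode(value, { stream: true }); // chunks arrive as plain tokens
  }
  return text;
}
```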
