fix: incompatible GPU error message (janhq#2357)
* fix: incompatible GPU error message

* fix: change port
louis-jan authored Mar 14, 2024
1 parent 441af9d commit 758afdb
Showing 5 changed files with 87 additions and 3 deletions.
47 changes: 47 additions & 0 deletions extensions/tensorrt-llm-extension/models.json
@@ -45,5 +45,52 @@
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
},
{
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
}
],
"id": "tinyjensen-1.1b-chat-fp16",
"object": "model",
"name": "TinyJensen 1.1B Chat FP16",
"version": "1.0",
"description": "Do you want to chat with Jensen Huan? Here you are",
"format": "TensorRT-LLM",
"settings": {
"ctx_len": 2048,
"text_model": false
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "LLama",
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
}
]
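
For reference, a minimal TypeScript sketch of the model-entry shape this JSON implies. The field names are taken directly from the diff; the real `Model` type ships in `@janhq/core` and may differ, so treat this as an illustration only:

interface ModelSource {
  filename: string
  url: string
}

interface ModelEntry {
  sources: ModelSource[]          // engine/config/tokenizer artifacts to download
  id: string                      // e.g. 'tinyjensen-1.1b-chat-fp16'
  object: 'model'
  name: string
  version: string
  description: string
  format: 'TensorRT-LLM'
  settings: { ctx_len: number; text_model: boolean }
  parameters: { max_tokens: number }
  metadata: { author: string; tags: string[]; size: number }
  engine: 'nitro-tensorrt-llm'    // routes the model to this extension
}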
4 changes: 2 additions & 2 deletions extensions/tensorrt-llm-extension/package.json
@@ -1,14 +1,14 @@
{
"name": "@janhq/tensorrt-llm-extension",
"version": "0.0.2",
"version": "0.0.3",
"description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <[email protected]>",
"license": "AGPL-3.0",
"config": {
"host": "127.0.0.1",
"port": "3928"
"port": "3929"
},
"compatibility": {
"platform": [
…
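
The port bump from 3928 to 3929 plausibly avoids a collision with another local inference server already listening on 3928; the commit message says only "fix: change port", so that rationale is an assumption. A minimal sketch of how this config might be consumed to build the server URL — a hypothetical helper, not the extension's actual code:

// Hypothetical: assumes the bundler inlines package.json (resolveJsonModule).
import pkg from '../package.json'

const INFERENCE_URL = `http://${pkg.config.host}:${pkg.config.port}`
// -> 'http://127.0.0.1:3929' after this change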
15 changes: 15 additions & 0 deletions extensions/tensorrt-llm-extension/src/index.ts
@@ -20,6 +20,7 @@ import {
LocalOAIEngine,
fs,
MessageRequest,
ModelEvent,
} from '@janhq/core'
import models from '../models.json'

…
@@ -127,6 +128,20 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
}

async onModelInit(model: Model): Promise<void> {
if ((await this.installationState()) === 'Installed')
return super.onModelInit(model)
else {
events.emit(ModelEvent.OnModelFail, {
...model,
error: {
message: 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension',
},
})
return
}
}

override async installationState(): Promise<InstallationState> {
// For now, we just check the executable of nitro x tensor rt
const isNitroExecutableAvailable = await executeOnMain(
…
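
The new `onModelInit` guard is the core of the fix: check `installationState()` before delegating to `super.onModelInit`, and emit `ModelEvent.OnModelFail` with a `CODE::detail` message otherwise. A minimal sketch of a listener on the receiving side — the event name comes from the diff, but the payload shape (the model spread plus `error`) is inferred, not confirmed against the `@janhq/core` typings:

import { events, ModelEvent } from '@janhq/core'

// Sketch: react to the failure emitted by onModelInit above.
events.on(ModelEvent.OnModelFail, (payload: { error?: { message?: string } }) => {
  const [code, detail] = (payload.error?.message ?? '').split('::')
  if (code === 'EXTENSION_IS_NOT_INSTALLED') {
    // `detail` is the human-readable extension name, e.g. 'TensorRT-LLM extension'
    console.warn(`${detail} is not installed; prompting the user to install it.`)
  }
})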
21 changes: 21 additions & 0 deletions web/screens/Chat/ErrorMessage/index.tsx
@@ -7,18 +7,22 @@ import ModalTroubleShooting, {
modalTroubleShootingAtom,
} from '@/containers/ModalTroubleShoot'

import { MainViewState } from '@/constants/screens'

import { loadModelErrorAtom } from '@/hooks/useActiveModel'
import useSendChatMessage from '@/hooks/useSendChatMessage'

import { getErrorTitle } from '@/utils/errorMessage'

import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'

const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const messages = useAtomValue(getCurrentChatMessagesAtom)
const { resendChatMessage } = useSendChatMessage()
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
const setMainState = useSetAtom(mainViewStateAtom)
const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'

const regenerateMessage = async () => {
…
@@ -70,6 +74,23 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
</p>
<ModalTroubleShooting />
</div>
) : loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
<div
key={message.id}
className="flex w-full flex-col items-center text-center text-sm font-medium text-gray-500"
>
<p className="w-[90%]">
Model is currently unavailable. Please switch to a different
model or install the{' '}
<button
className="font-medium text-blue-500"
onClick={() => setMainState(MainViewState.Settings)}
>
{loadModelError.split('::')[1] ?? ''}
</button>{' '}
to continue using it.
</p>
</div>
) : (
<div
key={message.id}
…
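
The UI leans on the same `CODE::detail` convention: `includes` detects the error class and `split('::')[1]` recovers the extension name for the button label. A worked example, assuming only the message format emitted by the extension above:

const loadModelError = 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension'
loadModelError.includes('EXTENSION_IS_NOT_INSTALLED')  // true -> renders this branch
loadModelError.split('::')[1]                          // 'TensorRT-LLM extension' -> button text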
@@ -144,7 +144,8 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
</TooltipTrigger>
<TooltipPortal>
<TooltipContent side="top">
{compatibility ? (
{compatibility &&
!compatibility['platform']?.includes(PLATFORM) ? (
<span>
Only available on{' '}
{compatibility?.platform
…
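
The reworked tooltip condition now requires both that a compatibility object exists and that the current platform is missing from its list, rather than showing the platform warning whenever compatibility data is present. A minimal sketch of the predicate, with the `Compatibility` shape and the `PLATFORM` value assumed from the diff:

// Types inferred from the diff, not from the actual source.
type Compatibility = { platform?: string[] }

const showsPlatformWarning = (
  compatibility: Compatibility | undefined,
  platform: string
): boolean => !!compatibility && !(compatibility.platform ?? []).includes(platform)

// showsPlatformWarning({ platform: ['win32'] }, 'darwin') // true -> 'Only available on…'
// showsPlatformWarning(undefined, 'darwin')               // false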
