fix: incompatible GPU error message (janhq#2357)
* fix: incompatible GPU error message

* fix: change port
louis-jan authored Mar 14, 2024
1 parent 441af9d commit 758afdb
Showing 5 changed files with 87 additions and 3 deletions.
47 changes: 47 additions & 0 deletions extensions/tensorrt-llm-extension/models.json
@@ -45,5 +45,52 @@
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
},
{
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/turing/windows/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
}
],
"id": "tinyjensen-1.1b-chat-fp16",
"object": "model",
"name": "TinyJensen 1.1B Chat FP16",
"version": "1.0",
"description": "Do you want to chat with Jensen Huan? Here you are",
"format": "TensorRT-LLM",
"settings": {
"ctx_len": 2048,
"text_model": false
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "LLama",
"tags": ["TensorRT-LLM", "1B", "Finetuned"],
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
}
]
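
For reference, a minimal TypeScript sketch of the model-entry shape this JSON implies. The field names are taken directly from the diff; the real `Model` type ships in `@janhq/core` and may differ, so treat this as an illustration only:

interface ModelSource {
  filename: string
  url: string
}

interface ModelEntry {
  sources: ModelSource[]          // engine/config/tokenizer artifacts to download
  id: string                      // e.g. 'tinyjensen-1.1b-chat-fp16'
  object: 'model'
  name: string
  version: string
  description: string
  format: 'TensorRT-LLM'
  settings: { ctx_len: number; text_model: boolean }
  parameters: { max_tokens: number }
  metadata: { author: string; tags: string[]; size: number }
  engine: 'nitro-tensorrt-llm'    // routes the model to this extension
}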
4 changes: 2 additions & 2 deletions extensions/tensorrt-llm-extension/package.json
@@ -1,14 +1,14 @@
{
"name": "@janhq/tensorrt-llm-extension",
"version": "0.0.2",
"version": "0.0.3",
"description": "Enables accelerated inference leveraging Nvidia's TensorRT-LLM for optimal GPU hardware optimizations. Compatible with models in TensorRT-LLM format. Requires Nvidia GPU driver and CUDA Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <[email protected]>",
"license": "AGPL-3.0",
"config": {
"host": "127.0.0.1",
"port": "3928"
"port": "3929"
},
"compatibility": {
"platform": [
…
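
The port bump from 3928 to 3929 plausibly avoids a collision with another local inference server already listening on 3928; the commit message says only "fix: change port", so that rationale is an assumption. A minimal sketch of how this config might be consumed to build the server URL — a hypothetical helper, not the extension's actual code:

// Hypothetical: assumes the bundler inlines package.json (resolveJsonModule).
import pkg from '../package.json'

const INFERENCE_URL = `http://${pkg.config.host}:${pkg.config.port}`
// -> 'http://127.0.0.1:3929' after this change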
15 changes: 15 additions & 0 deletions extensions/tensorrt-llm-extension/src/index.ts
@@ -20,6 +20,7 @@ import {
LocalOAIEngine,
fs,
MessageRequest,
ModelEvent,
} from '@janhq/core'
import models from '../models.json'

…
@@ -127,6 +128,20 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
}

async onModelInit(model: Model): Promise<void> {
if ((await this.installationState()) === 'Installed')
return super.onModelInit(model)
else {
events.emit(ModelEvent.OnModelFail, {
...model,
error: {
message: 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension',
},
})
return
}
}

override async installationState(): Promise<InstallationState> {
// For now, we just check the executable of nitro x tensor rt
const isNitroExecutableAvailable = await executeOnMain(
…
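
The new `onModelInit` guard is the core of the fix: check `installationState()` before delegating to `super.onModelInit`, and emit `ModelEvent.OnModelFail` with a `CODE::detail` message otherwise. A minimal sketch of a listener on the receiving side — the event name comes from the diff, but the payload shape (the model spread plus `error`) is inferred, not confirmed against the `@janhq/core` typings:

import { events, ModelEvent } from '@janhq/core'

// Sketch: react to the failure emitted by onModelInit above.
events.on(ModelEvent.OnModelFail, (payload: { error?: { message?: string } }) => {
  const [code, detail] = (payload.error?.message ?? '').split('::')
  if (code === 'EXTENSION_IS_NOT_INSTALLED') {
    // `detail` is the human-readable extension name, e.g. 'TensorRT-LLM extension'
    console.warn(`${detail} is not installed; prompting the user to install it.`)
  }
})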
21 changes: 21 additions & 0 deletions web/screens/Chat/ErrorMessage/index.tsx
@@ -7,18 +7,22 @@ import ModalTroubleShooting, {
modalTroubleShootingAtom,
} from '@/containers/ModalTroubleShoot'

import { MainViewState } from '@/constants/screens'

import { loadModelErrorAtom } from '@/hooks/useActiveModel'
import useSendChatMessage from '@/hooks/useSendChatMessage'

import { getErrorTitle } from '@/utils/errorMessage'

import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'

const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const messages = useAtomValue(getCurrentChatMessagesAtom)
const { resendChatMessage } = useSendChatMessage()
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const loadModelError = useAtomValue(loadModelErrorAtom)
const setMainState = useSetAtom(mainViewStateAtom)
const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'

const regenerateMessage = async () => {
…
@@ -70,6 +74,23 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
</p>
<ModalTroubleShooting />
</div>
) : loadModelError?.includes('EXTENSION_IS_NOT_INSTALLED') ? (
<div
key={message.id}
className="flex w-full flex-col items-center text-center text-sm font-medium text-gray-500"
>
<p className="w-[90%]">
Model is currently unavailable. Please switch to a different
model or install the{' '}
<button
className="font-medium text-blue-500"
onClick={() => setMainState(MainViewState.Settings)}
>
{loadModelError.split('::')[1] ?? ''}
</button>{' '}
to continue using it.
</p>
</div>
) : (
<div
key={message.id}
…
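
The UI leans on the same `CODE::detail` convention: `includes` detects the error class and `split('::')[1]` recovers the extension name for the button label. A worked example, assuming only the message format emitted by the extension above:

const loadModelError = 'EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension'
loadModelError.includes('EXTENSION_IS_NOT_INSTALLED')  // true -> renders this branch
loadModelError.split('::')[1]                          // 'TensorRT-LLM extension' -> button text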
@@ -144,7 +144,8 @@ const TensorRtExtensionItem: React.FC<Props> = ({ item }) => {
</TooltipTrigger>
<TooltipPortal>
<TooltipContent side="top">
{compatibility ? (
{compatibility &&
!compatibility['platform']?.includes(PLATFORM) ? (
<span>
Only available on{' '}
{compatibility?.platform
…
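
The reworked tooltip condition now requires both that a compatibility object exists and that the current platform is missing from its list, rather than showing the platform warning whenever compatibility data is present. A minimal sketch of the predicate, with the `Compatibility` shape and the `PLATFORM` value assumed from the diff:

// Types inferred from the diff, not from the actual source.
type Compatibility = { platform?: string[] }

const showsPlatformWarning = (
  compatibility: Compatibility | undefined,
  platform: string
): boolean => !!compatibility && !(compatibility.platform ?? []).includes(platform)

// showsPlatformWarning({ platform: ['win32'] }, 'darwin') // true -> 'Only available on…'
// showsPlatformWarning(undefined, 'darwin')               // false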
