feat: Add triton trtllm engine for remote models
hiro-v committed Dec 12, 2023
1 parent 44d4368 commit f268877
Showing 8 changed files with 478 additions and 1 deletion.
2 changes: 1 addition & 1 deletion core/src/types/index.ts
@@ -174,7 +174,7 @@ export type ThreadState = {
enum InferenceEngine {
nitro = "nitro",
openai = "openai",
nvidia_triton = "nvidia_triton",
triton_trtllm = "triton_trtllm",
hf_endpoint = "hf_endpoint",
}

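The rename above only changes the enum key and its string value. As an illustration (not part of the commit), a local copy of the enum from the diff shows how the new value is referenced:

```typescript
// Local copy of the InferenceEngine values from core/src/types/index.ts,
// reproduced here only to illustrate the rename.
enum InferenceEngine {
  nitro = "nitro",
  openai = "openai",
  triton_trtllm = "triton_trtllm",
  hf_endpoint = "hf_endpoint",
}

// The string value matches the key, so serialized configs now carry
// "triton_trtllm" where they previously carried "nvidia_triton".
const selected: InferenceEngine = InferenceEngine.triton_trtllm;
console.log(selected); // -> triton_trtllm
```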
78 changes: 78 additions & 0 deletions extensions/inference-triton-trtllm-extension/README.md
@@ -0,0 +1,78 @@
# Jan inference plugin

Created using the Jan app example

# Create a Jan Plugin using TypeScript

Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀

## Create Your Own Plugin

To create your own plugin, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

```bash
npm install
```

1. :building_construction: Package the TypeScript for distribution

```bash
npm run bundle
```

1. :white_check_mark: Check your artifact

There will now be a `.tgz` file in your plugin directory.

## Update the Plugin Metadata

The [`package.json`](package.json) file defines metadata about your plugin, such as the
plugin name, main entry point, description, and version.

When you copy this repository, update `package.json` with the name and description of your plugin.

## Update the Plugin Code

The [`src/`](./src/) directory is the heart of your plugin! This contains the
source code that will be run when your plugin extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your plugin code:

- Most Jan Plugin Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function returns a `Promise<any>`.

```typescript
import { core } from "@janhq/core";

function onStart(): Promise<any> {
  return core.invokePluginFunc(MODULE_PATH, "run", 0);
}
```

For more information about the Jan Plugin Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).

So, what are you waiting for? Go ahead and start customizing your plugin!

41 changes: 41 additions & 0 deletions extensions/inference-triton-trtllm-extension/package.json
@@ -0,0 +1,41 @@
{
  "name": "@janhq/inference-triton-trt-llm-extension",
  "version": "1.0.0",
  "description": "Inference Engine for NVIDIA Triton with TensorRT-LLM Extension integration on Jan extension framework",
  "main": "dist/index.js",
  "module": "dist/module.js",
  "author": "Jan <[email protected]>",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "tsc -b . && webpack --config webpack.config.js",
    "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/module.js"
  },
  "devDependencies": {
    "cpx": "^1.5.0",
    "rimraf": "^3.0.2",
    "webpack": "^5.88.2",
    "webpack-cli": "^5.1.4"
  },
  "dependencies": {
    "@janhq/core": "file:../../core",
    "fetch-retry": "^5.0.6",
    "path-browserify": "^1.0.1",
    "ts-loader": "^9.5.0",
    "ulid": "^2.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "fetch-retry"
  ]
}
@@ -0,0 +1,7 @@
import { Model } from "@janhq/core";

declare const MODULE: string;

declare interface EngineSettings {
  base_url?: string;
}
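The `base_url` setting feeds the endpoint URL that `helpers/sse.ts` (below) posts to. A minimal sketch, assuming a locally running Triton server (`localhost:8000` here is a hypothetical example; 8000 is Triton's default HTTP port):

```typescript
// Illustration of how EngineSettings is combined into the
// generate_stream endpoint used by requestInference.
interface EngineSettings {
  base_url?: string;
}

// Hypothetical settings value; substitute your own server address.
const engine: EngineSettings = { base_url: "http://localhost:8000" };
const url = `${engine.base_url}/v2/models/ensemble/generate_stream`;
console.log(url); // -> http://localhost:8000/v2/models/ensemble/generate_stream
```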
63 changes: 63 additions & 0 deletions extensions/inference-triton-trtllm-extension/src/helpers/sse.ts
@@ -0,0 +1,63 @@
import { Observable } from "rxjs";
import { EngineSettings } from "../@types/global";
import { Model } from "@janhq/core";

/**
* Sends a request to the inference server to generate a response based on the recent messages.
* @param recentMessages - An array of recent messages to use as context for the inference.
* @param engine - The engine settings to use for the inference.
* @param model - The model to use for the inference.
* @returns An Observable that emits the generated response as a string.
*/
export function requestInference(
  recentMessages: any[],
  engine: EngineSettings,
  model: Model,
  controller?: AbortController
): Observable<string> {
  return new Observable((subscriber) => {
    const text_input = recentMessages.map((message) => message.text).join("\n");
    const requestBody = JSON.stringify({
      text_input: text_input,
      max_tokens: 4096,
      temperature: 0,
      bad_words: "",
      stop_words: "[DONE]",
      stream: true,
    });
    fetch(`${engine.base_url}/v2/models/ensemble/generate_stream`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Accept: "text/event-stream",
        "Access-Control-Allow-Origin": "*",
      },
      body: requestBody,
      signal: controller?.signal,
    })
      .then(async (response) => {
        const stream = response.body;
        const decoder = new TextDecoder("utf-8");
        const reader = stream?.getReader();
        let content = "";

        while (true && reader) {
          const { done, value } = await reader.read();
          if (done) {
            break;
          }
          const text = decoder.decode(value);
          const lines = text.trim().split("\n");
          for (const line of lines) {
            if (line.startsWith("data: ") && !line.includes("data: [DONE]")) {
              const data = JSON.parse(line.replace("data: ", ""));
              content += data.choices[0]?.delta?.content ?? "";
              subscriber.next(content);
            }
          }
        }
        subscriber.complete();
      })
      .catch((err) => subscriber.error(err));
  });
}
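The event-stream parsing loop inside `requestInference` can be exercised in isolation. Here is a hypothetical standalone helper (not part of the commit) that mirrors its line-handling logic on a raw SSE chunk:

```typescript
// Hypothetical helper mirroring the parsing loop in requestInference:
// accumulates delta content from "data: ..." lines and skips the
// "data: [DONE]" sentinel.
function parseSseChunk(chunk: string): string {
  let content = "";
  for (const line of chunk.trim().split("\n")) {
    if (line.startsWith("data: ") && !line.includes("data: [DONE]")) {
      const data = JSON.parse(line.replace("data: ", ""));
      content += data.choices[0]?.delta?.content ?? "";
    }
  }
  return content;
}

// Sample chunk in the shape the loop above expects.
const sample =
  'data: {"choices":[{"delta":{"content":"Hello"}}]}\n' +
  'data: {"choices":[{"delta":{"content":" world"}}]}\n' +
  "data: [DONE]";
console.log(parseSseChunk(sample)); // -> Hello world
```

Note that the extension emits the *cumulative* `content` to the subscriber on each line, so downstream consumers receive the full text so far rather than individual deltas.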
