---
title: TensorRT-LLM
slug: /guides/providers/tensorrt-llm
---

<head>
  <title>TensorRT-LLM - Jan Guides</title>
  <meta name="description" content="Learn how to install Jan's official TensorRT-LLM Extension, which offers 20-40% faster token speeds on Nvidia GPUs. Understand the requirements, installation steps, and troubleshooting tips."/>
  <meta name="keywords" content="Jan AI, Jan, ChatGPT alternative, TensorRT-LLM, Nvidia GPU, TensorRT, extension, installation, troubleshooting"/>
  <meta property="og:title" content="TensorRT-LLM - Jan Guides"/>
  <meta property="og:description" content="Learn how to install Jan's official TensorRT-LLM Extension, which offers 20-40% faster token speeds on Nvidia GPUs. Understand the requirements, installation steps, and troubleshooting tips."/>
  <meta property="og:url" content="https://jan.ai/guides/providers/tensorrt-llm"/>
  <meta name="twitter:card" content="summary"/>
  <meta name="twitter:title" content="TensorRT-LLM - Jan Guides"/>
  <meta name="twitter:description" content="Learn how to install Jan's official TensorRT-LLM Extension, which offers 20-40% faster token speeds on Nvidia GPUs. Understand the requirements, installation steps, and troubleshooting tips."/>
</head>

:::info

TensorRT-LLM support launched in v0.4.9 and should be regarded as an experimental feature.

- Only Windows is supported for now.
- Please report bugs in our Discord's [#tensorrt-llm](https://discord.com/channels/1107178041848909847/1201832734704795688) channel.

:::

Jan supports [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) as an alternative inference engine for users who have Nvidia GPUs with large VRAM. TensorRT-LLM allows for blazing-fast inference but requires Nvidia GPUs with [larger VRAM](https://nvidia.github.io/TensorRT-LLM/memory.html).

## What is TensorRT-LLM?

[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is a hardware-optimized LLM inference engine that compiles models to run extremely fast on Nvidia GPUs.

- Mainly used on Nvidia's datacenter-grade GPUs like the H100 [to produce 10,000 tok/s](https://nvidia.github.io/TensorRT-LLM/blogs/H100vsA100.html).
- Can also be used on Nvidia's workstation GPUs (e.g. the [A6000](https://www.nvidia.com/en-us/design-visualization/rtx-6000/)) and consumer-grade GPUs (e.g. the [RTX 4090](https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/rtx-4090/)).

:::tip[Benefits]

- Our performance testing shows 20-40% faster token/s speeds on consumer-grade GPUs.
- On datacenter-grade GPUs, TensorRT-LLM can reach up to 10,000 tokens/s.
- TensorRT-LLM is a relatively new library that was [released in Sept 2023](https://github.com/NVIDIA/TensorRT-LLM/graphs/contributors). We anticipate performance and resource-utilization improvements in the future.

:::

:::warning[Caveats]

- TensorRT-LLM requires models to be compiled into GPU- and OS-specific "Model Engines" (vs. GGUF's "convert once, run anywhere" approach).
- TensorRT-LLM Model Engines tend to use larger amounts of VRAM and RAM in exchange for performance.
- This usually means only people with top-of-the-line Nvidia GPUs can use TensorRT-LLM.

:::

## Requirements

### Hardware

- Windows PC
- Nvidia GPU(s): Ada or Ampere architecture (i.e. the RTX 4000 and 3000 series). More will be supported soon.
- 3 GB+ of disk space to download TRT-LLM artifacts and a Nitro binary

**Compatible GPUs**

| Architecture | Supported? | Consumer-grade | Workstation-grade |
| ------------ | ---------- | -------------- | ----------------- |
| Ada          | ✅         | 4050 and above | RTX A2000 Ada     |
| Ampere       | ✅         | 3050 and above | A100              |
| Turing       | ❌         | Not supported  | Not supported     |

:::info

Please ping us in Discord's [#tensorrt-llm](https://discord.com/channels/1107178041848909847/1201832734704795688) channel if you would like Turing support.

:::
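
If you are unsure which architecture your GPU uses, you can query its name with `nvidia-smi` (installed with the Nvidia driver) and match it against the table above:

```sh
# Prints the GPU name, e.g. "NVIDIA GeForce RTX 4090" (Ada)
# or "NVIDIA GeForce RTX 3080" (Ampere).
nvidia-smi --query-gpu=name --format=csv,noheader
```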

### Software

- Jan v0.4.9+ or Jan v0.4.8-321+ (nightly)
- [Nvidia Driver v535+](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements)
- [CUDA Toolkit v12.2+](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements)
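
You can verify the driver and CUDA Toolkit versions from a terminal before installing the extension:

```sh
# The driver version is shown in the top banner of the output (needs v535+).
nvidia-smi

# Prints the CUDA Toolkit version (needs v12.2+).
nvcc --version
```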

## Getting Started

### Install TensorRT-Extension

1. Go to Settings > Extensions
2. Install the TensorRT-LLM Extension

:::info
You can check that the files have been downloaded correctly:

```sh
# Run from PowerShell; `ls` also works in Unix-like shells such as Git Bash.
ls ~\jan\extensions\@janhq\tensorrt-llm-extension\dist\bin
# The extension folder should now include `nitro.exe`, among other `.dll`
# files needed to run TRT-LLM.
```
:::

### Download a TensorRT-LLM Model

Jan's Hub has a few pre-compiled TensorRT-LLM models that you can download, marked with a `TensorRT-LLM` label.

- We automatically download the TensorRT-LLM Model Engine that matches your GPU architecture
- We have made a few 1.1B models available that can run even on laptop GPUs with 8 GB of VRAM

| Model               | OS      | Ada (40XX) | Ampere (30XX) | Description                                         |
| ------------------- | ------- | ---------- | ------------- | --------------------------------------------------- |
| Llamacorn 1.1B      | Windows | ✅         | ✅            | TinyLlama-1.1B, fine-tuned for usability            |
| TinyJensen 1.1B     | Windows | ✅         | ✅            | TinyLlama-1.1B, fine-tuned on Jensen Huang speeches |
| Mistral Instruct 7B | Windows | ✅         | ✅            | Mistral                                             |

### Importing Pre-built Models

You can import a pre-built model by creating a new folder in Jan's `/models` directory that includes:

- TensorRT-LLM Engine files (e.g. `tokenizer`, `.engine`, etc.)
- A `model.json` that registers these files and specifies `engine` as `nitro-tensorrt-llm`

A sample `model.json` and the expected folder layout are shown below.

:::note[Sample model.json]

Note that `engine` is set to `nitro-tensorrt-llm`: the model won't load without it!

```json
{
  "sources": [
    {
      "filename": "config.json",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json"
    },
    {
      "filename": "mistral_float16_tp1_rank0.engine",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
    },
    {
      "filename": "tokenizer.model",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
    },
    {
      "filename": "special_tokens_map.json",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
    },
    {
      "filename": "tokenizer.json",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
    },
    {
      "filename": "tokenizer_config.json",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
    },
    {
      "filename": "model.cache",
      "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache"
    }
  ],
  "id": "tinyjensen-1.1b-chat-fp16",
  "object": "model",
  "name": "TinyJensen 1.1B Chat FP16",
  "version": "1.0",
  "description": "Do you want to chat with Jensen Huang? Here you are",
  "format": "TensorRT-LLM",
  "settings": {
    "ctx_len": 2048,
    "text_model": false
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "LLama",
    "tags": [
      "TensorRT-LLM",
      "1B",
      "Finetuned"
    ],
    "size": 2151000000
  },
  "engine": "nitro-tensorrt-llm"
}
```

:::
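
Assuming Jan stores its data under `~\jan` (as with the extension path shown earlier), an imported model folder would look something like this; the folder name and file list below simply mirror the sample `model.json`:

```sh
ls ~\jan\models\tinyjensen-1.1b-chat-fp16
# Expected contents, matching the "sources" in the sample model.json:
# config.json                       model.json
# mistral_float16_tp1_rank0.engine  special_tokens_map.json
# model.cache                       tokenizer.json
# tokenizer.model                   tokenizer_config.json
```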

### Using a TensorRT-LLM Model

You can select and use a TensorRT-LLM model directly from Jan's Thread interface.

- Jan will automatically start the TensorRT-LLM model engine in the background
- You may encounter a pop-up from Windows Security asking to allow Nitro public and private network access

:::info[Why does Nitro need network access?]

- This is because Jan runs TensorRT-LLM using the [Nitro Server](https://github.com/janhq/nitro-tensorrt-llm/)
- Jan makes network calls to the Nitro server running on your computer on a separate port

:::
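
Since the Nitro server exposes an OpenAI-compatible API (see Extension Details below), you can also talk to it directly once a model is loaded. A minimal sketch; the port and model id below are assumptions, so substitute the port Nitro is actually listening on and the id of the model you loaded:

```sh
# Hypothetical request to the local Nitro server's OpenAI-compatible API.
# Replace 3928 with the actual port, and the model id with your own.
curl http://127.0.0.1:3928/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "tinyjensen-1.1b-chat-fp16",
    "messages": [{ "role": "user", "content": "Hello!" }]
  }'
```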

### Configure Settings

:::note
coming soon
:::

## Troubleshooting

## Extension Details

Jan's TensorRT-LLM Extension is built on the open-source [Nitro TensorRT-LLM Server](https://github.com/janhq/nitro-tensorrt-llm), a C++ inference server built on TensorRT-LLM that provides an OpenAI-compatible API.

### Manual Build

To manually build the artifacts needed to run the server and TensorRT-LLM, you can reference the source code: see the [quickstart](https://github.com/janhq/nitro-tensorrt-llm?tab=readme-ov-file#quickstart).
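
As a starting point (the build steps themselves are covered by the quickstart linked above):

```sh
# Fetch the server source; build instructions live in the repository's quickstart.
git clone https://github.com/janhq/nitro-tensorrt-llm
cd nitro-tensorrt-llm
```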

### Uninstall Extension

1. Quit the app
2. Go to Settings > Extensions
3. Delete the entire Extensions folder
4. Reopen the app; only the default extensions should be restored

## Build your own TensorRT models

:::info
coming soon
:::