Skip to content

Commit

Permalink
feat: nitro additional dependencies (janhq#2674)
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan authored Apr 11, 2024
1 parent 8917be5 commit d93d74c
Show file tree
Hide file tree
Showing 17 changed files with 362 additions and 330 deletions.
9 changes: 1 addition & 8 deletions core/src/browser/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ export interface Compatibility {
// Names of the installation states an extension can report.
// `as const` keeps every entry as a string-literal type, and
// `InstallationStateTuple` is derived from this tuple via `typeof`.
const ALL_INSTALLATION_STATE = [
'NotRequired', // not required.
'Installed', // require and installed. Good to go.
'Updatable', // require and installed but need to be updated.
'NotInstalled', // require to be installed.
'Corrupted', // require but corrupted. Need to redownload.
'NotCompatible', // require but not compatible.
] as const

export type InstallationStateTuple = typeof ALL_INSTALLATION_STATE
Expand Down Expand Up @@ -98,13 +98,6 @@ export abstract class BaseExtension implements ExtensionType {
return undefined
}

/**
* Determine if the extension is updatable.
*
* @returns `false` in this base implementation.
* NOTE(review): presumably subclasses override this when they can detect an
* available update — confirm against the concrete extensions.
*/
updatable(): boolean {
return false
}

async registerSettings(settings: SettingComponentProps[]): Promise<void> {
if (!this.name) {
console.error('Extension name is not defined')
Expand Down
7 changes: 5 additions & 2 deletions extensions/inference-nitro-extension/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@janhq/inference-nitro-extension",
"version": "1.0.0",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See nitro.jan.ai",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nUse this setting if you encounter errors related to **CUDA toolkit** during application execution.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <[email protected]>",
Expand Down Expand Up @@ -29,6 +29,7 @@
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/decompress": "^4.2.7",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.4",
"@types/os-utils": "^0.0.4",
Expand All @@ -47,6 +48,7 @@
},
"dependencies": {
"@janhq/core": "file:../../core",
"decompress": "^4.2.1",
"fetch-retry": "^5.0.6",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
Expand All @@ -65,6 +67,7 @@
"bundleDependencies": [
"tcp-port-used",
"fetch-retry",
"@janhq/core"
"@janhq/core",
"decompress"
]
}
3 changes: 3 additions & 0 deletions extensions/inference-nitro-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ export default [
JAN_SERVER_INFERENCE_URL: JSON.stringify(
'http://localhost:1337/v1/chat/completions'
),
CUDA_DOWNLOAD_URL: JSON.stringify(
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
),
}),
// Allow json resolution
json(),
Expand Down
92 changes: 92 additions & 0 deletions extensions/inference-nitro-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,19 @@ import {
Model,
ModelEvent,
LocalOAIEngine,
InstallationState,
systemInformation,
fs,
getJanDataFolderPath,
joinPath,
DownloadRequest,
baseName,
downloadFile,
DownloadState,
DownloadEvent,
} from '@janhq/core'

declare const CUDA_DOWNLOAD_URL: string
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
Expand Down Expand Up @@ -61,6 +72,11 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const models = MODELS as unknown as Model[]
this.registerModels(models)
super.onLoad()

executeOnMain(NODE, 'addAdditionalDependencies', {
name: this.name,
version: this.version,
})
}

/**
Expand Down Expand Up @@ -96,4 +112,80 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
}
return super.unloadModel(model)
}

/**
 * Downloads the CUDA dependency archive for this machine and unpacks it into
 * `<jan data folder>/engines/<name>/<version>`.
 *
 * The download itself is not awaited: completion is observed through
 * `DownloadEvent.onFileDownloadSuccess`, after which the archive is
 * decompressed on the main process and `DownloadEvent.onFileUnzipSuccess`
 * is emitted with the same download state.
 *
 * @returns A promise that resolves once the download has been started.
 */
override async install(): Promise<void> {
  const info = await systemInformation()

  // Every platform other than win32 is mapped to the 'linux' archive.
  const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux'

  // Fill the URL template; fall back to CUDA 12.4 when no version was detected.
  const url = CUDA_DOWNLOAD_URL.replace(
    '<version>',
    info.gpuSetting.cuda?.version ?? '12.4'
  ).replace('<platform>', platform)

  console.debug('Downloading Cuda Toolkit Dependency: ', url)

  const janDataFolderPath = await getJanDataFolderPath()

  const executableFolderPath = await joinPath([
    janDataFolderPath,
    'engines',
    this.name ?? 'nitro',
    this.version ?? '1.0.0',
  ])

  if (!(await fs.existsSync(executableFolderPath))) {
    await fs.mkdir(executableFolderPath)
  }

  const tarball = await baseName(url)
  const tarballFullPath = await joinPath([executableFolderPath, tarball])

  const onFileDownloadSuccess = async (state: DownloadState) => {
    // Success events for other downloads share this channel; ignore them.
    if (state.fileName !== tarball) return
    events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
    await executeOnMain(
      NODE,
      'decompressRunner',
      tarballFullPath,
      executableFolderPath
    )
    events.emit(DownloadEvent.onFileUnzipSuccess, state)
  }

  // Subscribe BEFORE starting the download so the success event cannot be
  // emitted while nobody is listening.
  events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)

  const downloadRequest: DownloadRequest = {
    url,
    localPath: tarballFullPath,
    extensionId: this.name,
    downloadType: 'extension',
  }

  // Fire-and-forget, but never leave a rejection unhandled: if the download
  // cannot be started or fails, drop the listener so it does not leak.
  void Promise.resolve(downloadFile(downloadRequest)).catch((err: unknown) => {
    events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
    console.error('Failed to download Cuda Toolkit Dependency: ', err)
  })
}

/**
 * Reports whether the additional CUDA dependencies are needed and, if so,
 * whether their target folder already exists.
 *
 * @returns `'NotRequired'` unless the system runs in GPU mode without Vulkan,
 * has OS info for a non-darwin platform and reports no existing CUDA;
 * otherwise `'Installed'` when the engine folder exists, else `'NotInstalled'`.
 */
override async installationState(): Promise<InstallationState> {
  const { gpuSetting, osInfo } = await systemInformation()

  const needsCudaDependencies =
    gpuSetting.run_mode === 'gpu' &&
    !gpuSetting.vulkan &&
    osInfo &&
    osInfo.platform !== 'darwin' &&
    !gpuSetting.cuda?.exist

  // Guard clause: nothing to install for this configuration.
  if (!needsCudaDependencies) return 'NotRequired'

  const dataFolder = await getJanDataFolderPath()
  const engineFolder = await joinPath([
    dataFolder,
    'engines',
    this.name ?? 'nitro',
    this.version ?? '1.0.0',
  ])

  const folderExists = await fs.existsSync(engineFolder)
  return folderExists ? 'Installed' : 'NotInstalled'
}
}
25 changes: 25 additions & 0 deletions extensions/inference-nitro-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ import {
ModelSettingParams,
PromptTemplate,
SystemInformation,
getJanDataFolderPath,
} from '@janhq/core/node'
import { executableNitroFile } from './execute'
import terminate from 'terminate'
import decompress from 'decompress'

// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch)
Expand Down Expand Up @@ -420,9 +422,32 @@ const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
}
}

/**
 * Adds the extension's dependency folder
 * (`<jan data folder>/engines/<name>/<version>`) to this process's `PATH`
 * and `LD_LIBRARY_PATH` environment variables.
 *
 * The folder is appended at most once per variable, and no leading delimiter
 * is produced when a variable is unset or empty: an empty `LD_LIBRARY_PATH`
 * entry is interpreted by the dynamic linker as the current working directory.
 *
 * @param data - Extension name and version used to build the folder path.
 */
const addAdditionalDependencies = (data: { name: string; version: string }) => {
  const dependenciesDir = path.join(
    getJanDataFolderPath(),
    'engines',
    data.name,
    data.version
  )

  // Returns `current` with `dependenciesDir` appended, leaving existing
  // entries untouched.
  const withDependenciesDir = (current: string = ''): string => {
    if (current.split(path.delimiter).includes(dependenciesDir)) return current
    return current.length > 0
      ? `${current}${path.delimiter}${dependenciesDir}`
      : dependenciesDir
  }

  // Set the updated search paths
  process.env.PATH = withDependenciesDir(process.env.PATH)
  process.env.LD_LIBRARY_PATH = withDependenciesDir(process.env.LD_LIBRARY_PATH)
}

/**
 * Extracts the archive at `zipPath` into the `output` directory.
 *
 * Failures are logged and not rethrown, so the returned promise resolves
 * whether or not extraction succeeded.
 *
 * @param zipPath - Path of the archive to extract.
 * @param output - Directory that receives the extracted files.
 */
const decompressRunner = async function (zipPath: string, output: string) {
  console.debug(`Decompressing ${zipPath} to ${output}...`)
  try {
    const extracted = await decompress(zipPath, output)
    console.debug('Decompress finished!', extracted)
  } catch (error) {
    // Best effort: report the failure and let the caller continue.
    console.error(`Decompress ${zipPath} failed: ${error}`)
  }
}

// Functions exposed through this module's default export.
// NOTE(review): the extension calls `addAdditionalDependencies` and
// `decompressRunner` by string name via `executeOnMain(NODE, ...)`; presumably
// NODE resolves to this module, so these keys must not be renamed — confirm.
export default {
loadModel,
unloadModel,
dispose,
getCurrentNitroProcessInfo,
addAdditionalDependencies,
decompressRunner,
}
4 changes: 2 additions & 2 deletions extensions/model-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ export default class JanModelExtension extends ModelExtension {
}

if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
console.error(
`Your GPU: ${firstGpu} is not supported. Only 20xx, 30xx, 40xx series are supported.`
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}
Expand Down
30 changes: 24 additions & 6 deletions extensions/monitoring-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ const updateGpuInfo = async () =>
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
(error, stdout) => {
async (error, stdout) => {
if (!error) {
const output = stdout.toString()

Expand All @@ -221,7 +221,7 @@ const updateGpuInfo = async () =>
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}

data = updateCudaExistence(data)
data = await updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
Expand All @@ -233,7 +233,7 @@ const updateGpuInfo = async () =>
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
(error, stdout) => {
async (error, stdout) => {
if (!error) {
log(`[SPECS]::${stdout}`)
// Get GPU info and gpu has higher memory first
Expand Down Expand Up @@ -264,7 +264,8 @@ const updateGpuInfo = async () =>
data.gpus_in_use = [data.gpu_highest_vram]
}

data = updateCudaExistence(data)
data = await updateCudaExistence(data)
console.log(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
Expand All @@ -283,9 +284,9 @@ const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
/**
* Validate cuda for linux and windows
*/
const updateCudaExistence = (
const updateCudaExistence = async (
data: GpuSetting = DEFAULT_SETTINGS
): GpuSetting => {
): Promise<GpuSetting> => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
Expand Down Expand Up @@ -329,6 +330,23 @@ const updateCudaExistence = (
}

data.is_initial = false

// Attempt to query CUDA using NVIDIA SMI
if (!cudaExists) {
await new Promise<void>((resolve, reject) => {
exec('nvidia-smi', (error, stdout) => {
if (!error) {
const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
const match = regex.exec(stdout)
if (match && match[1]) {
data.cuda.version = match[1]
}
}
console.log(data)
resolve()
})
})
}
return data
}

Expand Down
Loading

0 comments on commit d93d74c

Please sign in to comment.