Skip to content

Commit

Permalink
chore: replace nitro by cortex-cpp (janhq#2912)
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan authored May 16, 2024
1 parent 2182599 commit 537ef20
Show file tree
Hide file tree
Showing 16 changed files with 96 additions and 71 deletions.
6 changes: 3 additions & 3 deletions core/src/node/api/restful/helper/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`

// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
26 changes: 15 additions & 11 deletions core/src/node/api/restful/helper/startStopModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
}

const spawnNitroProcess = async (): Promise<void> => {
log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
log(`[SERVER]::Debug: Spawning cortex subprocess...`)

let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'inference-cortex-extension',
'dist',
'bin'
)
Expand All @@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary
log(
`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
Expand All @@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise<void> => {
})

subprocess.on('close', (code: any) => {
log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
log(`[SERVER]::Debug: cortex exited with code: ${code}`)
subprocess = undefined
})

tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
log(`[SERVER]::Debug: Nitro is ready`)
log(`[SERVER]::Debug: cortex is ready`)
})
}

Expand All @@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'inference-cortex-extension',
'dist',
'bin'
)

let cudaVisibleDevices = ''
let binaryName = 'nitro'
let binaryName = 'cortex-cpp'
/**
* The binary folder is different for each platform.
*/
Expand All @@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => {
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
binaryName = 'nitro.exe'
binaryName = 'cortex-cpp.exe'
} else if (process.platform === 'darwin') {
/**
* For macOS: mac-arm64 on Apple Silicon (process.arch === 'arm64'), mac-amd64 otherwise (Intel)
*/
binaryFolder = join(binaryFolder, 'mac-universal')
if(process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-amd64')
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
Expand Down Expand Up @@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
retryDelay: 500,
})
.then((res: any) => {
log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
return Promise.resolve(res)
})
.catch((err: any) => {
Expand All @@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
log(`[SERVER]::Debug: Request to kill Nitro`)
log(`[SERVER]::Debug: Request to kill cortex`)

fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',
Expand Down
2 changes: 1 addition & 1 deletion core/src/node/helper/resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { log } from './logger'

export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
log(`[NITRO]::CPU information - ${cpu}`)
log(`[CORTEX]::CPU information - ${cpu}`)

return {
numCpuPhysicalCore: cpu,
Expand Down
3 changes: 2 additions & 1 deletion extensions/assistant-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
}
export async function toolRetrievalIngestNewDocument(
file: string,
model: string,
engine: string
) {
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
const threadPath = path.dirname(filePath.replace('files', ''))
retrieval.updateEmbeddingEngine(engine)
retrieval.updateEmbeddingEngine(model, engine)
return retrieval
.ingestAgentKnowledge(filePath, `${threadPath}/memory`)
.catch((err) => {
Expand Down
6 changes: 3 additions & 3 deletions extensions/assistant-extension/src/node/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ export class Retrieval {
})
}

public updateEmbeddingEngine(engine: string): void {
public updateEmbeddingEngine(model: string, engine: string): void {
// Engine settings are not compatible with the current embedding model params
// Switch case manually for now
if (engine === 'nitro') {
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: 'nitro-embedding' },
{ openAIApiKey: 'nitro-embedding', model },
// TODO: Raw settings
{ basePath: 'http://127.0.0.1:3928/v1' }
{ basePath: 'http://127.0.0.1:3928/v1' },
)
} else {
// Fallback to OpenAI Settings
Expand Down
1 change: 1 addition & 0 deletions extensions/assistant-extension/src/tools/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
NODE,
'toolRetrievalIngestNewDocument',
docFile,
data.model?.id,
data.model?.engine
)
} else {
Expand Down
2 changes: 2 additions & 0 deletions extensions/inference-nitro-extension/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bin
!version.txt
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/bin/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.22
0.4.4
4 changes: 2 additions & 2 deletions extensions/inference-nitro-extension/download.bat
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
rem Read the cortex-cpp release version to download from bin/version.txt.
set /p CORTEX_VERSION=<./bin/version.txt
rem Download the four Windows builds (CUDA 12.0, CUDA 11.7, CPU/AVX2, Vulkan) into their ./bin/win-* folders.
rem All assets come from the janhq/cortex releases; the commands are chained with && so a failed download stops the rest.
.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
10 changes: 5 additions & 5 deletions extensions/inference-nitro-extension/package.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
{
"name": "@janhq/inference-nitro-extension",
"productName": "Nitro Inference Engine",
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.7",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <[email protected]>",
"license": "AGPL-3.0",
"scripts": {
"test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro",
"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
Expand Down
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ export default [
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion'
'http://127.0.0.1:3928/inferences/server/chat_completion'
),
TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting'
Expand Down
4 changes: 2 additions & 2 deletions extensions/inference-nitro-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
this.name ?? 'nitro',
this.name ?? 'cortex-cpp',
this.version ?? '1.0.0',
])

Expand Down Expand Up @@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
this.name ?? 'nitro',
this.name ?? 'cortex-cpp',
this.version ?? '1.0.0',
])

Expand Down
27 changes: 20 additions & 7 deletions extensions/inference-nitro-extension/src/node/execute.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'amd64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-universal${sep}nitro`),
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
Expand All @@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
Expand Down Expand Up @@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand Down Expand Up @@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand All @@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`),
executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
Expand Down Expand Up @@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`),
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand Down Expand Up @@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`),
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand Down
6 changes: 3 additions & 3 deletions extensions/inference-nitro-extension/src/node/execute.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { GpuSetting, SystemInformation } from '@janhq/core'
import { GpuSetting } from '@janhq/core'
import * as path from 'path'

export interface NitroExecutableOptions {
Expand All @@ -24,7 +24,7 @@ const os = (): string => {
return process.platform === 'win32'
? 'win'
: process.platform === 'darwin'
? 'mac-universal'
? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
: 'linux'
}

Expand Down Expand Up @@ -52,7 +52,7 @@ export const executableNitroFile = (
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `nitro${extension()}`
let binaryName = `cortex-cpp${extension()}`

return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
Expand Down
Loading

0 comments on commit 537ef20

Please sign in to comment.