From 4116aaa98a390b35fb6cde7d02de76c52e2f04b0 Mon Sep 17 00:00:00 2001 From: NamH Date: Thu, 1 Feb 2024 11:25:34 +0700 Subject: [PATCH] feat: add start/stop model via http api (#1862) Signed-off-by: nam --- core/src/node/api/common/builder.ts | 17 +- core/src/node/api/common/consts.ts | 19 + core/src/node/api/common/startStopModel.ts | 351 ++++++++++++++++++ core/src/node/api/routes/common.ts | 22 +- core/src/node/utils/index.ts | 148 ++++++-- core/src/types/index.ts | 1 + core/src/types/miscellaneous/index.ts | 2 + .../src/types/miscellaneous/promptTemplate.ts | 6 + .../types/miscellaneous/systemResourceInfo.ts | 4 + core/src/types/model/modelEntity.ts | 1 + .../src/@types/global.d.ts | 21 -- .../inference-nitro-extension/src/index.ts | 3 +- .../src/node/index.ts | 64 ++-- .../src/node/utils.ts | 56 --- server/package.json | 3 + web/hooks/useCreateNewThread.ts | 5 +- web/hooks/useSetActiveThread.ts | 2 - web/screens/LocalServer/index.tsx | 24 +- .../FactoryReset/ModalConfirmReset.tsx | 4 +- 19 files changed, 559 insertions(+), 194 deletions(-) create mode 100644 core/src/node/api/common/consts.ts create mode 100644 core/src/node/api/common/startStopModel.ts create mode 100644 core/src/types/miscellaneous/index.ts create mode 100644 core/src/types/miscellaneous/promptTemplate.ts create mode 100644 core/src/types/miscellaneous/systemResourceInfo.ts delete mode 100644 extensions/inference-nitro-extension/src/node/utils.ts diff --git a/core/src/node/api/common/builder.ts b/core/src/node/api/common/builder.ts index a9819bce68..5c99cf4d8e 100644 --- a/core/src/node/api/common/builder.ts +++ b/core/src/node/api/common/builder.ts @@ -2,7 +2,8 @@ import fs from 'fs' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { join } from 'path' import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index' -import { getJanDataFolderPath } from '../../utils' +import { getEngineConfiguration, getJanDataFolderPath } from '../../utils' +import { DEFAULT_CHAT_COMPLETION_URL } from './consts' export const getBuilder = async (configuration: RouteConfiguration) => { const directoryPath = join(getJanDataFolderPath(), configuration.dirName) @@ -309,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => { const engineConfiguration = await getEngineConfiguration(requestedModel.engine) let apiKey: string | undefined = undefined - let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url + let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL if (engineConfiguration) { apiKey = engineConfiguration.api_key @@ -320,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', - "Access-Control-Allow-Origin": "*" + 'Access-Control-Allow-Origin': '*', }) const headers: Record = { @@ -346,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => { response.body.pipe(reply.raw) } } - -const getEngineConfiguration = async (engineId: string) => { - if (engineId !== 'openai') { - return undefined - } - const directoryPath = join(getJanDataFolderPath(), 'engines') - const filePath = join(directoryPath, `${engineId}.json`) - const data = await fs.readFileSync(filePath, 'utf-8') - return JSON.parse(data) -} diff --git a/core/src/node/api/common/consts.ts b/core/src/node/api/common/consts.ts new file mode 100644 index 0000000000..bc3cfe3001 --- /dev/null +++ 
b/core/src/node/api/common/consts.ts @@ -0,0 +1,19 @@ +// The PORT to use for the Nitro subprocess +export const NITRO_DEFAULT_PORT = 3928 + +// The HOST address to use for the Nitro subprocess +export const LOCAL_HOST = '127.0.0.1' + +export const SUPPORTED_MODEL_FORMAT = '.gguf' + +// The URL for the Nitro subprocess +const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` +// The URL for the Nitro subprocess to load a model +export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +// The URL for the Nitro subprocess to validate a model +export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` + +// The URL for the Nitro subprocess to kill itself +export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` + +export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url diff --git a/core/src/node/api/common/startStopModel.ts b/core/src/node/api/common/startStopModel.ts new file mode 100644 index 0000000000..7665883804 --- /dev/null +++ b/core/src/node/api/common/startStopModel.ts @@ -0,0 +1,351 @@ +import fs from 'fs' +import { join } from 'path' +import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils' +import { logServer } from '../../log' +import { ChildProcessWithoutNullStreams, spawn } from 'child_process' +import { Model, ModelSettingParams, PromptTemplate } from '../../../types' +import { + LOCAL_HOST, + NITRO_DEFAULT_PORT, + NITRO_HTTP_KILL_URL, + NITRO_HTTP_LOAD_MODEL_URL, + NITRO_HTTP_VALIDATE_MODEL_URL, + SUPPORTED_MODEL_FORMAT, +} from './consts' + +// The subprocess instance for Nitro +let subprocess: ChildProcessWithoutNullStreams | undefined = undefined + +// TODO: move this to core type +interface NitroModelSettings extends ModelSettingParams { + llama_model_path: string + cpu_threads: number +} + +export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { + try { + await runModel(modelId, settingParams) + + return { + message: `Model ${modelId} started`, + } + } catch (e) { + return { + error: e, + } + } +} + +const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise => { + const janDataFolderPath = getJanDataFolderPath() + const modelFolderFullPath = join(janDataFolderPath, 'models', modelId) + + if (!fs.existsSync(modelFolderFullPath)) { + throw `Model not found: ${modelId}` + } + + const files: string[] = fs.readdirSync(modelFolderFullPath) + + // Look for GGUF model file + const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)) + + const modelMetadataPath = join(modelFolderFullPath, 'model.json') + const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8')) + + if (!ggufBinFile) { + throw 'No GGUF model file found' + } + const modelBinaryPath = join(modelFolderFullPath, ggufBinFile) + + const nitroResourceProbe = await getSystemResourceInfo() + const nitroModelSettings: NitroModelSettings = { + ...modelMetadata.settings, + ...settingParams, + llama_model_path: modelBinaryPath, + // This is critical and requires real CPU physical core count (or performance core) + cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), + ...(modelMetadata.settings.mmproj && { + mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj), + }), + } + + logServer(`[NITRO]::Debug: Nitro model settings: 
${JSON.stringify(nitroModelSettings)}`)
+
+  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
+  if (modelMetadata.settings.prompt_template) {
+    const promptTemplate = modelMetadata.settings.prompt_template
+    const prompt = promptTemplateConverter(promptTemplate)
+    if (prompt?.error) {
+      return Promise.reject(prompt.error)
+    }
+    nitroModelSettings.system_prompt = prompt.system_prompt
+    nitroModelSettings.user_prompt = prompt.user_prompt
+    nitroModelSettings.ai_prompt = prompt.ai_prompt
+  }
+
+  await runNitroAndLoadModel(modelId, nitroModelSettings)
+}
+
+// TODO: move to util
+const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
+  // Split the string using the markers
+  const systemMarker = '{system_message}'
+  const promptMarker = '{prompt}'
+
+  if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
+    // Find the indices of the markers
+    const systemIndex = promptTemplate.indexOf(systemMarker)
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+
+    // Extract the parts of the string
+    const system_prompt = promptTemplate.substring(0, systemIndex)
+    const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt }
+  } else if (promptTemplate.includes(promptMarker)) {
+    // Extract the parts of the string for the case where only promptMarker is present
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+    const user_prompt = promptTemplate.substring(0, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { user_prompt, ai_prompt }
+  }
+
+  // Return an error if none of the conditions are met
+  return { error: 'Cannot split prompt template' }
+}
+
+const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
+  // Stop any running Nitro instance and wait for its port to be released
+  const tcpPortUsed = require('tcp-port-used')
+
+  await stopModel(modelId)
+  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
+
+  /**
+   * There is a problem with the Windows process manager.
+   * Should wait for a while to make sure the port is free and the subprocess is killed.
+   * The tested threshold is 500ms.
+   **/
+  if (process.platform === 'win32') {
+    await new Promise((resolve) => setTimeout(resolve, 500))
+  }
+
+  await spawnNitroProcess()
+  await loadLLMModel(modelSettings)
+  await validateModelStatus()
+}
+
+const spawnNitroProcess = async (): Promise<void> => {
+  logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let executableOptions = executableNitroFile()
+  const tcpPortUsed = require('tcp-port-used')
+
+  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
+  // Execute the binary
+  logServer(
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+  )
+  subprocess = spawn(
+    executableOptions.executablePath,
+    ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
+    {
+      cwd: binaryFolder,
+      env: {
+        ...process.env,
+        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
+      },
+    }
+  )
+
+  // Handle subprocess output
+  subprocess.stdout.on('data', (data: any) => {
+    logServer(`[NITRO]::Debug: ${data}`)
+  })
+
+  subprocess.stderr.on('data', (data: any) => {
+    logServer(`[NITRO]::Error: ${data}`)
+  })
+
+  subprocess.on('close', (code: any) => {
+    logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+    subprocess = undefined
+  })
+
+  return tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
+    logServer(`[NITRO]::Debug: Nitro is ready`)
+  })
+}
+
+type NitroExecutableOptions = {
+  executablePath: string
+  cudaVisibleDevices: string
+}
+
+const executableNitroFile = (): NitroExecutableOptions => {
+  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let cudaVisibleDevices = ''
+  let binaryName = 'nitro'
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === 'win32') {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'win-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'win-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'win-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+    binaryName = 'nitro.exe'
+  } else if (process.platform === 'darwin') {
+    /**
+     * For MacOS: mac-arm64 (Silicon), mac-x64 (Intel)
+     */
+    if (process.arch === 'arm64') {
+      binaryFolder = join(binaryFolder, 'mac-arm64')
+    } else {
+      binaryFolder = join(binaryFolder, 'mac-x64')
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'linux-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+  }
+
+  return {
+    executablePath: join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  }
+}
+
+const validateModelStatus = async (): Promise<void> => {
+  // Send a GET request to the validation URL.
+  // Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
+    method: 'GET',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    retries: 5,
+    retryDelay: 500,
+  }).then(async (res: Response) => {
+    logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
+    // If the response is OK, check model_loaded status.
+    if (res.ok) {
+      const body = await res.json()
+      // If the model is loaded, resolve.
+      // Otherwise, reject with an error message.
+      if (body.model_loaded) {
+        return Promise.resolve()
+      }
+    }
+    return Promise.reject('Validate model status failed')
+  })
+}
+
+const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
+  logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(settings),
+    retries: 3,
+    retryDelay: 500,
+  })
+    .then((res: any) => {
+      logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
+      return Promise.resolve(res)
+    })
+    .catch((err: any) => {
+      logServer(`[NITRO]::Error: Load model failed with error ${err}`)
+      return Promise.reject()
+    })
+}
+
+/**
+ * Stop model and kill the Nitro process.
+ */
+export const stopModel = async (_modelId: string) => {
+  if (!subprocess) {
+    return {
+      error: "Model isn't running",
+    }
+  }
+  return new Promise((resolve, reject) => {
+    const controller = new AbortController()
+    setTimeout(() => {
+      controller.abort()
+      reject({
+        error: 'Failed to stop model: Timed out',
+      })
+    }, 5000)
+    const tcpPortUsed = require('tcp-port-used')
+    logServer(`[NITRO]::Debug: Request to kill Nitro`)
+
+    fetch(NITRO_HTTP_KILL_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
+    })
+      .then(() => {
+        subprocess?.kill()
+        subprocess = undefined
+      })
+      .catch(() => {
+        // don't need to do anything, we still kill the subprocess
+      })
+      .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
+      .then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
+      .then(() =>
+        resolve({
+          message: 'Model stopped',
+        })
+      )
+  })
+}
diff --git a/core/src/node/api/routes/common.ts b/core/src/node/api/routes/common.ts
index a6c65a382c..27385e5619 100644
--- a/core/src/node/api/routes/common.ts
+++ b/core/src/node/api/routes/common.ts
@@ -10,6 +10,8 @@ import {
 } from '../common/builder'
 
 import { JanApiRouteConfiguration } from '../common/configuration'
+import { startModel, stopModel } from '../common/startStopModel'
+import { ModelSettingParams } from '../../../types'
 
 export const commonRouter = async (app: HttpServer) => {
   // Common Routes
@@ -17,19 +19,33 @@
     app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
 
     app.get(`/${key}/:id`, async (request: any) =>
-      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
+      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
 
     app.delete(`/${key}/:id`, async (request: any) =>
-      deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
+      deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
   })
 
   // Download Model Routes
   app.get(`/models/download/:modelId`, async (request: any) =>
-    downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
+    downloadModel(request.params.modelId, {
+      ignoreSSL: request.query.ignoreSSL === 'true',
+      proxy: request.query.proxy,
+    })
   )
 
+  app.put(`/models/:modelId/start`, async (request: any) => {
+    let settingParams: ModelSettingParams | undefined = undefined
+    if (Object.keys(request.body).length !== 0) {
+      settingParams = JSON.parse(request.body) as ModelSettingParams
+    }
+
+    return startModel(request.params.modelId, settingParams)
+  })
+
+  app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
+
// Chat Completion Routes app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply)) diff --git a/core/src/node/utils/index.ts b/core/src/node/utils/index.ts index 00db04c9bd..4bcbf13b17 100644 --- a/core/src/node/utils/index.ts +++ b/core/src/node/utils/index.ts @@ -1,16 +1,18 @@ -import { AppConfiguration } from "../../types"; -import { join } from "path"; -import fs from "fs"; -import os from "os"; +import { AppConfiguration, SystemResourceInfo } from '../../types' +import { join } from 'path' +import fs from 'fs' +import os from 'os' +import { log, logServer } from '../log' +import childProcess from 'child_process' // TODO: move this to core -const configurationFileName = "settings.json"; +const configurationFileName = 'settings.json' // TODO: do no specify app name in framework module -const defaultJanDataFolder = join(os.homedir(), "jan"); +const defaultJanDataFolder = join(os.homedir(), 'jan') const defaultAppConfig: AppConfiguration = { data_folder: defaultJanDataFolder, -}; +} /** * Getting App Configurations. @@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = { export const getAppConfigurations = (): AppConfiguration => { // Retrieve Application Support folder path // Fallback to user home directory if not found - const configurationFile = getConfigurationFilePath(); + const configurationFile = getConfigurationFilePath() if (!fs.existsSync(configurationFile)) { // create default app config if we don't have one - console.debug(`App config not found, creating default config at ${configurationFile}`); - fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig)); - return defaultAppConfig; + console.debug(`App config not found, creating default config at ${configurationFile}`) + fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig)) + return defaultAppConfig } try { const appConfigurations: AppConfiguration = JSON.parse( - fs.readFileSync(configurationFile, "utf-8"), - ); - return appConfigurations; + fs.readFileSync(configurationFile, 'utf-8') + ) + return appConfigurations } catch (err) { - console.error(`Failed to read app config, return default config instead! Err: ${err}`); - return defaultAppConfig; + console.error(`Failed to read app config, return default config instead! Err: ${err}`) + return defaultAppConfig } -}; +} const getConfigurationFilePath = () => join( - global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"], - configurationFileName, - ); + global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'], + configurationFileName + ) export const updateAppConfiguration = (configuration: AppConfiguration): Promise => { - const configurationFile = getConfigurationFilePath(); - console.debug("updateAppConfiguration, configurationFile: ", configurationFile); + const configurationFile = getConfigurationFilePath() + console.debug('updateAppConfiguration, configurationFile: ', configurationFile) - fs.writeFileSync(configurationFile, JSON.stringify(configuration)); - return Promise.resolve(); -}; + fs.writeFileSync(configurationFile, JSON.stringify(configuration)) + return Promise.resolve() +} /** * Utility function to get server log path @@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise * @returns {string} The log path. 
*/ export const getServerLogPath = (): string => { - const appConfigurations = getAppConfigurations(); - const logFolderPath = join(appConfigurations.data_folder, "logs"); + const appConfigurations = getAppConfigurations() + const logFolderPath = join(appConfigurations.data_folder, 'logs') if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }); + fs.mkdirSync(logFolderPath, { recursive: true }) } - return join(logFolderPath, "server.log"); -}; + return join(logFolderPath, 'server.log') +} /** * Utility function to get app log path @@ -74,13 +76,13 @@ export const getServerLogPath = (): string => { * @returns {string} The log path. */ export const getAppLogPath = (): string => { - const appConfigurations = getAppConfigurations(); - const logFolderPath = join(appConfigurations.data_folder, "logs"); + const appConfigurations = getAppConfigurations() + const logFolderPath = join(appConfigurations.data_folder, 'logs') if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }); + fs.mkdirSync(logFolderPath, { recursive: true }) } - return join(logFolderPath, "app.log"); -}; + return join(logFolderPath, 'app.log') +} /** * Utility function to get data folder path @@ -88,9 +90,9 @@ export const getAppLogPath = (): string => { * @returns {string} The data folder path. */ export const getJanDataFolderPath = (): string => { - const appConfigurations = getAppConfigurations(); - return appConfigurations.data_folder; -}; + const appConfigurations = getAppConfigurations() + return appConfigurations.data_folder +} /** * Utility function to get extension path @@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => { * @returns {string} The extensions path. */ export const getJanExtensionsPath = (): string => { - const appConfigurations = getAppConfigurations(); - return join(appConfigurations.data_folder, "extensions"); -}; + const appConfigurations = getAppConfigurations() + return join(appConfigurations.data_folder, 'extensions') +} + +/** + * Utility function to physical cpu count + * + * @returns {number} The physical cpu count. 
+ */ +export const physicalCpuCount = async (): Promise => { + const platform = os.platform() + if (platform === 'linux') { + const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') + return parseInt(output.trim(), 10) + } else if (platform === 'darwin') { + const output = await exec('sysctl -n hw.physicalcpu_max') + return parseInt(output.trim(), 10) + } else if (platform === 'win32') { + const output = await exec('WMIC CPU Get NumberOfCores') + return output + .split(os.EOL) + .map((line: string) => parseInt(line)) + .filter((value: number) => !isNaN(value)) + .reduce((sum: number, number: number) => sum + number, 1) + } else { + const cores = os.cpus().filter((cpu: any, index: number) => { + const hasHyperthreading = cpu.model.includes('Intel') + const isOdd = index % 2 === 1 + return !hasHyperthreading || isOdd + }) + return cores.length + } +} + +const exec = async (command: string): Promise => { + return new Promise((resolve, reject) => { + childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => { + if (error) { + reject(error) + } else { + resolve(stdout) + } + }) + }) +} + +export const getSystemResourceInfo = async (): Promise => { + const cpu = await physicalCpuCount() + const message = `[NITRO]::CPU informations - ${cpu}` + log(message) + logServer(message) + + return { + numCpuPhysicalCore: cpu, + memAvailable: 0, // TODO: this should not be 0 + } +} + +export const getEngineConfiguration = async (engineId: string) => { + if (engineId !== 'openai') { + return undefined + } + const directoryPath = join(getJanDataFolderPath(), 'engines') + const filePath = join(directoryPath, `${engineId}.json`) + const data = fs.readFileSync(filePath, 'utf-8') + return JSON.parse(data) +} diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 3bdcb5421b..ee6f4ef08b 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -6,3 +6,4 @@ export * from './inference' export * from './monitoring' export * from './file' export * from './config' +export * from './miscellaneous' diff --git a/core/src/types/miscellaneous/index.ts b/core/src/types/miscellaneous/index.ts new file mode 100644 index 0000000000..02c973323f --- /dev/null +++ b/core/src/types/miscellaneous/index.ts @@ -0,0 +1,2 @@ +export * from './systemResourceInfo' +export * from './promptTemplate' diff --git a/core/src/types/miscellaneous/promptTemplate.ts b/core/src/types/miscellaneous/promptTemplate.ts new file mode 100644 index 0000000000..a6743c67cd --- /dev/null +++ b/core/src/types/miscellaneous/promptTemplate.ts @@ -0,0 +1,6 @@ +export type PromptTemplate = { + system_prompt?: string + ai_prompt?: string + user_prompt?: string + error?: string +} diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts new file mode 100644 index 0000000000..1472cda474 --- /dev/null +++ b/core/src/types/miscellaneous/systemResourceInfo.ts @@ -0,0 +1,4 @@ +export type SystemResourceInfo = { + numCpuPhysicalCore: number + memAvailable: number +} diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts index 727ff085fc..644c34dfb1 100644 --- a/core/src/types/model/modelEntity.ts +++ b/core/src/types/model/modelEntity.ts @@ -123,6 +123,7 @@ export type ModelSettingParams = { user_prompt?: string llama_model_path?: string mmproj?: string + cont_batching?: boolean } /** diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts 
index 5fb41f0f8a..bc126337f6 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -2,22 +2,6 @@ declare const NODE: string; declare const INFERENCE_URL: string; declare const TROUBLESHOOTING_URL: string; -/** - * The parameters for the initModel function. - * @property settings - The settings for the machine learning model. - * @property settings.ctx_len - The context length. - * @property settings.ngl - The number of generated tokens. - * @property settings.cont_batching - Whether to use continuous batching. - * @property settings.embedding - Whether to use embedding. - */ -interface EngineSettings { - ctx_len: number; - ngl: number; - cpu_threads: number; - cont_batching: boolean; - embedding: boolean; -} - /** * The response from the initModel function. * @property error - An error message if the model fails to load. @@ -26,8 +10,3 @@ interface ModelOperationResponse { error?: any; modelFile?: string; } - -interface ResourcesInfo { - numCpuPhysicalCore: number; - memAvailable: number; -} \ No newline at end of file diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index 0e6edb992a..aaa230ca34 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -24,6 +24,7 @@ import { MessageEvent, ModelEvent, InferenceEvent, + ModelSettingParams, } from "@janhq/core"; import { requestInference } from "./helpers/sse"; import { ulid } from "ulid"; @@ -45,7 +46,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension { private _currentModel: Model | undefined; - private _engineSettings: EngineSettings = { + private _engineSettings: ModelSettingParams = { ctx_len: 2048, ngl: 100, cpu_threads: 1, diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index 77060e4140..443e686e80 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -3,11 +3,19 @@ import path from "path"; import { ChildProcessWithoutNullStreams, spawn } from "child_process"; import tcpPortUsed from "tcp-port-used"; import fetchRT from "fetch-retry"; -import { log, getJanDataFolderPath } from "@janhq/core/node"; +import { + log, + getJanDataFolderPath, + getSystemResourceInfo, +} from "@janhq/core/node"; import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia"; -import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core"; +import { + Model, + InferenceEngine, + ModelSettingParams, + PromptTemplate, +} from "@janhq/core"; import { executableNitroFile } from "./execute"; -import { physicalCpuCount } from "./utils"; // Polyfill fetch with retry const fetchRetry = fetchRT(fetch); @@ -20,16 +28,6 @@ interface ModelInitOptions { model: Model; } -/** - * The response object of Prompt Template parsing. - */ -interface PromptTemplate { - system_prompt?: string; - ai_prompt?: string; - user_prompt?: string; - error?: string; -} - /** * Model setting args for Nitro model load. 
*/ @@ -78,7 +76,7 @@ function stopModel(): Promise { * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package */ async function runModel( - wrapper: ModelInitOptions, + wrapper: ModelInitOptions ): Promise { if (wrapper.model.engine !== InferenceEngine.nitro) { // Not a nitro model @@ -96,7 +94,7 @@ async function runModel( const ggufBinFile = files.find( (file) => file === path.basename(currentModelFile) || - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT), + file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ); if (!ggufBinFile) return Promise.reject("No GGUF model file found"); @@ -106,7 +104,7 @@ async function runModel( if (wrapper.model.engine !== InferenceEngine.nitro) { return Promise.reject("Not a nitro model"); } else { - const nitroResourceProbe = await getResourcesInfo(); + const nitroResourceProbe = await getSystemResourceInfo(); // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt if (wrapper.model.settings.prompt_template) { const promptTemplate = wrapper.model.settings.prompt_template; @@ -191,10 +189,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { const system_prompt = promptTemplate.substring(0, systemIndex); const user_prompt = promptTemplate.substring( systemIndex + systemMarker.length, - promptIndex, + promptIndex ); const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length, + promptIndex + promptMarker.length ); // Return the split parts @@ -204,7 +202,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { const promptIndex = promptTemplate.indexOf(promptMarker); const user_prompt = promptTemplate.substring(0, promptIndex); const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length, + promptIndex + promptMarker.length ); // Return the split parts @@ -233,8 +231,8 @@ function loadLLMModel(settings: any): Promise { .then((res) => { log( `[NITRO]::Debug: Load model success with response ${JSON.stringify( - res, - )}`, + res + )}` ); return Promise.resolve(res); }) @@ -263,8 +261,8 @@ async function validateModelStatus(): Promise { }).then(async (res: Response) => { log( `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( - res, - )}`, + res + )}` ); // If the response is OK, check model_loaded status. 
if (res.ok) { @@ -315,7 +313,7 @@ function spawnNitroProcess(): Promise { const args: string[] = ["1", LOCAL_HOST, PORT.toString()]; // Execute the binary log( - `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`, + `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` ); subprocess = spawn( executableOptions.executablePath, @@ -326,7 +324,7 @@ function spawnNitroProcess(): Promise { ...process.env, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, }, - }, + } ); // Handle subprocess output @@ -351,22 +349,6 @@ function spawnNitroProcess(): Promise { }); } -/** - * Get the system resources information - * TODO: Move to Core so that it can be reused - */ -function getResourcesInfo(): Promise { - return new Promise(async (resolve) => { - const cpu = await physicalCpuCount(); - log(`[NITRO]::CPU informations - ${cpu}`); - const response: ResourcesInfo = { - numCpuPhysicalCore: cpu, - memAvailable: 0, - }; - resolve(response); - }); -} - /** * Every module should have a dispose function * This will be called when the extension is unloaded and should clean up any resources diff --git a/extensions/inference-nitro-extension/src/node/utils.ts b/extensions/inference-nitro-extension/src/node/utils.ts deleted file mode 100644 index c7ef2e9a65..0000000000 --- a/extensions/inference-nitro-extension/src/node/utils.ts +++ /dev/null @@ -1,56 +0,0 @@ -import os from "os"; -import childProcess from "child_process"; - -function exec(command: string): Promise { - return new Promise((resolve, reject) => { - childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => { - if (error) { - reject(error); - } else { - resolve(stdout); - } - }); - }); -} - -let amount: number; -const platform = os.platform(); - -export async function physicalCpuCount(): Promise { - return new Promise((resolve, reject) => { - if (platform === "linux") { - exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') - .then((output) => { - amount = parseInt(output.trim(), 10); - resolve(amount); - }) - .catch(reject); - } else if (platform === "darwin") { - exec("sysctl -n hw.physicalcpu_max") - .then((output) => { - amount = parseInt(output.trim(), 10); - resolve(amount); - }) - .catch(reject); - } else if (platform === "win32") { - exec("WMIC CPU Get NumberOfCores") - .then((output) => { - amount = output - .split(os.EOL) - .map((line: string) => parseInt(line)) - .filter((value: number) => !isNaN(value)) - .reduce((sum: number, number: number) => sum + number, 1); - resolve(amount); - }) - .catch(reject); - } else { - const cores = os.cpus().filter((cpu: any, index: number) => { - const hasHyperthreading = cpu.model.includes("Intel"); - const isOdd = index % 2 === 1; - return !hasHyperthreading || isOdd; - }); - amount = cores.length; - resolve(amount); - } - }); -} diff --git a/server/package.json b/server/package.json index 9495a0d657..f61730da4a 100644 --- a/server/package.json +++ b/server/package.json @@ -26,6 +26,8 @@ "dotenv": "^16.3.1", "fastify": "^4.24.3", "request": "^2.88.2", + "fetch-retry": "^5.0.6", + "tcp-port-used": "^1.0.2", "request-progress": "^3.0.0" }, "devDependencies": { @@ -35,6 +37,7 @@ "@typescript-eslint/parser": "^6.7.3", "eslint-plugin-react": "^7.33.2", "run-script-os": "^1.1.6", + "@types/tcp-port-used": "^1.0.4", "typescript": "^5.2.2" } } diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index d9451a46c3..aad42aba95 100644 --- a/web/hooks/useCreateNewThread.ts +++ 
b/web/hooks/useCreateNewThread.ts @@ -7,7 +7,7 @@ import { ThreadState, Model, } from '@janhq/core' -import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' +import { atom, useAtomValue, useSetAtom } from 'jotai' import { fileUploadAtom } from '@/containers/Providers/Jotai' @@ -48,7 +48,8 @@ export const useCreateNewThread = () => { const createNewThread = useSetAtom(createNewThreadAtom) const setActiveThreadId = useSetAtom(setActiveThreadIdAtom) const updateThread = useSetAtom(updateThreadAtom) - const [fileUpload, setFileUpload] = useAtom(fileUploadAtom) + + const setFileUpload = useSetAtom(fileUploadAtom) const { deleteThread } = useDeleteThread() const requestCreateNewThread = async ( diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts index 035f0551a6..76a744bcd3 100644 --- a/web/hooks/useSetActiveThread.ts +++ b/web/hooks/useSetActiveThread.ts @@ -1,5 +1,3 @@ -import { useEffect } from 'react' - import { InferenceEvent, ExtensionTypeEnum, diff --git a/web/screens/LocalServer/index.tsx b/web/screens/LocalServer/index.tsx index 7e1ba1fab3..e7f3c7fc20 100644 --- a/web/screens/LocalServer/index.tsx +++ b/web/screens/LocalServer/index.tsx @@ -1,7 +1,6 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ 'use client' -import React, { useEffect, useState } from 'react' +import React, { useCallback, useEffect, useState } from 'react' import ScrollToBottom from 'react-scroll-to-bottom' @@ -81,14 +80,17 @@ const LocalServerScreen = () => { const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] = useState(false) - const handleChangePort = (value: any) => { - if (Number(value) <= 0 || Number(value) >= 65536) { - setErrorRangePort(true) - } else { - setErrorRangePort(false) - } - setPort(value) - } + const handleChangePort = useCallback( + (value: string) => { + if (Number(value) <= 0 || Number(value) >= 65536) { + setErrorRangePort(true) + } else { + setErrorRangePort(false) + } + setPort(value) + }, + [setPort] + ) useEffect(() => { if (localStorage.getItem(FIRST_TIME_VISIT_API_SERVER) == null) { @@ -98,7 +100,7 @@ const LocalServerScreen = () => { useEffect(() => { handleChangePort(port) - }, []) + }, [handleChangePort, port]) return (
diff --git a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx index d8a2321a91..89a8759551 100644 --- a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx +++ b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx @@ -1,6 +1,4 @@ -import React, { useCallback, useEffect, useState } from 'react' - -import { fs, AppConfiguration, joinPath, getUserHomePath } from '@janhq/core' +import React, { useCallback, useState } from 'react' import { Modal,