From 4116aaa98a390b35fb6cde7d02de76c52e2f04b0 Mon Sep 17 00:00:00 2001 From: NamH Date: Thu, 1 Feb 2024 11:25:34 +0700 Subject: [PATCH] feat: add start/stop model via http api (#1862) Signed-off-by: nam --- core/src/node/api/common/builder.ts | 17 +- core/src/node/api/common/consts.ts | 19 + core/src/node/api/common/startStopModel.ts | 351 ++++++++++++++++++ core/src/node/api/routes/common.ts | 22 +- core/src/node/utils/index.ts | 148 ++++++-- core/src/types/index.ts | 1 + core/src/types/miscellaneous/index.ts | 2 + .../src/types/miscellaneous/promptTemplate.ts | 6 + .../types/miscellaneous/systemResourceInfo.ts | 4 + core/src/types/model/modelEntity.ts | 1 + .../src/@types/global.d.ts | 21 -- .../inference-nitro-extension/src/index.ts | 3 +- .../src/node/index.ts | 64 ++-- .../src/node/utils.ts | 56 --- server/package.json | 3 + web/hooks/useCreateNewThread.ts | 5 +- web/hooks/useSetActiveThread.ts | 2 - web/screens/LocalServer/index.tsx | 24 +- .../FactoryReset/ModalConfirmReset.tsx | 4 +- 19 files changed, 559 insertions(+), 194 deletions(-) create mode 100644 core/src/node/api/common/consts.ts create mode 100644 core/src/node/api/common/startStopModel.ts create mode 100644 core/src/types/miscellaneous/index.ts create mode 100644 core/src/types/miscellaneous/promptTemplate.ts create mode 100644 core/src/types/miscellaneous/systemResourceInfo.ts delete mode 100644 extensions/inference-nitro-extension/src/node/utils.ts diff --git a/core/src/node/api/common/builder.ts b/core/src/node/api/common/builder.ts index a9819bce68..5c99cf4d8e 100644 --- a/core/src/node/api/common/builder.ts +++ b/core/src/node/api/common/builder.ts @@ -2,7 +2,8 @@ import fs from 'fs' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { join } from 'path' import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index' -import { getJanDataFolderPath } from '../../utils' +import { getEngineConfiguration, getJanDataFolderPath } from '../../utils' +import { DEFAULT_CHAT_COMPLETION_URL } from './consts' export const getBuilder = async (configuration: RouteConfiguration) => { const directoryPath = join(getJanDataFolderPath(), configuration.dirName) @@ -309,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => { const engineConfiguration = await getEngineConfiguration(requestedModel.engine) let apiKey: string | undefined = undefined - let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url + let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL if (engineConfiguration) { apiKey = engineConfiguration.api_key @@ -320,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', - "Access-Control-Allow-Origin": "*" + 'Access-Control-Allow-Origin': '*', }) const headers: Record = { @@ -346,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => { response.body.pipe(reply.raw) } } - -const getEngineConfiguration = async (engineId: string) => { - if (engineId !== 'openai') { - return undefined - } - const directoryPath = join(getJanDataFolderPath(), 'engines') - const filePath = join(directoryPath, `${engineId}.json`) - const data = await fs.readFileSync(filePath, 'utf-8') - return JSON.parse(data) -} diff --git a/core/src/node/api/common/consts.ts b/core/src/node/api/common/consts.ts new file mode 100644 index 0000000000..bc3cfe3001 --- /dev/null +++ 
b/core/src/node/api/common/consts.ts @@ -0,0 +1,19 @@ +// The PORT to use for the Nitro subprocess +export const NITRO_DEFAULT_PORT = 3928 + +// The HOST address to use for the Nitro subprocess +export const LOCAL_HOST = '127.0.0.1' + +export const SUPPORTED_MODEL_FORMAT = '.gguf' + +// The URL for the Nitro subprocess +const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}` +// The URL for the Nitro subprocess to load a model +export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel` +// The URL for the Nitro subprocess to validate a model +export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus` + +// The URL for the Nitro subprocess to kill itself +export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` + +export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url diff --git a/core/src/node/api/common/startStopModel.ts b/core/src/node/api/common/startStopModel.ts new file mode 100644 index 0000000000..7665883804 --- /dev/null +++ b/core/src/node/api/common/startStopModel.ts @@ -0,0 +1,351 @@ +import fs from 'fs' +import { join } from 'path' +import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils' +import { logServer } from '../../log' +import { ChildProcessWithoutNullStreams, spawn } from 'child_process' +import { Model, ModelSettingParams, PromptTemplate } from '../../../types' +import { + LOCAL_HOST, + NITRO_DEFAULT_PORT, + NITRO_HTTP_KILL_URL, + NITRO_HTTP_LOAD_MODEL_URL, + NITRO_HTTP_VALIDATE_MODEL_URL, + SUPPORTED_MODEL_FORMAT, +} from './consts' + +// The subprocess instance for Nitro +let subprocess: ChildProcessWithoutNullStreams | undefined = undefined + +// TODO: move this to core type +interface NitroModelSettings extends ModelSettingParams { + llama_model_path: string + cpu_threads: number +} + +export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => { + try { + await runModel(modelId, settingParams) + + return { + message: `Model ${modelId} started`, + } + } catch (e) { + return { + error: e, + } + } +} + +const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise => { + const janDataFolderPath = getJanDataFolderPath() + const modelFolderFullPath = join(janDataFolderPath, 'models', modelId) + + if (!fs.existsSync(modelFolderFullPath)) { + throw `Model not found: ${modelId}` + } + + const files: string[] = fs.readdirSync(modelFolderFullPath) + + // Look for GGUF model file + const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)) + + const modelMetadataPath = join(modelFolderFullPath, 'model.json') + const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8')) + + if (!ggufBinFile) { + throw 'No GGUF model file found' + } + const modelBinaryPath = join(modelFolderFullPath, ggufBinFile) + + const nitroResourceProbe = await getSystemResourceInfo() + const nitroModelSettings: NitroModelSettings = { + ...modelMetadata.settings, + ...settingParams, + llama_model_path: modelBinaryPath, + // This is critical and requires real CPU physical core count (or performance core) + cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), + ...(modelMetadata.settings.mmproj && { + mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj), + }), + } + + logServer(`[NITRO]::Debug: Nitro model settings: 
${JSON.stringify(nitroModelSettings)}`)
+
+  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
+  if (modelMetadata.settings.prompt_template) {
+    const promptTemplate = modelMetadata.settings.prompt_template
+    const prompt = promptTemplateConverter(promptTemplate)
+    if (prompt?.error) {
+      return Promise.reject(prompt.error)
+    }
+    nitroModelSettings.system_prompt = prompt.system_prompt
+    nitroModelSettings.user_prompt = prompt.user_prompt
+    nitroModelSettings.ai_prompt = prompt.ai_prompt
+  }
+
+  await runNitroAndLoadModel(modelId, nitroModelSettings)
+}
+
+// TODO: move to util
+const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
+  // Split the string using the markers
+  const systemMarker = '{system_message}'
+  const promptMarker = '{prompt}'
+
+  if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
+    // Find the indices of the markers
+    const systemIndex = promptTemplate.indexOf(systemMarker)
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+
+    // Extract the parts of the string
+    const system_prompt = promptTemplate.substring(0, systemIndex)
+    const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt }
+  } else if (promptTemplate.includes(promptMarker)) {
+    // Extract the parts of the string for the case where only promptMarker is present
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+    const user_prompt = promptTemplate.substring(0, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { user_prompt, ai_prompt }
+  }
+
+  // Return an error if none of the conditions are met
+  return { error: 'Cannot split prompt template' }
+}
+
+const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
+  // Stop any running Nitro instance and wait for its port to be released
+  const tcpPortUsed = require('tcp-port-used')
+
+  await stopModel(modelId)
+  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
+
+  /**
+   * There is a problem with the Windows process manager.
+   * Should wait for a while to make sure the port is free and the subprocess is killed.
+   * The tested threshold is 500ms.
+   **/
+  if (process.platform === 'win32') {
+    await new Promise((resolve) => setTimeout(resolve, 500))
+  }
+
+  await spawnNitroProcess()
+  await loadLLMModel(modelSettings)
+  await validateModelStatus()
+}
+
+const spawnNitroProcess = async (): Promise<void> => {
+  logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let executableOptions = executableNitroFile()
+  const tcpPortUsed = require('tcp-port-used')
+
+  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
+  // Execute the binary
+  logServer(
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+  )
+  subprocess = spawn(
+    executableOptions.executablePath,
+    ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
+    {
+      cwd: binaryFolder,
+      env: {
+        ...process.env,
+        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
+      },
+    }
+  )
+
+  // Handle subprocess output
+  subprocess.stdout.on('data', (data: any) => {
+    logServer(`[NITRO]::Debug: ${data}`)
+  })
+
+  subprocess.stderr.on('data', (data: any) => {
+    logServer(`[NITRO]::Error: ${data}`)
+  })
+
+  subprocess.on('close', (code: any) => {
+    logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+    subprocess = undefined
+  })
+
+  return tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
+    logServer(`[NITRO]::Debug: Nitro is ready`)
+  })
+}
+
+type NitroExecutableOptions = {
+  executablePath: string
+  cudaVisibleDevices: string
+}
+
+const executableNitroFile = (): NitroExecutableOptions => {
+  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let cudaVisibleDevices = ''
+  let binaryName = 'nitro'
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === 'win32') {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'win-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'win-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'win-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+    binaryName = 'nitro.exe'
+  } else if (process.platform === 'darwin') {
+    /**
+     * For MacOS: mac-arm64 (Silicon), mac-x64 (Intel)
+     */
+    if (process.arch === 'arm64') {
+      binaryFolder = join(binaryFolder, 'mac-arm64')
+    } else {
+      binaryFolder = join(binaryFolder, 'mac-x64')
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'linux-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+  }
+
+  return {
+    executablePath: join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  }
+}
+
+const validateModelStatus = async (): Promise<void> => {
+  // Send a GET request to the validation URL.
+  // Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
+    method: 'GET',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    retries: 5,
+    retryDelay: 500,
+  }).then(async (res: Response) => {
+    logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
+    // If the response is OK, check model_loaded status.
+    if (res.ok) {
+      const body = await res.json()
+      // If the model is loaded, resolve.
+      // Otherwise, reject with an error message.
+      if (body.model_loaded) {
+        return Promise.resolve()
+      }
+    }
+    return Promise.reject('Validate model status failed')
+  })
+}
+
+const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
+  logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(settings),
+    retries: 3,
+    retryDelay: 500,
+  })
+    .then((res: any) => {
+      logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
+      return Promise.resolve(res)
+    })
+    .catch((err: any) => {
+      logServer(`[NITRO]::Error: Load model failed with error ${err}`)
+      return Promise.reject()
+    })
+}
+
+/**
+ * Stop model and kill the Nitro process.
+ */
+export const stopModel = async (_modelId: string) => {
+  if (!subprocess) {
+    return {
+      error: "Model isn't running",
+    }
+  }
+  return new Promise((resolve, reject) => {
+    const controller = new AbortController()
+    setTimeout(() => {
+      controller.abort()
+      reject({
+        error: 'Failed to stop model: Timed out',
+      })
+    }, 5000)
+    const tcpPortUsed = require('tcp-port-used')
+    logServer(`[NITRO]::Debug: Request to kill Nitro`)
+
+    fetch(NITRO_HTTP_KILL_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
+    })
+      .then(() => {
+        subprocess?.kill()
+        subprocess = undefined
+      })
+      .catch(() => {
+        // don't need to do anything, we still kill the subprocess
+      })
+      .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
+      .then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
+      .then(() =>
+        resolve({
+          message: 'Model stopped',
+        })
+      )
+  })
+}
diff --git a/core/src/node/api/routes/common.ts b/core/src/node/api/routes/common.ts
index a6c65a382c..27385e5619 100644
--- a/core/src/node/api/routes/common.ts
+++ b/core/src/node/api/routes/common.ts
@@ -10,6 +10,8 @@ import {
 } from '../common/builder'
 
 import { JanApiRouteConfiguration } from '../common/configuration'
+import { startModel, stopModel } from '../common/startStopModel'
+import { ModelSettingParams } from '../../../types'
 
 export const commonRouter = async (app: HttpServer) => {
   // Common Routes
@@ -17,19 +19,33 @@
     app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
 
     app.get(`/${key}/:id`, async (request: any) =>
-      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
+      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
 
     app.delete(`/${key}/:id`, async (request: any) =>
-      deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
+      deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
   })
 
   // Download Model Routes
   app.get(`/models/download/:modelId`, async (request: any) =>
-    downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
+    downloadModel(request.params.modelId, {
+      ignoreSSL: request.query.ignoreSSL === 'true',
+      proxy: request.query.proxy,
+    })
   )
 
+  app.put(`/models/:modelId/start`, async (request: any) => {
+    let settingParams: ModelSettingParams | undefined = undefined
+    if (Object.keys(request.body).length !== 0) {
+      settingParams = JSON.parse(request.body) as ModelSettingParams
+    }
+
+    return startModel(request.params.modelId, settingParams)
+  })
+
+  app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
+
// Chat Completion Routes app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply)) diff --git a/core/src/node/utils/index.ts b/core/src/node/utils/index.ts index 00db04c9bd..4bcbf13b17 100644 --- a/core/src/node/utils/index.ts +++ b/core/src/node/utils/index.ts @@ -1,16 +1,18 @@ -import { AppConfiguration } from "../../types"; -import { join } from "path"; -import fs from "fs"; -import os from "os"; +import { AppConfiguration, SystemResourceInfo } from '../../types' +import { join } from 'path' +import fs from 'fs' +import os from 'os' +import { log, logServer } from '../log' +import childProcess from 'child_process' // TODO: move this to core -const configurationFileName = "settings.json"; +const configurationFileName = 'settings.json' // TODO: do no specify app name in framework module -const defaultJanDataFolder = join(os.homedir(), "jan"); +const defaultJanDataFolder = join(os.homedir(), 'jan') const defaultAppConfig: AppConfiguration = { data_folder: defaultJanDataFolder, -}; +} /** * Getting App Configurations. @@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = { export const getAppConfigurations = (): AppConfiguration => { // Retrieve Application Support folder path // Fallback to user home directory if not found - const configurationFile = getConfigurationFilePath(); + const configurationFile = getConfigurationFilePath() if (!fs.existsSync(configurationFile)) { // create default app config if we don't have one - console.debug(`App config not found, creating default config at ${configurationFile}`); - fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig)); - return defaultAppConfig; + console.debug(`App config not found, creating default config at ${configurationFile}`) + fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig)) + return defaultAppConfig } try { const appConfigurations: AppConfiguration = JSON.parse( - fs.readFileSync(configurationFile, "utf-8"), - ); - return appConfigurations; + fs.readFileSync(configurationFile, 'utf-8') + ) + return appConfigurations } catch (err) { - console.error(`Failed to read app config, return default config instead! Err: ${err}`); - return defaultAppConfig; + console.error(`Failed to read app config, return default config instead! Err: ${err}`) + return defaultAppConfig } -}; +} const getConfigurationFilePath = () => join( - global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"], - configurationFileName, - ); + global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'], + configurationFileName + ) export const updateAppConfiguration = (configuration: AppConfiguration): Promise => { - const configurationFile = getConfigurationFilePath(); - console.debug("updateAppConfiguration, configurationFile: ", configurationFile); + const configurationFile = getConfigurationFilePath() + console.debug('updateAppConfiguration, configurationFile: ', configurationFile) - fs.writeFileSync(configurationFile, JSON.stringify(configuration)); - return Promise.resolve(); -}; + fs.writeFileSync(configurationFile, JSON.stringify(configuration)) + return Promise.resolve() +} /** * Utility function to get server log path @@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise * @returns {string} The log path. 
*/ export const getServerLogPath = (): string => { - const appConfigurations = getAppConfigurations(); - const logFolderPath = join(appConfigurations.data_folder, "logs"); + const appConfigurations = getAppConfigurations() + const logFolderPath = join(appConfigurations.data_folder, 'logs') if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }); + fs.mkdirSync(logFolderPath, { recursive: true }) } - return join(logFolderPath, "server.log"); -}; + return join(logFolderPath, 'server.log') +} /** * Utility function to get app log path @@ -74,13 +76,13 @@ export const getServerLogPath = (): string => { * @returns {string} The log path. */ export const getAppLogPath = (): string => { - const appConfigurations = getAppConfigurations(); - const logFolderPath = join(appConfigurations.data_folder, "logs"); + const appConfigurations = getAppConfigurations() + const logFolderPath = join(appConfigurations.data_folder, 'logs') if (!fs.existsSync(logFolderPath)) { - fs.mkdirSync(logFolderPath, { recursive: true }); + fs.mkdirSync(logFolderPath, { recursive: true }) } - return join(logFolderPath, "app.log"); -}; + return join(logFolderPath, 'app.log') +} /** * Utility function to get data folder path @@ -88,9 +90,9 @@ export const getAppLogPath = (): string => { * @returns {string} The data folder path. */ export const getJanDataFolderPath = (): string => { - const appConfigurations = getAppConfigurations(); - return appConfigurations.data_folder; -}; + const appConfigurations = getAppConfigurations() + return appConfigurations.data_folder +} /** * Utility function to get extension path @@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => { * @returns {string} The extensions path. */ export const getJanExtensionsPath = (): string => { - const appConfigurations = getAppConfigurations(); - return join(appConfigurations.data_folder, "extensions"); -}; + const appConfigurations = getAppConfigurations() + return join(appConfigurations.data_folder, 'extensions') +} + +/** + * Utility function to physical cpu count + * + * @returns {number} The physical cpu count. 
+ */ +export const physicalCpuCount = async (): Promise => { + const platform = os.platform() + if (platform === 'linux') { + const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') + return parseInt(output.trim(), 10) + } else if (platform === 'darwin') { + const output = await exec('sysctl -n hw.physicalcpu_max') + return parseInt(output.trim(), 10) + } else if (platform === 'win32') { + const output = await exec('WMIC CPU Get NumberOfCores') + return output + .split(os.EOL) + .map((line: string) => parseInt(line)) + .filter((value: number) => !isNaN(value)) + .reduce((sum: number, number: number) => sum + number, 1) + } else { + const cores = os.cpus().filter((cpu: any, index: number) => { + const hasHyperthreading = cpu.model.includes('Intel') + const isOdd = index % 2 === 1 + return !hasHyperthreading || isOdd + }) + return cores.length + } +} + +const exec = async (command: string): Promise => { + return new Promise((resolve, reject) => { + childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => { + if (error) { + reject(error) + } else { + resolve(stdout) + } + }) + }) +} + +export const getSystemResourceInfo = async (): Promise => { + const cpu = await physicalCpuCount() + const message = `[NITRO]::CPU informations - ${cpu}` + log(message) + logServer(message) + + return { + numCpuPhysicalCore: cpu, + memAvailable: 0, // TODO: this should not be 0 + } +} + +export const getEngineConfiguration = async (engineId: string) => { + if (engineId !== 'openai') { + return undefined + } + const directoryPath = join(getJanDataFolderPath(), 'engines') + const filePath = join(directoryPath, `${engineId}.json`) + const data = fs.readFileSync(filePath, 'utf-8') + return JSON.parse(data) +} diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 3bdcb5421b..ee6f4ef08b 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -6,3 +6,4 @@ export * from './inference' export * from './monitoring' export * from './file' export * from './config' +export * from './miscellaneous' diff --git a/core/src/types/miscellaneous/index.ts b/core/src/types/miscellaneous/index.ts new file mode 100644 index 0000000000..02c973323f --- /dev/null +++ b/core/src/types/miscellaneous/index.ts @@ -0,0 +1,2 @@ +export * from './systemResourceInfo' +export * from './promptTemplate' diff --git a/core/src/types/miscellaneous/promptTemplate.ts b/core/src/types/miscellaneous/promptTemplate.ts new file mode 100644 index 0000000000..a6743c67cd --- /dev/null +++ b/core/src/types/miscellaneous/promptTemplate.ts @@ -0,0 +1,6 @@ +export type PromptTemplate = { + system_prompt?: string + ai_prompt?: string + user_prompt?: string + error?: string +} diff --git a/core/src/types/miscellaneous/systemResourceInfo.ts b/core/src/types/miscellaneous/systemResourceInfo.ts new file mode 100644 index 0000000000..1472cda474 --- /dev/null +++ b/core/src/types/miscellaneous/systemResourceInfo.ts @@ -0,0 +1,4 @@ +export type SystemResourceInfo = { + numCpuPhysicalCore: number + memAvailable: number +} diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts index 727ff085fc..644c34dfb1 100644 --- a/core/src/types/model/modelEntity.ts +++ b/core/src/types/model/modelEntity.ts @@ -123,6 +123,7 @@ export type ModelSettingParams = { user_prompt?: string llama_model_path?: string mmproj?: string + cont_batching?: boolean } /** diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts 
index 5fb41f0f8a..bc126337f6 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -2,22 +2,6 @@ declare const NODE: string; declare const INFERENCE_URL: string; declare const TROUBLESHOOTING_URL: string; -/** - * The parameters for the initModel function. - * @property settings - The settings for the machine learning model. - * @property settings.ctx_len - The context length. - * @property settings.ngl - The number of generated tokens. - * @property settings.cont_batching - Whether to use continuous batching. - * @property settings.embedding - Whether to use embedding. - */ -interface EngineSettings { - ctx_len: number; - ngl: number; - cpu_threads: number; - cont_batching: boolean; - embedding: boolean; -} - /** * The response from the initModel function. * @property error - An error message if the model fails to load. @@ -26,8 +10,3 @@ interface ModelOperationResponse { error?: any; modelFile?: string; } - -interface ResourcesInfo { - numCpuPhysicalCore: number; - memAvailable: number; -} \ No newline at end of file diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index 0e6edb992a..aaa230ca34 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -24,6 +24,7 @@ import { MessageEvent, ModelEvent, InferenceEvent, + ModelSettingParams, } from "@janhq/core"; import { requestInference } from "./helpers/sse"; import { ulid } from "ulid"; @@ -45,7 +46,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension { private _currentModel: Model | undefined; - private _engineSettings: EngineSettings = { + private _engineSettings: ModelSettingParams = { ctx_len: 2048, ngl: 100, cpu_threads: 1, diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts index 77060e4140..443e686e80 100644 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ b/extensions/inference-nitro-extension/src/node/index.ts @@ -3,11 +3,19 @@ import path from "path"; import { ChildProcessWithoutNullStreams, spawn } from "child_process"; import tcpPortUsed from "tcp-port-used"; import fetchRT from "fetch-retry"; -import { log, getJanDataFolderPath } from "@janhq/core/node"; +import { + log, + getJanDataFolderPath, + getSystemResourceInfo, +} from "@janhq/core/node"; import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia"; -import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core"; +import { + Model, + InferenceEngine, + ModelSettingParams, + PromptTemplate, +} from "@janhq/core"; import { executableNitroFile } from "./execute"; -import { physicalCpuCount } from "./utils"; // Polyfill fetch with retry const fetchRetry = fetchRT(fetch); @@ -20,16 +28,6 @@ interface ModelInitOptions { model: Model; } -/** - * The response object of Prompt Template parsing. - */ -interface PromptTemplate { - system_prompt?: string; - ai_prompt?: string; - user_prompt?: string; - error?: string; -} - /** * Model setting args for Nitro model load. 
*/ @@ -78,7 +76,7 @@ function stopModel(): Promise { * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package */ async function runModel( - wrapper: ModelInitOptions, + wrapper: ModelInitOptions ): Promise { if (wrapper.model.engine !== InferenceEngine.nitro) { // Not a nitro model @@ -96,7 +94,7 @@ async function runModel( const ggufBinFile = files.find( (file) => file === path.basename(currentModelFile) || - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT), + file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ); if (!ggufBinFile) return Promise.reject("No GGUF model file found"); @@ -106,7 +104,7 @@ async function runModel( if (wrapper.model.engine !== InferenceEngine.nitro) { return Promise.reject("Not a nitro model"); } else { - const nitroResourceProbe = await getResourcesInfo(); + const nitroResourceProbe = await getSystemResourceInfo(); // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt if (wrapper.model.settings.prompt_template) { const promptTemplate = wrapper.model.settings.prompt_template; @@ -191,10 +189,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { const system_prompt = promptTemplate.substring(0, systemIndex); const user_prompt = promptTemplate.substring( systemIndex + systemMarker.length, - promptIndex, + promptIndex ); const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length, + promptIndex + promptMarker.length ); // Return the split parts @@ -204,7 +202,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate { const promptIndex = promptTemplate.indexOf(promptMarker); const user_prompt = promptTemplate.substring(0, promptIndex); const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length, + promptIndex + promptMarker.length ); // Return the split parts @@ -233,8 +231,8 @@ function loadLLMModel(settings: any): Promise { .then((res) => { log( `[NITRO]::Debug: Load model success with response ${JSON.stringify( - res, - )}`, + res + )}` ); return Promise.resolve(res); }) @@ -263,8 +261,8 @@ async function validateModelStatus(): Promise { }).then(async (res: Response) => { log( `[NITRO]::Debug: Validate model state success with response ${JSON.stringify( - res, - )}`, + res + )}` ); // If the response is OK, check model_loaded status. 
if (res.ok) { @@ -315,7 +313,7 @@ function spawnNitroProcess(): Promise { const args: string[] = ["1", LOCAL_HOST, PORT.toString()]; // Execute the binary log( - `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`, + `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}` ); subprocess = spawn( executableOptions.executablePath, @@ -326,7 +324,7 @@ function spawnNitroProcess(): Promise { ...process.env, CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, }, - }, + } ); // Handle subprocess output @@ -351,22 +349,6 @@ function spawnNitroProcess(): Promise { }); } -/** - * Get the system resources information - * TODO: Move to Core so that it can be reused - */ -function getResourcesInfo(): Promise { - return new Promise(async (resolve) => { - const cpu = await physicalCpuCount(); - log(`[NITRO]::CPU informations - ${cpu}`); - const response: ResourcesInfo = { - numCpuPhysicalCore: cpu, - memAvailable: 0, - }; - resolve(response); - }); -} - /** * Every module should have a dispose function * This will be called when the extension is unloaded and should clean up any resources diff --git a/extensions/inference-nitro-extension/src/node/utils.ts b/extensions/inference-nitro-extension/src/node/utils.ts deleted file mode 100644 index c7ef2e9a65..0000000000 --- a/extensions/inference-nitro-extension/src/node/utils.ts +++ /dev/null @@ -1,56 +0,0 @@ -import os from "os"; -import childProcess from "child_process"; - -function exec(command: string): Promise { - return new Promise((resolve, reject) => { - childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => { - if (error) { - reject(error); - } else { - resolve(stdout); - } - }); - }); -} - -let amount: number; -const platform = os.platform(); - -export async function physicalCpuCount(): Promise { - return new Promise((resolve, reject) => { - if (platform === "linux") { - exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l') - .then((output) => { - amount = parseInt(output.trim(), 10); - resolve(amount); - }) - .catch(reject); - } else if (platform === "darwin") { - exec("sysctl -n hw.physicalcpu_max") - .then((output) => { - amount = parseInt(output.trim(), 10); - resolve(amount); - }) - .catch(reject); - } else if (platform === "win32") { - exec("WMIC CPU Get NumberOfCores") - .then((output) => { - amount = output - .split(os.EOL) - .map((line: string) => parseInt(line)) - .filter((value: number) => !isNaN(value)) - .reduce((sum: number, number: number) => sum + number, 1); - resolve(amount); - }) - .catch(reject); - } else { - const cores = os.cpus().filter((cpu: any, index: number) => { - const hasHyperthreading = cpu.model.includes("Intel"); - const isOdd = index % 2 === 1; - return !hasHyperthreading || isOdd; - }); - amount = cores.length; - resolve(amount); - } - }); -} diff --git a/server/package.json b/server/package.json index 9495a0d657..f61730da4a 100644 --- a/server/package.json +++ b/server/package.json @@ -26,6 +26,8 @@ "dotenv": "^16.3.1", "fastify": "^4.24.3", "request": "^2.88.2", + "fetch-retry": "^5.0.6", + "tcp-port-used": "^1.0.2", "request-progress": "^3.0.0" }, "devDependencies": { @@ -35,6 +37,7 @@ "@typescript-eslint/parser": "^6.7.3", "eslint-plugin-react": "^7.33.2", "run-script-os": "^1.1.6", + "@types/tcp-port-used": "^1.0.4", "typescript": "^5.2.2" } } diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index d9451a46c3..aad42aba95 100644 --- a/web/hooks/useCreateNewThread.ts +++ 
b/web/hooks/useCreateNewThread.ts @@ -7,7 +7,7 @@ import { ThreadState, Model, } from '@janhq/core' -import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' +import { atom, useAtomValue, useSetAtom } from 'jotai' import { fileUploadAtom } from '@/containers/Providers/Jotai' @@ -48,7 +48,8 @@ export const useCreateNewThread = () => { const createNewThread = useSetAtom(createNewThreadAtom) const setActiveThreadId = useSetAtom(setActiveThreadIdAtom) const updateThread = useSetAtom(updateThreadAtom) - const [fileUpload, setFileUpload] = useAtom(fileUploadAtom) + + const setFileUpload = useSetAtom(fileUploadAtom) const { deleteThread } = useDeleteThread() const requestCreateNewThread = async ( diff --git a/web/hooks/useSetActiveThread.ts b/web/hooks/useSetActiveThread.ts index 035f0551a6..76a744bcd3 100644 --- a/web/hooks/useSetActiveThread.ts +++ b/web/hooks/useSetActiveThread.ts @@ -1,5 +1,3 @@ -import { useEffect } from 'react' - import { InferenceEvent, ExtensionTypeEnum, diff --git a/web/screens/LocalServer/index.tsx b/web/screens/LocalServer/index.tsx index 7e1ba1fab3..e7f3c7fc20 100644 --- a/web/screens/LocalServer/index.tsx +++ b/web/screens/LocalServer/index.tsx @@ -1,7 +1,6 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ 'use client' -import React, { useEffect, useState } from 'react' +import React, { useCallback, useEffect, useState } from 'react' import ScrollToBottom from 'react-scroll-to-bottom' @@ -81,14 +80,17 @@ const LocalServerScreen = () => { const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] = useState(false) - const handleChangePort = (value: any) => { - if (Number(value) <= 0 || Number(value) >= 65536) { - setErrorRangePort(true) - } else { - setErrorRangePort(false) - } - setPort(value) - } + const handleChangePort = useCallback( + (value: string) => { + if (Number(value) <= 0 || Number(value) >= 65536) { + setErrorRangePort(true) + } else { + setErrorRangePort(false) + } + setPort(value) + }, + [setPort] + ) useEffect(() => { if (localStorage.getItem(FIRST_TIME_VISIT_API_SERVER) == null) { @@ -98,7 +100,7 @@ const LocalServerScreen = () => { useEffect(() => { handleChangePort(port) - }, []) + }, [handleChangePort, port]) return (
diff --git a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx index d8a2321a91..89a8759551 100644 --- a/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx +++ b/web/screens/Settings/Advanced/FactoryReset/ModalConfirmReset.tsx @@ -1,6 +1,4 @@ -import React, { useCallback, useEffect, useState } from 'react' - -import { fs, AppConfiguration, joinPath, getUserHomePath } from '@janhq/core' +import React, { useCallback, useState } from 'react' import { Modal,