diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 90984acef3..9379e194be 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -40,9 +40,7 @@
   "dependencies": {
     "@janhq/core": "file:../../core",
     "@rollup/plugin-replace": "^5.0.5",
-    "@types/os-utils": "^0.0.4",
     "fetch-retry": "^5.0.6",
-    "os-utils": "^0.0.14",
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
@@ -59,7 +57,6 @@
   "bundleDependencies": [
     "tcp-port-used",
     "fetch-retry",
-    "os-utils",
     "@janhq/core"
   ]
 }
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index ba3705fde6..735383a61e 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -86,11 +86,17 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       this.onMessageRequest(data)
     );
 
-    events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model));
+    events.on(ModelEvent.OnModelInit, (model: Model) =>
+      this.onModelInit(model)
+    );
 
-    events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model));
+    events.on(ModelEvent.OnModelStop, (model: Model) =>
+      this.onModelStop(model)
+    );
 
-    events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped());
+    events.on(InferenceEvent.OnInferenceStopped, () =>
+      this.onInferenceStopped()
+    );
 
     // Attempt to fetch nvidia info
     await executeOnMain(NODE, "updateNvidiaInfo", {});
@@ -200,11 +206,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       if (!this._currentModel) return Promise.reject("No model loaded");
 
       requestInference(data.messages ?? [], this._currentModel).subscribe({
-        next: (_content) => {},
+        next: (_content: any) => {},
         complete: async () => {
           resolve(message);
         },
-        error: async (err) => {
+        error: async (err: any) => {
           reject(err);
         },
       });
@@ -245,7 +251,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       ...(data.model || {}),
     };
     requestInference(data.messages ?? [], model, this.controller).subscribe({
-      next: (content) => {
+      next: (content: any) => {
        const messageContent: ThreadContent = {
          type: ContentType.Text,
          text: {
@@ -262,7 +268,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
           : MessageStatus.Error;
         events.emit(MessageEvent.OnMessageUpdate, message);
       },
-      error: async (err) => {
+      error: async (err: any) => {
         if (this.isCancelled || message.content.length) {
           message.status = MessageStatus.Stopped;
           events.emit(MessageEvent.OnMessageUpdate, message);
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
index 3aef77afda..0a7a2e33e2 100644
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@@ -3,11 +3,12 @@ import path from "path";
 import { ChildProcessWithoutNullStreams, spawn } from "child_process";
 import tcpPortUsed from "tcp-port-used";
 import fetchRT from "fetch-retry";
-import osUtils from "os-utils";
 import { log, getJanDataFolderPath } from "@janhq/core/node";
 import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
 import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
 import { executableNitroFile } from "./execute";
+import { physicalCpuCount } from "./utils";
+
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch);
 
@@ -121,9 +122,10 @@ async function runModel(
     currentSettings = {
       llama_model_path: currentModelFile,
       ...wrapper.model.settings,
-      // This is critical and requires real system information
-      cpu_threads: Math.max(1, Math.round(nitroResourceProbe.numCpuPhysicalCore / 2)),
+      // This is critical and requires real CPU physical core count (or performance core)
+      cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     };
+    console.log(currentSettings);
     return runNitroAndLoadModel();
   }
 }
@@ -348,7 +350,7 @@ function spawnNitroProcess(): Promise {
  */
 function getResourcesInfo(): Promise<ResourcesInfo> {
   return new Promise(async (resolve) => {
-    const cpu = await osUtils.cpuCount();
+    const cpu = await physicalCpuCount();
     log(`[NITRO]::CPU informations - ${cpu}`);
     const response: ResourcesInfo = {
       numCpuPhysicalCore: cpu,
diff --git a/extensions/inference-nitro-extension/src/node/utils.ts b/extensions/inference-nitro-extension/src/node/utils.ts
new file mode 100644
index 0000000000..c7ef2e9a65
--- /dev/null
+++ b/extensions/inference-nitro-extension/src/node/utils.ts
@@ -0,0 +1,56 @@
+import os from "os";
+import childProcess from "child_process";
+
+function exec(command: string): Promise<string> {
+  return new Promise((resolve, reject) => {
+    childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
+      if (error) {
+        reject(error);
+      } else {
+        resolve(stdout);
+      }
+    });
+  });
+}
+
+let amount: number;
+const platform = os.platform();
+
+export async function physicalCpuCount(): Promise<number> {
+  return new Promise((resolve, reject) => {
+    if (platform === "linux") {
+      exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
+        .then((output) => {
+          amount = parseInt(output.trim(), 10);
+          resolve(amount);
+        })
+        .catch(reject);
+    } else if (platform === "darwin") {
+      exec("sysctl -n hw.physicalcpu_max")
+        .then((output) => {
+          amount = parseInt(output.trim(), 10);
+          resolve(amount);
+        })
+        .catch(reject);
+    } else if (platform === "win32") {
+      exec("WMIC CPU Get NumberOfCores")
+        .then((output) => {
+          amount = output
+            .split(os.EOL)
+            .map((line: string) => parseInt(line))
+            .filter((value: number) => !isNaN(value))
+            .reduce((sum: number, number: number) => sum + number, 1);
+          resolve(amount);
+        })
+        .catch(reject);
+    } else {
+      const cores = os.cpus().filter((cpu: any, index: number) => {
+        const hasHyperthreading = cpu.model.includes("Intel");
+        const isOdd = index % 2 === 1;
+        return !hasHyperthreading || isOdd;
+      });
+      amount = cores.length;
+      resolve(amount);
+    }
+  });
+}