Merge pull request janhq#1726 from janhq/fix/nitro_cpu_count
fix: Nitro CPU threads with correct physical/performance CPU count
hiro-v authored Jan 24, 2024
2 parents d6242de + 9da59b4 commit 0b129ba
Showing 4 changed files with 75 additions and 14 deletions.
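In short: the extension previously took the logical-CPU count reported by os-utils and halved it, which approximates the physical core count only on machines with SMT/hyper-threading. On a CPU without SMT (say, 16 cores and 16 threads), Math.round(16 / 2) hands Nitro just 8 threads, while the new per-platform probe yields the full 16. A minimal sketch of the before/after derivation (logicalCoreCount and physicalCoreCount are illustrative names, not identifiers from the diff):

    // Before: halve the logical-core count reported by os-utils.
    const cpuThreadsBefore = Math.max(1, Math.round(logicalCoreCount / 2));

    // After: use the probed physical (or performance) core count directly.
    const cpuThreadsAfter = Math.max(1, physicalCoreCount);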
3 changes: 0 additions & 3 deletions extensions/inference-nitro-extension/package.json
@@ -40,9 +40,7 @@
"dependencies": {
"@janhq/core": "file:../../core",
"@rollup/plugin-replace": "^5.0.5",
"@types/os-utils": "^0.0.4",
"fetch-retry": "^5.0.6",
"os-utils": "^0.0.14",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
@@ -59,7 +57,6 @@
"bundleDependencies": [
"tcp-port-used",
"fetch-retry",
"os-utils",
"@janhq/core"
]
}
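With the probe now implemented in-repo (see src/node/utils.ts below), os-utils and its @types stubs are no longer needed, so they are dropped from both dependencies and bundleDependencies.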
20 changes: 13 additions & 7 deletions extensions/inference-nitro-extension/src/index.ts
@@ -86,11 +86,17 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       this.onMessageRequest(data)
     );

-    events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model));
+    events.on(ModelEvent.OnModelInit, (model: Model) =>
+      this.onModelInit(model)
+    );

-    events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model));
+    events.on(ModelEvent.OnModelStop, (model: Model) =>
+      this.onModelStop(model)
+    );

-    events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped());
+    events.on(InferenceEvent.OnInferenceStopped, () =>
+      this.onInferenceStopped()
+    );

     // Attempt to fetch nvidia info
     await executeOnMain(NODE, "updateNvidiaInfo", {});
@@ -200,11 +206,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
       if (!this._currentModel) return Promise.reject("No model loaded");

       requestInference(data.messages ?? [], this._currentModel).subscribe({
-        next: (_content) => {},
+        next: (_content: any) => {},
         complete: async () => {
           resolve(message);
         },
-        error: async (err) => {
+        error: async (err: any) => {
           reject(err);
         },
       });
@@ -245,7 +251,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
         ...(data.model || {}),
       };
       requestInference(data.messages ?? [], model, this.controller).subscribe({
-        next: (content) => {
+        next: (content: any) => {
           const messageContent: ThreadContent = {
             type: ContentType.Text,
             text: {
@@ -262,7 +268,7 @@
             : MessageStatus.Error;
           events.emit(MessageEvent.OnMessageUpdate, message);
         },
-        error: async (err) => {
+        error: async (err: any) => {
           if (this.isCancelled || message.content.length) {
             message.status = MessageStatus.Stopped;
             events.emit(MessageEvent.OnMessageUpdate, message);
10 changes: 6 additions & 4 deletions extensions/inference-nitro-extension/src/node/index.ts
@@ -3,11 +3,12 @@ import path from "path";
 import { ChildProcessWithoutNullStreams, spawn } from "child_process";
 import tcpPortUsed from "tcp-port-used";
 import fetchRT from "fetch-retry";
-import osUtils from "os-utils";
 import { log, getJanDataFolderPath } from "@janhq/core/node";
 import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
 import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
 import { executableNitroFile } from "./execute";
+import { physicalCpuCount } from "./utils";
+
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch);

@@ -121,9 +122,10 @@ async function runModel(
     currentSettings = {
       llama_model_path: currentModelFile,
       ...wrapper.model.settings,
-      // This is critical and requires real system information
-      cpu_threads: Math.max(1, Math.round(nitroResourceProbe.numCpuPhysicalCore / 2)),
+      // This is critical and requires real CPU physical core count (or performance core)
+      cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     };
+    console.log(currentSettings);
     return runNitroAndLoadModel();
   }
 }
@@ -348,7 +350,7 @@ function spawnNitroProcess(): Promise<any> {
  */
 function getResourcesInfo(): Promise<ResourcesInfo> {
   return new Promise(async (resolve) => {
-    const cpu = await osUtils.cpuCount();
+    const cpu = await physicalCpuCount();
     log(`[NITRO]::CPU informations - ${cpu}`);
     const response: ResourcesInfo = {
       numCpuPhysicalCore: cpu,
56 changes: 56 additions & 0 deletions extensions/inference-nitro-extension/src/node/utils.ts
@@ -0,0 +1,56 @@
import os from "os";
import childProcess from "child_process";

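// Runs a shell command and resolves with its captured stdout.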
function exec(command: string): Promise<string> {
  return new Promise((resolve, reject) => {
    childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
      if (error) {
        reject(error);
      } else {
        resolve(stdout);
      }
    });
  });
}

let amount: number;
const platform = os.platform();

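// Best-effort physical-core count: platform-specific shell probes on Linux,
// macOS, and Windows, with a heuristic fallback for anything else.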
export async function physicalCpuCount(): Promise<number> {
  return new Promise((resolve, reject) => {
    if (platform === "linux") {
      // Deduplicate lscpu's logical-CPU table by core/socket columns
      // to count physical cores.
      exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
        .then((output) => {
          amount = parseInt(output.trim(), 10);
          resolve(amount);
        })
        .catch(reject);
    } else if (platform === "darwin") {
      // hw.physicalcpu_max is the maximum number of physical CPUs available.
      exec("sysctl -n hw.physicalcpu_max")
        .then((output) => {
          amount = parseInt(output.trim(), 10);
          resolve(amount);
        })
        .catch(reject);
    } else if (platform === "win32") {
exec("WMIC CPU Get NumberOfCores")
.then((output) => {
amount = output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1);
resolve(amount);
})
.catch(reject);
    } else {
      // Fallback heuristic: assume Intel CPUs expose two logical cores per
      // physical core (hyper-threading) and keep every other entry.
      const cores = os.cpus().filter((cpu: any, index: number) => {
        const hasHyperthreading = cpu.model.includes("Intel");
        const isOdd = index % 2 === 1;
        return !hasHyperthreading || isOdd;
      });
      amount = cores.length;
      resolve(amount);
    }
  });
}
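For reference, a minimal usage sketch of the new helper, mirroring how the diff above wires it into getResourcesInfo and runModel (probeCpuThreads is a hypothetical wrapper, shown only to illustrate the Math.max(1, ...) guard):

    import { physicalCpuCount } from "./utils";

    // Probe the physical core count and never hand Nitro fewer than 1 thread.
    async function probeCpuThreads(): Promise<number> {
      const cores = await physicalCpuCount();
      return Math.max(1, cores);
    }

    probeCpuThreads().then((n) => console.log(`cpu_threads = ${n}`));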
