Skip to content

Commit

Permalink
Node.js package (ggerganov#260)
Browse files Browse the repository at this point in the history
* npm : preparing infra for node package

* npm : package infra ready

* npm : initial version ready

* npm : change name to whisper.cpp

whisper.js is taken
ggerganov authored Dec 12, 2022
1 parent aa6adda commit f309f97
Showing 15 changed files with 373 additions and 67 deletions.
24 changes: 13 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
endif()
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
else()
set(WHISPER_STANDALONE OFF)
endif()
@@ -151,8 +152,7 @@ else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
else()
if (EMSCRIPTEN)
# we require support for WASM SIMD 128-bit
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
else()
if(NOT WHISPER_NO_AVX)
@@ -203,6 +203,10 @@ if (BUILD_SHARED_LIBS)
)
endif()

if (EMSCRIPTEN)
    # WASM SIMD 128-bit support is required. The flag is appended to this
    # target's compile options only; setting the COMPILE_FLAGS property would
    # replace any flags already stored in that property.
    target_compile_options(${TARGET} PRIVATE -msimd128)
endif()

target_compile_definitions(${TARGET} PUBLIC
${WHISPER_EXTRA_FLAGS}
)
@@ -222,13 +226,11 @@ add_subdirectory(bindings)
# programs, examples and tests
#

if (WHISPER_STANDALONE)
if (WHISPER_BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
endif ()

if (WHISPER_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
if (WHISPER_BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
endif ()

if (WHISPER_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
16 changes: 16 additions & 0 deletions bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
if (EMSCRIPTEN)
    add_subdirectory(javascript)

    # Rule that publishes the npm package from bindings/javascript.
    #
    # publish.log is a stamp file: it is touched only after `npm publish`
    # succeeds, so the rule re-runs only when one of the listed package files
    # is newer than the stamp. The stamp is written with `cmake -E touch`
    # because a bare `touch` is not available on every host platform.
    add_custom_command(
        OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
        DEPENDS
            ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
            ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
            ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
        COMMAND npm publish
        COMMAND ${CMAKE_COMMAND} -E touch publish.log
        COMMENT "Publishing npm module v${PROJECT_VERSION}"
        VERBATIM
    )

    # Convenience target: `make publish-npm` (or the generator's equivalent).
    # The dependency is spelled as the same absolute path used for OUTPUT above
    # so the link between the target and the rule is explicit.
    add_custom_target(publish-npm
        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
    )
endif()
15 changes: 11 additions & 4 deletions bindings/javascript/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -20,15 +20,22 @@ if (WHISPER_WASM_SINGLE_FILE)
${CMAKE_BINARY_DIR}/bin/libwhisper.js
${CMAKE_CURRENT_SOURCE_DIR}/whisper.js
)

add_custom_command(
TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_BINARY_DIR}/bin/libwhisper.worker.js
${CMAKE_CURRENT_SOURCE_DIR}/libwhisper.worker.js
)
endif()

set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s MODULARIZE=1 \
-s EXPORT_NAME=\"'whisper_factory'\" \
-s FORCE_FILESYSTEM=1 \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE=8 \
-s INITIAL_MEMORY=1610612736 \
-s TOTAL_MEMORY=1610612736 \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
-s ALLOW_MEMORY_GROWTH=1 \
${EXTRA_FLAGS} \
")
5 changes: 5 additions & 0 deletions bindings/javascript/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# whisper.cpp

Node.js package for Whisper speech recognition

For sample usage check [tests/test-whisper.js](/tests/test-whisper.js)
77 changes: 31 additions & 46 deletions bindings/javascript/emscripten.cpp
Original file line number Diff line number Diff line change
@@ -1,71 +1,56 @@
//
// This is the Javascript API of whisper.cpp
//
// Very crude at the moment.
// Feel free to contribute and make this better!
//
// See the tests/test-whisper.js for sample usage
//

#include "whisper.h"

#include <emscripten.h>
#include <emscripten/bind.h>

#include <vector>
#include <thread>
#include <vector>

std::thread g_worker;

std::vector<struct whisper_context *> g_contexts(4, nullptr);
struct whisper_context * g_context;

EMSCRIPTEN_BINDINGS(whisper) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
if (g_worker.joinable()) {
g_worker.join();
}

for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init(path_model.c_str());
if (g_contexts[i] != nullptr) {
return i + 1;
} else {
return (size_t) 0;
}
if (g_context == nullptr) {
g_context = whisper_init(path_model.c_str());
if (g_context != nullptr) {
return true;
} else {
return false;
}
}

return (size_t) 0;
return false;
}));

emscripten::function("free", emscripten::optional_override([](size_t index) {
if (g_worker.joinable()) {
g_worker.join();
}

--index;

if (index < g_contexts.size()) {
whisper_free(g_contexts[index]);
g_contexts[index] = nullptr;
emscripten::function("free", emscripten::optional_override([]() {
if (g_context) {
whisper_free(g_context);
g_context = nullptr;
}
}));

emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
if (g_worker.joinable()) {
g_worker.join();
}

--index;

if (index >= g_contexts.size()) {
emscripten::function("full_default", emscripten::optional_override([](const emscripten::val & audio, const std::string & lang, bool translate) {
if (g_context == nullptr) {
return -1;
}

if (g_contexts[index] == nullptr) {
return -2;
}

struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

params.print_realtime = true;
params.print_progress = false;
params.print_timestamps = true;
params.print_special = false;
params.translate = translate;
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
params.language = whisper_is_multilingual(g_context) ? lang.c_str() : "en";
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
params.offset_ms = 0;

@@ -82,9 +67,11 @@ EMSCRIPTEN_BINDINGS(whisper) {

// print system information
{
printf("\n");
printf("system_info: n_threads = %d / %d | %s\n",
params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());

printf("\n");
printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
__func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
params.n_threads, 1,
@@ -94,13 +81,11 @@ EMSCRIPTEN_BINDINGS(whisper) {
printf("\n");
}

// run the worker
// run whisper
{
g_worker = std::thread([index, params, pcmf32 = std::move(pcmf32)]() {
whisper_reset_timings(g_contexts[index]);
whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
whisper_print_timings(g_contexts[index]);
});
whisper_reset_timings(g_context);
whisper_full(g_context, params, pcmf32.data(), pcmf32.size());
whisper_print_timings(g_context);
}

return 0;
1 change: 1 addition & 0 deletions bindings/javascript/libwhisper.worker.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions bindings/javascript/package-tmpl.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "whisper.cpp",
"version": "@PROJECT_VERSION@",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {
"test": "echo \"todo: add tests\" && exit 0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ggerganov/whisper.cpp"
},
"keywords": [
"openai",
"whisper",
"speech-to-text",
"speech-recognition",
"transformer"
],
"author": "Georgi Gerganov",
"license": "MIT",
"bugs": {
"url": "https://github.com/ggerganov/whisper.cpp/issues"
},
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
}
26 changes: 26 additions & 0 deletions bindings/javascript/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "whisper.cpp",
"version": "1.0.0",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {
"test": "echo \"todo: add tests\" && exit 0"
},
"repository": {
"type": "git",
"url": "git+https://github.com/ggerganov/whisper.cpp"
},
"keywords": [
"openai",
"whisper",
"speech-to-text",
"speech-recognition",
"transformer"
],
"author": "Georgi Gerganov",
"license": "MIT",
"bugs": {
"url": "https://github.com/ggerganov/whisper.cpp/issues"
},
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
}
22 changes: 21 additions & 1 deletion bindings/javascript/whisper.js

Large diffs are not rendered by default.

48 changes: 45 additions & 3 deletions examples/whisper.wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,47 @@
#
# libmain
#

# name of the executable target built from emscripten.cpp
set(TARGET libmain)

add_executable(${TARGET}
emscripten.cpp
)

target_link_libraries(${TARGET} PRIVATE
whisper
)

# make sure no EXTRA_FLAGS value set earlier leaks into the link flags below
unset(EXTRA_FLAGS)

# optional single-file mode: adds "-s SINGLE_FILE=1" to the link flags and,
# after each build of the target, copies bin/libmain.js to whisper.wasm/main.js
# in the runtime output directory
if (WHISPER_WASM_SINGLE_FILE)
set(EXTRA_FLAGS "-s SINGLE_FILE=1")
message(STATUS "Embedding WASM inside main.js")

add_custom_command(
TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_BINARY_DIR}/bin/libmain.js
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/whisper.wasm/main.js
)
endif()

# link flags for the target; ${EXTRA_FLAGS} expands to nothing unless
# WHISPER_WASM_SINGLE_FILE is enabled
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE=8 \
-s INITIAL_MEMORY=1024MB \
-s TOTAL_MEMORY=1024MB \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
${EXTRA_FLAGS} \
")

#
# whisper.wasm
#

set(TARGET whisper.wasm)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/whisper.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
108 changes: 108 additions & 0 deletions examples/whisper.wasm/emscripten.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#include "whisper.h"

#include <emscripten.h>
#include <emscripten/bind.h>

#include <vector>
#include <thread>

// Background thread that runs the transcription started by "full_default".
// Every binding joins it before doing anything else, so a new call waits for
// the previous transcription to finish.
std::thread g_worker;

// Fixed pool of 4 context slots, all empty (nullptr) at start. "init" fills the
// first empty slot and returns its 1-based index; "free" empties a slot again.
std::vector<struct whisper_context *> g_contexts(4, nullptr);

// JavaScript bindings: init / free / full_default.
//
// Contexts are addressed from JavaScript by a 1-based index; 0 means "no context".
EMSCRIPTEN_BINDINGS(whisper) {
    // init(path_model) -> index
    //
    // Loads the model at path_model into the first empty context slot.
    // Returns the 1-based slot index, or 0 if loading failed or every slot is taken.
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        // wait for a transcription that may still be running
        if (g_worker.joinable()) {
            g_worker.join();
        }

        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
                g_contexts[i] = whisper_init(path_model.c_str());
                if (g_contexts[i] != nullptr) {
                    return i + 1;
                } else {
                    return (size_t) 0;
                }
            }
        }

        return (size_t) 0;
    }));

    // free(index)
    //
    // Releases the context stored in the given 1-based slot and marks the slot empty.
    // An out-of-range index is ignored (index 0 wraps around on the decrement
    // below and therefore fails the range check as well).
    emscripten::function("free", emscripten::optional_override([](size_t index) {
        if (g_worker.joinable()) {
            g_worker.join();
        }

        --index;

        if (index < g_contexts.size()) {
            whisper_free(g_contexts[index]);
            g_contexts[index] = nullptr;
        }
    }));

    // full_default(index, audio, lang, translate) -> status
    //
    // Copies the samples of the JavaScript typed array `audio` into WASM memory and
    // starts a transcription with default greedy-sampling parameters on the worker
    // thread. The call returns as soon as the worker has been started.
    //
    // Returns 0 when the worker was started, -1 when index is out of range,
    // -2 when the slot holds no context.
    emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
        if (g_worker.joinable()) {
            g_worker.join();
        }

        --index;

        if (index >= g_contexts.size()) {
            return -1;
        }

        if (g_contexts[index] == nullptr) {
            return -2;
        }

        struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

        params.print_realtime   = true;
        params.print_progress   = false;
        params.print_timestamps = true;
        params.print_special    = false;
        params.translate        = translate;
        params.language         = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
        params.n_threads        = std::min(8, (int) std::thread::hardware_concurrency());
        params.offset_ms        = 0;

        std::vector<float> pcmf32;
        const int n = audio["length"].as<int>();

        emscripten::val heap = emscripten::val::module_property("HEAPU8");
        emscripten::val memory = heap["buffer"];

        pcmf32.resize(n);

        // build a typed-array view of the same type as `audio` on top of pcmf32's
        // storage and let JavaScript copy the samples into it
        emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
        memoryView.call<void>("set", audio);

        // print system information
        {
            printf("system_info: n_threads = %d / %d | %s\n",
                    params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());

            printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
                    __func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
                    params.n_threads, 1,
                    params.language,
                    params.translate ? "translate" : "transcribe");

            printf("\n");
        }

        // run the worker
        {
            // params.language may point into `lang`, a reference owned by the caller
            // that is not guaranteed to stay alive after this binding returns, while
            // the worker keeps running. The worker therefore gets its own copy of the
            // string and re-points params.language at that copy before using it.
            g_worker = std::thread([index, params, language = std::string(params.language), pcmf32 = std::move(pcmf32)]() mutable {
                params.language = language.c_str();

                whisper_reset_timings(g_contexts[index]);
                whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
                whisper_print_timings(g_contexts[index]);
            });
        }

        return 0;
    }));
}
2 changes: 1 addition & 1 deletion examples/whisper.wasm/index-tmpl.html
Original file line number Diff line number Diff line change
@@ -550,6 +550,6 @@
}
}
</script>
<script type="text/javascript" src="whisper.js"></script>
<script type="text/javascript" src="main.js"></script>
</body>
</html>
2 changes: 1 addition & 1 deletion extra/deploy-wasm.sh
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then
fi

# copy all wasm files to the node
scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libwhisper.worker.js root@linode0:/var/www/html/whisper/
scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libmain.worker.js root@linode0:/var/www/html/whisper/
scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/
scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/
10 changes: 10 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
if (EMSCRIPTEN)
    #
    # test-whisper-js
    #

    set(TEST_TARGET test-whisper-js)

    # Runs tests/test-whisper.js with node from the tests source directory.
    # Node.js options have to come before the script name: anything placed
    # after it is handed to the script as an argument instead of being parsed
    # by node itself.
    add_test(NAME ${TEST_TARGET}
        COMMAND node --experimental-wasm-threads test-whisper.js
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    )

    # nothing below this point applies to Emscripten builds
    return()
endif()

58 changes: 58 additions & 0 deletions tests/test-whisper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Smoke test for the Node.js package: loads a model, transcribes one sample
// and exits with code 1 on the first failure.
var factory = require('../bindings/javascript/whisper.js')

factory().then(function(whisper) {
    var fs = require('fs');

    // to avoid reading WAV files and depending on some 3rd-party package, we read
    // 32-bit float PCM directly. to generate it:
    //
    //   $ ffmpeg -i samples/jfk.wav -f f32le -acodec pcm_f32le samples/jfk.pcmf32
    //
    let fname_wav   = "../samples/jfk.pcmf32";
    let fname_model = "../models/ggml-base.en.bin";

    // fs.readFileSync() reports failure by throwing, never by returning null,
    // so a failed read is caught here, logged with the given message and turned
    // into exit code 1
    function read_or_exit(fname, message) {
        try {
            return fs.readFileSync(fname);
        } catch (err) {
            console.log(message);
            process.exit(1);
        }
    }

    // init whisper
    {
        // read binary data from file
        var model_data = read_or_exit(fname_model, "whisper: failed to read model file");

        // write binary data to WASM memory
        whisper.FS_createDataFile("/", "whisper.bin", model_data, true, true);

        // init the model
        var ret = whisper.init("whisper.bin");
        if (ret == false) {
            console.log('whisper: failed to init');
            process.exit(1);
        }
    }

    // transcribe wav file
    {
        // read raw binary data
        var pcm_data = read_or_exit(fname_wav, "whisper: failed to read wav file");

        // convert to 32-bit float array
        //
        // a Buffer can be a view into a larger ArrayBuffer, so the view is limited
        // to the Buffer's own byte range instead of covering the whole ArrayBuffer
        var pcm = new Float32Array(pcm_data.buffer, pcm_data.byteOffset, Math.floor(pcm_data.byteLength / 4));

        // transcribe
        var ret = whisper.full_default(pcm, "en", false);
        if (ret != 0) {
            console.log("whisper: failed to transcribe");
            process.exit(1);
        }
    }

    // free memory
    {
        whisper.free();
    }
});

0 comments on commit f309f97

Please sign in to comment.