Node.js package (ggerganov#260)

* npm : preparing infra for node package
* npm : package infra ready
* npm : initial version ready
* npm : change name to whisper.cpp (whisper.js is taken)
James3039 · Dec 12, 2022 · f309f97 · f309f97
1 parent aa6adda
commit f309f97
Showing 15 changed files with 373 additions and 67 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -14,6 +14,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
         configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
     endif()
+    configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
 else()
     set(WHISPER_STANDALONE OFF)
 endif()
@@ -151,8 +152,7 @@ else()
         set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
     else()
         if (EMSCRIPTEN)
-            # we require support for WASM SIMD 128-bit
-            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -msimd128")
+            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
         else()
             if(NOT WHISPER_NO_AVX)
@@ -203,6 +203,10 @@ if (BUILD_SHARED_LIBS)
         )
 endif()
 
+if (EMSCRIPTEN)
+    set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
+endif()
+
 target_compile_definitions(${TARGET} PUBLIC
     ${WHISPER_EXTRA_FLAGS}
     )
@@ -222,13 +226,11 @@ add_subdirectory(bindings)
 # programs, examples and tests
 #
 
-if (WHISPER_STANDALONE)
-    if (WHISPER_BUILD_TESTS)
-        enable_testing()
-        add_subdirectory(tests)
-    endif ()
-
-    if (WHISPER_BUILD_EXAMPLES)
-        add_subdirectory(examples)
-    endif()
+if (WHISPER_BUILD_TESTS)
+    enable_testing()
+    add_subdirectory(tests)
 endif ()
+
+if (WHISPER_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+endif()
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
@@ -1,3 +1,19 @@
 if (EMSCRIPTEN)
     add_subdirectory(javascript)
+
+    add_custom_command(
+        OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
+        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
+        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
+        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
+        COMMAND npm publish
+        COMMAND touch publish.log
+        COMMENT "Publishing npm module v${PROJECT_VERSION}"
+        VERBATIM
+        )
+
+    add_custom_target(publish-npm
+        DEPENDS javascript/publish.log
+        )
 endif()
diff --git a/bindings/javascript/CMakeLists.txt b/bindings/javascript/CMakeLists.txt
@@ -20,15 +20,22 @@ if (WHISPER_WASM_SINGLE_FILE)
         ${CMAKE_BINARY_DIR}/bin/libwhisper.js
         ${CMAKE_CURRENT_SOURCE_DIR}/whisper.js
         )
+
+    add_custom_command(
+        TARGET ${TARGET} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy
+        ${CMAKE_BINARY_DIR}/bin/libwhisper.worker.js
+        ${CMAKE_CURRENT_SOURCE_DIR}/libwhisper.worker.js
+        )
 endif()
 
 set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
     --bind \
+    -s MODULARIZE=1 \
+    -s EXPORT_NAME=\"'whisper_factory'\" \
+    -s FORCE_FILESYSTEM=1 \
     -s USE_PTHREADS=1 \
     -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1610612736 \
-    -s TOTAL_MEMORY=1610612736 \
-    -s FORCE_FILESYSTEM=1 \
-    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
+    -s ALLOW_MEMORY_GROWTH=1 \
     ${EXTRA_FLAGS} \
     ")
diff --git a/bindings/javascript/README.md b/bindings/javascript/README.md
@@ -0,0 +1,5 @@
+# whisper.cpp
+
+Node.js package for Whisper speech recognition
+
+For sample usage check [tests/test-whisper.js](/tests/test-whisper.js)
diff --git a/bindings/javascript/emscripten.cpp b/bindings/javascript/emscripten.cpp
@@ -1,71 +1,56 @@
+//
+// This is the Javascript API of whisper.cpp
+//
+// Very crude at the moment.
+// Feel free to contribute and make this better!
+//
+// See the tests/test-whisper.js for sample usage
+//
+
 #include "whisper.h"
 
 #include <emscripten.h>
 #include <emscripten/bind.h>
 
-#include <vector>
 #include <thread>
+#include <vector>
 
-std::thread g_worker;
-
-std::vector<struct whisper_context *> g_contexts(4, nullptr);
+struct whisper_context * g_context;
 
 EMSCRIPTEN_BINDINGS(whisper) {
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        for (size_t i = 0; i < g_contexts.size(); ++i) {
-            if (g_contexts[i] == nullptr) {
-                g_contexts[i] = whisper_init(path_model.c_str());
-                if (g_contexts[i] != nullptr) {
-                    return i + 1;
-                } else {
-                    return (size_t) 0;
-                }
+        if (g_context == nullptr) {
+            g_context = whisper_init(path_model.c_str());
+            if (g_context != nullptr) {
+                return true;
+            } else {
+                return false;
             }
         }
 
-        return (size_t) 0;
+        return false;
     }));
 
-    emscripten::function("free", emscripten::optional_override([](size_t index) {
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        --index;
-
-        if (index < g_contexts.size()) {
-            whisper_free(g_contexts[index]);
-            g_contexts[index] = nullptr;
+    emscripten::function("free", emscripten::optional_override([]() {
+        if (g_context) {
+            whisper_free(g_context);
+            g_context = nullptr;
         }
     }));
 
-    emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
-        if (g_worker.joinable()) {
-            g_worker.join();
-        }
-
-        --index;
-
-        if (index >= g_contexts.size()) {
+    emscripten::function("full_default", emscripten::optional_override([](const emscripten::val & audio, const std::string & lang, bool translate) {
+        if (g_context == nullptr) {
             return -1;
         }
 
-        if (g_contexts[index] == nullptr) {
-            return -2;
-        }
-
         struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
 
         params.print_realtime   = true;
         params.print_progress   = false;
         params.print_timestamps = true;
         params.print_special    = false;
         params.translate        = translate;
-        params.language         = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
+        params.language         = whisper_is_multilingual(g_context) ? lang.c_str() : "en";
         params.n_threads        = std::min(8, (int) std::thread::hardware_concurrency());
         params.offset_ms        = 0;
 
@@ -82,9 +67,11 @@ EMSCRIPTEN_BINDINGS(whisper) {
 
         // print system information
         {
+            printf("\n");
             printf("system_info: n_threads = %d / %d | %s\n",
                     params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
 
+            printf("\n");
             printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
                     __func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
                     params.n_threads, 1,
@@ -94,13 +81,11 @@ EMSCRIPTEN_BINDINGS(whisper) {
             printf("\n");
         }
 
-        // run the worker
+        // run whisper
         {
-            g_worker = std::thread([index, params, pcmf32 = std::move(pcmf32)]() {
-                whisper_reset_timings(g_contexts[index]);
-                whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
-                whisper_print_timings(g_contexts[index]);
-            });
+            whisper_reset_timings(g_context);
+            whisper_full(g_context, params, pcmf32.data(), pcmf32.size());
+            whisper_print_timings(g_context);
         }
 
         return 0;

diff --git a/bindings/javascript/libwhisper.worker.js b/bindings/javascript/libwhisper.worker.js
diff --git a/bindings/javascript/package-tmpl.json b/bindings/javascript/package-tmpl.json
@@ -0,0 +1,26 @@
+{
+  "name": "whisper.cpp",
+  "version": "@PROJECT_VERSION@",
+  "description": "Whisper speech recognition",
+  "main": "whisper.js",
+  "scripts": {
+    "test": "echo \"todo: add tests\" && exit 0"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ggerganov/whisper.cpp"
+  },
+  "keywords": [
+    "openai",
+    "whisper",
+    "speech-to-text",
+    "speech-recognition",
+    "transformer"
+  ],
+  "author": "Georgi Gerganov",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/ggerganov/whisper.cpp/issues"
+  },
+  "homepage": "https://github.com/ggerganov/whisper.cpp#readme"
+}
diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json
@@ -0,0 +1,26 @@
+{
+  "name": "whisper.cpp",
+  "version": "1.0.0",
+  "description": "Whisper speech recognition",
+  "main": "whisper.js",
+  "scripts": {
+    "test": "echo \"todo: add tests\" && exit 0"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ggerganov/whisper.cpp"
+  },
+  "keywords": [
+    "openai",
+    "whisper",
+    "speech-to-text",
+    "speech-recognition",
+    "transformer"
+  ],
+  "author": "Georgi Gerganov",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/ggerganov/whisper.cpp/issues"
+  },
+  "homepage": "https://github.com/ggerganov/whisper.cpp#readme"
+}
diff --git a/bindings/javascript/whisper.js b/bindings/javascript/whisper.js
diff --git a/examples/whisper.wasm/CMakeLists.txt b/examples/whisper.wasm/CMakeLists.txt
@@ -1,5 +1,47 @@
+#
+# libmain
+#
+
+set(TARGET libmain)
+
+add_executable(${TARGET}
+    emscripten.cpp
+    )
+
+target_link_libraries(${TARGET} PRIVATE
+    whisper
+    )
+
+unset(EXTRA_FLAGS)
+
+if (WHISPER_WASM_SINGLE_FILE)
+    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
+    message(STATUS "Embedding WASM inside main.js")
+
+    add_custom_command(
+        TARGET ${TARGET} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy
+        ${CMAKE_BINARY_DIR}/bin/libmain.js
+        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/whisper.wasm/main.js
+        )
+endif()
+
+set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
+    --bind \
+    -s USE_PTHREADS=1 \
+    -s PTHREAD_POOL_SIZE=8 \
+    -s INITIAL_MEMORY=1024MB \
+    -s TOTAL_MEMORY=1024MB \
+    -s FORCE_FILESYSTEM=1 \
+    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
+    ${EXTRA_FLAGS} \
+    ")
+
+#
+# whisper.wasm
+#
+
 set(TARGET whisper.wasm)
 
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js          ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
-configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/whisper.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js  COPYONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)