Skip to content

Commit

Permalink
wip : initial WASM port
Browse files Browse the repository at this point in the history
Works but it is very slow because no SIMD is used.
For example, jfk.wav is processed in ~23 seconds using "tiny.en" model
  • Loading branch information
ggerganov committed Oct 22, 2022
1 parent 7d0dee7 commit e905c6f
Show file tree
Hide file tree
Showing 14 changed files with 441 additions and 24 deletions.
80 changes: 57 additions & 23 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,37 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

# Determine whether whisper is the top-level project of this build or was
# pulled into another project's build tree.
if (NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    # This directory is not the root of the build: behave as a sub-project.
    set(WHISPER_STANDALONE OFF)
else()
    # Top-level build: additionally load the git variables and the extra
    # build types shipped in the cmake/ directory.
    set(WHISPER_STANDALONE ON)

    include(cmake/GitVars.cmake)
    include(cmake/BuildTypes.cmake)
endif()

# Choose the default value for BUILD_SHARED_LIBS: OFF for Emscripten and
# MinGW builds, ON for everything else.
if (EMSCRIPTEN)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)

    # Emscripten-only option: controls whether the WASM is embedded in whisper.js.
    option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
elseif (MINGW)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)
else()
    set(BUILD_SHARED_LIBS_DEFAULT ON)
endif()

# options

# Library type; the default comes from BUILD_SHARED_LIBS_DEFAULT.
option(BUILD_SHARED_LIBS              "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})

# Compiler warnings.
option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings"                   ON)
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)

# Sanitizers.
option(WHISPER_SANITIZE_THREAD        "whisper: enable thread sanitizer"    OFF)
option(WHISPER_SANITIZE_ADDRESS       "whisper: enable address sanitizer"   OFF)
option(WHISPER_SANITIZE_UNDEFINED     "whisper: enable undefined sanitizer" OFF)

# Tests and examples default to ON only when whisper is the top-level project.
# (WHISPER_BUILD_TESTS is declared exactly once; a second identical option()
# call has no effect and was removed.)
option(WHISPER_BUILD_TESTS            "whisper: build tests"    ${WHISPER_STANDALONE})
option(WHISPER_BUILD_EXAMPLES         "whisper: build examples" ${WHISPER_STANDALONE})

# Optional third-party integrations.
option(WHISPER_SUPPORT_SDL2           "whisper: support for libSDL2" OFF)

Expand Down Expand Up @@ -69,16 +86,6 @@ if (APPLE AND NOT WHISPER_NO_ACCELERATE)
endif()
endif()

# Locate SDL2, but only when SDL2 support was requested via WHISPER_SUPPORT_SDL2.
if (WHISPER_SUPPORT_SDL2)
# SDL2 - REQUIRED makes configuration fail if the package cannot be found
find_package(SDL2 REQUIRED)

# remove leading/trailing whitespace from the reported library list
string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)

# print what was found to the configure log
message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
endif()

# compiler flags

if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
Expand Down Expand Up @@ -120,6 +127,11 @@ else()
endif()
endif()

# Emscripten builds: compile C and C++ with -pthread; C sources additionally
# get -msimd128.
if (EMSCRIPTEN)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} -pthread -msimd128")
endif()

# whisper - this is the main library of the project

set(TARGET whisper)
Expand Down Expand Up @@ -154,24 +166,46 @@ install(TARGETS ${TARGET}
ARCHIVE DESTINATION lib/static
)

# bindings
#
# Process the bindings/ subdirectory; its own CMakeLists.txt decides which
# bindings are actually built.

add_subdirectory(bindings)

# programs, examples and tests
#
# Everything in this section is built only when whisper is the top-level
# project (WHISPER_STANDALONE).

if (WHISPER_STANDALONE)
    # The command-line programs are skipped for Emscripten builds. Each target
    # is defined exactly once: defining "main" or "stream" a second time is a
    # hard CMake error.
    if (NOT EMSCRIPTEN)
        # TODO: move to examples
        # main
        set(TARGET main)
        add_executable(${TARGET} main.cpp)
        target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})

        # TODO: move to examples
        if (WHISPER_SUPPORT_SDL2)
            # SDL2 - required by the stream program below
            find_package(SDL2 REQUIRED)

            string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)

            message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
            message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")

            # stream
            set(TARGET stream)
            add_executable(${TARGET} stream.cpp)
            target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
            target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
        endif()
    endif()

    if (WHISPER_BUILD_TESTS)
        enable_testing()
        add_subdirectory(tests)
    endif()

    if (WHISPER_BUILD_EXAMPLES)
        add_subdirectory(examples)
    endif()
endif()
3 changes: 3 additions & 0 deletions bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# The JavaScript bindings are only added for Emscripten builds; for any other
# toolchain this directory contributes nothing.
if (NOT EMSCRIPTEN)
    return()
endif()

add_subdirectory(javascript)
1 change: 1 addition & 0 deletions bindings/javascript/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
publish.log
34 changes: 34 additions & 0 deletions bindings/javascript/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# JavaScript bindings: an executable target built from emscripten.cpp and
# linked against the whisper library.
set(TARGET libwhisper)

add_executable(${TARGET}
    emscripten.cpp
    )

target_link_libraries(${TARGET} PRIVATE
    whisper
    )

# Extra linker flags that depend on the selected options.
unset(EXTRA_FLAGS)
if (WHISPER_WASM_SINGLE_FILE)
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside whisper.js")

    # After every build, copy the generated JavaScript file next to this
    # CMakeLists.txt as whisper.js. The artifact path is obtained from the
    # target itself instead of a hard-coded "<build>/bin/libwhisper.js", so the
    # copy keeps working if the output directory or target name changes.
    # VERBATIM makes argument escaping identical on all platforms.
    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
            "$<TARGET_FILE:${TARGET}>"
            "${CMAKE_CURRENT_SOURCE_DIR}/whisper.js"
        VERBATIM
        )
endif()

# Linker settings passed through LINK_FLAGS; ${EXTRA_FLAGS} appends the
# optional flags selected above.
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s MODULARIZE=1 \
    -s ASSERTIONS=1 \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s TOTAL_MEMORY=536870912 \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORT_NAME=\"'whisper_factory'\" \
    ${EXTRA_FLAGS} \
    ")
70 changes: 70 additions & 0 deletions bindings/javascript/emscripten.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "whisper.h"

#include <emscripten.h>
#include <emscripten/bind.h>

#include <vector>

// Fixed-size table of whisper contexts. The bound functions below identify a
// context by its 1-based slot number; 0 is never a valid handle.
std::vector<struct whisper_context *> g_contexts(4, nullptr);

EMSCRIPTEN_BINDINGS(whisper) {
    // init(path_model) -> handle
    //
    // Loads a model into the first free slot and returns the 1-based slot
    // number. Returns 0 when all slots are in use or when the model could not
    // be loaded (whisper_init returned null), so a non-zero result always
    // refers to a usable context.
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        for (size_t i = 0; i < g_contexts.size(); ++i) {
            if (g_contexts[i] == nullptr) {
                g_contexts[i] = whisper_init(path_model.c_str());
                if (g_contexts[i] != nullptr) {
                    return i + 1;
                }

                // initialization failed - the slot stays free
                return (size_t) 0;
            }
        }

        return (size_t) 0;
    }));

    // free(handle)
    //
    // Releases the context stored in the given slot. Invalid handles
    // (including 0, which wraps around to a huge index after the decrement)
    // and already-empty slots are ignored.
    emscripten::function("free", emscripten::optional_override([](size_t index) {
        --index;

        if (index < g_contexts.size() && g_contexts[index] != nullptr) {
            whisper_free(g_contexts[index]);
            g_contexts[index] = nullptr;
        }
    }));

    // full_default(handle, audio) -> status
    //
    // Runs whisper_full with greedy sampling and the fixed parameters set below
    // on the samples in `audio`, then prints the timings.
    // Returns -1 for an out-of-range handle, -2 for an empty slot, otherwise
    // the value returned by whisper_full.
    emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
        --index;

        if (index >= g_contexts.size()) {
            return -1;
        }

        if (g_contexts[index] == nullptr) {
            return -2;
        }

        struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

        params.print_realtime       = true;
        params.print_progress       = false;
        params.print_timestamps     = true;
        params.print_special_tokens = false;
        params.translate            = false;
        params.language             = "en";
        params.n_threads            = 4;
        params.offset_ms            = 0;

        std::vector<float> pcmf32;
        const int n = audio["length"].as<int>();

        emscripten::val heap   = emscripten::val::module_property("HEAPU8");
        emscripten::val memory = heap["buffer"];

        pcmf32.resize(n);

        // Copy the samples from JavaScript into pcmf32: build an array of the
        // same JavaScript type as `audio` on top of the module's heap buffer,
        // starting at pcmf32's address, and let its set() method do the copy.
        // NOTE(review): this assumes `audio` is a Float32Array; a wider element
        // type would write past the end of pcmf32 - confirm against callers.
        emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
        memoryView.call<void>("set", audio);

        int ret = whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());

        whisper_print_timings(g_contexts[index]);

        return ret;
    }));
}
21 changes: 21 additions & 0 deletions bindings/javascript/whisper.js

Large diffs are not rendered by default.

54 changes: 54 additions & 0 deletions cmake/BuildTypes.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Add new build types
#
# Each build type is described by four cache variables (C++ flags, C flags,
# executable linker flags, shared-library linker flags). FORCE overwrites any
# value already present in the cache.

# ReleaseGG - Release with enabled asserts

set(CMAKE_CXX_FLAGS_RELEASEGG "-O3" CACHE STRING
    "Flags used by the c++ compiler during release builds with enabled asserts."
    FORCE)
set(CMAKE_C_FLAGS_RELEASEGG "-O3" CACHE STRING
    "Flags used by the compiler during release builds with enabled asserts."
    FORCE)
set(CMAKE_EXE_LINKER_FLAGS_RELEASEGG "" CACHE STRING
    "Flags used for linking binaries during release builds with enabled asserts."
    FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG "" CACHE STRING
    "Flags used by the shared libraries linker during release builds with enabled asserts."
    FORCE)

# hide the new entries from the basic view of the cache editors
mark_as_advanced(
    CMAKE_CXX_FLAGS_RELEASEGG
    CMAKE_C_FLAGS_RELEASEGG
    CMAKE_EXE_LINKER_FLAGS_RELEASEGG
    CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
    )

# RelWithDebInfoGG - RelWithDebInfo with enabled asserts

set(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG "-O2 -g" CACHE STRING
    "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
    FORCE)
set(CMAKE_C_FLAGS_RELWITHDEBINFOGG "-O2 -g" CACHE STRING
    "Flags used by the compiler during release builds with debug symbols and enabled asserts."
    FORCE)
set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG "" CACHE STRING
    "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
    FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG "" CACHE STRING
    "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
    FORCE)

mark_as_advanced(
    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
    CMAKE_C_FLAGS_RELWITHDEBINFOGG
    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
    )

# Default to a Release build when no build type was given (skipped for the
# Xcode and MSVC cases), and offer the known build types, including the two
# added above, as the choices for CMAKE_BUILD_TYPE.
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
        "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
endif()
22 changes: 22 additions & 0 deletions cmake/GitVars.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Query git at configure time and expose the results as GIT_SHA1, GIT_DATE and
# GIT_COMMIT_SUBJECT. Errors are silenced (ERROR_QUIET) and no result code is
# checked, so a failing command does not stop configuration.
find_package(Git)

# GIT_SHA1: the commit's SHA1, abbreviated to 8 characters.
# The --match pattern is not expected to match any tag, so that --always falls
# back to printing the abbreviated hash.
execute_process(
    COMMAND "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA1
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE
    )

# GIT_DATE: the date of the commit (local time zone)
execute_process(
    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DATE
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE
    )

# GIT_COMMIT_SUBJECT: the subject line of the commit
execute_process(
    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%s
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE
    )
14 changes: 14 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# dependencies

# Threads is mandatory; configuration fails if it cannot be found.
find_package(Threads REQUIRED)

# examples

# The only example added here is whisper.wasm, and only for Emscripten builds;
# other toolchains get no examples from this directory.
if (EMSCRIPTEN)
    add_subdirectory(whisper.wasm)
endif()
4 changes: 4 additions & 0 deletions examples/whisper.wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# whisper.wasm example: no compilation happens here, the example directory is
# assembled at configure time inside the runtime output directory.
set(TARGET whisper.wasm)

# index.html is generated from the template; @ONLY limits substitution to
# @VAR@ references, so any ${...} in the template is left untouched.
configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html
    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html
    @ONLY
    )

# whisper.js is taken from the JavaScript bindings directory and copied as-is.
configure_file(
    ${CMAKE_SOURCE_DIR}/bindings/javascript/whisper.js
    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js
    COPYONLY
    )
3 changes: 3 additions & 0 deletions examples/whisper.wasm/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# whisper.wasm

Live demo: https://whisper.ggerganov.com
Loading

0 comments on commit e905c6f

Please sign in to comment.