refactoring : more readable code

jb41 · Nov 25, 2022 · b8ce25d · b8ce25d
1 parent fd11368
commit b8ce25d
Show file tree

Hide file tree

Showing 11 changed files with 280 additions and 330 deletions.
diff --git a/.gitignore b/.gitignore
@@ -17,6 +17,7 @@ bench
 sync.sh
 compile_commands.json
 
+examples/arm_neon.h
 examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
 examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
 examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
diff --git a/README.md b/README.md
@@ -99,7 +99,6 @@ usage: ./main [options] file0.wav file1.wav ...
 
 options:
   -h,       --help           show this help message and exit
-  -s SEED,  --seed SEED      RNG seed (default: -1)
   -t N,     --threads N      number of threads to use during computation (default: 4)
   -p N,     --processors N   number of processors to use during computation (default: 1)
   -ot N,    --offset-t N     time offset in milliseconds (default: 0)

diff --git a/bindings/javascript/emscripten.cpp b/bindings/javascript/emscripten.cpp
@@ -46,14 +46,14 @@ EMSCRIPTEN_BINDINGS(whisper) {
 
         struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
 
-        params.print_realtime       = true;
-        params.print_progress       = false;
-        params.print_timestamps     = true;
-        params.print_special_tokens = false;
-        params.translate            = translate;
-        params.language             = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
-        params.n_threads            = std::min(8, (int) std::thread::hardware_concurrency());
-        params.offset_ms            = 0;
+        params.print_realtime   = true;
+        params.print_progress   = false;
+        params.print_timestamps = true;
+        params.print_special    = false;
+        params.translate        = translate;
+        params.language         = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
+        params.n_threads        = std::min(8, (int) std::thread::hardware_concurrency());
+        params.offset_ms        = 0;
 
         std::vector<float> pcmf32;
         const int n = audio["length"].as<int>();

diff --git a/examples/bench/bench.cpp b/examples/bench/bench.cpp
@@ -6,9 +6,9 @@
 
 // command-line parameters
 struct whisper_params {
-    int32_t n_threads   = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
 
-    std::string model     = "models/ggml-base.en.bin";
+    std::string model = "models/ggml-base.en.bin";
 };
 
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -17,14 +17,13 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     for (int i = 1; i < argc; i++) {
         std::string arg = argv[i];
 
-        if (arg == "-t" || arg == "--threads") {
-            params.n_threads = std::stoi(argv[++i]);
-        } else if (arg == "-m" || arg == "--model") {
-            params.model = argv[++i];
-        } else if (arg == "-h" || arg == "--help") {
+        if (arg == "-h" || arg == "--help") {
             whisper_print_usage(argc, argv, params);
             exit(0);
-        } else {
+        }
+        else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
+        else if (arg == "-m" || arg == "--model")   { params.model     = argv[++i]; }
+        else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
             exit(0);
@@ -39,9 +38,9 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help           show this help message and exit\n");
-    fprintf(stderr, "  -t N,     --threads N      number of threads to use during computation (default: %d)\n", params.n_threads);
-    fprintf(stderr, "  -m FNAME, --model FNAME    model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "  -h,       --help        [default] show this help message and exit\n");
+    fprintf(stderr, "  -t N,     --threads N   [%-7d] number of threads to use during computation\n", params.n_threads);
+    fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n",                                  params.model.c_str());
     fprintf(stderr, "\n");
 }
 

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp
diff --git a/examples/talk.wasm/emscripten.cpp b/examples/talk.wasm/emscripten.cpp
@@ -51,15 +51,15 @@ void talk_main(size_t index) {
 
     struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
 
-    wparams.n_threads            = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
-    wparams.offset_ms            = 0;
-    wparams.translate            = false;
-    wparams.no_context           = true;
-    wparams.single_segment       = true;
-    wparams.print_realtime       = false;
-    wparams.print_progress       = false;
-    wparams.print_timestamps     = true;
-    wparams.print_special_tokens = false;
+    wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
+    wparams.offset_ms        = 0;
+    wparams.translate        = false;
+    wparams.no_context       = true;
+    wparams.single_segment   = true;
+    wparams.print_realtime   = false;
+    wparams.print_progress   = false;
+    wparams.print_timestamps = true;
+    wparams.print_special    = false;
 
     wparams.max_tokens           = 32;
     wparams.audio_ctx            = 768; // partial encoder context for better performance
@@ -75,9 +75,9 @@ void talk_main(size_t index) {
     // whisper context
     auto & ctx = g_contexts[index];
 
-    const int64_t step_samples = 2*WHISPER_SAMPLE_RATE;
-    const int64_t step_ms = (step_samples*1000)/WHISPER_SAMPLE_RATE;
+    const int64_t step_samples   = 2*WHISPER_SAMPLE_RATE;
     const int64_t window_samples = 9*WHISPER_SAMPLE_RATE;
+    const int64_t step_ms        = (step_samples*1000)/WHISPER_SAMPLE_RATE;
 
     auto t_last = std::chrono::high_resolution_clock::now();
 
@@ -111,7 +111,7 @@ void talk_main(size_t index) {
             pcmf32 = std::vector<float>(g_pcmf32.end() - std::min((int64_t) g_pcmf32.size(), window_samples), g_pcmf32.end());
         }
 
-        // if energy in during last second is above threshold, then skip
+        // VAD: if energy during the last second is above threshold, then skip
         {
             float energy_all = 0.0f;
             float energy_1s  = 0.0f;

diff --git a/examples/whisper.objc/whisper.objc/ViewController.m b/examples/whisper.objc/whisper.objc/ViewController.m
@@ -161,14 +161,14 @@ - (IBAction)onTranscribe:(id)sender {
     // run the model
     struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
 
-    params.print_realtime       = true;
-    params.print_progress       = false;
-    params.print_timestamps     = true;
-    params.print_special_tokens = false;
-    params.translate            = false;
-    params.language             = "en";
-    params.n_threads            = 4;
-    params.offset_ms            = 0;
+    params.print_realtime   = true;
+    params.print_progress   = false;
+    params.print_timestamps = true;
+    params.print_special    = false;
+    params.translate        = false;
+    params.language         = "en";
+    params.n_threads        = 4;
+    params.offset_ms        = 0;
 
     CFTimeInterval startTime = CACurrentMediaTime();
 

diff --git a/examples/whisper.wasm/CMakeLists.txt b/examples/whisper.wasm/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(TARGET whisper.wasm)
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js          ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
 configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/whisper.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js  COPYONLY)
diff --git a/whisper.cpp b/whisper.cpp
@@ -2389,92 +2389,92 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
         case WHISPER_SAMPLING_GREEDY:
             {
                 result = {
-                    /*.strategy             =*/ WHISPER_SAMPLING_GREEDY,
+                    /*.strategy         =*/ WHISPER_SAMPLING_GREEDY,
 
-                    /*.n_threads            =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
-                    /*.n_max_text_ctx       =*/ 16384,
-                    /*.offset_ms            =*/ 0,
-                    /*.duration_ms          =*/ 0,
+                    /*.n_threads        =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+                    /*.n_max_text_ctx   =*/ 16384,
+                    /*.offset_ms        =*/ 0,
+                    /*.duration_ms      =*/ 0,
 
-                    /*.translate            =*/ false,
-                    /*.no_context           =*/ false,
-                    /*.single_segment       =*/ false,
-                    /*.print_special_tokens =*/ false,
-                    /*.print_progress       =*/ true,
-                    /*.print_realtime       =*/ false,
-                    /*.print_timestamps     =*/ true,
+                    /*.translate        =*/ false,
+                    /*.no_context       =*/ false,
+                    /*.single_segment   =*/ false,
+                    /*.print_special    =*/ false,
+                    /*.print_progress   =*/ true,
+                    /*.print_realtime   =*/ false,
+                    /*.print_timestamps =*/ true,
 
-                    /*.token_timestamps     =*/ false,
-                    /*.thold_pt             =*/ 0.01f,
-                    /*.thold_ptsum          =*/ 0.01f,
-                    /*.max_len              =*/ 0,
-                    /*.max_tokens           =*/ 0,
+                    /*.token_timestamps =*/ false,
+                    /*.thold_pt         =*/ 0.01f,
+                    /*.thold_ptsum      =*/ 0.01f,
+                    /*.max_len          =*/ 0,
+                    /*.max_tokens       =*/ 0,
 
-                    /*.speed_up             =*/ false,
-                    /*.audio_ctx            =*/ 0,
+                    /*.speed_up         =*/ false,
+                    /*.audio_ctx        =*/ 0,
 
-                    /*.prompt_tokens        =*/ nullptr,
-                    /*.prompt_n_tokens      =*/ 0,
+                    /*.prompt_tokens    =*/ nullptr,
+                    /*.prompt_n_tokens  =*/ 0,
 
-                    /*.language             =*/ "en",
+                    /*.language         =*/ "en",
 
-                    /*.greedy               =*/ {
+                    /*.greedy           =*/ {
                         /*.n_past =*/ 0,
                     },
 
-                    /*.beam_search          =*/ {
+                    /*.beam_search      =*/ {
                         /*.n_past     =*/ -1,
                         /*.beam_width =*/ -1,
                         /*.n_best     =*/ -1,
                     },
 
-                    /*.new_segment_callback =*/ nullptr,
+                    /*.new_segment_callback           =*/ nullptr,
                     /*.new_segment_callback_user_data =*/ nullptr,
                 };
             } break;
         case WHISPER_SAMPLING_BEAM_SEARCH:
             {
                 result = {
-                    /*.strategy             =*/ WHISPER_SAMPLING_BEAM_SEARCH,
+                    /*.strategy         =*/ WHISPER_SAMPLING_BEAM_SEARCH,
 
-                    /*.n_threads            =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
-                    /*.n_max_text_ctx       =*/ 16384,
-                    /*.offset_ms            =*/ 0,
-                    /*.duration_ms          =*/ 0,
+                    /*.n_threads        =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+                    /*.n_max_text_ctx   =*/ 16384,
+                    /*.offset_ms        =*/ 0,
+                    /*.duration_ms      =*/ 0,
 
-                    /*.translate            =*/ false,
-                    /*.no_context           =*/ false,
-                    /*.single_segment       =*/ false,
-                    /*.print_special_tokens =*/ false,
-                    /*.print_progress       =*/ true,
-                    /*.print_realtime       =*/ false,
-                    /*.print_timestamps     =*/ true,
+                    /*.translate        =*/ false,
+                    /*.no_context       =*/ false,
+                    /*.single_segment   =*/ false,
+                    /*.print_special    =*/ false,
+                    /*.print_progress   =*/ true,
+                    /*.print_realtime   =*/ false,
+                    /*.print_timestamps =*/ true,
 
-                    /*.token_timestamps     =*/ false,
-                    /*.thold_pt             =*/ 0.01f,
-                    /*.thold_ptsum          =*/ 0.01f,
-                    /*.max_len              =*/ 0,
-                    /*.max_tokens           =*/ 0,
+                    /*.token_timestamps =*/ false,
+                    /*.thold_pt         =*/ 0.01f,
+                    /*.thold_ptsum      =*/ 0.01f,
+                    /*.max_len          =*/ 0,
+                    /*.max_tokens       =*/ 0,
 
-                    /*.speed_up             =*/ false,
-                    /*.audio_ctx            =*/ 0,
+                    /*.speed_up         =*/ false,
+                    /*.audio_ctx        =*/ 0,
 
-                    /*.prompt_tokens        =*/ nullptr,
-                    /*.prompt_n_tokens      =*/ 0,
+                    /*.prompt_tokens    =*/ nullptr,
+                    /*.prompt_n_tokens  =*/ 0,
 
-                    /*.language             =*/ "en",
+                    /*.language         =*/ "en",
 
-                    /*.greedy               =*/ {
+                    /*.greedy           =*/ {
                         /*.n_past =*/ -1,
                     },
 
-                    /*.beam_search          =*/ {
+                    /*.beam_search      =*/ {
                         /*.n_past     =*/ 0,
                         /*.beam_width =*/ 10,
                         /*.n_best     =*/ 5,
                     },
 
-                    /*.new_segment_callback =*/ nullptr,
+                    /*.new_segment_callback           =*/ nullptr,
                     /*.new_segment_callback_user_data =*/ nullptr,
                 };
             } break;
@@ -2762,7 +2762,7 @@ int whisper_full(
                 //        ctx->vocab.id_to_token[tokens_cur[i].id].c_str(), tokens_cur[i].p,
                 //        ctx->vocab.id_to_token[tokens_cur[i].tid].c_str(), tokens_cur[i].pt);
 
-                if (params.print_special_tokens == false && tokens_cur[i].id >= whisper_token_eot(ctx)) {
+                if (params.print_special == false && tokens_cur[i].id >= whisper_token_eot(ctx)) {
                 } else {
                     text += whisper_token_to_str(ctx, tokens_cur[i].id);
                 }

diff --git a/whisper.h b/whisper.h
@@ -192,7 +192,7 @@ extern "C" {
         bool translate;
         bool no_context;
         bool single_segment; // force single segment output (useful for streaming)
-        bool print_special_tokens;
+        bool print_special;
         bool print_progress;
         bool print_realtime;
         bool print_timestamps;