Skip to content

Commit

Permalink
refactoring : more readable code
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Nov 25, 2022
1 parent fd11368 commit b8ce25d
Show file tree
Hide file tree
Showing 11 changed files with 280 additions and 330 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ bench
sync.sh
compile_commands.json

examples/arm_neon.h
examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ usage: ./main [options] file0.wav file1.wav ...

options:
-h, --help show this help message and exit
-s SEED, --seed SEED RNG seed (default: -1)
-t N, --threads N number of threads to use during computation (default: 4)
-p N, --processors N number of processors to use during computation (default: 1)
-ot N, --offset-t N time offset in milliseconds (default: 0)
Expand Down
16 changes: 8 additions & 8 deletions bindings/javascript/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ EMSCRIPTEN_BINDINGS(whisper) {

struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

params.print_realtime = true;
params.print_progress = false;
params.print_timestamps = true;
params.print_special_tokens = false;
params.translate = translate;
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
params.offset_ms = 0;
params.print_realtime = true;
params.print_progress = false;
params.print_timestamps = true;
params.print_special = false;
params.translate = translate;
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
params.offset_ms = 0;

std::vector<float> pcmf32;
const int n = audio["length"].as<int>();
Expand Down
21 changes: 10 additions & 11 deletions examples/bench/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

// command-line parameters
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

std::string model = "models/ggml-base.en.bin";
std::string model = "models/ggml-base.en.bin";
};

void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
Expand All @@ -17,14 +17,13 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];

if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-m" || arg == "--model") {
params.model = argv[++i];
} else if (arg == "-h" || arg == "--help") {
if (arg == "-h" || arg == "--help") {
whisper_print_usage(argc, argv, params);
exit(0);
} else {
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
exit(0);
Expand All @@ -39,9 +38,9 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help show this help message and exit\n");
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME model path (default: %s)\n", params.model.c_str());
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, "\n");
}

Expand Down
184 changes: 80 additions & 104 deletions examples/main/main.cpp

Large diffs are not rendered by default.

242 changes: 108 additions & 134 deletions examples/stream/stream.cpp

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions examples/talk.wasm/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ void talk_main(size_t index) {

struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
wparams.offset_ms = 0;
wparams.translate = false;
wparams.no_context = true;
wparams.single_segment = true;
wparams.print_realtime = false;
wparams.print_progress = false;
wparams.print_timestamps = true;
wparams.print_special_tokens = false;
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
wparams.offset_ms = 0;
wparams.translate = false;
wparams.no_context = true;
wparams.single_segment = true;
wparams.print_realtime = false;
wparams.print_progress = false;
wparams.print_timestamps = true;
wparams.print_special = false;

wparams.max_tokens = 32;
wparams.audio_ctx = 768; // partial encoder context for better performance
Expand All @@ -75,9 +75,9 @@ void talk_main(size_t index) {
// whisper context
auto & ctx = g_contexts[index];

const int64_t step_samples = 2*WHISPER_SAMPLE_RATE;
const int64_t step_ms = (step_samples*1000)/WHISPER_SAMPLE_RATE;
const int64_t step_samples = 2*WHISPER_SAMPLE_RATE;
const int64_t window_samples = 9*WHISPER_SAMPLE_RATE;
const int64_t step_ms = (step_samples*1000)/WHISPER_SAMPLE_RATE;

auto t_last = std::chrono::high_resolution_clock::now();

Expand Down Expand Up @@ -111,7 +111,7 @@ void talk_main(size_t index) {
pcmf32 = std::vector<float>(g_pcmf32.end() - std::min((int64_t) g_pcmf32.size(), window_samples), g_pcmf32.end());
}

// if energy in during last second is above threshold, then skip
// VAD: if the energy during the last second is above the threshold, then skip
{
float energy_all = 0.0f;
float energy_1s = 0.0f;
Expand Down
16 changes: 8 additions & 8 deletions examples/whisper.objc/whisper.objc/ViewController.m
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,14 @@ - (IBAction)onTranscribe:(id)sender {
// run the model
struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

params.print_realtime = true;
params.print_progress = false;
params.print_timestamps = true;
params.print_special_tokens = false;
params.translate = false;
params.language = "en";
params.n_threads = 4;
params.offset_ms = 0;
params.print_realtime = true;
params.print_progress = false;
params.print_timestamps = true;
params.print_special = false;
params.translate = false;
params.language = "en";
params.n_threads = 4;
params.offset_ms = 0;

CFTimeInterval startTime = CACurrentMediaTime();

Expand Down
1 change: 1 addition & 0 deletions examples/whisper.wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
set(TARGET whisper.wasm)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/whisper.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js COPYONLY)
102 changes: 51 additions & 51 deletions whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2389,92 +2389,92 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
case WHISPER_SAMPLING_GREEDY:
{
result = {
/*.strategy =*/ WHISPER_SAMPLING_GREEDY,
/*.strategy =*/ WHISPER_SAMPLING_GREEDY,

/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
/*.n_max_text_ctx =*/ 16384,
/*.offset_ms =*/ 0,
/*.duration_ms =*/ 0,
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
/*.n_max_text_ctx =*/ 16384,
/*.offset_ms =*/ 0,
/*.duration_ms =*/ 0,

/*.translate =*/ false,
/*.no_context =*/ false,
/*.single_segment =*/ false,
/*.print_special_tokens =*/ false,
/*.print_progress =*/ true,
/*.print_realtime =*/ false,
/*.print_timestamps =*/ true,
/*.translate =*/ false,
/*.no_context =*/ false,
/*.single_segment =*/ false,
/*.print_special =*/ false,
/*.print_progress =*/ true,
/*.print_realtime =*/ false,
/*.print_timestamps =*/ true,

/*.token_timestamps =*/ false,
/*.thold_pt =*/ 0.01f,
/*.thold_ptsum =*/ 0.01f,
/*.max_len =*/ 0,
/*.max_tokens =*/ 0,
/*.token_timestamps =*/ false,
/*.thold_pt =*/ 0.01f,
/*.thold_ptsum =*/ 0.01f,
/*.max_len =*/ 0,
/*.max_tokens =*/ 0,

/*.speed_up =*/ false,
/*.audio_ctx =*/ 0,
/*.speed_up =*/ false,
/*.audio_ctx =*/ 0,

/*.prompt_tokens =*/ nullptr,
/*.prompt_n_tokens =*/ 0,
/*.prompt_tokens =*/ nullptr,
/*.prompt_n_tokens =*/ 0,

/*.language =*/ "en",
/*.language =*/ "en",

/*.greedy =*/ {
/*.greedy =*/ {
/*.n_past =*/ 0,
},

/*.beam_search =*/ {
/*.beam_search =*/ {
/*.n_past =*/ -1,
/*.beam_width =*/ -1,
/*.n_best =*/ -1,
},

/*.new_segment_callback =*/ nullptr,
/*.new_segment_callback =*/ nullptr,
/*.new_segment_callback_user_data =*/ nullptr,
};
} break;
case WHISPER_SAMPLING_BEAM_SEARCH:
{
result = {
/*.strategy =*/ WHISPER_SAMPLING_BEAM_SEARCH,
/*.strategy =*/ WHISPER_SAMPLING_BEAM_SEARCH,

/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
/*.n_max_text_ctx =*/ 16384,
/*.offset_ms =*/ 0,
/*.duration_ms =*/ 0,
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
/*.n_max_text_ctx =*/ 16384,
/*.offset_ms =*/ 0,
/*.duration_ms =*/ 0,

/*.translate =*/ false,
/*.no_context =*/ false,
/*.single_segment =*/ false,
/*.print_special_tokens =*/ false,
/*.print_progress =*/ true,
/*.print_realtime =*/ false,
/*.print_timestamps =*/ true,
/*.translate =*/ false,
/*.no_context =*/ false,
/*.single_segment =*/ false,
/*.print_special =*/ false,
/*.print_progress =*/ true,
/*.print_realtime =*/ false,
/*.print_timestamps =*/ true,

/*.token_timestamps =*/ false,
/*.thold_pt =*/ 0.01f,
/*.thold_ptsum =*/ 0.01f,
/*.max_len =*/ 0,
/*.max_tokens =*/ 0,
/*.token_timestamps =*/ false,
/*.thold_pt =*/ 0.01f,
/*.thold_ptsum =*/ 0.01f,
/*.max_len =*/ 0,
/*.max_tokens =*/ 0,

/*.speed_up =*/ false,
/*.audio_ctx =*/ 0,
/*.speed_up =*/ false,
/*.audio_ctx =*/ 0,

/*.prompt_tokens =*/ nullptr,
/*.prompt_n_tokens =*/ 0,
/*.prompt_tokens =*/ nullptr,
/*.prompt_n_tokens =*/ 0,

/*.language =*/ "en",
/*.language =*/ "en",

/*.greedy =*/ {
/*.greedy =*/ {
/*.n_past =*/ -1,
},

/*.beam_search =*/ {
/*.beam_search =*/ {
/*.n_past =*/ 0,
/*.beam_width =*/ 10,
/*.n_best =*/ 5,
},

/*.new_segment_callback =*/ nullptr,
/*.new_segment_callback =*/ nullptr,
/*.new_segment_callback_user_data =*/ nullptr,
};
} break;
Expand Down Expand Up @@ -2762,7 +2762,7 @@ int whisper_full(
// ctx->vocab.id_to_token[tokens_cur[i].id].c_str(), tokens_cur[i].p,
// ctx->vocab.id_to_token[tokens_cur[i].tid].c_str(), tokens_cur[i].pt);

if (params.print_special_tokens == false && tokens_cur[i].id >= whisper_token_eot(ctx)) {
if (params.print_special == false && tokens_cur[i].id >= whisper_token_eot(ctx)) {
} else {
text += whisper_token_to_str(ctx, tokens_cur[i].id);
}
Expand Down
2 changes: 1 addition & 1 deletion whisper.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ extern "C" {
bool translate;
bool no_context;
bool single_segment; // force single segment output (useful for streaming)
bool print_special_tokens;
bool print_special;
bool print_progress;
bool print_realtime;
bool print_timestamps;
Expand Down

0 comments on commit b8ce25d

Please sign in to comment.