Make the number of Bayesian optimization runs configurable from the UI.

* layouts.html / main.js: show the #numRunsContainer input only while the
  bayesianOptimization option is checked, and send its value as
  projectData.numRuns.
* ragbuilder.py: accept numRuns on ProjectData and forward it from
  parse_config to rag_builder_bayes_optmization as num_runs.
* executor.py: use num_runs instead of BAYESIAN_RUNS for total_runs and for
  gp_minimize's n_calls.

Robustness: numRuns is optional, so a missing, None or empty value falls back
to a default (50 in parse_config, BAYESIAN_RUNS in the executor) instead of
raising TypeError/ValueError.

diff --git a/src/ragbuilder/executor.py b/src/ragbuilder/executor.py
index 1b69e10..fa397fe 100644
--- a/src/ragbuilder/executor.py
+++ b/src/ragbuilder/executor.py
@@ -86,6 +86,7 @@ def rag_builder_bayes_optmization(**kwargs):
     max_chunk_size=kwargs.get('max_chunk_size', 1000)
     hf_embedding=kwargs.get('hf_embedding')
     hf_llm=kwargs.get('hf_llm')
+    num_runs=kwargs.get('num_runs') or BAYESIAN_RUNS  # fall back to the module default when num_runs is omitted/None
     test_data=kwargs['test_data'] #loader_kwargs ={'source':'url','input_path': url1},
     test_df=pd.read_csv(test_data)
     test_ds = Dataset.from_pandas(test_df)
@@ -104,10 +105,10 @@ def rag_builder_bayes_optmization(**kwargs):
     logger.info(f"Number of RAG combinations : {cnt_combos}")
     configs_evaluated=dict()
 
-    if cnt_combos < BAYESIAN_RUNS:
+    if cnt_combos < num_runs:
         total_runs=cnt_combos
     else:
-        total_runs = BAYESIAN_RUNS + len(configs_to_run)
+        total_runs = num_runs + len(configs_to_run)
     progress_state.set_total_runs(total_runs)
 
     # Run Templates first if templates have been selected
@@ -183,7 +184,7 @@ def objective(**params):
 
     # Run Bayesian optimization
     logger.info(f"Running Bayesian optimization...")
-    result = gp_minimize(objective, space, n_calls=BAYESIAN_RUNS, random_state=42) #, callback=DeltaXStopper(1e-8))
+    result = gp_minimize(objective, space, n_calls=num_runs, random_state=42) #, callback=DeltaXStopper(1e-8))
     logger.info(f"Completed Bayesian optimization...")
 
     best_params = result.x
diff --git a/src/ragbuilder/ragbuilder.py b/src/ragbuilder/ragbuilder.py
index 857cfa2..c2b7a2c 100644
--- a/src/ragbuilder/ragbuilder.py
+++ b/src/ragbuilder/ragbuilder.py
@@ -291,6 +291,7 @@ class ProjectData(BaseModel):
     criticLLM: Optional[str] = Field(default=None)
     generatorLLM: Optional[str] = Field(default=None)
     embedding: Optional[str] = Field(default=None)
+    numRuns: Optional[str] = Field(default=None)
 
 @app.post("/rbuilder")
 def rbuilder_route(project_data: ProjectData, db: sqlite3.Connection = Depends(get_db)):
@@ -429,6 +430,7 @@ def parse_config(config: dict, db: sqlite3.Connection):
     try:
         if optimization=='bayesianOptimization':
             logger.info(f"Using Bayesian optimization to find optimal RAG configs...")
+            num_runs = int(config.get("numRuns") or 50)  # numRuns is optional: None or "" falls back to 50
             res = rag_builder_bayes_optmization(
                 run_id=run_id,
                 compare_templates=compare_templates,
@@ -440,6 +442,7 @@ def parse_config(config: dict, db: sqlite3.Connection):
                 max_chunk_size=max_chunk_size,
                 hf_embedding=hf_embedding,
                 hf_llm=hf_llm,
+                num_runs=num_runs,
                 disabled_opts=disabled_opts
             )
         elif optimization=='fullParameterSearch' :
diff --git a/src/ragbuilder/static/main.js b/src/ragbuilder/static/main.js
index 02f02d2..be67e34 100644
--- a/src/ragbuilder/static/main.js
+++ b/src/ragbuilder/static/main.js
@@ -62,6 +62,15 @@ $(document).ready(function () {
         }
     });
 
+    // Show or hide the number of runs input based on the selected optimization option
+    $('input[name="optimization"]').change(function () {
+        if ($('#bayesianOptimization').is(':checked')) {
+            $('#numRunsContainer').show();
+        } else {
+            $('#numRunsContainer').hide();
+        }
+    });
+
     $('#nextStep1').click(function () {
         const sourceData = $('#sourceData').val();
         $.ajax({
@@ -305,6 +314,10 @@ $(document).ready(function () {
                 embedding: $('#embedding').val()
             };
         }
+
+        if (projectData.optimization === "bayesianOptimization") {
+            projectData.numRuns = $('#numRuns').val();
+        }
 
         console.log(JSON.stringify(projectData));
 
diff --git a/src/ragbuilder/templates/layouts.html b/src/ragbuilder/templates/layouts.html
index e7fcade..8620c2d 100644
--- a/src/ragbuilder/templates/layouts.html
+++ b/src/ragbuilder/templates/layouts.html
@@ -247,12 +247,16 @@
Advanced Optimization Settings
Ragbuilder uses Bayesian optimization for efficient hyperparameter tuning. Alternatively, you can generate and assess all parameter combinations for exhaustive insights, but this approach is long-running, costly, and computationally intensive.

- + +
+ + +
- - + +