Skip to content

Commit

Permalink
feat: use unified pickle cacher & move llm config into an isolated con…
Browse files Browse the repository at this point in the history
…fig (microsoft#424)

* simplify RDAgent conf

* add unified cacher (untested)

* fix small bugs

* fix a bug

* fix a small bug in runner

* use hash_key = None to skip cache

* fix CI

* in factor execution, ignore the cache when raise_exception is set

* add file locker to avoid mp calling

* fix CI

* use function __module__ name as folder in cache
  • Loading branch information
peteryang1 authored Oct 14, 2024
1 parent 52f30a6 commit 2879ecf
Show file tree
Hide file tree
Showing 25 changed files with 299 additions and 346 deletions.
2 changes: 0 additions & 2 deletions docs/installation_and_configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,6 @@ Configuration List
+------------------------------+--------------------------------------------------+-------------------------+
| prompt_cache_path | Path to prompt cache | ./prompt_cache.db |
+------------------------------+--------------------------------------------------+-------------------------+
| session_cache_folder_location| Path to session cache | ./session_cache_folder |
+------------------------------+--------------------------------------------------+-------------------------+
| max_past_message_include | Maximum number of past messages to include | 10 |
+------------------------------+--------------------------------------------------+-------------------------+

Expand Down
2 changes: 1 addition & 1 deletion docs/scens/data_agent_fin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,6 @@ The following environment variables can be set in the `.env` file to customize t

.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorImplementSettings
:settings-show-field-summary: False
:members: coder_use_cache, data_folder, data_folder_debug, cache_location, enable_execution_cache, file_based_execution_timeout, select_method, select_threshold, max_loop, knowledge_base_path, new_knowledge_base_path
:members: coder_use_cache, data_folder, data_folder_debug, file_based_execution_timeout, select_method, select_threshold, max_loop, knowledge_base_path, new_knowledge_base_path
:exclude-members: Config, fail_task_trial_limit, v1_query_former_trace_limit, v1_query_similar_success_limit, v2_query_component_limit, v2_query_error_limit, v2_query_former_trace_limit, v2_error_summary, v2_knowledge_sampler
:no-index:
2 changes: 1 addition & 1 deletion docs/scens/data_copilot_fin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,6 @@ The following environment variables can be set in the `.env` file to customize t

.. autopydantic_settings:: rdagent.components.coder.factor_coder.config.FactorImplementSettings
:settings-show-field-summary: False
:members: coder_use_cache, data_folder, data_folder_debug, cache_location, enable_execution_cache, file_based_execution_timeout, select_method, select_threshold, max_loop, knowledge_base_path, new_knowledge_base_path
:members: coder_use_cache, data_folder, data_folder_debug, file_based_execution_timeout, select_method, select_threshold, max_loop, knowledge_base_path, new_knowledge_base_path
:exclude-members: Config, python_bin, fail_task_trial_limit, v1_query_former_trace_limit, v1_query_similar_success_limit, v2_query_component_limit, v2_query_error_limit, v2_query_former_trace_limit, v2_error_summary, v2_knowledge_sampler
:no-index:
5 changes: 3 additions & 2 deletions rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from rdagent.core.prompts import Prompts
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_conf import LLM_SETTINGS
from rdagent.oai.llm_utils import APIBackend

evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
Expand Down Expand Up @@ -118,7 +119,7 @@ def evaluate(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
> RD_AGENT_SETTINGS.chat_token_limit
> LLM_SETTINGS.chat_token_limit
):
execution_feedback_to_render = execution_feedback_to_render[len(execution_feedback_to_render) // 2 :]
else:
Expand Down Expand Up @@ -521,7 +522,7 @@ def evaluate(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
> RD_AGENT_SETTINGS.chat_token_limit
> LLM_SETTINGS.chat_token_limit
):
execution_feedback_to_render = execution_feedback_to_render[len(execution_feedback_to_render) // 2 :]
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from rdagent.core.experiment import Workspace
from rdagent.core.prompts import Prompts
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.oai.llm_conf import LLM_SETTINGS
from rdagent.oai.llm_utils import APIBackend

if TYPE_CHECKING:
Expand Down Expand Up @@ -160,7 +161,7 @@ def implement_one_factor(
session.build_chat_completion_message_and_calculate_token(
user_prompt,
)
< RD_AGENT_SETTINGS.chat_token_limit
< LLM_SETTINGS.chat_token_limit
):
break
elif len(queried_former_failed_knowledge_to_render) > 1:
Expand Down Expand Up @@ -281,7 +282,7 @@ def implement_one_factor(
)
if (
session_summary.build_chat_completion_message_and_calculate_token(error_summary_user_prompt)
< RD_AGENT_SETTINGS.chat_token_limit
< LLM_SETTINGS.chat_token_limit
):
break
elif len(queried_similar_error_knowledge_to_render) > 0:
Expand Down Expand Up @@ -310,7 +311,7 @@ def implement_one_factor(
session.build_chat_completion_message_and_calculate_token(
user_prompt,
)
< RD_AGENT_SETTINGS.chat_token_limit
< LLM_SETTINGS.chat_token_limit
):
break
elif len(queried_former_failed_knowledge_to_render) > 1:
Expand Down
4 changes: 2 additions & 2 deletions rdagent/components/coder/factor_coder/CoSTEER/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import (
FactorEvolvingItem,
)
from rdagent.core.conf import RD_AGENT_SETTINGS
from rdagent.core.prompts import Prompts
from rdagent.core.scenario import Scenario
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_conf import LLM_SETTINGS
from rdagent.oai.llm_utils import APIBackend

scheduler_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
Expand Down Expand Up @@ -68,7 +68,7 @@ def LLMSelect(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
< RD_AGENT_SETTINGS.chat_token_limit
< LLM_SETTINGS.chat_token_limit
):
break

Expand Down
6 changes: 0 additions & 6 deletions rdagent/components/coder/factor_coder/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ class Config:
data_folder_debug: str = "git_ignore_folder/factor_implementation_source_data_debug"
"""Path to the folder containing partial financial data (for debugging)"""

cache_location: str = "git_ignore_folder/factor_implementation_execution_cache"
"""Path to the cache location"""

enable_execution_cache: bool = True
"""Indicates whether to enable the execution cache"""

# TODO: the factor implement specific settings should not appear in this settings
# Evolving should have a method specific settings
# evolving related config
Expand Down
50 changes: 10 additions & 40 deletions rdagent/components/coder/factor_coder/factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
from rdagent.core.exception import CodeFormatError, CustomRuntimeError, NoOutputError
from rdagent.core.experiment import Experiment, FBWorkspace, Task
from rdagent.core.utils import cache_with_pickle
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import md5_hash

Expand Down Expand Up @@ -80,17 +81,21 @@ class FactorFBWorkspace(FBWorkspace):
def __init__(
self,
*args,
executed_factor_value_dataframe: pd.DataFrame = None,
raise_exception: bool = False,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.executed_factor_value_dataframe = executed_factor_value_dataframe
self.raise_exception = raise_exception

def execute(
self, enable_cache: bool = FACTOR_IMPLEMENT_SETTINGS.enable_execution_cache, data_type: str = "Debug"
) -> Tuple[str, pd.DataFrame]:
def hash_func(self, data_type: str = "Debug") -> "str | None":
    """Build the cache key for one ``execute`` call of this factor workspace.

    The key is the MD5 hash of ``data_type`` concatenated with the source of
    ``factor.py``, so identical code run on the same data type maps to the
    same key.

    Parameters:
        data_type: the data type passed to ``execute`` (default ``"Debug"``).

    Returns:
        The hash string, or ``None`` when no key can or should be built:
        - ``self.raise_exception`` is set,
        - ``self.code_dict`` is ``None``,
        - ``self.code_dict`` has no ``"factor.py"`` entry.

    NOTE(review): ``None`` is assumed to make ``cache_with_pickle`` bypass the
    cache -- confirm against ``rdagent.core.utils``.
    """
    # This function runs before ``execute``; bail out on a missing code_dict
    # here so ``execute`` can report the problem itself instead of this
    # function failing with a TypeError on the ``in`` test.
    if self.raise_exception or self.code_dict is None or "factor.py" not in self.code_dict:
        return None
    return md5_hash(data_type + self.code_dict["factor.py"])

@cache_with_pickle(hash_func)
def execute(self, data_type: str = "Debug") -> Tuple[str, pd.DataFrame]:
"""
execute the implementation and get the factor value by the following steps:
1. make the directory in workspace path
Expand All @@ -108,8 +113,6 @@ def execute(
1. We will store the function's return value to ensure it behaves as expected.
- The cached information will include a tuple with the following: (execution_feedback, executed_factor_value_dataframe, Optional[Exception])
parameters:
enable_cache: if True, store the factor value in the instance variable, this feature is to be used in the gt implementation to avoid multiple execution on the same gt implementation
"""
super().execute()
if self.code_dict is None or "factor.py" not in self.code_dict:
Expand All @@ -118,31 +121,6 @@ def execute(
else:
return self.FB_CODE_NOT_SET, None
with FileLock(self.workspace_path / "execution.lock"):
if FACTOR_IMPLEMENT_SETTINGS.enable_execution_cache:
# NOTE: cache the result for the same code and same data type
target_file_name = md5_hash(data_type + self.code_dict["factor.py"])
cache_file_path = Path(FACTOR_IMPLEMENT_SETTINGS.cache_location) / f"{target_file_name}.pkl"
Path(FACTOR_IMPLEMENT_SETTINGS.cache_location).mkdir(exist_ok=True, parents=True)
if enable_cache and cache_file_path.exists():
cached_res = pickle.load(open(cache_file_path, "rb"))

if len(cached_res) == 2:
# NOTE: this is trying to be compatible with previous results.
# Previously, the exception is not saved. we should not enable the cache mechanism
# othersise we can raise the exception directly.
if self.raise_exception:
pass # pass to disable the cache mechanism
else:
self.executed_factor_value_dataframe = cached_res[1]
return cached_res
else:
# NOTE: (execution_feedback, executed_factor_value_dataframe, Optional[Exception])
if self.raise_exception and cached_res[-1] is not None:
raise cached_res[-1]
else:
self.executed_factor_value_dataframe = cached_res[1]
return cached_res[:2]

if self.target_task.version == 1:
source_data_path = (
Path(
Expand Down Expand Up @@ -220,14 +198,6 @@ def execute(
else:
execution_error = NoOutputError(execution_feedback)

if enable_cache and executed_factor_value_dataframe is not None:
self.executed_factor_value_dataframe = executed_factor_value_dataframe

if FACTOR_IMPLEMENT_SETTINGS.enable_execution_cache:
pickle.dump(
(execution_feedback, executed_factor_value_dataframe, execution_error),
open(cache_file_path, "wb"),
)
return execution_feedback, executed_factor_value_dataframe

def __str__(self) -> str:
Expand Down
21 changes: 13 additions & 8 deletions rdagent/components/coder/model_coder/CoSTEER/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from rdagent.core.prompts import Prompts
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_conf import LLM_SETTINGS
from rdagent.oai.llm_utils import APIBackend

evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
Expand Down Expand Up @@ -90,9 +91,11 @@ def evaluate(
Environment(undefined=StrictUndefined)
.from_string(evaluate_prompts["evaluator_code_feedback"]["system"])
.render(
scenario=self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
scenario=(
self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
)
)
)

Expand All @@ -116,7 +119,7 @@ def evaluate(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
> RD_AGENT_SETTINGS.chat_token_limit
> LLM_SETTINGS.chat_token_limit
):
execution_feedback_to_render = execution_feedback_to_render[len(execution_feedback_to_render) // 2 :]
else:
Expand Down Expand Up @@ -150,9 +153,11 @@ def evaluate(
Environment(undefined=StrictUndefined)
.from_string(evaluate_prompts["evaluator_final_feedback"]["system"])
.render(
scenario=self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
scenario=(
self.scen.get_scenario_all_desc(target_task)
if self.scen is not None
else "No scenario description."
)
)
)

Expand All @@ -176,7 +181,7 @@ def evaluate(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
> RD_AGENT_SETTINGS.chat_token_limit
> LLM_SETTINGS.chat_token_limit
):
execution_feedback_to_render = execution_feedback_to_render[len(execution_feedback_to_render) // 2 :]
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from rdagent.core.evolving_framework import EvolvingStrategy
from rdagent.core.prompts import Prompts
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.oai.llm_conf import LLM_SETTINGS
from rdagent.oai.llm_utils import APIBackend
from rdagent.scenarios.kaggle.experiment.kaggle_experiment import KG_MODEL_MAPPING

Expand Down Expand Up @@ -100,7 +101,7 @@ def implement_one_model(
user_prompt=user_prompt,
system_prompt=system_prompt,
)
< RD_AGENT_SETTINGS.chat_token_limit
< LLM_SETTINGS.chat_token_limit
):
break
elif len(queried_former_failed_knowledge_to_render) > 1:
Expand Down
6 changes: 0 additions & 6 deletions rdagent/components/coder/model_coder/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ class Config:

coder_use_cache: bool = False

cache_location: str = str(
(Path().cwd() / "git_ignore_folder" / "model_implementation_execution_cache").absolute(),
)

knowledge_base_path: Union[str, None] = None
new_knowledge_base_path: Union[str, None] = None

Expand All @@ -23,7 +19,5 @@ class Config:
query_similar_success_limit: int = 5
fail_task_trial_limit: int = 20

enable_execution_cache: bool = True # whether to enable the execution cache


MODEL_IMPL_SETTINGS = ModelImplSettings()
33 changes: 16 additions & 17 deletions rdagent/components/coder/model_coder/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from rdagent.components.coder.model_coder.conf import MODEL_IMPL_SETTINGS
from rdagent.core.experiment import Experiment, FBWorkspace, Task
from rdagent.core.utils import cache_with_pickle
from rdagent.oai.llm_utils import md5_hash
from rdagent.utils.env import KGDockerEnv, QTDockerEnv

Expand Down Expand Up @@ -71,6 +72,21 @@ class ModelFBWorkspace(FBWorkspace):
(version 2) for kaggle we'll make a script to call the fit and predict function in the implementation in file `model.py` after setting the cwd into the directory
"""

def hash_func(
    self,
    batch_size: int = 8,
    num_features: int = 10,
    num_timesteps: int = 4,
    num_edges: int = 20,
    input_value: float = 1.0,
    param_init_value: float = 1.0,
) -> str:
    """Build the cache key for one ``execute`` call of this model workspace.

    The key is the MD5 hash of every execution parameter followed by the
    contents of all files in ``self.code_dict`` (visited in sorted file-name
    order), joined with ``"_"``.

    Parameters mirror those of ``execute``; each one is part of the key.

    Returns:
        The hash string identifying this (parameters, code) combination.
    """
    # ``num_edges`` is part of the key so that runs differing only in the
    # number of edges do not share a cached result.
    key_parts = [batch_size, num_features, num_timesteps, num_edges, input_value, param_init_value]
    # Sort by file name so the key does not depend on dict insertion order.
    key_parts.extend(self.code_dict[file_name] for file_name in sorted(self.code_dict))
    return md5_hash("_".join(str(part) for part in key_parts))

@cache_with_pickle(hash_func)
def execute(
self,
batch_size: int = 8,
Expand All @@ -82,17 +98,6 @@ def execute(
):
super().execute()
try:
if MODEL_IMPL_SETTINGS.enable_execution_cache:
# NOTE: cache the result for the same code
target_file_name = f"{batch_size}_{num_features}_{num_timesteps}_{input_value}_{param_init_value}"
for code_file_name in sorted(list(self.code_dict.keys())):
target_file_name = f"{target_file_name}_{self.code_dict[code_file_name]}"
target_file_name = md5_hash(target_file_name)
cache_file_path = Path(MODEL_IMPL_SETTINGS.cache_location) / f"{target_file_name}.pkl"
Path(MODEL_IMPL_SETTINGS.cache_location).mkdir(exist_ok=True, parents=True)
if cache_file_path.exists():
return pickle.load(open(cache_file_path, "rb"))

qtde = QTDockerEnv() if self.target_task.version == 1 else KGDockerEnv()
qtde.prepare()

Expand Down Expand Up @@ -121,12 +126,6 @@ def execute(
raise RuntimeError(f"Error in running the model code: {log}")
[execution_feedback_str, execution_model_output] = results

if MODEL_IMPL_SETTINGS.enable_execution_cache:
pickle.dump(
(execution_feedback_str, execution_model_output),
open(cache_file_path, "wb"),
)

except Exception as e:
execution_feedback_str = f"Execution error: {e}\nTraceback: {traceback.format_exc()}"
execution_model_output = None
Expand Down
21 changes: 6 additions & 15 deletions rdagent/components/runner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import pickle
from pathlib import Path
from typing import Tuple
from typing import Any, Tuple

from rdagent.components.runner.conf import RUNNER_SETTINGS
from rdagent.core.developer import Developer
from rdagent.core.experiment import ASpecificExp, Experiment
from rdagent.oai.llm_utils import md5_hash
Expand All @@ -18,16 +17,8 @@ def get_cache_key(self, exp: Experiment) -> str:
task_info_str = "\n".join(task_info_list)
return md5_hash(task_info_str)

def get_cache_result(self, exp: Experiment) -> Tuple[bool, object]:
task_info_key = self.get_cache_key(exp)
Path(RUNNER_SETTINGS.cache_path).mkdir(parents=True, exist_ok=True)
cache_path = Path(RUNNER_SETTINGS.cache_path) / f"{task_info_key}.pkl"
if cache_path.exists():
return True, pickle.load(open(cache_path, "rb"))
else:
return False, None

def dump_cache_result(self, exp: Experiment, result: object):
task_info_key = self.get_cache_key(exp)
cache_path = Path(RUNNER_SETTINGS.cache_path) / f"{task_info_key}.pkl"
pickle.dump(result, open(cache_path, "wb"))
def assign_cached_result(self, exp: Experiment, cached_res: Experiment) -> Experiment:
    """Copy results from a cached experiment onto ``exp`` and return ``exp``.

    Parameters:
        exp: the experiment to fill in; it is mutated in place.
        cached_res: the cached experiment whose results are reused.

    Returns:
        The same ``exp`` object, with ``exp.result`` set to
        ``cached_res.result``. If the last entry of ``exp.based_experiments``
        has no result yet, it receives the result of the last entry of
        ``cached_res.based_experiments`` when such an entry exists.
    """
    pending = exp.based_experiments
    if pending and pending[-1].result is None:
        cached_based = cached_res.based_experiments
        # The cached experiment may carry no based experiments; skip the
        # back-fill rather than raise IndexError.
        if cached_based:
            pending[-1].result = cached_based[-1].result
    exp.result = cached_res.result
    return exp
Loading

0 comments on commit 2879ecf

Please sign in to comment.