Skip to content

Commit

Permalink
Feat: more robust naive agent (balrog-ai#13)
Browse files Browse the repository at this point in the history
* feat: robust naive agent

* docs
  • Loading branch information
DavidePaglieri authored Dec 17, 2024
1 parent df38b8a commit 9c65dad
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 0 deletions.
3 changes: 3 additions & 0 deletions balrog/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .dummy import DummyAgent
from .few_shot import FewShotAgent
from .naive import NaiveAgent
from .robust_naive import RobustNaiveAgent


class AgentFactory:
Expand Down Expand Up @@ -50,6 +51,8 @@ def create_agent(self):
return CustomAgent(client_factory, prompt_builder)
elif self.config.agent.type == "few_shot":
return FewShotAgent(client_factory, prompt_builder, self.config.agent.max_icl_history)
elif self.config.agent.type == "robust_naive":
return RobustNaiveAgent(client_factory, prompt_builder)

else:
raise ValueError(f"Unknown agent type: {self.config.agent}")
69 changes: 69 additions & 0 deletions balrog/agents/robust_naive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import copy
import re

from balrog.agents.base import BaseAgent


class RobustNaiveAgent(BaseAgent):
    """Naive agent that asks the LLM to wrap its chosen action in explicit tags.

    On every step the agent appends a strict formatting instruction to the last
    user message of the prompt, queries the client, and then pulls the action
    out of the ``<|ACTION|>...</|ACTION|>`` tags found in the completion.
    """

    def __init__(self, client_factory, prompt_builder):
        """Set up the agent and create its LLM client.

        Args:
            client_factory: Callable invoked with no arguments to obtain the
                client used for generation.
            prompt_builder: Prompt builder forwarded to ``BaseAgent``.
        """
        super().__init__(client_factory, prompt_builder)
        self.client = client_factory()

    def act(self, obs, prev_action=None):
        """Pick the next action for the given observation.

        Args:
            obs (dict): The current observation in the environment.
            prev_action (str, optional): The previous action taken. It is
                recorded in the prompt builder only when truthy.

        Returns:
            The client's response with its ``completion`` reduced to the
            extracted action (see ``_extract_final_answer``).
        """
        if prev_action:
            self.prompt_builder.update_action(prev_action)
        self.prompt_builder.update_observation(obs)

        messages = self.prompt_builder.get_prompt()

        # Strict output format so the action can be recovered with a regex.
        format_instruction = "\n".join(
            (
                "You must choose exactly one of the listed actions and output it strictly in the following format:",
                "<|ACTION|>YOUR_CHOSEN_ACTION</|ACTION|>",
                "You must not output any other text before or after these tags. No explanation, no reasoning, just the action within these tags.",
            )
        )

        # Only a trailing user message gets the instruction appended.
        if messages:
            last_message = messages[-1]
            if last_message.role == "user":
                last_message.content += "\n\n" + format_instruction

        return self._extract_final_answer(self.client.generate(messages))

    def _extract_final_answer(self, answer):
        """Reduce a response's completion to the text inside the action tags.

        Args:
            answer (LLMResponse): The response from the LLM; its ``completion``
                attribute is searched for ``<|ACTION|>...</|ACTION|>``.

        Returns:
            LLMResponse: A deep copy of ``answer`` whose ``completion`` is the
            stripped text of the first tagged action, or the stripped original
            completion when no tags are present.
        """
        raw_text = answer.completion
        # DOTALL lets the (non-greedy) capture span newlines inside the tags.
        tagged = re.search(r"<\|ACTION\|>(.*?)</\|ACTION\|>", raw_text, re.DOTALL)
        # Without tags, fall back to the whole completion.
        action_text = (tagged.group(1) if tagged else raw_text).strip()

        return copy.deepcopy(answer)._replace(completion=action_text)
2 changes: 2 additions & 0 deletions docs/agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ BALROG ships with three pre-built agents:
| **Agent Type** | **Description** |
|-------------------------|---------------------------------------------------------------------------------|
| **naive** | Outputs actions based on the current action/observation history without any additional reasoning. |
| **robust_naive** | Outputs actions based on the current action/observation history without any additional reasoning, using a stricter output template that makes action extraction more robust. |
| **chain_of_thought** | Generates actions through step-by-step reasoning, providing a final action output. |


We encourage the community to open PRs that add more agents to BALROG.

## 🤖 Creating Custom Agents
Expand Down

0 comments on commit 9c65dad

Please sign in to comment.