Skip to content

Commit

Permalink
final
Browse files Browse the repository at this point in the history
  • Loading branch information
n.semenov committed May 22, 2024
1 parent ee31f24 commit 44e4cef
Show file tree
Hide file tree
Showing 23 changed files with 137,979 additions and 7,672 deletions.
Binary file modified agents/__pycache__/parent_agent.cpython-311.pyc
Binary file not shown.
5 changes: 3 additions & 2 deletions agents/parent_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def __init__(self, model, system_prompt, api_key):
api_key=api_key,
)

# def generate(self, prompt, json = False, t = 0.7):
# if json == True:
# def generate(self, prompt, jsn = False, t = 0.7):
# if jsn:
# chat_completion = self.client.chat.completions.create(
# messages=[
# {
Expand Down Expand Up @@ -54,6 +54,7 @@ def __init__(self, model, system_prompt, api_key):
# completion_tokens = chat_completion.usage.completion_tokens

# cost = completion_tokens * 3 / 100000 + prompt_tokens * 1 / 100000
# self.total_amount += cost
# return response, cost


Expand Down
Binary file modified graphs/__pycache__/parent_graph.cpython-311.pyc
Binary file not shown.
5 changes: 3 additions & 2 deletions graphs/parent_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def __init__(self, model, system_prompt, api_key):
api_key=api_key,
)

# def generate(self, prompt, json = False, t = 0.7):
# if json == True:
# def generate(self, prompt, jsn = False, t = 0.7):
# if jsn:
# chat_completion = self.client.chat.completions.create(
# messages=[
# {
Expand Down Expand Up @@ -56,6 +56,7 @@ def __init__(self, model, system_prompt, api_key):
# completion_tokens = chat_completion.usage.completion_tokens

# cost = completion_tokens * 3 / 100000 + prompt_tokens * 1 / 100000
# self.total_amount += cost
# return response, cost


Expand Down
2 changes: 1 addition & 1 deletion interactive_human_games.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def run():
log(f"Total time: {round(total_time, 2)} sec, attempt time: {round(attempt_time, 2)} sec, step time: {round(step_time, 2)} sec")
log("=" * 70)

log(f"\n\nREWARDS: {rewards}\n\n")
log(f"\n\nREWARDS: {rewards}\n\n", verbose = False)


def process_action_get_reward(action, env, info, env_name):
Expand Down
29,905 changes: 29,905 additions & 0 deletions logs/clean_arigraph.txt

Large diffs are not rendered by default.

33,203 changes: 33,203 additions & 0 deletions logs/clean_arigraph_without_exploration.txt

Large diffs are not rendered by default.

6,392 changes: 6,392 additions & 0 deletions logs/cook_arigraph.txt

Large diffs are not rendered by default.

5,632 changes: 5,632 additions & 0 deletions logs/cook_arigraph_without_episodic.txt

Large diffs are not rendered by default.

10,352 changes: 10,352 additions & 0 deletions logs/cook_arigraph_without_exploration.txt

Large diffs are not rendered by default.

8,713 changes: 8,713 additions & 0 deletions logs/cook_hard_arigraph.txt

Large diffs are not rendered by default.

8,157 changes: 8,157 additions & 0 deletions logs/hunt_arigraph.txt

Large diffs are not rendered by default.

25,865 changes: 25,865 additions & 0 deletions logs/hunt_arigraph_without_exploration.txt

Large diffs are not rendered by default.

9,716 changes: 9,716 additions & 0 deletions logs/hunt_hard_arigraph.txt

Large diffs are not rendered by default.

30 changes: 17 additions & 13 deletions pipeline_arigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@

# Changeable part of pipeline

log_file = "test_new_pipe_arigraph"
log_file = "test_new_pipe_arigraph_without_hunt_1"

# env_name can be picked from:
# ["hunt", "hunt_hard", "cook", "cook_hard", "cook_rl_baseline", "clean"]
# to test other envs, edit utils.envs_cfg
env_name = "cook"
env_name = "hunt"
model = "gpt-4-0125-preview"
retriever_device = "cpu"
api_key = "insert your key here"
n_prev, topk_episodic = 5, 2
max_steps, n_attempts = 150, 1
need_exp = True
max_steps, n_attempts = 150, 3
need_exp = False

# End of changeable part of pipeline

Expand All @@ -47,7 +47,7 @@ def run():
for attempt in range(n_attempts):
log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
log("=" * 70)
observations = []
observations, history = [], []
locations = set()
observation, info = env.reset()
action = "start"
Expand Down Expand Up @@ -91,27 +91,29 @@ def run():

subgraph, top_episodic = graph.update(observation, observations, plan=plan0, prev_subgraph=subgraph, locations=list(locations), curr_location=env.curr_location.lower(), previous_location=previous_location, action=action, log=log, items1 = items, topk_episodic=topk_episodic)
observation += f"\nInventory: {inventory}"
observation += f"\nAction that led to this observation: {action}"

log("Length of subgraph: " + str(len(subgraph)))
log("Associated triplets: " + str(subgraph))
log("Episodic memory: " + str(top_episodic))

if_explore, _ = agent_if_expl.generate(prompt=f"Plan: \n{plan0}", t=0.2)
if_explore = if_explore == "True" and need_exp
if_explore, _ = agent_if_expl.generate(prompt=f"Plan: \n{plan0}", t=0.2) if need_exp else ("False", 0)
if_explore = "True" in if_explore
log('If explore: ' + str(if_explore))

# Exploration: gather unexplored exits only when the agent decided to explore
all_unexpl_exits = get_unexpl_exits(locations, graph) if if_explore else ""

valid_actions = [action_processing(action) for action in env.get_valid_actions()] if "cook" in env_name else env.get_valid_actions()
valid_actions += [f"go to {loc}" for loc in locations]
hist_obs = "\n".join(history)

plan0 = planning(observations, observation, plan0, subgraph, top_episodic, if_explore, all_unexpl_exits)
action = choose_action(observations, observation, subgraph, top_episodic, plan0, all_unexpl_exits, valid_actions, if_explore)
plan0 = planning(hist_obs, observation, plan0, subgraph, top_episodic, if_explore, all_unexpl_exits)
action = choose_action(hist_obs, observation, subgraph, top_episodic, plan0, all_unexpl_exits, valid_actions, if_explore)

observations.append(observation)
observations = observations[-n_prev:]
history.append(f"Observation: {observation}\nAction taken: {action}")
history = history[-n_prev:]
previous_location = env.curr_location.lower()

observation, step_reward, done, info = process_action_get_reward(action, env, info, graph, locations, env_name)
Expand Down Expand Up @@ -174,10 +176,12 @@ def choose_action(observations, observation, subgraph, top_episodic, plan0, all_
\n6. Your current plan: {plan0}'''

if if_explore:
prompt += f'''\n7. Yet unexplored exits in the environment: {all_unexpl_exits}'''
prompt += f'''\n7. Yet unexplored exits in the environment: {all_unexpl_exits}'''


prompt += f'''Possible actions in current situation: {valid_actions}'''
action0, cost_action = agent_action.generate(prompt, jsn=True, t=1)
prompt += f'''\n\nPossible actions in current situation: {valid_actions}'''
t = 0.2 if need_exp else 1
action0, cost_action = agent_action.generate(prompt, jsn=True, t = t)
log("Action: " + action0)

try:
Expand Down
6 changes: 2 additions & 4 deletions pipeline_fullhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def run():
for attempt in range(n_attempts):
log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
log("=" * 70)
observations = []
observations, history = [], []
observation, info = env.reset()
action = "start"
plan0 = f'''{{
Expand Down Expand Up @@ -74,14 +74,12 @@ def run():
log("Observation: " + observation)

observation += f"\nInventory: {inventory}"
observation += f"\nAction that led to this observation: {action}"

valid_actions = [action_processing(action) for action in env.get_valid_actions()] if "cook" in env_name else env.get_valid_actions()

plan0 = planning(observations, observation, plan0)
action = choose_action(observations, observation, plan0, valid_actions)

observations.append(observation)
observations.append(f"\nObservation: {observation}\nAction taken: {action}")

observation, step_reward, done, info = process_action_get_reward(action, env, info, env_name)
reward += step_reward
Expand Down
9 changes: 4 additions & 5 deletions pipeline_smartrag.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def run():
for attempt in range(n_attempts):
log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
log("=" * 70)
observations = []
observations, history = [], []
observation, info = env.reset()
action = "start"
plan0 = f'''{{
Expand Down Expand Up @@ -80,13 +80,12 @@ def run():
log("Observation: " + observation)

observation += f"\nInventory: {inventory}"
observation += f"\nAction that led to this observation: {action}"

observation_with_plan = observation + f"\nPlan: {plan0}"
current_emb = retriever.embed([observation_with_plan])[0].cpu().detach().numpy()

relevant_observations = smart_rag(current_emb, step, observations, n = n_retrieve)
relevant_observations += [observations[-i][0] for i in range(min(n_prev, len(observations)))]
relevant_observations += [f"Observations: {observations[-i][0]}\nAction taken: {observations[-i][4]}" for i in range(min(n_prev, len(observations)))]
relevant_observations = list(set(relevant_observations))
log("RELEVANT OBSERVATIONS: " + str(relevant_observations))

Expand All @@ -96,7 +95,7 @@ def run():
action = choose_action(relevant_observations, observation, plan0, valid_actions)

score = get_score(observation, plan0)
observations.append((observation, step, score, current_emb))
observations.append((observation, step, score, current_emb, action))

observation, step_reward, done, info = process_action_get_reward(action, env, info, env_name)
reward += step_reward
Expand Down Expand Up @@ -176,7 +175,7 @@ def smart_rag(current_emb, step, observations, n = 3):
def minmax(arr):
return (np.array(arr) - np.min(arr)) / (np.max(arr) - np.min(arr) + 1e-9)
best_idx = np.argsort(minmax(time_scores) + minmax(importance_scores) + minmax(relative_scores))[-n:]
return [observations[i][0] for i in best_idx]
return [f"Observations: {observations[i][0]}\nAction taken: {observations[i][4]}" for i in best_idx]

def get_score(observation, plan0):
prompt = f"""On the scale of 1 to 10, where 1 is purely mundane
Expand Down
16 changes: 8 additions & 8 deletions pipeline_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def run():
for attempt in range(n_attempts):
log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
log("=" * 70)
observations = []
history = []
observation, info = env.reset()
action = "start"
plan0 = f'''{{
Expand Down Expand Up @@ -77,17 +77,17 @@ def run():
log("Observation: " + observation)

observation += f"\nInventory: {inventory}"
observation += f"\nAction that led to this observation: {action}"
hist_obs = "\n".join(history)

summary = get_summary(observations, observation, summary)
summary = get_summary(hist_obs, observation, summary)

valid_actions = [action_processing(action) for action in env.get_valid_actions()] if "cook" in env_name else env.get_valid_actions()

plan0 = planning(observations, observation, summary, plan0)
action = choose_action(observations, observation, plan0, valid_actions, summary)

observations.append(observation)
observations = observations[-n_prev:]
plan0 = planning(hist_obs, observation, summary, plan0)
action = choose_action(hist_obs, observation, plan0, valid_actions, summary)

history.append(f"Observation: {observation}\nAction taken: {action}")
history = history[-n_prev:]

observation, step_reward, done, info = process_action_get_reward(action, env, info, env_name)
reward += step_reward
Expand Down
File renamed without changes.
Loading

0 comments on commit 44e4cef

Please sign in to comment.