final

FermiQ · May 22, 2024 · 44e4cef · 44e4cef
1 parent ee31f24
commit 44e4cef
Show file tree

Hide file tree

Showing 23 changed files with 137,979 additions and 7,672 deletions.
diff --git a/agents/__pycache__/parent_agent.cpython-311.pyc b/agents/__pycache__/parent_agent.cpython-311.pyc
diff --git a/agents/parent_agent.py b/agents/parent_agent.py
@@ -17,8 +17,8 @@ def __init__(self, model, system_prompt, api_key):
             api_key=api_key,
         )
 
-    # def generate(self, prompt, json = False, t = 0.7):
-    #     if json == True:   
+    # def generate(self, prompt, jsn = False, t = 0.7):
+    #     if jsn:   
     #         chat_completion = self.client.chat.completions.create(
     #             messages=[
     #                 {
@@ -54,6 +54,7 @@ def __init__(self, model, system_prompt, api_key):
     #     completion_tokens = chat_completion.usage.completion_tokens
 
     #     cost = completion_tokens * 3 / 100000 + prompt_tokens * 1 / 100000
+    #     self.total_amount += cost
     #     return response, cost
 
 

diff --git a/graphs/__pycache__/parent_graph.cpython-311.pyc b/graphs/__pycache__/parent_graph.cpython-311.pyc
diff --git a/graphs/parent_graph.py b/graphs/parent_graph.py
@@ -19,8 +19,8 @@ def __init__(self, model, system_prompt, api_key):
             api_key=api_key,
         )
 
-    # def generate(self, prompt, json = False, t = 0.7):
-    #     if json == True:   
+    # def generate(self, prompt, jsn = False, t = 0.7):
+    #     if jsn:   
     #         chat_completion = self.client.chat.completions.create(
     #             messages=[
     #                 {
@@ -56,6 +56,7 @@ def __init__(self, model, system_prompt, api_key):
     #     completion_tokens = chat_completion.usage.completion_tokens
 
     #     cost = completion_tokens * 3 / 100000 + prompt_tokens * 1 / 100000
+    #     self.total_amount += cost
     #     return response, cost
 
 

diff --git a/interactive_human_games.py b/interactive_human_games.py
@@ -108,7 +108,7 @@ def run():
             log(f"Total time: {round(total_time, 2)} sec, attempt time: {round(attempt_time, 2)} sec, step time: {round(step_time, 2)} sec")
             log("=" * 70)
 
-            log(f"\n\nREWARDS: {rewards}\n\n")
+            log(f"\n\nREWARDS: {rewards}\n\n", verbose = False)
 
 
 def process_action_get_reward(action, env, info, env_name):

diff --git a/logs/clean_arigraph.txt b/logs/clean_arigraph.txt
diff --git a/logs/clean_arigraph_without_exploration.txt b/logs/clean_arigraph_without_exploration.txt
diff --git a/logs/cook_arigraph.txt b/logs/cook_arigraph.txt
diff --git a/logs/cook_arigraph_without_episodic.txt b/logs/cook_arigraph_without_episodic.txt
diff --git a/logs/cook_arigraph_without_exploration.txt b/logs/cook_arigraph_without_exploration.txt
diff --git a/logs/cook_hard_arigraph.txt b/logs/cook_hard_arigraph.txt
diff --git a/logs/hunt_arigraph.txt b/logs/hunt_arigraph.txt
diff --git a/logs/hunt_arigraph_without_exploration.txt b/logs/hunt_arigraph_without_exploration.txt
diff --git a/logs/hunt_hard_arigraph.txt b/logs/hunt_hard_arigraph.txt
diff --git a/pipeline_arigraph.py b/pipeline_arigraph.py
@@ -17,18 +17,18 @@
 
 # Changeable part of pipeline
 
-log_file = "test_new_pipe_arigraph"
+log_file = "test_new_pipe_arigraph_without_hunt_1"
 
 # env_name can be picked from:
 # ["hunt", "hunt_hard", "cook", "cook_hard", "cook_rl_baseline", "clean"]
 # to test other envs, edit utils.envs_cfg
-env_name = "cook"
+env_name = "hunt"
 model = "gpt-4-0125-preview"
 retriever_device = "cpu"
 api_key = "insert your key here"
 n_prev, topk_episodic = 5, 2
-max_steps, n_attempts = 150, 1
-need_exp = True
+max_steps, n_attempts = 150, 3
+need_exp = False
 
 # End of changeable part of pipeline
 
@@ -47,7 +47,7 @@ def run():
     for attempt in range(n_attempts):
         log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
         log("=" * 70)
-        observations = []
+        observations, history = [], []
         locations = set()
         observation, info = env.reset()
         action = "start"
@@ -91,27 +91,29 @@ def run():
 
             subgraph, top_episodic = graph.update(observation, observations, plan=plan0, prev_subgraph=subgraph, locations=list(locations), curr_location=env.curr_location.lower(), previous_location=previous_location, action=action, log=log, items1 = items, topk_episodic=topk_episodic)
             observation += f"\nInventory: {inventory}"
-            observation += f"\nAction that led to this observation: {action}"
 
             log("Length of subgraph: " + str(len(subgraph)))
             log("Associated triplets: " + str(subgraph))
             log("Episodic memory: " + str(top_episodic))
 
-            if_explore, _ = agent_if_expl.generate(prompt=f"Plan: \n{plan0}", t=0.2)
-            if_explore = if_explore == "True" and need_exp
+            if_explore, _ = agent_if_expl.generate(prompt=f"Plan: \n{plan0}", t=0.2) if need_exp else ("False", 0)
+            if_explore = "True" in if_explore
             log('If explore: ' + str(if_explore))
 
             #Exploration
             all_unexpl_exits = get_unexpl_exits(locations, graph) if if_explore else ""
 
             valid_actions = [action_processing(action) for action in env.get_valid_actions()] if "cook" in env_name else env.get_valid_actions()
             valid_actions += [f"go to {loc}" for loc in locations]
+            hist_obs = "\n".join(history)
 
-            plan0 = planning(observations, observation, plan0, subgraph, top_episodic, if_explore, all_unexpl_exits)
-            action = choose_action(observations, observation, subgraph, top_episodic, plan0, all_unexpl_exits, valid_actions, if_explore)
+            plan0 = planning(hist_obs, observation, plan0, subgraph, top_episodic, if_explore, all_unexpl_exits)
+            action = choose_action(hist_obs, observation, subgraph, top_episodic, plan0, all_unexpl_exits, valid_actions, if_explore)
 
             observations.append(observation)
             observations = observations[-n_prev:]
+            history.append(f"Observation: {observation}\nAction taken: {action}")
+            history = history[-n_prev:]
             previous_location = env.curr_location.lower()
 
             observation, step_reward, done, info = process_action_get_reward(action, env, info, graph, locations, env_name)
@@ -174,10 +176,12 @@ def choose_action(observations, observation, subgraph, top_episodic, plan0, all_
 \n6. Your current plan: {plan0}'''
 
     if if_explore:
-        prompt += f'''\n7. Yet unexplored exits in the environment: {all_unexpl_exits}'''   
+        prompt += f'''\n7. Yet unexplored exits in the environment: {all_unexpl_exits}'''
+
 
-    prompt += f'''Possible actions in current situation: {valid_actions}'''       
-    action0, cost_action = agent_action.generate(prompt, jsn=True, t=1)
+    prompt += f'''\n\nPossible actions in current situation: {valid_actions}'''  
+    t = 0.2 if need_exp else 1
+    action0, cost_action = agent_action.generate(prompt, jsn=True, t = t)
     log("Action: " + action0)
 
     try:

diff --git a/pipeline_fullhist.py b/pipeline_fullhist.py
@@ -40,7 +40,7 @@ def run():
     for attempt in range(n_attempts):
         log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
         log("=" * 70)
-        observations = []
+        observations, history = [], []
         observation, info = env.reset()
         action = "start"
         plan0 = f'''{{
@@ -74,14 +74,12 @@ def run():
             log("Observation: " + observation)        
 
             observation += f"\nInventory: {inventory}"
-            observation += f"\nAction that led to this observation: {action}"
 
             valid_actions = [action_processing(action) for action in env.get_valid_actions()] if "cook" in env_name else env.get_valid_actions()
 
             plan0 = planning(observations, observation, plan0)
             action = choose_action(observations, observation, plan0, valid_actions)
-
-            observations.append(observation)
+            observations.append(f"\nObservation: {observation}\nAction taken: {action}")
 
             observation, step_reward, done, info = process_action_get_reward(action, env, info, env_name)
             reward += step_reward

diff --git a/pipeline_smartrag.py b/pipeline_smartrag.py
@@ -46,7 +46,7 @@ def run():
     for attempt in range(n_attempts):
         log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
         log("=" * 70)
-        observations = []
+        observations, history = [], []
         observation, info = env.reset()
         action = "start"
         plan0 = f'''{{
@@ -80,13 +80,12 @@ def run():
             log("Observation: " + observation)        
 
             observation += f"\nInventory: {inventory}"
-            observation += f"\nAction that led to this observation: {action}"
 
             observation_with_plan = observation + f"\nPlan: {plan0}"
             current_emb = retriever.embed([observation_with_plan])[0].cpu().detach().numpy()
 
             relevant_observations = smart_rag(current_emb, step, observations, n = n_retrieve)
-            relevant_observations += [observations[-i][0] for i in range(min(n_prev, len(observations)))]
+            relevant_observations += [f"Observations: {observations[-i][0]}\nAction taken: {observations[-i][4]}" for i in range(min(n_prev, len(observations)))]
             relevant_observations = list(set(relevant_observations))
             log("RELEVANT OBSERVATIONS: " + str(relevant_observations))
 
@@ -96,7 +95,7 @@ def run():
             action = choose_action(relevant_observations, observation, plan0, valid_actions)
 
             score = get_score(observation, plan0)
-            observations.append((observation, step, score, current_emb))
+            observations.append((observation, step, score, current_emb, action))
 
             observation, step_reward, done, info = process_action_get_reward(action, env, info, env_name)
             reward += step_reward
@@ -176,7 +175,7 @@ def smart_rag(current_emb, step, observations, n = 3):
     def minmax(arr):
         return (np.array(arr) - np.min(arr)) / (np.max(arr) - np.min(arr) + 1e-9)
     best_idx = np.argsort(minmax(time_scores) + minmax(importance_scores) + minmax(relative_scores))[-n:]
-    return [observations[i][0] for i in best_idx]
+    return [f"Observations: {observations[i][0]}\nAction taken: {observations[i][4]}" for i in best_idx]
 
 def get_score(observation, plan0):
     prompt = f"""On the scale of 1 to 10, where 1 is purely mundane

diff --git a/pipeline_summary.py b/pipeline_summary.py
@@ -42,7 +42,7 @@ def run():
     for attempt in range(n_attempts):
         log("\n\n\n\n\n\n\nAttempt: " + str(attempt + 1))
         log("=" * 70)
-        observations = []
+        history = []
         observation, info = env.reset()
         action = "start"
         plan0 = f'''{{
@@ -77,17 +77,17 @@ def run():
             log("Observation: " + observation)        
 
             observation += f"\nInventory: {inventory}"
-            observation += f"\nAction that led to this observation: {action}"
+            hist_obs = "\n".join(history)
 
-            summary = get_summary(observations, observation, summary)
+            summary = get_summary(hist_obs, observation, summary)
 
             valid_actions = [action_processing(action) for action in env.get_valid_actions()] if "cook" in env_name else env.get_valid_actions()
-
-            plan0 = planning(observations, observation, summary, plan0)
-            action = choose_action(observations, observation, plan0, valid_actions, summary)
 
-            observations.append(observation)
-            observations = observations[-n_prev:]
+            plan0 = planning(hist_obs, observation, summary, plan0)
+            action = choose_action(hist_obs, observation, plan0, valid_actions, summary)
+
+            history.append(f"Observation: {observation}\nAction taken: {action}")
+            history = history[-n_prev:]
 
             observation, step_reward, done, info = process_action_get_reward(action, env, info, env_name)
             reward += step_reward

diff --git a/human_game_Nastya/log.txt → ...pts/__pycache__/human_game_Nastya/log.txt b/human_game_Nastya/log.txt → ...pts/__pycache__/human_game_Nastya/log.txt