Vastly improves context window management.
Now uses token counts to build the biggest context possible.
Torantulino committed Apr 3, 2023
1 parent 04710ae commit 41daf07
Showing 1 changed file with 51 additions and 5 deletions.
56 changes: 51 additions & 5 deletions scripts/chat.py
@@ -2,10 +2,13 @@
 import openai
 from dotenv import load_dotenv
 from config import Config
+import token_counter
 
 cfg = Config()
 
 from llm_utils import create_chat_completion
+
+
 def create_chat_message(role, content):
     """
     Create a chat message with the given role and content.
@@ -20,13 +23,15 @@ def create_chat_message(role, content):
     return {"role": role, "content": content}
 
 
+
+# TODO: Change debug from hardcode to argument
 def chat_with_ai(
         prompt,
         user_input,
         full_message_history,
         permanent_memory,
         token_limit,
-        debug=True):
+        debug=False):
     while True:
         try:
             """
@@ -42,29 +47,70 @@
             Returns:
                 str: The AI's response.
             """
+            model = cfg.fast_llm_model # TODO: Change model from hardcode to argument
+            # Reserve 1000 tokens for the response
+            if debug:
+                print(f"Token limit: {token_limit}")
+            send_token_limit = token_limit - 1000
+
             current_context = [
                 create_chat_message(
                     "system", prompt), create_chat_message(
-                    "system", f"Permanent memory: {permanent_memory}")]
-            current_context.extend(
-                full_message_history[-(token_limit - len(prompt) - len(permanent_memory) - 10):])
+                    "system", f"Permanent memory: {permanent_memory}")]
+
+            # Add messages from the full message history until we reach the token limit
+            next_message_to_add_index = len(full_message_history) - 1
+            current_tokens_used = 0
+            insertion_index = len(current_context)
+
+            # Count the currently used tokens
+            current_tokens_used = token_counter.count_message_tokens(current_context, model)
+            current_tokens_used += token_counter.count_message_tokens([create_chat_message("user", user_input)], model)  # Account for user input (appended later)
+
+            while next_message_to_add_index >= 0:
+                # print (f"CURRENT TOKENS USED: {current_tokens_used}")
+                message_to_add = full_message_history[next_message_to_add_index]
+
+                tokens_to_add = token_counter.count_message_tokens([message_to_add], model)
+                if current_tokens_used + tokens_to_add > send_token_limit:
+                    break
+
+                # Add the most recent message to the start of the current context, after the two system prompts.
+                current_context.insert(insertion_index, full_message_history[next_message_to_add_index])
+
+                # Count the currently used tokens
+                current_tokens_used += tokens_to_add
+
+                # Move to the next most recent message in the full message history
+                next_message_to_add_index -= 1
+
+            # Append user input, the length of this is accounted for above
             current_context.extend([create_chat_message("user", user_input)])
+
+            # Calculate remaining tokens
+            tokens_remaining = token_limit - current_tokens_used
+            # assert tokens_remaining >= 0, "Tokens remaining is negative. This should never happen, please submit a bug report at https://www.github.com/Torantulino/Auto-GPT"
+
             # Debug print the current context
             if debug:
                 print(f"Token limit: {token_limit}")
+                print(f"Send Token Count: {current_tokens_used}")
+                print(f"Tokens remaining for response: {tokens_remaining}")
                 print("------------ CONTEXT SENT TO AI ---------------")
                 for message in current_context:
                     # Skip printing the prompt
                     if message["role"] == "system" and message["content"] == prompt:
                         continue
                     print(
                         f"{message['role'].capitalize()}: {message['content']}")
                     print()
                 print("----------- END OF CONTEXT ----------------")
+
+            # TODO: use a model defined elsewhere, so that model can contain temperature and other settings we care about
             assistant_reply = create_chat_completion(
-                model=cfg.smart_llm_model,
+                model=model,
                 messages=current_context,
                 max_tokens=tokens_remaining,
             )
 
             # Update full message history
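The heart of the change is a greedy, newest-first fill: reserve 1000 tokens for the model's reply, seed the context with the two system messages, then walk the history backwards, inserting each message just after the system prompts until the next one would exceed the send budget. The sketch below restates that strategy as a standalone function; pack_context and count_tokens are hypothetical names used for illustration and are not part of this commit (count_tokens stands in for token_counter.count_message_tokens).

# Minimal sketch of the packing strategy this commit introduces.
# pack_context and count_tokens are hypothetical helpers, not repo code.
def pack_context(system_prompts, history, user_message, token_limit,
                 count_tokens, reserve_for_response=1000):
    """Build the largest chronological context that fits the token budget."""
    send_limit = token_limit - reserve_for_response
    context = list(system_prompts)
    insertion_index = len(context)

    # The budget must already cover the system prompts and the user message,
    # since the user message is appended at the very end.
    used = count_tokens(context) + count_tokens([user_message])

    # Newest to oldest: each insert at a fixed index pushes newer messages
    # right, so the surviving slice stays in chronological order.
    for message in reversed(history):
        cost = count_tokens([message])
        if used + cost > send_limit:
            break
        context.insert(insertion_index, message)
        used += cost

    context.append(user_message)
    return context, token_limit - used  # context, plus tokens left for the reply

With a 4000-token limit, for example, send_token_limit is 3000: the prompts, permanent memory, history slice, and user input must fit within 3000 tokens, which guarantees at least 1000 tokens remain for max_tokens on the completion call.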

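token_counter is imported at the top of the file, but its implementation is not part of this diff. For orientation, here is a plausible count_message_tokens in the style of OpenAI's tiktoken cookbook recipe; the overhead constants (4 tokens of framing per message, 3 for reply priming) are assumptions taken from that recipe, not values confirmed by this repository.

# Hypothetical sketch of token_counter.count_message_tokens, modeled on
# OpenAI's cookbook recipe for chat models; not the module from this repo.
import tiktoken

def count_message_tokens(messages, model="gpt-3.5-turbo"):
    """Estimate how many tokens a list of chat messages will consume."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name; fall back to the encoding GPT-3.5/4 use.
        encoding = tiktoken.get_encoding("cl100k_base")

    tokens_per_message = 4  # assumed per-message framing overhead
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for value in message.values():  # both "role" and "content" cost tokens
            num_tokens += len(encoding.encode(value))
    num_tokens += 3  # assumed priming for the assistant's reply
    return num_tokens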