add test and remove unused code

hari-abacies · Oct 30, 2023 · db6e5c8
1 parent 032b6ae
commit db6e5c8
Show file tree

Hide file tree

Showing 2 changed files with 72 additions and 52 deletions.
diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py
@@ -1,27 +1,27 @@
 """
 This module provides utilities to handle and process chat content, especially for extracting code blocks
-and managing them within a specified GPT Engineer project ("workspace"). It offers functionalities like parsing chat messages to
-retrieve code blocks, storing these blocks into a workspace, and overwriting workspace content based on
-new chat messages. Moreover, it aids in formatting and reading file content for an AI agent's input.
+and managing them within a specified GPT Engineer project ("workspace").
 
 Key Features:
 - Parse and extract code blocks from chat messages.
-- Store and overwrite files within a workspace based on chat content.
-- Format files to be used as inputs for AI agents.
-- Retrieve files and their content based on a provided list.
+- Save chat content to memory.
+- Add extracted files to a workspace.
+- Overwrite workspace files based on edits.
+- Retrieve and format file content.
 
 Dependencies:
-- `os` and `pathlib`: For handling OS-level operations and path manipulations.
-- `re`: For regex-based parsing of chat content.
+- `os` and `re`: For handling OS-level operations and regex-based parsing of chat content.
 - `gpt_engineer.core.db`: Database handling functionalities for the workspace.
 - `gpt_engineer.cli.file_selector`: Constants related to file selection.
 
 Functions:
 - parse_chat: Extracts code blocks from chat messages.
-- to_files: Parses a chat and adds the extracted files to a workspace.
-- overwrite_files: Parses a chat and overwrites files in the workspace.
-- get_code_strings: Reads a file list and returns filenames and their content.
-- format_file_to_input: Formats a file's content for input to an AI agent.
+- to_files_and_memory: Saves chat content to memory and adds extracted files to a workspace.
+- to_files: Adds extracted files to a workspace.
+- get_code_strings: Retrieves file names and their content.
+- format_file_to_input: Formats file content for AI input.
+- overwrite_files_with_edits: Overwrites workspace files based on parsed edits from chat.
+- apply_edits: Applies file edits to a workspace.
 """
 
 import os
@@ -115,28 +115,6 @@ def to_files(chat: str, workspace: DB):
     for file_name, file_content in files:
         workspace[file_name] = file_content
 
-
-def overwrite_files(chat: str, dbs: DBs) -> None:
-    """
-    Parse the chat and overwrite all files in the workspace.
-
-    Parameters
-    ----------
-    chat : str
-        The chat containing the AI files.
-    dbs : DBs
-        The database containing the workspace.
-    """
-    dbs.memory["all_output_overwrite.txt"] = chat
-
-    files = parse_chat(chat)
-    for file_name, file_content in files:
-        if file_name == "README.md":
-            dbs.memory["LAST_MODIFICATION_README.md"] = file_content
-        else:
-            dbs.workspace[file_name] = file_content
-
-
 def get_code_strings(workspace: DB, metadata_db: DB) -> dict[str, str]:
     """
     Read file_list.txt and return file names and their content.
@@ -150,19 +128,12 @@ def get_code_strings(workspace: DB, metadata_db: DB) -> dict[str, str]:
         A dictionary mapping file names to their content.
     """
 
-    def get_all_files_in_dir(directory):
-        for root, dirs, files in os.walk(directory):
-            for file in files:
-                yield os.path.join(root, file)
-        for dir in dirs:
-            yield from get_all_files_in_dir(os.path.join(root, dir))
-
     files_paths = metadata_db[FILE_LIST_NAME].strip().split("\n")
     files = []
 
     for full_file_path in files_paths:
         if os.path.isdir(full_file_path):
-            for file_path in get_all_files_in_dir(full_file_path):
+            for file_path in _get_all_files_in_dir(full_file_path):
                 files.append(file_path)
         else:
             files.append(full_file_path)
@@ -177,15 +148,7 @@ def get_all_files_in_dir(directory):
         file_name = os.path.relpath(path, workspace.path)
 
         if file_name in workspace:
-            try:
-                with open(path, "r", encoding="utf-8") as f:
-                    file_content = f.read()
-            except UnicodeDecodeError:
-                raise ValueError(
-                    f"Non-text file detected: {file_name}, gpt-engineer currently only supports utf-8 decodable text files."
-                )
-
-            files_dict[file_name] = file_content
+            files_dict[file_name] = _open_file(path)
 
     return files_dict
 
@@ -285,3 +248,19 @@ def apply_edits(edits: List[Edit], workspace: DB):
             workspace[filename] = workspace[filename].replace(
                 edit.before, edit.after
             )  # existing file
+
+def _get_all_files_in_dir(directory):
+    for root, dirs, files in os.walk(directory):
+        for file in files:
+            yield os.path.join(root, file)
+    for dir in dirs:
+        yield from _get_all_files_in_dir(os.path.join(root, dir))
+
+def _open_file(file_path) -> str:
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            return f.read()
+    except UnicodeDecodeError:
+        raise ValueError(
+            f"Non-text file detected: {file_path}, gpt-engineer currently only supports utf-8 decodable text files."
+        )
diff --git a/tests/test_chat_to_files.py b/tests/test_chat_to_files.py
@@ -1,6 +1,9 @@
 import textwrap
 
-from gpt_engineer.core.chat_to_files import to_files_and_memory
+from gpt_engineer.core.chat_to_files import to_files_and_memory, get_code_strings
+from gpt_engineer.cli.file_selector import FILE_LIST_NAME
+
+from unittest.mock import MagicMock
 
 
 class DummyDBs:
@@ -208,3 +211,41 @@ def test_files_with_newline_between_header():
 
     for file_name, file_content in expected_files.items():
         assert dbs.workspace[file_name] == file_content
+
+
+def test_get_code_strings(monkeypatch):
+
+    # arrange
+    mock_db = MagicMock()
+    mock_db.path = "path/to"
+    data = {
+        "file1.txt": "This is file 1 content",
+        "file2.txt": "This is file 2 content"
+    }
+    mock_db.__getitem__ = lambda self, x: data.get(x)
+    mock_db.__contains__ = lambda self, x: x in data
+
+    mock_metadata_db = {
+        FILE_LIST_NAME: "path/to/file1.txt\npath/to/file2.txt"
+    }
+
+    def mock_get_all_files_in_dir(directory):
+        return [
+            "path/to/file1.txt",
+            "path/to/file2.txt"
+        ]
+
+    def mock_open_file(path):
+        return f"File Data for file: {path}"
+
+    monkeypatch.setattr('gpt_engineer.core.chat_to_files._get_all_files_in_dir', mock_get_all_files_in_dir)
+
+    monkeypatch.setattr('gpt_engineer.core.chat_to_files._open_file', mock_open_file)
+
+    # act
+    result = get_code_strings(mock_db, mock_metadata_db)
+
+    print(result)
+    # assert
+    assert result["file1.txt"] == "File Data for file: path/to/file1.txt"
+    assert result["file2.txt"] == "File Data for file: path/to/file2.txt"