Skip to content

Commit

Permalink
prompts and eval
Browse files Browse the repository at this point in the history
  • Loading branch information
smith-nathanh committed Nov 27, 2024
1 parent 5099520 commit a7f8d75
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 24 deletions.
20 changes: 20 additions & 0 deletions system/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,23 @@
# execute the generated unit tests on the reference repository
# compute the coverage that the generated unit tests achieve on the reference repository


# for each directory in "system/output/v2"
# cd into the directory
# create a new conda environment (or virtualenv if easier?) from the requirements.txt
# execute the repository's example code in each `examples` directory
#
import os
import subprocess

# Folder that holds one generated repository per sub-directory.
OUTPUT_ROOT = "system/output/v2"

# Name of the scratch conda environment; it is re-created for every repository
# and removed again afterwards so that runs do not leak packages into each other.
ENV_NAME = "test_env"

# Commands executed inside the environment, relative to the repository root.
EVAL_COMMANDS = [
    ["python", "examples/demo.py"],
    ["pytest", "tests/unit/test_module.py"],
    ["pytest", "tests/acceptance/test_features.py"],
    ["coverage", "run", "-m", "pytest", "tests/unit/test_module.py"],
    ["coverage", "report"],
]

for directory in sorted(os.listdir(OUTPUT_ROOT)):
    # os.listdir() yields bare entry names, so they must be joined with the
    # root before being tested or used as a working directory.
    repo_path = os.path.join(OUTPUT_ROOT, directory)
    if not os.path.isdir(repo_path):
        continue

    # `cwd=` is used instead of os.chdir() so the script's own working
    # directory never changes, even if a command fails part-way through.
    # `--yes` keeps conda from blocking on an interactive confirmation.
    subprocess.run(
        ["conda", "create", "--yes", "--name", ENV_NAME,
         "--file", "requirements.txt"],
        cwd=repo_path,
    )
    try:
        for command in EVAL_COMMANDS:
            # `conda activate` in one child shell does not carry over to the
            # next child process; `conda run` executes the command inside the
            # named environment directly. Return codes are not checked, so a
            # failing step does not stop the remaining steps.
            subprocess.run(
                ["conda", "run", "--name", ENV_NAME, *command],
                cwd=repo_path,
            )
    finally:
        # Drop the environment so the next repository starts from a clean one.
        subprocess.run(["conda", "env", "remove", "--yes", "--name", ENV_NAME])
16 changes: 14 additions & 2 deletions system/eval_config.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
prd_paths:
ArXiv_Digest: "system/benchmark_data/python/ArXiv_digest/docs/PRD.md"
ArXiv_digest: "system/benchmark_data/python/ArXiv_digest/docs/PRD.md"
chakin: "system/benchmark_data/python/chakin/PRD.md"
geotext: "system/benchmark_data/python/geotext/PRD.md"
hone: "system/benchmark_data/python/hone/docs/PRD.md"
Expand All @@ -20,4 +20,16 @@ tracker_v2:
particle_swarm_optimization: 1
readtime: 2 # changed the approve implementation prompt
stocktrends: 1
TextCNN: 1
TextCNN: 1

# tracker_v3: one entry per benchmark project. Entries left without a value
# parse as null in YAML.
tracker_v3:
# NOTE(review): prd_paths spells this key `ArXiv_digest` (lower-case "d");
# confirm the casing used here is intended.
ArXiv_Digest: 1
chakin: 2
geotext: 1
hone: 1
Hybrid_Images: 4 # change the approve implementation prompt
lice:
particle_swarm_optimization:
readtime:
stocktrends:
TextCNN:
77 changes: 63 additions & 14 deletions system/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,21 +228,29 @@ def unit_tests(state: GraphState):
Generate unit tests for the software.
"""
logging.info("---UNIT TESTS---")
state['approvals']['unit_tests_iter'] = state['approvals'].get('unit_tests_iter', 0) + 1

code = '\n\n'.join(f"# ---{filename}---\n{content}"
for filename, content in state['documents']['code'].items())
prompt = UNIT_TEST_PROMPT.format(PRD=state["documents"]['PRD'],
prompt = [HumanMessage(content=UNIT_TEST_PROMPT.format(PRD=state["documents"]['PRD'],
architecture_design=state["documents"]['architecture_design'],
code=code)
code=code))]
if 'unit_tests' in state['approvals']:
if not state['approvals']['unit_tests_coverage']:
prompt.append(state['messages'][-1])
structured_llm = llm.with_structured_output(UnitTests)
test = structured_llm.invoke([HumanMessage(content=prompt)])
test = structured_llm.invoke(prompt)
state["documents"].update(test.dict())
return state

def approve_unit_tests(state: GraphState):
logging.info("---APPROVE UNIT TESTS---")


# Get first directory name, handling paths with leading slash
root_dir = next(name for name in next(iter(state['documents']['code'])).split('/') if name)
cmd = f"cd temp/{root_dir} && {state['documents']['unit_tests']['command'].replace('python ', 'coverage run ')}"

try:
cmd = f"cd temp && {state['documents']['unit_tests']['command']}"
# Run command in shell, capture output
process = subprocess.run(
cmd,
Expand All @@ -253,27 +261,68 @@ def approve_unit_tests(state: GraphState):
)

# Add command to messages
state['messages'].append(state['documents']['unit_tests']['command'])
state['messages'].append(cmd)

# Check return code
if process.returncode == 0:
state['approvals'].update({"unit_tests": True})
state['messages'].append("Unit tests passed")
state['messages'].append(f"Unit tests passed: {process.stdout}")
else:
state['approvals'].update({"unit_tests": False})
state['messages'].append(f"Acceptance tests failed: {process.stderr}")
state['messages'].append(f"Unit tests failed: {process.stderr}")

# Run coverage report
coverage_cmd = f"cd temp/{root_dir} && coverage report"
coverage_process = subprocess.run(
coverage_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)

# Add coverage command to messages
state['messages'].append(coverage_cmd)

if coverage_process.returncode == 0:
# Parse coverage report output
coverage_output = coverage_process.stdout
#state['messages'].append(coverage_output)

# Extract total coverage percentage
for line in coverage_output.splitlines():
if line.startswith("TOTAL"):
total_coverage = int(line.split()[3].replace('%', ''))
if total_coverage < 60:
state['approvals'].update({"unit_tests_coverage": False})
msg = f"Coverage report failed to cover at least 60% please revise unit tests: \n{coverage_output}"
state['messages'].append(msg)
logging.info(msg)
else:
state['approvals'].update({"unit_tests_coverage": True})
msg = f"Coverage report successful: \n{coverage_output}"
state['messages'].append(msg)
logging.info(msg)
break
else:
msg = f"Coverage report failed execution: {coverage_process.stdout}"
state['messages'].append(msg)
logging.info(msg)
state['approvals'].update({"unit_tests_coverage": False})

except Exception as e:
state['approvals'].update({"unit_tests": False})
state['messages'].append(f"Error running unit tests: {str(e)}")
state['approvals'].update({"unit_tests": False, "unit_tests_coverage": False})
msg = f"Error running unit tests: {str(e)}"
state['messages'].append(msg)
logging.info(msg)

return state

def route_unit_tests(state: GraphState) -> Literal["__end__", 'assistant']:
if all(state["approvals"].values()):
def route_unit_tests(state: GraphState) -> Literal["__end__", 'unit_tests']:
if state['approvals']['unit_tests_coverage'] or state['approvals']['unit_tests_iter'] > 2:
return END
else:
return "assistant" # go back to implementation with a message from the controller
return "unit_tests" # go back and regenerate unit tests

def environment_setup(state: GraphState):
"""
Expand Down Expand Up @@ -349,6 +398,6 @@ def build_graph():
graph.add_edge('unit_tests', "environment_setup")
graph.add_edge("environment_setup", "approve_acceptance_tests")
graph.add_edge("approve_acceptance_tests", "approve_unit_tests")
graph.add_edge("approve_unit_tests", END)
graph.add_conditional_edges("approve_unit_tests", route_unit_tests)

return graph.compile()
Binary file modified system/images/swegraph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion system/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
prd_content = prd_file.read()

graph = build_graph()
#graph.get_graph().draw_mermaid_png(output_file_path="system/images/swegraph.png")
graph.get_graph().draw_mermaid_png(output_file_path="system/images/swegraph.png")
state = {'documents': {'PRD': prd_content},
'messages': [HumanMessage(content=DESIGN_PROMPT.format(PRD=prd_content))]}
final_state = graph.invoke(state)
Expand Down
16 changes: 10 additions & 6 deletions system/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
2. UML_sequence: A Mermaid 11 sequence diagram showing the key interactions and flow between components as specified in the PRD
3. architecture_design: A detailed text based representation of the file tree that is true to the PRD and includes but is not limited to:
- A root-level README.md file documenting the system overview
- An 'examples' directory containing:
- An 'examples' directory (inside the root directory) containing:
- example_usage.sh demonstrating core functionality along with any additional example files that align with use cases mentioned in the PRD
"""

Expand Down Expand Up @@ -255,21 +255,25 @@
- README.md in the root directory with complete documentation
- example_usage.sh in the "examples" directory with working examples that are consistent with the PRD
- if the PRD or architecture design call for other files to be in the "examples" directory then include them as well
5. Implement all necessary code files with full, working implementations
5. Implement all necessary code files with full, working implementations, don't specify empty directories
6. Include any CSV/JSON files mentioned in the PRD or architecture design
7. Ensure the code is production-ready and follows best practices
7. Ensure the code is production-ready and follows best practices
8. Tests will be run from the root directory of the repository so keep that in mind for import statements
"""


APPROVE_IMPLEMENTATION_PROMPT = """
Please verify if the architectural design described in the text representation below:
Below is the architectural design:
```
{architecture_design}
```
is accurately mirrored in the documents below. There should be one name for each file in the architecture design.
Please verify that the architecture design is accurately mirrored in the document list below. Each file path in the architecture design should have a corresponding file in the documents list, with the following exceptions:
- Directories that only contain image files (e.g., .jpg, .png, etc.)
- Empty directories that are intended as storage locations
- Non-text based files since the LLM cannot generate them
```
{code}
Expand Down Expand Up @@ -328,7 +332,7 @@
-----Instructions--------
Your task is to generate unit tests to ensure the software adheres to the requirements in the PRD.
Pay close attention to the code and the PRD to ensure the tests are comprehensive and accurate.
The unit tests will be written using the unittest module and ultimately written to a file at: tests/unit/test_module.py. Keep this in mind.
The unit tests will be written using the unittest module and ultimately written to a file at: tests/unit/test_module.py. Keep this in mind for relative imports and file paths.
Write the content of the unit tests to a dictionary where the key is "test_module" and the value is the content of the unit test.
Make another key in this dictionary called "command" and write the command to run the unit tests as the value for the "command" key.
Nest this dictionary in another dictionary with the key "unit_tests" and return this nested dictionary.
Expand Down
2 changes: 2 additions & 0 deletions system/relocate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def relocate_directory(temp_dir: str, dest_dir: str, new_name: str) -> None:
for file in os.listdir(os.path.join(temp_dir, directory)):
shutil.move(os.path.join(temp_dir, directory, file), os.path.join(new_path, file))

os.rmdir(os.path.join(temp_dir, directory))


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Relocate and rename a directory")
Expand Down
18 changes: 17 additions & 1 deletion system/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import subprocess
import sys
import os
import shutil
import copy
from typing import Dict, List, Union

Expand Down Expand Up @@ -58,14 +59,27 @@ def write_files(base_path: str, files_content: Dict[str, str]) -> None:

def create_repository(base_path: str, documents: Dict) -> None:
"""Create repository structure and write files including test files"""
# Create base directory
# Create base directory if it doesn't exist
os.makedirs(base_path, exist_ok=True)

# If base_path exists, delete all its contents
for filename in os.listdir(base_path):
file_path = os.path.join(base_path, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print(f'Failed to delete {file_path}. Reason: {e}')

files = copy.deepcopy(documents['code'])

# Get first directory name, handling paths with leading slash
root_dir = next(name for name in next(iter(files)).split('/') if name)

coverage = "[run]\nomit =\n */__init__.py\n tests/*\n"

files.update({
f'{root_dir}/tests/unit/test_module.py': documents['unit_tests']['test_module'],
f'{root_dir}/tests/unit/__init__.py': '',
Expand All @@ -75,6 +89,8 @@ def create_repository(base_path: str, documents: Dict) -> None:
f'{root_dir}/docs/UML_class.md': documents['UML_class'],
f'{root_dir}/docs/UML_sequence.md': documents['UML_sequence'],
f'{root_dir}/docs/architecture_design.md': documents['architecture_design'],
f'{root_dir}/requirements.txt': documents['requirements'],
f'{root_dir}/.coveragerc': coverage,
})

# Write files
Expand Down

0 comments on commit a7f8d75

Please sign in to comment.