
Commit

add evaluation step
AllyW committed Sep 23, 2024
1 parent 99e0b45 commit 5649f9e
Showing 4 changed files with 60 additions and 13 deletions.
3 changes: 3 additions & 0 deletions action.yml
@@ -26,6 +26,9 @@ inputs:
pr_reset:
description: "Whether clean last round pr reviews"
required: false
review_filter:
description: "Whether filter review result by evaluation"
required: false
runs:
using: "composite"
steps:
20 changes: 19 additions & 1 deletion code_review/_const.py
@@ -25,7 +25,7 @@

PR_DIFF_COMP_PROMPT = """
Below is the diff info from a GitHub pull request. Please make a simple code review and find the places that can be refined, in a simple and concise way. Please describe the refinement in no more than 100 words. For the return message, please follow these rules:
1. if the code diff is like "VERSION=something", please ignore this change and do not return any review result, just do not return anything;
1. if the code diff is like "VERSION=something", please ignore review this code change and do not return anything, just do not return anything;
2. if the code diff is from history notes, help messages or examples, please just check the sentence grammar, if no grammar error, please do not return anything;
3. if the code diff is from history notes, help messages or examples, and contains '<>', please ensure it's surrounding html tags, if it's not, then please ensure it uses backtick to mark it as placeholder, if not, please pointed it out. If they contains url link, please help check whether the url link is accessible, if it's not accessible, please make sure it uses backtick to mark it as an fake url example, if not, pointed it out. Please pay attention, for history notes, help messages or examples, do not say anything if they follow previous rules, nothing like 'Everything else looks good.' needs to be returned, just do not return anything
4. for code diff from python, please review it as an expert python programmer, give a refined way for it if applicable. If your advice is just a different way for writing it, then please do not return anything. Just give the review suggestion that you thnk is way more refined, and git it in a concise manner.
@@ -35,3 +35,21 @@
PR_TAG = """
:mag_right:
"""

PR_EVALUATE_SYSTEM_SET = """
You are an expert GitHub PR developer and reviewer.
"""


PR_EVALUATE_PROMPT = """
Below is a list of existing GitHub PR review results and their corresponding evaluation scores, delimited by @@@.
Nothing to return.@@@-10
The URL is incomplete. It should be https://learn.microsoft.com/en-us/cli/azure/monitor/data-collection/endpoint/association?view=azure-cli-latest#az-monitor-data-collection-endpoint-association-list.@@@5
The placeholder <resource/monitor/endpoint_id> should be enclosed in backticks for clarity and compliance with markdown formatting.@@@3
`az aks connection create` should be backticked in the history notes for consistency with the previous usage.@@@-1
Review-Ignored@@@-10
Please evaluate the sentence below according to the standard set by the evaluation example data above, and just give a score:
"""

DEFAULT_EVALUATE_SCORE = -1
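
The new evaluation prompt embeds its few-shot calibration examples as @@@-delimited "comment@@@score" rows. If those rows ever need to be inspected or reused programmatically, a small parser along these lines could recover them as (comment, score) pairs; this is a hypothetical helper for illustration, not part of this commit:

```python
# Hypothetical helper (illustration only): recover the @@@-delimited
# few-shot examples from PR_EVALUATE_PROMPT as (comment, score) pairs.
def parse_evaluation_examples(prompt: str) -> list[tuple[str, int]]:
    pairs: list[tuple[str, int]] = []
    for line in prompt.strip().splitlines():
        if "@@@" not in line:
            continue  # skip the instruction sentences around the examples
        comment, _, score = line.rpartition("@@@")
        try:
            pairs.append((comment.strip(), int(score)))
        except ValueError:
            continue  # the intro line ends with "@@@." and is not an example row
    return pairs
```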
36 changes: 32 additions & 4 deletions code_review/pr_processor.py
@@ -10,7 +10,9 @@
from code_review.git_client import GitClient
from code_review.gpt_client import GptClient, format_gpt_message
from code_review.util import filter_review_patch_pattern, get_patch_position
from code_review._const import MAX_PATCH_LIMITATION, PR_DIFF_COMP_PROMPT, PR_SUMMARY_PROMPT, MODEL_USER_ROLE, PR_TAG
from code_review._const import MAX_PATCH_LIMITATION, \
PR_DIFF_COMP_PROMPT, PR_SUMMARY_PROMPT, PR_EVALUATE_PROMPT, PR_EVALUATE_SYSTEM_SET, PR_TAG, \
MODEL_USER_ROLE, MODEL_SYSTEM_ROLE, DEFAULT_EVALUATE_SCORE
logger = logging.getLogger(__name__)


@@ -26,15 +28,17 @@ def review_pr(self):
self.git_manager.reset_pr_comment()
pr_diffs = self.git_manager.get_pr_diff_files()
code_suggest = os.environ.get("code_suggest", False)
review_evaluate = os.environ.get("review_filter", True)
if code_suggest:
self.__review_pr_code_line__(pr_diffs)
self.__review_pr_code_line__(pr_diffs, review_evaluate)
pr_summary = os.environ.get("pr_summary", True)
if pr_summary:
self.__review_pr_summary__(pr_diffs)

def __review_pr_code_line__(self, pr_diffs: dict[str, Any]) -> None:
def __review_pr_code_line__(self, pr_diffs: dict[str, Any], review_evaluate: bool) -> None:
if not pr_diffs or "files" not in pr_diffs or not pr_diffs["files"]:
logger.warning("No pr diff files, code review ignored")
return
commit_id = pr_diffs["commits"][-1]["sha"]
review_res = []
for diff_item in pr_diffs["files"]:
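
One thing worth noting about the flag handling in review_pr above: os.environ.get always returns a string when the variable is set, so review_evaluate holds the raw text of review_filter (the string "false" is still truthy), and the Python default True only applies when the variable is absent. A small coercion helper would make the flag behave like a boolean; this is only a sketch, not code from this commit:

```python
import os

def env_flag(name: str, default: bool = False) -> bool:
    # Environment variables arrive as strings; treat common truthy spellings
    # as True and anything else ("false", "0", "", ...) as False.
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}

# e.g. review_evaluate = env_flag("review_filter", default=True)
```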
@@ -53,8 +57,13 @@ def __review_pr_code_line__(self, pr_diffs: dict[str, Any]) -> None:
format_gpt_message(messages, [PR_DIFF_COMP_PROMPT], role=MODEL_USER_ROLE)
format_gpt_message(messages, [patch], role=MODEL_USER_ROLE)
gpt_resp = self.gpt_manager.request_gpt(messages)
if not gpt_resp or gpt_resp == "Review-Ignored":
if not gpt_resp:
continue
if review_evaluate:
res_score = self.__evaluate_review_comment__(gpt_resp)
if res_score < 0:
logger.info("Unused review comment, ignored")
continue
review_item = {
"path": filename,
"commit_id": commit_id,
@@ -72,6 +81,7 @@ def __review_pr_code_line__(self, pr_diffs: dict[str, Any]) -> None:
def __review_pr_summary__(self, pr_diffs: dict[str, Any]) -> None:
if not pr_diffs or "files" not in pr_diffs or not pr_diffs["files"]:
logger.warning("No pr diff files, pr summary ignored")
return
commit_id = pr_diffs["commits"][-1]["sha"]
pr_contents = [diff_item["patch"] for diff_item in pr_diffs["files"]]
messages: list[dict[str, str]] = []
@@ -89,3 +99,21 @@ def __review_pr_summary__(self, pr_diffs: dict[str, Any]) -> None:
}
logger.warning("summary review_item: {0}".format(json.dumps(review_item)))
self.git_manager.comment_pr([review_item])

def __evaluate_review_comment__(self, review_comment: str) -> int:
if not review_comment:
logger.warning("No review comment, shouldn't be here")
return DEFAULT_EVALUATE_SCORE
messages: list[dict[str, str]] = []
evaluate_prompt = PR_EVALUATE_PROMPT + review_comment
format_gpt_message(messages, [PR_EVALUATE_SYSTEM_SET], role=MODEL_SYSTEM_ROLE)
format_gpt_message(messages, [evaluate_prompt], role=MODEL_USER_ROLE)
gpt_resp = self.gpt_manager.request_gpt(messages)
logger.warning("Get result {0} from message: {1}".format(gpt_resp, review_comment))
result = DEFAULT_EVALUATE_SCORE
try:
result = int(gpt_resp)
except Exception: # pylint: disable=broad-except
result = DEFAULT_EVALUATE_SCORE
finally:
return result
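
The int(gpt_resp) cast above only succeeds when the model replies with a bare integer; anything like "Score: 3" falls through to DEFAULT_EVALUATE_SCORE and the comment is discarded. If that turns out to be too strict, a slightly more forgiving parse could pull the first signed integer out of the reply. This is a sketch only, not part of this commit:

```python
import re

DEFAULT_EVALUATE_SCORE = -1  # mirrors the constant in code_review/_const.py

def parse_score(gpt_resp: str) -> int:
    # Accept replies such as "3", "-10" or "Score: 5" by extracting the
    # first signed integer; fall back to the default score otherwise.
    if not gpt_resp:
        return DEFAULT_EVALUATE_SCORE
    match = re.search(r"-?\d+", gpt_resp)
    return int(match.group()) if match else DEFAULT_EVALUATE_SCORE
```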
14 changes: 6 additions & 8 deletions code_review/util.py
@@ -23,16 +23,14 @@ def get_patch_position(patch_body: str) -> int | None:
logger.warning("No git patch found, shouldn't be here")
return
start: int = int(new_start)
found: bool = False
last_add: int = 0
line_add: int = 0
logger.warning("patch body: {0}".format(patch_body))
logger.warning("start line: {0}".format(start))
for line in patch_body.split("\n"):
if line.find("-") == 0:
continue
if not found or line.find("+") == 0:
start += 1
if line.find("+") == 0:
found = True
continue
if found and line.find("+") != 0:
return start - 2
line_add += 1
if line.find("+") == 0:
last_add = line_add
return start + last_add
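
To see what the reworked loop computes, here is a small standalone trace that mirrors it: deletion lines are skipped, every other line advances line_add, and last_add keeps the count at the most recent added line. This is a sketch only, assuming the patch body begins with the @@ hunk header, as the patch strings returned by GitHub's API do:

```python
# Standalone sketch mirroring the new counting in get_patch_position.
def trace_last_added(patch_body: str, new_start: int) -> int:
    last_add = 0
    line_add = 0
    for line in patch_body.split("\n"):
        if line.startswith("-"):
            continue  # deletion lines are ignored entirely
        line_add += 1
        if line.startswith("+"):
            last_add = line_add  # remember the count at the last added line
    return new_start + last_add

sample_patch = (
    "@@ -10,3 +10,4 @@ def foo():\n"
    " unchanged line\n"
    "-old line\n"
    "+new line one\n"
    "+new line two\n"
    " trailing context"
)
# The header and context lines count as 1, 2 and 5; the added lines count as
# 3 and 4, so last_add ends at 4 and the call returns 10 + 4 = 14.
print(trace_last_added(sample_patch, 10))  # 14
```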
