Fix update mode: use the build action and source file name to detect whether

the same file is analyzed again, and to clean up its previous results
PeterKaszab · Jul 5, 2016 · d31829e · d31829e
1 parent 6a7f70a
commit d31829e
Show file tree

Hide file tree

Showing 8 changed files with 156 additions and 30 deletions.
diff --git a/alembic.ini b/alembic.ini
@@ -29,7 +29,7 @@ script_location = db_migrate
 # are written from script.py.mako
 # output_encoding = utf-8
 
-sqlalchemy.url = postgres://codechecker@localhost:8764/codechecker
+sqlalchemy.url = postgres://codechecker@localhost:5432/codechecker
 
 # Logging configuration
 [loggers]

diff --git a/codechecker_lib/analyzers/result_handler_plist_to_db.py b/codechecker_lib/analyzers/result_handler_plist_to_db.py
@@ -47,13 +47,17 @@ def handle_results(self):
         with client.get_connection() as connection:
 
             LOG.debug('Storing original build and analyzer command to the database')
+
+            _, source_file_name = ntpath.split(self.analyzed_source_file)
+
             analisys_id = connection.add_build_action(self.__run_id,
                                                       self.buildaction.original_command,
-                                                      ' '.join(self.analyzer_cmd))
+                                                      ' '.join(self.analyzer_cmd),
+                                                      self.buildaction.analyzer_type,
+                                                      source_file_name)
 
             # store buildaction and analyzer command to the database
 
-            _, source_file_name = ntpath.split(self.analyzed_source_file)
 
             if self.analyzer_returncode == 0:
 

diff --git a/codechecker_lib/client.py b/codechecker_lib/client.py
@@ -150,9 +150,16 @@ def replace_config_info(self, run_id, config_list):
         ''' bool replaceConfigInfo(1: i64 run_id, 2: list<ConfigValue> values) '''
         return self._client.replaceConfigInfo(run_id, config_list)
 
-    def add_build_action(self, run_id, build_cmd, check_cmd):
-        ''' i64  addBuildAction(1: i64 run_id, 2: string build_cmd) '''
-        return self._client.addBuildAction(run_id, build_cmd, check_cmd)
+    def add_build_action(self, run_id, build_cmd, check_cmd, analyzer_type,
+                         analyzed_source_file):
+        ''' i64  addBuildAction(1: i64 run_id, 2: string build_cmd,
+        3: string check_cmd, 4: string analyzer_type, 5: string analyzed_source_file)
+        '''
+        return self._client.addBuildAction(run_id,
+                                           build_cmd,
+                                           check_cmd,
+                                           analyzer_type,
+                                           analyzed_source_file)
 
     def finish_build_action(self, action_id, failure):
         ''' bool finishBuildAction(1: i64 action_id, 2: string failure) '''

diff --git a/db_migrate/versions/30e41fdf2e85_store_analyzer_type_and_analyzed_source_.py b/db_migrate/versions/30e41fdf2e85_store_analyzer_type_and_analyzed_source_.py
@@ -0,0 +1,48 @@
+"""Store analyzer type and analyzed source file to the database for each buildaction
+
+Revision ID: 30e41fdf2e85
+Revises: 4e97419519b3
+Create Date: 2016-07-04 15:36:26.208047
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '30e41fdf2e85'
+down_revision = '4e97419519b3'
+branch_labels = None
+depends_on = None
+
+import sys
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    '''
+    Extend the build_actions table with the columns used to decide whether
+    the results of a build action should be deleted in update mode.
+
+    analyzer_type: identifies which analyzer analyzed the build action.
+
+    analyzed_source_file: identifies which source file was analyzed in the build action (a build action may contain multiple source files).
+    '''
+
+    op.add_column('build_actions',
+                  sa.Column('analyzed_source_file',
+                            sa.String(),
+                            nullable=False,
+                            server_default='')
+                  )
+
+    op.add_column('build_actions',
+                  sa.Column('analyzer_type',
+                            sa.String(),
+                            nullable=False,
+                            server_default='')
+                  )
+
+
+def downgrade():
+    # Downgrade is not supported: terminate with exit status 1.
+    sys.exit(1)
diff --git a/db_model/orm_model.py b/db_model/orm_model.py
@@ -106,6 +106,8 @@ class BuildAction(Base):
     id = Column(Integer, autoincrement=True, primary_key=True)
     run_id = Column(Integer, ForeignKey('runs.id', deferrable = True, initially = "DEFERRED", ondelete='CASCADE'))
     build_cmd = Column(String)
+    analyzer_type = Column(String, nullable=False)
+    analyzed_source_file = Column(String, nullable=False)
     check_cmd = Column(String)
     # No failure if the text is empty.
     failure_txt = Column(String)
@@ -114,10 +116,12 @@ class BuildAction(Base):
     # Seconds, -1 if unfinished.
     duration = Column(Integer)
 
-    def __init__(self, run_id, build_cmd, check_cmd):
+    def __init__(self, run_id, build_cmd, check_cmd, analyzer_type, analyzed_source_file):
         self.run_id, self.build_cmd, self.check_cmd, self.failure_txt = \
             run_id, build_cmd, check_cmd, ''
         self.date = datetime.now()
+        self.analyzer_type = analyzer_type
+        self.analyzed_source_file = analyzed_source_file
         self.duration = -1
 
     def mark_finished(self, failure_txt):
@@ -204,6 +208,12 @@ class Report(Base):
     end_bugevent = Column(Integer, ForeignKey('bug_path_events.id', deferrable = True, initially = "DEFERRED", ondelete='CASCADE'), index = True)
     suppressed = Column(Boolean)
 
+    # Cascade delete might already have removed rows, and SQLAlchemy warns about this;
+    # set this to False to silence the warnings about already deleted items.
+    __mapper_args__ = {
+        'confirm_deleted_rows' : False
+    }
+
 
     # Priority/severity etc...
     def __init__(self, run_id, bug_id, file_id, checker_message, start_bugpoint, start_bugevent, end_bugevent, checker_id, checker_cat, bug_type, severity, suppressed):
@@ -225,7 +235,8 @@ class ReportsToBuildActions(Base):
         Integer, ForeignKey('build_actions.id', deferrable = True, initially = "DEFERRED", ondelete='CASCADE'), primary_key=True)
 
     def __init__(self, report_id, build_action_id):
-        self.report_id, self.build_action_id = report_id, build_action_id
+        self.report_id = report_id
+        self.build_action_id = build_action_id
 
 
 class SuppressBug(Base):

diff --git a/storage_server/report_server.py b/storage_server/report_server.py
@@ -67,26 +67,32 @@ def __del_source_file_for_report(self, run_id, report_id, report_file_id):
                             and_(Report.run_id == run_id,
                                  Report.file_id == report_file_id,
                                  Report.id != report_id))
-        if report_reference_to_file.count() == 0:
+        rep_ref_count = report_reference_to_file.count()
+        if rep_ref_count == 0:
+            LOG.debug("No other references to the source file \n id: " +
+                      str(report_file_id) + " can be deleted.")
             # There are no other references to the file, it can be deleted.
             self.session.query(File).filter(File.id == report_file_id)\
                                     .delete()
+        return rep_ref_count
 
     def __del_buildaction_results(self, build_action_id, run_id):
         """
         Delete the build action and related analysis results from the database.
 
         Report entry will be deleted by ReportsToBuildActions cascade delete.
         """
+        LOG.debug("Cleaning old buildactions")
+
         try:
             rep_to_ba = self.session.query(ReportsToBuildActions) \
                               .filter(ReportsToBuildActions.build_action_id ==
                                       build_action_id)
 
             reports_to_delete = [r.report_id for r in rep_to_ba]
 
-            self.session.query(BuildAction).filter(BuildAction.id == build_action_id)\
-                                           .delete()
+            LOG.debug("Trying to delete reports belonging to the buildaction:")
+            LOG.debug(reports_to_delete)
 
             for report_id in reports_to_delete:
                 # Check if there is another reference to this report from
@@ -96,13 +102,31 @@ def __del_buildaction_results(self, build_action_id, run_id):
                                     and_(ReportsToBuildActions.report_id == report_id,
                                          ReportsToBuildActions.build_action_id != build_action_id))
 
+                LOG.debug("Checking report id:" + str(report_id))
+
+                LOG.debug("Report id " + str(report_id) +
+                          " reference count: " +
+                          str(other_reference.count()))
+
                 if other_reference.count() == 0:
                     # There is no other reference, data related to the report
                     # can be deleted.
                     report = self.session.query(Report).get(report_id)
+
+                    LOG.debug("Removing bug path events")
                     self.__sequence_deleter(BugPathEvent, report.start_bugevent)
+                    LOG.debug("Removing bug report points")
                     self.__sequence_deleter(BugReportPoint, report.start_bugpoint)
-                    self.__del_source_file_for_report(run_id, report.id, report.file_id)
+
+                    if self.__del_source_file_for_report(run_id, report.id, report.file_id):
+                        LOG.debug("Stored source file needs to be kept, there is reference to it from another report.")
+                        # The report needs to be deleted: if nothing else references
+                        # the file, cascade delete removes the report with the file,
+                        # otherwise manual cleanup is needed.
+                        self.session.delete(report)
+
+            self.session.query(BuildAction).filter(BuildAction.id == build_action_id)\
+                                           .delete()
 
             self.session.query(ReportsToBuildActions).filter(
                 ReportsToBuildActions.build_action_id == build_action_id).delete()
@@ -184,30 +208,49 @@ def replaceConfigInfo(self, run_id, config_values):
         return True
 
     @decorators.catch_sqlalchemy
-    def addBuildAction(self, run_id, build_cmd, check_cmd):
+    def addBuildAction(self,
+                       run_id,
+                       build_cmd,
+                       check_cmd,
+                       analyzer_type,
+                       analyzed_source_file):
         '''
         '''
         try:
+
             build_actions = \
                 self.session.query(BuildAction) \
-                            .filter(and_(BuildAction.run_id == run_id,
-                                         BuildAction.build_cmd == build_cmd,
-                                         BuildAction.check_cmd == check_cmd))\
-                            .all()
+                    .filter(and_(BuildAction.run_id == run_id,
+                                BuildAction.build_cmd == build_cmd,
+                                or_(
+                                 and_(BuildAction.analyzer_type == analyzer_type,
+                                    BuildAction.analyzed_source_file == analyzed_source_file),
+                                 and_(BuildAction.analyzer_type == "",
+                                    BuildAction.analyzed_source_file == "")
+                               )))\
+            .all()
+
 
             if build_actions:
                 # Delete the already stored buildaction and analysis results.
                 for build_action in build_actions:
+
                     self.__del_buildaction_results(build_action.id, run_id)
 
                 self.session.commit()
 
+            action = BuildAction(run_id,
+                                 build_cmd,
+                                 check_cmd,
+                                 analyzer_type,
+                                 analyzed_source_file)
+            self.session.add(action)
+            self.session.commit()
+
         except Exception as ex:
             LOG.error(ex)
+            raise
 
-        action = BuildAction(run_id, build_cmd, check_cmd)
-        self.session.add(action)
-        self.session.commit()
         return action.id
 
     @decorators.catch_sqlalchemy

diff --git a/tests/test_packages/functional/package_test/test_hash_clash.py b/tests/test_packages/functional/package_test/test_hash_clash.py
@@ -15,6 +15,7 @@
 
 from test_utils.thrift_client_to_db import CCReportHelper
 
+from nose.tools import nottest
 
 def _generate_content(cols, lines):
     """Generates a random file content string."""
@@ -49,12 +50,12 @@ def _create_file(self, run_id, name, cols=10, lines=10):
         success = self._report.addFileContent(need.fileId, content)
         self.assertTrue(success)
 
-        return need.fileId
+        return need.fileId, path
 
-    def _create_build_action(self, run_id, name):
+    def _create_build_action(self, run_id, name, analyzer_type, source_file):
         """Creates a new build action."""
 
-        return self._report.addBuildAction(run_id, name, name)
+        return self._report.addBuildAction(run_id, name, name, analyzer_type, source_file)
 
     def _create_simple_report(self, file_id, build_action_id, bug_hash, position):
         """Creates a new report with one bug path position and event."""
@@ -88,9 +89,13 @@ def _init_new_test(self, name):
         """
 
         run_id = self._create_run(name)
-        file_id = self._create_file(run_id, name)
-        build_action_id = self._create_build_action(run_id, name)
-        yield (run_id, file_id, build_action_id)
+        file_id, source_file = self._create_file(run_id, name)
+
+        # analyzer type needs to match with the supported analyzer types
+        # clangsa is used for testing
+        analyzer_type = 'clangsa'
+        build_action_id = self._create_build_action(run_id, name, analyzer_type, source_file)
+        yield (run_id, file_id, build_action_id, source_file)
         self._report.finishBuildAction(build_action_id, 'OK')
         self._report.finishCheckerRun(run_id)
 
@@ -110,8 +115,8 @@ def test(self):
 
         with self._init_new_test('test1') as ids1, \
              self._init_new_test('test2') as ids2:
-            _, file_id1, build_action_id1 = ids1
-            run_id2, file_id2, build_action_id2 = ids2
+            _, file_id1, build_action_id1, source_file1 = ids1
+            run_id2, file_id2, build_action_id2, source_file2 = ids2
             rep_id1 = self._create_simple_report(file_id1,
                                                  build_action_id1,
                                                  'XXX',
@@ -145,7 +150,13 @@ def test(self):
             # Same file and position, different hash
             self.assertNotEqual(rep_id4, rep_id5)
 
-            build_action_id2_2 = self._create_build_action(run_id2, 'test2_2')
+            # analyzer type needs to match with the supported analyzer types
+            # clangsa is used for testing
+            analyzer_type = 'clangsa'
+            build_action_id2_2 = self._create_build_action(run_id2,
+                                                           'test2_2',
+                                                           analyzer_type,
+                                                           source_file2)
             try:
                 rep_id6 = self._create_simple_report(file_id2,
                                                      build_action_id2_2,

diff --git a/thrift_api/report_storage_server.thrift b/thrift_api/report_storage_server.thrift
@@ -62,7 +62,9 @@ service CheckerReport {
                 i64  addBuildAction(
                                     1: i64 run_id,
                                     2: string build_cmd,
-                                    3: string check_cmd)
+                                    3: string check_cmd,
+                                    4: string analyzer_type,
+                                    5: string analyzed_source_file)
                                     throws (1: shared.RequestFailed requestError),
 
                 i64  addReport(