Skip to content

Commit

Permalink
extended filtering on MatchingResult
Browse files Browse the repository at this point in the history
  • Loading branch information
danielplohmann committed Jun 6, 2023
1 parent 8c99ee0 commit 41fc1e0
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ in IDA.


## Version History
* 2023-06-06 v1.0.7: Extended filtering capabilities on MatchingResult.
* 2023-06-02 v1.0.6: IDA plugin can now task matching jobs, show their results and batch import labels. Harmonization of MatchingResult.
* 2023-05-22 v1.0.3: More robustness for path verification when using MCRIT CLI on Malpedia repo folder.
* 2023-05-12 v1.0.1: Some progress on label import for the IDA plugin. Reflected API extension of MCRITweb in McritClient.
Expand Down
2 changes: 1 addition & 1 deletion mcrit/config/McritConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
class McritConfig(object):

# NOTE to self: always change this in setup.py as well!
VERSION = "1.0.6"
VERSION = "1.0.7"
CONFIG_FILE_PATH = str(os.path.abspath(__file__))
PROJECT_ROOT = str(os.path.abspath(os.sep.join([CONFIG_FILE_PATH, "..", ".."])))

Expand Down
32 changes: 32 additions & 0 deletions mcrit/storage/MatchingResult.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def applyFilterValues(self):
self.filterToUniqueMatchesOnly()
if self.filter_values.get("filter_exclude_own_family", None):
self.excludeOwnFamily()
if self.filter_values.get("filter_family_name", None):
self.filterByFamilyName(self.filter_values["filter_family_name"])
# filter functions
if self.filter_values.get("filter_exclude_library", None):
self.excludeLibraryMatches()
Expand All @@ -104,8 +106,12 @@ def applyFilterValues(self):
self.filterToFunctionScore(min_score=self.filter_values["filter_function_min_score"])
if self.filter_values.get("filter_function_max_score", None):
self.filterToFunctionScore(max_score=self.filter_values["filter_function_max_score"])
if self.filter_values.get("filter_function_offset", None):
self.filterToFunctionOffset(self.filter_values["filter_function_offset"])
if self.filter_values.get("filter_exclude_pic", None):
self.excludePicMatches()
if self.filter_values.get("filter_func_unique", None):
self.filterToUniqueFunctionMatchesOnly()

def getFamilyNameByFamilyId(self, family_id):
if self.family_id_to_name_map is None:
Expand All @@ -119,6 +125,14 @@ def getFamilyIdsMatchedByFunctionId(self, function_id):
return 0
return self.function_id_to_family_ids_matched[function_id]

def filterByFamilyName(self, filter_term):
    """Reduce sample matches to those whose family name contains filter_term.

    Rebuilds ``self.filtered_sample_matches`` in place, keeping (in their
    original order) only the entries for which ``filter_term`` is contained
    in ``sample_match.family``. The containment test is case-sensitive.

    Args:
        filter_term: the term that has to be part of the family name.
    """
    self.filtered_sample_matches = [
        sample_match
        for sample_match in self.filtered_sample_matches
        # a match without family information can never satisfy a name filter;
        # skip it instead of failing with a TypeError on the `in` test
        if sample_match.family is not None and filter_term in sample_match.family
    ]

def filterToDirectMinScore(self, min_score, nonlib=False):
""" reduce aggregated sample matches to those with direct score of min_score or higher, but nonlib flag is not applied to library samples """
filtered_sample_matches = []
Expand Down Expand Up @@ -156,6 +170,24 @@ def filterToUniqueMatchesOnly(self):
filtered_sample_matches.append(sample_match)
self.filtered_sample_matches = filtered_sample_matches

def filterToUniqueFunctionMatchesOnly(self):
    """Reduce function matches to those that are unique with respect to the family.

    Uses the entries returned by ``self.getAggregatedFunctionMatches()``: a
    function counts as unique when its entry has ``num_families_matched == 1``.
    Rebuilds ``self.filtered_function_matches`` in place, preserving order.
    """
    is_unique_by_function_id = {
        entry["function_id"]: entry["num_families_matched"] == 1
        for entry in self.getAggregatedFunctionMatches()
    }
    self.filtered_function_matches = [
        function_match
        for function_match in self.filtered_function_matches
        # a function_id without an aggregated entry cannot be shown to be
        # unique, so it is dropped instead of raising a KeyError
        if is_unique_by_function_id.get(function_match.function_id, False)
    ]

def filterToFunctionOffset(self, offset):
    """Reduce function matches to those located at a specific offset.

    Rebuilds ``self.filtered_function_matches`` in place, keeping (in their
    original order) only the entries whose ``offset`` attribute compares
    equal to the given offset.

    Args:
        offset: the offset value a function match has to equal to be kept.
    """
    self.filtered_function_matches = [
        function_match
        for function_match in self.filtered_function_matches
        if function_match.offset == offset
    ]

def excludeOwnFamily(self):
""" remove all sample matches with the same family_id as the reference samples"""
filtered_sample_matches = []
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

setup(
name='mcrit',
version="1.0.6",
version="1.0.7",
description='MCRIT is a framework created for simplified application of the MinHash algorithm to code similarity.',
long_description_content_type="text/markdown",
long_description=README,
Expand Down

0 comments on commit 41fc1e0

Please sign in to comment.