Skip to content

Commit

Permalink
Merge pull request Ericsson#3215 from csordasmarton/report_hash_type_…
Browse files Browse the repository at this point in the history
…hints

[tools] Add type hints to codechecker report hash tool
  • Loading branch information
csordasmarton authored Mar 2, 2021
2 parents 17b25a6 + 4485d42 commit 3871152
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 42 deletions.
1 change: 1 addition & 0 deletions analyzer/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ lxml==4.6.2
portalocker==1.7.0
psutil==5.7.0
PyYAML==5.3.1
mypy_extensions==0.4.3
1 change: 1 addition & 0 deletions analyzer/requirements_py/dev/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ portalocker==1.7.0
pylint==2.4.4
mkdocs==1.0.4
PyYAML==5.3.1
mypy_extensions==0.4.3
1 change: 1 addition & 0 deletions analyzer/requirements_py/osx/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ portalocker==1.7.0
psutil==5.7.0
scan-build==2.0.19
PyYAML==5.3.1
mypy_extensions==0.4.3
116 changes: 76 additions & 40 deletions tools/codechecker_report_hash/codechecker_report_hash/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@
import sys
import traceback

from enum import Enum

from typing import List, Optional, Tuple

if sys.version_info >= (3, 8):
from typing import TypedDict # pylint: disable=no-name-in-module
else:
from mypy_extensions import TypedDict

LOG = logging.getLogger('codechecker_report_hash')

handler = logging.StreamHandler()
Expand All @@ -24,13 +33,37 @@
LOG.addHandler(handler)


class HashType(object):
class DiagLoc(TypedDict):
    """ Position referenced by a diagnostic section.

    The hash generators in this module read both keys from the
    'location' entry of a diagnostic (or of a bug path section).
    """
    # Line number; handed to the line reader as the line to fetch.
    line: int
    # Column number; used as the initial from/until column of the hash.
    col: int

class DiagEdge(TypedDict):
    """ A single edge of a control section in a bug path.

    Both ends are given as a pair of locations.
    NOTE(review): presumably each pair is the (begin, end) position of a
    source range -- confirm against the plist producer.
    """
    # Pair of locations describing where the edge starts.
    start: Tuple[DiagLoc, DiagLoc]
    # Pair of locations describing where the edge ends.
    end: Tuple[DiagLoc, DiagLoc]

class DiagPath(TypedDict):
    """ One section (element) of the bug path of a diagnostic.

    The path sensitive hash takes the last section of the path as the
    main section and reads its 'location' and 'message' entries.
    """
    # Type of the section.
    # NOTE(review): presumably 'event' or 'control' -- confirm.
    kind: str
    # Message text attached to the section.
    message: str
    # Position this section refers to.
    location: DiagLoc
    # Edges belonging to the section.
    edges: List[DiagEdge]

class Diag(TypedDict):
    """ A diagnostic entry for which a report hash can be generated.

    This is the shape of the 'diag' argument of get_report_hash().
    """
    # Description of the report; part of the context free hash content.
    description: str
    # Name of the checker; 'unknown' is used by the path sensitive hash
    # when this key is missing.
    check_name: str
    # Main location of the report.
    location: DiagLoc
    # Bug path sections of the report.
    path: List[DiagPath]

class HashType(Enum):
    """ Report hash types.

    Selects the algorithm used by get_report_hash(). CONTEXT_FREE is the
    default hash type of replace_report_hash().
    """
    # Hash generated without the bug path.
    CONTEXT_FREE = 1
    # Hash generated from the diagnostic together with its bug path.
    PATH_SENSITIVE = 2


def __get_line(file_name, line_no, errors='ignore'):
def __get_line(file_path: str, line_no: int, errors: str = 'ignore') -> str:
""" Return the given line from the file.
If line_no is larger than the number of lines in the file then empty
Expand All @@ -44,25 +77,25 @@ def __get_line(file_name, line_no, errors='ignore'):
Changing the encoding error handling can influence the hash content!
"""
try:
with open(file_name, mode='r',
encoding='utf-8', errors=errors) as source_file:
for line in source_file:
with open(file_path, mode='r',
encoding='utf-8', errors=errors) as f:
for line in f:
line_no -= 1
if line_no == 0:
return line
return ''
except IOError:
LOG.error("Failed to open file %s", file_name)
LOG.error("Failed to open file %s", file_path)
return ''


def __str_to_hash(string_to_hash: str, errors: str = 'ignore') -> str:
    """ Encodes the given string and generates a hash from it.

    The string is encoded as UTF-8 using the given error handling scheme
    and the hexadecimal MD5 digest of the encoded bytes is returned.

    WARNING!!! Changing the error handling type for encoding errors
    can influence the hash content!
    """
    string_hash = string_to_hash.encode(encoding="utf-8", errors=errors)
    return hashlib.md5(string_hash).hexdigest()


def _remove_whitespace(line_content, old_col):
def _remove_whitespace(line_content: str, old_col: int) -> Tuple[str, int]:
"""
This function removes white spaces from the line content parameter and
calculates the new line location.
Expand All @@ -88,7 +121,7 @@ def _remove_whitespace(line_content, old_col):
old_col - line_strip_len


def __get_report_hash_path_sensitive(diag, source_file):
def __get_report_hash_path_sensitive(diag: Diag, file_path: str) -> str:
""" Report hash generation from the given diagnostic.
Hash generation algorithm for older plist versions where no
Expand All @@ -109,7 +142,10 @@ def __get_report_hash_path_sensitive(diag, source_file):
control diag section number in the bug path. If there are no control
sections event section column numbers are used.
"""
def compare_ctrl_sections(curr, prev):
def compare_ctrl_sections(
curr: DiagPath,
prev: DiagPath
) -> Optional[Tuple[int, int]]:
"""
Compare two sections and return column numbers which
should be included in the path hash or None if the
Expand Down Expand Up @@ -139,22 +175,22 @@ def compare_ctrl_sections(curr, prev):

main_section = path[-1]

m_loc = main_section.get('location')
source_line = m_loc.get('line')
m_loc = main_section.get('location', {})
source_line = m_loc.get('line', -1)

from_col = m_loc.get('col')
until_col = m_loc.get('col')
from_col = m_loc.get('col', -1)
until_col = m_loc.get('col', -1)

# WARNING!!! Changing the error handling type for encoding errors
# can influence the hash content!
line_content = __get_line(source_file, source_line, errors='ignore')
line_content = __get_line(file_path, source_line, errors='ignore')

if line_content == '' and not os.path.isfile(source_file):
if line_content == '' and not os.path.isfile(file_path):
LOG.error("Failed to generate report hash.")
LOG.error('%s does not exists!', source_file)
LOG.error('%s does not exists!', file_path)

file_name = os.path.basename(source_file)
msg = main_section.get('message')
file_name = os.path.basename(file_path)
msg = main_section.get('message', '')

hash_content = [file_name,
diag.get('check_name', 'unknown'),
Expand Down Expand Up @@ -209,7 +245,7 @@ def compare_ctrl_sections(curr, prev):
return ''


def __get_report_hash_context_free(diag, source_file):
def __get_report_hash_context_free(diag: Diag, file_path: str) -> str:
""" Generate report hash without bug path.
!!! NOT Compatible with the old hash generation method
Expand All @@ -222,31 +258,31 @@ def __get_report_hash_context_free(diag, source_file):
* 'column numbers' from the main diag sections location.
"""
try:
m_loc = diag.get('location')
source_line = m_loc.get('line')
m_loc = diag.get('location', {})
source_line = m_loc.get('line', -1)

from_col = m_loc.get('col')
until_col = m_loc.get('col')
from_col = m_loc.get('col', -1)
until_col = m_loc.get('col', -1)

# WARNING!!! Changing the error handling type for encoding errors
# can influence the hash content!
line_content = __get_line(source_file, source_line, errors='ignore')
line_content = __get_line(file_path, source_line, errors='ignore')

# Remove whitespaces so the hash will be independent of the
# source code indentation.
line_content, new_col = _remove_whitespace(line_content, from_col)

# Update the column number in sync with the
# removed whitespaces.
until_col = until_col - (from_col-new_col)
until_col = until_col - (from_col - new_col)
from_col = new_col

if line_content == '' and not os.path.isfile(source_file):
if line_content == '' and not os.path.isfile(file_path):
LOG.error("Failed to include soruce line in the report hash.")
LOG.error('%s does not exists!', source_file)
LOG.error('%s does not exists!', file_path)

file_name = os.path.basename(source_file)
msg = diag.get('description')
file_name = os.path.basename(file_path)
msg = diag.get('description', '')

hash_content = [file_name,
msg,
Expand All @@ -262,7 +298,7 @@ def __get_report_hash_context_free(diag, source_file):
return ''


def get_report_hash(diag, file_path, hash_type):
def get_report_hash(diag: Diag, file_path: str, hash_type: HashType) -> str:
""" Get report hash for the given diagnostic. """
if hash_type == HashType.CONTEXT_FREE:
return __get_report_hash_context_free(diag, file_path)
Expand All @@ -272,7 +308,7 @@ def get_report_hash(diag, file_path, hash_type):
raise Exception("Invalid report hash type: " + str(hash_type))


def get_report_path_hash(report):
def get_report_path_hash(report) -> str:
""" Returns path hash for the given bug path.
This can be used to filter deduplications of multiple reports.
Expand All @@ -284,8 +320,8 @@ def get_report_path_hash(report):
for event in events:
file_name = \
os.path.basename(report.files.get(event['location']['file']))
line = str(event['location']['line']) if 'location' in event else 0
col = str(event['location']['col']) if 'location' in event else 0
line = str(event['location']['line'] if 'location' in event else 0)
col = str(event['location']['col'] if 'location' in event else 0)

report_path_hash += line + '|' + col + '|' + event['message'] + \
file_name
Expand All @@ -300,21 +336,21 @@ def get_report_path_hash(report):
return __str_to_hash(report_path_hash)


def replace_report_hash(plist_file, hash_type=HashType.CONTEXT_FREE):
def replace_report_hash(plist_file: str, hash_type=HashType.CONTEXT_FREE):
""" Override hash in the given file by using the given version hash. """
try:
with open(plist_file, 'rb+') as pfile:
plist = plistlib.load(pfile)
pfile.seek(0)
pfile.truncate()
with open(plist_file, 'rb+') as f:
plist = plistlib.load(f)
f.seek(0)
f.truncate()
files = plist['files']

for diag in plist['diagnostics']:
file_path = files[diag['location']['file']]
report_hash = get_report_hash(diag, file_path, hash_type)
diag['issue_hash_content_of_line_in_context'] = report_hash

plistlib.dump(plist, pfile)
plistlib.dump(plist, f)

except (TypeError, AttributeError, plistlib.InvalidFileException) as err:
LOG.warning('Failed to process plist file: %s wrong file format?',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
nose==1.3.7
pycodestyle==2.5.0
pylint==2.4.4
mypy==0.812
mypy_extensions==0.4.3
10 changes: 9 additions & 1 deletion tools/codechecker_report_hash/tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,18 @@ REPO_ROOT ?= REPO_ROOT=$(ROOT)
# Nose test runner configuration options.
NOSECFG = --config .noserc

test: pycodestyle pylint test_unit
test: mypy pycodestyle pylint test_unit

test_in_env: pycodestyle_in_env pylint_in_env test_unit_in_env

MYPY_TEST_CMD = mypy --ignore-missing-imports codechecker_report_hash tests

mypy:
$(MYPY_TEST_CMD)

mypy_in_env: venv_dev
$(ACTIVATE_DEV_VENV) && $(MYPY_TEST_CMD)

PYCODESTYLE_TEST_CMD = pycodestyle codechecker_report_hash tests

pycodestyle:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,4 @@ def teardown_package():
global TEST_WORKSPACE

print("Removing: " + TEST_WORKSPACE)
# shutil.rmtree(TEST_WORKSPACE)
shutil.rmtree(TEST_WORKSPACE)
1 change: 1 addition & 0 deletions web/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ sqlalchemy==1.3.16
alembic==1.4.2
portalocker==1.7.0
psutil==5.7.0
mypy_extensions==0.4.3

codechecker_api==6.39.0
codechecker_api_shared==6.39.0
1 change: 1 addition & 0 deletions web/requirements_py/dev/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pylint==2.4.4
nose==1.3.7
mockldap==0.3.0
mkdocs==1.0.4
mypy_extensions==0.4.3

codechecker_api==6.39.0
codechecker_api_shared==6.39.0
Expand Down
1 change: 1 addition & 0 deletions web/requirements_py/osx/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ alembic==1.4.2
portalocker==1.7.0
psutil==5.7.0
sqlalchemy==1.3.16
mypy_extensions==0.4.3

codechecker_api==6.39.0
codechecker_api_shared==6.39.0

0 comments on commit 3871152

Please sign in to comment.