Skip to content

Commit

Permalink
RDA: Keep DataSet displayable (angr#2368)
Browse files Browse the repository at this point in the history
Avoid clobbering the output when a `DataSet` contains very long strings.

Signed-off-by: Pamplemousse <[email protected]>
  • Loading branch information
Pamplemousse authored Nov 2, 2020
1 parent a6b9a31 commit c366024
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
21 changes: 20 additions & 1 deletion angr/knowledge_plugins/key_definitions/dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List, Union, Set
import logging
import operator
import re

from ...engines.light import RegisterOffset
from .constants import DEBUG
Expand Down Expand Up @@ -168,11 +169,29 @@ def __str__(self):
if UNDEFINED in self.data:
data_string = str(self.data)
else:
data_string = str([ hex(i) if isinstance(i, int) else i for i in self.data ])
data_string = str([ _stringify_datum(i) for i in self.data ])
size = "%d" % self._bits if isinstance(self._bits, int) else self._bits

return 'DataSet<%s>: %s' % (size, data_string)

def _stringify_datum(datum):
if isinstance(datum, int): return hex(datum)

# Shorten long strings if possible
if isinstance(datum, str) and len(datum) > 50:
regex = r'(.)\1{10,}'
matches = list(re.finditer(regex, datum))

if len(matches) == 0: return datum

_new_datum = datum[:matches[0].span()[0]]
for m in matches:
number_of_occurences = m.span()[1] - m.span()[0]
_new_datum += "%s...(repeats %d times)" % (m.groups()[0], number_of_occurences)
return _new_datum

return datum


def dataset_from_datasets(datasets: List[DataSet]) -> DataSet:
"""
Expand Down
19 changes: 19 additions & 0 deletions tests/knowledge_plugins/key_definitions/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,22 @@ def test_dataset_from_datasets_with_different_sizes(self):

self.assertSetEqual(result.data, {1, 2, 3})
self.assertEqual(result._bits, UNKNOWN_SIZE)

def test_representation_shortens_content_with_repeated_character(self):
    """
    A `DataSet` holding a long string made of repeated characters is displayed with each run collapsed.
    """
    repetitions = 30
    content = 'test: ' + 'a' * repetitions + 'b' * repetitions
    bits = len(content)

    rendered = "%s" % DataSet({content}, bits)
    expected = "DataSet<%s>: ['test: a...(repeats 30 times)b...(repeats 30 times)']" % bits

    self.assertEqual(rendered, expected)

def test_representation_does_not_shorten_content_of_reasonable_length(self):
    """
    A `DataSet` holding a short string is displayed with that string left untouched.
    """
    content = 'not too long'
    bits = len(content)

    rendered = "%s" % DataSet({content}, bits)
    expected = "DataSet<%s>: ['not too long']" % bits

    self.assertEqual(rendered, expected)

0 comments on commit c366024

Please sign in to comment.