Skip to content

Commit

Permalink
Merge pull request Yelp#501 from Yelp/bug-fix/inline-dictionary-yaml-…
Browse files Browse the repository at this point in the history
…support

Bug Fix: Fix Inline Dictionary For YAML Files
  • Loading branch information
jpdakran authored Jan 11, 2022
2 parents b914bb6 + f295ada commit c21ed92
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 9 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ $ git diff --staged --name-only -z | xargs -0 detect-secrets-hook --baseline .se
**Scanning All Tracked Files:**

```bash
$ git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline
$ git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline
```

### Viewing All Enabled Plugins:
Expand Down
4 changes: 2 additions & 2 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def __iter__(self) -> Generator[Tuple[str, PotentialSecret], None, None]:
key=lambda secret: (
getattr(secret, 'line_number', 0),
secret.secret_hash,
secret.type
)
secret.type,
),
):
yield filename, secret

Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def analyze_string(self, string: str) -> Generator[str, None, None]:
for regex in self.denylist:
for match in regex.findall(string):
if isinstance(match, tuple):
for submatch in filter(bool, tuple):
for submatch in filter(bool, match):
# It might make sense to paste break after yielding
yield submatch
else:
Expand Down
56 changes: 54 additions & 2 deletions detect_secrets/transformers/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from typing import Union

import yaml
from yaml.tokens import FlowEntryToken
from yaml.tokens import KeyToken

from ..types import NamedIO
from ..util.filetype import determine_file_type
Expand Down Expand Up @@ -152,6 +154,9 @@ def __init__(self, file: NamedIO):
self.loader = yaml.SafeLoader(self.content)
self.loader.compose_node = self._compose_node_shim # type: ignore

self.is_inline_flow_mapping_key = False
self.loader.parse_flow_mapping_key = self._parse_flow_mapping_key_shim # type: ignore

def json(self) -> Dict[str, Any]:
return cast(Dict[str, Any], self.loader.get_single_data())

Expand All @@ -164,7 +169,7 @@ def __iter__(self) -> Iterator[YAMLValue]:

to_search = deque([self.json()])
while to_search:
item: Any = to_search.pop()
item: Any = to_search.popleft()

if not item:
# mainly for base case (e.g. if file is all comments)
Expand Down Expand Up @@ -206,7 +211,11 @@ def _compose_node_shim(
parent: Optional[yaml.nodes.Node],
index: Optional[yaml.nodes.Node],
) -> yaml.nodes.Node:
line = self.loader.line
line = (
self.loader.marks[-1].line
if self.is_inline_flow_mapping_key
else self.loader.line
)

node = yaml.composer.Composer.compose_node(self.loader, parent, index)
node.__line__ = line + 1
Expand All @@ -218,6 +227,49 @@ def _compose_node_shim(

return cast(yaml.nodes.Node, node)

def _parse_flow_mapping_key_shim(
self,
first: bool = False,
) -> yaml.nodes.Node:
# There exists an edge case when a key and flow mapping start character `{` are on the same
# line (Ex. '{key: value}) followed by an empty line. The parser will produce an off-by-one
# error for the line number that it tracks internally. Since we track the start of the
# mapping, we will use this line number when we are processing:
# A) The first key in an inline dictionary where the flow mapping start character is on the
# same line as the key
# B) The n key of an inline dictionary that is followed by a FlowEntryToken (',') and
# KeyToken ('key:')
if (
first
and self.loader.marks[-1].line == self.loader.peek_token().start_mark.line
or self._check_next_tokens_shim(FlowEntryToken, KeyToken)
):
self.is_inline_flow_mapping_key = True
else:
self.is_inline_flow_mapping_key = False

return cast(yaml.nodes.Node, yaml.parser.Parser.parse_flow_mapping_key(self.loader, first))

def _check_next_tokens_shim(
self,
*choices: Any,
) -> bool:
# Check the next tokens type match the argument list of token types
result = True
i = 0

if self.loader.tokens:
if not choices:
return result
for choice in choices:
if i < len(self.loader.tokens):
result = result and isinstance(self.loader.tokens[i], choice)
i += 1
else:
result = False

return result


def _tag_dict_values(map_node: yaml.nodes.MappingNode) -> yaml.nodes.MappingNode:
"""
Expand Down
6 changes: 3 additions & 3 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ monotonic==1.5
mypy==0.790
mypy-extensions==0.4.3
nodeenv==1.5.0
packaging==20.7
packaging==20.9
pluggy==0.13.1
pre-commit==2.9.2
py==1.10.0
pyahocorasick==1.4.0
pycodestyle==2.3.1
pyflakes==1.6.0
pyparsing==2.4.7
pytest==6.1.2
pytest==6.2.2
PyYAML==5.4
requests==2.25.0
responses==0.12.1
Expand All @@ -38,5 +38,5 @@ typed-ast==1.4.1
typing-extensions==3.7.4.3
unidiff==0.6.0
urllib3==1.26.4
virtualenv==20.2.1
virtualenv==20.6.0
zipp==3.4.0
Empty file added tests/__init__.py
Empty file.
210 changes: 210 additions & 0 deletions tests/transformers/yaml_transformer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,47 @@ def test_multiline_block_scalar_literal_style(block_chomping):

assert YAMLTransformer().parse_file(file) == ['']

@staticmethod
def test_single_line_flow_mapping():
file = mock_file_object(
textwrap.dedent("""
object:
keyA: 1
dictionary:
- {keyB: valueB, keyC: valueC, keyD: valueD}
""")[1:-1],
)

assert YAMLTransformer().parse_file(file) == [
'',
'',
'',
'keyB: "valueB"',
'keyC: "valueC"',
'keyD: "valueD"',
]

@staticmethod
def test_multi_line_flow_mapping():
file = mock_file_object(
textwrap.dedent("""
object:
keyA: 1
dictionary:
- {keyB: valueB, keyC: valueC, keyD: valueD}
""")[1:-1],
)

assert YAMLTransformer().parse_file(file) == [
'',
'',
'',
'keyB: "valueB"',
'keyC: "valueC"',
'keyD: "valueD"',
]


class TestYAMLFileParser:
@staticmethod
Expand Down Expand Up @@ -147,3 +188,172 @@ def test_possible_secret_format(yaml_value, expected_value):
'__line__': mock.ANY,
'__original_key__': mock.ANY,
}

@staticmethod
@pytest.mark.parametrize(
'content, expected',
(
# NOTE: The trailing new lines are important here!
# It needs to be a string value, since we ignore non-string values (because we assume
# secrets will be strings). However, the combination of the dictionary start character
# `{` and the keys being on the same line causes unexpected results (see #374).
(
textwrap.dedent("""
{ a: "1" }
""")[1:],
['a: "1"'],
),
(
textwrap.dedent("""
a:
{b: "2"}
""")[1:],
['', 'b: "2"'],
),
(
textwrap.dedent("""
a:
- {b: "2"}
""")[1:],
['', 'b: "2"'],
),
# New lines aren't important here, but since the first key is on the same line
# as the start of the block, it will be handled funkily.
(
textwrap.dedent("""
{a: "1",
b: "2",
}
""")[1:-1],
['a: "1"', 'b: "2"'],
),
(
textwrap.dedent("""
{
a: "1",
b: "2",
}
""")[1:],
['', 'a: "1"', 'b: "2"'],
),
(
textwrap.dedent("""
{ a: "1", b: "2" }
""")[1:],
['a: "1"', 'b: "2"'],
),
),
)
def test_inline_dictionary(content, expected):
lines = YAMLTransformer().parse_file(mock_file_object(content))
assert lines == expected

@staticmethod
def test_single_line_flow_mapping():
file = mock_file_object(
textwrap.dedent("""
dictionary:
- {keyA: valueA, keyB: valueB, keyC: valueC}
""")[1:-1],
)

assert YAMLFileParser(file).json() == {
'dictionary': [
{
'keyA': {
'__value__': 'valueA',
'__line__': 2,
'__original_key__': 'keyA',
},
'keyB': {
'__value__': 'valueB',
'__line__': 2,
'__original_key__': 'keyB',
},
'keyC': {
'__value__': 'valueC',
'__line__': 2,
'__original_key__': 'keyC',
},
},
],
}

@staticmethod
def test_multi_line_flow_mapping():
file = mock_file_object(
textwrap.dedent("""
dictionary:
- {keyA: valueA, keyB: valueB, keyC: valueC}
""")[1:-1],
)

assert YAMLFileParser(file).json() == {
'dictionary': [
{
'keyA': {
'__value__': 'valueA',
'__line__': 2,
'__original_key__': 'keyA',
},
'keyB': {
'__value__': 'valueB',
'__line__': 2,
'__original_key__': 'keyB',
},
'keyC': {
'__value__': 'valueC',
'__line__': 2,
'__original_key__': 'keyC',
},
},
],
}

@staticmethod
def test_inline_dictionary_same_starting_line():
file = mock_file_object(
textwrap.dedent("""
{a: "1",
b: "2",
}
""")[1:-1],
)

assert YAMLFileParser(file).json() == {
'a': {
'__value__': '1',
'__line__': 1,
'__original_key__': 'a',
},
'b': {
'__value__': '2',
'__line__': 2,
'__original_key__': 'b',
},
}

@staticmethod
def test_inline_dictionary_different_starting_line():
file = mock_file_object(
textwrap.dedent("""
{
a: "1",
b: "2",
}
""")[1:-1],
)

assert YAMLFileParser(file).json() == {
'a': {
'__value__': '1',
'__line__': 2,
'__original_key__': 'a',
},
'b': {
'__value__': '2',
'__line__': 3,
'__original_key__': 'b',
},
}

0 comments on commit c21ed92

Please sign in to comment.