Skip to content

Commit

Permalink
json: Treat unwanted interpolation as lexical error
Browse files Browse the repository at this point in the history
The JSON parser optionally supports interpolation.  The lexer
recognizes interpolation tokens unconditionally.  The parser rejects
them when interpolation is disabled, in parse_interpolation().
However, it neglects to set an error in that case, so
json_parser_parse() can fail without setting an error.

Move the check for unwanted interpolation from the parser's
parse_interpolation() into the lexer's finite state machine.  When
interpolation is disabled, '%' is now handled like any other
unexpected character.

The next commit will improve how such lexical errors are handled.

Signed-off-by: Markus Armbruster <[email protected]>
Reviewed-by: Eric Blake <[email protected]>
Message-Id: <[email protected]>
  • Loading branch information
Markus Armbruster committed Aug 24, 2018
1 parent 6103028 commit 2cbd15a
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 19 deletions.
4 changes: 2 additions & 2 deletions include/qapi/qmp/json-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ typedef enum json_token_type {
} JSONTokenType;

typedef struct JSONLexer {
int state;
int start_state, state;
GString *token;
int x, y;
} JSONLexer;

void json_lexer_init(JSONLexer *lexer);
void json_lexer_init(JSONLexer *lexer, bool enable_interpolation);

void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size);

Expand Down
30 changes: 18 additions & 12 deletions qobject/json-lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
* Like double-quoted strings, except they're delimited by %x27
* (apostrophe) instead of %x22 (quotation mark), and can't contain
* unescaped apostrophe, but can contain unescaped quotation mark.
* - Interpolation:
* - Interpolation, if enabled:
* interpolation = %((l|ll|I64)[du]|[ipsf])
*
* Note:
Expand Down Expand Up @@ -123,9 +123,11 @@ enum json_lexer_state {
IN_INTERP_I64,
IN_WHITESPACE,
IN_START,
IN_START_INTERP, /* must be IN_START + 1 */
};

QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START);
QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START_INTERP);
QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1);

#define TERMINAL(state) [0 ... 0x7F] = (state)

Expand Down Expand Up @@ -257,8 +259,12 @@ static const uint8_t json_lexer[][256] = {
['I'] = IN_INTERP_I,
},

/* top level rule */
[IN_START] = {
/*
* Two start states:
* - IN_START recognizes JSON tokens with our string extensions
* - IN_START_INTERP additionally recognizes interpolation.
*/
[IN_START ... IN_START_INTERP] = {
['"'] = IN_DQ_STRING,
['\''] = IN_SQ_STRING,
['0'] = IN_ZERO,
Expand All @@ -271,17 +277,18 @@ static const uint8_t json_lexer[][256] = {
[','] = JSON_COMMA,
[':'] = JSON_COLON,
['a' ... 'z'] = IN_KEYWORD,
['%'] = IN_INTERP,
[' '] = IN_WHITESPACE,
['\t'] = IN_WHITESPACE,
['\r'] = IN_WHITESPACE,
['\n'] = IN_WHITESPACE,
},
[IN_START_INTERP]['%'] = IN_INTERP,
};

void json_lexer_init(JSONLexer *lexer)
void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
{
lexer->state = IN_START;
lexer->start_state = lexer->state = enable_interpolation
? IN_START_INTERP : IN_START;
lexer->token = g_string_sized_new(3);
lexer->x = lexer->y = 0;
}
Expand Down Expand Up @@ -321,7 +328,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
/* fall through */
case JSON_SKIP:
g_string_truncate(lexer->token, 0);
new_state = IN_START;
new_state = lexer->start_state;
break;
case IN_ERROR:
/* XXX: To avoid having previous bad input leaving the parser in an
Expand All @@ -340,8 +347,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
json_message_process_token(lexer, lexer->token, JSON_ERROR,
lexer->x, lexer->y);
g_string_truncate(lexer->token, 0);
new_state = IN_START;
lexer->state = new_state;
lexer->state = lexer->start_state;
return;
default:
break;
Expand All @@ -356,7 +362,7 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
json_message_process_token(lexer, lexer->token, lexer->state,
lexer->x, lexer->y);
g_string_truncate(lexer->token, 0);
lexer->state = IN_START;
lexer->state = lexer->start_state;
}
}

Expand All @@ -371,7 +377,7 @@ void json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)

void json_lexer_flush(JSONLexer *lexer)
{
if (lexer->state != IN_START) {
if (lexer->state != lexer->start_state) {
json_lexer_feed_char(lexer, 0, true);
}
}
Expand Down
4 changes: 0 additions & 4 deletions qobject/json-parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,10 +427,6 @@ static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap)
{
JSONToken *token;

if (ap == NULL) {
return NULL;
}

token = parser_context_pop_token(ctxt);
assert(token && token->type == JSON_INTERP);

Expand Down
2 changes: 1 addition & 1 deletion qobject/json-streamer.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void json_message_parser_init(JSONMessageParser *parser,
parser->tokens = g_queue_new();
parser->token_size = 0;

json_lexer_init(&parser->lexer);
json_lexer_init(&parser->lexer, !!ap);
}

void json_message_parser_feed(JSONMessageParser *parser,
Expand Down
4 changes: 4 additions & 0 deletions tests/qmp-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ static void test_malformed(QTestState *qts)

/* lexical error: interpolation */
qtest_qmp_send_raw(qts, "%%p\n");
/* two errors, one for "%", one for "p" */
resp = qtest_qmp_receive(qts);
g_assert_cmpstr(get_error_class(resp), ==, "GenericError");
qobject_unref(resp);
resp = qtest_qmp_receive(qts);
g_assert_cmpstr(get_error_class(resp), ==, "GenericError");
qobject_unref(resp);
Expand Down

0 comments on commit 2cbd15a

Please sign in to comment.