Skip to content

Commit

Permalink
Make PARSE3 RAISE errors on unsuccessful parse
Browse files Browse the repository at this point in the history
The design choice to raise a definitional error on incomplete
parses worked so well for UPARSE that this commit patches the
same behavior into PARSE3.

Making this change uncovered bugs, and unclear code was
rewritten so that it is obvious where accepting an incomplete
parse is intentional.
  • Loading branch information
hostilefork committed Sep 16, 2023
1 parent 8255d7c commit 849df9f
Show file tree
Hide file tree
Showing 28 changed files with 602 additions and 358 deletions.
5 changes: 4 additions & 1 deletion extensions/console/ext-console-init.reb
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,10 @@ ext-console-impl: func [
let directives: collect [
let i
if block? prior [
parse3 prior [some [set i: issue! (keep i)] end]
parse3 prior [
try some [set i: issue! (keep i)]
accept (true)
]
]
]

Expand Down
2 changes: 1 addition & 1 deletion extensions/locale/iso3166.r
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ parse3 init-code [
#"[" thru #"]"
] iso-3166-table-count
to <end>
] else [
] except [
fail "Failed to update iso-3166-table"
]

Expand Down
2 changes: 1 addition & 1 deletion extensions/locale/iso639.r
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ parse3 init-code [
#"[" thru #"]"
] iso-639-table-count
to <end>
] else [
] except [
fail "Failed to update iso-639-table"
]

Expand Down
24 changes: 12 additions & 12 deletions scripts/encap.reb
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ elf-format: context [
(mode: 'write) seek pos, program-header-rule
]
to <end>
] else [
] except [
fail "Error updating offsets in program headers"
]

Expand All @@ -287,7 +287,7 @@ elf-format: context [
(mode: 'write) seek pos, section-header-rule
]
to <end>
] else [
] except [
fail "Error updating offsets in section headers"
]
]
Expand Down Expand Up @@ -325,7 +325,7 @@ elf-format: context [

parse3 skip executable string-header-offset [
(mode: 'read) section-header-rule to <end>
] else [
] except [
fail "Error finding string section in ELF binary"
]

Expand Down Expand Up @@ -415,7 +415,7 @@ elf-format: context [
)
(mode: 'write) seek pos, section-header-rule
to <end>
] else [
] except [
fail "Error updating string table size in string header"
]

Expand All @@ -441,7 +441,7 @@ elf-format: context [
)
(mode: 'write) section-header-rule
to <end>
] else [
] except [
fail "Error creating new section for the embedded data"
]

Expand Down Expand Up @@ -483,7 +483,7 @@ elf-format: context [

parse3 executable [
(mode: 'write) header-rule to <end>
] else [
] except [
fail "Error updating the ELF header"
]
]
Expand All @@ -494,7 +494,7 @@ elf-format: context [
][
let header-data: read/part file 64 ; 64-bit size, 32-bit is smaller

parse3 header-data [(mode: 'read) header-rule to <end>] else [
parse3 header-data [(mode: 'read) header-rule to <end>] except [
return null
]

Expand All @@ -506,7 +506,7 @@ elf-format: context [
;
parse3 skip section-headers-data (e_shstrndx * e_shentsize) [
(mode: 'read) section-header-rule to <end>
] else [
] except [
fail "Error finding string section in ELF binary"
]

Expand Down Expand Up @@ -614,7 +614,7 @@ pe-format: context [
| skip
]

parse3 rule [try some block-rule] else [fail]
parse3 rule [try some block-rule]

set name make object! append def '~
return bind rule get name
Expand Down Expand Up @@ -774,7 +774,7 @@ pe-format: context [
repeat (COFF-header.number-of-sections) section-rule
end-of-section-header: <here>

; !!! stop here, no END ?
accept (true) ; !!! stop here, no END ?
]
size-of-section-header: 40 ; Size of one entry

Expand Down Expand Up @@ -823,7 +823,7 @@ pe-format: context [
exe-data [binary!]
][
reset
parse3 exe-data exe-rule
try parse3 exe-data exe-rule
if err = 'missing-dos-signature [
return false ; soft failure (just wasn't an EXE, no "MZ")
]
Expand Down Expand Up @@ -1273,7 +1273,7 @@ encap: func [
print ["Compressed resource is" length of compressed "bytes long."]

case [
did parse3 executable [
try parse3 executable [
(elf-format.mode: 'read) elf-format.header-rule to <end>
][
print "ELF format found"
Expand Down
8 changes: 4 additions & 4 deletions scripts/prot-http.r
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ read-body: function [
; even have enough input data for the chunk *size*, much less
; the chunk. READ until we have at least a chunk size.
;
while [didn't parse3 conn.data [
while [not try parse3 conn.data [
copy chunk-size: some hex-digits, thru crlfbin
mk1: <here>, to <end>
]][
Expand All @@ -511,7 +511,7 @@ read-body: function [
; Now we have the chunk size but may not have the chunk data.
; Loop until enough data is gathered.
;
while [didn't parse3 mk1 [
while [not try parse3 mk1 [
repeat (chunk-size) skip, mk2: <here>, crlfbin, to <end>
]][
read conn
Expand All @@ -532,11 +532,11 @@ read-body: function [
;
; https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Trailer
;
parse3 mk1 [
if try parse3 mk1 [
crlfbin (trailer: "") to <end>
|
copy trailer to crlf2bin to <end>
] then [
][
trailer: scan-net-header as binary! trailer
append headers spread trailer
clear conn.data
Expand Down
1 change: 1 addition & 0 deletions src/boot/errors.r
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ Script: [
parse-command: [{PARSE - command cannot be used as variable:} :arg1]
parse-series: [{PARSE - input must be a series:} :arg1]
parse-multiple-set: {PARSE - SET can capture at most one value}
parse-incomplete: {PARSE rules did not reach end of input}

bad-library: {bad library (already closed?)}
only-callback-ptr: {Only callback functions may be passed by FFI pointer}
Expand Down
9 changes: 4 additions & 5 deletions src/core/c-path.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,11 @@ Bounce TO_Sequence(Frame(*) frame_, enum Reb_Kind kind, const REBVAL *arg) {
// (Inefficient! But just see how it feels before optimizing.)
//
return rebValue(
"as", Datatype_From_Kind(kind), "catch [",
"as", Datatype_From_Kind(kind),
"parse3 let v: load @", arg, "[",
"[any-sequence! | any-array!] end (throw first v)",
"| (throw v)", // try to convert whatever other block
"]",
"]"
"[any-sequence! | any-array!] <end> accept (first v)",
"| accept (v)", // try to convert whatever other block
"]"
);
}

Expand Down
4 changes: 2 additions & 2 deletions src/core/n-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -2665,8 +2665,8 @@ DECLARE_NATIVE(concretize)
//
// "Turn isotopes into their plain forms, pass thru other values"
//
// return: [element?]
// value [<opt> any-value!]
// return: [<void> element?]
// value [<opt> <void> any-value!]
// ]
//
DECLARE_NATIVE(noisotope)
Expand Down
27 changes: 14 additions & 13 deletions src/core/u-parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -1454,7 +1454,7 @@ DECLARE_NATIVE(subparse)
case SYM_WHILE:
if (not (P_FLAGS & PF_REDBOL)) {
fail (
"Please replace PARSE2's WHILE with TRY SOME -or-"
"Please replace PARSE3's WHILE with TRY SOME -or-"
" TRY FURTHER SOME--it's being reclaimed as arity-2."
" https://forum.rebol.info/t/1540/12 (or use PARSE2)"
);
Expand Down Expand Up @@ -1779,7 +1779,7 @@ DECLARE_NATIVE(subparse)
// (go-on?: either condition [[accept]][[reject]])
// go-on?
//
// !!! Note: PARSE/REDBOL may be a modality it needs to
// !!! Note: PARSE3/REDBOL may be a modality it needs to
// support, and Red added IF. It might be necessary to keep
// it (though Rebol2 did not have IF in PARSE...)
//
Expand Down Expand Up @@ -1822,7 +1822,7 @@ DECLARE_NATIVE(subparse)

DECLARE_LOCAL (thrown_arg);
if (IS_TAG(P_RULE)) {
if (rebDid(P_RULE, "= <here>"))
if (rebDid(rebR(rebUnrelativize(P_RULE)), "= <here>"))
Copy_Cell(thrown_arg, ARG(position));
else
fail ("PARSE3 ACCEPT TAG! only works with <here>");
Expand Down Expand Up @@ -2760,14 +2760,20 @@ DECLARE_NATIVE(parse3)
return THROWN;
}

if (Is_Nulled(OUT))
return nullptr; // the match failed
if (Is_Nulled(OUT)) { // a match failed (but may be at end of input)
if (REF(redbol))
return nullptr;
return RAISE(Error_Parse_Incomplete_Raw());
}

REBLEN index = VAL_UINT32(OUT);
assert(index <= VAL_LEN_HEAD(input));

if (index != VAL_LEN_HEAD(input))
return nullptr; // the match failed
if (index != VAL_LEN_HEAD(input)) { // didn't reach end of input
if (REF(redbol))
return nullptr;
return RAISE(Error_Parse_Incomplete_Raw());
}

// !!! R3-Alpha parse design had no means to bubble up a "synthesized"
// rule product. But that's important in the new design. Hack in support
Expand All @@ -2785,12 +2791,7 @@ DECLARE_NATIVE(parse3)
return OUT;
}

// !!! Give back a value that triggers a THEN clause and won't trigger an
// ELSE clause. See UPARSE for the redesign that will be applied to more
// native code in the future. But this is just to get people out of the
// habit of writing `IF PARSE ...`
//
return rebValue("~use-DID-PARSE-for-logic~");
return NONE;
}


Expand Down
12 changes: 6 additions & 6 deletions src/main/main-startup.reb
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ main-startup: func [

while [not tail? argv] [

let is-option: did parse3/case argv.1 [
let is-option: did try parse3/case argv.1 [

["--" end] (
; Double-dash means end of command line arguments, and the
Expand Down Expand Up @@ -673,11 +673,11 @@ main-startup: func [
; heuristic is to check for more than one letter.
;
alphanum: charset [#"A" - #"Z" #"a" - #"z" #"0" #"9"]
o.script: parse3 o.script [alphanum some alphanum ":" to <end>] then [
to url! o.script
]
else [
local-to-file o.script
parse3 o.script [
alphanum some alphanum ":" to <end>
(o.script: to url! o.script)
] except [
o.script: local-to-file o.script
]
]

Expand Down
4 changes: 0 additions & 4 deletions src/mezz/base-defs.r
Original file line number Diff line number Diff line change
Expand Up @@ -343,10 +343,6 @@ reorder: enclose :reorder* lambda [f] [
inherit-meta (do f) action
]

; The PARSE name has been taken by what was UPARSE.

parse2: :parse3/redbol


; REQUOTE is helpful when functions do not accept QUOTED! values.
;
Expand Down
2 changes: 1 addition & 1 deletion src/mezz/base-files.r
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ split-path: func [
][
parse3 as text! target [
copy directory try some thru "/"
copy file thru end
copy file thru <end>
]
if empty? directory [
directory: null
Expand Down
2 changes: 1 addition & 1 deletion src/mezz/base-funcs.r
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ redescribe: func [
)]
]
<end>
] else [
] except [
fail [{REDESCRIBE specs should be STRING! and ANY-WORD! only:} spec]
]

Expand Down
8 changes: 5 additions & 3 deletions src/mezz/base-series.r
Original file line number Diff line number Diff line change
Expand Up @@ -357,13 +357,15 @@ trim: function [
;
indent: null
if auto [
parse3* series [
parse3 series [
; Don't count empty lines, (e.g. trim/auto {^/^/^/ asdf})
remove [try some LF]
try remove some LF

(indent: 0)
s: <here>, some rule, e: <here>
s: <here>, try some rule, e: <here>
(indent: (index of e) - (index of s))

accept (true) ; don't need to reach end
]
]

Expand Down
10 changes: 5 additions & 5 deletions src/mezz/mezz-series.r
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ reword: function [
block? escape [
parse3 escape [
set prefix delimiter-types
[end | set suffix delimiter-types]
] else [
[<end> | set suffix delimiter-types]
] except [
fail ["Invalid /ESCAPE delimiter block" escape]
]
]
Expand Down Expand Up @@ -301,7 +301,7 @@ reword: function [
(append out a) ; finalize output - transfer any remainder verbatim
]

apply :parse3 [source rule /case case_REWORD] else [fail] ; should succeed
apply :parse3 [source rule /case case_REWORD] ; should succeed
return out
]

Expand Down Expand Up @@ -513,7 +513,7 @@ split: function [
[block! integer! char! bitset! text! tag! word!]
/into "If dlm is integer, split in n pieces (vs. pieces of length n)"
][
(parse3 (maybe match block! dlm) [some integer!]) then [
if try parse3 (maybe match block! dlm) [some integer!] [
return map-each len dlm [
if len <= 0 [
series: skip series negate len
Expand All @@ -538,7 +538,7 @@ split: function [

[
repeat (count) [
copy series [repeat (piece-size) skip] (
copy series try [repeat (piece-size) skip] (
keep series
)
]
Expand Down
7 changes: 2 additions & 5 deletions tests/control/match.test.reb
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,8 @@
[
(match+: reframer func [f [frame!] <local> p] [
p: f.(first parameters of action of f) ; get the first parameter
if did do f [
return p
] else [
return null
] ; evaluate to parameter if operation succeeds
do f except [return null]
return p ; evaluate to parameter if operation succeeds
]
true)

Expand Down
Loading

0 comments on commit 849df9f

Please sign in to comment.