Skip to content

Commit

Permalink
Add checkers
Browse files Browse the repository at this point in the history
  • Loading branch information
zyedidia committed Feb 17, 2021
1 parent f2f691d commit cd0740e
Show file tree
Hide file tree
Showing 12 changed files with 163 additions and 16 deletions.
22 changes: 12 additions & 10 deletions flare/grammar.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"github.com/zyedidia/gpeg/charset"
"github.com/zyedidia/gpeg/isa"
p "github.com/zyedidia/gpeg/pattern"
)

Expand Down Expand Up @@ -117,17 +118,18 @@ var (
)

func wordMatch(words ...string) p.Pattern {
patt := p.Concat(
p.Literal(words[0]),
p.Not(alnum),
)
for _, w := range words[1:] {
patt = p.Or(patt, p.Concat(
p.Literal(w),
p.Not(alnum),
))
m := make(map[string]struct{})
var chars []byte

for _, w := range words {
for _, c := range []byte(w) {
chars = append(chars, c)
}

m[w] = struct{}{}
}
return patt

return p.Check(p.Plus(p.Set(charset.New(chars))), isa.MapChecker(m))
}

func CreateHighlighter(grammar map[string]p.Pattern, names []string) p.Pattern {
Expand Down
11 changes: 11 additions & 0 deletions flare/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,17 @@ func main() {
prog := p.MustCompile(java)
code := vm.Encode(prog)

fmt.Println("Size of instructions:", code.Size())
codebytes, err := code.ToBytes()
if err != nil {
log.Fatal(err)
}
fmt.Println("Serialization size:", len(codebytes))
code, err = vm.FromBytes(codebytes)
if err != nil {
log.Fatal(err)
}

data, err := ioutil.ReadFile(flag.Args()[0])
if err != nil {
log.Fatal(err)
Expand Down
25 changes: 25 additions & 0 deletions gpeg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
"testing"

Expand Down Expand Up @@ -45,6 +46,30 @@ func TestConcat(t *testing.T) {
check(p, tests, t)
}

// uint8Checker is a Checker that accepts only text that parses as a decimal
// integer in the range 0 through 255.
type uint8Checker struct{}

// Check reports whether b parses (via strconv.Atoi) to a value that fits in
// an unsigned 8-bit integer. Anything Atoi rejects is refused.
func (uint8Checker) Check(b []byte) bool {
	if n, err := strconv.Atoi(string(b)); err == nil {
		return 0 <= n && n <= 255
	}
	return false
}

func TestChecker(t *testing.T) {
p := Check(Plus(Set(charset.Range('0', '9'))), uint8Checker{})

tests := []PatternTest{
{"123", 3},
{"256", -1},
{"foo", -1},
{"0", 1},
}

check(p, tests, t)
}

func TestOr(t *testing.T) {
p := Or(Literal("ana"), Literal("hi"))

Expand Down
24 changes: 24 additions & 0 deletions isa/checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package isa

// A Checker is used so the user can perform additional custom validation of
// parse results. For example, you might want to parse only 8-bit integers by
// matching [0-9]+ and then using a checker to ensure the matched integer is in
// the range 0-255.
type Checker interface {
// Check receives the bytes matched by the checked pattern and reports
// whether the match should be accepted.
Check(b []byte) bool
}

// MapChecker is a Checker that accepts exactly the byte sequences whose
// string form is a key of the map.
type MapChecker map[string]struct{}

// NewMapChecker builds a MapChecker that accepts each string in strs.
// Duplicate entries are harmless; a nil or empty slice yields a checker that
// rejects everything.
func NewMapChecker(strs []string) MapChecker {
	// The final size is bounded by len(strs), so size the map up front.
	m := make(MapChecker, len(strs))
	for _, s := range strs {
		m[s] = struct{}{}
	}
	return m
}

// Check reports whether b, interpreted as a string, is a member of the set.
func (m MapChecker) Check(b []byte) bool {
	_, ok := m[string(b)]
	return ok
}
19 changes: 19 additions & 0 deletions isa/isa.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,15 @@ type CaptureFull struct {
basic
}

// CheckBegin marks the start of a region whose matched text will be validated
// by the Checker of the corresponding CheckEnd. The VM records the current
// input position when it executes this instruction.
type CheckBegin struct {
basic
}

// CheckEnd closes the region opened by the corresponding CheckBegin. The VM
// passes the input consumed since that CheckBegin to Checker and fails the
// match if Check returns false.
type CheckEnd struct {
Checker Checker
basic
}

type Error struct {
basic
Message string
Expand Down Expand Up @@ -370,6 +379,16 @@ func (i Nop) String() string {
return "Nop"
}

// String returns the string representation of this instruction, which is the
// fixed mnemonic "CheckBegin".
func (i CheckBegin) String() string {
return "CheckBegin"
}

// String returns the string representation of this instruction: the mnemonic
// "CheckEnd" followed by the Checker formatted with %v.
func (i CheckEnd) String() string {
return fmt.Sprintf("CheckEnd %v", i.Checker)
}

// String returns the string representation of this instruction.
func (i MemoOpen) String() string {
return fmt.Sprintf("MemoOpen %v %v", i.Lbl, i.Id)
Expand Down
12 changes: 12 additions & 0 deletions pattern/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,18 @@ func (p *MemoNode) Compile() (isa.Program, error) {
return code, err
}

// Compile this node. The sub-pattern's program is wrapped in a
// CheckBegin/CheckEnd pair so the VM can hand the text matched by the
// sub-pattern to the node's Checker. If the sub-pattern fails to compile, the
// error is returned and no program is produced.
func (p *CheckNode) Compile() (isa.Program, error) {
	L1 := isa.NewLabel()
	sub, err := Get(p.Patt).Compile()
	if err != nil {
		// Do not build a program around a sub-pattern that failed to compile.
		return nil, err
	}

	code := make(isa.Program, 0, len(sub)+3)
	code = append(code, isa.CheckBegin{})
	code = append(code, sub...)
	code = append(code, isa.CheckEnd{Checker: p.Checker})
	// NOTE(review): nothing emitted by this node jumps to L1; it is kept so the
	// generated program is unchanged — confirm whether it is still needed.
	code = append(code, L1)
	return code, nil
}

// Compile this node.
func (p *SearchNode) Compile() (isa.Program, error) {
var rsearch Pattern
Expand Down
5 changes: 5 additions & 0 deletions pattern/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ type MemoNode struct {
Id int16
}

// CheckNode represents a pattern whose match must additionally be accepted by
// a Checker. Patt is the pattern to match and Checker validates the text that
// Patt matched.
type CheckNode struct {
Patt Pattern
Checker isa.Checker
}

// GrammarNode represents a grammar of non-terminals and their associated
// patterns. The Grammar must also have an entry non-terminal.
type GrammarNode struct {
Expand Down
8 changes: 8 additions & 0 deletions pattern/pattern.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,21 @@ import (
"strings"

"github.com/zyedidia/gpeg/charset"
"github.com/zyedidia/gpeg/isa"
)

// Cap marks a pattern to be captured. It is shorthand for CapId(p, 0).
func Cap(p Pattern) Pattern {
return CapId(p, 0)
}

// Check returns a pattern that matches p and then asks c to validate the
// matched text; the pattern is represented by a CheckNode holding both.
func Check(p Pattern, c isa.Checker) Pattern {
	return &CheckNode{Patt: p, Checker: c}
}

// CapId marks a pattern with an ID to be captured.
func CapId(p Pattern, id int16) Pattern {
return &CapNode{
Expand Down
16 changes: 16 additions & 0 deletions vm/code.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ type code struct {
Sets []charset.Set
// list of error messages
Errors []string
// list of checker functions
Checkers []isa.Checker

// the encoded instructions
Insns []byte
Expand All @@ -32,6 +34,10 @@ func (c *VMCode) Size() int {
return len(c.data.Insns)
}

// init registers isa.MapChecker with gob. Checkers are held as isa.Checker
// interface values, and gob can only encode or decode an interface value
// whose concrete type has been registered.
// NOTE(review): other Checker implementations presumably need their own
// gob.Register call before they can be serialized — confirm.
func init() {
gob.Register(isa.MapChecker{})
}

// ToBytes serializes and compresses this VMCode.
func (c *VMCode) ToBytes() ([]byte, error) {
var buf bytes.Buffer
Expand Down Expand Up @@ -172,6 +178,11 @@ func Encode(insns isa.Program) VMCode {
op = opMemoTree
case isa.MemoTreeClose:
op = opMemoTreeClose
case isa.CheckBegin:
op = opCheckBegin
case isa.CheckEnd:
op = opCheckEnd
args = append(encodeU24(addChecker(&code, t.Checker)))
case isa.Error:
op = opError
args = encodeU24(addError(&code, t.Message))
Expand Down Expand Up @@ -281,3 +292,8 @@ func addError(code *VMCode, msg string) uint {
code.data.Errors = append(code.data.Errors, msg)
return uint(len(code.data.Errors) - 1)
}

// addChecker stores checker in the code's checker table and returns the index
// at which it was stored.
func addChecker(code *VMCode, checker isa.Checker) uint {
	idx := uint(len(code.data.Checkers))
	code.data.Checkers = append(code.data.Checkers, checker)
	return idx
}
6 changes: 5 additions & 1 deletion vm/op.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const (
opCaptureLate
opCaptureEnd
opCaptureFull
opCheckBegin
opCheckEnd
opMemoOpen
opMemoClose
opMemoTree
Expand All @@ -56,6 +58,8 @@ const (
szMemoClose = 2
szMemoTree = 2
szMemoTreeClose = 2
szCheckBegin = 2
szCheckEnd = 4
szError = 4

// jumps
Expand Down Expand Up @@ -89,7 +93,7 @@ func size(insn isa.Insn) uint {
switch insn.(type) {
case isa.MemoOpen, isa.CaptureBegin, isa.CaptureLate, isa.CaptureFull,
isa.TestChar, isa.TestCharNoChoice, isa.TestSet, isa.TestSetNoChoice,
isa.TestAny, isa.Error:
isa.TestAny, isa.Error, isa.CheckEnd:
sz += 2
}

Expand Down
10 changes: 9 additions & 1 deletion vm/stack.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,15 @@ const (
stBtrack
stMemo
stCapt
stCheck
)

type stackEntry struct {
stype byte
// we could use a union to avoid the space cost but I have found this
// doesn't impact performance and the space cost itself is quite small
// because the stack is usually small.
ret stackRet
ret stackRet // stackRet is reused for stCheck
btrack stackBacktrack
memo stackMemo // stackMemo is reused for stCapt

Expand Down Expand Up @@ -146,3 +147,10 @@ func (s *stack) pushCapt(m stackMemo) {
memo: m,
})
}

// pushCheck pushes a check entry onto the stack. The entry reuses the ret
// field to carry r.
func (s *stack) pushCheck(r stackRet) {
	ent := stackEntry{stype: stCheck, ret: r}
	s.push(ent)
}
21 changes: 17 additions & 4 deletions vm/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,22 @@ loop:
}

ip += szMemoTree
case opCheckBegin:
st.pushCheck(stackRet(src.Pos()))
ip += szCheckBegin
case opCheckEnd:
ent := st.pop(true)
if ent == nil || ent.stype != stCheck {
panic("check end needs check stack entry")
}
checkid := decodeU24(idata[ip+1:])
checker := vm.data.Checkers[checkid]

if !checker.Check(src.Slice(int(ent.ret), src.Pos())) {
goto fail
}

ip += szCheckEnd
case opError:
errid := decodeU24(idata[ip+1:])
msg := vm.data.Errors[errid]
Expand Down Expand Up @@ -337,10 +353,7 @@ fail:
memoize(int(ent.memo.id), ent.memo.pos, -1, nil)
ent.capt = nil
goto fail
case stRet:
ent.capt = nil
goto fail
case stCapt:
case stRet, stCapt, stCheck:
ent.capt = nil
goto fail
}
Expand Down

0 comments on commit cd0740e

Please sign in to comment.