Skip to content

Commit

Permalink
Fix handling of ^ and $
Browse files Browse the repository at this point in the history
Acme limits these to actual newlines, whereas go.regexp also matches at
the start and end of the selection.  Filter these cases out unless they
are also starts or ends of lines.
Rework Regx to now use a []byte instead of a reader, since the
Reader regexps can't return two results at once, blocking correct
interpretation of /^/
Fix Looper at the same time.

This is completely broken for backwards RE searches.
  • Loading branch information
paul-lalonde committed Apr 11, 2018
1 parent ad9a712 commit 03f4790
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 107 deletions.
3 changes: 2 additions & 1 deletion addr.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ func acmeregexp(showerr bool, t Texter, lim Range, r Range, pat string, dir int)
} else {
q = lim.q1
}
sel = pattern.rxexecute(t, nil, r.q1, q, 1)
sels := pattern.rxexecute(t, nil, r.q1, q, 1)
if len(sels) > 0 { sel = sels[0] } else { sel = nil }
}
if len(sel) == 0 && showerr {
warning(nil, "no match for regexp\n")
Expand Down
10 changes: 10 additions & 0 deletions buf.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,13 @@ func (b *Buffer) Nc() int {
// fbufalloc allocates and returns a new rune slice whose length is
// BUFSIZE/utf8.UTFMax.
func fbufalloc() []rune {
	n := BUFSIZE / utf8.UTFMax
	buf := make([]rune, n)
	return buf
}

// View returns the contents of the buffer between offsets q0 (inclusive) and
// q1 (exclusive), converted to UTF-8 bytes. The result is a freshly allocated
// slice, not an alias of the buffer.
//
// Offsets outside the buffer are clamped to it, and an empty or inverted
// range yields an empty slice rather than a slice-bounds panic, so callers
// that step one position past the end get "nothing left" instead of a crash.
//
// TODO(flux): This is another design constraint of Buffer - we want to efficiently
// present contiguous segments of bytes, possibly by merging/flattening our tree
// when a view is requested. This should be a rare operation...
func (b *Buffer) View(q0, q1 int) []byte {
	if q1 > len(*b) {
		q1 = len(*b)
	}
	if q0 < 0 {
		q0 = 0
	}
	// After clamping, q0 >= q1 means there is nothing to return; slicing
	// with q0 > q1 would panic.
	if q0 >= q1 {
		return []byte{}
	}
	return []byte(string((*b)[q0:q1]))
}
83 changes: 43 additions & 40 deletions ecmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ func edittext(w *Window, q int, r []rune) error {

// string is known to be NUL-terminated
func filelist(t *Text, r string) string{
fmt.Println("Filelist =", r)
if len(r) == 0 {
return ""
}
Expand Down Expand Up @@ -161,7 +160,6 @@ func b_cmd(t *Text, cp *Cmd) bool {
}

func B_cmd(t *Text, cp *Cmd) bool {
fmt.Printf("B_cmd cp = %+v\n", cp)
list := filelist(t, cp.text);
if list == "" {
editerror(Enoname);
Expand Down Expand Up @@ -408,7 +406,8 @@ func s_cmd(t *Text, cp *Cmd) bool {
delta := 0;
didsub := false;
for p1 := addr.r.q0; p1<=addr.r.q1; {
if sel = are.rxexecute(t, nil, p1, addr.r.q1, 1); len(sel) > 0 {
if sels := are.rxexecute(t, nil, p1, addr.r.q1, 1); len(sels) > 0 {
sel = sels[0]
if sel[0].q0 == sel[0].q1 { // empty match?
if sel[0].q0 == op {
p1++;
Expand Down Expand Up @@ -688,7 +687,6 @@ func printposn (t * Text, mode int) () {

func eq_cmd(t *Text, cp *Cmd) bool {
mode := 0
fmt.Printf("cp = %+v\n", cp)
switch(len(cp.text)){
case 0:
mode = PosnLine;
Expand Down Expand Up @@ -782,45 +780,40 @@ func loopcmd (f * File, cp * Cmd, rp []Range) () {
}
}

func looper(f * File, cp * Cmd, xy bool) () {
func looper(f * File, cp * Cmd, isX bool) () {
rp := []Range{}
tr := Range{}
r := addr.r;
op := -1
if !xy { op = r.q0 }
isY := !isX
nest++;
are, err := rxcompile(cp.re)
if err != nil {
editerror("bad regexp in %c command", cp.cmdc);
}
for p := r.q0; p<=r.q1; {
sel := are.rxexecute(f.curtext, nil, p, r.q1, 1)
if len(sel) == 0 { // no match, but y should still run
if xy || op>r.q1 {
break;
}
tr.q0 = op
tr.q1 = r.q1;
p = r.q1+1; // exit next loop
}else{
if sel[0].q0==sel[0].q1 { // empty match?
if sel[0].q0==op {
p++;
continue;
}
p = sel[0].q1+1;
}else {
p = sel[0].q1;
}
if xy {
tr = sel[0];
/*if isX */ op := -1 // Not used in the X case.
if isY { op = r.q0 }
sels := are.rxexecute(f.curtext, nil, r.q0, r.q1, -1)
if len(sels) == 0 {
if isY {
rp = append(rp, Range{r.q0, r.q1})
}
} else {
for _, s := range sels {
if isX {
tr = s[0];
} else {
tr.q0 = op
tr.q1 = sel[0].q0;
tr.q1 = s[0].q0
}
op = sel[0].q1;
rp = append(rp, tr)
op = s[0].q1
}
// For the Y case we need to end the set
if isY {
tr.q0 = op
tr.q1 = r.q1
rp = append(rp, tr)
}
rp = append(rp, tr)
}
loopcmd(f, cp.cmd, rp);
nest--;
Expand Down Expand Up @@ -928,30 +921,40 @@ func filelooper (cp *Cmd, XY bool) () {
nest--
}

// TODO(flux) This actually looks like "find one match after p"
// This is almost certainly broken for ^
func nextmatch(f *File, r string, p int, sign int) {
are, err := rxcompile(r)
if err != nil {
editerror("bad regexp in command address")
}
sel = RangeSet{Range{0,0}}
if sign >= 0 {
sel = are.rxexecute(f.curtext, nil, p, 0x7FFFFFFF, NRange)
if len(sel) == 0 {
sels := are.rxexecute(f.curtext, nil, p, 0x7FFFFFFF, 2)
if len(sels) == 0 {
editerror("no match for regexp")
} else {
sel = sels[0]
}
if sel[0].q0 == sel[0].q1 && sel[0].q0 == p {
p++
if p > f.b.Nc() {
p = 0
}
sel = are.rxexecute(f.curtext, nil, p, 0x7FFFFFFF, NRange)
if len(sel) == 0 {
editerror("address")
if len(sels) == 2 {
sel = sels[1]
} else { // wrap around
p++
if p > f.b.Nc() { p = 0 }
sels := are.rxexecute(f.curtext, nil, p, 0x7FFFFFFF, 1)
if len(sels) == 0 {
editerror("address")
} else {
sel = sels[0]
}
}
}
} else {
sel = are.rxbexecute(f.curtext, p, NRange)
if len(sel) == 0 {
editerror("no match for regexp")
sel = RangeSet{Range{0,0}}
}
if sel[0].q0 == sel[0].q1 && sel[0].q1 == p {
p--
Expand Down
2 changes: 1 addition & 1 deletion edit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func TestEdit(t *testing.T) {
{Range{0, 0}, "test", "a/junk", "junkThis is a\nshort text\nto try addressing\n"},
{Range{7, 12}, "test", "a/junk", "This is a\nshjunkort text\nto try addressing\n"},
{Range{0, 0}, "test", "/This/a/junk", "Thisjunk is a\nshort text\nto try addressing\n"},
// {Range{0, 0}, "test", "/^/a/junk", "This is a\njunkshort text\nto try addressing\n"},
{Range{0, 0}, "test", "/^/a/junk", "This is a\njunkshort text\nto try addressing\n"},
{Range{0, 0}, "test", "/$/a/junk", "This is ajunk\nshort text\nto try addressing\n"},


Expand Down
97 changes: 42 additions & 55 deletions regx.go
Original file line number Diff line number Diff line change
@@ -1,92 +1,78 @@
package main

import (
"fmt"
"regexp"
)

// An interface to regexp for acme.

type AcmeRegexp struct {
re *regexp.Regexp
exception rune // ^ or $ or 0
}

func rxcompile(r string) (*AcmeRegexp, error) {
re, err := regexp.Compile("(?m)" + string(r))
re, err := regexp.Compile("(?m)"+r)
if err != nil {
return nil, err
}
return &AcmeRegexp{re}, nil
}

type FRuneReader struct {
buf Texter
q int
eof int
}

type BRuneReader FRuneReader

func NewFRuneReader(b Texter, offset int, eof int) *FRuneReader {
if eof > b.Nc() {
eof = b.Nc()
}
if eof < 0 {
eof = b.Nc()
are := &AcmeRegexp{re, 0}
switch r {
case "^": are.exception = '^'
case "$": are.exception = '$'
}
return &FRuneReader{b, offset, eof}
}

func NewBRuneReader(b Texter, offset int) *BRuneReader {
frr := NewFRuneReader(b, offset, 0)
frr.q = offset - 1
return (*BRuneReader)(frr)
}

func (frr *FRuneReader) ReadRune() (r rune, size int, err error) {
if frr.q >= frr.eof {
return 0, 0, fmt.Errorf("end of buffer")
}
rr := frr.buf.ReadC(frr.q)
frr.q++
return rr, 1, nil
}

func (brr *BRuneReader) ReadRune() (r rune, size int, err error) {
if brr.q < 0 {
return 0, 0, fmt.Errorf("end of buffer")
}
rr := brr.buf.ReadC(brr.q)
brr.q--
return rr, 1, nil
return are, nil
}

// works on Text if present, rune otherwise
func (re *AcmeRegexp) rxexecute(t Texter, r []rune, startp int, eof int, nmatch int) (rp RangeSet) {
func (re *AcmeRegexp) rxexecute(t Texter, r []rune, startp int, eof int, nmatch int) (rp []RangeSet) {
var source Texter
if t != nil {
source = t
} else {
source = &TextBuffer{0, 0, r}
}

rngs := RangeSet([]Range{})
for len(rngs) < nmatch {
reader := NewFRuneReader(source, int(startp), int(eof))
locs := re.re.FindReaderSubmatchIndex(reader)
if locs == nil {
return rngs
if eof == -1 {
eof = source.Nc()
}
view := source.View(startp, eof)
rngs := []RangeSet{}
locs := re.re.FindAllSubmatchIndex(view, nmatch)
loop:
for _, loc := range locs {
// Filter out ^ not at start of a line, $ not at end
if len(loc) != 0 && loc[0] == loc[1] {
switch {
case re.exception == '^' && loc[0] + startp == 0: // start of text is star-of-line
break
case re.exception == '^' && t.ReadC(loc[0]+startp-1) == '\n': // ^ after newline
break
case re.exception == '$' && loc[0] == t.Nc()-startp: // $ at end of text
break
case re.exception == '$' && t.ReadC(loc[0]+startp) == '\n': // $ at newline
break
default:
continue loop
}
}
for i := 0; i < len(locs); i += 2 {
rng := Range{locs[i] + startp, locs[i+1] + startp}
rngs = append(rngs, rng)
rs := RangeSet([]Range{})
for i := 0; i < len(loc); i += 2 {
rng := Range{loc[i] + startp, loc[i+1] + startp}
rs = append(rs, rng)
}
startp += locs[1]
rngs = append(rngs, rs)
}
return rngs
}

func (re *AcmeRegexp) rxbexecute(t Texter, startp int, nmatch int) (rp RangeSet) {
Unimpl()
return []Range{}
}
/* TODO(flux): This is broken, I'm pretty sure. You can't just read backwards;
you also need the backwards regexp
source := t
rngs := RangeSet([]Range{})
Expand All @@ -104,3 +90,4 @@ func (re *AcmeRegexp) rxbexecute(t Texter, startp int, nmatch int) (rp RangeSet)
}
return rngs
}
*/
21 changes: 11 additions & 10 deletions regx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ func testRegexpForward(t *testing.T) {
tests := []struct {
text string
re string
expected RangeSet
expected []RangeSet
nmax int // Max number of matches
}{
{"aaa", "b", RangeSet{}, 10},
{"aaa", "a", RangeSet{{0, 1}, {1, 2}, {2, 3}}, 10},
{"cba", "a", RangeSet{{2, 3}}, 10},
{"aaaaa", "a", RangeSet{{0, 1}, {1, 2}}, 2},
{"aaa", "b", []RangeSet{}, 10},
{"aaa", "a", []RangeSet{{{0, 1}}, {{1, 2}}, {{2, 3}}}, 10},
{"cba", "ba", []RangeSet{{{2, 3}}}, 10},
{"aaaaa", "a", []RangeSet{{{0, 1}}, {{1, 2}}}, 2},
}

for i, test := range tests {
Expand All @@ -29,11 +29,12 @@ func testRegexpForward(t *testing.T) {
t.Errorf("\trs = %#v", rs)
} else {
for j, r := range rs {
if r.q0 != test.expected[j].q0 {
t.Errorf("Mismatch tests[%d].expected[%d].q0=%d, got %d", i, j, tests[i].expected[j].q0, r.q0)
// TODO(flux): r[0] below assumes only one element coming back in each RangeSet
if r[0].q0 != test.expected[j][0].q0 {
t.Errorf("Mismatch tests[%d].expected[%d][0].q0=%d, got %d", i, j, tests[i].expected[j][0].q0, r[0].q0)
}
if r.q1 != test.expected[j].q1 {
t.Errorf("Mismatch tests[%d].expected[%d].q1=%d, got %d", i, j, tests[i].expected[j].q1, r.q1)
if r[0].q1 != test.expected[j][0].q1 {
t.Errorf("Mismatch tests[%d].expected[%d][0].q1=%d, got %d", i, j, tests[i].expected[j][0].q1, r[0].q1)
}
}
}
Expand All @@ -47,7 +48,7 @@ func TestRegexpBackward(t *testing.T) {
expected RangeSet
nmax int
}{
{"baa", "b", RangeSet{{0, 1}}, 10},
{"baa", "ba", RangeSet{{0, 1}}, 10},
{"aaa", "a", RangeSet{{2, 3}, {1, 2}, {0, 1}}, 10},
{"cba", "a", RangeSet{{2, 3}}, 10},
{"aba", "a", RangeSet{{2, 3}, {0, 1}}, 10},
Expand Down
1 change: 1 addition & 0 deletions text.go
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,7 @@ func (t *Text) Delete(q0, q1 int, tofile bool) {
}
}

// View returns the bytes for offsets q0 to q1 by delegating to the View
// method of the text's file buffer (t.file.b).
func (t *Text) View(q0, q1 int) []byte {
	return t.file.b.View(q0, q1)
}
// ReadB reads into r starting at offset q by delegating to the Read method of
// the text's file buffer (t.file.b), and returns that call's count and error
// unchanged.
func (t *Text) ReadB(q int, r []rune) (n int, err error) {
	return t.file.b.Read(q, r)
}
// nc returns the value reported by the Nc method of the text's file buffer
// (t.file.b).
func (t *Text) nc() int {
	return t.file.b.Nc()
}
// Q0 returns the text's q0 field.
func (t *Text) Q0() int {
	return t.q0
}
Expand Down
8 changes: 8 additions & 0 deletions texter.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type Texter interface {
Nc() int
ReadB(q int, r []rune) (n int, err error)
ReadC(q int) rune
View(q0, q1 int) []byte // Return a "read only" slice
}

// TextBuffer implements Texter around a buffer
Expand All @@ -28,6 +29,13 @@ func (t TextBuffer) Constrain(q0, q1 int) (p0, p1 int) {
return p0, p1
}

// View returns the contents of t.buf between offsets q0 (inclusive) and q1
// (exclusive), converted to UTF-8 bytes. The result is a freshly allocated
// slice, not an alias of t.buf.
//
// Offsets outside the buffer are clamped to it, and an empty or inverted
// range yields an empty slice rather than a slice-bounds panic.
func (t *TextBuffer) View(q0, q1 int) []byte {
	if q1 > len(t.buf) {
		q1 = len(t.buf)
	}
	if q0 < 0 {
		q0 = 0
	}
	// After clamping, q0 >= q1 means there is nothing to return; slicing
	// with q0 > q1 would panic.
	if q0 >= q1 {
		return []byte{}
	}
	return []byte(string(t.buf[q0:q1]))
}

func (t *TextBuffer) Delete(q0, q1 int, tofile bool) {
_ = tofile
if q0 > (len(t.buf)) || q1 > (len(t.buf)) {
Expand Down

0 comments on commit 03f4790

Please sign in to comment.