Skip to content

Commit

Permalink
Better factorization.
Browse files Browse the repository at this point in the history
  • Loading branch information
pygy committed Jun 26, 2013
1 parent bf5d082 commit 4781a87
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 46 deletions.
58 changes: 43 additions & 15 deletions lulpeg.lua
Original file line number Diff line number Diff line change
Expand Up @@ -642,13 +642,35 @@ function byterange_new (low, high)
end
return set
end
-- Scratch tables shared by the byteset helpers below: set_if_not_yet marks a
-- numeric operand in one of them, and clean_ab clears that mark afterwards.
local tmpa, tmpb ={}, {}
-- Normalise an operand so it can be indexed.
--   s    : the operand; may be a number or any other value.
--   dest : table used to record a numeric operand.
-- A number is recorded as `dest[s] = true` and `dest` is returned in its
-- place; any other value is returned untouched.
local function set_if_not_yet (s, dest)
    if type(s) ~= "number" then
        return s
    end
    dest[s] = true
    return dest
end
-- Clear the entries keyed by `a` in tmpa and by `b` in tmpb, so the shared
-- scratch tables carry no mark for these operands afterwards.
local function clean_ab (a, b)
    tmpa[a], tmpb[b] = nil, nil
end
-- Build the union of two operands as a fresh byteset.
--   a, b : each is either a number or a value that supports `#` and indexing.
-- Returns the table produced by byteset_new(upper), where slot i (0..upper)
-- holds `A[i] or B[i] or false`.
--
-- NOTE(review): this span comes from a diff view. The first `local upper`
-- line and the first `res[i] = a[i] ...` line look like the pre-change
-- versions of the statements that immediately follow them; applying `#` or
-- indexing to a plain number would raise, so they are presumably not part of
-- the committed code -- confirm against the repository.
local
function byteset_union (a ,b)
local upper = m_max(#a, #b)
-- A numeric operand contributes its own value as the upper bound; any other
-- operand contributes its length.
local upper = m_max(
type(a) == "number" and a or #a,
type(b) == "number" and b or #b
)
-- Numeric operands are swapped for the scratch tables tmpa / tmpb, marked at
-- that number, so both operands can be indexed the same way below.
local A, B
= set_if_not_yet(a, tmpa)
, set_if_not_yet(b, tmpb)
local res = byteset_new(upper)
for i = 0, upper do
res[i] = a[i] or b[i] or false
res[i] = A[i] or B[i] or false
end
-- Remove the marks left in the scratch tables before returning.
clean_ab(a,b)
return res
end
local
Expand Down Expand Up @@ -1798,6 +1820,7 @@ local --Range, Set,
S_union
= --Builder.Range, Builder.set.new,
Builder.set.union
local mergeable = setify{"char", "set"}
local type2cons = {
["/zero"] = "__div",
["div_number"] = "__div",
Expand All @@ -1824,16 +1847,23 @@ function choice (a,b, ...)
for i = 2,#src do
local p1, p2 = dest[#dest], src[i]
local type1, type2 = p1.pkind, p2.pkind
if type1 == "set" and type2 == "set" then
dest[#dest] = constructors.aux(
"set", S_union(p1.aux, p2.aux),
"Union( "..p1.as_is.." || "..p2.as_is.." )"
)
if mergeable[type1] and mergeable[type2] then
dest[#dest] = constructors.aux("set", S_union(p1.aux, p2.aux))
changed = true
elseif ( type1 == type2 ) and unary[type1] and ( p1.aux == p2.aux ) then
dest[#dest] = LL[type2cons[type1] or type1](p1.pattern + p2.pattern, p1.aux)
elseif mergeable[type1] and type2 == "any" and p2.aux == 1
or mergeable[type2] and type1 == "any" and p1.aux == 1 then
dest[#dest] = type1 == "any" and p1 or p2
changed = true
elseif p1 ~= p2 or V_hasCmt(p1) then
elseif type1 == type2 then
if unary[type1] and ( p1.aux == p2.aux ) then
dest[#dest] = LL[type2cons[type1] or type1](p1.pattern + p2.pattern, p1.aux)
changed = true
elseif p1 == p2 then
changed = true
else
dest[#dest + 1] = p2
end
else
dest[#dest + 1] = p2
end -- if identical and without Cmt, fold them into one.
end
Expand Down Expand Up @@ -2308,8 +2338,8 @@ local truept, falsept, Cppt
= constructors.constant.truept
, constructors.constant.falsept
, constructors.constant.Cppt
local split_int, tochar, validate
= cs.split_int, cs.tochar, cs.validate
local split_int, validate
= cs.split_int, cs.validate
local Range, Set, S_union, S_tostring
= Builder.Range, Builder.set.new
, Builder.set.union, Builder.set.tostring
Expand Down Expand Up @@ -2383,10 +2413,8 @@ function LL_R (...)
assert(#r == 2, "bad argument #1 to 'R' (range must have two characters)")
range = S_union ( range, Range(t_unpack(split_int(r))) )
end
local representation = t_concat(map(tochar,
{load("return "..S_tostring(range))()}))
return
constructors.aux("set", range, representation)
constructors.aux("set", range)
end
end
LL.R = LL_R
Expand Down
9 changes: 3 additions & 6 deletions src/API.lua
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ local truept, falsept, Cppt
, constructors.constant.falsept
, constructors.constant.Cppt

local split_int, tochar, validate
= cs.split_int, cs.tochar, cs.validate
local split_int, validate
= cs.split_int, cs.validate

local Range, Set, S_union, S_tostring
= Builder.Range, Builder.set.new
Expand Down Expand Up @@ -140,13 +140,10 @@ function LL_R (...)
assert(#r == 2, "bad argument #1 to 'R' (range must have two characters)")
range = S_union ( range, Range(t_unpack(split_int(r))) )
end
-- This is awful.
local representation = t_concat(map(tochar,
{load("return "..S_tostring(range))()}))
-- [[DBG]] local p = constructors.aux("set", range, representation)
return
--[[DBG]] true and
constructors.aux("set", range, representation)
constructors.aux("set", range)
end
end
LL.R = LL_R
Expand Down
34 changes: 31 additions & 3 deletions src/datastructures.lua
Original file line number Diff line number Diff line change
Expand Up @@ -117,22 +117,50 @@ function byterange_new (low, high)
return set
end


-- Scratch tables shared by the byteset helpers below: set_if_not_yet marks a
-- numeric operand in one of them, and clean_ab clears that mark afterwards.
local tmpa, tmpb ={}, {}

-- Turn an operand into something indexable.
--   s    : number or any other value.
--   dest : table that receives the mark for a numeric `s`.
-- For a number, sets `dest[s] = true` and returns `dest`; otherwise returns
-- `s` as it was given.
local function set_if_not_yet (s, dest)
    local is_number = type(s) == "number"
    if not is_number then
        return s
    end
    dest[s] = true
    return dest
end

-- Reset the scratch tables: drop the entry keyed by `a` from tmpa and the
-- entry keyed by `b` from tmpb.
local function clean_ab (a, b)
    tmpa[a], tmpb[b] = nil, nil
end

-- Build the union of two operands as a fresh byteset.
--   a, b : each is either a number or a value that supports `#` and indexing.
-- Returns the table produced by byteset_new(upper), where slot i (0..upper)
-- holds `A[i] or B[i] or false`.
--
-- NOTE(review): this span comes from a diff view. The first `local upper`
-- line, the first `res[i] = a[i] ...` line and the `expose(a)` / `expose(b)`
-- debug lines look like the pre-change versions of the lines that follow
-- them; applying `#` or indexing to a plain number would raise, so they are
-- presumably not part of the committed code -- confirm against the repository.
local
function byteset_union (a ,b)
-- [[DBG]] print("\nUNION\n", #a, #b, m_max(#a,#b))
local upper = m_max(#a, #b)
-- A numeric operand contributes its own value as the upper bound; any other
-- operand contributes its length.
local upper = m_max(
type(a) == "number" and a or #a,
type(b) == "number" and b or #b
)
-- Numeric operands are swapped for the scratch tables tmpa / tmpb, marked at
-- that number, so both operands can be indexed the same way below.
local A, B
= set_if_not_yet(a, tmpa)
, set_if_not_yet(b, tmpb)

local res = byteset_new(upper)
for i = 0, upper do
res[i] = a[i] or b[i] or false
res[i] = A[i] or B[i] or false
-- [[DBG]] print(i, res[i])
end
-- [[DBG]] print("BS Un ==========================")
-- [[DBG]] print"/// A /////////////////////// "
-- [[DBG]] expose(a)
-- [[DBG]] expose(A)
-- [[DBG]] print"*** B *********************** "
-- [[DBG]] expose(b)
-- [[DBG]] expose(B)
-- [[DBG]] print" RES "
-- [[DBG]] expose(res)
-- Remove the marks left in the scratch tables before returning.
clean_ab(a,b)
return res
end

Expand Down
50 changes: 29 additions & 21 deletions src/factorizer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ local --Range, Set,
= --Builder.Range, Builder.set.new,
Builder.set.union


local mergeable = setify{"char", "set"}


local type2cons = {
Expand All @@ -121,7 +121,7 @@ function choice (a,b, ...)
end
-- 2. handle P(true) and P(false)
dest = process_booleans(dest, { id = falsept, brk = truept })
-- ???? Concatenate `string` and `any` patterns.

local changed
local src
repeat
Expand All @@ -130,27 +130,35 @@ function choice (a,b, ...)
for i = 2,#src do
local p1, p2 = dest[#dest], src[i]
local type1, type2 = p1.pkind, p2.pkind
if type1 == "set" and type2 == "set" then
-- Merge character sets. S"abc" + S"ABC" => S"abcABC"
dest[#dest] = constructors.aux(
"set", S_union(p1.aux, p2.aux),
"Union( "..p1.as_is.." || "..p2.as_is.." )"
)
-- [[DBG]] print("Optimizing", type1, type2)
if mergeable[type1] and mergeable[type2] then
dest[#dest] = constructors.aux("set", S_union(p1.aux, p2.aux))
changed = true
elseif ( type1 == type2 ) and unary[type1] and ( p1.aux == p2.aux ) then
-- C(a) + C(b) => C(a + b)
dest[#dest] = LL[type2cons[type1] or type1](p1.pattern + p2.pattern, p1.aux)
elseif mergeable[type1] and type2 == "any" and p2.aux == 1
or mergeable[type2] and type1 == "any" and p1.aux == 1 then
-- [[DBG]] print("=== Folding "..type1.." and "..type2..".")
dest[#dest] = type1 == "any" and p1 or p2
changed = true
-- elseif ( type1 == type2 ) and type1 == "sequence" then
-- -- "abd" + "acd" => "a" * ( "b" + "c" ) * "d"
-- if p1[1] == p2[1] then
-- mergeseqheads(p1,p2, dest)
-- changed = true
-- elseif p1[#p1] == p2[#p2] then
-- dest[#dest] = mergeseqtails(p1,p2)
-- changed = true
-- end
elseif p1 ~= p2 or V_hasCmt(p1) then
elseif type1 == type2 then
-- C(a) + C(b) => C(a + b)
if unary[type1] and ( p1.aux == p2.aux ) then
dest[#dest] = LL[type2cons[type1] or type1](p1.pattern + p2.pattern, p1.aux)
changed = true
-- elseif ( type1 == type2 ) and type1 == "sequence" then
-- -- "abd" + "acd" => "a" * ( "b" + "c" ) * "d"
-- if p1[1] == p2[1] then
-- mergeseqheads(p1,p2, dest)
-- changed = true
-- elseif p1[#p1] == p2[#p2] then
-- dest[#dest] = mergeseqtails(p1,p2)
-- changed = true
-- end
elseif p1 == p2 then
changed = true
else
dest[#dest + 1] = p2
end
else
dest[#dest + 1] = p2
end -- if identical and without Cmt, fold them into one.
end
Expand Down
10 changes: 9 additions & 1 deletion tests/factorization.lua
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
require(arg[1]):global(_G or _ENV)

-- print"cache"
-- print"identity"

assert(P"a" == P"a")
assert(S"ab" == S"ab")
assert(R("ac","em") == R("ac","em"))
assert(P"a"^0 == P"a"^0)
assert(R"AZ" == R"AZ")
assert(S"AB" == S"AB")
assert((P"A"*"B" + P"B" * "F") == (P"A"*"B" + P"B" * "F"))

-- print"booleans"

Expand Down Expand Up @@ -42,6 +43,13 @@ assert(P"A"/1 + P"B"/1 == (P"A" + P"B")/1)
-- print"set and range unions"

assert(R"az"+R"AZ" == R("az", "AZ"))
assert(S"ABC" == P"A" + "B" + "C")
assert(S"ABC" == P"A" + S"BC")
assert(S"ABCDEF" == P"A" + S"BC" + R"DF")

-- print"type1 == type2 bug"

assert((P"A"*"B" + P"B" * "F") ~= (P"A"*"B" + P"B" * "H"))

-- print"captures"
-- local e = _G or _ENV
Expand Down

0 comments on commit 4781a87

Please sign in to comment.