From 3ea4a8e237315a024ee04f37589d0d0628a2c770 Mon Sep 17 00:00:00 2001 From: AKhatskevich Date: Wed, 19 Sep 2018 00:36:16 +0300 Subject: [PATCH] Fix pattern cache key collision bug Complex patterns are cached by this library to speedup parsing. In rare cases the parser was initialized with errors and did not parse a valid string. Factors which lead to the error: 1. The pattern cache is weak by value. 2. Intermediate syntax constructions are garbagecollected after those were used. 3. Key in the cache is a raw address sometimes. There was possible the following case: 1. User defines grammar: `c = a + b`. 2. `c` is cached by addresses of `a` and `b` 3. `a` and `b` are garbagecollected 4. User defines grammar: `c1 = a1 + b1`. 5. There is a chance that `a1` and `b1` have got the same addresses as was `a` and `b`; in that case `c1` would be wrongly taken from the cache as its caching key is the same as for `c`. 6. Parser works wrong. This commit introduces unique pattern ids, which are generated by a counter. Cache key is then calculated based on those values. --- src/API.lua | 7 ++++--- src/constructors.lua | 18 ++++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/API.lua b/src/API.lua index a3b58a7..2b5c877 100644 --- a/src/API.lua +++ b/src/API.lua @@ -212,14 +212,15 @@ do end +-- Give a unique string for a combination of two patterns. local function nameify(a, b) - return tostring(a)..tostring(b) + return ('%s:%s'):format(a.id, b.id) end -- pt*pt local function choice (a, b) - local name = tostring(a)..tostring(b) + local name = nameify(a, b) local ch = Builder.ptcache.choice[name] if not ch then ch = factorize_choice(a, b) or constructors.binary("choice", a, b) @@ -238,7 +239,7 @@ end local function sequence (a, b) - local name = tostring(a)..tostring(b) + local name = nameify(a, b) local seq = Builder.ptcache.sequence[name] if not seq then seq = factorize_sequence(a, b) or constructors.binary("sequence", a, b) diff --git a/src/constructors.lua b/src/constructors.lua index c6ddce8..8720cb4 100644 --- a/src/constructors.lua +++ b/src/constructors.lua @@ -86,6 +86,8 @@ local newpattern, pattmt -- This deals with the Lua 5.1/5.2 compatibility, and restricted -- environements without access to newproxy and/or debug.setmetatable. +-- Augment a pattern with unique identifier. +local next_pattern_id = 1 if compat.proxies and not compat.lua52_len then -- Lua 5.1 / LuaJIT without compat. local proxycache = weakkey{} @@ -109,6 +111,8 @@ if compat.proxies and not compat.lua52_len then local pt = newproxy(baseproxy) setmetatable(cons, __index_LL) proxycache[pt]=cons + pt.id = "__ptid" .. next_pattern_id + next_pattern_id = next_pattern_id + 1 return pt end else @@ -122,6 +126,8 @@ else function LL.getdirect (p) return p end function newpattern(pt) + pt.id = "__ptid" .. next_pattern_id + next_pattern_id = next_pattern_id + 1 return setmetatable(pt,LL) end end @@ -230,13 +236,13 @@ end constructors["subpt"] = function(typ, pt) -- [[DP]]print("CONS: ", typ, pt, aux) local cache = ptcache[typ] - if not cache[pt] then - cache[pt] = newpattern{ + if not cache[pt.id] then + cache[pt.id] = newpattern{ pkind = typ, pattern = pt } end - return cache[pt] + return cache[pt.id] end constructors["both"] = function(typ, pt, aux) @@ -246,15 +252,15 @@ constructors["both"] = function(typ, pt, aux) ptcache[typ][aux] = weakval{} cache = ptcache[typ][aux] end - if not cache[pt] then - cache[pt] = newpattern{ + if not cache[pt.id] then + cache[pt.id] = newpattern{ pkind = typ, pattern = pt, aux = aux, cache = cache -- needed to keep the cache as long as the pattern exists. } end - return cache[pt] + return cache[pt.id] end constructors["binary"] = function(typ, a, b)