--[[ LPEGLJ lpvm.lua Virtual machine Copyright (C) 2014 Rostislav Sacek. based on LPeg v1.0 - PEG pattern matching for Lua Lua.org & PUC-Rio written by Roberto Ierusalimschy http://www.inf.puc-rio.br/~roberto/lpeg/ ** Permission is hereby granted, free of charge, to any person obtaining ** a copy of this software and associated documentation files (the ** "Software"), to deal in the Software without restriction, including ** without limitation the rights to use, copy, modify, merge, publish, ** distribute, sublicense, and/or sell copies of the Software, and to ** permit persons to whom the Software is furnished to do so, subject to ** the following conditions: ** ** The above copyright notice and this permission notice shall be ** included in all copies or substantial portions of the Software. ** ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ** ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] --]] local ffi = require "ffi" local lpcap = require "lpcap" --[[ Only for debug purpose local lpprint = require"lpprint" --]] local band, rshift, lshift = bit.band, bit.rshift, bit.lshift -- {====================================================== -- Virtual Machine -- ======================================================= -- Interpret the result of a dynamic capture: false -> fail; -- true -> keep current position; number -> next position. -- Return new subject position. 'fr' is stack index where -- is the result; 'curr' is current subject position; 'limit' -- is subject's size. local MAXBEHINDPREDICATE = 255 -- max behind for Look-behind predicate local MAXOFF = 0xF -- maximum for full capture local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF) -- maximum before current pos local INITBACK = 400 -- default maximum size for call/backtrack stack local IAny = 0 -- if no char, fail local IChar = 1 -- if char != val, fail local ISet = 2 -- if char not in val, fail local ITestAny = 3 -- in no char, jump to 'offset' local ITestChar = 4 -- if char != val, jump to 'offset' local ITestSet = 5 -- if char not in val, jump to 'offset' local ISpan = 6 -- read a span of chars in val local IBehind = 7 -- walk back 'val' characters (fail if not possible) local IRet = 8 -- return from a rule local IEnd = 9 -- end of pattern local IChoice = 10 -- stack a choice; next fail will jump to 'offset' local IJmp = 11 -- jump to 'offset' local ICall = 12 -- call rule at 'offset' local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) local ICommit = 14 -- pop choice and jump to 'offset' local IPartialCommit = 15 -- update top choice to current position and jump local IBackCommit = 16 -- "fails" but jump to its own 'offset' local IFailTwice = 17 -- pop one choice and then fail local IFail = 18 -- go back to saved state on choice and jump to saved offset local IGiveup = 19 -- internal use local IFullCapture = 20 -- complete capture of last 'off' chars local IOpenCapture = 21 -- start a capture local ICloseCapture = 22 local ICloseRunTime = 23 local Cclose = 0 local Cposition = 1 local Cconst = 2 local Cbackref = 3 local Carg = 4 local Csimple = 5 local Ctable = 6 local Cfunction = 7 local Cquery = 8 local Cstring = 9 local Cnum = 10 local Csubst = 11 local Cfold = 12 local Cruntime = 13 local Cgroup = 14 local BCapcandelete = 0x30000 local maxstack = INITBACK local maxcapturedefault = 100 local maxmemo = 1000 local usememoization = false local trace = false local FAIL = -1 local LRFAIL = -1 local VOID = -2 local CHOICE = -3 local CALL = -4 ffi.cdef [[ typedef struct { int code; int val; int offset; int aux; } PATTERN_ELEMENT; typedef struct { int allocsize; int size; PATTERN_ELEMENT *p; } PATTERN; typedef struct { int tag; int val; int ps; int cap; } TREEPATTERN_ELEMENT; typedef struct { int id; int treesize; PATTERN *code; TREEPATTERN_ELEMENT p[?]; } TREEPATTERN; typedef struct { double s; double X; double memos; int p; int caplevel; int pA; int valuetabletop; } STACK; typedef struct { double s; int siz; int idx; int kind; int candelete; } CAPTURE; void *malloc( size_t size ); void free( void *memblock ); void *realloc( void *memblock, size_t size ); ]] local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT') local treepattern = ffi.typeof('TREEPATTERN') local patternelement = ffi.typeof('PATTERN_ELEMENT') local pattern = ffi.typeof('PATTERN') local settype = ffi.typeof('int32_t[8]') local function resdyncaptures(fr, curr, limit, checkstreamlen) local typ = type(fr) -- false value? if not fr then return FAIL -- and fail elseif typ == 'boolean' then -- true? return curr -- keep current position else local res = fr -- new position if res < curr or (limit and res > limit) or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then error("invalid position returned by match-time capture", 0) end return res end assert(false) end -- Add capture values returned by a dynamic capture to the capture list -- 'base', nested inside a group capture. 'fd' indexes the first capture -- value, 'n' is the number of values (at least 1). local function adddyncaptures(s, base, index, n, fd, valuetable) -- Cgroup capture is already there assert(base[index].kind == Cgroup and base[index].siz == 0) base[index].idx = 0 -- make it an anonymous group base[index + 1] = {} -- add runtime captures for i = 1, n do base[index + i].kind = Cruntime base[index + i].siz = 1 -- mark it as closed local ind = #valuetable + 1 valuetable[ind] = fd[i + 1] base[index + i].idx = ind -- stack index of capture value base[index + i].s = s base[index + i + 1] = {} end base[index + n + 1].kind = Cclose -- close group base[index + n + 1].siz = 1 base[index + n + 1].s = s base[index + n + 2] = {} end -- Opcode interpreter local function match(stream, last, o, s, op, valuetable, ...) local arg = { ... } local argcount = select('#', ...) local len = #o local ptr = ffi.cast('const unsigned char*', o) s = s - 1 local stackptr = 0 -- point to first empty slot in stack local captop = 0 -- point to first empty slot in captures local STACK = ffi.new("STACK[?]", INITBACK) local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault) local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } } local capturestackptr = #CAPTURESTACK local maxcapture = maxcapturedefault local stacklimit = INITBACK local L = {} local Memo1, Memo2 = {}, {} local memoind = 0 local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2)) local nocapturereleased = true local p = 0 -- current instruction local streambufsize = 2 ^ 8 local streambufsizemask = streambufsize - 1 -- faster modulo local streambufs = {} local streambufoffset = 0 local streamstartbuffer = 0 local streambufferscount = 0 local level = -1 local function deletestreambuffers() local min = s for i = stackptr - 1, 0, -1 do local val = STACK[i].s if val >= 0 then min = math.min(val, min) end end for i = captop - 1, 0, -1 do local val = CAPTURE[i].s if val >= 0 then min = math.min(val, min) end end for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do -- max behind for full capture and max behind for Look-behind predicate if i + streambufsize + MAXBEHIND < min then streambufs[i] = nil streambufferscount = streambufferscount - 1 else streamstartbuffer = i - 1 break end end end local function addstreamdata(s, last) local len = #s local srcoffset = 0 if streambufferscount > 128 then deletestreambuffers() end repeat local offset = bit.band(streambufoffset, streambufsizemask) if offset > 0 then local index = streambufoffset - offset + 1 local count = math.min(len, streambufsize - offset) ffi.copy(streambufs[index] + offset, s:sub(srcoffset + 1, srcoffset + 1 + count), count) len = len - count srcoffset = srcoffset + count streambufoffset = streambufoffset + count end if len > 0 then local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1 local buf = ffi.new('unsigned char[?]', streambufsize) streambufferscount = streambufferscount + 1 streambufs[index] = buf local count = math.min(len, streambufsize) ffi.copy(buf, s:sub(srcoffset + 1, srcoffset + 1 + count), count) len = len - count srcoffset = srcoffset + count streambufoffset = streambufoffset + count end if streambufoffset >= 2 ^ 47 then error("too big input stream", 0) end until len == 0 end local function getstreamchar(s) local offset = bit.band(s, streambufsizemask) local index = s - offset + 1 return streambufs[index][offset] end local checkstreamlen local function getstreamstring(st, en) -- TODO Optimalize access local str = {} local i = st >= 0 and st or 1 local to = en >= 0 and en or math.huge while true do if i > to then break end if not checkstreamlen(i - 1) then return end if last and (st < 0 or en < 0) then for j = i, streambufoffset do str[#str + 1] = string.char(getstreamchar(j - 1)) end en = en < 0 and streambufoffset + en + 1 or en en = st > 0 and en - st + 1 or en st = st < 0 and streambufoffset + st + 1 or 1 return table.concat(str):sub(st, en) else str[#str + 1] = string.char(getstreamchar(i - 1)) i = i + 1 end end return table.concat(str) end function checkstreamlen(index) local str while true do if index < streambufoffset then return true else if last then s = streambufoffset return false end local max = captop for i = stackptr - 1, 0, -1 do local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1 if val >= 0 then max = math.min(val, max) end end local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount)) if n > 0 then for i = stackptr - 1, 0, -1 do local val = STACK[i].caplevel if val > 0 then STACK[i].caplevel = STACK[i].caplevel - n end end captop = captop - n end if outindex > 0 then nocapturereleased = false end str, last = coroutine.yield(1, unpack(out, 1, outindex)) addstreamdata(str) end end end local function doublecapture() maxcapture = maxcapture * 2 local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture) ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop) CAPTURE = NEWCAPTURE CAPTURESTACK[capturestackptr].capture = CAPTURE CAPTURESTACK[capturestackptr].maxcapture = maxcapture end local function pushcapture() CAPTURE[captop].idx = op.p[p].offset CAPTURE[captop].kind = band(op.p[p].val, 0x0f) CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0 captop = captop + 1 p = p + 1 if captop >= maxcapture then doublecapture() end end local function traceenter(typ, par) level = level + (par or 0) io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux])) end local function traceleave(inst) io.write(('%s- %s\n'):format((' '):rep(level), valuetable[op.p[inst].aux])) level = level - 1 end local function tracematch(typ, start, par, from, to, inst, extra, ...) local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...) local capstr = {} for i = 1, capscount do capstr[i] = tostring(caps[i]) end extra = extra and '(' .. extra .. ')' or '' io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra, o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " "))) level = level - par end local function fail() -- pattern failed: try to backtrack local X repeat -- remove pending calls stackptr = stackptr - 1 if stackptr == -1 then p = FAIL return end s = STACK[stackptr].s X = STACK[stackptr].X if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL end -- lvar.2 rest if X == LRFAIL then CAPTURESTACK[capturestackptr] = nil capturestackptr = capturestackptr - 1 CAPTURE = CAPTURESTACK[capturestackptr].capture maxcapture = CAPTURESTACK[capturestackptr].maxcapture L[STACK[stackptr].pA + s * maxpointer] = nil end if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end until X == CHOICE or X >= 0 p = STACK[stackptr].p for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do table.remove(valuetable) end -- inc.2 if X >= 0 then s = X capturestackptr = capturestackptr - 1 CAPTURE = CAPTURESTACK[capturestackptr].capture captop = CAPTURESTACK[capturestackptr].captop maxcapture = CAPTURESTACK[capturestackptr].maxcapture local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit while captop + capture.captop >= maxcapture do doublecapture() end ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) captop = captop + capture.captop if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end CAPTURESTACK[capturestackptr + 1] = nil L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil else captop = STACK[stackptr].caplevel end end local function doublestack() if stackptr >= maxstack then error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0) end stacklimit = stacklimit * 2 stacklimit = (stacklimit > maxstack) and maxstack or stacklimit local NEWSTACK = ffi.new("STACK[?]", stacklimit) ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr) STACK = NEWSTACK end if stream then addstreamdata(o) len = nil o = nil ptr = nil end while true do --[[ Only for debug io.write(("s: |%s| stck:%d, caps:%d \n"):format(s + 1, stackptr, captop)) if p ~= FAIL then lpprint.printinst(op.p, p, valuetable) lpprint.printcaplist(CAPTURE, captop, valuetable) end --]] if p == FAIL then return -1 end local code = op.p[p].code if code == IEnd then CAPTURE[captop].kind = Cclose CAPTURE[captop].s = -1 return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...) elseif code == IRet then if STACK[stackptr - 1].X == CALL then stackptr = stackptr - 1 if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) end p = STACK[stackptr].p if usememoization and STACK[stackptr].memos ~= VOID then local dif = captop - STACK[stackptr].caplevel local caps if dif > 0 then caps = ffi.new("CAPTURE[?]", dif) ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE')) end local val = { s, dif, caps } Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val end else local X = STACK[stackptr - 1].X -- lvar.1 inc.1 if X == LRFAIL or s > X then if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end STACK[stackptr - 1].X = s p = STACK[stackptr - 1].pA s = STACK[stackptr - 1].s local lambda = L[p + s * maxpointer] lambda.level = lambda.level + 1 lambda.X = STACK[stackptr - 1].X STACK[stackptr - 1].caplevel = captop STACK[stackptr - 1].valuetabletop = #valuetable CAPTURESTACK[capturestackptr].captop = captop lambda.capturecommit = CAPTURESTACK[capturestackptr] captop = 0 CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault) CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } maxcapture = maxcapturedefault else -- inc.3 stackptr = stackptr - 1 p = STACK[stackptr].p s = STACK[stackptr].X for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do table.remove(valuetable) end local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] capturestackptr = capturestackptr - 1 CAPTURE = CAPTURESTACK[capturestackptr].capture captop = CAPTURESTACK[capturestackptr].captop maxcapture = CAPTURESTACK[capturestackptr].maxcapture local capture = lambda.capturecommit while captop + capture.captop >= maxcapture do doublecapture() end ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) captop = captop + capture.captop if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end CAPTURESTACK[capturestackptr + 1] = nil L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil end end elseif code == IBehind then local n = op.p[p].val if n > s then fail() else s = s - n p = p + 1 end elseif code == IJmp then if trace and op.p[p].aux ~= 0 then traceenter('TC') end p = p + op.p[p].offset elseif code == IChoice then if stackptr == stacklimit then doublestack() end STACK[stackptr].X = CHOICE STACK[stackptr].p = p + op.p[p].offset STACK[stackptr].s = s STACK[stackptr].caplevel = captop STACK[stackptr].valuetabletop = #valuetable stackptr = stackptr + 1 p = p + 1 elseif code == ICall then if stackptr == stacklimit then doublestack() end local k = bit.band(op.p[p].val, 0xffff) if k == 0 then local pA = p + op.p[p].offset local memo = Memo1[pA + s * maxpointer] if usememoization and memo then if trace then traceenter('M', 1) end if memo == FAIL then if trace then traceleave(p) end fail() else local dif = memo[2] if dif > 0 then while captop + dif >= maxcapture do doublecapture() end local caps = memo[3] ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE')) captop = captop + dif end if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) end s = memo[1] p = p + 1 end else if trace then traceenter('', 1) end STACK[stackptr].X = CALL STACK[stackptr].s = s STACK[stackptr].p = p + 1 -- save return address STACK[stackptr].pA = pA STACK[stackptr].memos = s STACK[stackptr].caplevel = captop stackptr = stackptr + 1 p = pA if usememoization and not memo then memoind = memoind + 1 if memoind > maxmemo then memoind = 0 Memo1 = Memo2 Memo2 = {} end end end else local pA = p + op.p[p].offset local X = L[pA + s * maxpointer] -- lvar.1 lvar.2 if not X then if trace then traceenter('', 1) end CAPTURESTACK[capturestackptr].captop = captop local capture = ffi.new("CAPTURE[?]", maxcapturedefault) capturestackptr = capturestackptr + 1 CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault } CAPTURE = capture maxcapture = maxcapturedefault captop = 0 L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 } STACK[stackptr].p = p + 1 STACK[stackptr].pA = pA STACK[stackptr].s = s STACK[stackptr].X = LRFAIL stackptr = stackptr + 1 p = pA elseif X.X == LRFAIL or k < X.k then -- lvar.3 lvar.5 fail() else -- lvar.4 local capture = X.capturecommit while captop + capture.captop >= maxcapture do doublecapture() end ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) captop = captop + capture.captop p = p + 1 s = X.X end end elseif code == ICommit then stackptr = stackptr - 1 p = p + op.p[p].offset elseif code == IPartialCommit then STACK[stackptr - 1].s = s STACK[stackptr - 1].caplevel = captop STACK[stackptr - 1].valuetabletop = #valuetable p = p + op.p[p].offset elseif code == IBackCommit then stackptr = stackptr - 1 s = STACK[stackptr].s captop = STACK[stackptr].caplevel for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do table.remove(valuetable) end p = p + op.p[p].offset elseif code == IFailTwice then stackptr = stackptr - 1 fail() elseif code == IFail then fail() elseif code == ICloseRunTime then -- invalidate memo for i = 0, stackptr - 1 do STACK[i].memos = VOID end local cs = {} cs.s = o cs.stream = getstreamstring cs.ocap = CAPTURE cs.ptop = arg cs.ptopcount = argcount local out = { outindex = 0, out = {} } local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function captop = captop - n local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result -- fail? if res == FAIL then fail() else s = res - 1 -- else update current position n = out.outindex - 1 -- number of new captures -- any new capture? if n > 0 then captop = captop + 1 while captop + n + 1 >= maxcapture do doublecapture() end captop = captop + n + 1 -- add new captures to 'capture' list adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable) end p = p + 1 end elseif code == ICloseCapture then local s1 = s + 1 assert(captop > 0) -- if possible, turn capture into a full capture if CAPTURE[captop - 1].siz == 0 and s1 - CAPTURE[captop - 1].s < 255 then CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1 p = p + 1 else CAPTURE[captop].siz = 1 CAPTURE[captop].s = s + 1 pushcapture() end elseif code == IOpenCapture then CAPTURE[captop].siz = 0 CAPTURE[captop].s = s + 1 pushcapture() elseif code == IFullCapture then CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F) pushcapture() -- standard mode elseif o then if code == IAny then if s < len then p = p + 1 s = s + 1 else fail() end elseif code == ITestAny then if s < len then p = p + 1 else p = p + op.p[p].offset end elseif code == IChar then if s < len and ptr[s] == op.p[p].val then p = p + 1 s = s + 1 else fail() end elseif code == ITestChar then if s < len and ptr[s] == op.p[p].val then p = p + 1 else p = p + op.p[p].offset end elseif code == ISet then local c = ptr[s] local set = valuetable[op.p[p].val] if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then p = p + 1 s = s + 1 else fail() end elseif code == ITestSet then local c = ptr[s] local set = valuetable[op.p[p].val] if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then p = p + 1 else p = p + op.p[p].offset end elseif code == ISpan then while s < len do local c = ptr[s] local set = valuetable[op.p[p].val] if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then break end s = s + 1 end p = p + 1 end else -- stream mode if code == IAny then if checkstreamlen(s) then p = p + 1 s = s + 1 else fail() end elseif code == ITestAny then if checkstreamlen(s) then p = p + 1 else p = p + op.p[p].offset end elseif code == IChar then if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then p = p + 1 s = s + 1 else fail() end elseif code == ITestChar then if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then p = p + 1 else p = p + op.p[p].offset end elseif code == ISet then local c = checkstreamlen(s) and getstreamchar(s) local set = valuetable[op.p[p].val] if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then p = p + 1 s = s + 1 else fail() end elseif code == ITestSet then local c = checkstreamlen(s) and getstreamchar(s) local set = valuetable[op.p[p].val] if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then p = p + 1 else p = p + op.p[p].offset end elseif code == ISpan then while checkstreamlen(s) do local c = getstreamchar(s) local set = valuetable[op.p[p].val] if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then break end s = s + 1 end p = p + 1 end end end end local function setmax(val) maxstack = val if maxstack < INITBACK then maxstack = INITBACK end end local function setmaxbehind(val) MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, val or 0) end local function enablememoization(val) usememoization = val end local function enabletracing(val) trace = val end -- Get the initial position for the match, interpreting negative -- values from the end of the subject local function initposition(len, pos) local ii = pos or 1 -- positive index? if (ii > 0) then -- inside the string? if ii <= len then return ii - 1; -- return it (corrected to 0-base) else return len; -- crop at the end end else -- negative index -- inside the string? if -ii <= len then return len + ii -- return position from the end else return 0; -- crop at the beginning end end end local function lp_match(pat, s, init, valuetable, ...) local i = initposition(s:len(), init) + 1 return select(2, match(false, true, s, i, pat.code, valuetable, ...)) end local function lp_streammatch(pat, init, valuetable, ...) local params = { ... } local paramslength = select('#', ...) local fce = coroutine.wrap(function(s, last) return match(true, last, s, init or 1, pat.code, valuetable, unpack(params, 1, paramslength)) end) return fce end local function retcount(...) return select('#', ...), { ... } end -- Only for testing purpose -- stream emulation (send all chars from string one char after char) local function lp_emulatestreammatch(pat, s, init, valuetable, ...) local init = initposition(s:len(), init) + 1 local fce = lp_streammatch(pat, init, valuetable, ...) local ret, count = {}, 0 for j = 1, #s do local pcount, pret = retcount(fce(s:sub(j, j), j == #s)) -- one char if pret[1] == -1 then return -- fail elseif pret[1] == 0 then -- parsing finished -- collect result for i = 2, pcount do ret[count + i - 1] = pret[i] end count = count + pcount - 1 return unpack(ret, 1, count) end for i = 2, pcount do ret[count + i - 1] = pret[i] end count = count + pcount - 1 end return select(2, fce(s, true)) -- empty string end local function lp_load(str, fcetab, usemeta) local index = 0 assert(str) local ptr = ffi.cast('const char*', str) local patsize = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local len = ffi.sizeof(treepatternelement) * patsize local pat if usemeta then pat = treepattern(patsize) else pat = ffi.gc(ffi.cast('TREEPATTERN*', ffi.C.malloc(ffi.sizeof(treepattern, patsize))), function(ct) if ct.code ~= nil then ffi.C.free(ct.code.p) ffi.C.free(ct.code) end ffi.C.free(ct) end) ffi.fill(pat, ffi.sizeof(treepattern, patsize)) pat.treesize = patsize pat.id = 0 end ffi.copy(pat.p, ptr + index, len) index = index + len if usemeta then pat.code = pattern() else pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern))) assert(pat.code ~= nil) pat.code.allocsize = 10 pat.code.size = 0 pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize) assert(pat.code.p ~= nil) ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize) end pat.code.size = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local len = pat.code.size * ffi.sizeof(patternelement) local data = ffi.string(ptr + index, len) index = index + len local count = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local valuetable = {} for i = 1, count do local tag = ffi.string(ptr + index, 3) index = index + 3 --string if tag == 'str' then local len = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local val = ffi.string(ptr + index, len) index = index + len valuetable[#valuetable + 1] = val elseif tag == 'num' then --number local len = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local val = ffi.string(ptr + index, len) index = index + len valuetable[#valuetable + 1] = tonumber(val) elseif tag == 'cdt' then --ctype local val = settype() ffi.copy(val, ptr + index, ffi.sizeof(settype)) index = index + ffi.sizeof(settype) valuetable[#valuetable + 1] = val elseif tag == 'fnc' then --function local len = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local fname = ffi.string(ptr + index, len) index = index + len len = ffi.cast('uint32_t*', ptr + index)[0] index = index + 4 local val = ffi.string(ptr + index, len) index = index + len if fcetab and fcetab[fname] then assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname)) valuetable[#valuetable + 1] = fcetab[fname] else valuetable[#valuetable + 1] = loadstring(val) end end end pat.code.allocsize = pat.code.size pat.code.p = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize) assert(pat.code.p ~= nil) ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize) return pat, valuetable end local function lp_loadfile(fname, fcetab, usemeta) local file = assert(io.open(fname, 'rb')) local pat, valuetable = lp_load(assert(file:read("*a")), fcetab, usemeta) file:close() return pat, valuetable end -- ====================================================== return { match = lp_match, streammatch = lp_streammatch, emulatestreammatch = lp_emulatestreammatch, load = lp_load, loadfile = lp_loadfile, setmax = setmax, setmaxbehind = setmaxbehind, enablememoization = enablememoization, enabletracing = enabletracing }