1042 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
			
		
		
	
	
			1042 lines
		
	
	
		
			38 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
| --[[
 | |
| LPEGLJ
 | |
| lpvm.lua
 | |
| Virtual machine
 | |
| Copyright (C) 2014 Rostislav Sacek.
 | |
| based on LPeg v1.0 - PEG pattern matching for Lua
 | |
| Lua.org & PUC-Rio  written by Roberto Ierusalimschy
 | |
| http://www.inf.puc-rio.br/~roberto/lpeg/
 | |
| 
 | |
| ** Permission is hereby granted, free of charge, to any person obtaining
 | |
| ** a copy of this software and associated documentation files (the
 | |
| ** "Software"), to deal in the Software without restriction, including
 | |
| ** without limitation the rights to use, copy, modify, merge, publish,
 | |
| ** distribute, sublicense, and/or sell copies of the Software, and to
 | |
| ** permit persons to whom the Software is furnished to do so, subject to
 | |
| ** the following conditions:
 | |
| **
 | |
| ** The above copyright notice and this permission notice shall be
 | |
| ** included in all copies or substantial portions of the Software.
 | |
| **
 | |
| ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | |
| ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 | |
| ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 | |
| ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 | |
| ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 | |
| ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 | |
| ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 | |
| **
 | |
| ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
 | |
| --]]
 | |
| 
 | |
| local ffi = require "ffi"
 | |
| local lpcap = require "lpcap"
 | |
| --[[ Only for debug purpose
 | |
| local lpprint = require"lpprint"
 | |
| --]]
 | |
| 
 | |
| local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
 | |
| 
 | |
| -- {======================================================
 | |
| -- Virtual Machine
 | |
| -- =======================================================
 | |
| 
 | |
| -- Interpret the result of a dynamic capture (see 'resdyncaptures'):
 | |
| -- false -> fail; true -> keep current position; number -> next position.
 | |
| -- Return the new subject position. 'fr' is the result value to
 | |
| -- interpret; 'curr' is the current subject position; 'limit' is the
 | |
| -- largest valid position (nil when the subject is a stream).
 | |
| 
 | |
| -- Size limits used by the virtual machine.
 | |
| local MAXBEHINDPREDICATE = 255 -- max behind for Look-behind predicate
 | |
| local MAXOFF = 0xF -- maximum for full capture
 | |
| local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF) -- maximum before current pos
 | |
| local INITBACK = 400 -- default maximum size for call/backtrack stack (also its initial allocation)
 | |
| 
 | |
| -- Instruction opcodes; compared against the 'code' field of a PATTERN_ELEMENT.
 | |
| local IAny = 0 -- if no char, fail
 | |
| local IChar = 1 -- if char != val, fail
 | |
| local ISet = 2 -- if char not in val, fail
 | |
| local ITestAny = 3 -- if no char, jump to 'offset'
 | |
| local ITestChar = 4 -- if char != val, jump to 'offset'
 | |
| local ITestSet = 5 -- if char not in val, jump to 'offset'
 | |
| local ISpan = 6 -- read a span of chars in val
 | |
| local IBehind = 7 -- walk back 'val' characters (fail if not possible)
 | |
| local IRet = 8 -- return from a rule
 | |
| local IEnd = 9 -- end of pattern
 | |
| local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
 | |
| local IJmp = 11 -- jump to 'offset'
 | |
| local ICall = 12 -- call rule at 'offset'
 | |
| local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
 | |
| local ICommit = 14 -- pop choice and jump to 'offset'
 | |
| local IPartialCommit = 15 -- update top choice to current position and jump
 | |
| local IBackCommit = 16 -- "fails" but jump to its own 'offset'
 | |
| local IFailTwice = 17 -- pop one choice and then fail
 | |
| local IFail = 18 -- go back to saved state on choice and jump to saved offset
 | |
| local IGiveup = 19 -- internal use
 | |
| local IFullCapture = 20 -- complete capture of last 'off' chars
 | |
| local IOpenCapture = 21 -- start a capture
 | |
| local ICloseCapture = 22 -- close the most recently opened capture
 | |
| local ICloseRunTime = 23 -- close a match-time capture and call its function
 | |
| 
 | |
| -- Capture kinds; stored in the 'kind' field of a CAPTURE entry
 | |
| -- (pushcapture takes the low 4 bits of the instruction's 'val').
 | |
| local Cclose = 0
 | |
| local Cposition = 1
 | |
| local Cconst = 2
 | |
| local Cbackref = 3
 | |
| local Carg = 4
 | |
| local Csimple = 5
 | |
| local Ctable = 6
 | |
| local Cfunction = 7
 | |
| local Cquery = 8
 | |
| local Cstring = 9
 | |
| local Cnum = 10
 | |
| local Csubst = 11
 | |
| local Cfold = 12
 | |
| local Cruntime = 13
 | |
| local Cgroup = 14
 | |
| 
 | |
| -- Tunables and flags for the virtual machine.
 | |
| -- NOTE(review): these are plain locals here; presumably setters further
 | |
| -- down the file change them -- confirm.
 | |
| local BCapcandelete = 0x30000 -- bit mask tested on an instruction's 'val' to set CAPTURE.candelete
 | |
| local maxstack = INITBACK -- hard limit for the backtrack stack (see doublestack)
 | |
| local maxcapturedefault = 100 -- initial number of slots in a capture array
 | |
| local maxmemo = 1000 -- number of new memo entries before Memo1/Memo2 are rotated
 | |
| local usememoization = false -- enable memoization of rule calls
 | |
| local trace = false -- enable trace output written with io.write
 | |
| 
 | |
| -- Sentinel values. FAIL is assigned to the instruction pointer and to memo
 | |
| -- entries; the others are stored in STACK fields ('X' or 'memos').
 | |
| local FAIL = -1 -- no instruction left to try / memoized failure
 | |
| local LRFAIL = -1 -- STACK.X: left-recursive call with no result yet (same value as FAIL)
 | |
| local VOID = -2 -- STACK.memos: memo entry invalidated by a match-time capture
 | |
| local CHOICE = -3 -- STACK.X: entry pushed by IChoice
 | |
| local CALL = -4 -- STACK.X: entry pushed by a plain ICall
 | |
| 
 | |
| -- C declarations used through the FFI:
 | |
| --   PATTERN_ELEMENT      one VM instruction (read as op.p[i].code/val/offset/aux)
 | |
| --   PATTERN              an instruction array with its size
 | |
| --   TREEPATTERN_ELEMENT,
 | |
| --   TREEPATTERN          pattern tree types (not used in this part of the file)
 | |
| --   STACK                one backtrack/call stack entry
 | |
| --   CAPTURE              one capture list entry
 | |
| -- NOTE(review): 's', 'X' and 'memos' are doubles, presumably so that large
 | |
| -- stream positions fit (addstreamdata allows offsets up to 2^47) -- confirm.
 | |
| ffi.cdef [[
 | |
| typedef struct {
 | |
|           int code;
 | |
|           int val;
 | |
|           int offset;
 | |
|           int aux;
 | |
|          } PATTERN_ELEMENT;
 | |
| typedef struct {
 | |
|           int allocsize;
 | |
|           int size;
 | |
|           PATTERN_ELEMENT *p;
 | |
|          } PATTERN;
 | |
| typedef struct {
 | |
|           int tag;
 | |
|           int val;
 | |
|           int ps;
 | |
|           int cap;
 | |
|          } TREEPATTERN_ELEMENT;
 | |
| typedef struct {
 | |
|           int id;
 | |
|           int treesize;
 | |
|           PATTERN *code;
 | |
|           TREEPATTERN_ELEMENT p[?];
 | |
|          } TREEPATTERN;
 | |
| 
 | |
| typedef struct {
 | |
|           double s;
 | |
|           double X;
 | |
|           double memos;
 | |
|           int p;
 | |
|           int caplevel;
 | |
|           int pA;
 | |
|           int valuetabletop;
 | |
|          } STACK;
 | |
| 
 | |
| typedef struct {
 | |
|           double s;
 | |
|           int siz;
 | |
|           int idx;
 | |
|           int kind;
 | |
|           int candelete;
 | |
|          } CAPTURE;
 | |
| 
 | |
| void *malloc( size_t size );
 | |
| void free( void *memblock );
 | |
| void *realloc( void *memblock, size_t size );
 | |
| ]]
 | |
| 
 | |
| -- Cached ctype objects for the declarations above.
 | |
| -- NOTE(review): none of these is referenced in the visible part of the
 | |
| -- file; presumably they are used further down -- confirm.
 | |
| local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
 | |
| local treepattern = ffi.typeof('TREEPATTERN')
 | |
| local patternelement = ffi.typeof('PATTERN_ELEMENT')
 | |
| local pattern = ffi.typeof('PATTERN')
 | |
| local settype = ffi.typeof('int32_t[8]') -- eight 32-bit words
 | |
| 
 | |
--- Interpret the result of a match-time (dynamic) capture.
-- @param fr the value to interpret: nil/false -> the match fails;
--   true -> keep the current position; anything else must convert to a
--   number and is taken as the next subject position.
-- @param curr current subject position (1-based).
-- @param limit largest valid position, or nil when matching a stream.
-- @param checkstreamlen optional function used only when 'limit' is nil;
--   it is called with (position - 2) and must return a true value when
--   the stream reaches that far.
-- @return FAIL, 'curr', or the new position.
-- Raises "invalid position returned by match-time capture" when the value
-- is not numeric, lies before 'curr', or lies beyond the subject.
local function resdyncaptures(fr, curr, limit, checkstreamlen)
    if not fr then
        return FAIL -- false value: fail
    end
    if fr == true then
        return curr -- keep current position
    end
    -- tonumber() rejects tables, functions and non-numeric strings, so they
    -- get the documented error instead of a raw comparison error.
    local res = tonumber(fr)
    if not res
            or res < curr
            or (limit and res > limit)
            or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then
        error("invalid position returned by match-time capture", 0)
    end
    return res
end
 | |
| 
 | |
| 
 | |
-- Append the extra values produced by a dynamic capture to the capture
-- list 'base', wrapped in an anonymous group.
-- 'index' is the slot of the group capture, which must already be there
-- and still be open. 'n' is the number of values (at least 1). The values
-- are read from fd[2] .. fd[n + 1] (fd[1] is skipped) and appended to
-- 'valuetable'; each runtime capture records the valuetable index of its
-- value. 's' is the subject position stored in every entry written here.
-- The slot after the closing entry is cleared as well.
local function adddyncaptures(s, base, index, n, fd, valuetable)
    local group = base[index]
    -- Cgroup capture is already there
    assert(group.kind == Cgroup and group.siz == 0)
    group.idx = 0 -- make it an anonymous group

    local slot = index + 1
    base[slot] = {}
    -- add runtime captures
    for i = 1, n do
        local valueindex = #valuetable + 1
        valuetable[valueindex] = fd[i + 1]
        local cap = base[slot]
        cap.kind = Cruntime
        cap.siz = 1 -- mark it as closed
        cap.idx = valueindex -- valuetable index of the capture value
        cap.s = s
        slot = slot + 1
        base[slot] = {}
    end

    -- close group
    local closing = base[slot]
    closing.kind = Cclose
    closing.siz = 1
    closing.s = s
    base[slot + 1] = {}
end
 | |
| 
 | |
| 
 | |
| -- Opcode interpreter
 | |
| 
 | |
| local function match(stream, last, o, s, op, valuetable, ...)
 | |
|     local arg = { ... }
 | |
|     local argcount = select('#', ...)
 | |
|     local len = #o
 | |
|     local ptr = ffi.cast('const unsigned char*', o)
 | |
|     s = s - 1
 | |
|     local stackptr = 0 -- point to first empty slot in stack
 | |
|     local captop = 0 -- point to first empty slot in captures
 | |
|     local STACK = ffi.new("STACK[?]", INITBACK)
 | |
|     local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
 | |
|     local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } }
 | |
|     local capturestackptr = #CAPTURESTACK
 | |
|     local maxcapture = maxcapturedefault
 | |
|     local stacklimit = INITBACK
 | |
|     local L = {}
 | |
|     local Memo1, Memo2 = {}, {}
 | |
|     local memoind = 0
 | |
|     local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2))
 | |
|     local nocapturereleased = true
 | |
| 
 | |
|     local p = 0 -- current instruction
 | |
|     local streambufsize = 2 ^ 8
 | |
|     local streambufsizemask = streambufsize - 1 -- faster modulo
 | |
|     local streambufs = {}
 | |
|     local streambufoffset = 0
 | |
|     local streamstartbuffer = 0
 | |
|     local streambufferscount = 0
 | |
|     local level = -1
 | |
| 
 | |
|     local function deletestreambuffers()
 | |
|         local min = s
 | |
|         for i = stackptr - 1, 0, -1 do
 | |
|             local val = STACK[i].s
 | |
|             if val >= 0 then
 | |
|                 min = math.min(val, min)
 | |
|             end
 | |
|         end
 | |
| 
 | |
|         for i = captop - 1, 0, -1 do
 | |
|             local val = CAPTURE[i].s
 | |
|             if val >= 0 then
 | |
|                 min = math.min(val, min)
 | |
|             end
 | |
|         end
 | |
|         for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do
 | |
|             -- max behind for full capture and max behind for Look-behind predicate
 | |
|             if i + streambufsize + MAXBEHIND < min then
 | |
|                 streambufs[i] = nil
 | |
|                 streambufferscount = streambufferscount - 1
 | |
|             else
 | |
|                 streamstartbuffer = i - 1
 | |
|                 break
 | |
|             end
 | |
|         end
 | |
|     end
 | |
| 
 | |
|     local function addstreamdata(s, last)
 | |
|         local len = #s
 | |
|         local srcoffset = 0
 | |
|         if streambufferscount > 128 then
 | |
|             deletestreambuffers()
 | |
|         end
 | |
|         repeat
 | |
|             local offset = bit.band(streambufoffset, streambufsizemask)
 | |
|             if offset > 0 then
 | |
|                 local index = streambufoffset - offset + 1
 | |
|                 local count = math.min(len, streambufsize - offset)
 | |
|                 ffi.copy(streambufs[index] + offset, s:sub(srcoffset + 1, srcoffset + 1 + count), count)
 | |
|                 len = len - count
 | |
|                 srcoffset = srcoffset + count
 | |
|                 streambufoffset = streambufoffset + count
 | |
|             end
 | |
|             if len > 0 then
 | |
|                 local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1
 | |
|                 local buf = ffi.new('unsigned char[?]', streambufsize)
 | |
|                 streambufferscount = streambufferscount + 1
 | |
|                 streambufs[index] = buf
 | |
|                 local count = math.min(len, streambufsize)
 | |
|                 ffi.copy(buf, s:sub(srcoffset + 1, srcoffset + 1 + count), count)
 | |
|                 len = len - count
 | |
|                 srcoffset = srcoffset + count
 | |
|                 streambufoffset = streambufoffset + count
 | |
|             end
 | |
|             if streambufoffset >= 2 ^ 47 then
 | |
|                 error("too big input stream", 0)
 | |
|             end
 | |
|         until len == 0
 | |
|     end
 | |
| 
 | |
|     local function getstreamchar(s)
 | |
|         local offset = bit.band(s, streambufsizemask)
 | |
|         local index = s - offset + 1
 | |
|         return streambufs[index][offset]
 | |
|     end
 | |
| 
 | |
|     local checkstreamlen
 | |
| 
 | |
|     -- Return the stream contents between the 1-based positions 'st' and
 | |
|     -- 'en' (inclusive). Negative positions count from the end of the stream,
 | |
|     -- as in string.sub; they can only be resolved once 'last' is set.
 | |
|     -- Availability is checked with checkstreamlen, which may yield to wait
 | |
|     -- for more input. Returns nothing when checkstreamlen reports that the
 | |
|     -- stream ended before a required position.
 | |
|     local function getstreamstring(st, en)
 | |
|         -- TODO Optimize access
 | |
|         local str = {}
 | |
|         -- a negative bound is unknown for now: read from 1 / with no upper limit
 | |
|         local i = st >= 0 and st or 1
 | |
|         local to = en >= 0 and en or math.huge
 | |
|         while true do
 | |
|             if i > to then break end
 | |
|             if not checkstreamlen(i - 1) then return end
 | |
|             if last and (st < 0 or en < 0) then
 | |
|                 -- whole stream is known: collect the rest, then cut out the
 | |
|                 -- requested range relative to what was collected
 | |
|                 for j = i, streambufoffset do
 | |
|                     str[#str + 1] = string.char(getstreamchar(j - 1))
 | |
|                 end
 | |
|                 en = en < 0 and streambufoffset + en + 1 or en
 | |
|                 en = st > 0 and en - st + 1 or en
 | |
|                 st = st < 0 and streambufoffset + st + 1 or 1
 | |
|                 return table.concat(str):sub(st, en)
 | |
|             else
 | |
|                 str[#str + 1] = string.char(getstreamchar(i - 1))
 | |
|                 i = i + 1
 | |
|             end
 | |
|         end
 | |
|         return table.concat(str)
 | |
|     end
 | |
| 
 | |
|     -- Make sure the 0-based stream position 'index' is available.
 | |
|     -- Returns true when it is already buffered. When the stream has ended
 | |
|     -- ('last' is set) it moves 's' to the end of the data and returns false.
 | |
|     -- Otherwise it passes the captures below every pending choice to
 | |
|     -- lpcap.getcapturesruntime, yields 1 plus the values that call produced,
 | |
|     -- appends the data received on resume and checks again.
 | |
|     -- Assigns the local 'checkstreamlen' that is forward-declared above.
 | |
|     function checkstreamlen(index)
 | |
|         local str
 | |
|         while true do
 | |
|             if index < streambufoffset then
 | |
|                 return true
 | |
|             else
 | |
|                 if last then
 | |
|                     s = streambufoffset
 | |
|                     return false
 | |
|                 end
 | |
|                 -- lowest capture level saved by a pending choice: captures
 | |
|                 -- below it can no longer be undone by backtracking
 | |
|                 local max = captop
 | |
|                 for i = stackptr - 1, 0, -1 do
 | |
|                     local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1
 | |
|                     if val >= 0 then
 | |
|                         max = math.min(val, max)
 | |
|                     end
 | |
|                 end
 | |
|                 local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount))
 | |
|                 if n > 0 then
 | |
|                     -- 'n' is subtracted from captop below, so shift the
 | |
|                     -- capture levels saved on the stack by the same amount
 | |
|                     for i = stackptr - 1, 0, -1 do
 | |
|                         local val = STACK[i].caplevel
 | |
|                         if val > 0 then
 | |
|                             STACK[i].caplevel = STACK[i].caplevel - n
 | |
|                         end
 | |
|                     end
 | |
|                     captop = captop - n
 | |
|                 end
 | |
|                 if outindex > 0 then
 | |
|                     nocapturereleased = false
 | |
|                 end
 | |
|                 -- hand over the produced values and wait for more data; the
 | |
|                 -- resumer supplies the next chunk and the end-of-stream flag
 | |
|                 str, last = coroutine.yield(1, unpack(out, 1, outindex))
 | |
|                 addstreamdata(str)
 | |
|             end
 | |
|         end
 | |
|     end
 | |
| 
 | |
|     local function doublecapture()
 | |
|         maxcapture = maxcapture * 2
 | |
|         local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture)
 | |
|         ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop)
 | |
|         CAPTURE = NEWCAPTURE
 | |
|         CAPTURESTACK[capturestackptr].capture = CAPTURE
 | |
|         CAPTURESTACK[capturestackptr].maxcapture = maxcapture
 | |
|     end
 | |
| 
 | |
|     local function pushcapture()
 | |
|         CAPTURE[captop].idx = op.p[p].offset
 | |
|         CAPTURE[captop].kind = band(op.p[p].val, 0x0f)
 | |
|         CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0
 | |
|         captop = captop + 1
 | |
|         p = p + 1
 | |
|         if captop >= maxcapture then
 | |
|             doublecapture()
 | |
|         end
 | |
|     end
 | |
| 
 | |
|     local function traceenter(typ, par)
 | |
|         level = level + (par or 0)
 | |
|         io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux]))
 | |
|     end
 | |
| 
 | |
|     local function traceleave(inst)
 | |
|         io.write(('%s- %s\n'):format((' '):rep(level), valuetable[op.p[inst].aux]))
 | |
|         level = level - 1
 | |
|     end
 | |
| 
 | |
|     local function tracematch(typ, start, par, from, to, inst, extra, ...)
 | |
|         local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...)
 | |
|         local capstr = {}
 | |
|         for i = 1, capscount do capstr[i] = tostring(caps[i]) end
 | |
|         extra = extra and '(' .. extra .. ')' or ''
 | |
|         io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra,
 | |
|             o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " ")))
 | |
|         level = level - par
 | |
|     end
 | |
| 
 | |
|     -- Backtrack after a failed instruction.
 | |
|     -- Pops stack entries until it reaches a choice (X == CHOICE) or a
 | |
|     -- left-recursive call that already has a result (X >= 0), then restores
 | |
|     -- the instruction pointer, subject position, valuetable size and capture
 | |
|     -- state saved there. With an empty stack it sets p = FAIL so the main
 | |
|     -- loop gives up.
 | |
|     -- NOTE(review): the 'lvar.N' / 'inc.N' tags presumably name rules of the
 | |
|     -- left-recursion algorithm this VM implements -- confirm the reference.
 | |
|     local function fail()
 | |
|         -- pattern failed: try to backtrack
 | |
|         local X
 | |
|         repeat -- remove pending calls
 | |
|             stackptr = stackptr - 1
 | |
|             if stackptr == -1 then
 | |
|                 p = FAIL
 | |
|                 return
 | |
|             end
 | |
|             s = STACK[stackptr].s
 | |
|             X = STACK[stackptr].X
 | |
|             -- a failed plain call is memoized as a failure unless a
 | |
|             -- match-time capture invalidated its memo slot
 | |
|             if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then
 | |
|                 Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
 | |
|                 Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
 | |
|             end
 | |
|             -- lvar.2 rest
 | |
|             -- left-recursive call that never produced a result: drop its
 | |
|             -- capture frame and its entry in L
 | |
|             if X == LRFAIL then
 | |
|                 CAPTURESTACK[capturestackptr] = nil
 | |
|                 capturestackptr = capturestackptr - 1
 | |
|                 CAPTURE = CAPTURESTACK[capturestackptr].capture
 | |
|                 maxcapture = CAPTURESTACK[capturestackptr].maxcapture
 | |
|                 L[STACK[stackptr].pA + s * maxpointer] = nil
 | |
|             end
 | |
|             if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end
 | |
|         until X == CHOICE or X >= 0
 | |
|         p = STACK[stackptr].p
 | |
|         -- discard values added to valuetable after this entry was pushed
 | |
|         for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
 | |
|             table.remove(valuetable)
 | |
|         end
 | |
|         -- inc.2
 | |
|         if X >= 0 then
 | |
|             -- left-recursive call with a recorded result: continue from the
 | |
|             -- recorded position and append its committed captures to the
 | |
|             -- enclosing capture frame
 | |
|             s = X
 | |
|             capturestackptr = capturestackptr - 1
 | |
|             CAPTURE = CAPTURESTACK[capturestackptr].capture
 | |
|             captop = CAPTURESTACK[capturestackptr].captop
 | |
|             maxcapture = CAPTURESTACK[capturestackptr].maxcapture
 | |
|             local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit
 | |
|             while captop + capture.captop >= maxcapture do
 | |
|                 doublecapture()
 | |
|             end
 | |
|             ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
 | |
|             captop = captop + capture.captop
 | |
|             if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end
 | |
|             CAPTURESTACK[capturestackptr + 1] = nil
 | |
|             L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
 | |
|         else
 | |
|             -- ordinary choice: restore the capture level saved with it
 | |
|             captop = STACK[stackptr].caplevel
 | |
|         end
 | |
|     end
 | |
| 
 | |
|     local function doublestack()
 | |
|         if stackptr >= maxstack then
 | |
|             error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0)
 | |
|         end
 | |
|         stacklimit = stacklimit * 2
 | |
|         stacklimit = (stacklimit > maxstack) and maxstack or stacklimit
 | |
|         local NEWSTACK = ffi.new("STACK[?]", stacklimit)
 | |
|         ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr)
 | |
|         STACK = NEWSTACK
 | |
|     end
 | |
| 
 | |
|     if stream then
 | |
|         addstreamdata(o)
 | |
|         len = nil
 | |
|         o = nil
 | |
|         ptr = nil
 | |
|     end
 | |
|     while true do
 | |
|         --[[ Only for debug
 | |
|         io.write(("s: |%s| stck:%d, caps:%d  \n"):format(s + 1, stackptr, captop))
 | |
|         if p ~= FAIL then
 | |
|             lpprint.printinst(op.p, p, valuetable)
 | |
|             lpprint.printcaplist(CAPTURE, captop, valuetable)
 | |
|         end
 | |
|         --]]
 | |
|         if p == FAIL then return -1 end
 | |
|         local code = op.p[p].code
 | |
|         if code == IEnd then
 | |
|             CAPTURE[captop].kind = Cclose
 | |
|             CAPTURE[captop].s = -1
 | |
|             return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...)
 | |
|         elseif code == IRet then
 | |
|             if STACK[stackptr - 1].X == CALL then
 | |
|                 stackptr = stackptr - 1
 | |
|                 if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) end
 | |
|                 p = STACK[stackptr].p
 | |
|                 if usememoization and STACK[stackptr].memos ~= VOID then
 | |
|                     local dif = captop - STACK[stackptr].caplevel
 | |
|                     local caps
 | |
|                     if dif > 0 then
 | |
|                         caps = ffi.new("CAPTURE[?]", dif)
 | |
|                         ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE'))
 | |
|                     end
 | |
|                     local val = { s, dif, caps }
 | |
|                     Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
 | |
|                     Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
 | |
|                 end
 | |
|             else
 | |
|                 local X = STACK[stackptr - 1].X
 | |
|                 -- lvar.1 inc.1
 | |
|                 if X == LRFAIL or s > X then
 | |
|                     if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end
 | |
|                     STACK[stackptr - 1].X = s
 | |
|                     p = STACK[stackptr - 1].pA
 | |
|                     s = STACK[stackptr - 1].s
 | |
|                     local lambda = L[p + s * maxpointer]
 | |
|                     lambda.level = lambda.level + 1
 | |
|                     lambda.X = STACK[stackptr - 1].X
 | |
|                     STACK[stackptr - 1].caplevel = captop
 | |
|                     STACK[stackptr - 1].valuetabletop = #valuetable
 | |
|                     CAPTURESTACK[capturestackptr].captop = captop
 | |
|                     lambda.capturecommit = CAPTURESTACK[capturestackptr]
 | |
|                     captop = 0
 | |
|                     CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
 | |
|                     CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault }
 | |
|                     maxcapture = maxcapturedefault
 | |
|                 else
 | |
|                     -- inc.3
 | |
|                     stackptr = stackptr - 1
 | |
|                     p = STACK[stackptr].p
 | |
|                     s = STACK[stackptr].X
 | |
|                     for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
 | |
|                         table.remove(valuetable)
 | |
|                     end
 | |
|                     local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer]
 | |
|                     capturestackptr = capturestackptr - 1
 | |
|                     CAPTURE = CAPTURESTACK[capturestackptr].capture
 | |
|                     captop = CAPTURESTACK[capturestackptr].captop
 | |
|                     maxcapture = CAPTURESTACK[capturestackptr].maxcapture
 | |
|                     local capture = lambda.capturecommit
 | |
|                     while captop + capture.captop >= maxcapture do
 | |
|                         doublecapture()
 | |
|                     end
 | |
|                     ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
 | |
|                     captop = captop + capture.captop
 | |
|                     if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end
 | |
|                     CAPTURESTACK[capturestackptr + 1] = nil
 | |
|                     L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
 | |
|                 end
 | |
|             end
 | |
|         elseif code == IBehind then
 | |
|             local n = op.p[p].val
 | |
|             if n > s then
 | |
|                 fail()
 | |
|             else
 | |
|                 s = s - n
 | |
|                 p = p + 1
 | |
|             end
 | |
|         elseif code == IJmp then
 | |
|             if trace and op.p[p].aux ~= 0 then traceenter('TC') end
 | |
|             p = p + op.p[p].offset
 | |
|         elseif code == IChoice then
 | |
|             if stackptr == stacklimit then
 | |
|                 doublestack()
 | |
|             end
 | |
|             STACK[stackptr].X = CHOICE
 | |
|             STACK[stackptr].p = p + op.p[p].offset
 | |
|             STACK[stackptr].s = s
 | |
|             STACK[stackptr].caplevel = captop
 | |
|             STACK[stackptr].valuetabletop = #valuetable
 | |
|             stackptr = stackptr + 1
 | |
|             p = p + 1
 | |
|         elseif code == ICall then
 | |
|             if stackptr == stacklimit then
 | |
|                 doublestack()
 | |
|             end
 | |
|             local k = bit.band(op.p[p].val, 0xffff)
 | |
|             if k == 0 then
 | |
|                 local pA = p + op.p[p].offset
 | |
|                 local memo = Memo1[pA + s * maxpointer]
 | |
|                 if usememoization and memo then
 | |
|                     if trace then traceenter('M', 1) end
 | |
|                     if memo == FAIL then
 | |
|                         if trace then traceleave(p) end
 | |
|                         fail()
 | |
|                     else
 | |
|                         local dif = memo[2]
 | |
|                         if dif > 0 then
 | |
|                             while captop + dif >= maxcapture do
 | |
|                                 doublecapture()
 | |
|                             end
 | |
|                             local caps = memo[3]
 | |
|                             ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE'))
 | |
|                             captop = captop + dif
 | |
|                         end
 | |
|                         if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) end
 | |
|                         s = memo[1]
 | |
|                         p = p + 1
 | |
|                     end
 | |
|                 else
 | |
|                     if trace then traceenter('', 1) end
 | |
|                     STACK[stackptr].X = CALL
 | |
|                     STACK[stackptr].s = s
 | |
|                     STACK[stackptr].p = p + 1 -- save return address
 | |
|                     STACK[stackptr].pA = pA
 | |
|                     STACK[stackptr].memos = s
 | |
|                     STACK[stackptr].caplevel = captop
 | |
|                     stackptr = stackptr + 1
 | |
|                     p = pA
 | |
|                     if usememoization and not memo then
 | |
|                         memoind = memoind + 1
 | |
|                         if memoind > maxmemo then
 | |
|                             memoind = 0
 | |
|                             Memo1 = Memo2
 | |
|                             Memo2 = {}
 | |
|                         end
 | |
|                     end
 | |
|                 end
 | |
|             else
 | |
|                 local pA = p + op.p[p].offset
 | |
|                 local X = L[pA + s * maxpointer]
 | |
|                 -- lvar.1 lvar.2
 | |
|                 if not X then
 | |
|                     if trace then traceenter('', 1) end
 | |
|                     CAPTURESTACK[capturestackptr].captop = captop
 | |
|                     local capture = ffi.new("CAPTURE[?]", maxcapturedefault)
 | |
|                     capturestackptr = capturestackptr + 1
 | |
|                     CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault }
 | |
|                     CAPTURE = capture
 | |
|                     maxcapture = maxcapturedefault
 | |
|                     captop = 0
 | |
|                     L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 }
 | |
|                     STACK[stackptr].p = p + 1
 | |
|                     STACK[stackptr].pA = pA
 | |
|                     STACK[stackptr].s = s
 | |
|                     STACK[stackptr].X = LRFAIL
 | |
|                     stackptr = stackptr + 1
 | |
|                     p = pA
 | |
|                 elseif X.X == LRFAIL or k < X.k then
 | |
|                     -- lvar.3 lvar.5
 | |
|                     fail()
 | |
|                 else
 | |
|                     -- lvar.4
 | |
|                     local capture = X.capturecommit
 | |
|                     while captop + capture.captop >= maxcapture do
 | |
|                         doublecapture()
 | |
|                     end
 | |
|                     ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
 | |
|                     captop = captop + capture.captop
 | |
|                     p = p + 1
 | |
|                     s = X.X
 | |
|                 end
 | |
|             end
 | |
|         elseif code == ICommit then
 | |
|             stackptr = stackptr - 1
 | |
|             p = p + op.p[p].offset
 | |
|         elseif code == IPartialCommit then
 | |
|             STACK[stackptr - 1].s = s
 | |
|             STACK[stackptr - 1].caplevel = captop
 | |
|             STACK[stackptr - 1].valuetabletop = #valuetable
 | |
|             p = p + op.p[p].offset
 | |
|         elseif code == IBackCommit then
 | |
|             stackptr = stackptr - 1
 | |
|             s = STACK[stackptr].s
 | |
|             captop = STACK[stackptr].caplevel
 | |
|             for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
 | |
|                 table.remove(valuetable)
 | |
|             end
 | |
|             p = p + op.p[p].offset
 | |
|         elseif code == IFailTwice then
 | |
|             stackptr = stackptr - 1
 | |
|             fail()
 | |
|         elseif code == IFail then
 | |
|             fail()
 | |
|         elseif code == ICloseRunTime then
 | |
|             -- invalidate memo
 | |
|             for i = 0, stackptr - 1 do
 | |
|                 STACK[i].memos = VOID
 | |
|             end
 | |
|             local cs = {}
 | |
|             cs.s = o
 | |
|             cs.stream = getstreamstring
 | |
|             cs.ocap = CAPTURE
 | |
|             cs.ptop = arg
 | |
|             cs.ptopcount = argcount
 | |
|             local out = { outindex = 0, out = {} }
 | |
|             local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function
 | |
|             captop = captop - n
 | |
|             local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result
 | |
|             -- fail?
 | |
|             if res == FAIL then
 | |
|                 fail()
 | |
|             else
 | |
|                 s = res - 1 -- else update current position
 | |
|                 n = out.outindex - 1 -- number of new captures
 | |
|                 -- any new capture?
 | |
|                 if n > 0 then
 | |
|                     captop = captop + 1
 | |
|                     while captop + n + 1 >= maxcapture do
 | |
|                         doublecapture()
 | |
|                     end
 | |
|                     captop = captop + n + 1
 | |
|                     -- add new captures to 'capture' list
 | |
|                     adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable)
 | |
|                 end
 | |
|                 p = p + 1
 | |
|             end
 | |
|         elseif code == ICloseCapture then
 | |
|             local s1 = s + 1
 | |
|             assert(captop > 0)
 | |
|             -- if possible, turn capture into a full capture
 | |
|             if CAPTURE[captop - 1].siz == 0 and
 | |
|                     s1 - CAPTURE[captop - 1].s < 255 then
 | |
|                 CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1
 | |
|                 p = p + 1
 | |
|             else
 | |
|                 CAPTURE[captop].siz = 1
 | |
|                 CAPTURE[captop].s = s + 1
 | |
|                 pushcapture()
 | |
|             end
 | |
|         elseif code == IOpenCapture then
 | |
|             CAPTURE[captop].siz = 0
 | |
|             CAPTURE[captop].s = s + 1
 | |
|             pushcapture()
 | |
|         elseif code == IFullCapture then
 | |
|             CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size
 | |
|             CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F)
 | |
|             pushcapture()
 | |
|             -- standard mode
 | |
|         elseif o then
 | |
|             if code == IAny then
 | |
|                 if s < len then
 | |
|                     p = p + 1
 | |
|                     s = s + 1
 | |
|                 else
 | |
|                     fail()
 | |
|                 end
 | |
|             elseif code == ITestAny then
 | |
|                 if s < len then
 | |
|                     p = p + 1
 | |
|                 else
 | |
|                     p = p + op.p[p].offset
 | |
|                 end
 | |
|             elseif code == IChar then
 | |
|                 if s < len and ptr[s] == op.p[p].val then
 | |
|                     p = p + 1
 | |
|                     s = s + 1
 | |
|                 else
 | |
|                     fail()
 | |
|                 end
 | |
|             elseif code == ITestChar then
 | |
|                 if s < len and ptr[s] == op.p[p].val then
 | |
|                     p = p + 1
 | |
|                 else
 | |
|                     p = p + op.p[p].offset
 | |
|                 end
 | |
|             elseif code == ISet then
 | |
|                 local c = ptr[s]
 | |
|                 local set = valuetable[op.p[p].val]
 | |
|                 if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 | |
|                     p = p + 1
 | |
|                     s = s + 1
 | |
|                 else
 | |
|                     fail()
 | |
|                 end
 | |
|             elseif code == ITestSet then
 | |
|                 local c = ptr[s]
 | |
|                 local set = valuetable[op.p[p].val]
 | |
|                 if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 | |
|                     p = p + 1
 | |
|                 else
 | |
|                     p = p + op.p[p].offset
 | |
|                 end
 | |
|             elseif code == ISpan then
 | |
|                 while s < len do
 | |
|                     local c = ptr[s]
 | |
|                     local set = valuetable[op.p[p].val]
 | |
|                     if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
 | |
|                         break
 | |
|                     end
 | |
|                     s = s + 1
 | |
|                 end
 | |
|                 p = p + 1
 | |
|             end
 | |
|         else
 | |
|             -- stream mode
 | |
|             if code == IAny then
 | |
|                 if checkstreamlen(s) then
 | |
|                     p = p + 1
 | |
|                     s = s + 1
 | |
|                 else
 | |
|                     fail()
 | |
|                 end
 | |
|             elseif code == ITestAny then
 | |
|                 if checkstreamlen(s) then
 | |
|                     p = p + 1
 | |
|                 else
 | |
|                     p = p + op.p[p].offset
 | |
|                 end
 | |
|             elseif code == IChar then
 | |
|                 if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
 | |
|                     p = p + 1
 | |
|                     s = s + 1
 | |
|                 else
 | |
|                     fail()
 | |
|                 end
 | |
|             elseif code == ITestChar then
 | |
|                 if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
 | |
|                     p = p + 1
 | |
|                 else
 | |
|                     p = p + op.p[p].offset
 | |
|                 end
 | |
|             elseif code == ISet then
 | |
|                 local c = checkstreamlen(s) and getstreamchar(s)
 | |
|                 local set = valuetable[op.p[p].val]
 | |
|                 if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 | |
|                     p = p + 1
 | |
|                     s = s + 1
 | |
|                 else
 | |
|                     fail()
 | |
|                 end
 | |
|             elseif code == ITestSet then
 | |
|                 local c = checkstreamlen(s) and getstreamchar(s)
 | |
|                 local set = valuetable[op.p[p].val]
 | |
|                 if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 | |
|                     p = p + 1
 | |
|                 else
 | |
|                     p = p + op.p[p].offset
 | |
|                 end
 | |
|             elseif code == ISpan then
 | |
|                 while checkstreamlen(s) do
 | |
|                     local c = getstreamchar(s)
 | |
|                     local set = valuetable[op.p[p].val]
 | |
|                     if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
 | |
|                         break
 | |
|                     end
 | |
|                     s = s + 1
 | |
|                 end
 | |
|                 p = p + 1
 | |
|             end
 | |
|         end
 | |
|     end
 | |
| end
 | |
| 
 | |
-- Set the backtrack-stack limit used by the VM.
-- The requested value is stored first and then raised to INITBACK when it is
-- smaller, so the limit never drops below the initial stack size.
-- @param val requested maximum stack size (number)
local function setmax(val)
    maxstack = val
    -- clamp from below
    if INITBACK > maxstack then
        maxstack = INITBACK
    end
end
 | |
| 
 | |
-- Set MAXBEHIND to the largest of MAXBEHINDPREDICATE, MAXOFF and the
-- requested value.
-- @param val requested value (optional; treated as 0 when nil or false)
local function setmaxbehind(val)
    local requested = val or 0
    MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, requested)
end
 | |
| 
 | |
-- Store the memoization switch; the value is kept as given (no coercion).
-- @param val new value for the `usememoization` flag
local function enablememoization(val)
    usememoization = val
end
 | |
| 
 | |
-- Store the tracing switch; the value is kept as given (no coercion).
-- @param val new value for the `trace` flag
local function enabletracing(val)
    trace = val
end
 | |
| 
 | |
-- Convert a user-supplied start index into a 0-based offset into a subject
-- of length `len`.
--   * pos > 0  : 1-based index from the start, cropped to `len`
--   * pos <= 0 : offset from the end of the subject, cropped to 0
--   * pos omitted: treated as 1
-- @param len subject length
-- @param pos requested position (optional)
-- @return 0-based offset in the range [0, len]
local function initposition(len, pos)
    local start = pos or 1
    if start > 0 then
        -- counted from the front
        if start <= len then
            return start - 1 -- inside the subject: shift to 0-base
        end
        return len -- past the end: crop
    end
    -- zero or negative: counted from the back
    if -start <= len then
        return len + start
    end
    return 0 -- before the beginning: crop
end
 | |
| 
 | |
-- Match a pattern against a complete string.
-- Calls the VM in non-stream mode and drops its first return value; whatever
-- else `match` returns is passed through to the caller.
-- @param pat        pattern object (its `code` field is executed)
-- @param s          subject string
-- @param init       optional start position (see initposition)
-- @param valuetable value table belonging to the pattern
-- @param ...        extra arguments forwarded to the VM
local function lp_match(pat, s, init, valuetable, ...)
    local offset = initposition(s:len(), init)
    return select(2, match(false, true, s, offset + 1, pat.code, valuetable, ...))
end
 | |
| 
 | |
-- Create a stream matcher.
-- Returns a coroutine-wrapped function `f(s, last)`; its first call starts the
-- VM in stream mode with chunk `s` and the `last` flag.
-- @param pat        pattern object (its `code` field is executed)
-- @param init       optional start position (defaults to 1)
-- @param valuetable value table belonging to the pattern
-- @param ...        extra arguments forwarded to the VM (nils are preserved)
local function lp_streammatch(pat, init, valuetable, ...)
    -- remember the exact argument count so trailing nils survive the unpack
    local nargs = select('#', ...)
    local args = { ... }
    return coroutine.wrap(function(s, last)
        return match(true, last, s, init or 1, pat.code, valuetable, unpack(args, 1, nargs))
    end)
end
 | |
| 
 | |
-- Pack a vararg list together with its true length.
-- @return number of arguments (trailing nils included), table holding them
local function retcount(...)
    local n = select('#', ...)
    local packed = { ... }
    return n, packed
end
 | |
| 
 | |
-- Testing helper: emulate a stream by feeding the subject to a stream matcher
-- one character at a time.
-- The first value returned by each feed is treated as a status:
--   -1 : return nothing
--    0 : return everything collected so far
--   anything else: keep feeding
-- All remaining values of every feed are accumulated in order.
-- An empty subject is sent as a single final chunk.
local function lp_emulatestreammatch(pat, s, init, valuetable, ...)
    init = initposition(s:len(), init) + 1
    local feed = lp_streammatch(pat, init, valuetable, ...)
    local collected, total = {}, 0
    local slen = #s
    for pos = 1, slen do
        -- one character per call; flag the final one
        local nret, rets = retcount(feed(s:sub(pos, pos), pos == slen))
        local status = rets[1]
        if status == -1 then
            return -- fail
        end
        -- append everything after the status value
        for k = 2, nret do
            collected[total + k - 1] = rets[k]
        end
        total = total + nret - 1
        if status == 0 then
            -- parsing finished
            return unpack(collected, 1, total)
        end
    end
    return select(2, feed(s, true)) -- empty string
end
 | |
| 
 | |
-- Rebuild a pattern from its serialized form.
--
-- Layout consumed from `str`, in order:
--   uint32 N, then N tree elements          (copied into pat.p)
--   uint32 M, then M pattern elements       (copied into pat.code.p)
--   uint32 K, then K tagged values, each introduced by a 3-byte tag:
--     'str' : uint32 length + bytes
--     'num' : uint32 length + textual number
--     'cdt' : raw bytes of one `settype`
--     'fnc' : uint32 length + name, uint32 length + chunk for loadstring
--
-- @param str      serialized pattern; reads are bounds-checked when it is a
--                 Lua string (other inputs are read unchecked)
-- @param fcetab   optional table; a function found under a saved function's
--                 name replaces the serialized chunk
-- @param usemeta  truthy: build objects with the treepattern/pattern
--                 constructors; otherwise allocate them with malloc
-- @return pattern object, value table
-- Raises an error on truncated data, an unknown tag, an unparsable number,
-- an unloadable function chunk, or a failed allocation.
local function lp_load(str, fcetab, usemeta)
    assert(str)
    local ptr = ffi.cast('const char*', str)
    -- the total size is only known for Lua strings
    local total = type(str) == 'string' and #str or nil
    local index = 0

    -- fail early instead of reading past the end of the input
    local function need(n)
        if total and index + n > total then
            error(('corrupted pattern data: %d byte(s) required at offset %d, %d available')
                :format(n, index, total - index))
        end
    end

    local function readu32()
        need(4)
        local v = ffi.cast('uint32_t*', ptr + index)[0]
        index = index + 4
        return v
    end

    local function readbytes(n)
        need(n)
        local v = ffi.string(ptr + index, n)
        index = index + n
        return v
    end

    -- tree section
    local patsize = readu32()
    local treelen = ffi.sizeof(treepatternelement) * patsize
    need(treelen) -- validate before allocating anything of that size

    local pat
    if usemeta then
        pat = treepattern(patsize)
    else
        local raw = ffi.C.malloc(ffi.sizeof(treepattern, patsize))
        -- check before attaching the finalizer: it dereferences the pointer
        assert(raw ~= nil, 'not enough memory')
        pat = ffi.gc(ffi.cast('TREEPATTERN*', raw),
            function(ct)
                if ct.code ~= nil then
                    ffi.C.free(ct.code.p)
                    ffi.C.free(ct.code)
                end
                ffi.C.free(ct)
            end)
        -- zero the block so the finalizer sees code == NULL until it is set
        ffi.fill(pat, ffi.sizeof(treepattern, patsize))
        pat.treesize = patsize
        pat.id = 0
    end
    ffi.copy(pat.p, ptr + index, treelen)
    index = index + treelen

    -- code section
    if usemeta then
        pat.code = pattern()
    else
        pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
        assert(pat.code ~= nil)
        pat.code.allocsize = 10
        pat.code.size = 0
        pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize)
        assert(pat.code.p ~= nil)
        ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
    end
    pat.code.size = readu32()
    -- keep the instruction bytes as a Lua string until the buffer is resized
    local data = readbytes(pat.code.size * ffi.sizeof(patternelement))

    -- value table section
    local count = readu32()
    local valuetable = {}
    for _ = 1, count do
        local tag = readbytes(3)
        local val
        if tag == 'str' then
            --string
            val = readbytes(readu32())
        elseif tag == 'num' then
            --number
            local text = readbytes(readu32())
            val = tonumber(text)
            -- a nil here would silently shift every later value-table index
            if val == nil then
                error(('corrupted pattern data: "%s" is not a number'):format(text))
            end
        elseif tag == 'cdt' then
            --ctype
            local size = ffi.sizeof(settype)
            need(size)
            val = settype()
            ffi.copy(val, ptr + index, size)
            index = index + size
        elseif tag == 'fnc' then
            --function
            local fname = readbytes(readu32())
            local chunk = readbytes(readu32())
            if fcetab and fcetab[fname] then
                assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname))
                val = fcetab[fname]
            else
                -- SECURITY NOTE: this executes code embedded in the serialized
                -- data; only load patterns from trusted sources.
                local err
                val, err = loadstring(chunk)
                -- a nil here would silently shift every later value-table index
                if not val then
                    error(('cannot load function "%s": %s'):format(fname, tostring(err)))
                end
            end
        else
            -- an unknown tag leaves the read offset meaningless; stop here
            error(('corrupted pattern data: unknown value tag "%s"'):format(tag))
        end
        valuetable[#valuetable + 1] = val
    end

    -- shrink/grow the instruction buffer to the exact size and fill it
    pat.code.allocsize = pat.code.size
    pat.code.p = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
    assert(pat.code.p ~= nil)
    ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize)
    return pat, valuetable
end
 | |
| 
 | |
-- Load a serialized pattern from a file.
-- The file is read completely and closed before the data is parsed, so the
-- handle is released even when reading fails or lp_load raises an error.
-- @param fname    path of the file to read (opened in binary mode)
-- @param fcetab   passed through to lp_load
-- @param usemeta  passed through to lp_load
-- @return pattern object, value table (as returned by lp_load)
-- Raises an error when the file cannot be opened or read.
local function lp_loadfile(fname, fcetab, usemeta)
    local file = assert(io.open(fname, 'rb'))
    local data, err = file:read("*a")
    file:close()
    local pat, valuetable = lp_load(assert(data, err), fcetab, usemeta)
    return pat, valuetable
end
 | |
| 
 | |
| -- ======================================================
 | |
| 
 | |
-- Public interface of the VM module.
local exports = {}

-- matching entry points
exports.match = lp_match
exports.streammatch = lp_streammatch
exports.emulatestreammatch = lp_emulatestreammatch

-- loading of serialized patterns
exports.load = lp_load
exports.loadfile = lp_loadfile

-- runtime settings
exports.setmax = setmax
exports.setmaxbehind = setmaxbehind
exports.enablememoization = enablememoization
exports.enabletracing = enabletracing

return exports
 |