From 8e03faa61e294bc7e9699ecef197c4b77bd634d1 Mon Sep 17 00:00:00 2001 From: sladecraven Date: Sat, 28 May 2022 09:30:19 +0200 Subject: [PATCH] Update translation keys --- changelog.md | 5 + fr.json | 39 +- module.json | 4 +- tools/detect_missing_strings.lua | 39 + tools/lpeg/lpcap.lua | 625 ++++++++ tools/lpeg/lpcode.lua | 1057 ++++++++++++++ tools/lpeg/lpeg.lua | 1373 ++++++++++++++++++ tools/lpeg/lpprint.lua | 356 +++++ tools/lpeg/lpvm.lua | 1041 +++++++++++++ tools/lpeg/re.lua | 286 ++++ tools/luajson/json.lua | 25 + tools/luajson/json/decode.lua | 171 +++ tools/luajson/json/decode/composite.lua | 190 +++ tools/luajson/json/decode/number.lua | 100 ++ tools/luajson/json/decode/others.lua | 62 + tools/luajson/json/decode/state.lua | 189 +++ tools/luajson/json/decode/strings.lua | 133 ++ tools/luajson/json/decode/util.lua | 121 ++ tools/luajson/json/encode.lua | 161 ++ tools/luajson/json/encode/array.lua | 110 ++ tools/luajson/json/encode/calls.lua | 68 + tools/luajson/json/encode/number.lua | 58 + tools/luajson/json/encode/object.lua | 77 + tools/luajson/json/encode/others.lua | 66 + tools/luajson/json/encode/output.lua | 91 ++ tools/luajson/json/encode/output_utility.lua | 54 + tools/luajson/json/encode/strings.lua | 88 ++ tools/luajson/json/util.lua | 152 ++ tools/luajson/test_json.lua | 95 ++ 29 files changed, 6833 insertions(+), 3 deletions(-) create mode 100644 tools/detect_missing_strings.lua create mode 100644 tools/lpeg/lpcap.lua create mode 100644 tools/lpeg/lpcode.lua create mode 100644 tools/lpeg/lpeg.lua create mode 100644 tools/lpeg/lpprint.lua create mode 100644 tools/lpeg/lpvm.lua create mode 100644 tools/lpeg/re.lua create mode 100644 tools/luajson/json.lua create mode 100644 tools/luajson/json/decode.lua create mode 100644 tools/luajson/json/decode/composite.lua create mode 100644 tools/luajson/json/decode/number.lua create mode 100644 tools/luajson/json/decode/others.lua create mode 100644 tools/luajson/json/decode/state.lua create mode 100644 
tools/luajson/json/decode/strings.lua create mode 100644 tools/luajson/json/decode/util.lua create mode 100644 tools/luajson/json/encode.lua create mode 100644 tools/luajson/json/encode/array.lua create mode 100644 tools/luajson/json/encode/calls.lua create mode 100644 tools/luajson/json/encode/number.lua create mode 100644 tools/luajson/json/encode/object.lua create mode 100644 tools/luajson/json/encode/others.lua create mode 100644 tools/luajson/json/encode/output.lua create mode 100644 tools/luajson/json/encode/output_utility.lua create mode 100644 tools/luajson/json/encode/strings.lua create mode 100644 tools/luajson/json/util.lua create mode 100644 tools/luajson/test_json.lua diff --git a/changelog.md b/changelog.md index 02a8feb..342626b 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,7 @@ +# 0.21.1.0 + +Alignement sur SWADE v1.1.X et traductions de clés manquantes + # 0.21.0.0 Alignement sur SWADE v1.1.X diff --git a/fr.json b/fr.json index 427b408..d530b10 100644 --- a/fr.json +++ b/fr.json @@ -18,6 +18,8 @@ "ITEM.TypeSkill": "Compétence", "ITEM.TypePower": "Pouvoir", "ITEM.TypeAbility": "Capacité", + "CARD.TypeAdventure":"Cartes d'aventure", + "CARD.TypePoker":"Cartes de Poker", "SWADE.Name": "Nom", "SWADE.Tough": "Résistance", "SWADE.Race": "Espèce", @@ -306,6 +308,39 @@ "SWADE.Crew": "Équipage requis", "SWADE.Passengers": "Passagers", + "SWADE.ActAfterCurrentCombatant":"Agir après le combattant actuel", + "SWADE.ActNow": "Agir maintenant", + "SWADE.ActionDeckPresetDark":"Cartes d'action (Sombre)", + "SWADE.ActionDeckPresetLight":"Action Deck (Light)", + "SWADE.ActionDeckReset":"Reset Action Deck", + "SWADE.ActionDeckResetNotification":"Réinitialise les cartes d'action", + "SWADE.ActivateArcaneDevice":"Activer l'objet", + "SWADE.AddTokenFollowers":"Ajouter les tokens sélectionnés comme suivants", + "SWADE.AnimalSmartsMarker":"Un", + "SWADE.ArcaneDevice":"Objet Arcanique", + "SWADE.ArcaneSkill":"Compétence d'arcane", + 
"SWADE.Archetype":"Archétype", + "SWADE.ArchetypeAbilities":"Capacités d'archétype", + "SWADE.ButtonReset":"Réinitialisation", + "SWADE.ButtonSubmit":"Soumettre", + "SWADE.ConvictionActivate":"Faire appel à leur conviction !", + "SWADE.CoreSkill":"Compétence de base", + "SWADE.CoreSkills":"Compétences de base", + "SWADE.CoreSkillsDesc":" >-", + "SWADE.DrawInit":"Lancer l'initiative", + "SWADE.Follow":"Suivre {name}", + "SWADE.GroupByName":"Grouper par nom", + "SWADE.Hold":"Conserver", + "SWADE.LoseTurn":"Toggle Lose Turn", + "SWADE.MakeGroupLeader":"Faire un Chef de groupe", + "SWADE.PPCost":"Coût en PP", + "SWADE.RemoveGroupLeader":"Supprimer le chef de groupe", + "SWADE.SelectColor":"Sélectionner une couleur", + "SWADE.SetGroupColor":"Choisir une couleur de groupe", + "SWADE.Unfollow":"Arrêter de suivre {name}", + "SWADE.WeaponsAndArmor":"Armes & Armures", + "SWADE.WildCard":"Joker", + "SWADE.String": "Texte", "SWADE.Number": "Nombre", "SWADE.Checkbox": "Checkbox", @@ -465,6 +500,7 @@ "SWADE.Background": "Background", "SWADE.ActiveEffects.Add": "Ajouter un Active Effect", "SWADE.ActiveEffects.Source": "Source", + "SWADE.Advances.Number": "Numéro d'avancement", "Actor": "Acteur", "Vehicles": "Véhicules", @@ -506,5 +542,6 @@ "6. Bestiary": "6. Bestiaire", "7. Game Mastering": "6. Conseils aux Maîtres de Jeu", "Character Creation": "Création de Personnages", - "The Adventure toolkit": "Boite à Outils des Aventures" + "The Adventure toolkit": "Boite à Outils des Aventures", + "name":"Portée" } diff --git a/module.json b/module.json index 600ed25..1a67696 100644 --- a/module.json +++ b/module.json @@ -2,7 +2,7 @@ "name": "swade-fr", "title": "SWADE - Traduction française", "description": "Ajoute le français (FRANCE) au système SWADE.

Une traduction Babele des compendiums est inclue mais optionnelle. (Encore en test, à utiliser à vos risques et périls !)

*** Join the official Discord server: Official Discord

*** Rejoignez la communauté Francophone: Discord francophone

", - "version": "0.21.0.0", + "version": "0.21.1.0", "minimumCoreVersion" : "0.7.9", "compatibleCoreVersion" : "9", "author": "BoboursToutCool, Gronyon, Kyane, LeRatierBretonnien, Sasmira, U~man,X.O. de Vorcen", @@ -65,5 +65,5 @@ "url": "https://www.uberwald.me/gitea/public//foundryvtt-swade-fr", "manifest": "https://www.uberwald.me/gitea/public/foundryvtt-swade-fr/raw/branch/master/module.json", - "download": "https://www.uberwald.me/gitea/public/foundryvtt-swade-fr/archive/foundryvtt-swade-fr-0.21.0.0.zip" + "download": "https://www.uberwald.me/gitea/public/foundryvtt-swade-fr/archive/foundryvtt-swade-fr-0.21.1.0.zip" } \ No newline at end of file diff --git a/tools/detect_missing_strings.lua b/tools/detect_missing_strings.lua new file mode 100644 index 0000000..34f22a0 --- /dev/null +++ b/tools/detect_missing_strings.lua @@ -0,0 +1,44 @@
+--- Report translation keys present in SWADE's en.yml but missing from fr.json.
+-- Prints one `"key":"value",` line per missing key, sorted by key, so the
+-- output can be pasted into fr.json. The relative paths below (and the luajson
+-- search path) resolve against the current working directory.
+package.path = package.path .. ";luajson/?.lua"
+local JSON = require"json"
+
+local enjsonf = "../../swade/src/lang/en.yml"
+local frjsonf = "../fr.json"
+
+-- Collect `key: value` pairs from the English file. This is a line scanner,
+-- not a YAML parser: only the text on the key's own line is captured.
+-- assert() turns a missing file into io.open's own error message.
+local fp = assert(io.open(enjsonf, "r"))
+local entags = {}
+for line in fp:lines() do
+  -- Non-empty dotted key, then the rest of the line with the whitespace
+  -- around it trimmed (no stray leading space in the printed value).
+  local key, value = line:match("([%w%.]+):%s*(.-)%s*$")
+  if key then
+    entags[key] = value
+  end
+end
+fp:close()
+
+fp = assert(io.open(frjsonf, "r"))
+local frtags = JSON.decode( fp:read("*a") )
+fp:close()
+
+-- Keep the English entries that have no French counterpart.
+local todisplay = {}
+for tag, value in pairs(entags) do
+  if not frtags[tag] then
+    todisplay[#todisplay+1] = { tag=tag, value=value }
+  end
+end
+
+table.sort(todisplay, function (a, b)
+  return a.tag < b.tag
+end)
+-- ipairs walks the array in index order, which pairs does not guarantee.
+for _, tagDef in ipairs(todisplay) do
+  print('"'.. tagDef.tag ..'":"'.. tagDef.value..'",')
+end
diff --git a/tools/lpeg/lpcap.lua b/tools/lpeg/lpcap.lua new file mode 100644 index 0000000..06fbee8 --- /dev/null +++ b/tools/lpeg/lpcap.lua @@ -0,0 +1,625 @@ +--[[ +LPEGLJ +lpcap.lua +Capture functions +Copyright (C) 2014 Rostislav Sacek. 
+based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] +local ffi = require "ffi" + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local MAXSTRCAPS = 10 + +local pushcapture +local addonestring + + +-- Goes back in a list of captures looking for an open capture +-- corresponding to a close + +local function findopen(cs, index) + local n = 0; -- number of closes waiting an open + while true do + index = index - 1 + if cs.ocap[index].kind == Cclose then + n = n + 1 -- one more open to skip + elseif cs.ocap[index].siz == 0 then + if n == 0 then + return index + end + n = n - 1 + end + end +end + + +local function checknextcap(cs, captop) + local cap = cs.cap; + -- not a single capture? ((cap)->siz != 0) + if cs.ocap[cap].siz == 0 then + local n = 0; -- number of opens waiting a close + -- look for corresponding close + while true do + cap = cap + 1 + if cap > captop then return end + if cs.ocap[cap].kind == Cclose then + n = n - 1 + if n + 1 == 0 then + break; + end + elseif cs.ocap[cap].siz == 0 then + n = n + 1 + end + end + end + cap = cap + 1; -- + 1 to skip last close (or entire single capture) + if cap > captop then return end + return true +end + + +-- Go to the next capture + +local function nextcap(cs) + local cap = cs.cap; + -- not a single capture? 
((cap)->siz != 0) + if cs.ocap[cap].siz == 0 then + local n = 0; -- number of opens waiting a close + -- look for corresponding close + while true do + cap = cap + 1 + if cs.ocap[cap].kind == Cclose then + n = n - 1 + if n + 1 == 0 then + break; + end + elseif cs.ocap[cap].siz == 0 then + n = n + 1 + end + end + end + cs.cap = cap + 1; -- + 1 to skip last close (or entire single capture) +end + + +-- Push on the Lua stack all values generated by nested captures inside +-- the current capture. Returns number of values pushed. 'addextra' +-- makes it push the entire match after all captured values. The +-- entire match is pushed also if there are no other nested values, +-- so the function never returns zero. + +local function pushnestedvalues(cs, addextra, out, valuetable) + local co = cs.cap + cs.cap = cs.cap + 1 + -- no nested captures? + if cs.ocap[cs.cap - 1].siz ~= 0 then + local st = cs.ocap[co].s + local l = cs.ocap[co].siz - 1 + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1) + return 1; -- that is it + else + local n = 0; + while cs.ocap[cs.cap].kind ~= Cclose do -- repeat for all nested patterns + n = n + pushcapture(cs, out, valuetable); + end + -- need extra? + if addextra or n == 0 then + local st = cs.ocap[co].s + local l = cs.ocap[cs.cap].s - cs.ocap[co].s + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1) + n = n + 1 + end + cs.cap = cs.cap + 1 -- skip close entry + return n; + end +end + + +-- Push only the first value generated by nested captures + +local function pushonenestedvalue(cs, out, valuetable) + local n = pushnestedvalues(cs, false, out, valuetable) + for i = n, 2, -1 do + out.out[out.outindex] = nil + out.outindex = out.outindex - 1 + end +end + + +-- Try to find a named group capture with the name given at the top of +-- the stack; goes backward from 'cap'. 
+ +local function findback(cs, cap, name, valuetable) + -- repeat until end of list + while cap > 0 do + cap = cap - 1 + local continue + if cs.ocap[cap].kind == Cclose then + cap = findopen(cs, cap); -- skip nested captures + elseif cs.ocap[cap].siz == 0 then + continue = true -- opening an enclosing capture: skip and get previous + end + if not continue and cs.ocap[cap].kind == Cgroup and cs.ocap[cap].idx ~= 0 then + local gname = valuetable[cs.ocap[cap].idx] -- get group name + -- right group? + if name == gname then + return cap; + end + end + end + error(("back reference '%s' not found"):format(name), 0) +end + + +-- Back-reference capture. Return number of values pushed. + +local function backrefcap(cs, out, valuetable) + local curr = cs.cap; + local name = valuetable[cs.ocap[cs.cap].idx] -- reference name + cs.cap = findback(cs, curr, name, valuetable) -- find corresponding group + local n = pushnestedvalues(cs, false, out, valuetable); -- push group's values + cs.cap = curr + 1; + return n; +end + + +-- Table capture: creates a new table and populates it with nested +-- captures. + +local function tablecap(cs, out, valuetable) + local n = 0; + local t = {} + cs.cap = cs.cap + 1 + -- table is empty + if cs.ocap[cs.cap - 1].siz == 0 then + while cs.ocap[cs.cap].kind ~= Cclose do + local subout = { outindex = 0, out = {} } + -- named group? 
+ if cs.ocap[cs.cap].kind == Cgroup and cs.ocap[cs.cap].idx ~= 0 then + local groupname = valuetable[cs.ocap[cs.cap].idx] -- push group name + pushonenestedvalue(cs, subout, valuetable) + t[groupname] = subout.out[1] + else + -- not a named group + local k = pushcapture(cs, subout, valuetable) + -- store all values into table + for i = 1, subout.outindex do + t[i + n] = subout.out[i] + end + n = n + k; + end + end + cs.cap = cs.cap + 1 -- skip close entry + end + out.outindex = out.outindex + 1 + out.out[out.outindex] = t + return 1; -- number of values pushed (only the table) +end + + +-- Table-query capture + +local function querycap(cs, out, valuetable) + local table = valuetable[cs.ocap[cs.cap].idx] + local subout = { outindex = 0, out = {} } + pushonenestedvalue(cs, subout, valuetable) -- get nested capture + -- query cap. value at table + if table[subout.out[1]] ~= nil then + out.outindex = out.outindex + 1 + out.out[out.outindex] = table[subout.out[1]] + return 1 + end + return 0 +end + + +-- Fold capture + +local function foldcap(cs, out, valuetable) + local fce = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + -- no nested captures? + -- or no nested captures (large subject)? + if cs.ocap[cs.cap - 1].siz ~= 0 or + cs.ocap[cs.cap].kind == Cclose then + error("no initial value for fold capture", 0); + end + local subout = { outindex = 0; out = {} } + local n = pushcapture(cs, subout, valuetable) -- nested captures with no values? 
+ if n == 0 then + error("no initial value for fold capture", 0); + end + local acumulator = subout.out[1] -- leave only one result for accumulator + while cs.ocap[cs.cap].kind ~= Cclose do + local subout = { outindex = 0; out = {} } + n = pushcapture(cs, subout, valuetable); -- get next capture's values + acumulator = fce(acumulator, unpack(subout.out, 1, subout.outindex)) -- call folding function + end + cs.cap = cs.cap + 1; -- skip close entry + out.outindex = out.outindex + 1 + out.out[out.outindex] = acumulator + return 1; -- only accumulator left on the stack +end + + +local function retcount(...) + return select('#', ...), { ... } +end + + +-- Function capture + +local function functioncap(cs, out, valuetable) + local fce = valuetable[cs.ocap[cs.cap].idx] -- push function + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures + local count, ret = retcount(fce(unpack(subout.out, 1, n))) -- call function + for i = 1, count do + out.outindex = out.outindex + 1 + out.out[out.outindex] = ret[i] + end + return count +end + + +-- Select capture + +local function numcap(cs, out, valuetable) + local idx = valuetable[cs.ocap[cs.cap].idx] -- value to select + -- no values? + if idx == 0 then + nextcap(cs); -- skip entire capture + return 0; -- no value produced + else + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable) + -- invalid index? + if n < idx then + error(("no capture '%d'"):format(idx), 0) + else + out.outindex = out.outindex + 1 + out.out[out.outindex] = subout.out[idx] -- get selected capture + return 1; + end + end +end + + +-- Calls a runtime capture. Returns number of captures removed by +-- the call, including the initial Cgroup. (Captures to be added are +-- on the Lua stack.) 
+ +local function runtimecap(cs, close, s, out, valuetable) + local open = findopen(cs, close) + assert(cs.ocap[open].kind == Cgroup) + cs.ocap[close].kind = Cclose; -- closes the group + cs.ocap[close].s = s; + cs.cap = open; + local fce = valuetable[cs.ocap[cs.cap].idx] -- push function to be called + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures + local count, ret = retcount(fce(cs.s or cs.stream, s, unpack(subout.out, 1, n))) -- call dynamic function + for i = 1, count do + out.outindex = out.outindex + 1 + out.out[out.outindex] = ret[i] + end + return close - open -- number of captures of all kinds removed +end + +-- Collect values from current capture into array 'cps'. Current +-- capture must be Cstring (first call) or Csimple (recursive calls). +-- (In first call, fills %0 with whole match for Cstring.) +-- Returns number of elements in the array that were filled. + +local function getstrcaps(cs, cps, n) + local k = n + n = n + 1 + cps[k + 1].isstring = true; -- get string value + cps[k + 1].startstr = cs.ocap[cs.cap].s; -- starts here + cs.cap = cs.cap + 1 + -- nested captures? + if cs.ocap[cs.cap - 1].siz == 0 then + -- traverse them + while cs.ocap[cs.cap].kind ~= Cclose do + -- too many captures? + if n >= MAXSTRCAPS then + nextcap(cs); -- skip extra captures (will not need them) + elseif cs.ocap[cs.cap].kind == Csimple then + -- string? + n = getstrcaps(cs, cps, n); -- put info. 
into array + else + cps[n + 1].isstring = false; -- not a string + cps[n + 1].origcap = cs.cap; -- keep original capture + nextcap(cs); + n = n + 1; + end + end + cs.cap = cs.cap + 1 -- skip close + end + cps[k + 1].endstr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1 -- ends here + return n; +end + + +-- add next capture value (which should be a string) to buffer 'b' + +-- String capture: add result to buffer 'b' (instead of pushing +-- it into the stack) + +local function stringcap(cs, b, valuetable) + local cps = {} + for i = 1, MAXSTRCAPS do + cps[#cps + 1] = {} + end + local fmt = valuetable[cs.ocap[cs.cap].idx] + local n = getstrcaps(cs, cps, 0) - 1; -- collect nested captures + local i = 1 + -- traverse them + while i <= #fmt do + local c = fmt:sub(i, i) + -- not an escape? + if c ~= '%' then + b[#b + 1] = c -- add it to buffer + elseif fmt:sub(i + 1, i + 1) < '0' or fmt:sub(i + 1, i + 1) > '9' then + -- not followed by a digit? + i = i + 1 + b[#b + 1] = fmt:sub(i, i) + else + i = i + 1 + local l = fmt:sub(i, i) - '0'; -- capture index + if l > n then + error(("invalid capture index (%d)"):format(l), 0) + elseif cps[l + 1].isstring then + b[#b + 1] = cs.s and cs.s:sub(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) or + cs.stream(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) + else + local curr = cs.cap; + cs.cap = cps[l + 1].origcap; -- go back to evaluate that nested capture + if not addonestring(cs, b, "capture", valuetable) then + error(("no values in capture index %d"):format(l), 0) + end + cs.cap = curr; -- continue from where it stopped + end + end + i = i + 1 + end +end + + +-- Substitution capture: add result to buffer 'b' + +local function substcap(cs, b, valuetable) + local curr = cs.ocap[cs.cap].s; + -- no nested captures? 
+ if cs.ocap[cs.cap].siz ~= 0 then + -- keep original text + b[#b + 1] = cs.s and cs.s:sub(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) or + cs.stream(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) + else + cs.cap = cs.cap + 1 -- skip open entry + -- traverse nested captures + while cs.ocap[cs.cap].kind ~= Cclose do + local next = cs.ocap[cs.cap].s; + b[#b + 1] = cs.s and cs.s:sub(curr, next - curr + curr - 1) or + cs.stream(curr, next - curr + curr - 1) -- add text up to capture + if addonestring(cs, b, "replacement", valuetable) then + curr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1; -- continue after match + else + -- no capture value + curr = next; -- keep original text in final result + end + end + b[#b + 1] = cs.s and cs.s:sub(curr, curr + cs.ocap[cs.cap].s - curr - 1) or + cs.stream(curr, curr + cs.ocap[cs.cap].s - curr - 1) -- add last piece of text + end + cs.cap = cs.cap + 1 -- go to next capture +end + + +-- Evaluates a capture and adds its first value to buffer 'b'; returns +-- whether there was a value + +function addonestring(cs, b, what, valuetable) + local tag = cs.ocap[cs.cap].kind + if tag == Cstring then + stringcap(cs, b, valuetable); -- add capture directly to buffer + return 1 + elseif tag == Csubst then + substcap(cs, b, valuetable); -- add capture directly to buffer + return 1 + else + local subout = { outindex = 0, out = {} } + local n = pushcapture(cs, subout, valuetable); + if n > 0 then + if type(subout.out[1]) ~= 'string' and type(subout.out[1]) ~= 'number' then + error(("invalid %s value (a %s)"):format(what, type(subout.out[1])), 0) + end + b[#b + 1] = subout.out[1] + return n + end + end +end + + +-- Push all values of the current capture into the stack; returns +-- number of values pushed + +function pushcapture(cs, out, valuetable) + local type = cs.ocap[cs.cap].kind + if type == Cposition then + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.ocap[cs.cap].s + cs.cap = cs.cap + 1; + return 1; + elseif type == Cconst 
then + out.outindex = out.outindex + 1 + out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + return 1; + elseif type == Carg then + local arg = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + if arg > cs.ptopcount then + error(("reference to absent extra argument #%d"):format(arg), 0) + end + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.ptop[arg] + return 1; + elseif type == Csimple then + local k = pushnestedvalues(cs, true, out, valuetable) + local index = out.outindex + table.insert(out.out, index - k + 1, out.out[index]) + out[index + 1] = nil + return k; + elseif type == Cruntime then + out.outindex = out.outindex + 1 + out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1; + return 1; + elseif type == Cstring then + local b = {} + stringcap(cs, b, valuetable) + out.outindex = out.outindex + 1 + out.out[out.outindex] = table.concat(b) + return 1; + elseif type == Csubst then + local b = {} + substcap(cs, b, valuetable); + out.outindex = out.outindex + 1 + out.out[out.outindex] = table.concat(b) + return 1; + elseif type == Cgroup then + -- anonymous group? + if cs.ocap[cs.cap].idx == 0 then + return pushnestedvalues(cs, false, out, valuetable); -- add all nested values + else + -- named group: add no values + nextcap(cs); -- skip capture + return 0 + end + elseif type == Cbackref then + return backrefcap(cs, out, valuetable) + elseif type == Ctable then + return tablecap(cs, out, valuetable) + elseif type == Cfunction then + return functioncap(cs, out, valuetable) + elseif type == Cnum then + return numcap(cs, out, valuetable) + elseif type == Cquery then + return querycap(cs, out, valuetable) + elseif type == Cfold then + return foldcap(cs, out, valuetable) + else + assert(false) + end +end + + +-- Prepare a CapState structure and traverse the entire list of +-- captures in the stack pushing its results. 
's' is the subject +-- string, 'r' is the final position of the match, and 'ptop' +-- the index in the stack where some useful values were pushed. +-- Returns the number of results pushed. (If the list produces no +-- results, push the final position of the match.) + +local function getcaptures(capture, s, stream, r, valuetable, ...) + local n = 0; + local cs = { cap = 0 } + local out = { outindex = 0; out = {} } + -- is there any capture? + if capture[cs.cap].kind ~= Cclose then + cs.ocap = capture + cs.s = s; + cs.stream = stream + cs.ptopcount, cs.ptop = retcount(...) + repeat -- collect their values + n = n + pushcapture(cs, out, valuetable) + until cs.ocap[cs.cap].kind == Cclose + end + -- no capture values? + if n == 0 then + if not r then + return + else + return r + end + end + assert(out.outindex < 7998, "(too many captures)") + return unpack(out.out, 1, out.outindex) +end + +local function getcapturesruntime(capture, s, stream, notdelete, min, max, captop, valuetable, ...) + local n = 0; + local cs = { cap = min } + local out = { outindex = 0; out = {} } + cs.ocap = capture + cs.s = s + cs.stream = stream + cs.ptopcount, cs.ptop = retcount(...) 
+ local start = 0 + repeat -- collect their values + if not checknextcap(cs, max) then break end + local notdelete = notdelete or capture[cs.cap].kind == Cgroup and capture[cs.cap].idx ~= 0 and capture[cs.cap].candelete == 0 + pushcapture(cs, out, valuetable) + if notdelete then + start = cs.cap + else + n = n + cs.cap - start + for i = 0, captop - cs.cap - 1 do + ffi.copy(capture + start + i, capture + cs.cap + i, ffi.sizeof('CAPTURE')) + end + max = max - (cs.cap - start) + captop = captop - (cs.cap - start) + cs.cap = start + end + until cs.cap == max + assert(out.outindex < 7998, "(too many captures)") + return n, out.out, out.outindex +end + +return { + getcaptures = getcaptures, + runtimecap = runtimecap, + getcapturesruntime = getcapturesruntime, +} + diff --git a/tools/lpeg/lpcode.lua b/tools/lpeg/lpcode.lua new file mode 100644 index 0000000..bd0639f --- /dev/null +++ b/tools/lpeg/lpcode.lua @@ -0,0 +1,1057 @@ +--[[ +LPEGLJ +lpcode.lua +Generating code from tree +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] +local ffi = require "ffi" +require "lpvm" + +local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump 
to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + + +local PEnullable = 0 +local PEnofail = 1 +local RuleLR = 0x10000 +local NOINST = -2 + + +local MAXBEHINDPREDICATE = 255 +local MAXRULES = 200 +local MAXOFF = 0xF + +-- number of siblings for each tree +local numsiblings = { + 0, 0, 0, -- char, set, any + 0, 0, -- true, false + 1, -- rep + 2, 2, -- seq, choice + 1, 1, -- not, and + 0, 0, 2, 1, -- call, opencall, rule, grammar + 1, -- behind + 1, 1 -- capture, runtime capture +} + + +local patternelement = ffi.typeof('PATTERN_ELEMENT') +local pattern = ffi.typeof('PATTERN') +local settype = ffi.typeof('int32_t[8]') +local fullset = settype(-1, -1, -1, -1, -1, -1, -1, -1) + +-- {====================================================== +-- Analysis and some optimizations +-- ======================================================= + +local codegen + + +-- Check whether a charset is empty (IFail), singleton (IChar), +-- full (IAny), or none of those (ISet). + +local function charsettype(cs) + local count = 0; + local candidate = -1; -- candidate position for a char + for i = 0, 8 - 1 do + local b = cs[i]; + if b == 0 then + if count > 1 then + return ISet; -- else set is still empty + end + elseif b == -1 then + if count < (i * 32) then + return ISet; + else + count = count + 32; -- set is still full + end + -- byte has only one bit? 
-- NOTE(review): everything down to the first column-0 'end' is the tail of a
-- function whose head lies above this chunk (presumably 'charsettype', which
-- codecharset calls to obtain an opcode/argument pair) -- confirm against the
-- full file.
        elseif band(b, (b - 1)) == 0 then
            -- b & (b - 1) == 0: 'b' has at most one bit set
            if count > 0 then
                return ISet; -- set is neither full nor empty
            else
                -- set has only one char till now; track it
                count = count + 1;
                candidate = i;
            end
        else
            return ISet; -- byte is neither empty, full, nor singleton
        end
    end
    if count == 0 then
        return IFail, 0 -- empty set
    elseif count == 1 then
        -- singleton; find character bit inside byte
        local b = cs[candidate];
        local c = candidate * 32;
        -- shift right until the single set bit reaches position 0;
        -- 'i - 1' is then that bit's index inside word 'candidate'
        for i = 1, 32 do
            if b == 1 then
                c = c + i - 1
                break
            end
            b = rshift(b, 1)
        end
        return IChar, c
    elseif count == 256 then
        return IAny, 0 -- full set
    else
        assert(false) -- should have returned by now
    end
end


-- A few basic operations on Charsets

-- Replace, in place, each of the 8 words (indices 0..7) of 'cs' by its
-- bitwise complement.
local function cs_complement(cs)
    for i = 0, 8 - 1 do
        cs[i] = bnot(cs[i])
    end
end


-- Returns true when the 8 words of 'cs1' and 'cs2' are pairwise equal;
-- returns nothing (nil) at the first difference.
local function cs_equal(cs1, cs2)
    for i = 0, 8 - 1 do
        if cs1[i] ~= cs2[i] then
            return
        end
    end
    return true
end


-- computes whether sets st1 and st2 are disjoint
-- (true when no word pair shares a bit; nothing/nil otherwise)

local function cs_disjoint(st1, st2)
    for i = 0, 8 - 1 do
        if band(st1[i], st2[i]) ~= 0 then
            return
        end
    end
    return true
end


-- Convert a 'char' pattern (TSet, TChar, TAny) to a charset
-- Returns a freshly created 'settype()' value; for any other tag the
-- function falls off the end and returns nothing (callers test for that).

local function tocharset(tree, index, valuetable)
    local val = settype()
    if tree.p[index].tag == TSet then
        -- copy the stored set so the caller may modify the result freely
        ffi.copy(val, valuetable[tree.p[index].val], ffi.sizeof(val))
        return val
    elseif tree.p[index].tag == TChar then
        local b = tree.p[index].val
        -- only one char
        -- add that one (word b >> 5, bit b & 31)
        val[rshift(b, 5)] = lshift(1, band(b, 31))
        return val
    elseif tree.p[index].tag == TAny then
        ffi.fill(val, ffi.sizeof(val), 0xff)
        return val
    end
end


-- checks whether a pattern has captures
-- Returns true if the subtree at 'index' contains a TCapture or TRunTime
-- node (following TCall nodes to the node at offset 'ps'); nothing otherwise.

local function hascaptures(tree, index)
    if tree.p[index].tag == TCapture or tree.p[index].tag == TRunTime then
        return true
    elseif tree.p[index].tag == TCall then
        return hascaptures(tree, index + tree.p[index].ps)
    else
        -- 'numsiblings' is indexed by tag + 1
        local ns = numsiblings[tree.p[index].tag + 1]
        if ns == 0 then
            return
        elseif ns == 1 then
            return hascaptures(tree, index + 1)
        elseif ns == 2 then
            -- first sibling is at index + 1, second at index + ps
            if hascaptures(tree, index + 1) then
                return true
            else
                return hascaptures(tree, index + tree.p[index].ps)
            end
        else
            assert(false)
        end
    end
end


-- Checks how a pattern behaves regarding the empty string,
-- in one of two different ways:
-- A pattern is *nullable* if it can match without consuming any character;
-- A pattern is *nofail* if it never fails for any string
-- (including the empty string).
-- The difference is only for predicates; for patterns without
-- predicates, the two properties are equivalent.
-- (With predicates, &'a' is nullable but not nofail. Of course,
-- nofail => nullable.)
-- These functions are all conservative in the following way:
-- p is nullable => nullable(p)
-- nofail(p) => p cannot fail
-- (The function assumes that TOpenCall and TRunTime are not nullable:
-- TOpenCall must be checked again when the grammar is fixed;
-- TRunTime is an arbitrary choice.)
-- Arguments: 'pred' selects the property (PEnullable or PEnofail); 'index'
-- is the node position inside tree.p; 'lrcall' (optional) records the
-- targets of left-recursive calls already entered, so that recursion
-- through them stops. Returns true when the property holds and nothing
-- (nil) otherwise.

local function checkaux(tree, pred, index, lrcall)
    lrcall = lrcall or {}
    local tag = tree.p[index].tag
    if tag == TChar or tag == TSet or tag == TAny or
            tag == TFalse or tag == TOpenCall then
        return -- not nullable
    elseif tag == TRep or tag == TTrue then
        return true -- no fail
    elseif tag == TNot or tag == TBehind then
        -- can match empty, but may fail
        if pred == PEnofail then
            return
        else
            return true -- PEnullable
        end
    elseif tag == TAnd then
        -- can match empty; fail iff body does
        if pred == PEnullable then
            return true
        else
            return checkaux(tree, pred, index + 1, lrcall)
        end
    elseif tag == TRunTime then
        -- can fail; match empty iff body does
        if pred == PEnofail then
            return
        else
            return checkaux(tree, pred, index + 1, lrcall)
        end
    elseif tag == TSeq then
        -- both siblings must satisfy the property
        if not checkaux(tree, pred, index + 1, lrcall) then
            return
        else
            return checkaux(tree, pred, index + tree.p[index].ps, lrcall)
        end
    elseif tag == TChoice then
        -- either sibling is enough (second sibling is tested first)
        if checkaux(tree, pred, index + tree.p[index].ps, lrcall) then
            return true
        else
            return checkaux(tree, pred, index + 1, lrcall)
        end
    elseif tag == TCapture or tag == TGrammar or tag == TRule then
        return checkaux(tree, pred, index + 1, lrcall)
    elseif tag == TCall then
        -- left recursive rule (low 16 bits of 'cap' are non-zero)
        if band(tree.p[index].cap, 0xffff) ~= 0 then
            local lr = index + tree.p[index].ps
            if lrcall[lr] then
                return -- already being examined: stop here
            end
            lrcall[lr] = true
        end
        return checkaux(tree, pred, index + tree.p[index].ps, lrcall)
    else
        assert(false)
    end
end


-- number of characters to match a pattern (or -1 if variable)
-- ('count' avoids infinite loops for grammars)
-- 'len' is the length accumulated so far; top-level callers pass 0 for
-- both 'count' and 'len'.

local function fixedlenx(tree, count, len, index)
    local tag = tree.p[index].tag
    if tag == TChar or tag == TSet or tag == TAny then
        return len + 1;
    elseif tag == TFalse or tag == TTrue or tag == TNot or tag == TAnd or tag == TBehind then
        return len;
    elseif tag == TRep or tag == TRunTime or tag == TOpenCall then
        return -1;
    elseif tag == TCapture or tag == TRule or tag == TGrammar then
        return fixedlenx(tree, count, len, index + 1)
    elseif tag == TCall then
        if count >= MAXRULES then
            return -1; -- may be a loop
        else
            return fixedlenx(tree, count + 1, len, index + tree.p[index].ps)
        end
    elseif tag == TSeq then
        len = fixedlenx(tree, count, len, index + 1)
        if (len < 0) then
            return -1;
        else
            return fixedlenx(tree, count, len, index + tree.p[index].ps)
        end
    elseif tag == TChoice then
        -- fixed only when both alternatives have the same fixed length
        local n1 = fixedlenx(tree, count, len, index + 1)
        if n1 < 0 then return -1 end
        local n2 = fixedlenx(tree, count, len, index + tree.p[index].ps)
        if n1 == n2 then
            return n1
        else
            return -1
        end
    else
        assert(false)
    end
end


-- Computes the 'first set' of a pattern.
-- The result is a conservative approximation:
-- match p ax -> x' for some x ==> a in first(p).
-- match p '' -> '' ==> returns 1.
-- The set 'follow' is the first set of what follows the
-- pattern (full set if nothing follows it)
-- Returns two values: a flag (0, 1 or 2; 2 marks a match-time capture, see
-- the TSeq and TRunTime branches) and a newly created charset. 'lrcall' is
-- optional and plays the same role as in 'checkaux'.

local function getfirst(tree, follow, index, valuetable, lrcall)
    lrcall = lrcall or {}
    local tag = tree.p[index].tag
    if tag == TChar or tag == TSet or tag == TAny then
        local firstset = tocharset(tree, index, valuetable)
        return 0, firstset
    elseif tag == TTrue then
        local firstset = settype()
        ffi.copy(firstset, follow, ffi.sizeof(firstset))
        return 1, firstset
    elseif tag == TFalse then
        local firstset = settype()
        return 0, firstset
    elseif tag == TChoice then
        -- union of both alternatives
        local e1, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
        local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
        for i = 0, 8 - 1 do
            firstset[i] = bor(firstset[i], csaux[i])
        end
        return bor(e1, e2), firstset
    elseif tag == TSeq then
        if not checkaux(tree, PEnullable, index + 1) then
            return getfirst(tree, fullset, index + 1, valuetable, lrcall)
        else
            -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))
            local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
            local e1, firstset = getfirst(tree, csaux, index + 1, valuetable, lrcall)
            if e1 == 0 then -- 'e1' ensures that first can be used
                return 0, firstset
            elseif band(bor(e1, e2), 2) == 2 then
                -- one of the children has a matchtime?
                return 2, firstset -- pattern has a matchtime capture
            else
                return e2, firstset -- else depends on 'e2'
            end
        end
    elseif tag == TRep then
        local _, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
        for i = 0, 8 - 1 do
            firstset[i] = bor(firstset[i], follow[i])
        end
        return 1, firstset -- accept the empty string
    elseif tag == TCapture or tag == TGrammar or tag == TRule then
        return getfirst(tree, follow, index + 1, valuetable, lrcall)
    elseif tag == TRunTime then
        -- function invalidates any follow info.
        local e, firstset = getfirst(tree, fullset, index + 1, valuetable, lrcall)
        if e ~= 0 then
            return 2, firstset -- function is not "protected"?
        else
            return 0, firstset -- pattern inside capture ensures first can be used
        end
    elseif tag == TCall then
        -- left recursive rule
        if band(tree.p[index].cap, 0xffff) ~= 0 then
            local lr = index + tree.p[index].ps
            if lrcall[lr] then
                return 0, settype()
            else
                lrcall[lr] = true
            end
        end
        return getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
    elseif tag == TAnd then
        local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
        for i = 0, 8 - 1 do
            firstset[i] = band(firstset[i], follow[i])
        end
        return e, firstset
    elseif tag == TNot then
        -- 'not' of a plain char pattern: complement of its charset
        local negated = tocharset(tree, index + 1, valuetable)
        if negated then
            cs_complement(negated)
            return 1, negated
        end
        -- otherwise the instruction gives no new information (same as TBehind)
        local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
        ffi.copy(firstset, follow, ffi.sizeof(firstset))
        return bor(e, 1), firstset -- always can accept the empty string
    elseif tag == TBehind then
        -- instruction gives no new information
        -- call 'getfirst' to check for match-time captures
        local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
        ffi.copy(firstset, follow, ffi.sizeof(firstset))
        return bor(e, 1), firstset -- always can accept the empty string
    else
        assert(false)
    end
end


-- If it returns true, then pattern can fail only depending on the next
-- character of the subject
-- (returns nothing/nil otherwise; 'lrcall' is optional, as in 'checkaux')

local function headfail(tree, index, lrcall)
    lrcall = lrcall or {}
    local tag = tree.p[index].tag
    if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
        return true
    elseif tag == TTrue or tag == TRep or tag == TRunTime or tag == TNot or tag == TBehind then
        return
    elseif tag == TCapture or tag == TGrammar or tag == TRule or tag == TAnd then
        return headfail(tree, index + 1, lrcall)
    elseif tag == TCall then
        -- left recursive rule
        if band(tree.p[index].cap, 0xffff) ~= 0 then
            local lr = index + tree.p[index].ps
            if lrcall[lr] then
                return true
            else
                lrcall[lr] = true
            end
        end
        return headfail(tree, index + tree.p[index].ps, lrcall)
    elseif tag == TSeq then
        -- the second sibling must never fail, the first must be headfail
        if not checkaux(tree, PEnofail, index + tree.p[index].ps) then
            return
        else
            return headfail(tree, index + 1, lrcall)
        end
    elseif tag == TChoice then
        if not headfail(tree, index + 1, lrcall) then
            return
        else
            return headfail(tree, index + tree.p[index].ps, lrcall)
        end
    else
        assert(false)
    end
end


-- Check whether the code generation for the given tree can benefit
-- from a follow set (to avoid computing the follow set when it is
-- not needed)

local function needfollow(tree, index)
    local tag = tree.p[index].tag
    if tag == TChar or tag == TSet or tag == TAny or tag == TFalse or tag == TTrue or tag == TAnd or tag == TNot or
            tag == TRunTime or tag == TGrammar or tag == TCall or tag == TBehind then
        return
    elseif tag == TChoice or tag == TRep then
        return true
    elseif tag == TCapture then
        return needfollow(tree, index + 1)
    elseif tag == TSeq then
        return needfollow(tree, index + tree.p[index].ps)
    else
        assert(false)
    end
end

-- ======================================================


-- {======================================================
-- Code generation
-- =======================================================


-- code generation is recursive; 'opt' indicates that the code is
-- being generated under a 'IChoice' operator jumping to its end.
-- 'tt' points to a previous test protecting this code. 'fl' is
-- the follow set of the pattern.


-- Append one instruction (opcode 'op', argument 'val') at position
-- code.size, asking the buffer to grow through code:doublesize() when it
-- is full. Returns the position of the new instruction.
local function addinstruction(code, op, val)
    local size = code.size
    if size >= code.allocsize then
        code:doublesize()
    end
    code.p[size].code = op
    code.p[size].val = val
    code.size = size + 1
    return size
end


-- Store 'offset' in the 'offset' field of the instruction at 'instruction'.
local function setoffset(code, instruction, offset)
    code.p[instruction].offset = offset;
end


-- Add a capture instruction:
-- 'op' is the capture instruction; 'cap' the capture kind;
-- 'key' the key into ktable; 'aux' is optional offset
-- ('cap' and 'aux' are packed into 'val' as cap | (aux << 4); 'key' goes
-- into the instruction's offset field)

local function addinstcap(code, op, cap, key, aux)
    local i = addinstruction(code, op, bor(cap, lshift(aux, 4)))
    setoffset(code, i, key)
    return i
end


-- Make the instruction at 'instruction' jump to 'target' (stored as a
-- relative offset). Negative positions are ignored, so callers may pass a
-- "no instruction" marker.
local function jumptothere(code, instruction, target)
    if instruction >= 0 then
        setoffset(code, instruction, target - instruction)
    end
end


-- Make the instruction at 'instruction' jump to the current end of the code.
local function jumptohere(code, instruction)
    jumptothere(code, instruction, code.size)
end


-- Code an IChar instruction, or IAny if there is an equivalent
-- test dominating it

local function codechar(code, c, tt)
    assert(tt ~= -1)
    if tt >= 0 and code.p[tt].code == ITestChar and
            code.p[tt].val == c then
        addinstruction(code, IAny, 0)
    else
        addinstruction(code, IChar, c)
    end
end


-- Code an ISet instruction
-- (the set is appended to 'valuetable'; the instruction's argument is its
-- index there)

local function coderealcharset(code, cs, valuetable)
    local ind = #valuetable + 1
    valuetable[ind] = cs
    return addinstruction(code, ISet, ind)
end


-- code a char set, optimizing unit sets for IChar, "complete"
-- sets for IAny,
-- and empty sets for IFail; also use an IAny
-- when instruction is dominated by an equivalent test.
-- 'tt' is the position of the dominating test instruction (a negative
-- value other than -1 means "none").

local function codecharset(code, cs, tt, valuetable)
    local op, c = charsettype(cs)
    if op == IChar then
        codechar(code, c, tt)
    elseif op == ISet then
        assert(tt ~= -1)
        if tt >= 0 and code.p[tt].code == ITestSet and
                cs_equal(cs, valuetable[code.p[tt].val]) then
            -- the test at 'tt' already checked this very set
            addinstruction(code, IAny, 0)
        else
            coderealcharset(code, cs, valuetable)
        end
    else
        addinstruction(code, op, c)
    end
end


-- code a test set, optimizing unit sets for ITestChar, "complete"
-- sets for ITestAny, and empty sets for IJmp (always fails).
-- 'e' is non-zero iff test should accept the empty string. (Test
-- instructions in the current VM never accept the empty string.)
-- Returns the position of the emitted test, or NOINST when no test is
-- emitted.

local function codetestset(code, cs, e, valuetable)
    if e ~= 0 then
        return NOINST -- no test
    else
        local pos = code.size
        -- emit the plain (non-test) instruction, then rewrite its opcode
        codecharset(code, cs, NOINST, valuetable)
        local inst = code.p[pos]
        local op = inst.code
        if op == IFail then
            inst.code = IJmp -- always jump
        elseif op == IAny then
            inst.code = ITestAny
        elseif op == IChar then
            inst.code = ITestChar
        elseif op == ISet then
            inst.code = ITestSet
        else
            assert(false)
        end
        return pos
    end
end


-- Find the final destination of a sequence of jumps

local function finaltarget(code, i)
    while code.p[i].code == IJmp do
        i = i + code.p[i].offset
    end
    return i
end


-- final label (after traversing any jumps)

local function finallabel(code, i)
    return finaltarget(code, i + code.p[i].offset)
end

-- <behind(p)> == behind n; <p>

--   (where n = fixedlen(p))
-- 'index' is the position of the TBehind node itself: 'n' is read from its
-- 'val' field and the body is the node at index + 1.

local function codebehind(code, tree, index, valuetable)
    if tree.p[index].val > 0 then
        addinstruction(code, IBehind, tree.p[index].val)
    end
    codegen(code, tree, fullset, false, NOINST, index + 1, valuetable) -- NOINST
end


-- Choice; optimizations:
-- - when p1 is headfail
-- - when first(p1) and first(p2) are disjoint; then
-- a character not in first(p1) cannot go to p1, and a character
-- in first(p1) cannot go to p2 (as it is not in first(p2)).
-- (The optimization is not valid if p1 accepts the empty string,
-- as then there is no character at all...)
-- - when p2 is empty and opt is true; a IPartialCommit can reuse
-- the Choice already active in the stack.
-- 'p1' and 'p2' are the positions of the two alternatives inside tree.p;
-- 'fl' is the follow set of the whole choice.

local function codechoice(code, tree, fl, opt, p1, p2, valuetable)
    local emptyp2 = tree.p[p2].tag == TTrue
    local e1, st1 = getfirst(tree, fullset, p1, valuetable)
    local _, st2 = getfirst(tree, fl, p2, valuetable)
    if headfail(tree, p1) or (e1 == 0 and cs_disjoint(st1, st2)) then
        -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:
        local test = codetestset(code, st1, 0, valuetable)
        local jmp = NOINST;
        codegen(code, tree, fl, false, test, p1, valuetable)
        if not emptyp2 then
            -- an empty p2 emits no code, so no jump over it is needed
            jmp = addinstruction(code, IJmp, 0)
        end
        jumptohere(code, test)
        codegen(code, tree, fl, opt, NOINST, p2, valuetable)
        jumptohere(code, jmp)
    elseif opt and emptyp2 then
        -- p1? == IPartialCommit; p1
        jumptohere(code, addinstruction(code, IPartialCommit, 0))
        codegen(code, tree, fullset, true, NOINST, p1, valuetable)
    else
        -- <p1 / p2> ==
        -- test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2:
        local test = codetestset(code, st1, e1, valuetable)
        local pchoice = addinstruction(code, IChoice, 0)
        codegen(code, tree, fullset, emptyp2, test, p1, valuetable)
        local pcommit = addinstruction(code, ICommit, 0)
        -- both the choice and the test land on L1 (start of p2)
        jumptohere(code, pchoice)
        jumptohere(code, test)
        codegen(code, tree, fl, opt, NOINST, p2, valuetable)
        jumptohere(code, pcommit)
    end
end


-- And predicate
-- optimization: fixedlen(p) = n ==> <&p> ==

--   <p>; behind n
-- (valid only when 'p' has no captures)
-- 'index' is the position of the predicate's body; 'tt' is handed on to
-- the code generation of that body.

local function codeand(code, tree, tt, index, valuetable)
    local width = fixedlenx(tree, 0, 0, index)
    local canrewind = width >= 0 and width <= MAXBEHINDPREDICATE
            and not hascaptures(tree, index)
    if not canrewind then
        -- general form: Choice L1; p1; BackCommit L2; L1: Fail; L2:
        local choiceat = addinstruction(code, IChoice, 0)
        codegen(code, tree, fullset, false, tt, index, valuetable)
        local commitat = addinstruction(code, IBackCommit, 0)
        jumptohere(code, choiceat)
        addinstruction(code, IFail, 0)
        jumptohere(code, commitat)
        return
    end
    -- short fixed width, no captures: match the body, then walk back over it
    codegen(code, tree, fullset, false, tt, index, valuetable)
    if width > 0 then
        addinstruction(code, IBehind, width)
    end
end


-- Captures. A body of fixed length no greater than MAXOFF that holds no
-- captures of its own is followed by one IFullCapture carrying that length;
-- any other body is bracketed by IOpenCapture ... ICloseCapture.
-- 'index' is the position of the capture node; its body is at index + 1.

local function codecapture(code, tree, fl, tt, index, valuetable)
    local body = index + 1
    local len = fixedlenx(tree, 0, 0, body)
    local single = len >= 0 and len <= MAXOFF and not hascaptures(tree, body)
    if single then
        codegen(code, tree, fl, false, tt, body, valuetable)
        addinstcap(code, IFullCapture, tree.p[index].cap, tree.p[index].val, len)
        return
    end
    addinstcap(code, IOpenCapture, tree.p[index].cap, tree.p[index].val, 0)
    codegen(code, tree, fl, false, tt, body, valuetable)
    addinstcap(code, ICloseCapture, Cclose, 0, 0)
end


-- Run-time capture: the body (at index + 1) is bracketed by an
-- IOpenCapture of kind Cgroup, keyed by the node's 'val', and an
-- ICloseRunTime.

local function coderuntime(code, tree, tt, index, valuetable)
    local key = tree.p[index].val
    addinstcap(code, IOpenCapture, Cgroup, key, 0)
    codegen(code, tree, fullset, false, tt, index + 1, valuetable)
    addinstcap(code, ICloseRunTime, Cclose, 0, 0)
end


-- Repetition; optimizations:
-- When pattern is a charset, can use special instruction ISpan.
-- When pattern is head fail, or if it starts with characters that
-- are disjoint from what follows the repetitions, a simple test
-- is enough (a fail inside the repetition would backtrack to fail
-- again in the following pattern, so there is no need for a choice).
-- When 'opt' is true, the repetition can reuse the Choice already
-- active in the stack.
-- 'index' is the position of the repeated pattern; 'fl' is the follow set
-- of the repetition.

local function coderep(code, tree, opt, fl, index, valuetable)
    local st = tocharset(tree, index, valuetable)
    if st then
        -- plain charset: emit it as a set instruction and turn that into ISpan
        local op = coderealcharset(code, st, valuetable)
        code.p[op].code = ISpan;
    else
        local e1, st = getfirst(tree, fullset, index, valuetable)
        if headfail(tree, index) or (e1 == 0 and cs_disjoint(st, fl)) then
            -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2:
            local test = codetestset(code, st, 0, valuetable)
            codegen(code, tree, fullset, false, test, index, valuetable)
            local jmp = addinstruction(code, IJmp, 0)
            jumptohere(code, test)
            jumptothere(code, jmp, test)
        else
            -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2:
            -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1;
            local test = codetestset(code, st, e1, valuetable)
            local pchoice = NOINST;
            if opt then
                jumptohere(code, addinstruction(code, IPartialCommit, 0))
            else
                pchoice = addinstruction(code, IChoice, 0)
            end
            local l2 = code.size
            codegen(code, tree, fullset, false, NOINST, index, valuetable)
            local commit = addinstruction(code, IPartialCommit, 0)
            jumptothere(code, commit, l2)
            jumptohere(code, pchoice)
            jumptohere(code, test)
        end
    end
end


-- Not predicate; optimizations:
-- In any case, if first test fails, 'not' succeeds, so it can jump to
-- the end. If pattern is headfail, that is all (it cannot fail
-- in other parts); this case includes 'not' of simple sets. Otherwise,
-- use the default code (a choice plus a failtwice).
-- 'index' is the position of the negated pattern.

local function codenot(code, tree, index, valuetable)
    local e, st = getfirst(tree, fullset, index, valuetable)
    local test = codetestset(code, st, e, valuetable)
    if headfail(tree, index) then
        -- test (fail(p1)) -> L1; fail; L1:
        addinstruction(code, IFail, 0)
    else
        -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1:
        local pchoice = addinstruction(code, IChoice, 0)
        codegen(code, tree, fullset, false, NOINST, index, valuetable)
        addinstruction(code, IFailTwice, 0)
        jumptohere(code, pchoice)
    end
    jumptohere(code, test)
end


-- change open calls to calls, using list 'positions' to find
-- correct offsets; also optimize tail calls
-- Only the instructions in [from, to) are visited.

local function correctcalls(code, positions, from, to)
    for i = from, to - 1 do
        if code.p[i].code == IOpenCall then
            local n = code.p[i].offset; -- rule number
            local rule = positions[n]; -- rule position
            assert(rule == from or code.p[rule - 1].code == IRet)
            -- call; ret ?
            -- (tail-call form is used only when the low 16 bits of 'val' are zero)
            if band(code.p[i].val, 0xffff) == 0 and code.p[finaltarget(code, i + 1)].code == IRet then
                code.p[i].code = IJmp; -- tail call
            else
                code.p[i].code = ICall;
            end
            jumptothere(code, i, rule) -- call jumps to respective rule
        end
    end
end


-- Code for a grammar:
-- call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
-- 'index' is the position of the TGrammar node; its first rule is at
-- index + 1 and each following rule at the previous rule's position + ps.

local function codegrammar(code, tree, index, valuetable)
    local positions = {}
    local rulenumber = 1;
    local rule = index + 1
    assert(tree.p[rule].tag == TRule)
    -- the initial call carries 1 when the first rule has the RuleLR bit set
    local LR = 0
    if band(RuleLR, tree.p[rule].cap) ~= 0 then LR = 1 end
    local firstcall = addinstruction(code, ICall, LR) -- call initial rule
    code.p[firstcall].aux = tree.p[rule].val
    local jumptoend = addinstruction(code, IJmp, 0) -- jump to the end
    jumptohere(code, firstcall) -- here starts the initial rule
    while tree.p[rule].tag == TRule do
        positions[rulenumber] = code.size -- save rule position
        rulenumber = rulenumber + 1
        codegen(code, tree, fullset, false, NOINST, rule + 1, valuetable) -- code rule
        addinstruction(code, IRet, 0)
        rule = rule + tree.p[rule].ps
    end
    assert(tree.p[rule].tag == TTrue)
    jumptohere(code, jumptoend)
    -- firstcall + 2 is the first instruction after the call/jmp prologue
    correctcalls(code, positions, firstcall + 2, code.size)
end


-- Emit an open call for the TCall node at 'index'; 'correctcalls' later
-- turns it into a call or a jump. 'val' is stored in the instruction's
-- 'aux' field.
local function codecall(code, tree, index, val)
    local c = addinstruction(code, IOpenCall, tree.p[index].cap) -- to be corrected later
    code.p[c].aux = val
    assert(tree.p[index + tree.p[index].ps].tag == TRule)
    setoffset(code, c, band(tree.p[index + tree.p[index].ps].cap, 0x7fff)) -- offset = rule number
end


-- Code a sequence: 'p1' then 'p2' (both are positions inside tree.p).
local function codeseq(code, tree, fl, opt, tt, p1, p2, valuetable)
    if needfollow(tree, p1) then
        local _, fll = getfirst(tree, fl, p2, valuetable) -- p1 follow is p2 first
        codegen(code, tree, fll, false, tt, p1, valuetable)
    else
        -- use 'fullset' as follow
        codegen(code, tree, fullset, false, tt, p1, valuetable)
    end
    -- can p1 consume anything?
    if (fixedlenx(tree, 0, 0, p1) ~= 0) then
        tt = NOINST; -- invalidate test
    end
    return codegen(code, tree, fl, opt, tt, p2, valuetable)
end


-- Main code-generation function: dispatch to auxiliary functions
-- according to kind of tree

-- code generation is recursive; 'opt' indicates that the code is being
-- generated as the last thing inside an optional pattern (so, if that
-- code is optional too, it can reuse the 'IChoice' already in place for
-- the outer pattern). 'tt' points to a previous test protecting this
-- code (or NOINST). 'fl' is the follow set of the pattern.
-- Emit the code for the node at 'index' by handing it to the generator
-- that matches its tag. A TTrue node emits nothing; an unknown tag trips
-- the final assertion.

function codegen(code, tree, fl, opt, tt, index, valuetable)
    local node = tree.p[index]
    local kind = node.tag

    -- the empty pattern needs no code at all
    if kind == TTrue then
        return
    end

    -- leaves
    if kind == TChar then
        return codechar(code, node.val, tt)
    end
    if kind == TAny then
        return addinstruction(code, IAny, 0)
    end
    if kind == TSet then
        return codecharset(code, valuetable[node.val], tt, valuetable)
    end
    if kind == TFalse then
        return addinstruction(code, IFail, 0)
    end

    -- binary nodes: first sibling at index + 1, second at index + ps
    if kind == TSeq then
        return codeseq(code, tree, fl, opt, tt, index + 1, index + node.ps, valuetable)
    end
    if kind == TChoice then
        return codechoice(code, tree, fl, opt, index + 1, index + node.ps, valuetable)
    end

    -- unary nodes
    if kind == TRep then
        return coderep(code, tree, opt, fl, index + 1, valuetable)
    end
    if kind == TBehind then
        return codebehind(code, tree, index, valuetable)
    end
    if kind == TNot then
        return codenot(code, tree, index + 1, valuetable)
    end
    if kind == TAnd then
        return codeand(code, tree, tt, index + 1, valuetable)
    end
    if kind == TCapture then
        return codecapture(code, tree, fl, tt, index, valuetable)
    end
    if kind == TRunTime then
        return coderuntime(code, tree, tt, index, valuetable)
    end

    -- grammars and calls
    if kind == TGrammar then
        return codegrammar(code, tree, index, valuetable)
    end
    if kind == TCall then
        return codecall(code, tree, index, node.val)
    end

    assert(false)
end


-- Optimize jumps and other jump-like instructions.
-- * Update labels of instructions with labels to their final
-- destinations (e.g., choice L1; ...
-- L1: jmp L2: becomes
-- choice L2)
-- * Jumps to other instructions that do jumps become those
-- instructions (e.g., jump to return becomes a return; jump
-- to commit becomes a commit)

local function peephole(code)
    local i = 0
    while i < code.size do
        local tag = code.p[i].code
        if tag == IChoice or tag == ICall or tag == ICommit or tag == IPartialCommit or
                tag == IBackCommit or tag == ITestChar or tag == ITestSet or tag == ITestAny then
            -- instructions with labels
            jumptothere(code, i, finallabel(code, i)) -- optimize label

        elseif tag == IJmp then
            local ft = finaltarget(code, i)
            local tag = code.p[ft].code -- jumping to what?
            -- instructions with unconditional implicit jumps
            if tag == IRet or tag == IFail or tag == IFailTwice or tag == IEnd then
                ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction
            elseif tag == ICommit or tag == IPartialCommit or tag == IBackCommit then
                -- inst. with unconditional explicit jumps
                local fft = finallabel(code, ft)
                ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction...
                jumptothere(code, i, fft) -- but must correct its offset
                -- step back so that the 'i = i + 1' below revisits this same
                -- position, now holding the copied instruction
                i = i - 1 -- reoptimize its label
            else
                jumptothere(code, i, ft) -- optimize label
            end
        end
        i = i + 1
    end
end


-- Compile a pattern
-- Generates the code for the subtree at 'index', appends IEnd, runs the
-- peephole pass and stores the result in tree.code.
-- NOTE(review): only the previous tree.code pointer itself is passed to
-- free(); whether the instruction buffer it refers to ('p') is released
-- elsewhere cannot be seen from here -- confirm.

local function compile(tree, index, valuetable)
    local code = pattern()
    codegen(code, tree, fullset, false, NOINST, index, valuetable)
    addinstruction(code, IEnd, 0)
    peephole(code)
    ffi.C.free(tree.code)
    tree.code = code
end

-- Constructor registered below as the '__new' metamethod of 'pattern'.
-- Allocates the pattern header and an instruction buffer of at least 10
-- elements with malloc; 'size' (default 0) becomes the initial instruction
-- count. 'ct' is not used by the body.
local function pat_new(ct, size)
    size = size or 0
    local allocsize = size
    if allocsize < 10 then
        allocsize = 10
    end
    local pat = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
    assert(pat ~= nil)
    pat.allocsize = allocsize
    pat.size = size
    pat.p = ffi.C.malloc(ffi.sizeof(patternelement) * allocsize)
    assert(pat.p ~= nil)
    -- ffi.fill without a fill value zeroes the buffer
    ffi.fill(pat.p, ffi.sizeof(patternelement) * allocsize)
    return pat
end

-- Grow the instruction buffer of 'ct' to twice its allocated size with
-- realloc and zero the newly added half.
local function doublesize(ct)
    ct.p = ffi.C.realloc(ct.p, ffi.sizeof(patternelement) * ct.allocsize * 2)
    assert(ct.p ~= nil)
    ffi.fill(ct.p + ct.allocsize, ffi.sizeof(patternelement) * ct.allocsize)
    ct.allocsize = ct.allocsize * 2
end

-- methods reachable as code:doublesize() on pattern values
local pattreg = {
    doublesize = doublesize,
}

local metareg = {
    ["__new"] = pat_new,
    ["__index"] = pattreg
}

ffi.metatype(pattern, metareg)

-- public interface of the module
return {
    checkaux = checkaux,
    tocharset = tocharset,
    fixedlenx = fixedlenx,
    hascaptures = hascaptures,
    compile = compile,
}
\ No newline at end of file
diff --git a/tools/lpeg/lpeg.lua b/tools/lpeg/lpeg.lua
new file mode 100644
index 0000000..de4fca3
--- /dev/null
+++ b/tools/lpeg/lpeg.lua
@@ -0,0 +1,1373 @@
--[[
LPEGLJ
lpeglj.lua
Main module and tree generation
Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +assert(jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.") + +local ffi = require "ffi" +local lpcode = require "lpcode" +local lpprint = require "lpprint" +local lpvm = require "lpvm" + +local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift + +ffi.cdef [[ + int isalnum(int c); + int isalpha(int c); + int iscntrl(int c); + int isdigit(int c); + int isgraph(int c); + int islower(int c); + int isprint(int c); + int ispunct(int c); + int isspace(int c); + int isupper(int c); + int isxdigit(int c); +]] + +local MAXBEHIND = 255 +local MAXRULES = 200 +local VERSION = "1.0.0.0LJ" + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice 
-- to current position and jump
local IBackCommit = 16 -- "fails" but jump to its own 'offset'
local IFailTwice = 17 -- pop one choice and then fail
local IFail = 18 -- go back to saved state on choice and jump to saved offset
local IGiveup = 19 -- internal use
local IFullCapture = 20 -- complete capture of last 'off' chars
local IOpenCapture = 21 -- start a capture
local ICloseCapture = 22
local ICloseRunTime = 23

-- capture kinds
local Cclose = 0
local Cposition = 1
local Cconst = 2
local Cbackref = 3
local Carg = 4
local Csimple = 5
local Ctable = 6
local Cfunction = 7
local Cquery = 8
local Cstring = 9
local Cnum = 10
local Csubst = 11
local Cfold = 12
local Cruntime = 13
local Cgroup = 14

local PEnullable = 0
local PEnofail = 1
local PEleftrecursion = 2

local newgrammar

local RuleLR = 0x10000
local Ruleused = 0x20000
local BCapcandelete = 0x30000

local LREnable = false

-- number of siblings for each tree
-- (indexed by tag + 1)
local numsiblings = {
    0, 0, 0, -- char, set, any
    0, 0, -- true, false
    1, -- rep
    2, 2, -- seq, choice
    1, 1, -- not, and
    0, 0, 2, 1, -- call, opencall, rule, grammar
    1, -- behind
    1, 1 -- capture, runtime capture
}



local patternid = 0
local valuetable = {}

local funcnames = setmetatable({}, { __mode = 'k' })

local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
local treepattern = ffi.typeof('TREEPATTERN')
local patternelement = ffi.typeof('PATTERN_ELEMENT')
local pattern = ffi.typeof('PATTERN')
local settype = ffi.typeof('int32_t[8]')
local uint32 = ffi.typeof('uint32_t[1]')

-- Fix a TOpenCall into a TCall node, using table 'postable' to
-- translate a key to its rule address in the tree. Raises an
-- error if key does not exist.
-- The rule that is called also gets the 'Ruleused' bit set in its 'cap'.

local function fixonecall(postable, grammar, index, valuetable)
    local name = valuetable[grammar.p[index].val] -- get rule's name
    local n = postable[name] -- query name in position table
    -- no position?
    if not n then
        -- tostring: '%s' rejects booleans, functions, etc. on Lua 5.1 /
        -- LuaJIT 2.0, which would replace this message by a format error
        local shown = type(name) == 'table' and '(a table)' or tostring(name)
        error(("rule '%s' undefined in given grammar"):format(shown), 0)
    end
    grammar.p[index].tag = TCall;
    grammar.p[index].ps = n - index -- position relative to node
    grammar.p[index + grammar.p[index].ps].cap = bor(grammar.p[index + grammar.p[index].ps].cap, Ruleused)
end


-- Transform left associative constructions into right
-- associative ones, for sequence and choice; that is:
-- (t11 + t12) + t2 => t11 + (t12 + t2)
-- (t11 * t12) * t2 => t11 * (t12 * t2)
-- (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))

local function correctassociativity(tree, index)
    local t1 = index + 1
    assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq)
    while tree.p[t1].tag == tree.p[index].tag do
        local n1size = tree.p[index].ps - 1; -- t1 == Op t11 t12
        local n11size = tree.p[t1].ps - 1;
        local n12size = n1size - n11size - 1
        -- move t11 one slot down, over the inner Op node
        for i = 1, n11size do
            ffi.copy(tree.p + index + i, tree.p + t1 + i, ffi.sizeof(treepatternelement))
        end
        tree.p[index].ps = n11size + 1
        -- the freed slot becomes the new inner Op node (Op t12 t2)
        tree.p[index + tree.p[index].ps].tag = tree.p[index].tag
        tree.p[index + tree.p[index].ps].ps = n12size + 1
    end
end


-- Make final adjustments in a tree. Fix open calls in tree,
-- making them refer to their respective rules or raising appropriate
-- errors (if not inside a grammar). Correct associativity of associative
-- constructions (making them right associative).
-- 'fix' is true when the tree is being fixed as part of a grammar;
-- 'postable' maps rule names to positions (see 'fixonecall').

local function finalfix(fix, postable, grammar, index, valuetable)

    local tag = grammar.p[index].tag
    if tag == TGrammar then
        -- subgrammars were already fixed
        return
    elseif tag == TOpenCall then
        if fix then
            -- inside a grammar
            fixonecall(postable, grammar, index, valuetable)
        else
            -- open call outside grammar
            error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0)
        end
    elseif tag == TSeq or tag == TChoice then
        correctassociativity(grammar, index)
    end
    -- recurse into the siblings ('tag' is the tag read before any fix)
    local ns = numsiblings[tag + 1]
    if ns == 0 then
    elseif ns == 1 then
        return finalfix(fix, postable, grammar, index + 1, valuetable)
    elseif ns == 2 then
        finalfix(fix, postable, grammar, index + 1, valuetable)
        return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable)
    else
        assert(false)
    end
end


-- {======================================================
-- Tree generation
-- =======================================================

-- Create a one-node TSet tree together with a new, empty charset stored as
-- entry 1 of the tree's slot in 'valuetable'. Returns the tree and the
-- charset.
local function newcharset()
    local tree = treepattern(1)
    valuetable[tree.id] = { settype() }
    tree.p[0].tag = TSet
    tree.p[0].val = 1
    return tree, valuetable[tree.id][1]
end


-- add to tree a sequence where first sibling is 'sib' (with size
-- 'sibsize')
-- ('start' is the position in 'tree' that receives the TSeq node)

local function seqaux(tree, sib, start, sibsize)
    tree.p[start].tag = TSeq;
    tree.p[start].ps = sibsize + 1
    ffi.copy(tree.p + start + 1, sib.p, ffi.sizeof(treepatternelement) * sibsize)
end


-- Build a sequence of 'n' nodes, each with tag 'tag' and 'val' got
-- from the string 's' (left untouched if 's' is nil). (TSeq is binary,
-- so it must build a sequence of sequence of sequence...)

local function fillseq(tree, tag, start, n, s)
    -- initial n-1 copies of Seq tag; Seq ...
    for i = 1, n - 1 do
        tree.p[start].tag = TSeq
        tree.p[start].ps = 2
        tree.p[start + 1].tag = tag
        if s then
            tree.p[start + 1].val = s:byte(i)
        end
        start = start + tree.p[start].ps
    end
    tree.p[start].tag = tag -- last one does not need TSeq
    if s then
        tree.p[start].val = s:byte(n)
    end
end


-- Numbers as patterns:
-- 0 == true (always match); n == TAny repeated 'n' times;
-- -n == not (TAny repeated 'n' times)

local function numtree(n)
    if n == 0 then
        local tree = treepattern(1)
        tree.p[0].tag = TTrue
        return tree
    else
        local tree, start
        if n > 0 then
            tree = treepattern(2 * n - 1)
            start = 0
        else
            -- negative: code it as !(-n)
            n = -n;
            tree = treepattern(2 * n)
            tree.p[0].tag = TNot
            start = 1
        end
        fillseq(tree, TAny, start, n) -- sequence of 'n' any's
        return tree;
    end
end


-- Convert value to a pattern
-- Strings, numbers, booleans, tables (grammars) and functions are turned
-- into new trees; an existing tree pattern is returned unchanged. 'name'
-- (optional string) is remembered in 'funcnames' for function values.
-- Any other value ends in the final assertion.

local function getpatt(val, name)
    local typ = type(val)
    if typ == 'string' then
        -- empty?
        if #val == 0 then
            local pat = treepattern(1)
            pat.p[0].tag = TTrue -- always match
            return pat
        else
            local tree = treepattern(2 * (#val - 1) + 1)
            fillseq(tree, TChar, 0, #val, val) -- sequence of '#val' chars
            return tree
        end
    elseif typ == 'number' then
        return numtree(val)
    elseif typ == 'boolean' then
        local pat = treepattern(1)
        pat.p[0].tag = val and TTrue or TFalse
        return pat
    elseif typ == 'table' then
        return newgrammar(val)
    elseif typ == 'function' then
        if name and type(name) == 'string' then
            funcnames[val] = name
        end
        -- run-time capture around an always-matching body
        local pat = treepattern(2)
        valuetable[pat.id] = { val }
        pat.p[0].tag = TRunTime
        pat.p[0].val = 1
        pat.p[1].tag = TTrue
        return pat
    elseif ffi.istype(treepattern, val) then
        assert(val.treesize > 0)
        return val
    end
    assert(false)
end

local function copykeys(ktable1, ktable2)
    local ktable, offset = {}, 0
    if not ktable1 and not ktable2 then
        return ktable, 0
    elseif ktable1 then
        for i = 1, #ktable1 do
ktable[#ktable + 1] = ktable1[i] + end + offset = #ktable1 + if not ktable2 then + return ktable, 0 + end + end + if ktable2 then + for i = 1, #ktable2 do + ktable[#ktable + 1] = ktable2[i] + end + end + assert(#ktable < 65536, "too many Lua values in pattern") + return ktable, offset +end + +local function correctkeys(tree, index, offset) + local tag = tree.p[index].tag + if (tag == TSet or tag == TRule or tag == TCall or tag == TRunTime or tag == TOpenCall or tag == TCapture) and + tree.p[index].val ~= 0 then + tree.p[index].val = tree.p[index].val + offset + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return correctkeys(tree, index + 1, offset) + elseif ns == 2 then + correctkeys(tree, index + 1, offset) + return correctkeys(tree, index + tree.p[index].ps, offset) + else + assert(false) + end +end + + + +-- create a new tree, with a new root and one sibling. + +local function newroot1sib(tag, pat) + local tree1 = getpatt(pat) + local tree = treepattern(1 + tree1.treesize) -- create new tree + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + tree.p[0].tag = tag + ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + return tree +end + + +-- create a new tree, with a new root and 2 siblings. 
+
+-- 'pat1' and 'pat2' are converted with getpatt; value indices of the
+-- second sibling are shifted behind those of the first.
+local function newroot2sib(tag, pat1, pat2)
+    local tree1 = getpatt(pat1)
+    local tree2 = getpatt(pat2)
+    local tree = treepattern(1 + tree1.treesize + tree2.treesize) -- create new tree
+    local ktable, offset = copykeys(valuetable[tree1.id], valuetable[tree2.id])
+    valuetable[tree.id] = ktable
+    tree.p[0].tag = tag
+    tree.p[0].ps = 1 + tree1.treesize
+    ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+    ffi.copy(tree.p + 1 + tree1.treesize, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize)
+    if offset > 0 then
+        correctkeys(tree, 1 + tree1.treesize, offset)
+    end
+    return tree;
+end
+
+
+-- Convert any supported value to a pattern (nil is rejected).
+
+local function lp_P(val, name)
+    assert(type(val) ~= 'nil')
+    return getpatt(val, name)
+end
+
+
+-- sequence operator; optimizations:
+-- false x => false, x true => x, true x => x
+-- (cannot do x . false => false because x may have runtime captures)
+
+local function lp_seq(pat1, pat2)
+    local tree1 = getpatt(pat1)
+    local tree2 = getpatt(pat2)
+    -- false . x == false, x . true = x
+    if tree1.p[0].tag == TFalse or tree2.p[0].tag == TTrue then
+        return tree1
+        -- true . x = x
+    elseif tree1.p[0].tag == TTrue then
+        return tree2
+    else
+        return newroot2sib(TSeq, tree1, tree2)
+    end
+end
+
+
+-- choice operator; optimizations:
+-- charset / charset => charset
+-- true / x => true, x / false => x, false / x => x
+-- (x / true is not equivalent to true)
+
+local function lp_choice(pat1, pat2)
+    local tree1 = getpatt(pat1)
+    local tree2 = getpatt(pat2)
+    local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
+    local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
+    if charset1 and charset2 then
+        local t, set = newcharset()
+        for i = 0, 7 do
+            set[i] = bor(charset1[i], charset2[i])
+        end
+        return t
+    elseif lpcode.checkaux(tree1, PEnofail, 0) or tree2.p[0].tag == TFalse then
+        return tree1 -- true / x => true, x / false => x
+    elseif tree1.p[0].tag == TFalse then
+        return tree2 -- false / x => x
+    else
+        return newroot2sib(TChoice, tree1, tree2)
+    end
+end
+
+
+-- p^n
+-- n >= 0: at least n repetitions; n < 0: at most -n repetitions.
+
+local function lp_star(tree1, n)
+    local tree
+    n = tonumber(n)
+    assert(type(n) == 'number')
+    -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
+    if n >= 0 then
+        -- reject the loop before allocating anything
+        if lpcode.checkaux(tree1, PEnullable, 0) then
+            error("loop body may accept empty string", 0)
+        end
+        tree = treepattern((n + 1) * (tree1.treesize + 1))
+        valuetable[tree.id] = copykeys(valuetable[tree1.id])
+        local start = 0
+        -- repeat 'n' times
+        for i = 1, n do
+            seqaux(tree, tree1, start, tree1.treesize)
+            start = start + tree.p[start].ps
+        end
+        tree.p[start].tag = TRep
+        ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+        -- choice (seq tree1 ... choice tree1 true ...) true
+    else
+        n = -n;
+        -- size = (choice + seq + tree1 + true) * n, but the last has no seq
+        tree = treepattern(n * (tree1.treesize + 3) - 1)
+        valuetable[tree.id] = copykeys(valuetable[tree1.id])
+        local start = 0
+        -- repeat (n - 1) times
+        for i = n, 2, -1 do
+            tree.p[start].tag = TChoice;
+            tree.p[start].ps = i * (tree1.treesize + 3) - 2
+            tree.p[start + tree.p[start].ps].tag = TTrue;
+            start = start + 1
+            seqaux(tree, tree1, start, tree1.treesize)
+            start = start + tree.p[start].ps
+        end
+        tree.p[start].tag = TChoice;
+        tree.p[start].ps = tree1.treesize + 1
+        tree.p[start + tree.p[start].ps].tag = TTrue
+        ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+    end
+    return tree
+end
+
+
+-- #p == &p
+
+local function lp_and(pat)
+    return newroot1sib(TAnd, pat)
+end
+
+
+-- -p == !p
+
+local function lp_not(pat)
+    return newroot1sib(TNot, pat)
+end
+
+
+-- [t1 - t2] == Seq (Not t2) t1
+-- If t1 and t2 are charsets, make their difference.
+
+local function lp_sub(pat1, pat2)
+    local tree1 = getpatt(pat1)
+    local tree2 = getpatt(pat2)
+    local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
+    local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
+    if charset1 and charset2 then
+        local tree, set = newcharset()
+        for i = 0, 7 do
+            set[i] = band(charset1[i], bnot(charset2[i]))
+        end
+        return tree
+    else
+        local tree = treepattern(2 + tree1.treesize + tree2.treesize)
+        -- tree2 comes first in the new tree, so its values come first too
+        local ktable, offset = copykeys(valuetable[tree2.id], valuetable[tree1.id])
+        valuetable[tree.id] = ktable
+        tree.p[0].tag = TSeq; -- sequence of...
+        tree.p[0].ps = 2 + tree2.treesize
+        tree.p[1].tag = TNot; -- ...not...
+        ffi.copy(tree.p + 2, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize)
+        ffi.copy(tree.p + tree2.treesize + 2, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+        if offset > 0 then
+            correctkeys(tree, 2 + tree2.treesize, offset)
+        end
+        return tree
+    end
+end
+
+
+-- Character set matching any byte that occurs in the string 'val'.
+
+local function lp_set(val)
+    assert(type(val) == 'string')
+    local tree, set = newcharset()
+    for i = 1, #val do
+        local b = val:byte(i)
+        set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
+    end
+    return tree
+end
+
+
+-- Character set built from two-character strings "xy", each adding
+-- every byte from x to y (inclusive). Raises an error for an argument
+-- that is not a two-character string.
+
+local function lp_range(...)
+    local args = { ... }
+    local top = select('#', ...) -- '#args' is unreliable with nil holes
+    local tree, set = newcharset()
+    for i = 1, top do
+        local range = args[i]
+        -- build the message only on failure
+        if type(range) ~= 'string' or #range ~= 2 then
+            error(tostring(range) .. " range must have two characters", 0)
+        end
+        local first, last = range:byte(1, 2)
+        for b = first, last do
+            set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
+        end
+    end
+    return tree
+end
+
+
+-- Look-behind predicate
+
+local function lp_behind(pat)
+    local tree1 = getpatt(pat)
+    local n = lpcode.fixedlenx(tree1, 0, 0, 0)
+    assert(not lpcode.hascaptures(tree1, 0), "pattern have captures")
+    assert(n >= 0, "pattern may not have fixed length")
+    assert(n <= MAXBEHIND, "pattern too long to look behind")
+    -- reuse the converted tree instead of converting 'pat' a second time
+    local tree = newroot1sib(TBehind, tree1)
+    tree.p[0].val = n;
+    return tree
+end
+
+
+-- Create a non-terminal
+-- 'val' is the rule key; 'p' (optional) is stored in the node's 'cap'.
+
+local function lp_V(val, p)
+    assert(val, "non-nil value expected")
+    local tree = treepattern(1)
+    valuetable[tree.id] = { val }
+    tree.p[0].tag = TOpenCall
+    tree.p[0].val = 1
+    tree.p[0].cap = p or 0
+    return tree
+end
+
+
+-- Create a tree for a non-empty capture, with a body and
+-- optionally with an associated value
+
+local function capture_aux(cap, pat, val)
+    local tree = newroot1sib(TCapture, pat)
+    tree.p[0].cap = cap
+    -- note: 0 is truthy in Lua, so a numeric 0 is stored as a value too
+    if val then
+        local ind = #valuetable[tree.id] + 1
+        assert(ind <= 65536, "too many Lua values in pattern")
+        valuetable[tree.id][ind] = val
+        tree.p[0].val = ind
+    end
+    return tree
+end
+
+
+-- Fill a tree with an empty capture, using an empty (TTrue) sibling.
+
+local function auxemptycap(tree, cap, par, start)
+    tree.p[start].tag = TCapture;
+    tree.p[start].cap = cap
+    if type(par) ~= 'nil' then
+        local ind = #valuetable[tree.id] + 1
+        assert(ind <= 65536, "too many Lua values in pattern")
+        valuetable[tree.id][ind] = par
+        tree.p[start].val = ind
+    end
+    tree.p[start + 1].tag = TTrue;
+end
+
+
+-- Create a tree for an empty capture
+
+local function newemptycap(cap, par)
+    local tree = treepattern(2)
+    if type(par) ~= 'nil' then valuetable[tree.id] = {} end
+    auxemptycap(tree, cap, par, 0)
+    return tree
+end
+
+
+-- Captures with syntax p / v
+-- (function capture, query capture, string capture, or number capture)
+-- The third parameter is unused.
+
+local function lp_divcapture(pat, par, xxx)
+    local typ = type(par)
+    if typ == "function" then
+        return capture_aux(Cfunction, pat, par)
+    elseif typ == "table" then
+        return capture_aux(Cquery, pat, par)
+    elseif typ == "string" then
+        return capture_aux(Cstring, pat, par)
+    elseif typ == "number" then
+        -- validate before building the tree
+        assert(0 <= par and par <= 0xffff, "invalid number")
+        return capture_aux(Cnum, pat, par)
+    else
+        error("invalid replacement value", 0)
+    end
+end
+
+
+local function lp_substcapture(pat)
+    return capture_aux(Csubst, pat)
+end
+
+
+local function lp_tablecapture(pat)
+    -- the literal 0 is stored in the value table (0 is truthy in Lua);
+    -- NOTE(review): looks like a leftover of the C "no value" marker - confirm
+    -- against the capture code before removing it
+    return capture_aux(Ctable, pat, 0)
+end
+
+
+-- Group capture; 'val' is the optional value stored with the capture
+-- (capture_aux ignores nil/false).
+
+local function lp_groupcapture(pat, val)
+    return capture_aux(Cgroup, pat, val)
+end
+
+
+local function lp_foldcapture(pat, fce)
+    assert(type(fce) == 'function')
+    return capture_aux(Cfold, pat, fce)
+end
+
+
+local function lp_simplecapture(pat)
+    return capture_aux(Csimple, pat)
+end
+
+
+local function lp_poscapture()
+    return newemptycap(Cposition)
+end
+
+
+-- Argument capture for argument number 'val' (1 .. 0xffff).
+
+local function lp_argcapture(val)
+    assert(type(val) == 'number')
+    -- validate before allocating the tree and its value table
+    assert(0 < val and val <= 0xffff, "invalid argument index")
+    local tree = newemptycap(Carg, 0)
+    local ind = #valuetable[tree.id] + 1
+    assert(ind <= 65536, "too many Lua values in pattern")
+    valuetable[tree.id][ind] = val
+    tree.p[0].val = ind
+    return tree
+end
+
+
+local function lp_backref(val)
+    return newemptycap(Cbackref, val)
+end
+
+
+-- Constant capture
+
+local function lp_constcapture(...)
+    local tree
+    local args = { ... }
+    local n = select('#', ...) -- number of values
+    -- no values?
+    if n == 0 then
+        tree = treepattern(1) -- no capture
+        tree.p[0].tag = TTrue
+    elseif n == 1 then
+        tree = newemptycap(Cconst, args[1]) -- single constant capture
+        -- create a group capture with all values
+    else
+        tree = treepattern(3 + 3 * (n - 1))
+        valuetable[tree.id] = {}
+        tree.p[0].tag = TCapture
+        tree.p[0].cap = Cgroup
+        local start = 1
+        for i = 1, n - 1 do
+            tree.p[start].tag = TSeq
+            tree.p[start].ps = 3
+            auxemptycap(tree, Cconst, args[i], start + 1)
+            start = start + tree.p[start].ps
+        end
+        auxemptycap(tree, Cconst, args[n], start)
+    end
+    return tree
+end
+
+
+-- Match-time capture: 'fce' is stored as the value of a TRunTime root
+-- over 'pat'; 'name', when it is a string, is recorded in funcnames.
+
+local function lp_matchtime(pat, fce, name)
+    assert(type(fce) == 'function')
+    if name and type(name) == 'string' then
+        funcnames[fce] = name
+    end
+    local tree = newroot1sib(TRunTime, pat)
+    local ind = #valuetable[tree.id] + 1
+    assert(ind <= 65536, "too many Lua values in pattern")
+    valuetable[tree.id][ind] = fce
+    tree.p[0].val = ind
+    return tree
+end
+
+-- ======================================================
+
+
+
+-- ======================================================
+-- Grammar - Tree generation
+-- =======================================================
+
+
+-- return index and the pattern for the
+-- initial rule of grammar;
+-- also add that index into position table.
+
+-- If pat[1] is a string it names the initial rule; otherwise pat[1]
+-- itself is the initial rule (key 1).
+local function getfirstrule(pat, postab)
+    local key
+    -- access first element
+    if type(pat[1]) == 'string' then
+        key = pat[1]
+    else
+        key = 1
+    end
+    local rule = pat[key]
+    if not rule then
+        error("grammar has no initial rule", 0)
+    end
+    -- initial rule not a pattern?
+    if not ffi.istype(treepattern, rule) then
+        error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0)
+    end
+    postab[key] = 1
+    return key, rule
+end
+
+
+-- traverse grammar, collect all its keys and patterns
+-- into rule table. Create a new table (before all pairs key-pattern) to
+-- collect all keys and their associated positions in the final tree
+-- (the "position table").
+-- Return the number of rules and the total size
+-- for the new tree.
+-- (also returns the flat rule list { key1, pat1, key2, pat2, ... } and
+-- the position table)
+
+local function collectrules(pat)
+    local n = 1; -- to count number of rules
+    local postab = {}
+    local firstkeyrule, firstrule = getfirstrule(pat, postab)
+    local rules = { firstkeyrule, firstrule }
+    local size = 2 + firstrule.treesize -- TGrammar + TRule + rule
+    for key, val in pairs(pat) do
+        -- initial rule?
+        -- Skip entry 1 (the initial rule or its name) and the entry stored
+        -- under the initial rule's key. The test is on the key, not on the
+        -- pattern value, so another rule that aliases the same pattern
+        -- object is still collected.
+        if key ~= 1 and key ~= firstkeyrule then
+            -- value is not a pattern?
+            if not ffi.istype(treepattern, val) then
+                error(("rule '%s' is not a pattern"):format(tostring(key)), 0)
+            end
+            rules[#rules + 1] = key
+            rules[#rules + 1] = val
+            postab[key] = size
+            size = 1 + size + val.treesize
+            n = n + 1
+        end
+    end
+    size = size + 1; -- TTrue to finish list of rules
+    return n, size, rules, postab
+end
+
+
+-- Copy the 'n' collected rules into 'grammar' starting at 'index', each
+-- under a TRule node whose value is the rule's key. Returns the value
+-- table built for the grammar.
+
+local function buildgrammar(grammar, rules, n, index, valuetable)
+    local ktable, offset = {}, 0
+    -- add each rule into new tree
+    for i = 1, n do
+        local size = rules[i * 2].treesize
+        grammar.p[index].tag = TRule;
+        grammar.p[index].cap = i; -- rule number
+        grammar.p[index].ps = size + 1; -- point to next rule
+        local ind = #ktable + 1
+        ktable[ind] = rules[i * 2 - 1]
+        grammar.p[index].val = ind
+        ffi.copy(grammar.p + index + 1, rules[i * 2].p, ffi.sizeof(treepatternelement) * size) -- copy rule
+        -- append the rule's own values and shift its value indices
+        ktable, offset = copykeys(ktable, valuetable[rules[i * 2].id])
+        if offset > 0 then
+            correctkeys(grammar, index + 1, offset)
+        end
+        index = index + grammar.p[index].ps; -- move to next rule
+    end
+    grammar.p[index].tag = TTrue; -- finish list of rules
+    return ktable
+end
+
+
+-- Check whether a tree has potential infinite loops
+-- (returns true when a repetition with a nullable body is found)
+
+local function checkloops(tree, index)
+    local tag = tree.p[index].tag
+    if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then
+        return true
+    elseif tag == TGrammar then
+        return -- sub-grammars already checked
+    else
+        local ns = numsiblings[tag + 1]
+        if ns == 0 then
+            return
+        elseif ns == 1 then
+            return checkloops(tree, index + 1)
+        elseif ns == 2 then
+            if checkloops(tree, index + 1) then
+                return true
+            else
+                return checkloops(tree, index + tree.p[index].ps)
+            end
+        else
+            assert(false)
+        end
+    end
+end
+
+-- Check whether a rule can be left recursive; returns PEleftrecursion in that
+-- case; otherwise return 1 iff pattern is nullable.
+
+-- 'passed' counts how often each rule was entered, which bounds the
+-- traversal (see MAXRULES); 'nullable' is the result so far.
+local function verifyrule(rulename, tree, passed, nullable, index, valuetable)
+    local tag = tree.p[index].tag
+    if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
+        return nullable; -- cannot pass from here
+    elseif tag == TTrue or tag == TBehind then
+        return true;
+    elseif tag == TNot or tag == TAnd or tag == TRep then
+        return verifyrule(rulename, tree, passed, true, index + 1, valuetable)
+    elseif tag == TCapture or tag == TRunTime then
+        return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+    elseif tag == TCall then
+        local rule = valuetable[tree.p[index].val]
+        if rule == rulename then return PEleftrecursion end
+        if passed[rule] and passed[rule] > MAXRULES then
+            return nullable
+        end
+        return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+        -- only check 2nd child if first is nullable
+    elseif tag == TSeq then
+        local res = verifyrule(rulename, tree, passed, false, index + 1, valuetable)
+        if res == PEleftrecursion then
+            return res
+        elseif not res then
+            return nullable
+        else
+            return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+        end
+        -- must check both children
+    elseif tag == TChoice then
+        nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+        if nullable == PEleftrecursion then return nullable end
+        return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+    elseif tag == TRule then
+        local rule = valuetable[tree.p[index].val]
+        passed[rule] = (passed[rule] or 0) + 1
+        return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+    elseif tag == TGrammar then
+        return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive
+    else
+        assert(false)
+    end
+end
+
+
+-- Verify every rule of the grammar rooted at 'index': flag (or reject,
+-- when left recursion is disabled) left-recursive rules and reject
+-- rules containing empty loops.
+
+local function verifygrammar(rule, index, valuetable)
+    -- check left-recursive rules
+    local LR = {}
+    local ind = index + 1
+    while rule.p[ind].tag == TRule do
+        local rulename = valuetable[rule.p[ind].val]
+        -- used rule
+        if rulename then
+            if verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then
+                if not LREnable then
+                    error(("rule '%s' may be left recursive"):format(rulename), 0)
+                end
+                LR[rulename] = true
+            end
+        end
+        ind = ind + rule.p[ind].ps
+    end
+    assert(rule.p[ind].tag == TTrue)
+
+    for i = 0, rule.treesize - 1 do
+        if rule.p[i].tag == TRule and LR[valuetable[rule.p[i].val]] then
+            rule.p[i].cap = bor(rule.p[i].cap, RuleLR) --TRule can be left recursive
+        end
+        if rule.p[i].tag == TCall and LR[valuetable[rule.p[i].val]] then
+            if rule.p[i].cap == 0 then
+                rule.p[i].cap = 1 --TCall can be left recursive
+            end
+        end
+    end
+
+    -- check infinite loops inside rules
+    ind = index + 1
+    while rule.p[ind].tag == TRule do
+        -- used rule
+        -- (a number is always truthy in Lua, so compare with 0 explicitly)
+        if rule.p[ind].val ~= 0 then
+            if checkloops(rule, ind + 1) then
+                error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0)
+            end
+        end
+        ind = ind + rule.p[ind].ps
+    end
+    assert(rule.p[ind].tag == TTrue)
+end
+
+
+-- Give a name for the initial rule if it is not referenced
+
+local function initialrulename(grammar, val, valuetable)
+    grammar.p[1].cap = bit.bor(grammar.p[1].cap, Ruleused)
+    -- initial rule is not referenced?
+    if grammar.p[1].val == 0 then
+        local ind = #valuetable + 1
+        assert(ind <= 65536, "too many Lua values in pattern")
+        valuetable[ind] = val
+        grammar.p[1].val = ind
+    end
+end
+
+
+-- (assigns the local 'newgrammar' that was forward-declared for getpatt)
+function newgrammar(pat)
+    -- traverse grammar. Create a new table (before all pairs key-pattern) to
+    -- collect all keys and their associated positions in the final tree
+    -- (the "position table").
+    -- Return new tree.
+
+    local n, size, rules, postab = collectrules(pat)
+    local grammar = treepattern(size)
+    local start = 0
+    grammar.p[start].tag = TGrammar
+    grammar.p[start].val = n
+    valuetable[grammar.id] = buildgrammar(grammar, rules, n, start + 1, valuetable)
+    finalfix(true, postab, grammar, start + 1, valuetable[grammar.id])
+    initialrulename(grammar, rules[1], valuetable[grammar.id])
+    verifygrammar(grammar, 0, valuetable[grammar.id])
+    return grammar
+end
+
+-- ======================================================
+
+-- remove duplicity from value table
+-- (rebuilds the pattern's value table with each referenced value stored
+-- once and rewrites the indices in the tree and in the compiled code)
+
+local function reducevaluetable(p)
+    local vtable = valuetable[p.id]
+    local value = {}
+    local newvaluetable = {}
+
+    -- map an old value index to its index in the new table (0 stays 0)
+    local function check(v)
+        if v > 0 then
+            local ord = value[vtable[v]]
+            if not ord then
+                newvaluetable[#newvaluetable + 1] = vtable[v]
+                ord = #newvaluetable
+                value[vtable[v]] = ord
+            end
+            return ord
+        end
+        return 0
+    end
+
+    local function itertree(p, index)
+        local tag = p.p[index].tag
+        if tag == TSet or tag == TCall or tag == TOpenCall or
+                tag == TRule or tag == TCapture or tag == TRunTime then
+            p.p[index].val = check(p.p[index].val)
+        end
+        local ns = numsiblings[tag + 1]
+        if ns == 0 then
+        elseif ns == 1 then
+            return itertree(p, index + 1)
+        elseif ns == 2 then
+            itertree(p, index + 1)
+            return itertree(p, index + p.p[index].ps)
+        else
+            assert(false)
+        end
+    end
+
+    if p.treesize > 0 then
+        itertree(p, 0)
+    end
+    if p.code ~= nil then
+        for i = 0, p.code.size - 1 do
+            local code = p.code.p[i].code
+            if code == ICall or code == IJmp then
+                p.code.p[i].aux = check(p.code.p[i].aux)
+            elseif code == ISet or code == ITestSet or code == ISpan then
+                p.code.p[i].val = check(p.code.p[i].val)
+            elseif code == IOpenCapture or code == IFullCapture then
+                p.code.p[i].offset = check(p.code.p[i].offset)
+            end
+        end
+    end
+    valuetable[p.id] = newvaluetable
+end
+
+
+-- Set the BCapcandelete bits on group captures: directly for those found
+-- below a choice, afterwards for those recorded per rule in 'notchecked'
+-- whose rule has no entry in 'notinalternativerules'.
+-- 'tree' is the node array (p.p), not the pattern object.
+
+local function checkalt(tree)
+    local notchecked = {}
+    local notinalternativerules = {}
+
+    local function iter(tree, index, choice, rule)
+        local tag = tree[index].tag
+        if tag == TCapture and bit.band(tree[index].cap, 0xffff) == Cgroup then
+            if not choice then
+                if rule then
+                    notchecked[rule] = index
+                end
+            else
+                tree[index].cap = bit.bor(tree[index].cap, BCapcandelete)
+            end
+        elseif tag == TChoice then
+            choice = true
+        elseif tag == TRule then
+            rule = tree[index].val
+            if bit.band(tree[index].cap, 0xffff) - 1 == 0 then
+                notinalternativerules[rule] = notinalternativerules[rule] or true
+            end
+        elseif tag == TCall then
+            local r = tree[index].val
+            if not choice then
+                notinalternativerules[r] = notinalternativerules[r] or true
+            end
+        end
+        local sibs = numsiblings[tree[index].tag + 1] or 0
+        if sibs >= 1 then
+            iter(tree, index + 1, choice, rule)
+            if sibs >= 2 then
+                return iter(tree, index + tree[index].ps, choice, rule)
+            end
+        end
+    end
+
+    iter(tree, 0)
+    for k, v in pairs(notchecked) do
+        if not notinalternativerules[k] then
+            tree[v].cap = bit.bor(tree[v].cap, BCapcandelete)
+        end
+    end
+end
+
+
+-- Finish and compile pattern 'p'; returns p.code.
+
+local function prepcompile(p, index)
+    finalfix(false, nil, p, index, valuetable[p.id])
+    checkalt(p.p)
+    lpcode.compile(p, index, valuetable[p.id])
+    reducevaluetable(p)
+    return p.code
+end
+
+
+local function lp_printtree(pat, c)
+    assert(pat.treesize > 0)
+    if c then
+        finalfix(false, nil, pat, 0, valuetable[pat.id])
+    end
+    lpprint.printtree(pat.p, 0, 0, valuetable[pat.id])
+end
+
+
+local function lp_printcode(pat)
+    -- not compiled yet?
+    if pat.code == nil then
+        prepcompile(pat, 0)
+    end
+    lpprint.printpatt(pat.code, valuetable[pat.id])
+end
+
+
+-- Main match function
+
+local function lp_match(pat, s, init, ...)
+    local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+    p.code = p.code ~= nil and p.code or prepcompile(p, 0)
+    return lpvm.match(p, s, init, valuetable[p.id], ...)
+end
+
+local function lp_streammatch(pat, init, ...)
+    local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+    p.code = p.code ~= nil and p.code or prepcompile(p, 0)
+    return lpvm.streammatch(p, init, valuetable[p.id], ...)
+end
+
+-- Only for testing purpose
+-- stream emulation (send all chars from string one char after char)
+local function lp_emulatestreammatch(pat, s, init, ...)
+    local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+    p.code = p.code ~= nil and p.code or prepcompile(p, 0)
+    return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...)
+end
+
+-- {======================================================
+-- Library creation and functions not related to matching
+-- =======================================================
+
+local function lp_setmax(val)
+    lpvm.setmax(val)
+end
+
+local function lp_setmaxbehind(val)
+    lpvm.setmaxbehind(val)
+end
+
+local function lp_enableleftrecursion(val)
+    LREnable = val
+end
+
+local function lp_version()
+    return VERSION
+end
+
+
+-- Returns "pattern" for a pattern object and nothing for anything else.
+
+local function lp_type(pat)
+    if ffi.istype(treepattern, pat) then
+        return "pattern"
+    end
+end
+
+
+-- Store in tab[catname] a charset of all bytes 0..255 for which
+-- 'catfce' returns a non-zero value.
+
+local function createcat(tab, catname, catfce)
+    local t, set = newcharset()
+    for i = 0, 255 do
+        if catfce(i) ~= 0 then
+            set[rshift(i, 5)] = bor(set[rshift(i, 5)], lshift(1, band(i, 31)))
+        end
+    end
+    tab[catname] = t
+end
+
+
+-- Fill 'tab' (a new table when omitted) with one charset per C
+-- character class, as reported by the C library is* functions.
+
+local function lp_locale(tab)
+    tab = tab or {}
+    createcat(tab, "alnum", function(c) return ffi.C.isalnum(c) end)
+    createcat(tab, "alpha", function(c) return ffi.C.isalpha(c) end)
+    createcat(tab, "cntrl", function(c) return ffi.C.iscntrl(c) end)
+    createcat(tab, "digit", function(c) return ffi.C.isdigit(c) end)
+    createcat(tab, "graph", function(c) return ffi.C.isgraph(c) end)
+    createcat(tab, "lower", function(c) return ffi.C.islower(c) end)
+    createcat(tab, "print", function(c) return ffi.C.isprint(c) end)
+    createcat(tab, "punct", function(c) return ffi.C.ispunct(c) end)
+    createcat(tab, "space", function(c) return ffi.C.isspace(c) end)
+    createcat(tab, "upper", function(c) return ffi.C.isupper(c) end)
+    createcat(tab, "xdigit", function(c) return ffi.C.isxdigit(c) end)
+    return tab
+end
+
+
+-- __new metamethod: allocate a pattern with 'size' tree nodes and give
+-- it a fresh id.
+
+local function lp_new(ct, size)
+    local pat = ffi.new(ct, size)
+    pat.treesize = size
+    patternid = patternid + 1
+    pat.id = patternid
+    return pat
+end
+
+
+-- __gc metamethod: drop the pattern's value table and free its compiled
+-- code, if any.
+
+local function lp_gc(ct)
+    valuetable[ct.id] = nil
+    if ct.code ~= nil then
+        ffi.C.free(ct.code.p)
+        ffi.C.free(ct.code)
+    end
+end
+
+local function lp_eq(ct1, ct2)
+    return tostring(ct1) == tostring(ct2)
+end
+
+local function lp_load(str, fcetab)
+    local pat, t = lpvm.load(str, fcetab, true)
+    valuetable[pat.id] = t
+    return pat
+end
+
+local function lp_loadfile(fname, fcetab)
+    local pat, t = lpvm.loadfile(fname, fcetab, true)
+    valuetable[pat.id] = t
+    return pat
+end
+
+-- Serialize pattern 'ct' to a string: optional tree (when 'tree' is
+-- truthy), compiled code, then the value table. Compiles the pattern
+-- first if needed. Raises an error for a value type it cannot dump.
+
+local function lp_dump(ct, tree)
+    local funccount = 0
+    -- not compiled yet?
+    if ct.code == nil then
+        prepcompile(ct, 0)
+    end
+    local out = {}
+    if tree then
+        out[#out + 1] = ffi.string(uint32(ct.treesize), 4)
+        out[#out + 1] = ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize)
+    else
+        out[#out + 1] = ffi.string(uint32(0), 4)
+    end
+    out[#out + 1] = ffi.string(uint32(ct.code.size), 4)
+    out[#out + 1] = ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement))
+    local t = valuetable[ct.id]
+    local len = t and #t or 0
+    out[#out + 1] = ffi.string(uint32(len), 4)
+    if len > 0 then
+        for _, val in ipairs(t) do
+            local typ = type(val)
+            if typ == 'string' then
+                out[#out + 1] = 'str'
+                out[#out + 1] = ffi.string(uint32(#val), 4)
+                out[#out + 1] = val
+            elseif typ == 'number' then
+                local val = tostring(val)
+                out[#out + 1] = 'num'
+                out[#out + 1] = ffi.string(uint32(#val), 4)
+                out[#out + 1] = val
+            elseif typ == 'cdata' then
+                out[#out + 1] = 'cdt'
+                out[#out + 1] = ffi.string(val, ffi.sizeof(val))
+            elseif typ == 'function' then
+                out[#out + 1] = 'fnc'
+                funccount = funccount + 1
+                local name = funcnames[val] or ('FNAME%03d'):format(funccount)
+                out[#out + 1] = ffi.string(uint32(#name), 4)
+                out[#out + 1] = name
+                -- an unnamed function with upvalues cannot be restored from
+                -- its bytecode alone: warn (only the message is written)
+                local upname = debug.getupvalue(val, 1)
+                if not funcnames[val] and upname then
+                    io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, upname, name))
+                end
+                local data = string.dump(val, true)
+                out[#out + 1] = ffi.string(uint32(#data), 4)
+                out[#out + 1] = data
+            else
+                error(("Type '%s' NYI for dump"):format(typ), 0)
+            end
+        end
+    end
+    return table.concat(out)
+end
+
+-- Write the dump of pattern 'ct' to file 'fname' (binary mode).
+-- Raises an error if dumping, opening or writing fails.
+
+local function lp_save(ct, fname, tree)
+    -- dump first: a failing dump must not leave a truncated file or an
+    -- open handle behind
+    local data = lp_dump(ct, tree)
+    local file = assert(io.open(fname, 'wb'))
+    local ok, err = file:write(data)
+    file:close()
+    assert(ok, err)
+end
+
+
+local pattreg = {
+    ["ptree"] = lp_printtree,
+    ["pcode"] = lp_printcode,
+    ["match"] = lp_match,
+    ["streammatch"] = lp_streammatch,
+    ["emulatestreammatch"] = lp_emulatestreammatch,
+    ["setmaxbehind"] = lp_setmaxbehind,
+    ["B"] = lp_behind,
+    ["V"] = lp_V,
+    ["C"] = lp_simplecapture,
+    ["Cc"] = lp_constcapture,
+    ["Cmt"] = lp_matchtime,
+    ["Cb"] = lp_backref,
+    ["Carg"] = lp_argcapture,
+    ["Cp"] = lp_poscapture,
+    ["Cs"] = lp_substcapture,
+    ["Ct"] = lp_tablecapture,
+    ["Cf"] = lp_foldcapture,
+    ["Cg"] = lp_groupcapture,
+    ["P"] = lp_P,
+    ["S"] = lp_set,
+    ["R"] = lp_range,
+    ["L"] = lp_and,
+    ["locale"] = lp_locale,
+    ["version"] = lp_version,
+    ["setmaxstack"] = lp_setmax,
+    ["type"] = lp_type,
+    ["enableleftrecursion"] = lp_enableleftrecursion,
+    ["enablememoization"] = lpvm.enablememoization,
+    ["enabletracing"] = lpvm.enabletracing,
+    ["save"] = lp_save,
+    ["dump"] = lp_dump,
+    ["load"] = lp_load,
+    ["loadfile"] = lp_loadfile,
+    ["__mul"] = lp_seq,
+    ["__add"] = lp_choice,
+    ["__pow"] = lp_star,
+    ["__len"] = lp_and,
+    ["__div"] = lp_divcapture,
+    ["__unm"] = lp_not,
+    ["__sub"] = lp_sub,
+}
+
+local metareg = {
+    ["__gc"] = lp_gc,
+    ["__new"] = lp_new,
+    ["__mul"] = lp_seq,
+    ["__add"] = lp_choice,
+    ["__pow"] = lp_star,
+    ["__len"] = lp_and,
+    ["__div"] = lp_divcapture,
+    ["__unm"] = lp_not,
+    ["__sub"] = lp_sub,
+    ["__eq"] = lp_eq,
+    ["__index"] = pattreg
+}
+
+ffi.metatype(treepattern, metareg) + +return pattreg diff --git a/tools/lpeg/lpprint.lua b/tools/lpeg/lpprint.lua new file mode 100644 index 0000000..86f6897 --- /dev/null +++ b/tools/lpeg/lpprint.lua @@ -0,0 +1,356 @@ +--[[ +LPEGLJ +lpprint.lua +Tree, code and debug print function (only for debugging) +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +local ffi = require"ffi" +local band, rshift, lshift = bit.band, bit.rshift, bit.lshift + +ffi.cdef[[ + int isprint ( int c ); +]] + +local RuleLR = 0x10000 +local Ruleused = 0x20000 + +-- {====================================================== +-- Printing patterns (for debugging) +-- ======================================================= + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != aux, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != aux, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'aux' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local 
IFullCapture = 20 -- complete capture of last 'off' chars
local IOpenCapture = 21 -- start a capture
local ICloseCapture = 22
local ICloseRunTime = 23

-- capture kinds
local Cclose = 0
local Cposition = 1
local Cconst = 2
local Cbackref = 3
local Carg = 4
local Csimple = 5
local Ctable = 6
local Cfunction = 7
local Cquery = 8
local Cstring = 9
local Cnum = 10
local Csubst = 11
local Cfold = 12
local Cruntime = 13
local Cgroup = 14


-- number of siblings for each tree
local numsiblings = {
    [TRep] = 1,
    [TSeq] = 2,
    [TChoice] = 2,
    [TNot] = 1,
    [TAnd] = 1,
    [TRule] = 2,
    [TGrammar] = 1,
    [TBehind] = 1,
    [TCapture] = 1,
    [TRunTime] = 1,
}

-- printable mnemonic for each instruction opcode
local names = {
    [IAny] = "any",
    [IChar] = "char",
    [ISet] = "set",
    [ITestAny] = "testany",
    [ITestChar] = "testchar",
    [ITestSet] = "testset",
    [ISpan] = "span",
    [IBehind] = "behind",
    [IRet] = "ret",
    [IEnd] = "end",
    [IChoice] = "choice",
    [IJmp] = "jmp",
    [ICall] = "call",
    [IOpenCall] = "open_call",
    [ICommit] = "commit",
    [IPartialCommit] = "partial_commit",
    [IBackCommit] = "back_commit",
    [IFailTwice] = "failtwice",
    [IFail] = "fail",
    [IGiveup] = "giveup",
    [IFullCapture] = "fullcapture",
    [IOpenCapture] = "opencapture",
    [ICloseCapture] = "closecapture",
    [ICloseRunTime] = "closeruntime"
}

-- Write a character set as "[(xx)(xx-yy)...]" with hexadecimal byte values.
-- 'st' is indexed as a 256-bit bitmap: word i >> 5 (0..7), bit i & 31.
local function printcharset(st)
    io.write("[")
    local i = 0
    while i <= 255 do
        local first = i
        -- The bound must be tested BEFORE the bitmap is indexed: when byte 255
        -- is a member, 'i' reaches 256 and st[8] would be read past the end.
        while i <= 255 and band(st[rshift(i, 5)], lshift(1, band(i, 31))) ~= 0 do
            i = i + 1
        end
        if i - 1 == first then -- unary range?
            io.write(("(%02x)"):format(first))
        elseif i - 1 > first then -- non-empty range?
            io.write(("(%02x-%02x)"):format(first, i - 1))
        end
        i = i + 1
    end
    io.write("]")
end

-- printable name for each capture kind
local modes = {
    [Cclose] = "close",
    [Cposition] = "position",
    [Cconst] = "constant",
    [Cbackref] = "backref",
    [Carg] = "argument",
    [Csimple] = "simple",
    [Ctable] = "table",
    [Cfunction] = "function",
    [Cquery] = "query",
    [Cstring] = "string",
    [Cnum] = "num",
    [Csubst] = "substitution",
    [Cfold] = "fold",
    [Cruntime] = "runtime",
    [Cgroup] = "group"
}

-- Write the name of capture kind 'kind'.
local function printcapkind(kind)
    io.write(("%s"):format(modes[kind]))
end

-- Write the absolute jump target of instruction 'index' ("-> N").
local function printjmp(p, index)
    io.write(("-> %d"):format(index + p[index].offset))
end

-- Write the rule name registered for the jump target of instruction 'index',
-- if 'rulenames' is given and has an entry for that target.
local function printrulename(p, index, rulenames)
    local target = index + p[index].offset
    if rulenames and rulenames[target] then
        io.write(' ', tostring(rulenames[target]))
    end
end

-- Write one instruction of program 'p' (0-based 'index') on its own line.
-- 'valuetable' resolves set/capture/rule-name indices; 'rulenames' (optional)
-- maps instruction indices to rule names, printed as labels and jump targets.
local function printinst(p, index, valuetable, rulenames)
    local code = p[index].code
    if rulenames and rulenames[index] then
        io.write(tostring(rulenames[index]), '\n')
    end
    io.write(("%04d: %s "):format(index, names[code]))
    if code == IChar then
        io.write(("'%s'"):format(string.char(p[index].val)))
    elseif code == ITestChar then
        io.write(("'%s'"):format(string.char(p[index].val)))
        printjmp(p, index)
        printrulename(p, index, rulenames)
    elseif code == IFullCapture then
        -- low nibble of 'val' is the capture kind, next nibble the size
        printcapkind(band(p[index].val, 0x0f))
        io.write((" (size = %d) (idx = %s)"):format(band(rshift(p[index].val, 4), 0xF), tostring(valuetable[p[index].offset])))
    elseif code == IOpenCapture then
        printcapkind(band(p[index].val, 0x0f))
        io.write((" (idx = %s)"):format(tostring(valuetable[p[index].offset])))
    elseif code == ISet then
        printcharset(valuetable[p[index].val])
    elseif code == ITestSet then
        printcharset(valuetable[p[index].val])
        printjmp(p, index)
        printrulename(p, index, rulenames)
    elseif code == ISpan then
        printcharset(valuetable[p[index].val])
    elseif code == IOpenCall then
        io.write(("-> %d"):format(p[index].offset))
    elseif code == IBehind then
        io.write(("%d"):format(p[index].val))
    elseif code == IJmp or code == ICall or code == ICommit or code == IChoice or
            code == IPartialCommit or code == IBackCommit or code == ITestAny then
        printjmp(p, index)
        if (code == ICall or code == IJmp) and p[index].aux > 0 then
            -- tostring: io.write rejects values that are not strings/numbers
            io.write(' ', tostring(valuetable[p[index].aux]))
        else
            printrulename(p, index, rulenames)
        end
    end
    io.write("\n")
end


-- Write every instruction of compiled pattern 'p' (fields 'size' and 'p').
-- A first pass records, for each call/jump carrying a name index in 'aux',
-- the name of the rule at its target so that it can be printed as a label.
local function printpatt(p, valuetable)
    local ruleNames = {}
    for i = 0, p.size - 1 do
        local code = p.p[i].code
        if (code == ICall or code == IJmp) and p.p[i].aux > 0 then
            local index = i + p.p[i].offset
            ruleNames[index] = valuetable[p.p[i].aux]
        end
    end
    for i = 0, p.size - 1 do
        printinst(p.p, i, valuetable, ruleNames)
    end
end


-- Write one capture record: kind, value-table entry, size and position.
local function printcap(cap, index, valuetable)
    printcapkind(cap[index].kind)
    -- tostring keeps this consistent with printinst for non-string values
    io.write((" (idx: %s - size: %d) -> %d\n"):format(tostring(valuetable[cap[index].idx]), cap[index].siz, cap[index].s))
end


-- Write the first 'limit' captures of list 'cap' between two marker lines.
local function printcaplist(cap, limit, valuetable)
    io.write(">======\n")
    local index = 0
    -- check the bound first so that cap[limit] is never dereferenced
    while index < limit and cap[index].s do
        printcap(cap, index, valuetable)
        index = index + 1
    end
    io.write("=======\n")
end

-- ======================================================



-- {======================================================
-- Printing trees (for debugging)
-- =======================================================

local tagnames = {
    [TChar] = "char",
    [TSet] = "set",
    [TAny] = "any",
    [TTrue] = "true",
    [TFalse] = "false",
    [TRep] = "rep",
    [TSeq] = "seq",
    [TChoice] = "choice",
    [TNot] = "not",
    [TAnd] = "and",
    [TCall] = "call",
    [TOpenCall] = "opencall",
    [TRule] = "rule",
    [TGrammar] = "grammar",
    [TBehind] = "behind",
    [TCapture] = "capture",
    [TRunTime] = "run-time"
}


-- Recursively write the pattern tree rooted at tree[index].
-- 'ident' is the number of leading spaces; children are printed two deeper.
-- The first sibling is at index + 1, the second at index + tree[index].ps.
local function printtree(tree, ident, index, valuetable)
    for i = 1, ident do
        io.write(" ")
    end
    local node = tree[index]
    local tag = node.tag
    io.write(("%s"):format(tagnames[tag]))
    if tag == TChar then
        local c = node.val
        -- isprint returns a C int; 0 is truthy in Lua, so compare explicitly
        if ffi.C.isprint(c) ~= 0 then
            io.write((" '%c'\n"):format(c))
        else
            io.write((" (%02X)\n"):format(c))
        end
    elseif tag == TSet then
        printcharset(valuetable[node.val])
        io.write("\n")
    elseif tag == TOpenCall or tag == TCall then
        io.write((" key: %s\n"):format(tostring(valuetable[node.val])))
    elseif tag == TBehind then
        io.write((" %d\n"):format(node.val))
        printtree(tree, ident + 2, index + 1, valuetable)
    elseif tag == TCapture then
        io.write((" cap: %s n: %s\n"):format(modes[band(node.cap, 0xffff)], tostring(valuetable[node.val])))
        printtree(tree, ident + 2, index + 1, valuetable)
    elseif tag == TRule then
        -- 'cap' packs the rule number in its low 16 bits plus the flag bits
        local extra = band(node.cap, RuleLR) == RuleLR and ' left recursive' or ''
        extra = extra .. (band(node.cap, Ruleused) ~= Ruleused and ' not used' or '')
        io.write((" n: %d key: %s%s\n"):format(band(node.cap, 0xffff) - 1, tostring(valuetable[node.val]), extra))
        printtree(tree, ident + 2, index + 1, valuetable)
        -- do not print next rule as a sibling
    elseif tag == TGrammar then
        local ruleindex = index + 1
        io.write((" %d\n"):format(node.val)) -- number of rules
        for i = 1, node.val do
            printtree(tree, ident + 2, ruleindex, valuetable)
            ruleindex = ruleindex + tree[ruleindex].ps
        end
        assert(tree[ruleindex].tag == TTrue) -- sentinel
    else
        local sibs = numsiblings[tag] or 0
        io.write("\n")
        if sibs >= 1 then
            printtree(tree, ident + 2, index + 1, valuetable)
            if sibs >= 2 then
                printtree(tree, ident + 2, index + node.ps, valuetable)
            end
        end
    end
end

-- }====================================================== */

return {
    printtree = printtree,
    printpatt = printpatt,
    printcaplist = printcaplist,
    printinst = printinst
}
b/tools/lpeg/lpvm.lua @@ -0,0 +1,1041 @@ +--[[ +LPEGLJ +lpvm.lua +Virtual machine +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +local ffi = require "ffi" +local lpcap = require "lpcap" +--[[ Only for debug purpose +local lpprint = require"lpprint" +--]] + +local band, rshift, lshift = bit.band, bit.rshift, bit.lshift + +-- {====================================================== +-- Virtual Machine +-- ======================================================= + +-- Interpret the result of a dynamic capture: false -> fail; +-- true -> keep current position; number -> next position. +-- Return new subject position. 
'fr' is stack index where +-- is the result; 'curr' is current subject position; 'limit' +-- is subject's size. + +local MAXBEHINDPREDICATE = 255 -- max behind for Look-behind predicate +local MAXOFF = 0xF -- maximum for full capture +local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF) -- maximum before current pos +local INITBACK = 400 -- default maximum size for call/backtrack stack + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local BCapcandelete = 0x30000 +local maxstack = INITBACK +local 
maxcapturedefault = 100
local maxmemo = 1000
local usememoization = false
local trace = false

-- sentinel values (all negative)
local FAIL = -1
local LRFAIL = -1
local VOID = -2
local CHOICE = -3
local CALL = -4

ffi.cdef [[
typedef struct {
    int code;
    int val;
    int offset;
    int aux;
} PATTERN_ELEMENT;
typedef struct {
    int allocsize;
    int size;
    PATTERN_ELEMENT *p;
} PATTERN;
typedef struct {
    int tag;
    int val;
    int ps;
    int cap;
} TREEPATTERN_ELEMENT;
typedef struct {
    int id;
    int treesize;
    PATTERN *code;
    TREEPATTERN_ELEMENT p[?];
} TREEPATTERN;

typedef struct {
    double s;
    double X;
    double memos;
    int p;
    int caplevel;
    int pA;
    int valuetabletop;
} STACK;

typedef struct {
    double s;
    int siz;
    int idx;
    int kind;
    int candelete;
} CAPTURE;

void *malloc( size_t size );
void free( void *memblock );
void *realloc( void *memblock, size_t size );
]]

local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
local treepattern = ffi.typeof('TREEPATTERN')
local patternelement = ffi.typeof('PATTERN_ELEMENT')
local pattern = ffi.typeof('PATTERN')
local settype = ffi.typeof('int32_t[8]')

-- Interpret the first result 'fr' of a match-time (dynamic) capture.
--   false / nil -> returns FAIL (the match fails)
--   true        -> returns 'curr' (keep the current position)
--   number      -> returns that number as the new subject position
-- 'curr' is the current position and 'limit' the largest valid position (or
-- nil when the subject length is unknown). In that case 'checkstreamlen',
-- if given, is called with 'res - 2' to ask whether the position exists.
-- Raises "invalid position returned by match-time capture" when the result
-- is not a number or lies outside [curr, limit].
local function resdyncaptures(fr, curr, limit, checkstreamlen)
    -- false value?
    if not fr then
        return FAIL -- and fail
    end
    -- true?
    if fr == true then
        return curr -- keep current position
    end
    local res = fr -- new position
    -- Reject non-numbers up front: comparing e.g. a string or table with
    -- 'curr' would raise an unrelated "attempt to compare" error instead.
    if type(res) ~= 'number'
            or res < curr
            or (limit and res > limit)
            or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then
        error("invalid position returned by match-time capture", 0)
    end
    return res
end


-- Add capture values returned by a dynamic capture to the capture list
-- 'base', nested inside a group capture. 'fd' indexes the first capture
-- value, 'n' is the number of values (at least 1).
+ +local function adddyncaptures(s, base, index, n, fd, valuetable) + -- Cgroup capture is already there + assert(base[index].kind == Cgroup and base[index].siz == 0) + base[index].idx = 0 -- make it an anonymous group + base[index + 1] = {} + -- add runtime captures + for i = 1, n do + base[index + i].kind = Cruntime + base[index + i].siz = 1 -- mark it as closed + local ind = #valuetable + 1 + valuetable[ind] = fd[i + 1] + base[index + i].idx = ind -- stack index of capture value + base[index + i].s = s + base[index + i + 1] = {} + end + base[index + n + 1].kind = Cclose -- close group + base[index + n + 1].siz = 1 + base[index + n + 1].s = s + base[index + n + 2] = {} +end + + +-- Opcode interpreter + +local function match(stream, last, o, s, op, valuetable, ...) + local arg = { ... } + local argcount = select('#', ...) + local len = #o + local ptr = ffi.cast('const unsigned char*', o) + s = s - 1 + local stackptr = 0 -- point to first empty slot in stack + local captop = 0 -- point to first empty slot in captures + local STACK = ffi.new("STACK[?]", INITBACK) + local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault) + local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } } + local capturestackptr = #CAPTURESTACK + local maxcapture = maxcapturedefault + local stacklimit = INITBACK + local L = {} + local Memo1, Memo2 = {}, {} + local memoind = 0 + local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2)) + local nocapturereleased = true + + local p = 0 -- current instruction + local streambufsize = 2 ^ 8 + local streambufsizemask = streambufsize - 1 -- faster modulo + local streambufs = {} + local streambufoffset = 0 + local streamstartbuffer = 0 + local streambufferscount = 0 + local level = -1 + + local function deletestreambuffers() + local min = s + for i = stackptr - 1, 0, -1 do + local val = STACK[i].s + if val >= 0 then + min = math.min(val, min) + end + end + + for i = captop - 1, 0, -1 do + local val = 
CAPTURE[i].s + if val >= 0 then + min = math.min(val, min) + end + end + for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do + -- max behind for full capture and max behind for Look-behind predicate + if i + streambufsize + MAXBEHIND < min then + streambufs[i] = nil + streambufferscount = streambufferscount - 1 + else + streamstartbuffer = i - 1 + break + end + end + end + + local function addstreamdata(s, last) + local len = #s + local srcoffset = 0 + if streambufferscount > 128 then + deletestreambuffers() + end + repeat + local offset = bit.band(streambufoffset, streambufsizemask) + if offset > 0 then + local index = streambufoffset - offset + 1 + local count = math.min(len, streambufsize - offset) + ffi.copy(streambufs[index] + offset, s:sub(srcoffset + 1, srcoffset + 1 + count), count) + len = len - count + srcoffset = srcoffset + count + streambufoffset = streambufoffset + count + end + if len > 0 then + local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1 + local buf = ffi.new('unsigned char[?]', streambufsize) + streambufferscount = streambufferscount + 1 + streambufs[index] = buf + local count = math.min(len, streambufsize) + ffi.copy(buf, s:sub(srcoffset + 1, srcoffset + 1 + count), count) + len = len - count + srcoffset = srcoffset + count + streambufoffset = streambufoffset + count + end + if streambufoffset >= 2 ^ 47 then + error("too big input stream", 0) + end + until len == 0 + end + + local function getstreamchar(s) + local offset = bit.band(s, streambufsizemask) + local index = s - offset + 1 + return streambufs[index][offset] + end + + local checkstreamlen + + local function getstreamstring(st, en) + -- TODO Optimalize access + local str = {} + local i = st >= 0 and st or 1 + local to = en >= 0 and en or math.huge + while true do + if i > to then break end + if not checkstreamlen(i - 1) then return end + if last and (st < 0 or en < 0) then + for j = i, streambufoffset do + str[#str + 1] = 
string.char(getstreamchar(j - 1)) + end + en = en < 0 and streambufoffset + en + 1 or en + en = st > 0 and en - st + 1 or en + st = st < 0 and streambufoffset + st + 1 or 1 + return table.concat(str):sub(st, en) + else + str[#str + 1] = string.char(getstreamchar(i - 1)) + i = i + 1 + end + end + return table.concat(str) + end + + function checkstreamlen(index) + local str + while true do + if index < streambufoffset then + return true + else + if last then + s = streambufoffset + return false + end + local max = captop + for i = stackptr - 1, 0, -1 do + local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1 + if val >= 0 then + max = math.min(val, max) + end + end + local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount)) + if n > 0 then + for i = stackptr - 1, 0, -1 do + local val = STACK[i].caplevel + if val > 0 then + STACK[i].caplevel = STACK[i].caplevel - n + end + end + captop = captop - n + end + if outindex > 0 then + nocapturereleased = false + end + str, last = coroutine.yield(1, unpack(out, 1, outindex)) + addstreamdata(str) + end + end + end + + local function doublecapture() + maxcapture = maxcapture * 2 + local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture) + ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop) + CAPTURE = NEWCAPTURE + CAPTURESTACK[capturestackptr].capture = CAPTURE + CAPTURESTACK[capturestackptr].maxcapture = maxcapture + end + + local function pushcapture() + CAPTURE[captop].idx = op.p[p].offset + CAPTURE[captop].kind = band(op.p[p].val, 0x0f) + CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0 + captop = captop + 1 + p = p + 1 + if captop >= maxcapture then + doublecapture() + end + end + + local function traceenter(typ, par) + level = level + (par or 0) + io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux])) + end + + local function traceleave(inst) + io.write(('%s- 
%s\n'):format((' '):rep(level), valuetable[op.p[inst].aux])) + level = level - 1 + end + + local function tracematch(typ, start, par, from, to, inst, extra, ...) + local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...) + local capstr = {} + for i = 1, capscount do capstr[i] = tostring(caps[i]) end + extra = extra and '(' .. extra .. ')' or '' + io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra, + o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " "))) + level = level - par + end + + local function fail() + -- pattern failed: try to backtrack + local X + repeat -- remove pending calls + stackptr = stackptr - 1 + if stackptr == -1 then + p = FAIL + return + end + s = STACK[stackptr].s + X = STACK[stackptr].X + if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then + Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL + Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL + end + -- lvar.2 rest + if X == LRFAIL then + CAPTURESTACK[capturestackptr] = nil + capturestackptr = capturestackptr - 1 + CAPTURE = CAPTURESTACK[capturestackptr].capture + maxcapture = CAPTURESTACK[capturestackptr].maxcapture + L[STACK[stackptr].pA + s * maxpointer] = nil + end + if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end + until X == CHOICE or X >= 0 + p = STACK[stackptr].p + for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do + table.remove(valuetable) + end + -- inc.2 + if X >= 0 then + s = X + capturestackptr = capturestackptr - 1 + CAPTURE = CAPTURESTACK[capturestackptr].capture + captop = CAPTURESTACK[capturestackptr].captop + maxcapture = CAPTURESTACK[capturestackptr].maxcapture + local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit + while captop + capture.captop >= maxcapture do + doublecapture() + end + 
ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) + captop = captop + capture.captop + if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end + CAPTURESTACK[capturestackptr + 1] = nil + L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil + else + captop = STACK[stackptr].caplevel + end + end + + local function doublestack() + if stackptr >= maxstack then + error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0) + end + stacklimit = stacklimit * 2 + stacklimit = (stacklimit > maxstack) and maxstack or stacklimit + local NEWSTACK = ffi.new("STACK[?]", stacklimit) + ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr) + STACK = NEWSTACK + end + + if stream then + addstreamdata(o) + len = nil + o = nil + ptr = nil + end + while true do + --[[ Only for debug + io.write(("s: |%s| stck:%d, caps:%d \n"):format(s + 1, stackptr, captop)) + if p ~= FAIL then + lpprint.printinst(op.p, p, valuetable) + lpprint.printcaplist(CAPTURE, captop, valuetable) + end + --]] + if p == FAIL then return -1 end + local code = op.p[p].code + if code == IEnd then + CAPTURE[captop].kind = Cclose + CAPTURE[captop].s = -1 + return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...) + elseif code == IRet then + if STACK[stackptr - 1].X == CALL then + stackptr = stackptr - 1 + if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) 
end + p = STACK[stackptr].p + if usememoization and STACK[stackptr].memos ~= VOID then + local dif = captop - STACK[stackptr].caplevel + local caps + if dif > 0 then + caps = ffi.new("CAPTURE[?]", dif) + ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE')) + end + local val = { s, dif, caps } + Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val + Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val + end + else + local X = STACK[stackptr - 1].X + -- lvar.1 inc.1 + if X == LRFAIL or s > X then + if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end + STACK[stackptr - 1].X = s + p = STACK[stackptr - 1].pA + s = STACK[stackptr - 1].s + local lambda = L[p + s * maxpointer] + lambda.level = lambda.level + 1 + lambda.X = STACK[stackptr - 1].X + STACK[stackptr - 1].caplevel = captop + STACK[stackptr - 1].valuetabletop = #valuetable + CAPTURESTACK[capturestackptr].captop = captop + lambda.capturecommit = CAPTURESTACK[capturestackptr] + captop = 0 + CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault) + CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } + maxcapture = maxcapturedefault + else + -- inc.3 + stackptr = stackptr - 1 + p = STACK[stackptr].p + s = STACK[stackptr].X + for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do + table.remove(valuetable) + end + local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] + capturestackptr = capturestackptr - 1 + CAPTURE = CAPTURESTACK[capturestackptr].capture + captop = CAPTURESTACK[capturestackptr].captop + maxcapture = CAPTURESTACK[capturestackptr].maxcapture + local capture = lambda.capturecommit + while captop + capture.captop >= maxcapture do + doublecapture() + end + ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) + captop = captop + 
capture.captop + if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end + CAPTURESTACK[capturestackptr + 1] = nil + L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil + end + end + elseif code == IBehind then + local n = op.p[p].val + if n > s then + fail() + else + s = s - n + p = p + 1 + end + elseif code == IJmp then + if trace and op.p[p].aux ~= 0 then traceenter('TC') end + p = p + op.p[p].offset + elseif code == IChoice then + if stackptr == stacklimit then + doublestack() + end + STACK[stackptr].X = CHOICE + STACK[stackptr].p = p + op.p[p].offset + STACK[stackptr].s = s + STACK[stackptr].caplevel = captop + STACK[stackptr].valuetabletop = #valuetable + stackptr = stackptr + 1 + p = p + 1 + elseif code == ICall then + if stackptr == stacklimit then + doublestack() + end + local k = bit.band(op.p[p].val, 0xffff) + if k == 0 then + local pA = p + op.p[p].offset + local memo = Memo1[pA + s * maxpointer] + if usememoization and memo then + if trace then traceenter('M', 1) end + if memo == FAIL then + if trace then traceleave(p) end + fail() + else + local dif = memo[2] + if dif > 0 then + while captop + dif >= maxcapture do + doublecapture() + end + local caps = memo[3] + ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE')) + captop = captop + dif + end + if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) 
end + s = memo[1] + p = p + 1 + end + else + if trace then traceenter('', 1) end + STACK[stackptr].X = CALL + STACK[stackptr].s = s + STACK[stackptr].p = p + 1 -- save return address + STACK[stackptr].pA = pA + STACK[stackptr].memos = s + STACK[stackptr].caplevel = captop + stackptr = stackptr + 1 + p = pA + if usememoization and not memo then + memoind = memoind + 1 + if memoind > maxmemo then + memoind = 0 + Memo1 = Memo2 + Memo2 = {} + end + end + end + else + local pA = p + op.p[p].offset + local X = L[pA + s * maxpointer] + -- lvar.1 lvar.2 + if not X then + if trace then traceenter('', 1) end + CAPTURESTACK[capturestackptr].captop = captop + local capture = ffi.new("CAPTURE[?]", maxcapturedefault) + capturestackptr = capturestackptr + 1 + CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault } + CAPTURE = capture + maxcapture = maxcapturedefault + captop = 0 + L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 } + STACK[stackptr].p = p + 1 + STACK[stackptr].pA = pA + STACK[stackptr].s = s + STACK[stackptr].X = LRFAIL + stackptr = stackptr + 1 + p = pA + elseif X.X == LRFAIL or k < X.k then + -- lvar.3 lvar.5 + fail() + else + -- lvar.4 + local capture = X.capturecommit + while captop + capture.captop >= maxcapture do + doublecapture() + end + ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) + captop = captop + capture.captop + p = p + 1 + s = X.X + end + end + elseif code == ICommit then + stackptr = stackptr - 1 + p = p + op.p[p].offset + elseif code == IPartialCommit then + STACK[stackptr - 1].s = s + STACK[stackptr - 1].caplevel = captop + STACK[stackptr - 1].valuetabletop = #valuetable + p = p + op.p[p].offset + elseif code == IBackCommit then + stackptr = stackptr - 1 + s = STACK[stackptr].s + captop = STACK[stackptr].caplevel + for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do + table.remove(valuetable) + end + p = p + op.p[p].offset 
+ elseif code == IFailTwice then + stackptr = stackptr - 1 + fail() + elseif code == IFail then + fail() + elseif code == ICloseRunTime then + -- invalidate memo + for i = 0, stackptr - 1 do + STACK[i].memos = VOID + end + local cs = {} + cs.s = o + cs.stream = getstreamstring + cs.ocap = CAPTURE + cs.ptop = arg + cs.ptopcount = argcount + local out = { outindex = 0, out = {} } + local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function + captop = captop - n + local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result + -- fail? + if res == FAIL then + fail() + else + s = res - 1 -- else update current position + n = out.outindex - 1 -- number of new captures + -- any new capture? + if n > 0 then + captop = captop + 1 + while captop + n + 1 >= maxcapture do + doublecapture() + end + captop = captop + n + 1 + -- add new captures to 'capture' list + adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable) + end + p = p + 1 + end + elseif code == ICloseCapture then + local s1 = s + 1 + assert(captop > 0) + -- if possible, turn capture into a full capture + if CAPTURE[captop - 1].siz == 0 and + s1 - CAPTURE[captop - 1].s < 255 then + CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1 + p = p + 1 + else + CAPTURE[captop].siz = 1 + CAPTURE[captop].s = s + 1 + pushcapture() + end + elseif code == IOpenCapture then + CAPTURE[captop].siz = 0 + CAPTURE[captop].s = s + 1 + pushcapture() + elseif code == IFullCapture then + CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size + CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F) + pushcapture() + -- standard mode + elseif o then + if code == IAny then + if s < len then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestAny then + if s < len then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == IChar then + if s < len and ptr[s] == op.p[p].val then + p = p + 1 + s = s + 1 + else 
+ fail() + end + elseif code == ITestChar then + if s < len and ptr[s] == op.p[p].val then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISet then + local c = ptr[s] + local set = valuetable[op.p[p].val] + if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestSet then + local c = ptr[s] + local set = valuetable[op.p[p].val] + if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISpan then + while s < len do + local c = ptr[s] + local set = valuetable[op.p[p].val] + if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then + break + end + s = s + 1 + end + p = p + 1 + end + else + -- stream mode + if code == IAny then + if checkstreamlen(s) then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestAny then + if checkstreamlen(s) then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == IChar then + if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestChar then + if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISet then + local c = checkstreamlen(s) and getstreamchar(s) + local set = valuetable[op.p[p].val] + if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestSet then + local c = checkstreamlen(s) and getstreamchar(s) + local set = valuetable[op.p[p].val] + if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISpan then + while checkstreamlen(s) do + local c = getstreamchar(s) + local set = valuetable[op.p[p].val] + if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then + break + end + s = s + 1 + end + p = p + 1 + end + 
end
+    end
+end
+
+local function setmax(val) -- set maxstack, never letting it drop below INITBACK
+    maxstack = val
+    if maxstack < INITBACK then
+        maxstack = INITBACK
+    end
+end
+
+local function setmaxbehind(val) -- MAXBEHIND = largest of MAXBEHINDPREDICATE, MAXOFF and val (nil counts as 0)
+    MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, val or 0)
+end
+
+local function enablememoization(val) -- store val in the usememoization flag
+    usememoization = val
+end
+
+local function enabletracing(val) -- store val in the trace flag
+    trace = val
+end
+
+-- Get the initial position for the match, interpreting negative
+-- values from the end of the subject
+
+local function initposition(len, pos) -- returns a 0-based offset; pos defaults to 1
+    local ii = pos or 1
+    -- positive index?
+    if (ii > 0) then
+        -- inside the string?
+        if ii <= len then
+            return ii - 1; -- return it (corrected to 0-base)
+        else
+            return len; -- crop at the end
+        end
+    else
+        -- negative index
+        -- inside the string?
+        if -ii <= len then
+            return len + ii -- return position from the end
+        else
+            return 0; -- crop at the beginning
+        end
+    end
+end
+
+local function lp_match(pat, s, init, valuetable, ...) -- match pat against the complete string s
+    local i = initposition(s:len(), init) + 1
+    return select(2, match(false, true, s, i, pat.code, valuetable, ...)) -- drop match()'s first result
+end
+
+local function lp_streammatch(pat, init, valuetable, ...) -- returns a feeder function fce(chunk, last)
+    local params = { ... }
+    local paramslength = select('#', ...) -- true argument count, so trailing nils survive unpack
+    local fce = coroutine.wrap(function(s, last)
+        return match(true, last, s, init or 1, pat.code, valuetable, unpack(params, 1, paramslength))
+    end)
+    return fce
+end
+
+local function retcount(...) -- returns the number of values and a table holding them
+    return select('#', ...), { ... }
+end
+
+-- Only for testing purpose
+-- stream emulation (send all chars from string one char after char)
+local function lp_emulatestreammatch(pat, s, init, valuetable, ...)
+    local init = initposition(s:len(), init) + 1
+    local fce = lp_streammatch(pat, init, valuetable, ...)
+    local ret, count = {}, 0 -- results accumulated over all feeder calls
+    for j = 1, #s do
+        local pcount, pret = retcount(fce(s:sub(j, j), j == #s)) -- one char
+        if pret[1] == -1 then
+            return -- fail
+        elseif pret[1] == 0 then
+            -- parsing finished
+            -- collect result
+            for i = 2, pcount do
+                ret[count + i - 1] = pret[i]
+            end
+            count = count + pcount - 1
+            return unpack(ret, 1, count)
+        end
+        for i = 2, pcount do -- any other status: keep the partial results and feed the next char
+            ret[count + i - 1] = pret[i]
+        end
+        count = count + pcount - 1
+    end
+    return select(2, fce(s, true)) -- empty string
+end
+
+local function lp_load(str, fcetab, usemeta) -- rebuild a pattern and its value table from the serialized string str
+    local index = 0 -- read offset into str, in bytes
+    assert(str)
+    local ptr = ffi.cast('const char*', str)
+    local patsize = ffi.cast('uint32_t*', ptr + index)[0] -- number of tree elements
+    index = index + 4
+    local len = ffi.sizeof(treepatternelement) * patsize
+
+    local pat
+    if usemeta then
+        pat = treepattern(patsize)
+    else
+        pat = ffi.gc(ffi.cast('TREEPATTERN*', ffi.C.malloc(ffi.sizeof(treepattern, patsize))),
+            function(ct) -- finalizer: free the code buffers (if any) and then the tree itself
+                if ct.code ~= nil then
+                    ffi.C.free(ct.code.p)
+                    ffi.C.free(ct.code)
+                end
+                ffi.C.free(ct)
+            end)
+        ffi.fill(pat, ffi.sizeof(treepattern, patsize))
+        pat.treesize = patsize
+        pat.id = 0
+    end
+    ffi.copy(pat.p, ptr + index, len) -- tree elements
+    index = index + len
+    if usemeta then
+        pat.code = pattern()
+    else
+        pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
+        assert(pat.code ~= nil)
+        pat.code.allocsize = 10
+        pat.code.size = 0
+        pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize)
+        assert(pat.code.p ~= nil)
+        ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
+    end
+    pat.code.size = ffi.cast('uint32_t*', ptr + index)[0] -- number of code elements
+    index = index + 4
+    local len = pat.code.size * ffi.sizeof(patternelement)
+    local data = ffi.string(ptr + index, len) -- raw code elements, copied into pat.code.p at the end
+    index = index + len
+    local count = ffi.cast('uint32_t*', ptr + index)[0] -- number of value-table entries
+    index = index + 4
+    local valuetable = {}
+    for i = 1, count do
+        local tag = ffi.string(ptr + index, 3) -- 3-byte type tag: 'str', 'num', 'cdt' or 'fnc'
+        index = index + 3
+        --string
+        if tag == 'str' then
+            local len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local val = ffi.string(ptr + index, len)
+            index = index + len
+            valuetable[#valuetable + 1] = val
+        elseif tag == 'num' then
+            --number
+            local len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local val = ffi.string(ptr + index, len)
+            index = index + len
+            valuetable[#valuetable + 1] = tonumber(val)
+        elseif tag == 'cdt' then
+            --ctype
+            local val = settype()
+            ffi.copy(val, ptr + index, ffi.sizeof(settype))
+            index = index + ffi.sizeof(settype)
+            valuetable[#valuetable + 1] = val
+        elseif tag == 'fnc' then
+            --function
+            local len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local fname = ffi.string(ptr + index, len)
+            index = index + len
+            len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local val = ffi.string(ptr + index, len)
+            index = index + len
+            if fcetab and fcetab[fname] then -- a caller-supplied function wins over the serialized chunk
+                assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname))
+                valuetable[#valuetable + 1] = fcetab[fname]
+            else
+                valuetable[#valuetable + 1] = loadstring(val) -- NOTE(security): compiles a chunk taken from the input; only load trusted data
+            end
+        end
+    end
+    pat.code.allocsize = pat.code.size
+    pat.code.p = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
+    assert(pat.code.p ~= nil)
+    ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize)
+    return pat, valuetable
+end
+
+local function lp_loadfile(fname, fcetab, usemeta) -- read fname in binary mode and pass its content to lp_load
+    local file = assert(io.open(fname, 'rb'))
+    local data, err = file:read("*a")
+    file:close() -- close before parsing, so a failed read or an error inside lp_load cannot leak the handle
+    return lp_load(assert(data, err), fcetab, usemeta)
+end
+
+-- ======================================================
+
+return {
+    match = lp_match,
+    streammatch = lp_streammatch,
+    emulatestreammatch = lp_emulatestreammatch,
+    load = lp_load,
+    loadfile = lp_loadfile,
+    setmax = setmax,
+    setmaxbehind = setmaxbehind,
+    enablememoization = enablememoization,
+    enabletracing = enabletracing
+}
diff --git a/tools/lpeg/re.lua b/tools/lpeg/re.lua
new file
mode 100644
index 0000000..3d00232
--- /dev/null
+++ b/tools/lpeg/re.lua
@@ -0,0 +1,286 @@
+-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
+-- 2014/08/15 changes rostislav
+
+-- imported functions and modules
+local tonumber, type, print, error = tonumber, type, print, error
+local setmetatable = setmetatable
+local m = require"lpeglj"
+
+-- 'm' will be used to parse expressions, and 'mm' will be used to
+-- create expressions; that is, 're' runs on 'm', creating patterns
+-- on 'mm'
+local mm = m
+
+-- pattern's metatable
+local mt = getmetatable(mm.P(0))
+mt = m.version() == "1.0.0.0LJ" and m or mt
+
+
+
+-- No more global accesses after this point
+local version = _VERSION
+if version == "Lua 5.2" then _ENV = nil end
+
+
+local any = m.P(1)
+
+
+-- Pre-defined names
+local Predef = { nl = m.P"\n" }
+
+
+local mem
+local fmem
+local gmem
+
+
+local function updatelocale () -- refresh the %-classes in Predef and reset the three pattern caches
+  mm.locale(Predef)
+  Predef.a = Predef.alpha
+  Predef.c = Predef.cntrl
+  Predef.d = Predef.digit
+  Predef.g = Predef.graph
+  Predef.l = Predef.lower
+  Predef.p = Predef.punct
+  Predef.s = Predef.space
+  Predef.u = Predef.upper
+  Predef.w = Predef.alnum
+  Predef.x = Predef.xdigit
+  Predef.A = any - Predef.a
+  Predef.C = any - Predef.c
+  Predef.D = any - Predef.d
+  Predef.G = any - Predef.g
+  Predef.L = any - Predef.l
+  Predef.P = any - Predef.p
+  Predef.S = any - Predef.s
+  Predef.U = any - Predef.u
+  Predef.W = any - Predef.w
+  Predef.X = any - Predef.x
+  mem = {}    -- restart memoization
+  fmem = {}
+  gmem = {}
+  local mt = {__mode = "v"} -- weak values: cached patterns may be collected
+  setmetatable(mem, mt)
+  setmetatable(fmem, mt)
+  setmetatable(gmem, mt)
+end
+
+
+updatelocale()
+
+
+
+local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) -- debug aid: prints the position and the text before it
+
+
+local function getdef (id, defs) -- look up id in the caller-supplied defs table; raises when absent
+  local c = defs and defs[id]
+  if not c then error("undefined name: " .. id) end
+  return c
+end
+
+
+local function patt_error (s, i) -- raise an error quoting the pattern text at position i
+  local msg = (#s < i + 20) and s:sub(i)
+                             or s:sub(i,i+20) .. "..."
+  msg = ("pattern error near '%s'"):format(msg)
+  error(msg, 2)
+end
+
+local function mult (p, n) -- p repeated n times, built by squaring p while halving n
+  local np = mm.P(true)
+  while n >= 1 do
+    if n%2 >= 1 then np = np * p end
+    p = p * p
+    n = n/2
+  end
+  return np
+end
+
+local function equalcap (s, i, c) -- match-time check: does the subject at i repeat the captured string c?
+  if type(c) ~= "string" then return nil end
+  local e = #c + i
+  if type(s) == 'function' then -- stream mode
+    if s(i, e - 1) == c then return e else return nil end
+  else
+    if s:sub(i, e - 1) == c then return e else return nil end
+  end
+end
+
+
+local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
+
+local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
+
+local arrow = S * "<-"
+
+local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
+
+name = m.C(name)
+
+
+-- a defined name only has meaning in a given environment
+local Def = name * m.Carg(1)
+
+local num = m.C(m.R"09"^1) * S / tonumber
+
+local String = "'" * m.C((any - "'")^0) * "'" +
+               '"' * m.C((any - '"')^0) * '"'
+
+
+local defined = "%" * Def / function (c,Defs)
+  local cat = Defs and Defs[c] or Predef[c]
+  if not cat then error ("name '" .. c .. "' undefined") end
+  return cat
+end
+
+local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
+
+local item = defined + Range + m.C(any)
+
+local Class =
+    "["
+  * (m.C(m.P"^"^-1))    -- optional complement symbol
+  * m.Cf(item * (item - "]")^0, mt.__add) /
+                          function (c, p) return c == "^" and any - p or p end
+  * "]"
+
+local function adddef (t, k, exp) -- add rule k to grammar table t; a duplicate name is an error
+  if t[k] then
+    error("'"..k.."' already defined as a rule")
+  else
+    t[k] = exp
+  end
+  return t
+end
+
+local function firstdef (n, r) return adddef({n}, n, r) end
+
+
+local function NT (n, b, p) -- non-terminal reference; b is the value of the "G" group (true inside a grammar)
+  if not b then
+    error("rule '"..n.."' used outside a grammar")
+  else return mm.V(n, p or 0)
+  end
+end
+
+
+local exp = m.P{ "Exp",
+  Exp = S * ( m.V"Grammar"
+            + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
+  Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
+        * (#seq_follow + patt_error);
+  Prefix = "&" * S * m.V"Prefix" / mt.__len
+         + "!" * S * m.V"Prefix" / mt.__unm
+         + m.V"Suffix";
+  Suffix = m.Cf(m.V"Primary" * S *
+          ( ( m.P"+" * m.Cc(1, mt.__pow)
+            + m.P"*" * m.Cc(0, mt.__pow)
+            + m.P"?" * m.Cc(-1, mt.__pow)
+            + "^" * ( m.Cg(num * m.Cc(mult))
+                    + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
+                    )
+            + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
+                         + m.P"{}" * m.Cc(nil, m.Ct)
+                         + m.Cg(Def / getdef * m.Cc(mt.__div))
+                         )
+            + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
+            ) * S
+          )^0, function (a,b,f) return f(a,b) end );
+  Primary = "(" * m.V"Exp" * ")"
+            + String / mm.P
+            + Class
+            + defined
+            + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
+                     function (n, p) return mm.Cg(p, n) end
+            + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
+            + m.P"{}" / mm.Cp
+            + "{~" * m.V"Exp" * "~}" / mm.Cs
+            + "{|" * m.V"Exp" * "|}" / mm.Ct
+            + "{" * m.V"Exp" * "}" / mm.C
+            + m.P"." * m.Cc(any)
+            + (name * m.Cb("G") * (S * ":" * S * num)^-1 * -arrow + "<" * name * m.Cb("G") * (S * ":" * S * num)^-1 * ">") / NT;
+  Definition = name * arrow * m.V"Exp";
+  Grammar = m.Cg(m.Cc(true), "G") *
+            m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
+              adddef) / mm.P
+}
+
+local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
+
+
+local function compile (p, defs) -- turn the re string p into a pattern; defs supplies the %name definitions
+  if mm.type(p) == "pattern" then return p end   -- already compiled
+  local cp = pattern:match(p, 1, defs)
+  if not cp then error("incorrect pattern", 3) end
+  return cp
+end
+
+local function match (s, p, i) -- match s against p from position i (default 1); compiled patterns are cached in mem
+  local cp = mem[p]
+  if not cp then
+    cp = compile(p)
+    mem[p] = cp
+  end
+  return cp:match(s, i or 1)
+end
+
+local function streammatch (p, i) -- stream counterpart of match: returns whatever cp:streammatch returns
+  local cp = mem[p]
+  if not cp then
+    cp = compile(p)
+    mem[p] = cp
+  end
+  return cp:streammatch(i or 1)
+end
+
+-- Only for testing purpose
+local function emulatestreammatch(s, p, i)
+  local cp = mem[p]
+  if not cp then
+    cp = compile(p)
+    mem[p] = cp
+  end
+  return cp:emulatestreammatch(s, i or 1)
+end
+
+local function find (s, p, i) -- first occurrence of p in s at or after i: start and end index, or nil
+  local cp = fmem[p]
+  if not cp then
+    cp = compile(p) / 0
+    cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
+    fmem[p] = cp
+  end
+  local i, e = cp:match(s, i or 1)
+  if i then return i, e - 1
+  else return i
+  end
+end
+
+local function gsub (s, p, rep) -- substitute rep for every match of p in s; one cached pattern per (p, rep) pair
+  local g = gmem[p] or {}   -- ensure gmem[p] is not collected while here
+  gmem[p] = g
+  local cp = g[rep]
+  if not cp then
+    cp = compile(p)
+    cp = mm.Cs((cp / rep + 1)^0)
+    g[rep] = cp
+  end
+  return cp:match(s)
+end
+
+
+-- exported names
+local re = {
+  compile = compile,
+  match = match,
+  streammatch = streammatch,
+  emulatestreammatch = emulatestreammatch,
+  find = find,
+  gsub = gsub,
+  updatelocale = updatelocale,
+}
+
+if version == "Lua 5.1" then _G.re = re end
+
+return re
diff --git a/tools/luajson/json.lua b/tools/luajson/json.lua
new file mode 100644
index 0000000..d8e0e13
--- /dev/null
+++ b/tools/luajson/json.lua
@@ -0,0 +1,25 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+package.path = package.path .. ";lpeg/?.lua"
+local decode = require("json.decode")
+local encode = require("json.encode")
+local util = require("json.util")
+
+local _G = _G
+
+local _ENV = nil
+
+local json = {
+    _VERSION = "1.3.4",
+    _DESCRIPTION = "LuaJSON : customizable JSON decoder/encoder",
+    _COPYRIGHT = "Copyright (c) 2007-2014 Thomas Harning Jr. ",
+    decode = decode,
+    encode = encode,
+    util = util
+}
+
+_G.json = json
+
+return json
diff --git a/tools/luajson/json/decode.lua b/tools/luajson/json/decode.lua
new file mode 100644
index 0000000..b2c357c
--- /dev/null
+++ b/tools/luajson/json/decode.lua
@@ -0,0 +1,171 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+local lpeg = require("lpeg")
+
+local error = error
+local pcall = pcall
+
+local jsonutil = require("json.util")
+local merge = jsonutil.merge
+local util = require("json.decode.util")
+
+local decode_state = require("json.decode.state")
+
+local setmetatable, getmetatable = setmetatable, getmetatable
+local assert = assert
+local ipairs, pairs = ipairs, pairs
+local string_char = require("string").char
+
+local type = type
+
+local require = require
+
+local _ENV = nil
+
+local modulesToLoad = {
+    "composite",
+    "strings",
+    "number",
+    "others"
+}
+local loadedModules = {
+}
+
+local json_decode = {}
+
+json_decode.default = {
+    unicodeWhitespace = true,
+    initialObject = false,
+    nothrow = false
+}
+
+local modes_defined = { "default", "strict", "simple" }
+
+json_decode.simple = {}
+
+json_decode.strict = {
+    unicodeWhitespace = true,
+    initialObject = true,
+    nothrow = false
+}
+
+for _,name in ipairs(modulesToLoad) do
+    local mod = require("json.decode." .. name)
+    if mod.mergeOptions then
+        for _, mode in pairs(modes_defined) do -- let each sub-module add its defaults to every predefined mode
+            mod.mergeOptions(json_decode[mode], mode)
+        end
+    end
+    loadedModules[#loadedModules + 1] = mod
+end
+
+-- Shift over default into defaultOptions to permit build optimization
+local defaultOptions = json_decode.default
+json_decode.default = nil
+
+local function generateDecoder(lexer, options) -- wrap the token lexer into a decoder function(data)
+    -- Marker to permit detection of final end
+    local marker = {}
+    local parser = lpeg.Ct((options.ignored * lexer)^0 * lpeg.Cc(marker)) * options.ignored * (lpeg.P(-1) + util.unexpected())
+    local decoder = function(data)
+        local state = decode_state.create(options)
+        local parsed = parser:match(data)
+        assert(parsed, "Invalid JSON data")
+        local i = 0
+        while true do -- replay the captured items: functions drive the state, anything else is a value
+            i = i + 1
+            local item = parsed[i]
+            if item == marker then break end
+            if type(item) == 'function' and item ~= jsonutil.undefined and item ~= jsonutil.null then
+                item(state)
+            else
+                state:set_value(item)
+            end
+        end
+        if options.initialObject then
+            assert(type(state.previous) == 'table', "Initial value not an object or array")
+        end
+        -- Make sure stack is empty
+        assert(state.i == 0, "Unclosed elements present")
+        return state.previous
+    end
+    if options.nothrow then -- nothrow: report failures as nil, message
+        return function(data)
+            local status, rv = pcall(decoder, data)
+            if status then
+                return rv
+            else
+                return nil, rv
+            end
+        end
+    end
+    return decoder
+end
+
+local function buildDecoder(mode) -- mode: options table merged over the defaults, or nil for the defaults alone
+    mode = mode and merge({}, defaultOptions, mode) or defaultOptions
+    for _, mod in ipairs(loadedModules) do
+        if mod.mergeOptions then
+            mod.mergeOptions(mode)
+        end
+    end
+    local ignored = mode.unicodeWhitespace and util.unicode_ignored or util.ascii_ignored
+    -- Store 'ignored' in the global options table
+    mode.ignored = ignored
+
+    --local grammar = {
+    --    [1] = mode.initialObject and (ignored * (object_type + array_type)) or value_type
+    --}
+    local lexer
+    for _, mod in ipairs(loadedModules) do -- ordered choice over every sub-module lexer, in load order
+        local new_lexer = mod.generateLexer(mode)
+        lexer = lexer and lexer + new_lexer or new_lexer
+    end
+    return generateDecoder(lexer, mode)
+end
+
+-- Since 'default' is nil, it cannot be used as a key in the prebuilt map below
+local defaultDecoder = buildDecoder(json_decode.default)
+local prebuilt_decoders = {}
+for _, mode in pairs(modes_defined) do
+    if json_decode[mode] ~= nil then
+        prebuilt_decoders[json_decode[mode]] = buildDecoder(json_decode[mode])
+    end
+end
+
+--[[
+Options:
+    number => number decode options
+    string => string decode options
+    array  => array decode options
+    object => object decode options
+    initialObject => whether or not to require the initial object to be a table/array
+    allowUndefined => whether or not to allow undefined values
+]]
+local function getDecoder(mode) -- mode: true selects json_decode.strict, nil the default decoder
+    mode = mode == true and json_decode.strict or mode or json_decode.default
+    local decoder = mode == nil and defaultDecoder or prebuilt_decoders[mode]
+    if decoder then
+        return decoder
+    end
+    return buildDecoder(mode)
+end
+
+local function decode(data, mode) -- decode data with the decoder selected by mode
+    local decoder = getDecoder(mode)
+    return decoder(data)
+end
+
+local mt = {}
+mt.__call = function(self, ...) -- calling the module table is the same as calling decode
+    return decode(...)
+end
+
+json_decode.getDecoder = getDecoder
+json_decode.decode = decode
+json_decode.util = util
+setmetatable(json_decode, mt)
+
+return json_decode
diff --git a/tools/luajson/json/decode/composite.lua b/tools/luajson/json/decode/composite.lua
new file mode 100644
index 0000000..cd9c289
--- /dev/null
+++ b/tools/luajson/json/decode/composite.lua
@@ -0,0 +1,190 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+local pairs = pairs
+local type = type
+
+local lpeg = require("lpeg")
+
+local util = require("json.decode.util")
+local jsonutil = require("json.util")
+
+local rawset = rawset
+
+local assert = assert
+local tostring = tostring
+
+local error = error
+local getmetatable = getmetatable
+
+local _ENV = nil
+
+local defaultOptions = {
+    array = {
+        trailingComma = true
+    },
+    object = {
+        trailingComma = true,
+        number = true,
+        identifier = true,
+        setObjectKey = rawset
+    },
+    calls = {
+        defs = nil,
+        -- By default, do not allow undefined calls to be de-serialized as call objects
+        allowUndefined = false
+    }
+}
+
+local modeOptions = {
+    default = nil,
+    strict = {
+        array = {
+            trailingComma = false
+        },
+        object = {
+            trailingComma = false,
+            number = false,
+            identifier = false
+        }
+    }
+}
+
+local function BEGIN_ARRAY(state) -- '[' : save the enclosing container and start a new array
+    state:push()
+    state:new_array()
+end
+local function END_ARRAY(state) -- ']' : finish the array and restore the enclosing container
+    state:end_array()
+    state:pop()
+end
+
+local function BEGIN_OBJECT(state) -- '{'
+    state:push()
+    state:new_object()
+end
+local function END_OBJECT(state) -- '}'
+    state:end_object()
+    state:pop()
+end
+
+local function END_CALL(state) -- ')' closing a function call
+    state:end_call()
+    state:pop()
+end
+
+local function SET_KEY(state) -- ':'
+    state:set_key()
+end
+
+local function NEXT_VALUE(state) -- ','
+    state:put_value()
+end
+
+local function mergeOptions(options, mode) -- merge the array/object/calls defaults (and mode overrides) into options
+    jsonutil.doOptionMerge(options, true, 'array', defaultOptions, mode and modeOptions[mode])
+    jsonutil.doOptionMerge(options, true, 'object', defaultOptions, mode and modeOptions[mode])
+    jsonutil.doOptionMerge(options, true, 'calls', defaultOptions, mode and modeOptions[mode])
+end
+
+
+local isPattern
+if lpeg.type then
+    function isPattern(value)
+        return lpeg.type(value) == 'pattern'
+    end
+else
+    local metaAdd = getmetatable(lpeg.P("")).__add
+    function isPattern(value) -- fallback without lpeg.type: compare the __add metamethod with a real pattern's
+        return type(getmetatable(value)) == 'table' and getmetatable(value).__add == metaAdd -- values without a metatable are simply not patterns
+    end
+end
+
+
+local function generateSingleCallLexer(name, func) -- name: string or capturing LPEG pattern; func: function or boolean
+    if type(name) ~= 'string' and not isPattern(name) then
+        error("Invalid functionCalls name: " .. tostring(name) .. " not a string or LPEG pattern")
+    end
+    -- Allow boolean or function to match up w/ encoding permissions
+    if type(func) ~= 'boolean' and type(func) ~= 'function' then
+        error("Invalid functionCalls item: " .. tostring(name) .. " not a function") -- tostring: name may be a pattern, which cannot be concatenated
+    end
+    local function buildCallCapture(name) -- here name is the captured call name, always a string
+        return function(state)
+            if func == false then
+                error("Function call on '" .. name .. "' not permitted")
+            end
+            state:push()
+            state:new_call(name, func)
+        end
+    end
+    local nameCallCapture
+    if type(name) == 'string' then
+        nameCallCapture = lpeg.P(name .. "(") * lpeg.Cc(name) / buildCallCapture
+    else
+        -- Name matcher expected to produce a capture
+        nameCallCapture = name * "(" / buildCallCapture
+    end
+    -- Call func over nameCallCapture and value to permit function receiving name
+    return nameCallCapture
+end
+
+local function generateNamedCallLexers(options) -- ordered choice over every entry of options.calls.defs, or nil when there are none
+    if not options.calls or not options.calls.defs then
+        return
+    end
+    local callCapture
+    for name, func in pairs(options.calls.defs) do
+        local newCapture = generateSingleCallLexer(name, func)
+        if not callCapture then
+            callCapture = newCapture
+        else
+            callCapture = callCapture + newCapture
+        end
+    end
+    return callCapture
+end
+
+local function generateCallLexer(options) -- call-open patterns plus the closing ')', or nil when calls are not configured
+    local lexer
+    local namedCapture = generateNamedCallLexers(options)
+    if options.calls and options.calls.allowUndefined then
+        lexer = generateSingleCallLexer(lpeg.C(util.identifier), true)
+    end
+    if namedCapture then
+        lexer = lexer and lexer + namedCapture or namedCapture
+    end
+    if lexer then
+        lexer = lexer + lpeg.P(")") * lpeg.Cc(END_CALL)
+    end
+    return lexer
+end
+
+local function generateLexer(options) -- structural tokens, each captured as the handler function to run
+    local ignored = options.ignored
+    local array_options, object_options = options.array, options.object
+    local lexer =
+        lpeg.P("[") * lpeg.Cc(BEGIN_ARRAY) +
+        lpeg.P("]") * lpeg.Cc(END_ARRAY) +
+        lpeg.P("{") * lpeg.Cc(BEGIN_OBJECT) +
+        lpeg.P("}") * lpeg.Cc(END_OBJECT) +
+        lpeg.P(":") * lpeg.Cc(SET_KEY) +
+        lpeg.P(",") * lpeg.Cc(NEXT_VALUE)
+    if object_options.identifier then
+        -- Add identifier match w/ validation check that it is in key
+        lexer = lexer + lpeg.C(util.identifier) * ignored * lpeg.P(":") * lpeg.Cc(SET_KEY)
+    end
+    local callLexers = generateCallLexer(options)
+    if callLexers then
+        lexer = lexer + callLexers
+    end
+    return lexer
+end
+
+local composite = {
+    mergeOptions = mergeOptions,
+    generateLexer = generateLexer
+}
+
+return composite
diff --git a/tools/luajson/json/decode/number.lua b/tools/luajson/json/decode/number.lua
new file mode 100644
index
0000000..94ed3b8
--- /dev/null
+++ b/tools/luajson/json/decode/number.lua
@@ -0,0 +1,100 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+local lpeg = require("lpeg")
+local tonumber = tonumber
+local jsonutil = require("json.util")
+local merge = jsonutil.merge
+local util = require("json.decode.util")
+
+local _ENV = nil
+
+local digit = lpeg.R("09")
+local digits = digit^1
+
+-- Illegal octal declaration
+local illegal_octal_detect = #(lpeg.P('0') * digits) * util.denied("Octal numbers")
+
+local int = (lpeg.P('-') + 0) * (lpeg.R("19") * digits + illegal_octal_detect + digit) -- optional '-', then: 1-9 followed by digits, a denied leading-zero run, or one digit
+
+local frac = lpeg.P('.') * digits
+
+local exp = lpeg.S("Ee") * (lpeg.S("-+") + 0) * digits
+
+local nan = lpeg.S("Nn") * lpeg.S("Aa") * lpeg.S("Nn")
+local inf = lpeg.S("Ii") * lpeg.P("nfinity")
+local ninf = lpeg.P('-') * lpeg.S("Ii") * lpeg.P("nfinity")
+local hex = (lpeg.P("0x") + lpeg.P("0X")) * lpeg.R("09","AF","af")^1
+
+local defaultOptions = {
+    nan = true,
+    inf = true,
+    frac = true,
+    exp = true,
+    hex = false
+}
+
+local modeOptions = {}
+
+modeOptions.strict = {
+    nan = false,
+    inf = false
+}
+
+local nan_value = 0/0
+local inf_value = 1/0
+local ninf_value = -1/0
+
+--[[
+    Options: configuration options for number rules
+        nan: match NaN
+        inf: match Infinity
+        frac: match fraction portion (.0)
+        exp: match exponent portion  (e1)
+        hex: match hexadecimal literals (0x1F)
+        DEFAULT: nan, inf, frac, exp
+]]
+local function mergeOptions(options, mode)
+    jsonutil.doOptionMerge(options, false, 'number', defaultOptions, mode and modeOptions[mode])
+end
+
+local function generateLexer(options) -- build the number pattern from options.number
+    options = options.number
+    local ret = int
+    if options.frac then
+        ret = ret * (frac + 0)
+    else
+        ret = ret * (#frac * util.denied("Fractions", "number.frac") + 0) -- disabled feature: look ahead and reject instead of skipping
+    end
+    if options.exp then
+        ret = ret * (exp + 0)
+    else
+        ret = ret * (#exp * util.denied("Exponents", "number.exp") + 0)
+    end
+    if options.hex then
+        ret = hex + ret -- hex is tried before the decimal form
+    else
+        ret = #hex * util.denied("Hexadecimal", "number.hex") + ret
+    end
+    -- Capture number now
+    ret = ret / tonumber
+    if options.nan then
+        ret = ret + nan / function() return nan_value end
+    else
+        ret = ret + #nan * util.denied("NaN", "number.nan")
+    end
+    if options.inf then
+        ret = ret + ninf / function() return ninf_value end + inf / function() return inf_value end
+    else
+        ret = ret + (#ninf + #inf) * util.denied("+/-Inf", "number.inf")
+    end
+    return ret
+end
+
+local number = {
+    int = int,
+    mergeOptions = mergeOptions,
+    generateLexer = generateLexer
+}
+
+return number
diff --git a/tools/luajson/json/decode/others.lua b/tools/luajson/json/decode/others.lua
new file mode 100644
index 0000000..9fab7a8
--- /dev/null
+++ b/tools/luajson/json/decode/others.lua
@@ -0,0 +1,62 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+local lpeg = require("lpeg")
+local jsonutil = require("json.util")
+local merge = jsonutil.merge
+local util = require("json.decode.util")
+
+-- Container module for other JavaScript types (bool, null, undefined)
+
+local _ENV = nil
+
+-- For null and undefined, use the util.null value to preserve null-ness
+local booleanCapture =
+    lpeg.P("true") * lpeg.Cc(true)
+    + lpeg.P("false") * lpeg.Cc(false)
+
+local nullCapture = lpeg.P("null")
+local undefinedCapture = lpeg.P("undefined")
+
+local defaultOptions = {
+    allowUndefined = true,
+    null = jsonutil.null,
+    undefined = jsonutil.undefined
+}
+
+local modeOptions = {}
+
+modeOptions.simple = {
+    null = false,     -- Mapped to nil
+    undefined = false -- Mapped to nil
+}
+modeOptions.strict = {
+    allowUndefined = false
+}
+
+local function mergeOptions(options, mode)
+    jsonutil.doOptionMerge(options, false, 'others', defaultOptions, mode and modeOptions[mode])
+end
+
+local function generateLexer(options) -- pattern for true / false / null / undefined, built from options.others
+    -- The 'or nil' clause allows false to map to a nil value since 'nil' cannot be merged
+    options = options.others
+    local valueCapture = (
+        booleanCapture
+        + nullCapture * lpeg.Cc(options.null or nil)
+    )
+    if options.allowUndefined then
+        valueCapture = valueCapture + undefinedCapture * lpeg.Cc(options.undefined or nil)
+    else
+        valueCapture = valueCapture + #undefinedCapture * util.denied("undefined", "others.allowUndefined")
+    end
+    return valueCapture
+end
+
+local others = {
+    mergeOptions = mergeOptions,
+    generateLexer = generateLexer
+}
+
+return others
diff --git a/tools/luajson/json/decode/state.lua b/tools/luajson/json/decode/state.lua
new file mode 100644
index 0000000..693d5df
--- /dev/null
+++ b/tools/luajson/json/decode/state.lua
@@ -0,0 +1,189 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+
+local setmetatable = setmetatable
+local jsonutil = require("json.util")
+local assert = assert
+local type = type
+local next = next
+local unpack = require("table").unpack or unpack
+
+local _ENV = nil
+
+local state_ops = {}
+local state_mt = {
+    __index = state_ops
+}
+
+function state_ops.pop(self) -- the finished container becomes the pending value; restore the enclosing one
+    self.previous_set = true
+    self.previous = self.active
+    local i = self.i
+    -- Load in this array into the active item
+    self.active = self.stack[i]
+    self.active_state = self.state_stack[i]
+    self.active_key = self.key_stack[i]
+    self.stack[i] = nil
+    self.state_stack[i] = nil
+    self.key_stack[i] = nil
+
+    self.i = i - 1
+end
+
+function state_ops.push(self) -- save the active container, its state and its key on the three stacks
+    local i = self.i + 1
+    self.i = i
+
+    self.stack[i] = self.active
+    self.state_stack[i] = self.active_state
+    self.key_stack[i] = self.active_key
+end
+
+function state_ops.put_object_value(self, trailing) -- store the pending value under active_key
+    local object_options = self.options.object
+    if trailing and object_options.trailingComma then
+        if not self.active_key then -- nothing pending after a trailing comma
+            return
+        end
+    end
+    assert(self.active_key, "Missing key value")
+    object_options.setObjectKey(self.active, self.active_key, self:grab_value())
+    self.active_key = nil
+end
+
+function state_ops.put_array_value(self, trailing) -- append the pending value to the active array
+    -- Safety check
+    if trailing and not self.previous_set and self.options.array.trailingComma then
+        return
+    end
+    local new_index = self.active_state + 1 -- for arrays, active_state is the element count so far
+    self.active_state = new_index
+    self.active[new_index] = self:grab_value()
+end
+
+function state_ops.put_value(self, trailing) -- dispatch on the kind of the active container
+    if self.active_state == 'object' then
+        self:put_object_value(trailing)
+    else
+        self:put_array_value(trailing)
+    end
+end
+
+function state_ops.new_array(self)
+    local new_array = {}
+    if jsonutil.InitArray then
+        new_array = jsonutil.InitArray(new_array) or new_array
+    end
+    self.active = new_array
+    self.active_state = 0
+    self.active_key = nil
+    self:unset_value()
+end
+
+function state_ops.end_array(self)
+    if self.previous_set or self.active_state ~= 0 then
+        -- Not an empty array
+        self:put_value(true)
+    end
+    if self.active_state ~= #self.active then
+        -- Store the element count in .n when it differs from #array
+        self.active.n = self.active_state
+    end
+end
+
+function state_ops.new_object(self)
+    local new_object = {}
+    self.active = new_object
+    self.active_state = 'object' -- marks the active container as an object
+    self.active_key = nil
+    self:unset_value()
+end
+
+function state_ops.end_object(self)
+    if self.previous_set or next(self.active) then
+        -- Not an empty object
+        self:put_value(true)
+    end
+end
+
+function state_ops.new_call(self, name, func) -- arguments are collected like array elements
+    -- TODO setup properly
+    local new_call = {}
+    new_call.name = name
+    new_call.func = func
+    self.active = new_call
+    self.active_state = 0
+    self.active_key = nil
+    self:unset_value()
+end
+
+function state_ops.end_call(self) -- invoke the call's function; its result replaces the call record
+    if self.previous_set or self.active_state ~= 0 then
+        -- Not an empty argument list
+        self:put_value(true)
+    end
+    if self.active_state ~= #self.active then
+        -- Store the argument count in .n when it differs from #args
+        self.active.n = self.active_state
+    end
+    local func = self.active.func
+    if func == true then -- true stands for the generic call builder
+        func = jsonutil.buildCall
+    end
+    self.active = func(self.active.name, unpack(self.active, 1, self.active.n or #self.active))
+end
+
+
+function state_ops.unset_value(self) -- clear the pending-value slot
+    self.previous_set = false
+    self.previous = nil
+end
+
+function state_ops.grab_value(self) -- take the pending value; asserts that one is set
+    assert(self.previous_set, "Previous value not set")
+    self.previous_set = false
+    return self.previous
+end
+
+function state_ops.set_value(self, value) -- fill the pending-value slot; asserts that it is empty
+    assert(not self.previous_set, "Value set when one already in slot")
+    self.previous_set = true
+    self.previous = value
+end
+
+function state_ops.set_key(self) -- turn the pending value into the key for the next object entry
+    assert(self.active_state == 'object', "Cannot set key on array")
+    local value = self:grab_value()
+    local value_type = type(value)
+    if self.options.object.number then
+        assert(value_type == 'string' or value_type == 'number', "As configured, a key must be a number or string")
+    else
+        assert(value_type == 'string', "As configured, a key must be a string")
+    end
+    self.active_key = value
+end
+
+
+local function create(options) -- new decode state with empty stacks
+    local ret = {
+        options = options,
+        stack = {},
+        state_stack = {},
+        key_stack = {},
+        i = 0,
+        active = nil,
+        active_key = nil,
+        previous = nil,
+        active_state = nil
+
+    }
+    return setmetatable(ret, state_mt)
+end
+
+local state = {
+    create = create
+}
+
+return state
diff --git a/tools/luajson/json/decode/strings.lua b/tools/luajson/json/decode/strings.lua
new file mode 100644
index 0000000..4272f29
--- /dev/null
+++ b/tools/luajson/json/decode/strings.lua
@@ -0,0 +1,133 @@
+--[[
+    Licensed according to the included 'LICENSE' document
+    Author: Thomas Harning Jr
+]]
+local lpeg = require("lpeg")
+local jsonutil = require("json.util")
+local util = require("json.decode.util")
+local merge = jsonutil.merge
+
+local tonumber = tonumber
+local string_char = require("string").char
+local floor = require("math").floor
+local table_concat = require("table").concat
+
+local error = error
+
+local _ENV = nil
+
+local function get_error(item) -- pattern that raises "<item> in string [...] @ line:column" when it is reached
+    local fmt_string = item .. " in string [%q] @ %i:%i"
+    return lpeg.P(function(data, index)
+        local line, line_index, bad_char, last_line = util.get_invalid_character_info(data, index)
+        local err = fmt_string:format(bad_char, line, line_index)
+        error(err)
+    end) * 1
+end
+
+local bad_unicode = get_error("Illegal unicode escape")
+local bad_hex = get_error("Illegal hex escape")
+local bad_character = get_error("Illegal character")
+local bad_escape = get_error("Illegal escape")
+
+local knownReplacements = {
+    ["'"] = "'",
+    ['"'] = '"',
+    ['\\'] = '\\',
+    ['/'] = '/',
+    b = '\b',
+    f = '\f',
+    n = '\n',
+    r = '\r',
+    t = '\t',
+    v = '\v',
+    z = '\z'
+}
+
+-- according to the table at http://da.wikipedia.org/wiki/UTF-8
+local function utf8DecodeUnicode(code1, code2) -- code1, code2: high and low hex pair of a \uXXXX escape
+    code1, code2 = tonumber(code1, 16), tonumber(code2, 16)
+    if code1 == 0 and code2 < 0x80 then -- below U+0080: one byte
+        return string_char(code2)
+    end
+    if code1 < 0x08 then -- below U+0800: two bytes
+        return string_char(
+            0xC0 + code1 * 4 + floor(code2 / 64),
+            0x80 + code2 % 64)
+    end
+    return string_char(
+        0xE0 + floor(code1 / 16),
+        0x80 + (code1 % 16) * 4 + floor(code2 / 64),
+        0x80 + code2 % 64)
+end
+
+local function decodeX(code) -- \xHH escape: the single byte with that hex value
+    code = tonumber(code, 16)
+    return string_char(code)
+end
+
+local doSimpleSub = lpeg.C(lpeg.S("'\"\\/bfnrtvz")) / knownReplacements
+local doUniSub = lpeg.P('u') * (lpeg.C(util.hexpair) * lpeg.C(util.hexpair) + bad_unicode)
+local doXSub = lpeg.P('x') * (lpeg.C(util.hexpair) + bad_hex)
+
+local defaultOptions = {
+    badChars = '',
+    additionalEscapes = false, -- disallow untranslated escapes
+    escapeCheck = #lpeg.S('bfnrtv/\\"xu\'z'), -- no check on valid characters
+    decodeUnicode = utf8DecodeUnicode,
+    strict_quotes = false
+}
+
+local modeOptions = {}
+
+modeOptions.strict = {
+    badChars = '\b\f\n\r\t\v',
+    additionalEscapes = false, -- no additional escapes
+    escapeCheck = #lpeg.S('bfnrtv/\\"u'), --only these chars are allowed to be escaped
+    strict_quotes = true
+}
+
+local function mergeOptions(options, mode)
+
jsonutil.doOptionMerge(options, false, 'strings', defaultOptions, mode and modeOptions[mode]) +end + +local function buildCaptureString(quote, badChars, escapeMatch) + local captureChar = (1 - lpeg.S("\\" .. badChars .. quote)) + (lpeg.P("\\") / "" * escapeMatch) + -- During error, force end + local captureString = captureChar^0 + (-#lpeg.P(quote) * bad_character + -1) + return lpeg.P(quote) * lpeg.Cs(captureString) * lpeg.P(quote) +end + +local function generateLexer(options) + options = options.strings + local quotes = { '"' } + if not options.strict_quotes then + quotes[#quotes + 1] = "'" + end + local escapeMatch = doSimpleSub + escapeMatch = escapeMatch + doXSub / decodeX + escapeMatch = escapeMatch + doUniSub / options.decodeUnicode + if options.escapeCheck then + escapeMatch = options.escapeCheck * escapeMatch + bad_escape + end + if options.additionalEscapes then + escapeMatch = options.additionalEscapes + escapeMatch + end + local captureString + for i = 1, #quotes do + local cap = buildCaptureString(quotes[i], options.badChars, escapeMatch) + if captureString == nil then + captureString = cap + else + captureString = captureString + cap + end + end + return captureString +end + +local strings = { + mergeOptions = mergeOptions, + generateLexer = generateLexer +} + +return strings diff --git a/tools/luajson/json/decode/util.lua b/tools/luajson/json/decode/util.lua new file mode 100644 index 0000000..2493bf3 --- /dev/null +++ b/tools/luajson/json/decode/util.lua @@ -0,0 +1,121 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local lpeg = require("lpeg") +local select = select +local pairs, ipairs = pairs, ipairs +local tonumber = tonumber +local string_char = require("string").char +local rawset = rawset +local jsonutil = require("json.util") + +local error = error +local setmetatable = setmetatable + +local table_concat = require("table").concat + +local merge = require("json.util").merge + +local _ENV = nil 
+ +local function get_invalid_character_info(input, index) + local parsed = input:sub(1, index) + local bad_character = input:sub(index, index) + local _, line_number = parsed:gsub('\n',{}) + local last_line = parsed:match("\n([^\n]+.)$") or parsed + return line_number, #last_line, bad_character, last_line +end + +local function build_report(msg) + local fmt = msg:gsub("%%", "%%%%") .. " @ character: %i %i:%i [%s] line:\n%s" + return lpeg.P(function(data, pos) + local line, line_index, bad_char, last_line = get_invalid_character_info(data, pos) + local text = fmt:format(pos, line, line_index, bad_char, last_line) + error(text) + end) * 1 +end +local function unexpected() + local msg = "unexpected character" + return build_report(msg) +end +local function denied(item, option) + local msg + if option then + msg = ("'%s' denied by option set '%s'"):format(item, option) + else + msg = ("'%s' denied"):format(item) + end + return build_report(msg) +end + +-- 09, 0A, 0B, 0C, 0D, 20 +local ascii_space = lpeg.S("\t\n\v\f\r ") +local unicode_space +do + local chr = string_char + local u_space = ascii_space + -- \u0085 \u00A0 + u_space = u_space + lpeg.P(chr(0xC2)) * lpeg.S(chr(0x85) .. chr(0xA0)) + -- \u1680 \u180E + u_space = u_space + lpeg.P(chr(0xE1)) * (lpeg.P(chr(0x9A, 0x80)) + chr(0xA0, 0x8E)) + -- \u2000 - \u200A, also 200B + local spacing_end = "" + for i = 0x80,0x8b do + spacing_end = spacing_end .. chr(i) + end + -- \u2028 \u2029 \u202F + spacing_end = spacing_end .. chr(0xA8) .. chr(0xA9) .. 
chr(0xAF)
+	u_space = u_space + lpeg.P(chr(0xE2, 0x80)) * lpeg.S(spacing_end)
+	-- \u205F
+	u_space = u_space + lpeg.P(chr(0xE2, 0x81, 0x9F))
+	-- \u3000
+	u_space = u_space + lpeg.P(chr(0xE3, 0x80, 0x80))
+	-- BOM \uFEFF
+	u_space = u_space + lpeg.P(chr(0xEF, 0xBB, 0xBF))
+	unicode_space = u_space
+end
+
+local identifier = lpeg.R("AZ","az","__") * lpeg.R("AZ","az", "__", "09") ^0
+
+local hex = lpeg.R("09","AF","af")
+local hexpair = hex * hex
+-- Comment patterns: a '//' comment ends at a newline OR at end of input (-1)
+local comments = {
+	cpp = lpeg.P("//") * (1 - lpeg.P("\n"))^0 * (lpeg.P("\n") + -1),
+	c = lpeg.P("/*") * (1 - lpeg.P("*/"))^0 * lpeg.P("*/")
+}
+
+local comment = comments.cpp + comments.c
+
+local ascii_ignored = (ascii_space + comment)^0
+
+local unicode_ignored = (unicode_space + comment)^0
+
+-- Parse the lpeg version skipping patch-values
+-- LPEG <= 0.7 have no version value... so 0.7 is value
+local DecimalLpegVersion = lpeg.version and tonumber(lpeg.version():match("^(%d+%.%d+)")) or 0.7
+
+local function setObjectKeyForceNumber(t, key, value)
+	key = tonumber(key) or key
+	return rawset(t, key, value)
+end
+
+local util = {
+	unexpected = unexpected,
+	denied = denied,
+	ascii_space = ascii_space,
+	unicode_space = unicode_space,
+	identifier = identifier,
+	hex = hex,
+	hexpair = hexpair,
+	comments = comments,
+	comment = comment,
+	ascii_ignored = ascii_ignored,
+	unicode_ignored = unicode_ignored,
+	DecimalLpegVersion = DecimalLpegVersion,
+	get_invalid_character_info = get_invalid_character_info,
+	setObjectKeyForceNumber = setObjectKeyForceNumber
+}
+
+return util
diff --git a/tools/luajson/json/encode.lua b/tools/luajson/json/encode.lua
new file mode 100644
index 0000000..5a13adc
--- /dev/null
+++ b/tools/luajson/json/encode.lua
@@ -0,0 +1,161 @@
+--[[
+	Licensed according to the included 'LICENSE' document
+	Author: Thomas Harning Jr
+]]
+local type = type
+local assert, error = assert, error
+local getmetatable, setmetatable = getmetatable, setmetatable
+
+local ipairs, pairs = ipairs, pairs
+local require =
require + +local output = require("json.encode.output") + +local util = require("json.util") +local util_merge, isCall = util.merge, util.isCall + +local _ENV = nil + +--[[ + List of encoding modules to load. + Loaded in sequence such that earlier encoders get priority when + duplicate type-handlers exist. +]] +local modulesToLoad = { + "strings", + "number", + "calls", + "others", + "array", + "object" +} +-- Modules that have been loaded +local loadedModules = {} + +local json_encode = {} + +-- Configuration bases for client apps +local modes_defined = { "default", "strict" } + +json_encode.default = {} +json_encode.strict = { + initialObject = true -- Require an object at the root +} + +-- For each module, load it and its defaults +for _,name in ipairs(modulesToLoad) do + local mod = require("json.encode." .. name) + if mod.mergeOptions then + for _, mode in pairs(modes_defined) do + mod.mergeOptions(json_encode[mode], mode) + end + end + loadedModules[name] = mod +end + +-- NOTE: Nested not found, so assume unsupported until use case arises +local function flattenOutput(out, value) + assert(type(value) ~= 'table') + out = out or {} + out[#out + 1] = value + return out +end + +-- Prepares the encoding map from the already provided modules and new config +local function prepareEncodeMap(options) + local map = {} + for _, name in ipairs(modulesToLoad) do + local encodermap = loadedModules[name].getEncoder(options[name]) + for valueType, encoderSet in pairs(encodermap) do + map[valueType] = flattenOutput(map[valueType], encoderSet) + end + end + return map +end + +--[[ + Encode a value with a given encoding map and state +]] +local function encodeWithMap(value, map, state, isObjectKey) + local t = type(value) + local encoderList = assert(map[t], "Failed to encode value, unhandled type: " .. 
t)
+	for _, encoder in ipairs(encoderList) do
+		local ret = encoder(value, state, isObjectKey)
+		if false ~= ret then
+			return ret
+		end
+	end
+	error("Failed to encode value, encoders for " .. t .. " deny encoding")
+end
+-- Builds the per-value encode function used as state.encode; isObjectKey is
+-- forwarded to encodeWithMap so the type encoders actually receive it
+local function getBaseEncoder(options)
+	local encoderMap = prepareEncodeMap(options)
+	if options.preProcess then
+		local preProcess = options.preProcess
+		return function(value, state, isObjectKey)
+			local ret = preProcess(value, isObjectKey or false)
+			if nil ~= ret then
+				value = ret
+			end
+			return encodeWithMap(value, encoderMap, state, isObjectKey)
+		end
+	end
+	return function(value, state, isObjectKey)
+		return encodeWithMap(value, encoderMap, state, isObjectKey)
+	end
+end
+--[[
+	Retrieve an initial encoder instance based on provided options
+	the initial encoder is responsible for initializing state
+		State has at least these values configured: encode, check_unique, already_encoded
+]]
+function json_encode.getEncoder(options)
+	options = options and util_merge({}, json_encode.default, options) or json_encode.default
+	local encode = getBaseEncoder(options)
+
+	local function initialEncode(value)
+		if options.initialObject then
+			local errorMessage = "Invalid arguments: expects a JSON Object or Array at the root"
+			assert(type(value) == 'table' and not isCall(value, options), errorMessage)
+		end
+
+		local alreadyEncoded = {}
+		local function check_unique(value)
+			assert(not alreadyEncoded[value], "Recursive encoding of value")
+			alreadyEncoded[value] = true
+		end
+
+		local outputEncoder = options.output and options.output() or output.getDefault()
+		local state = {
+			encode = encode,
+			check_unique = check_unique,
+			already_encoded = alreadyEncoded, -- To unmark encoding when moving up stack
+			outputEncoder = outputEncoder
+		}
+		local ret = encode(value, state)
+		if nil ~= ret then -- when 'simple' exists its result is returned as-is, even if it is nil
+			if outputEncoder.simple then return outputEncoder.simple(ret) else return ret end
+		end
+	end
+	return initialEncode
+end
+
+-- CONSTRUCT STATE WITH FOLLOWING (at least)
+--[[
+	encoder
check_unique -- used by inner encoders to make sure value is unique + already_encoded -- used to unmark a value as unique +]] +function json_encode.encode(data, options) + return json_encode.getEncoder(options)(data) +end + +local mt = {} +mt.__call = function(self, ...) + return json_encode.encode(...) +end + +setmetatable(json_encode, mt) + +return json_encode diff --git a/tools/luajson/json/encode/array.lua b/tools/luajson/json/encode/array.lua new file mode 100644 index 0000000..3744409 --- /dev/null +++ b/tools/luajson/json/encode/array.lua @@ -0,0 +1,110 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local jsonutil = require("json.util") + +local type = type +local pairs = pairs +local assert = assert + +local table = require("table") +local math = require("math") +local table_concat = table.concat +local math_floor, math_modf = math.floor, math.modf + +local jsonutil = require("json.util") +local util_IsArray = jsonutil.IsArray + +local _ENV = nil + +local defaultOptions = { + isArray = util_IsArray +} + +local modeOptions = {} + +local function mergeOptions(options, mode) + jsonutil.doOptionMerge(options, false, 'array', defaultOptions, mode and modeOptions[mode]) +end + +--[[ + Utility function to determine whether a table is an array or not. + Criteria for it being an array: + * ExternalIsArray returns true (or false directly reports not-array) + * If the table has an 'n' value that is an integer >= 1 then it + is an array... 
may result in false positives (should check some values + before it) + * It is a contiguous list of values with zero string-based keys +]] +local function isArray(val, options) + local externalIsArray = options and options.isArray + + if externalIsArray then + local ret = externalIsArray(val) + if ret == true or ret == false then + return ret + end + end + -- Use the 'n' element if it's a number + if type(val.n) == 'number' and math_floor(val.n) == val.n and val.n >= 1 then + return true + end + local len = #val + for k,v in pairs(val) do + if type(k) ~= 'number' then + return false + end + local _, decim = math_modf(k) + if not (decim == 0 and 1<=k) then + return false + end + if k > len then -- Use Lua's length as absolute determiner + return false + end + end + + return true +end + +--[[ + Cleanup function to unmark a value as in the encoding process and return + trailing results +]] +local function unmarkAfterEncode(tab, state, ...) + state.already_encoded[tab] = nil + return ... +end +local function getEncoder(options) + options = options and jsonutil.merge({}, defaultOptions, options) or defaultOptions + local function encodeArray(tab, state) + if not isArray(tab, options) then + return false + end + -- Make sure this value hasn't been encoded yet + state.check_unique(tab) + local encode = state.encode + local compositeEncoder = state.outputEncoder.composite + local valueEncoder = [[ + for i = 1, (composite.n or #composite) do + local val = composite[i] + PUTINNER(i ~= 1) + val = encode(val, state) + val = val or '' + if val then + PUTVALUE(val) + end + end + ]] + return unmarkAfterEncode(tab, state, compositeEncoder(valueEncoder, '[', ']', ',', tab, encode, state)) + end + return { table = encodeArray } +end + +local array = { + mergeOptions = mergeOptions, + isArray = isArray, + getEncoder = getEncoder +} + +return array diff --git a/tools/luajson/json/encode/calls.lua b/tools/luajson/json/encode/calls.lua new file mode 100644 index 0000000..11dddfe --- 
/dev/null +++ b/tools/luajson/json/encode/calls.lua @@ -0,0 +1,68 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local table = require("table") +local table_concat = table.concat + +local select = select +local getmetatable, setmetatable = getmetatable, setmetatable +local assert = assert + +local jsonutil = require("json.util") + +local isCall, decodeCall = jsonutil.isCall, jsonutil.decodeCall + +local _ENV = nil + +local defaultOptions = { +} + +-- No real default-option handling needed... +local modeOptions = {} + +local function mergeOptions(options, mode) + jsonutil.doOptionMerge(options, false, 'calls', defaultOptions, mode and modeOptions[mode]) +end + + +--[[ + Encodes 'value' as a function call + Must have parameters in the 'callData' field of the metatable + name == name of the function call + parameters == array of parameters to encode +]] +local function getEncoder(options) + options = options and jsonutil.merge({}, defaultOptions, options) or defaultOptions + local function encodeCall(value, state) + if not isCall(value) then + return false + end + local encode = state.encode + local name, params = decodeCall(value) + local compositeEncoder = state.outputEncoder.composite + local valueEncoder = [[ + for i = 1, (composite.n or #composite) do + local val = composite[i] + PUTINNER(i ~= 1) + val = encode(val, state) + val = val or '' + if val then + PUTVALUE(val) + end + end + ]] + return compositeEncoder(valueEncoder, name .. 
'(', ')', ',', params, encode, state) + end + return { + table = encodeCall, + ['function'] = encodeCall + } +end + +local calls = { + mergeOptions = mergeOptions, + getEncoder = getEncoder +} + +return calls diff --git a/tools/luajson/json/encode/number.lua b/tools/luajson/json/encode/number.lua new file mode 100644 index 0000000..290b440 --- /dev/null +++ b/tools/luajson/json/encode/number.lua @@ -0,0 +1,58 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local tostring = tostring +local assert = assert +local jsonutil = require("json.util") +local huge = require("math").huge + +local _ENV = nil + +local defaultOptions = { + nan = true, + inf = true +} + +local modeOptions = {} +modeOptions.strict = { + nan = false, + inf = false +} + +local function mergeOptions(options, mode) + jsonutil.doOptionMerge(options, false, 'number', defaultOptions, mode and modeOptions[mode]) +end + + +local function encodeNumber(number, options) + if number ~= number then + assert(options.nan, "Invalid number: NaN not enabled") + return "NaN" + end + if number == huge then + assert(options.inf, "Invalid number: Infinity not enabled") + return "Infinity" + end + if number == -huge then + assert(options.inf, "Invalid number: Infinity not enabled") + return "-Infinity" + end + return tostring(number) +end + +local function getEncoder(options) + options = options and jsonutil.merge({}, defaultOptions, options) or defaultOptions + return { + number = function(number, state) + return encodeNumber(number, options) + end + } +end + +local number = { + mergeOptions = mergeOptions, + getEncoder = getEncoder +} + +return number diff --git a/tools/luajson/json/encode/object.lua b/tools/luajson/json/encode/object.lua new file mode 100644 index 0000000..4716d52 --- /dev/null +++ b/tools/luajson/json/encode/object.lua @@ -0,0 +1,77 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local pairs = pairs 
+local assert = assert + +local type = type +local tostring = tostring + +local table_concat = require("table").concat +local jsonutil = require("json.util") + +local _ENV = nil + +local defaultOptions = { +} + +local modeOptions = {} + +local function mergeOptions(options, mode) + jsonutil.doOptionMerge(options, false, 'object', defaultOptions, mode and modeOptions[mode]) +end + +--[[ + Cleanup function to unmark a value as in the encoding process and return + trailing results +]] +local function unmarkAfterEncode(tab, state, ...) + state.already_encoded[tab] = nil + return ... +end +--[[ + Encode a table as a JSON Object ( keys = strings, values = anything else ) +]] +local function encodeTable(tab, options, state) + -- Make sure this value hasn't been encoded yet + state.check_unique(tab) + local encode = state.encode + local compositeEncoder = state.outputEncoder.composite + local valueEncoder = [[ + local first = true + for k, v in pairs(composite) do + local ti = type(k) + assert(ti == 'string' or ti == 'number' or ti == 'boolean', "Invalid object index type: " .. ti) + local name = encode(tostring(k), state, true) + if first then + first = false + else + name = ',' .. name + end + PUTVALUE(name .. 
':') + local val = encode(v, state) + val = val or '' + if val then + PUTVALUE(val) + end + end + ]] + return unmarkAfterEncode(tab, state, compositeEncoder(valueEncoder, '{', '}', nil, tab, encode, state)) +end + +local function getEncoder(options) + options = options and jsonutil.merge({}, defaultOptions, options) or defaultOptions + return { + table = function(tab, state) + return encodeTable(tab, options, state) + end + } +end + +local object = { + mergeOptions = mergeOptions, + getEncoder = getEncoder +} + +return object diff --git a/tools/luajson/json/encode/others.lua b/tools/luajson/json/encode/others.lua new file mode 100644 index 0000000..b527044 --- /dev/null +++ b/tools/luajson/json/encode/others.lua @@ -0,0 +1,66 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local tostring = tostring + +local assert = assert +local jsonutil = require("json.util") +local type = type + +local _ENV = nil + +-- Shortcut that works +local encodeBoolean = tostring + +local defaultOptions = { + allowUndefined = true, + null = jsonutil.null, + undefined = jsonutil.undefined +} + +local modeOptions = {} + +modeOptions.strict = { + allowUndefined = false +} + +local function mergeOptions(options, mode) + jsonutil.doOptionMerge(options, false, 'others', defaultOptions, mode and modeOptions[mode]) +end +local function getEncoder(options) + options = options and jsonutil.merge({}, defaultOptions, options) or defaultOptions + local function encodeOthers(value, state) + if value == options.null then + return 'null' + elseif value == options.undefined then + assert(options.allowUndefined, "Invalid value: Unsupported 'Undefined' parameter") + return 'undefined' + else + return false + end + end + local function encodeBoolean(value, state) + return value and 'true' or 'false' + end + local nullType = type(options.null) + local undefinedType = options.undefined and type(options.undefined) + -- Make sure that all of the types handled 
here are handled + local ret = { + boolean = encodeBoolean, + ['nil'] = function() return 'null' end, + [nullType] = encodeOthers + } + if undefinedType then + ret[undefinedType] = encodeOthers + end + return ret +end + +local others = { + encodeBoolean = encodeBoolean, + mergeOptions = mergeOptions, + getEncoder = getEncoder +} + +return others diff --git a/tools/luajson/json/encode/output.lua b/tools/luajson/json/encode/output.lua new file mode 100644 index 0000000..8293b62 --- /dev/null +++ b/tools/luajson/json/encode/output.lua @@ -0,0 +1,91 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local type = type +local assert, error = assert, error +local table_concat = require("table").concat +local loadstring = loadstring or load + +local io = require("io") + +local setmetatable = setmetatable + +local output_utility = require("json.encode.output_utility") + +local _ENV = nil + +local tableCompositeCache = setmetatable({}, {__mode = 'v'}) + +local TABLE_VALUE_WRITER = [[ + ret[#ret + 1] = %VALUE% +]] + +local TABLE_INNER_WRITER = "" + +--[[ + nextValues can output a max of two values to throw into the data stream + expected to be called until nil is first return value + value separator should either be attached to v1 or in innerValue +]] +local function defaultTableCompositeWriter(nextValues, beginValue, closeValue, innerValue, composite, encode, state) + if type(nextValues) == 'string' then + local fun = output_utility.prepareEncoder(defaultTableCompositeWriter, nextValues, innerValue, TABLE_VALUE_WRITER, TABLE_INNER_WRITER) + local ret = {} + fun(composite, ret, encode, state) + return beginValue .. table_concat(ret, innerValue) .. 
closeValue + end +end + +-- no 'simple' as default action is just to return the value +local function getDefault() + return { composite = defaultTableCompositeWriter } +end + +-- BEGIN IO-WRITER OUTPUT +local IO_INNER_WRITER = [[ + if %WRITE_INNER% then + state.__outputFile:write(%INNER_VALUE%) + end +]] +local IO_VALUE_WRITER = [[ + state.__outputFile:write(%VALUE%) +]] + +local function buildIoWriter(output) + if not output then -- Default to stdout + output = io.output() + end + local function ioWriter(nextValues, beginValue, closeValue, innerValue, composite, encode, state) + -- HOOK OUTPUT STATE + state.__outputFile = output + if type(nextValues) == 'string' then + local fun = output_utility.prepareEncoder(ioWriter, nextValues, innerValue, IO_VALUE_WRITER, IO_INNER_WRITER) + local ret = {} + output:write(beginValue) + fun(composite, ret, encode, state) + output:write(closeValue) + return nil + end + end + + local function ioSimpleWriter(encoded) + if encoded then + output:write(encoded) + end + return nil + end + return { composite = ioWriter, simple = ioSimpleWriter } +end +local function getIoWriter(output) + return function() + return buildIoWriter(output) + end +end + +local output = { + getDefault = getDefault, + getIoWriter = getIoWriter +} + +return output diff --git a/tools/luajson/json/encode/output_utility.lua b/tools/luajson/json/encode/output_utility.lua new file mode 100644 index 0000000..b6607d1 --- /dev/null +++ b/tools/luajson/json/encode/output_utility.lua @@ -0,0 +1,54 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local setmetatable = setmetatable +local assert, loadstring = assert, loadstring or load + +local _ENV = nil + +-- Key == weak, if main key goes away, then cache cleared +local outputCache = setmetatable({}, {__mode = 'k'}) +-- TODO: inner tables weak? 
+ +local function buildFunction(nextValues, innerValue, valueWriter, innerWriter) + local putInner = "" + if innerValue and innerWriter then + -- Prepare the lua-string representation of the separator to put in between values + local formattedInnerValue = ("%q"):format(innerValue) + -- Fill in the condition %WRITE_INNER% and the %INNER_VALUE% to actually write + putInner = innerWriter:gsub("%%WRITE_INNER%%", "%%1"):gsub("%%INNER_VALUE%%", formattedInnerValue) + end + -- Template-in the value writer (if present) and its conditional argument + local functionCode = nextValues:gsub("PUTINNER(%b())", putInner) + -- %VALUE% is to be filled in by the value-to-write + valueWriter = valueWriter:gsub("%%VALUE%%", "%%1") + -- Template-in the value writer with its argument + functionCode = functionCode:gsub("PUTVALUE(%b())", valueWriter) + functionCode = [[ + return function(composite, ret, encode, state) + ]] .. functionCode .. [[ + end + ]] + return assert(loadstring(functionCode))() +end + +local function prepareEncoder(cacheKey, nextValues, innerValue, valueWriter, innerWriter) + local cache = outputCache[cacheKey] + if not cache then + cache = {} + outputCache[cacheKey] = cache + end + local fun = cache[nextValues] + if not fun then + fun = buildFunction(nextValues, innerValue, valueWriter, innerWriter) + cache[nextValues] = fun + end + return fun +end + +local output_utility = { + prepareEncoder = prepareEncoder +} + +return output_utility diff --git a/tools/luajson/json/encode/strings.lua b/tools/luajson/json/encode/strings.lua new file mode 100644 index 0000000..09d85a9 --- /dev/null +++ b/tools/luajson/json/encode/strings.lua @@ -0,0 +1,88 @@ +--[[ + Licensed according to the included 'LICENSE' document + Author: Thomas Harning Jr +]] +local string_char = require("string").char +local pairs = pairs + +local jsonutil = require("json.util") +local util_merge = jsonutil.merge + +local _ENV = nil + +local normalEncodingMap = { + ['"'] = '\\"', + ['\\'] = '\\\\', + ['/'] = 
'\\/',
+	['\b'] = '\\b',
+	['\f'] = '\\f',
+	['\n'] = '\\n',
+	['\r'] = '\\r',
+	['\t'] = '\\t',
+	-- '\v' has no short escape in JSON; the loop below maps it to \u000B (or \x0B)
+}
+
+local xEncodingMap = {}
+for char, encoded in pairs(normalEncodingMap) do
+	xEncodingMap[char] = encoded
+end
+
+-- Pre-encode the control characters to speed up encoding...
+-- NOTE: UTF-8 may not work out right w/ JavaScript
+-- JavaScript uses 2 bytes after a \u... yet UTF-8 is a
+-- byte-stream encoding, not pairs of bytes (it does encode
+-- some letters > 1 byte, but base case is 1)
+for i = 0, 255 do
+	local c = string_char(i)
+	if c:match('[%z\1-\031\128-\255]') and not normalEncodingMap[c] then
+		-- WARN: UTF8 specializes values >= 0x80 as parts of sequences...
+		-- without \x encoding, do not allow encoding > 7F
+		normalEncodingMap[c] = ('\\u%.4X'):format(i)
+		xEncodingMap[c] = ('\\x%.2X'):format(i)
+	end
+end
+
+local defaultOptions = {
+	xEncode = false, -- Encode single-bytes as \xXX
+	processor = nil, -- Simple processor for the string prior to quoting
+	-- / is not required to be quoted but it helps with certain decoding
+	-- Required encoded characters, " \, and 00-1F (0 - 31)
+	encodeSet = '\\"/%z\1-\031',
+	encodeSetAppend = nil -- Chars to append to the default set
+}
+
+local modeOptions = {}
+
+local function mergeOptions(options, mode)
+	jsonutil.doOptionMerge(options, false, 'strings', defaultOptions, mode and modeOptions[mode])
+end
+
+local function getEncoder(options)
+	options = options and util_merge({}, defaultOptions, options) or defaultOptions
+	local encodeSet = options.encodeSet
+	if options.encodeSetAppend then
+		encodeSet = encodeSet .. options.encodeSetAppend
+	end
+	local encodingMap = options.xEncode and xEncodingMap or normalEncodingMap
+	local encodeString
+	if options.processor then
+		local processor = options.processor -- receives only the escaped string (gsub's count is dropped)
+		encodeString = function(s, state)
+			return '"' .. processor((s:gsub('[' .. encodeSet .. ']', encodingMap))) ..
'"'
+		end
+	else
+		encodeString = function(s, state)
+			return '"' .. s:gsub('[' .. encodeSet .. ']', encodingMap) .. '"'
+		end
+	end
+	return {
+		string = encodeString
+	}
+end
+
+local strings = {
+	mergeOptions = mergeOptions,
+	getEncoder = getEncoder
+}
+
+return strings
diff --git a/tools/luajson/json/util.lua b/tools/luajson/json/util.lua
new file mode 100644
index 0000000..a4599db
--- /dev/null
+++ b/tools/luajson/json/util.lua
@@ -0,0 +1,161 @@
+--[[
+	Licensed according to the included 'LICENSE' document
+	Author: Thomas Harning Jr
+]]
+local type = type
+local print = print
+local tostring = tostring
+local pairs = pairs
+local getmetatable, setmetatable = getmetatable, setmetatable
+local select = select
+
+local _ENV = nil
+
+local function foreach(tab, func)
+	for k, v in pairs(tab) do
+		func(k,v)
+	end
+end
+--[[
+	Debug helper: prints 'tab' under the label 'name' using print().
+	Entries of nested tables are printed with a longer indent per level; a
+	table that was already printed is shown as '<label of its first
+	occurrence>' instead of being walked again, so cycles terminate.
+	Labels and keys go through tostring(), so non-string keys (booleans,
+	tables, ...) or a missing 'name' do not raise a concatenation error.
+]]
+local function printValue(tab, name)
+	local parsed = {}
+	local function doPrint(key, value, space)
+		space = space or ''
+		key = tostring(key)
+		if type(value) == 'table' then
+			if parsed[value] then
+				print(space .. key .. '= <' .. parsed[value] .. '>')
+			else
+				parsed[value] = key
+				print(space .. key .. '= {')
+				space = space .. ' '
+				foreach(value, function(key, value) doPrint(key, value, space) end)
+			end
+		else
+			if type(value) == 'string' then
+				value = '[[' .. tostring(value) .. ']]'
+			end
+			print(space .. key .. '=' .. tostring(value))
+		end
+	end
+	doPrint(name, tab)
+end
+
+local function clone(t)
+	local ret = {}
+	for k,v in pairs(t) do
+		ret[k] = v
+	end
+	return ret
+end
+
+local function inner_merge(t, remaining, from, ...)
+	if remaining == 0 then
+		return t
+	end
+	if from then
+		for k,v in pairs(from) do
+			t[k] = v
+		end
+	end
+	return inner_merge(t, remaining - 1, ...)
+end
+
+--[[*
+	Shallow-merges tables in order onto the first table.
+
+	@param t table to merge entries onto
+	@param ... sequence of 0 or more tables to merge onto 't'
+
+	@returns table 't' from input
+]]
+local function merge(t, ...)
+ return inner_merge(t, select('#', ...), ...) +end + +-- Function to insert nulls into the JSON stream +local function null() + return null +end + +-- Marker for 'undefined' values +local function undefined() + return undefined +end + +local ArrayMT = {} + +--[[ + Return's true if the metatable marks it as an array.. + Or false if it has no array component at all + Otherwise nil to get the normal detection component working +]] +local function IsArray(value) + if type(value) ~= 'table' then return false end + local meta = getmetatable(value) + local ret = meta == ArrayMT or (meta ~= nil and meta.__is_luajson_array) + if not ret then + if #value == 0 then return false end + else + return ret + end +end +local function InitArray(array) + setmetatable(array, ArrayMT) + return array +end + +local CallMT = {} + +local function isCall(value) + return CallMT == getmetatable(value) +end + +local function buildCall(name, ...) + local callData = { + name = name, + parameters = {n = select('#', ...), ...} + } + return setmetatable(callData, CallMT) +end + +local function decodeCall(callData) + if not isCall(callData) then return nil end + return callData.name, callData.parameters +end + +local function doOptionMerge(options, nested, name, defaultOptions, modeOptions) + if nested then + modeOptions = modeOptions and modeOptions[name] + defaultOptions = defaultOptions and defaultOptions[name] + end + options[name] = merge( + {}, + defaultOptions, + modeOptions, + options[name] + ) +end + +local json_util = { + printValue = printValue, + clone = clone, + merge = merge, + null = null, + undefined = undefined, + IsArray = IsArray, + InitArray = InitArray, + isCall = isCall, + buildCall = buildCall, + decodeCall = decodeCall, + doOptionMerge = doOptionMerge +} + +return json_util diff --git a/tools/luajson/test_json.lua b/tools/luajson/test_json.lua new file mode 100644 index 0000000..09d086b --- /dev/null +++ b/tools/luajson/test_json.lua @@ -0,0 +1,95 @@ +package.path = 
package.path .. ";lua_scripts/libraries/luajson/?.lua" + +local JSON = require"json" + +local jsontest = [[{ 1:{"scn_ptz_id":"", +"scn_ptz_prepos":"Preset 176", +"scn_ptz_order":1, +"scn_ptz_duration":"30", +"scn_ptz_rally_delay":"2"} +, +2:{"scn_ptz_id":"","scn_ptz_prepos":"route","scn_ptz_order":2,"scn_ptz_duration":"30","scn_ptz_rally_delay":"2"} } +]] +local jsontest2 = [[{ + "extension":"mpg", + "id":1545148451781, + "name":"Foule_1280x720p.mpg", + "size":67240746, + "date":1545148451, + "mime":"video\/mpeg", + "filename":"1545148451781.mpg", + "dir":"\/home\/pixalarm_data\/fileprocessor_data", + "function_metadatas": + { + "function_faceblur": + { + "date":1545228627, + "current_frame":"845", + "polygons":[ + { + "polygon_id":"new_1", + "polygon_vertex":"[ + [0.14254859611231102,0.12476007677543186],[0.13174946004319654,0.4740882917466411], + [0.3898488120950324,0.6621880998080614],[0.4038876889848812,0.11516314779270634] + ]", + "polygon_frame_start":"1", + "polygon_frame_stop":"300", + "polygon_type":"full_blur" + }, + { + "polygon_id":"new_2", + "polygon_vertex":"[ + [0.6198704103671706,0.1727447216890595],[0.5496760259179265,0.6007677543186181], + [0.7775377969762419,0.7946257197696737],[0.9028077753779697,0.761996161228407], + [0.9481641468682506,0.2821497120921305],[0.7829373650107991,0.04798464491362764] + ]", + "polygon_frame_start":"200", + "polygon_frame_stop":"845", + "polygon_type":"no_blur" + } + ], + "framecuts":[ + ["17","110"], + ["248","298"], + ["488","620"], + ["378","428"] + ], + "face_selection":[ + { + "frame":"21", + "x":"0.5", + "y":"0.356" + }, + { + "frame":"108", + "x":"0.4289", + "y":"0.275" + }, + { + "frame":"294", + "x":"0.726", + "y":"0.2364" + } + ], + "blur_type":"blur", + "blur_area":"face" + } + }, + "total_frame":"845", + "status":"DECODE_FINISHED", + "fps":"25.00" +}]] + +local res = JSON.decode(jsontest2) +for k, v in pairs(res) do + print( k, v) +end + +res = JSON.decode( '{"content" : {},"date" : 
"2014-12-30T08:29:48Z","error" : {"code" : 0,"httpcode" : 200,"message" : ""},"status" : 1}' ) +for k, v in pairs(res) do + print( k, v) +end + +local jsondata = JSON.encode( res ) +print(jsondata) +