-
Notifications
You must be signed in to change notification settings - Fork 8
/
xml.lua
232 lines (207 loc) · 5.66 KB
/
xml.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
-- XML parser in pure Lua
--
-- Each node in the "DOM" is either a string (for text data) or a table.
-- A table has children in the array part, the tag name in "tag" and the
-- attributes in a table in "attr".
--
-- { tag="html", attr={foo="bar", plugh="xyzzy"}, ...children... }
-- Local aliases for the standard string and table library functions used below.
local sbyte, schar = string.byte, string.char
local sfind, ssub, gsub = string.find, string.sub, string.gsub
local tinsert, tremove = table.insert, table.remove
-- Byte values of the punctuation the parser compares against, taken in order
-- from the 11 characters of the literal: ! " ' - / < = > ? [ ]
local EXCLAIM, QUOT, APOS, MINUS, SLASH, LT, EQ, GT, QUESTION, LSQUARE, RSQUARE = sbyte("!\"'-/<=>?[]", 1, 11)
-- Encode a Unicode code point as a UTF-8 byte string.
-- Returns nil for values that cannot be encoded: negative numbers, values
-- above U+10FFFF and the UTF-16 surrogate range U+D800..U+DFFF.
local function utf8_encode(cp)
  local floor = math.floor
  if cp < 0 or cp > 0x10FFFF or (cp >= 0xD800 and cp <= 0xDFFF) then
    return nil
  elseif cp < 0x80 then
    return schar(cp)
  elseif cp < 0x800 then
    return schar(0xC0 + floor(cp / 0x40),
                 0x80 + cp % 0x40)
  elseif cp < 0x10000 then
    return schar(0xE0 + floor(cp / 0x1000),
                 0x80 + floor(cp / 0x40) % 0x40,
                 0x80 + cp % 0x40)
  end
  return schar(0xF0 + floor(cp / 0x40000),
               0x80 + floor(cp / 0x1000) % 0x40,
               0x80 + floor(cp / 0x40) % 0x40,
               0x80 + cp % 0x40)
end

-- Expand the digits of a numeric character reference written in `base`.
-- Returns the UTF-8 string, or nil when the number is not a usable code point.
local function expand_char_ref(digits, base)
  -- Count significant digits only: U+10FFFF is 1114111 in decimal, so anything
  -- longer than 7 digits is out of range. Rejecting it here also keeps
  -- tonumber() away from values that overflow on integer-based Lua versions.
  local significant = gsub(digits, "^0+", "")
  if #significant > 7 then return nil end
  local cp = tonumber(digits, base)
  if cp == nil then return nil end
  return utf8_encode(cp)
end

-- Expand the hexadecimal digits of a "&#x...;" reference (nil if invalid).
local function sub_hex_ent(s) return expand_char_ref(s, 16) end

-- Expand the decimal digits of a "&#...;" reference (nil if invalid).
local function sub_dec_ent(s) return expand_char_ref(s, 10) end

-- The five entities predefined by XML.
local NAMED_ENTITIES = { lt = "<", gt = ">", apos = "'", quot = '"', amp = "&" }

-- Replacement callback for unescape(): `ref` is the text between "&" and ";".
-- Returning nil makes gsub keep the original text, so unknown or invalid
-- references pass through unchanged instead of raising an error.
local function sub_entity(ref)
  local hex = ref:match("^#x(%x+)$")
  if hex then return sub_hex_ent(hex) end
  local dec = ref:match("^#(%d+)$")
  if dec then return sub_dec_ent(dec) end
  return NAMED_ENTITIES[ref]
end

-- Replace XML entity and character references in `s` with the characters
-- they stand for.
--
-- Handles &lt; &gt; &apos; &quot; &amp; plus decimal (&#65;) and hexadecimal
-- (&#x41;) character references. Numeric references are emitted as UTF-8, so
-- code points above 127 produce multi-byte sequences.
--
-- The string is scanned in a single pass: text produced by one expansion is
-- never expanded again ("&#38;amp;" yields "&amp;", not "&").
--
-- Returns the expanded string (one value only).
local function unescape(s)
  -- Parentheses drop gsub's second result (the substitution count).
  return (gsub(s, "&(#?%w+);", sub_entity))
end
-- Characters that must not appear literally in XML text or attribute values,
-- mapped to the predefined entity that represents each of them.
local ESCAPES = {
  ["&"] = "&amp;",
  ["<"] = "&lt;",
  [">"] = "&gt;",
  ["'"] = "&apos;",
  ['"'] = "&quot;",
}

-- Replace the characters & < > ' " in `s` with their XML entities so the
-- result can be written as text content or inside a quoted attribute value.
--
-- All five characters are replaced in one pass, so the "&" of an entity that
-- was just produced is never escaped a second time.
--
-- Returns the escaped string (one value only).
local function escape(s)
  -- Parentheses drop gsub's second result (the substitution count).
  return (gsub(s, "[&<>'\"]", ESCAPES))
end
-- Test whether byte value `c` may appear in an element or attribute name.
--
-- Accepted bytes: - . : _ and the ASCII ranges 0-9, A-Z, a-z.
-- `c` may be nil, which is what string.byte yields when asked for a position
-- past the end of the string; nil is never a name character. Without this
-- guard the range comparisons below raise an error on nil.
--
-- Returns true or false.
local function isname(c)
  if c == nil then return false end
  return c == 45 or c == 46 or c == 58 or c == 95 or
    (c >= 48 and c <= 57) or
    (c >= 65 and c <= 90) or
    (c >= 97 and c <= 122)
end
-- Test whether byte value `c` is XML whitespace: tab (9), line feed (10),
-- carriage return (13) or space (32). Any other value, nil included, gives
-- false.
local function iswhite(c)
  if c == 9 or c == 10 then
    return true
  end
  if c == 13 then
    return true
  end
  return c == 32
end
-- Parse the XML document in string `s` into a tree of nodes.
--
-- Element nodes are tables { tag = name, attr = { ... }, ...children... };
-- text nodes are strings. Entity references are expanded in text and in
-- attribute values, but not inside CDATA sections, whose content is taken
-- literally. Comments and processing instructions are skipped. Text outside
-- the root element is discarded. A "<!" that starts neither a comment nor a
-- CDATA section (for example a DOCTYPE) is reported as a syntax error.
--
-- Parameters:
--   s               the document text
--   preserve_white  when truthy, text nodes consisting only of whitespace
--                   are kept; otherwise they are dropped
--
-- Returns the first top-level element on success, or nil followed by an
-- error message string on failure.
--
-- The parser is a state machine: each label below is a state, `p` is the
-- current byte position and `goto` performs the state transition.
local function parse_xml(s, preserve_white)
  local mark, quote, att
  local p, n = 1, #s
  -- stack[1] is a synthetic root collecting the top-level elements; every
  -- further entry is an element whose closing tag has not been seen yet.
  local stack = {{}}

  -- True when position i holds a name character. The bounds check comes
  -- first so isname() is never handed the nil that string.byte yields past
  -- the end of input.
  local function name_at(i)
    return i <= n and isname(sbyte(s, i))
  end

  -- Push a new element; it is attached to its parent when it is closed.
  local function emit_open_tag(name)
    tinsert(stack, {tag=name, attr={}})
  end

  -- Record an attribute on the element currently being opened.
  local function emit_att(k, v)
    stack[#stack].attr[k] = unescape(v)
  end

  -- Pop the innermost element and append it to its parent's children.
  -- Callers must make sure an element is open (#stack > 1).
  local function emit_close_tag()
    local item = tremove(stack)
    tinsert(stack[#stack], item)
  end

  -- Append a text node to the innermost open element. `raw` is true for
  -- CDATA content, which must not have entity references expanded.
  local function emit_text(text, raw)
    if #stack > 1 then
      if preserve_white or not sfind(text, "^[ \r\n\t]*$") then
        if not raw then text = unescape(text) end
        tinsert(stack[#stack], text)
      end
    end
  end

  ::parse_text::
  do
    mark = p
    while p <= n and sbyte(s,p) ~= LT do p=p+1 end
    if p > mark then emit_text(ssub(s, mark, p-1)) end
    if sbyte(s,p) == LT then
      p=p+1
      goto parse_element
    end
    -- End of input. An element is only added to stack[1] once it has been
    -- closed, so a missing result means the root was unclosed or absent.
    local root = stack[1][1]
    if root == nil then
      if #stack > 1 then return nil, "end of data in element" end
      return nil, "no root element"
    end
    return root
  end

  ::parse_element::
  do
    if sbyte(s,p) == SLASH then p=p+1 goto parse_closing_element end
    if sbyte(s,p) == EXCLAIM then p=p+1 goto parse_comment end
    if sbyte(s,p) == QUESTION then p=p+1 goto parse_processing_instruction end
    while iswhite(sbyte(s,p)) do p=p+1 end
    if name_at(p) then goto parse_element_name end
    return nil, "syntax error in element"
  end

  ::parse_comment::
  do
    -- "<![" starts a CDATA section rather than a comment
    if sbyte(s,p) == LSQUARE then goto parse_cdata end
    if sbyte(s,p) ~= MINUS then return nil, "syntax error in comment" end
    p=p+1
    if sbyte(s,p) ~= MINUS then return nil, "syntax error in comment" end
    p=p+1
    while p <= n do
      if sbyte(s,p) == MINUS and sbyte(s,p+1) == MINUS and sbyte(s,p+2) == GT then
        p=p+3
        goto parse_text
      end
      p=p+1
    end
    return nil, "end of data in comment"
  end

  ::parse_cdata::
  do
    -- p is on the "[" that follows "<!"
    if ssub(s, p+1, p+6) ~= "CDATA[" then
      return nil, "syntax error in CDATA section"
    end
    p=p+7
    mark = p
    while p <= n do
      if sbyte(s,p) == RSQUARE and sbyte(s,p+1) == RSQUARE and sbyte(s,p+2) == GT then
        if p > mark then emit_text(ssub(s, mark, p-1), true) end
        p=p+3
        goto parse_text
      end
      p=p+1
    end
    return nil, "end of data in CDATA section"
  end

  ::parse_processing_instruction::
  do
    while p <= n do
      if sbyte(s,p) == QUESTION and sbyte(s,p+1) == GT then
        p=p+2
        goto parse_text
      end
      p=p+1
    end
    return nil, "end of data in processing instruction"
  end

  ::parse_closing_element::
  do
    while iswhite(sbyte(s,p)) do p=p+1 end
    mark = p
    while name_at(p) do p=p+1 end
    local name = ssub(s, mark, p-1)
    while iswhite(sbyte(s,p)) do p=p+1 end
    if sbyte(s,p) ~= GT then return nil, "syntax error in closing element" end
    -- Only the synthetic root is left: there is nothing to close.
    if #stack < 2 then
      return nil, "closing element without matching opening element"
    end
    if stack[#stack].tag ~= name then
      return nil, "mismatched closing element"
    end
    emit_close_tag()
    p=p+1
    goto parse_text
  end

  ::parse_element_name::
  do
    mark = p
    while name_at(p) do p=p+1 end
    emit_open_tag(ssub(s, mark, p-1))
    if sbyte(s,p) == GT then p=p+1 goto parse_text end
    if sbyte(s,p) == SLASH and sbyte(s,p+1) == GT then
      -- self-closing element: <name/>
      emit_close_tag()
      p=p+2
      goto parse_text
    end
    if iswhite(sbyte(s,p)) then goto parse_attributes end
    return nil, "syntax error after element name"
  end

  ::parse_attributes::
  do
    while iswhite(sbyte(s,p)) do p=p+1 end
    if name_at(p) then goto parse_attribute_name end
    if sbyte(s,p) == GT then p=p+1 goto parse_text end
    if sbyte(s,p) == SLASH and sbyte(s,p+1) == GT then
      emit_close_tag()
      p=p+2
      goto parse_text
    end
    return nil, "syntax error in attributes"
  end

  ::parse_attribute_name::
  do
    mark = p
    while name_at(p) do p=p+1 end
    att = ssub(s, mark, p-1)
    while iswhite(sbyte(s,p)) do p=p+1 end
    if sbyte(s,p) == EQ then p=p+1 goto parse_attribute_value end
    return nil, "syntax error after attribute name"
  end

  ::parse_attribute_value::
  do
    while iswhite(sbyte(s,p)) do p=p+1 end
    -- the value runs up to the next occurrence of its opening quote character
    quote = sbyte(s,p)
    p=p+1
    if quote ~= QUOT and quote ~= APOS then return nil, "missing quote character" end
    mark = p
    while p <= n and sbyte(s,p) ~= quote do p=p+1 end
    if sbyte(s,p) == quote then
      emit_att(att, ssub(s, mark, p-1))
      p=p+1
      goto parse_attributes
    end
    return nil, "end of data in attribute value"
  end

  -- Every state above ends in a goto or a return, so this is never reached.
  return nil, "the impossible happened"
end
-- Write `item` and everything below it as XML to the default output file
-- (io.write).
--
-- `item` is either an element table { tag = ..., attr = {...}, ...children }
-- or any other value, which is written as escaped text. An element without
-- children is written in the self-closing form <tag/>.
--
-- A missing `attr` table is treated as "no attributes", so hand-built nodes
-- such as { tag = "br" } can be written. Attributes are emitted in sorted
-- name order, because pairs() has no defined order and the output would
-- otherwise differ between runs.
local function write_xml(item)
  if type(item) ~= 'table' then
    io.write(escape(item))
    return
  end

  io.write("<", item.tag)
  local attr = item.attr
  if attr then
    local names = {}
    for k in pairs(attr) do
      names[#names + 1] = k
    end
    -- compare via tostring so mixed key types cannot make the sort raise
    table.sort(names, function(a, b) return tostring(a) < tostring(b) end)
    for _, k in ipairs(names) do
      io.write(" ", k, '="', escape(attr[k]), '"')
    end
  end

  if #item > 0 then
    io.write(">")
    for _, child in ipairs(item) do
      write_xml(child)
    end
    io.write("</", item.tag, ">")
  else
    io.write("/>")
  end
end
-- Module table: `parse` is parse_xml (XML string -> node tree) and `print` is
-- write_xml (node tree -> XML written with io.write).
return { parse=parse_xml, print=write_xml }