This repository was archived by the owner on May 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathmodp_html.c
128 lines (124 loc) · 4.01 KB
/
modp_html.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
* <pre>
* modp_html xml decoders
* https://github.com/client9/stringencoders
*
* Copyright © 2013-2016 Nick Galbreath
* All rights reserved.
* Released under MIT license. See LICENSE for details.
* </PRE>
*/
#include "modp_html.h"
#include "modp_html_named_entities.h"
static const int gsHexDecodeMap[256] = {
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 256, 256,
256, 256, 256, 256, 256, 10, 11, 12, 13, 14, 15, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
256, 256, 256, 256
};
int modp_html_decode_char_at(const char* src, size_t len, size_t* consumed)
{
if (len == 0 || src == NULL) {
*consumed = 0;
return -1;
}
*consumed = 1;
if (*src != '&' || len < 2) {
return (uint8_t)(*src);
}
int val = 0;
size_t i;
int ch;
if (*(src + 1) == '#') {
val = 0;
if (*(src + 2) == 'x' || *(src + 2) == 'X') {
i = 3;
ch = (uint8_t)(*(src + 3));
ch = gsHexDecodeMap[ch];
if (ch == 256) {
/* degenerate case '&#[?]' */
return '&';
}
val = ch;
i = 4;
while (i < len) {
ch = (uint8_t)src[i];
if (ch == ';') {
*consumed = i + 1;
return val;
}
ch = gsHexDecodeMap[ch];
if (ch == 256) {
*consumed = i;
return val;
}
val = (val * 16) + ch;
if (val > 0x1000FF) {
return '&';
}
++i;
}
*consumed = i;
return val;
} else {
i = 2;
ch = (uint8_t)src[i];
if (ch < '0' || ch > '9') {
return '&';
}
val = ch - '0';
i += 1;
while (i < len) {
ch = (uint8_t)src[i];
if (ch == ';') {
*consumed = i + 1;
return val;
}
if (ch < '0' || ch > '9') {
*consumed = i;
return val;
}
val = (val * 10) + (ch - '0');
if (val > 0x1000FF) {
return '&';
}
++i;
}
*consumed = i;
return val;
}
} else {
/* case &[^#] -- check for named entity */
/* named entities are surprisingly complicated
* * case sensitive
* * may or maybe end in ";"
* * longest match
*/
int codepoint = html_named_entity_decode(src + 1, len - 1, consumed);
if (codepoint == 0) {
*consumed = 1;
return '&';
} else {
*consumed += 1;
return codepoint;
}
}
}