Skip to content

Commit ca4dd29

Browse files
committed
Implement the MD_FLAG_SKIPBOM flag on the parser side.
Also update md2html to enable BOM skipping through the parser flags rather than the renderer flags. Deprecate MD_HTML_FLAG_SKIP_UTF8_BOM: for compatibility with old applications, md_html() still accepts it and converts it into the parser flag MD_FLAG_SKIPBOM.
1 parent a00df7d commit ca4dd29

File tree

5 files changed

+38
-30
lines changed

5 files changed

+38
-30
lines changed

md2html/md2html.c

+2-6
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,8 @@
3434

3535

3636
/* Global options. */
37-
static unsigned parser_flags = 0;
38-
#ifndef MD4C_USE_ASCII
39-
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG | MD_HTML_FLAG_SKIP_UTF8_BOM;
40-
#else
41-
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
42-
#endif
37+
static unsigned parser_flags = MD_FLAG_SKIPBOM;
38+
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
4339
static int want_fullhtml = 0;
4440
static int want_xhtml = 0;
4541
static int want_stat = 0;

src/md4c-html.c

+3-8
Original file line numberDiff line numberDiff line change
@@ -553,14 +553,9 @@ md_html(const MD_CHAR* input, MD_SIZE input_size,
553553
render.escape_map[i] |= NEED_URL_ESC_FLAG;
554554
}
555555

556-
/* Consider skipping UTF-8 byte order mark (BOM). */
557-
if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM && sizeof(MD_CHAR) == 1) {
558-
static const MD_CHAR bom[3] = { (char)0xef, (char)0xbb, (char)0xbf };
559-
if(input_size >= sizeof(bom) && memcmp(input, bom, sizeof(bom)) == 0) {
560-
input += sizeof(bom);
561-
input_size -= sizeof(bom);
562-
}
563-
}
556+
/* For compatibility with old apps. */
557+
if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM)
558+
parser.flags |= MD_FLAG_SKIPBOM;
564559

565560
return md_parse(input, input_size, &parser, (void*) &render);
566561
}

src/md4c-html.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
/* If set, debug output from md_parse() is sent to stderr. */
3737
#define MD_HTML_FLAG_DEBUG 0x0001
3838
#define MD_HTML_FLAG_VERBATIM_ENTITIES 0x0002
39-
#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004
39+
#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004 /* Deprecated; use MD_FLAG_SKIPBOM on the parser side in new code. */
4040
#define MD_HTML_FLAG_XHTML 0x0008
4141

4242

src/md4c.c

+15
Original file line numberDiff line numberDiff line change
@@ -6468,6 +6468,21 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
64686468
memcpy(&ctx.parser, parser, parser_size);
64696469
memset((uint8_t*)&ctx.parser + parser_size, 0, sizeof(ctx.parser) - parser_size);
64706470

6471+
#if defined MD4C_USE_UTF8 || defined MD4C_USE_UTF16
6472+
if(parser->flags & MD_FLAG_SKIPBOM) {
6473+
#ifdef MD4C_USE_UTF8
6474+
static const MD_CHAR bom[3] = { (char)0xef, (char)0xbb, (char)0xbf };
6475+
#endif
6476+
#ifdef MD4C_USE_UTF16
6477+
static const MD_CHAR bom[1] = { (WCHAR)0xfeff };
6478+
#endif
6479+
if(size >= SIZEOF_ARRAY(bom) && memcmp(text, bom, sizeof(bom)) == 0) {
6480+
text += SIZEOF_ARRAY(bom);
6481+
size -= SIZEOF_ARRAY(bom);
6482+
}
6483+
}
6484+
#endif
6485+
64716486
ctx.text = text;
64726487
ctx.size = size;
64736488
ctx.userdata = userdata;

src/md4c.h

+17-15
Original file line numberDiff line numberDiff line change
@@ -303,21 +303,23 @@ typedef struct MD_SPAN_WIKILINK {
303303
* By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
304304
* The following flags may allow some extensions or deviations from it.
305305
*/
306-
#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
307-
#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */
308-
#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */
309-
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
310-
#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */
311-
#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */
312-
#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */
313-
#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */
314-
#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */
315-
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
316-
#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */
317-
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
318-
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
319-
#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
320-
#define MD_FLAG_HARD_SOFT_BREAKS 0x8000 /* Force all soft breaks to act as hard breaks. */
306+
#define MD_FLAG_COLLAPSEWHITESPACE 0x00000001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
307+
#define MD_FLAG_PERMISSIVEATXHEADERS 0x00000002 /* Do not require space in ATX headers ( ###header ) */
308+
#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x00000004 /* Recognize URLs as autolinks even without '<', '>' */
309+
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x00000008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
310+
#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x00000010 /* Disable indented code blocks. (Only fenced code works.) */
311+
#define MD_FLAG_NOHTMLBLOCKS 0x00000020 /* Disable raw HTML blocks. */
312+
#define MD_FLAG_NOHTMLSPANS 0x00000040 /* Disable raw HTML (inline). */
313+
#define MD_FLAG_TABLES 0x00000100 /* Enable tables extension. */
314+
#define MD_FLAG_STRIKETHROUGH 0x00000200 /* Enable strikethrough extension. */
315+
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x00000400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
316+
#define MD_FLAG_TASKLISTS 0x00000800 /* Enable task list extension. */
317+
#define MD_FLAG_LATEXMATHSPANS 0x00001000 /* Enable $ and $$ containing LaTeX equations. */
318+
#define MD_FLAG_WIKILINKS 0x00002000 /* Enable wiki links extension. */
319+
#define MD_FLAG_UNDERLINE 0x00004000 /* Enable underline extension (and disables '_' for normal emphasis). */
320+
#define MD_FLAG_HARD_SOFT_BREAKS 0x00008000 /* Force all soft breaks to act as hard breaks. */
321+
#define MD_FLAG_SKIPBOM 0x00010000 /* Skip Unicode BOM, if present. */
322+
321323

322324
#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
323325
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)

0 commit comments

Comments
 (0)