Skip to content

Commit 787d4b7

Browse files
committed
Introduce an overall limit on link reference definition instantiations.
This is to prevent time and output size explosion for input patterns such as the one generated by this: $ python -c 'N=1000; print("[x]: " + "x" * N + "\n[x]" * N)' We roughly allow link reference definitions to blow up the input size of the document 16 times, or up to 1 MB, whichever is smaller. When the threshold is reached, any following references to link reference definitions are sent to the output unresolved, as plain text. Fixes #238.
1 parent ad8d411 commit 787d4b7

File tree

2 files changed

+25
-3
lines changed

2 files changed

+25
-3
lines changed

src/md4c.c

+21-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "md4c.h"
2727

2828
#include <limits.h>
29+
#include <stdint.h>
2930
#include <stdio.h>
3031
#include <stdlib.h>
3132
#include <string.h>
@@ -143,6 +144,9 @@
143144
#define SZ MD_SIZE
144145
#define OFF MD_OFFSET
145146

147+
#define SZ_MAX (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX)
148+
#define OFF_MAX (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX)
149+
146150
typedef struct MD_MARK_tag MD_MARK;
147151
typedef struct MD_BLOCK_tag MD_BLOCK;
148152
typedef struct MD_CONTAINER_tag MD_CONTAINER;
@@ -180,6 +184,7 @@ struct MD_CTX_tag {
180184
int alloc_ref_defs;
181185
void** ref_def_hashtable;
182186
int ref_def_hashtable_size;
187+
SZ max_ref_def_output;
183188

184189
/* Stack of inline/span markers.
185190
* This is only used for parsing a single block contents but by storing it
@@ -2283,11 +2288,14 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
22832288
int is_multiline;
22842289
CHAR* label;
22852290
SZ label_size;
2286-
int ret;
2291+
int ret = FALSE;
22872292

22882293
MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
22892294
MD_ASSERT(CH(end-1) == _T(']'));
22902295

2296+
if(ctx->max_ref_def_output == 0)
2297+
return FALSE;
2298+
22912299
beg += (CH(beg) == _T('!') ? 2 : 1);
22922300
end--;
22932301

@@ -2315,7 +2323,17 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
23152323
if(is_multiline)
23162324
free(label);
23172325

2318-
ret = (def != NULL);
2326+
if(def != NULL) {
2327+
/* See https://github.com/mity/md4c/issues/238 */
2328+
MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg;
2329+
if(output_size_estimation < ctx->max_ref_def_output) {
2330+
ctx->max_ref_def_output -= output_size_estimation;
2331+
ret = TRUE;
2332+
} else {
2333+
MD_LOG("Too many link reference definition instantiations.");
2334+
ctx->max_ref_def_output = 0;
2335+
}
2336+
}
23192337

23202338
abort:
23212339
return ret;
@@ -6470,6 +6488,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
64706488
ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
64716489
md_build_mark_char_map(&ctx);
64726490
ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
6491+
ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX);
64736492

64746493
/* Reset all mark stacks and lists. */
64756494
for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)

test/pathological-tests.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,10 @@
102102
"--ftables"),
103103
"many broken links":
104104
(("]([\n" * 50000),
105-
re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>"))
105+
re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>")),
106+
"many link ref. def. instantiations":
107+
(("[x]: " + "x" * 50000 + "\n[x]" * 50000),
108+
re.compile(""))
106109
}
107110

108111
whitespace_re = re.compile('/s+/')

0 commit comments

Comments
 (0)