Skip to content

Commit

Permalink
feat: uri decode function (#3161)
Browse files Browse the repository at this point in the history
  • Loading branch information
fmgornick authored Aug 21, 2024
1 parent 0b5ae30 commit 0e0cdd5
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 5 deletions.
5 changes: 2 additions & 3 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ endif

### Tests (make check)

TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test
TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test tests/uritest
if !WIN32
TESTS += tests/optionaltest
endif
Expand Down Expand Up @@ -218,7 +218,6 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
jq.1.prebuilt jq.spec src/lexer.c src/lexer.h src/parser.c \
src/parser.h src/version.h src/builtin.jq scripts/version \
libjq.pc \
tests/base64.test tests/jq-f-test.sh tests/jq.test \
tests/modules/a.jq tests/modules/b/b.jq tests/modules/c/c.jq \
tests/modules/c/d.jq tests/modules/data.json \
tests/modules/home1/.jq tests/modules/home2/.jq/g.jq \
Expand All @@ -232,7 +231,7 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
tests/onig.supp tests/local.supp \
tests/setup tests/torture/input0.json \
tests/optional.test tests/man.test tests/manonig.test \
tests/jq.test tests/onig.test tests/base64.test \
tests/jq.test tests/onig.test tests/base64.test tests/uri.test \
tests/utf8-truncate.jq tests/jq-f-test.sh \
tests/no-main-program.jq tests/yes-main-program.jq

Expand Down
5 changes: 5 additions & 0 deletions docs/content/manual/dev/manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2141,6 +2141,11 @@ sections:
Applies percent-encoding, by mapping all reserved URI
characters to a `%XX` sequence.
* `@urid`:
The inverse of `@uri`: applies percent-decoding by mapping each
`%XX` sequence back to the byte it encodes. It is an error if a
`%XX` sequence is malformed or the decoded bytes are not valid UTF-8.
* `@csv`:
The input must be an array, and it is rendered as CSV
Expand Down
8 changes: 7 additions & 1 deletion jq.1.prebuilt

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,48 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
}
jv_free(input);
return line;
} else if (!strcmp(fmt_s, "urid")) {
  /*
   * @urid: percent-decode the input, i.e. the inverse of @uri.
   *
   * The input is first converted with f_tostring. Bytes outside a %XX escape
   * are copied through unchanged; each run of %XX escapes is decoded into one
   * UTF-8 character. A malformed escape, or escapes that do not decode to
   * valid UTF-8, produce a type error whose message ends with
   * "is not a valid uri encoding".
   */
  jv_free(fmt);
  input = f_tostring(jq, input);

  jv line = jv_string("");
  const char *errmsg = "is not a valid uri encoding";
  const char *s = jv_string_value(input);
  // Iterate by byte length instead of stopping at the first NUL, so a string
  // containing "\u0000" is decoded in full rather than silently truncated.
  // jv_copy: jv_string_length_bytes consumes the reference it is given.
  const char *end = s + jv_string_length_bytes(jv_copy(input));
  while (s < end) {
    if (*s != '%') {
      // Append the whole literal run up to the next '%' in a single call.
      // '%' is ASCII, so the run always ends on a character boundary and
      // unencoded multi-byte characters are appended intact, never byte by byte.
      const char *run = s;
      while (s < end && *s != '%')
        s++;
      line = jv_string_append_buf(line, run, (int)(s - run));
      continue;
    }

    unsigned char unicode[4] = {0};
    int b = 0;
    // check leading bits of first octet to determine length of unicode character
    // (https://datatracker.ietf.org/doc/html/rfc3629#section-3)
    // Keep reading %XX octets while the first octet's top bit is set and its
    // bit (7-b) says another octet follows; never more than 4 octets.
    while (b == 0 || (b < 4 && ((unicode[0] >> 7) & 1) && ((unicode[0] >> (7-b)) & 1))) {
      // At the end of the string this reads the terminating NUL, which is not
      // '%' (nor a hex digit below), so a truncated escape is rejected here
      // without reading past the buffer.
      if (*(s++) != '%') {
        jv_free(line);
        return type_error(input, errmsg);
      }
      // Two hex digits, most significant nibble first.
      for (int i=0; i<2; i++) {
        unicode[b] <<= 4;
        char c = *(s++);
        if ('0' <= c && c <= '9') unicode[b] |= c - '0';
        else if ('a' <= c && c <= 'f') unicode[b] |= c - 'a' + 10;
        else if ('A' <= c && c <= 'F') unicode[b] |= c - 'A' + 10;
        else {
          jv_free(line);
          return type_error(input, errmsg);
        }
      }
      b++;
    }
    // The leading-bit test only fixes the length; reject anything that is not
    // well-formed UTF-8 (e.g. a bad continuation octet).
    if (!jvp_utf8_is_valid((const char *)unicode, (const char *)unicode+b)) {
      jv_free(line);
      return type_error(input, errmsg);
    }
    line = jv_string_append_buf(line, (const char *)unicode, b);
  }
  jv_free(input);
  return line;
} else if (!strcmp(fmt_s, "sh")) {
jv_free(fmt);
if (jv_get_kind(input) != JV_KIND_ARRAY)
Expand Down
7 changes: 6 additions & 1 deletion tests/jq.test
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,15 @@ null
null
"interpolation"

@text,@json,([1,.]|@csv,@tsv),@html,@uri,@sh,(@base64|.,@base64d)
@text,@json,([1,.]|@csv,@tsv),@html,(@uri|.,@urid),@sh,(@base64|.,@base64d)
"!()<>&'\"\t"
"!()<>&'\"\t"
"\"!()<>&'\\\"\\t\""
"1,\"!()<>&'\"\"\t\""
"1\t!()<>&'\"\\t"
"!()&lt;&gt;&amp;&apos;&quot;\t"
"%21%28%29%3C%3E%26%27%22%09"
"!()<>&'\"\t"
"'!()<>&'\\''\"\t'"
"ISgpPD4mJyIJ"
"!()<>&'\"\t"
Expand All @@ -86,6 +87,10 @@ null
"\u03bc"
"%CE%BC"

@urid
"%CE%BC"
"\u03bc"

@html "<b>\(.)</b>"
"<script>hax</script>"
"<b>&lt;script&gt;hax&lt;/script&gt;</b>"
Expand Down
38 changes: 38 additions & 0 deletions tests/uri.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Tests are groups of three lines: program, input, expected output
# Blank lines and lines starting with # are ignored

@uri
"<>&'\"\t"
"%3C%3E%26%27%22%09"

# decoding encoded output results in same text
(@uri|@urid)
"<>&'\"\t"
"<>&'\"\t"

# testing variable length unicode characters
@uri
"a \u03bc \u2230 \ud83d\ude0e"
"a%20%CE%BC%20%E2%88%B0%20%F0%9F%98%8E"

@urid
"a%20%CE%BC%20%E2%88%B0%20%F0%9F%98%8E"
"a \u03bc \u2230 \ud83d\ude0e"

### invalid uri strings

# truncated sequence: lead octet 'F0' announces a 4-octet character, but only 3 octets follow
. | try @urid catch .
"%F0%93%81"
"string (\"%F0%93%81\") is not a valid uri encoding"

# invalid hex value ('FX')
. | try @urid catch .
"%FX%9F%98%8E"
"string (\"%FX%9F%98%8E\") is not a valid uri encoding"

# continuation (trailing) utf-8 octets must have the form 10xxxxxx;
# 'C0' = 11000000 is not a continuation octet, so the sequence is invalid
. | try @urid catch .
"%F0%C0%81%8E"
"string (\"%F0%C0%81%8E\") is not a valid uri encoding"
5 changes: 5 additions & 0 deletions tests/uritest
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh
# Runs the @uri/@urid test cases in uri.test through jq's --run-tests mode.

# Source the shared setup script that sits next to this one, forwarding our
# arguments to it.
# NOTE(review): presumably this is what defines VALGRIND, Q, JQ, mods and
# JQTESTDIR -- confirm against tests/setup.
. "${0%/*}/setup" "$@"

# $VALGRIND, $Q and $JQ are left unquoted on purpose: an empty value must
# vanish and a multi-word value must split into separate arguments. The
# test-file path is quoted so a checkout directory containing spaces is still
# passed as a single argument.
$VALGRIND $Q $JQ -L "$mods" --run-tests "$JQTESTDIR/uri.test"

0 comments on commit 0e0cdd5

Please sign in to comment.