From aee2ef7ed4c380875c4d5952d474487588b819d1 Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Fri, 3 Aug 2018 14:27:18 +0200 Subject: [PATCH] Don't unescape URIs twice Resolves: #149. --- app/main.lua | 1 + spec/client_spec.lua | 16 +++++++++------- spec/helpers/utils_spec.lua | 27 +++++++++++++++++---------- src/weserv/client.lua | 3 ++- src/weserv/helpers/utils.lua | 18 ++++++++++++------ 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/app/main.lua b/app/main.lua index 2bc9882c..9a149f6a 100644 --- a/app/main.lua +++ b/app/main.lua @@ -39,6 +39,7 @@ if config.throttler ~= nil and config.throttler.whitelist[ip_address] == nil the end end +-- Be careful: keys and values are unescaped according to URI escaping rules. local args, args_err = ngx.req.get_uri_args() if args.api == '3' then -- Internal redirection to the old api, if requested diff --git a/spec/client_spec.lua b/spec/client_spec.lua index b47680e7..fc7f7669 100644 --- a/spec/client_spec.lua +++ b/spec/client_spec.lua @@ -257,12 +257,14 @@ describe("client", function() end) it("max redirects error", function() - local redirect = 'https://ory.weserv.nl/image2.jpg?foo=bar' + -- Make sure that we unescape the redirect URI. + -- See: https://github.com/weserv/images/issues/142 + local redirect = 'https://ory.weserv.nl/%252A/image2.jpg?foo=bar' response.status = 302 response.headers['Location'] = redirect - local res, err = client:request("ory.weserv.nl/image.jpg?foo=bar") + local res, err = client:request("ory.weserv.nl/%2A/image.jpg?foo=bar") assert.equal(404, err.status) assert.equal(string.format("Will not follow more than %d redirects", default_config.max_redirects), @@ -270,20 +272,20 @@ describe("client", function() assert.falsy(res) assert.spy(stubbed_http.request).was.called(10) - assert.spy(stubbed_http.request).was.called_with(match._, match.same({ + assert.spy(stubbed_http.request).was.called_with(match._, match.contains({ headers = { ['User-Agent'] = default_config.user_agent }, - path = '/image.jpg', + path = '/%2A/image.jpg', query = 'foo=bar', })) - assert.spy(stubbed_http.request).was.called_with(match._, match.same({ + assert.spy(stubbed_http.request).was.called_with(match._, match.contains({ headers = { ['User-Agent'] = default_config.user_agent, -- Referer needs to be added - ['Referer'] = 'http://ory.weserv.nl/image.jpg?foo=bar' + ['Referer'] = 'http://ory.weserv.nl/%2A/image.jpg?foo=bar' }, - path = '/image2.jpg', + path = '/%2A/image2.jpg', query = 'foo=bar', })) assert.spy(stubbed_http.close).was.called() diff --git a/spec/helpers/utils_spec.lua b/spec/helpers/utils_spec.lua index a6198518..04c941e1 100644 --- a/spec/helpers/utils_spec.lua +++ b/spec/helpers/utils_spec.lua @@ -90,33 +90,40 @@ describe("utils", function() { utils.parse_uri('http:\\ory.weserv.nl') }) assert.are.same({ nil, "Invalid domain label" }, { utils.parse_uri('--example--.org') }) + -- Doesn't escape special / reserved characters + -- in accordance with RFC 3986 + -- See: https://github.com/weserv/images/issues/145 and + -- https://github.com/weserv/images/issues/144 assert.are.same({ 'https', 'ory.weserv.nl', 443, - "/-._~!$'()*%20,;=:#@", + "/-._~!$'()*+,;=:#@%", "bar=foo" }, { - unpack(utils.parse_uri("//ory.weserv.nl/-._~!$'()*+,;=:#@?bar=foo")) + unpack(utils.parse_uri("//ory.weserv.nl/-._~!$'()*+,;=:#@%?bar=foo")) }) assert.are.same({ 'https', 'ory.weserv.nl', 443, - "/-._~!$'()*+,;=:/?#@", - "bar=foo" + "/", + "bar=-._~!$'()*+,;=:/@%" }, { - unpack(utils.parse_uri("//ory.weserv.nl/%2D%2E%5F%7E%21%24%27%28%29%2A%2B%2C%3B%3D%3A%2F%3F%23%40?bar=foo")) + unpack(utils.parse_uri("//ory.weserv.nl/?bar=-._~!$'()*+,;=:/@%")) }) + + -- Doesn't unescape twice + -- See: https://github.com/weserv/images/issues/149 assert.are.same({ 'https', 'ory.weserv.nl', 443, "/", - "bar=-._~!$'()*%20,;=:/@" + "bar=%2D%2E%5F%7E%21%24%27%28%29%2A%2B%2C%3B%3D%3A%2F%40" }, { - unpack(utils.parse_uri("//ory.weserv.nl/?bar=-._~!$'()*+,;=:/@")) + unpack(utils.parse_uri("//ory.weserv.nl/?bar=%2D%2E%5F%7E%21%24%27%28%29%2A%2B%2C%3B%3D%3A%2F%40")) }) assert.are.same({ 'https', 'ory.weserv.nl', 443, - "/", - "bar=-._~!$'()*+,;=:/@" + "/%2D%2E%5F%7E%21%24%27%28%29%2A%2B%2C%3B%3D%3A%2F%3F%23%40", + "bar=foo" }, { - unpack(utils.parse_uri("//ory.weserv.nl/?bar=%2D%2E%5F%7E%21%24%27%28%29%2A%2B%2C%3B%3D%3A%2F%40")) + unpack(utils.parse_uri("//ory.weserv.nl/%2D%2E%5F%7E%21%24%27%28%29%2A%2B%2C%3B%3D%3A%2F%3F%23%40?bar=foo")) }) end) diff --git a/src/weserv/client.lua b/src/weserv/client.lua index 95dbb98e..14f52986 100644 --- a/src/weserv/client.lua +++ b/src/weserv/client.lua @@ -183,7 +183,8 @@ function client:request(uri, addl_headers, redirect_nr) } -- recursive call - return self:request(res.headers['Location'], referer, redirect_nr + 1) + -- Note: Make sure that we unescape the redirect URI. + return self:request(ngx.unescape_uri(res.headers['Location']), referer, redirect_nr + 1) end local valid, invalid_err = self:is_valid_response(res) diff --git a/src/weserv/helpers/utils.lua b/src/weserv/helpers/utils.lua index e37018cf..1720194c 100644 --- a/src/weserv/helpers/utils.lua +++ b/src/weserv/helpers/utils.lua @@ -27,10 +27,14 @@ local VIPS_META_ORIENTATION = 'orientation' -- vips_icc_transform() operations. local VIPS_META_ICC_NAME = 'icc-profile-data' --- Only alphanumerics, the special characters "-._~!$&'()*+,;=", and reserved --- characters used for delimiters purposes may be used unencoded within a URI. +-- Only alphanumerics, the percent character (prevents us from encoding +-- already percent-encoded URIs), the special characters "-._~!$&'()*+,;=", +-- and reserved characters used for delimiters purposes may be used +-- unencoded within a URI. The regex (in accordance with the naming +-- convention of RFC 3986) is as follows: +-- *( unreserved / pct-encoded / gen-delims / sub-delims ) -- See: https://tools.ietf.org/html/rfc3986#section-2.2 -local REGEX_DISALLOWED_CHARS = "[^%w%-%._~!%$&'%(%)%*%+,;=:/%?#@]" +local REGEX_DISALLOWED_CHARS = "[^%w%-%._~%%!%$&'%(%)%*%+,;=:/%?#@]" --- Are pixel values in this image 16-bit integer? -- @param interpretation The VipsInterpretation @@ -127,12 +131,13 @@ function utils.percent_encode(char) end --- Determines a "canonical" equivalent of a URI path. +-- Assumes that the URI path is already unescaped. -- @param path The URI path to canonicalise. -- @return The canonicalised path. function utils.canonicalise_path(path) local segments = {} for segment in path:gmatch("/([^/]*)") do - segments[#segments + 1] = ngx.unescape_uri(segment):gsub(REGEX_DISALLOWED_CHARS, utils.percent_encode) + segments[#segments + 1] = segment:gsub(REGEX_DISALLOWED_CHARS, utils.percent_encode) end local len = #segments if len == 0 then @@ -144,13 +149,14 @@ function utils.canonicalise_path(path) end --- Determines a "canonical" equivalent of a query string. +-- Assumes that the query string is already unescaped. -- @param path The query string to canonicalise. -- @return The canonicalised query string. function utils.canonicalise_query_string(query) local q = {} for key, val in query:gmatch("([^&=]+)=?([^&]*)") do - key = ngx.unescape_uri(key):gsub(REGEX_DISALLOWED_CHARS, utils.percent_encode) - val = ngx.unescape_uri(val):gsub(REGEX_DISALLOWED_CHARS, utils.percent_encode) + key = key:gsub(REGEX_DISALLOWED_CHARS, utils.percent_encode) + val = val:gsub(REGEX_DISALLOWED_CHARS, utils.percent_encode) q[#q + 1] = key .. "=" .. val end return table.concat(q, "&")