-
Notifications
You must be signed in to change notification settings - Fork 109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Escape path prefix and restrict it to be a pathname in Speculation Rules #951
Changes from 2 commits
34af17b
3c8d640
3a16b14
78fae85
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,12 @@ class PLSR_URL_Pattern_Prefixer { | |
*/ | ||
public function __construct( array $contexts = array() ) { | ||
if ( $contexts ) { | ||
$this->contexts = array_map( 'trailingslashit', $contexts ); | ||
$this->contexts = array_map( | ||
static function ( string $str ): string { | ||
return self::escape_pattern_string( trailingslashit( $str ) ); | ||
}, | ||
$contexts | ||
); | ||
} else { | ||
$this->contexts = self::get_default_contexts(); | ||
} | ||
|
@@ -69,12 +74,23 @@ public function prefix_path_pattern( string $path_pattern, string $context = 'ho | |
return $path_pattern; | ||
} | ||
|
||
// If the path already starts with the context path (including '/'), there is nothing to prefix. | ||
if ( str_starts_with( $path_pattern, $this->contexts[ $context ] ) ) { | ||
return $path_pattern; | ||
// In the event that the context path contains a :, ? or # (which can cause the URL pattern parser to | ||
// switch to another state, though only the latter two should be percent encoded anyway), we need to | ||
// additionally enclose it in grouping braces. The final forward slash (trailingslashit ensures there is | ||
// one) affects the meaning of the * wildcard, so is left outside the braces. | ||
$context_path = $this->contexts[ $context ]; | ||
$escaped_context_path = $context_path; | ||
if ( strcspn( $context_path, ':?#' ) !== strlen( $context_path ) ) { | ||
$escaped_context_path = '{' . substr( $context_path, 0, -1 ) . '}/'; | ||
} | ||
|
||
// If the path already starts with the context path (including '/'), remove it first | ||
// since it is about to be added back. | ||
if ( str_starts_with( $path_pattern, $context_path ) ) { | ||
$path_pattern = substr( $path_pattern, strlen( $context_path ) ); | ||
} | ||
|
||
return $this->contexts[ $context ] . ltrim( $path_pattern, '/' ); | ||
return $escaped_context_path . ltrim( $path_pattern, '/' ); | ||
} | ||
|
||
/** | ||
|
@@ -86,8 +102,32 @@ public function prefix_path_pattern( string $path_pattern, string $context = 'ho | |
*/ | ||
public static function get_default_contexts(): array { | ||
return array( | ||
'home' => trailingslashit( wp_parse_url( home_url( '/' ), PHP_URL_PATH ) ), | ||
'site' => trailingslashit( wp_parse_url( site_url( '/' ), PHP_URL_PATH ) ), | ||
'home' => self::escape_pattern_string( trailingslashit( wp_parse_url( home_url( '/' ), PHP_URL_PATH ) ) ), | ||
'site' => self::escape_pattern_string( trailingslashit( wp_parse_url( site_url( '/' ), PHP_URL_PATH ) ) ), | ||
Comment on lines
-89
to
+106
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This escapes the "path prefixes", which makes sense. But wouldn't we also want to escape any other parts of the path string in the same way? Any string that a 3P developer may provide via the
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, such strings may in fact be patterns and not literal paths, so we probably do want the special characters to have their special meanings there. For example, the
However, if WordPress is serving from a directory named |
||
); | ||
} | ||
|
||
/** | ||
* Escapes a string for use in a URL pattern component. | ||
* https://urlpattern.spec.whatwg.org/#escape-a-pattern-string | ||
* | ||
* @since n.e.x.t | ||
adamsilverstein marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* | ||
* @param string $str String to be escaped. | ||
* @return string String with backslashes added where required. | ||
*/ | ||
private static function escape_pattern_string( string $str ): string { | ||
$replacements = array( | ||
'+' => '\\+', | ||
'*' => '\\*', | ||
'?' => '\\?', | ||
':' => '\\:', | ||
'{' => '\\{', | ||
'}' => '\\}', | ||
'(' => '\\(', | ||
')' => '\\)', | ||
'\\' => '\\\\', | ||
); | ||
return strtr( $str, $replacements ); | ||
jeremyroman marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Clever. I would have done
str_contains( $context_path, ':' ) || str_contains( $context_path, '?' ) || str_contains( $context_path, '#' )
but clearly what you've done is faster.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or rather:
This would be the more common way to do it (and perhaps more readable?). There are 31 uses of strcspn() in core, whereas there are 635 of preg_replace().
I don't feel strongly either way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's probably from being a C++ developer by day, but my mild preference is for the most efficient option when the options are similar in readability (though obviously neither is going to have a massive effect on your WordPress performance).
Happy to change if you felt more strongly; it seems not, so leaving this as-is for now.