From 15b4215285fc10f4851493abfffaa39a8c656057 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 18 Sep 2024 14:45:39 -0700 Subject: [PATCH 1/3] Add todos --- plugins/optimization-detective/class-od-html-tag-processor.php | 2 ++ plugins/optimization-detective/optimization.php | 1 + 2 files changed, 3 insertions(+) diff --git a/plugins/optimization-detective/class-od-html-tag-processor.php b/plugins/optimization-detective/class-od-html-tag-processor.php index 2975e47a4..e830c760e 100644 --- a/plugins/optimization-detective/class-od-html-tag-processor.php +++ b/plugins/optimization-detective/class-od-html-tag-processor.php @@ -342,6 +342,8 @@ public function next_token(): bool { /** * Gets the number of times the cursor has moved. * + * @todo Not needed once core short-circuits seek() when current cursor is the same as the sought-bookmark. + * * @since n.e.x.t * @see self::next_token() * @see self::seek() diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 47c6cc452..9f43b9127 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -219,6 +219,7 @@ function od_optimize_template_output_buffer( string $buffer ): string { $processor->set_bookmark( $current_tag_bookmark ); // TODO: Should we break if this returns false? foreach ( $visitors as $visitor ) { + // TODO: Remove get_cursor_move_count() logic once core automatically short-circuits seek(). 
$cursor_move_count = $processor->get_cursor_move_count(); $tracked_in_url_metrics = $visitor( $tag_visitor_context ) || $tracked_in_url_metrics; From 5bded1569437da0e1d676de82e5a868d4cc2f8c6 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 18 Sep 2024 15:27:16 -0700 Subject: [PATCH 2/3] WIP --- .../class-od-tag-visitor-context.php | 12 ++++++------ plugins/optimization-detective/optimization.php | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/optimization-detective/class-od-tag-visitor-context.php b/plugins/optimization-detective/class-od-tag-visitor-context.php index b31f02b3a..55847e37f 100644 --- a/plugins/optimization-detective/class-od-tag-visitor-context.php +++ b/plugins/optimization-detective/class-od-tag-visitor-context.php @@ -20,9 +20,9 @@ final class OD_Tag_Visitor_Context { /** - * HTML tag processor. + * HTML (tag) processor. * - * @var OD_HTML_Tag_Processor + * @var OD_HTML_Tag_Processor|OD_HTML_Processor * @readonly */ public $processor; @@ -46,11 +46,11 @@ final class OD_Tag_Visitor_Context { /** * Constructor. * - * @param OD_HTML_Tag_Processor $processor HTML tag processor. - * @param OD_URL_Metrics_Group_Collection $url_metrics_group_collection URL metrics group collection. - * @param OD_Link_Collection $link_collection Link collection. + * @param OD_HTML_Tag_Processor|OD_HTML_Processor $processor HTML tag processor. + * @param OD_URL_Metrics_Group_Collection $url_metrics_group_collection URL metrics group collection. + * @param OD_Link_Collection $link_collection Link collection. 
*/ - public function __construct( OD_HTML_Tag_Processor $processor, OD_URL_Metrics_Group_Collection $url_metrics_group_collection, OD_Link_Collection $link_collection ) { + public function __construct( $processor, OD_URL_Metrics_Group_Collection $url_metrics_group_collection, OD_Link_Collection $link_collection ) { $this->processor = $processor; $this->url_metrics_group_collection = $url_metrics_group_collection; $this->link_collection = $link_collection; diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 9f43b9127..82f6420ef 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -178,6 +178,7 @@ function od_optimize_template_output_buffer( string $buffer ): string { // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. $processor = new OD_HTML_Tag_Processor( $buffer ); + //$processor = new OD_HTML_Processor::create_full_parser( $buffer ); if ( ! ( $processor->next_tag() && ! $processor->is_tag_closer() && From 384b44c655014709bb4467aa0985270a5223bd09 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 18 Sep 2024 18:49:12 -0700 Subject: [PATCH 3/3] Try using WP_HTML_Processor in Optimization Detective --- .../class-od-html-processor.php | 418 ++++++++++++++++++ .../optimization-detective/optimization.php | 10 +- 2 files changed, 426 insertions(+), 2 deletions(-) create mode 100644 plugins/optimization-detective/class-od-html-processor.php diff --git a/plugins/optimization-detective/class-od-html-processor.php b/plugins/optimization-detective/class-od-html-processor.php new file mode 100644 index 000000000..85bf370ba --- /dev/null +++ b/plugins/optimization-detective/class-od-html-processor.php @@ -0,0 +1,418 @@ +open_stack_tags` and + * `$this->open_stack_indices` whenever calling `self::set_bookmark()`. 
+ * Then whenever `self::seek()` is called, the bookmarked open stacks are + * populated back into `$this->open_stack_tags` and `$this->open_stack_indices`. + * + * @since 0.4.0 + * @var array + */ + private $bookmarked_open_stacks = array(); + + /** + * XPath for the current tag. + * + * This is used so that repeated calls to {@see self::get_xpath()} won't needlessly reconstruct the string. This + * gets cleared whenever {@see self::next_token()} advances to the next token. + * + * @since n.e.x.t + * @var string|null + */ + private $current_xpath = null; + + /** + * Mapping of bookmark name to a list of HTML strings which will be inserted at the time get_final_updated_html() is called. + * + * @since n.e.x.t + * @var array + */ + private $buffered_text_replacements = array(); + + /** + * Count for the number of times that the cursor was moved. + * + * @since n.e.x.t + * @var int + * @see self::next_token() + * @see self::seek() + */ + private $cursor_move_count = 0; + + /** + * Creates an HTML processor in the full parsing mode. + * + * This override only delegates to the parent implementation with the same arguments. + * + * It's likely that a fragment parser is more appropriate, unless sending an + * entire HTML document from start to finish. Consider a fragment parser with + * a context node of `body`. + * + * Since UTF-8 is the only currently-accepted charset, if working with a + * document that isn't UTF-8, it's important to convert the document before + * creating the processor: pass in the converted HTML. + * + * @param string $html Input HTML document to process. + * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used + * in the input byte stream. Currently must be UTF-8. + * @return static|null The created processor if successful, otherwise null. + */ + public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) { + return parent::create_full_parser( $html, $known_definite_encoding ); + } + + /** + * Finds the next open tag.
+ * + * @since n.e.x.t + * + * @return bool Whether a tag was matched. + */ + public function next_open_tag(): bool { + // Closing tags are skipped; only an opening tag ends the search. + while ( $this->next_tag() ) { + if ( ! $this->is_tag_closer() ) { + return true; + } + } + return false; + } + + /** + * Finds the next token in the HTML document. + * + * Besides delegating to the parent, this resets the cached XPath, increments the cursor move count, + * and bookmarks the closing HEAD and BODY tags when they are encountered. + * + * @inheritDoc + * @since n.e.x.t + * + * @return bool Whether a token was parsed. + */ + public function next_token(): bool { + $this->current_xpath = null; // Clear cache. + ++$this->cursor_move_count; // Counted on every call, even when the parent finds no further token. + if ( ! parent::next_token() ) { + return false; + } + + if ( $this->get_token_type() === '#tag' && $this->is_tag_closer() ) { + $tag_name = $this->get_tag(); + + // Set bookmarks for insertion of preload links and the detection script module. + if ( 'HEAD' === $tag_name ) { + $this->set_bookmark( self::END_OF_HEAD_BOOKMARK ); + } elseif ( 'BODY' === $tag_name ) { + $this->set_bookmark( self::END_OF_BODY_BOOKMARK ); + } + } + return true; + } + + /** + * Gets the number of times the cursor has moved. + * + * @todo Not needed once core short-circuits seek() when current cursor is the same as the sought-bookmark. + * + * @since n.e.x.t + * @see self::next_token() + * @see self::seek() + * + * @return int Count of times the cursor has moved. + */ + public function get_cursor_move_count(): int { + return $this->cursor_move_count; + } + + /** + * Updates or creates a new attribute on the currently matched tag with the passed value. + * + * @inheritDoc + * @since n.e.x.t + * + * @param string $name The attribute name to target. + * @param string|bool $value The new attribute value. + * @return bool Whether an attribute value was set.
+ */ + public function set_attribute( $name, $value ): bool { // phpcs:ignore SlevomatCodingStandard.TypeHints.ParameterTypeHint.MissingNativeTypeHint + // Read the previous value first so the change can be recorded after the parent applies it. + $existing_value = $this->get_attribute( $name ); + $result = parent::set_attribute( $name, $value ); + if ( $result ) { + // Record the change as a data-od-* meta attribute: the previous string value if there was one, otherwise an "added" flag. + // NOTE(review): any non-string previous value is recorded as "added", which presumably includes a pre-existing boolean attribute; confirm this is intended. + if ( is_string( $existing_value ) ) { + $this->set_meta_attribute( "replaced-{$name}", $existing_value ); + } else { + $this->set_meta_attribute( "added-{$name}", true ); + } + } + return $result; + } + + /** + * Sets a meta attribute. + * + * All meta attributes are prefixed with data-od-. This calls the parent's set_attribute() directly, + * so setting a meta attribute does not itself get recorded as a change. + * + * @since n.e.x.t + * + * @param string $name Meta attribute name. + * @param string|true $value Value. + * @return bool Whether an attribute was set. + */ + public function set_meta_attribute( string $name, $value ): bool { + return parent::set_attribute( "data-od-{$name}", $value ); + } + + /** + * Removes an attribute from the currently-matched tag. + * + * When the parent reports success, the removal is recorded in a data-od-removed-{name} meta attribute + * holding the previous string value, or true when the previous value was not a string. + * + * @inheritDoc + * @since n.e.x.t + * + * @param string $name The attribute name to remove. + * @return bool Result of the parent's remove_attribute() call. + */ + public function remove_attribute( $name ): bool { // phpcs:ignore SlevomatCodingStandard.TypeHints.ParameterTypeHint.MissingNativeTypeHint + // Capture the value before removal so it can be preserved in the meta attribute. + $old_value = $this->get_attribute( $name ); + $result = parent::remove_attribute( $name ); + if ( $result ) { + $this->set_meta_attribute( "removed-{$name}", is_string( $old_value ) ? $old_value : true ); + } + return $result; + } + + /** + * Move the internal cursor in the Tag Processor to a given bookmark's location. + * + * @inheritDoc + * @since 0.4.0 + * + * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. + * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
+ */ + public function seek( $bookmark_name ): bool { + $result = parent::seek( $bookmark_name ); + if ( $result ) { + // Restore the open-stack snapshot that set_bookmark() stored for this bookmark. + $this->open_stack_tags = $this->bookmarked_open_stacks[ $bookmark_name ]['tags']; + $this->open_stack_indices = $this->bookmarked_open_stacks[ $bookmark_name ]['indices']; + } + return $result; + } + + /** + * Sets a bookmark in the HTML document. + * + * On success, the current open stacks are snapshotted under the bookmark name so that seek() can restore them. + * + * @inheritDoc + * @since 0.4.0 + * + * @param string $bookmark_name Identifies this particular bookmark. + * @return bool Whether the bookmark was successfully created. + */ + public function set_bookmark( $bookmark_name ): bool { + $result = parent::set_bookmark( $bookmark_name ); + if ( $result ) { + $this->bookmarked_open_stacks[ $bookmark_name ] = array( + 'tags' => $this->open_stack_tags, + 'indices' => $this->open_stack_indices, + ); + } + return $result; + } + + /** + * Removes a bookmark that is no longer needed. + * + * The reserved END_OF_HEAD and END_OF_BODY bookmarks are never released: a warning naming the bookmark + * is triggered and false is returned instead. + * + * @inheritDoc + * @since n.e.x.t + * + * @param string $bookmark_name Name of the bookmark to remove. + * @return bool Whether the bookmark already existed before removal; always false for a reserved bookmark. + */ + public function release_bookmark( $bookmark_name ): bool { + if ( in_array( $bookmark_name, array( self::END_OF_HEAD_BOOKMARK, self::END_OF_BODY_BOOKMARK ), true ) ) { + $this->warn( + __METHOD__, + /* translators: %s is the bookmark name */ + sprintf( __( 'The %s bookmark is not allowed to be released.', 'optimization-detective' ), $bookmark_name ) + ); + return false; + } + // Drop the stored open-stack snapshot together with the bookmark itself. + unset( $this->bookmarked_open_stacks[ $bookmark_name ] ); + return parent::release_bookmark( $bookmark_name ); + } + + /** + * Gets indexed breadcrumbs for the current open tag. + * + * A breadcrumb consists of a tag name and its sibling index. + * + * @since n.e.x.t + * + * @return Generator Breadcrumb.
+ */ + private function get_indexed_breadcrumbs(): Generator { + // NOTE(review): none of the methods visible in this class append to open_stack_tags/open_stack_indices; confirm they are populated elsewhere, otherwise nothing is yielded. + foreach ( $this->open_stack_tags as $i => $breadcrumb_tag_name ) { + yield array( $breadcrumb_tag_name, $this->open_stack_indices[ $i ] ); + } + } + + /** + * Gets XPath for the current open tag. + * + * It would be nicer if this were like `/html[1]/body[2]` but in XPath the position() here refers to the + * index of the preceding node set. So it has to rather be written `/*[1][self::html]/*[2][self::body]`. + * + * The string is built once and cached in `$this->current_xpath` until that property is reset to null. + * + * @since n.e.x.t + * + * @return string XPath. + */ + public function get_xpath(): string { + if ( null === $this->current_xpath ) { + $this->current_xpath = ''; + foreach ( $this->get_indexed_breadcrumbs() as list( $tag_name, $index ) ) { + // One step per breadcrumb; the stored index is offset by one (presumably zero-based, since XPath positions start at 1). + $this->current_xpath .= sprintf( '/*[%d][self::%s]', $index + 1, $tag_name ); + } + } + return $this->current_xpath; + } + + /** + * Append HTML to the HEAD. + * + * The provided HTML must be valid! No validation is performed. + * + * The HTML is only buffered here under the END_OF_HEAD bookmark name; it is not inserted by this method. + * + * @since n.e.x.t + * + * @param string $html HTML to inject. + */ + public function append_head_html( string $html ): void { + $this->buffered_text_replacements[ self::END_OF_HEAD_BOOKMARK ][] = $html; + } + + /** + * Append HTML to the BODY. + * + * The provided HTML must be valid! No validation is performed. + * + * The HTML is only buffered here under the END_OF_BODY bookmark name; it is not inserted by this method. + * + * @since n.e.x.t + * + * @param string $html HTML to inject. + */ + public function append_body_html( string $html ): void { + $this->buffered_text_replacements[ self::END_OF_BODY_BOOKMARK ][] = $html; + } + + /** + * Gets the final updated HTML. + * + * This should only be called after the closing HTML tag has been reached and just before + * calling {@see WP_HTML_Processor::get_updated_html()} to send the document back in the response. + * + * @since n.e.x.t + * + * @return string Final updated HTML.
+ */ + public function get_final_updated_html(): string { + // Turn every buffered insertion into a zero-length text replacement at the start of its bookmark. + foreach ( array_keys( $this->buffered_text_replacements ) as $bookmark ) { + $html_strings = $this->buffered_text_replacements[ $bookmark ]; + if ( count( $html_strings ) === 0 ) { + continue; + } + if ( ! $this->has_bookmark( $bookmark ) ) { + $this->warn( + __METHOD__, + sprintf( + /* translators: %s is the bookmark name */ + __( 'Unable to append markup to %s since the bookmark no longer exists.', 'optimization-detective' ), + $bookmark + ) + ); + } else { + // WP_HTML_Processor registers bookmarks under an underscore-prefixed key; fall back to the raw name when that key is absent. + $bookmark_key = isset( $this->bookmarks[ "_{$bookmark}" ] ) ? "_{$bookmark}" : $bookmark; + $start = $this->bookmarks[ $bookmark_key ]->start; + + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $start, + 0, + implode( '', $html_strings ) + ); + + // Flushed entries are removed so a repeated call does not insert them twice. + unset( $this->buffered_text_replacements[ $bookmark ] ); + } + } + + return parent::get_updated_html(); + } + + /** + * Warns of bad markup. + * + * The message is passed through esc_html() and reported via wp_trigger_error(). + * + * @since n.e.x.t + * + * @param string $function_name Function name. + * @param string $message Warning message. + */ + private function warn( string $function_name, string $message ): void { + wp_trigger_error( + $function_name, + esc_html( $message ) + ); + } +} diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 82f6420ef..3efc224fa 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -177,8 +177,14 @@ function od_optimize_template_output_buffer( string $buffer ): string { } // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. - $processor = new OD_HTML_Tag_Processor( $buffer ); - //$processor = new OD_HTML_Processor::create_full_parser( $buffer ); + if ( version_compare( strtok( get_bloginfo( 'version' ), '-' ), '6.7', '>=' ) ) { // Strip any pre-release suffix (e.g. "-beta1") from the core version so 6.7 pre-releases qualify. + $processor = OD_HTML_Processor::create_full_parser( $buffer ); + if ( null === $processor ) { + return $buffer; + } + } else { + $processor = new OD_HTML_Tag_Processor( $buffer ); + } if ( ! ( $processor->next_tag() && !
$processor->is_tag_closer() &&