Skip to content

Commit 7b3d107

Browse files
committed
HTML API: Ensure that full processor can seek to earlier bookmarks.
When the HTML Processor seeks to an earlier place, it returns to the beginning of the document and proceeds forward until it reaches the appropriate location. This requires resetting internal state so that the processor can correctly proceed from the beginning of the document. The seeking reset logic was not adapted to account for the full processor (i.e. when created via `WP_HTML_Processor::create_full_parser()`). This change updates the seek logic to account for the full and fragment parsers as well as other state that has been introduced in the interim and should be reset. Props jonsurrell, dmsnell, westonruter, mi5t4n. Fixes #62290. git-svn-id: https://develop.svn.wordpress.org/trunk@59391 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 299bb08 commit 7b3d107

File tree

4 files changed

+229
-38
lines changed

4 files changed

+229
-38
lines changed

src/wp-includes/html-api/class-wp-html-open-elements.php

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -520,11 +520,6 @@ public function pop(): bool {
520520
return false;
521521
}
522522

523-
if ( 'context-node' === $item->bookmark_name ) {
524-
$this->stack[] = $item;
525-
return false;
526-
}
527-
528523
$this->after_element_pop( $item );
529524
return true;
530525
}
@@ -585,10 +580,6 @@ public function push( WP_HTML_Token $stack_item ): void {
585580
* @return bool Whether the node was found and removed from the stack of open elements.
586581
*/
587582
public function remove_node( WP_HTML_Token $token ): bool {
588-
if ( 'context-node' === $token->bookmark_name ) {
589-
return false;
590-
}
591-
592583
foreach ( $this->walk_up() as $position_from_end => $item ) {
593584
if ( $token->bookmark_name !== $item->bookmark_name ) {
594585
continue;

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 68 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5328,52 +5328,92 @@ public function seek( $bookmark_name ): bool {
53285328
* and computation time.
53295329
*/
53305330
if ( 'backward' === $direction ) {
5331+
53315332
/*
5332-
* Instead of clearing the parser state and starting fresh, calling the stack methods
5333-
* maintains the proper flags in the parser.
5333+
* When moving backward, stateful stacks should be cleared.
53345334
*/
53355335
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
5336-
if ( 'context-node' === $item->bookmark_name ) {
5337-
break;
5338-
}
5339-
53405336
$this->state->stack_of_open_elements->remove_node( $item );
53415337
}
53425338

53435339
foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
5344-
if ( 'context-node' === $item->bookmark_name ) {
5345-
break;
5346-
}
5347-
53485340
$this->state->active_formatting_elements->remove_node( $item );
53495341
}
53505342

5351-
parent::seek( 'context-node' );
5352-
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
5353-
$this->state->frameset_ok = true;
5354-
$this->element_queue = array();
5355-
$this->current_element = null;
5343+
/*
5344+
* **After** clearing stacks, more processor state can be reset.
5345+
* This must be done after clearing the stack because those stacks generate events that
5346+
* would appear on a subsequent call to `next_token()`.
5347+
*/
5348+
$this->state->frameset_ok = true;
5349+
$this->state->stack_of_template_insertion_modes = array();
5350+
$this->state->head_element = null;
5351+
$this->state->form_element = null;
5352+
$this->state->current_token = null;
5353+
$this->current_element = null;
5354+
$this->element_queue = array();
5355+
5356+
/*
5357+
* The absence of a context node indicates a full parser.
5358+
* The presence of a context node indicates a fragment parser.
5359+
*/
5360+
if ( null === $this->context_node ) {
5361+
$this->change_parsing_namespace( 'html' );
5362+
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL;
5363+
$this->breadcrumbs = array();
53565364

5357-
if ( isset( $this->context_node ) ) {
5358-
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
5365+
$this->bookmarks['initial'] = new WP_HTML_Span( 0, 0 );
5366+
parent::seek( 'initial' );
5367+
unset( $this->bookmarks['initial'] );
53595368
} else {
5360-
$this->breadcrumbs = array();
5361-
}
5362-
}
53635369

5364-
// When moving forwards, reparse the document until reaching the same location as the original bookmark.
5365-
if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
5366-
return true;
5370+
/*
5371+
* Push the root-node (HTML) back onto the stack of open elements.
5372+
*
5373+
* Fragment parsers require this extra bit of setup.
5374+
* It's handled in full parsers by advancing the processor state.
5375+
*/
5376+
$this->state->stack_of_open_elements->push(
5377+
new WP_HTML_Token(
5378+
'root-node',
5379+
'HTML',
5380+
false
5381+
)
5382+
);
5383+
5384+
$this->change_parsing_namespace(
5385+
$this->context_node->integration_node_type
5386+
? 'html'
5387+
: $this->context_node->namespace
5388+
);
5389+
5390+
if ( 'TEMPLATE' === $this->context_node->node_name ) {
5391+
$this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
5392+
}
5393+
5394+
$this->reset_insertion_mode_appropriately();
5395+
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
5396+
parent::seek( $this->context_node->bookmark_name );
5397+
}
53675398
}
53685399

5369-
while ( $this->next_token() ) {
5400+
/*
5401+
* Here, the processor moves forward through the document until it matches the bookmark.
5402+
* do-while is used here because the processor is expected to already be stopped on
5403+
* a token that may match the bookmarked location.
5404+
*/
5405+
do {
5406+
/*
5407+
* The processor will stop on virtual tokens, but bookmarks may not be set on them.
5408+
* They should not be matched when seeking a bookmark, skip them.
5409+
*/
5410+
if ( $this->is_virtual() ) {
5411+
continue;
5412+
}
53705413
if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
5371-
while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) {
5372-
$this->current_element = array_shift( $this->element_queue );
5373-
}
53745414
return true;
53755415
}
5376-
}
5416+
} while ( $this->next_token() );
53775417

53785418
return false;
53795419
}
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
<?php
2+
/**
3+
* Unit tests covering WP_HTML_Processor bookmark functionality.
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
*/
8+
9+
/**
10+
* @group html-api
11+
*
12+
* @coversDefaultClass WP_HTML_Processor
13+
*/
14+
class Tests_HtmlApi_WpHtmlProcessor_Bookmark extends WP_UnitTestCase {
15+
/**
16+
* @dataProvider data_processor_constructors
17+
*
18+
* @ticket 62290
19+
*/
20+
public function test_processor_seek_same_location( callable $factory ) {
21+
$processor = $factory( '<div><span>' );
22+
$this->assertTrue( $processor->next_tag( 'DIV' ) );
23+
$this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark.' );
24+
$this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed has_bookmark check.' );
25+
26+
// Confirm the bookmark works and processing continues normally.
27+
$this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark.' );
28+
$this->assertSame( 'DIV', $processor->get_tag() );
29+
$this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs() );
30+
$this->assertTrue( $processor->next_tag() );
31+
$this->assertSame( 'SPAN', $processor->get_tag() );
32+
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN' ), $processor->get_breadcrumbs() );
33+
}
34+
35+
/**
36+
* @dataProvider data_processor_constructors
37+
*
38+
* @ticket 62290
39+
*/
40+
public function test_processor_seek_backward( callable $factory ) {
41+
$processor = $factory( '<div><span>' );
42+
$this->assertTrue( $processor->next_tag( 'DIV' ) );
43+
$this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark.' );
44+
$this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed has_bookmark check.' );
45+
46+
// Move past the bookmark so it must scan backwards.
47+
$this->assertTrue( $processor->next_tag( 'SPAN' ) );
48+
49+
// Confirm the bookmark works.
50+
$this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark.' );
51+
$this->assertSame( 'DIV', $processor->get_tag() );
52+
}
53+
54+
/**
55+
* @dataProvider data_processor_constructors
56+
*
57+
* @ticket 62290
58+
*/
59+
public function test_processor_seek_forward( callable $factory ) {
60+
$processor = $factory( '<div one></div><span two></span><a three>' );
61+
$this->assertTrue( $processor->next_tag( 'DIV' ) );
62+
$this->assertTrue( $processor->set_bookmark( 'one' ), 'Failed to set bookmark "one".' );
63+
$this->assertTrue( $processor->has_bookmark( 'one' ), 'Failed "one" has_bookmark check.' );
64+
65+
// Move past the bookmark so it must scan backwards.
66+
$this->assertTrue( $processor->next_tag( 'SPAN' ) );
67+
$this->assertTrue( $processor->get_attribute( 'two' ) );
68+
$this->assertTrue( $processor->set_bookmark( 'two' ), 'Failed to set bookmark "two".' );
69+
$this->assertTrue( $processor->has_bookmark( 'two' ), 'Failed "two" has_bookmark check.' );
70+
71+
// Seek back.
72+
$this->assertTrue( $processor->seek( 'one' ), 'Failed to seek to bookmark "one".' );
73+
$this->assertSame( 'DIV', $processor->get_tag() );
74+
75+
// Seek forward and continue processing.
76+
$this->assertTrue( $processor->seek( 'two' ), 'Failed to seek to bookmark "two".' );
77+
$this->assertSame( 'SPAN', $processor->get_tag() );
78+
$this->assertTrue( $processor->get_attribute( 'two' ) );
79+
80+
$this->assertTrue( $processor->next_tag() );
81+
$this->assertSame( 'A', $processor->get_tag() );
82+
$this->assertTrue( $processor->get_attribute( 'three' ) );
83+
}
84+
85+
/**
86+
* Ensure the parsing namespace is handled when seeking from foreign content.
87+
*
88+
* @dataProvider data_processor_constructors
89+
*
90+
* @ticket 62290
91+
*/
92+
public function test_seek_back_from_foreign_content( callable $factory ) {
93+
$processor = $factory( '<custom-element /><svg><rect />' );
94+
$this->assertTrue( $processor->next_tag( 'CUSTOM-ELEMENT' ) );
95+
$this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark "mark".' );
96+
$this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed "mark" has_bookmark check.' );
97+
98+
/*
99+
* <custom-element /> has self-closing flag, but HTML elements (that are not void elements) cannot self-close,
100+
* they must be closed by some means, usually a closing tag.
101+
*
102+
* If the custom-element were interpreted as foreign content, it would self-close.
103+
*/
104+
$this->assertTrue( $processor->has_self_closing_flag() );
105+
$this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted HTML custom-element with self-closing flag as self-closing element.' );
106+
107+
// Proceed into foreign content.
108+
$this->assertTrue( $processor->next_tag( 'RECT' ) );
109+
$this->assertSame( 'svg', $processor->get_namespace() );
110+
$this->assertTrue( $processor->has_self_closing_flag() );
111+
$this->assertFalse( $processor->expects_closer() );
112+
$this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() );
113+
114+
// Seek back.
115+
$this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark "mark".' );
116+
$this->assertSame( 'CUSTOM-ELEMENT', $processor->get_tag() );
117+
// If the parsing namespace were not correct here (html),
118+
// then the self-closing flag would be misinterpreted.
119+
$this->assertTrue( $processor->has_self_closing_flag() );
120+
$this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted HTML custom-element with self-closing flag as self-closing element.' );
121+
122+
// Proceed into foreign content again.
123+
$this->assertTrue( $processor->next_tag( 'RECT' ) );
124+
$this->assertSame( 'svg', $processor->get_namespace() );
125+
$this->assertTrue( $processor->has_self_closing_flag() );
126+
$this->assertFalse( $processor->expects_closer() );
127+
128+
// The RECT should still descend from the CUSTOM-ELEMENT despite its self-closing flag.
129+
$this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() );
130+
}
131+
132+
/**
133+
* Covers a regression where the root node may not be present on the stack of open elements.
134+
*
135+
* Heading elements (h1, h2, etc.) check the current node on the stack of open elements
136+
* and expect it to be defined. If the root-node has been popped, pushing a new heading
137+
* onto the stack will create a warning and fail the test.
138+
*
139+
* @ticket 62290
140+
*/
141+
public function test_fragment_starts_with_h1() {
142+
$processor = WP_HTML_Processor::create_fragment( '<h1>' );
143+
$this->assertTrue( $processor->next_tag( 'H1' ) );
144+
$this->assertTrue( $processor->set_bookmark( 'mark' ) );
145+
$this->assertTrue( $processor->next_token() );
146+
$this->assertTrue( $processor->seek( 'mark' ) );
147+
}
148+
149+
/**
150+
* Data provider.
151+
*
152+
* @return array
153+
*/
154+
public static function data_processor_constructors(): array {
155+
return array(
156+
'Full parser' => array( array( WP_HTML_Processor::class, 'create_full_parser' ) ),
157+
'Fragment parser' => array( array( WP_HTML_Processor::class, 'create_fragment' ) ),
158+
);
159+
}
160+
}

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ public function test_clear_to_navigate_after_seeking() {
133133

134134
// Create a bookmark inside of that stack.
135135
if ( null !== $processor->get_attribute( 'two' ) ) {
136-
$processor->set_bookmark( 'two' );
136+
$this->assertTrue( $processor->set_bookmark( 'two' ) );
137137
break;
138138
}
139139
}

0 commit comments

Comments
 (0)