Skip to content

Commit e99d839

Browse files
committed
HTML API: Make non-body fragment creation methods private.
The current implementation of `create_fragment` (and the underlying `create_fragment_at_current_node`) allows passing in a context that might result in a tree that cannot be represented by HTML. For example, a user might use `<p>` as context, and attempt to create a fragment that also consists of a paragraph element, `<p>like this`. This would result in a paragraph node nested inside another -- something that can never result from parsing HTML.

To prevent this, this changeset makes `create_fragment_at_current_node` private and limits `create_fragment` to only `<body>` as context, while a comprehensive solution to allow other contexts is being worked on.

Follow-up to [59444], [59467].

Props jonsurrell, dmsnell, bernhard-reiter.
Fixes #62584.

git-svn-id: https://develop.svn.wordpress.org/trunk@59469 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 4b34369 commit e99d839

File tree

3 files changed

+18
-264
lines changed

3 files changed

+18
-264
lines changed

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -279,44 +279,24 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
279279
* form is provided because a context element may have attributes that
280280
* impact the parse, such as with a SCRIPT tag and its `type` attribute.
281281
*
282-
* Example:
283-
*
284-
* // Usually, snippets of HTML ought to be processed in the default `<body>` context:
285-
* $processor = WP_HTML_Processor::create_fragment( '<p>Hi</p>' );
286-
*
287-
* // Some fragments should be processed in the correct context like this SVG:
288-
* $processor = WP_HTML_Processor::create_fragment( '<rect width="10" height="10" />', '<svg>' );
289-
*
290-
* // This fragment with TD tags should be processed in a TR context:
291-
* $processor = WP_HTML_Processor::create_fragment(
292-
* '<td>1<td>2<td>3',
293-
* '<table><tbody><tr>'
294-
* );
295-
*
296-
* In order to create a fragment processor at the correct location, the
297-
* provided fragment will be processed as part of a full HTML document.
298-
* The processor will search for the last opener tag in the document and
299-
* create a fragment processor at that location. The document will be
300-
* forced into "no-quirks" mode by including the HTML5 doctype.
301-
*
302-
* For advanced usage and precise control over the context element, use
303-
* `WP_HTML_Processor::create_full_processor()` and
304-
* `WP_HTML_Processor::create_fragment_at_current_node()`.
282+
* ## Current HTML Support
305283
*
306-
* UTF-8 is the only allowed encoding. If working with a document that
307-
* isn't UTF-8, first convert the document to UTF-8, then pass in the
308-
* converted HTML.
284+
* - The only supported context is `<body>`, which is the default value.
285+
* - The only supported document encoding is `UTF-8`, which is the default value.
309286
*
310287
* @since 6.4.0
311288
* @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances.
312-
* @since 6.8.0 Can create fragments with any context element.
313289
*
314290
* @param string $html Input HTML fragment to process.
315-
* @param string $context Context element for the fragment. Defaults to `<body>`.
291+
* @param string $context Context element for the fragment, must be default of `<body>`.
316292
* @param string $encoding Text encoding of the document; must be default of 'UTF-8'.
317293
* @return static|null The created processor if successful, otherwise null.
318294
*/
319295
public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) {
296+
if ( '<body>' !== $context || 'UTF-8' !== $encoding ) {
297+
return null;
298+
}
299+
320300
$context_processor = static::create_full_parser( "<!DOCTYPE html>{$context}", $encoding );
321301
if ( null === $context_processor ) {
322302
return null;
@@ -475,7 +455,7 @@ function ( WP_HTML_Token $token ): void {
475455
* @param string $html Input HTML fragment to process.
476456
* @return static|null The created processor if successful, otherwise null.
477457
*/
478-
public function create_fragment_at_current_node( string $html ) {
458+
private function create_fragment_at_current_node( string $html ) {
479459
if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) {
480460
_doing_it_wrong(
481461
__METHOD__,

tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php

Lines changed: 0 additions & 178 deletions
This file was deleted.

tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

Lines changed: 9 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,10 @@ public function data_external_html5lib_tests() {
138138
* @return bool True if the test case should be skipped. False otherwise.
139139
*/
140140
private static function should_skip_test( ?string $test_context_element, string $test_name ): bool {
141+
if ( null !== $test_context_element && 'body' !== $test_context_element ) {
142+
return true;
143+
}
144+
141145
if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) {
142146
return true;
143147
}
@@ -153,63 +157,11 @@ private static function should_skip_test( ?string $test_context_element, string
153157
* @return string|null Tree structure of parsed HTML, if supported, else null.
154158
*/
155159
private static function build_tree_representation( ?string $fragment_context, string $html ) {
156-
if ( $fragment_context ) {
157-
/*
158-
* If the string of characters starts with "svg ", the context
159-
* element is in the SVG namespace and the substring after
160-
* "svg " is the local name. If the string of characters starts
161-
* with "math ", the context element is in the MathML namespace
162-
* and the substring after "math " is the local name.
163-
* Otherwise, the context element is in the HTML namespace and
164-
* the string is the local name.
165-
*/
166-
if ( str_starts_with( $fragment_context, 'svg ' ) ) {
167-
$tag_name = substr( $fragment_context, 4 );
168-
if ( 'svg' === $tag_name ) {
169-
$fragment_context_html = '<svg>';
170-
} else {
171-
$fragment_context_html = "<svg><{$tag_name}>";
172-
}
173-
} elseif ( str_starts_with( $fragment_context, 'math ' ) ) {
174-
$tag_name = substr( $fragment_context, 5 );
175-
if ( 'math' === $tag_name ) {
176-
$fragment_context_html = '<math>';
177-
} else {
178-
$fragment_context_html = "<math><{$tag_name}>";
179-
}
180-
} else {
181-
// Tags that only appear in tables need a special case.
182-
if ( in_array(
183-
$fragment_context,
184-
array(
185-
'caption',
186-
'col',
187-
'colgroup',
188-
'tbody',
189-
'td',
190-
'tfoot',
191-
'th',
192-
'thead',
193-
'tr',
194-
),
195-
true
196-
) ) {
197-
$fragment_context_html = "<table><{$fragment_context}>";
198-
} else {
199-
$fragment_context_html = "<{$fragment_context}>";
200-
}
201-
}
202-
203-
$processor = WP_HTML_Processor::create_fragment( $html, $fragment_context_html );
204-
205-
if ( null === $processor ) {
206-
throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
207-
}
208-
} else {
209-
$processor = WP_HTML_Processor::create_full_parser( $html );
210-
if ( null === $processor ) {
211-
throw new Exception( 'Could not create a full parser.' );
212-
}
160+
$processor = $fragment_context
161+
? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
162+
: WP_HTML_Processor::create_full_parser( $html );
163+
if ( null === $processor ) {
164+
throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() );
213165
}
214166

215167
$output = '';

0 commit comments

Comments
 (0)