WordPress · sirreal · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1496,13 +1496,39 @@ private function skip_script_data(): bool {
 		while ( false !== $at && $at < $doc_length ) {
 			$at += strcspn( $html, '-<', $at );
 
+			/*
+			 * *IMPORTANT:* Any changes to this loop *must* ensure the conditions described in this
+			 * comment remain valid.
+			 *
+			 * The rest of this loop matches different byte sequences. If a script close tag is not
+			 * found, the function will return false. The script close tag is the longest byte
+			 * sequence to match. Therefore, a single length check for at least 8 additional
+			 * bytes allows for an early `false` return OR subsequent matches without length checks.
+			 *
+			 *     $at may be here.
+			 *       ↓
+			 *       </script>
+			 *        ╰──┬───╯
+			 *     $at + 8 additional bytes are required for a non-false return value.
+			 *
+			 * The length of shorter matches is already satisfied:
+			 *
+			 *     $at may be here.
+			 *          ↓
+			 *          -->
+			 *           ├╯
+			 *     $at + 2 additional bytes does not require an additional length check.
+			 */
+			if ( $at + 8 >= $doc_length ) {
+				return false;
+			}
+
 			/*
 			 * For all script states a "-->"  transitions
 			 * back into the normal unescaped script mode,
 			 * even if that's the current state.
 			 */
 			if (
-				$at + 2 < $doc_length &&
 				'-' === $html[ $at ] &&
 				'-' === $html[ $at + 1 ] &&
 				'>' === $html[ $at + 2 ]
@@ -1512,10 +1538,6 @@ private function skip_script_data(): bool {
 				continue;
 			}
 
-			if ( $at + 1 >= $doc_length ) {
-				return false;
-			}
-
 			/*
 			 * Everything of interest past here starts with "<".
 			 * Check this character and advance position regardless.
@@ -1537,13 +1559,29 @@ private function skip_script_data(): bool {
 			 * parsing after updating the state.
 			 */
 			if (
-				$at + 2 < $doc_length &&
+				'unescaped' === $state &&
 				'!' === $html[ $at ] &&
 				'-' === $html[ $at + 1 ] &&
 				'-' === $html[ $at + 2 ]
 			) {
-				$at   += 3;
-				$state = 'unescaped' === $state ? 'escaped' : $state;
+				$at += 3;
+
+				/*
+				 * The parser is ready to enter the `escaped` state but may remain in the
+				 * `unescaped` state if the "<!--" is immediately followed by zero or more
+				 * "-" characters and then a ">". This is similar to abruptly closed HTML
+				 * comments like "<!-->" or "<!--->".
+				 *
+				 * Note that this check may have advanced the position significantly and requires
+				 * a length check to prevent bad offsets on inputs like `<script><!---------`.
+				 */
+				$at += strspn( $html, '-', $at );
+				if ( $at < $doc_length && '>' === $html[ $at ] ) {
+					++$at;
+					continue;
+				}
+
+				$state = 'escaped';
 				continue;
 			}
 
@@ -1561,7 +1599,6 @@ private function skip_script_data(): bool {
 			 * proceed scanning to the next potential token in the text.
 			 */
 			if ( ! (
-				$at + 6 < $doc_length &&
 				( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
 				( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
 				( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) &&
@@ -1579,13 +1616,9 @@ private function skip_script_data(): bool {
 			 * "<script123" should not end a script region even though
 			 * "<script" is found within the text.
 			 */
-			if ( $at + 6 >= $doc_length ) {
-				continue;
-			}
 			$at += 6;
 			$c   = $html[ $at ];
 			if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) {
-				++$at;
 				continue;
 			}
 
@@ -1611,8 +1644,6 @@ private function skip_script_data(): bool {
 				}
 
 				if ( $this->bytes_already_parsed >= $doc_length ) {
-					$this->parser_state = self::STATE_INCOMPLETE_INPUT;
-
 					return false;
 				}
 

diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -3046,4 +3046,52 @@ public static function data_alphabet_by_characters_uppercase() {
 			yield strtoupper( $data[0] ) => array( strtoupper( $data[0] ) );
 		}
 	}
+
+	/**
+	 * Test that script tags are parsed correctly.
+	 *
+	 * Script tag parsing is very complicated; see the following resources for more details:
+	 *
+	 * - https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
+	 * - https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements
+	 *
+	 * @ticket 63738
+	 *
+	 * @dataProvider data_script_tag
+	 */
+	public function test_script_tag_parsing( string $input, bool $closes ) {
+		$processor = new WP_HTML_Tag_Processor( $input );
+
+		if ( $closes ) {
+			$this->assertTrue( $processor->next_token(), 'Expected to find complete script tag.' );
+			$this->assertSame( 'SCRIPT', $processor->get_tag() );
+			return;
+		}
+
+		$this->assertFalse( $processor->next_token(), 'Expected to fail next_token().' );
+		$this->assertTrue( $processor->paused_at_incomplete_token(), 'Expected an incomplete SCRIPT tag token.' );
+	}
+
+	/**
+	 * Data provider: each case is an HTML input and whether its SCRIPT tag is expected to close.
+	 */
+	public static function data_script_tag(): array {
+		return array(
+			'Basic script tag'                             => array( '<script></script>', true ),
+			'Script with type attribute'                   => array( '<script type="text/javascript"></script>', true ),
+			'Script data escaped'                          => array( '<script><!--</script>', true ),
+			'Script data double-escaped exit (comment)'    => array( '<script><!--<script>--></script>', true ),
+			'Script data double-escaped exit (closed)'     => array( '<script><!--<script></script></script>', true ),
+			'Script data double-escaped exit (closed/truncated)' => array( '<script><!--<script></script </script>', true ),
+			'Script data no double-escape'                 => array( '<script><!-- --><script></script>', true ),
+			'Script data no double-escape (short comment)' => array( '<script><!--><script></script>', true ),
+			'Script data almost double-escaped'            => array( '<script><!--<script</script>', true ),
+
+			'Script tag with self-close flag (ignored)'    => array( '<script />', false ),
+			'Script data double-escaped'                   => array( '<script><!--<script></script>', false ),
+
+			'Unclosed script in escaped state'             => array( '<script><!--------------', false ),
+			'Unclosed script in double escaped state'      => array( '<script><!--<script ', false ),
+		);
+	}
 }