@@ -1496,13 +1496,48 @@ private function skip_script_data(): bool {
14961496 while ( false !== $ at && $ at < $ doc_length ) {
14971497 $ at += strcspn ( $ html , '-< ' , $ at );
14981498
1499+ /*
1500+ * Optimization: Terminating a complete script element requires at least eight
1501+ * additional bytes in the document. Some checks below may cause local escaped
1502+ * state transitions when processing shorter strings, but those transitions are
1503+ * irrelevant if the script tag is incomplete and the function must return false.
1504+ *
1505+ * This may need updating if those transitions become significant or exported from
1506+ * this function in some way, such as when building safe methods to embed JavaScript
1507+ * or data inside a SCRIPT element.
1508+ *
1509+ * $at may be here.
1510+ * ↓
1511+ * ...</script>
1512+ * ╰──┬───╯
1513+ * $at + 8 additional bytes are required for a non-false return value.
1514+ *
1515+ * This single check eliminates the need to check lengths for the shorter spans:
1516+ *
1517+ * $at may be here.
1518+ * ↓
1519+ * <script><!-- --></script>
1520+ * ├╯
1521+ * Reading the bytes up to $at + 2 does not require a separate length check.
1522+ *
1523+ * The transition from "escaped" to "unescaped" is not relevant if the document ends:
1524+ *
1525+ * $at may be here.
1526+ * ↓
1527+ * <script><!-- -->[[END-OF-DOCUMENT]]
1528+ * ╰──┬───╯
1529+ * $at + 8 additional bytes is not satisfied, return false.
1530+ */
1531+ if ( $ at + 8 >= $ doc_length ) {
1532+ return false ;
1533+ }
1534+
14991535 /*
15001536 * For all script states a "-->" transitions
15011537 * back into the normal unescaped script mode,
15021538 * even if that's the current state.
15031539 */
15041540 if (
1505- $ at + 2 < $ doc_length &&
15061541 '- ' === $ html [ $ at ] &&
15071542 '- ' === $ html [ $ at + 1 ] &&
15081543 '> ' === $ html [ $ at + 2 ]
@@ -1512,10 +1547,6 @@ private function skip_script_data(): bool {
15121547 continue ;
15131548 }
15141549
1515- if ( $ at + 1 >= $ doc_length ) {
1516- return false ;
1517- }
1518-
15191550 /*
15201551 * Everything of interest past here starts with "<".
15211552 * Check this character and advance position regardless.
@@ -1537,7 +1568,6 @@ private function skip_script_data(): bool {
15371568 * parsing after updating the state.
15381569 */
15391570 if (
1540- $ at + 2 < $ doc_length &&
15411571 '! ' === $ html [ $ at ] &&
15421572 '- ' === $ html [ $ at + 1 ] &&
15431573 '- ' === $ html [ $ at + 2 ]
@@ -1561,7 +1591,6 @@ private function skip_script_data(): bool {
15611591 * proceed scanning to the next potential token in the text.
15621592 */
15631593 if ( ! (
1564- $ at + 6 < $ doc_length &&
15651594 ( 's ' === $ html [ $ at ] || 'S ' === $ html [ $ at ] ) &&
15661595 ( 'c ' === $ html [ $ at + 1 ] || 'C ' === $ html [ $ at + 1 ] ) &&
15671596 ( 'r ' === $ html [ $ at + 2 ] || 'R ' === $ html [ $ at + 2 ] ) &&
@@ -1579,9 +1608,6 @@ private function skip_script_data(): bool {
15791608 * "<script123" should not end a script region even though
15801609 * "<script" is found within the text.
15811610 */
1582- if ( $ at + 6 >= $ doc_length ) {
1583- continue ;
1584- }
15851611 $ at += 6 ;
15861612 $ c = $ html [ $ at ];
15871613 if ( ' ' !== $ c && "\t" !== $ c && "\r" !== $ c && "\n" !== $ c && '/ ' !== $ c && '> ' !== $ c ) {
@@ -1611,8 +1637,6 @@ private function skip_script_data(): bool {
16111637 }
16121638
16131639 if ( $ this ->bytes_already_parsed >= $ doc_length ) {
1614- $ this ->parser_state = self ::STATE_INCOMPLETE_INPUT ;
1615-
16161640 return false ;
16171641 }
16181642
0 commit comments