-
Notifications
You must be signed in to change notification settings - Fork 3.2k
HTML API: Improve script tag escape state processing #9397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
6ad9951
b3b3177
ca16e0e
0456be7
ea6f7d3
4be62b9
df2affa
d0cbb00
69f3bce
c509f9d
de91e09
f041a9c
2b6833c
bba0547
728d13f
1b4478f
d22ef9a
360d896
9fd074f
840f6aa
f7bcfb4
e9dd022
f113f8a
479a704
98828b8
c2330b7
75ab18f
6593311
7036907
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1496,13 +1496,39 @@ private function skip_script_data(): bool { | |
| while ( false !== $at && $at < $doc_length ) { | ||
| $at += strcspn( $html, '-<', $at ); | ||
|
|
||
| /* | ||
| * *IMPORTANT:* Any changes to this loop *must* ensure the conditions described in this | ||
| * comment remain valid. | ||
| * | ||
| * The rest of this loop matches different byte sequences. If a script close tag is not | ||
| * found, the function will return false. The script close tag is the longest byte | ||
| * sequence to match. Therefore, a single length check for at least 8 additional | ||
| * bytes allows for an early `false` return OR subsequent matches without length checks. | ||
| * | ||
| * $at may be here. | ||
| * ↓ | ||
| * </script> | ||
| * ╰──┬───╯ | ||
| * $at + 8 additional bytes are required for a non-false return value. | ||
| * | ||
| * The length of shorter matches is already satisfied: | ||
| * | ||
| * $at may be here. | ||
| * ↓ | ||
| * --> | ||
| * ├╯ | ||
| * Reading up to $at + 2 additional bytes does not require an additional length check. | ||
| */ | ||
| if ( $at + 8 >= $doc_length ) { | ||
| return false; | ||
| } | ||
|
|
||
| /* | ||
| * For all script states a "-->" transitions | ||
| * back into the normal unescaped script mode, | ||
| * even if that's the current state. | ||
| */ | ||
| if ( | ||
| $at + 2 < $doc_length && | ||
| '-' === $html[ $at ] && | ||
| '-' === $html[ $at + 1 ] && | ||
| '>' === $html[ $at + 2 ] | ||
|
|
@@ -1512,10 +1538,6 @@ private function skip_script_data(): bool { | |
| continue; | ||
| } | ||
|
|
||
| if ( $at + 1 >= $doc_length ) { | ||
| return false; | ||
| } | ||
|
|
||
| /* | ||
| * Everything of interest past here starts with "<". | ||
| * Check this character and advance position regardless. | ||
|
|
@@ -1537,13 +1559,29 @@ private function skip_script_data(): bool { | |
| * parsing after updating the state. | ||
|
||
| */ | ||
| if ( | ||
| $at + 2 < $doc_length && | ||
| 'unescaped' === $state && | ||
| '!' === $html[ $at ] && | ||
| '-' === $html[ $at + 1 ] && | ||
| '-' === $html[ $at + 2 ] | ||
| ) { | ||
| $at += 3; | ||
| $state = 'unescaped' === $state ? 'escaped' : $state; | ||
| $at += 3; | ||
|
|
||
| /* | ||
| * The parser is ready to enter the `escaped` state but may remain in the | ||
| * `unescaped` state if what immediately follows is a sequence of zero or | ||
| * more "-" characters followed by ">". This is similar to abruptly closed HTML | ||
| * comments like "<!-->" or "<!--->". | ||
| * | ||
| * Note that this check may have advanced the position significantly and requires | ||
| * a length check to prevent bad offsets on inputs like `<script><!---------`. | ||
| */ | ||
| $at += strspn( $html, '-', $at ); | ||
| if ( $at < $doc_length && '>' === $html[ $at ] ) { | ||
| ++$at; | ||
| continue; | ||
| } | ||
|
|
||
| $state = 'escaped'; | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -1561,7 +1599,6 @@ private function skip_script_data(): bool { | |
| * proceed scanning to the next potential token in the text. | ||
| */ | ||
| if ( ! ( | ||
| $at + 6 < $doc_length && | ||
| ( 's' === $html[ $at ] || 'S' === $html[ $at ] ) && | ||
| ( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) && | ||
| ( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) && | ||
|
|
@@ -1579,13 +1616,9 @@ private function skip_script_data(): bool { | |
| * "<script123" should not end a script region even though | ||
| * "<script" is found within the text. | ||
| */ | ||
| if ( $at + 6 >= $doc_length ) { | ||
| continue; | ||
| } | ||
| $at += 6; | ||
| $c = $html[ $at ]; | ||
| if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) { | ||
| ++$at; | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -1611,8 +1644,6 @@ private function skip_script_data(): bool { | |
| } | ||
|
|
||
| if ( $this->bytes_already_parsed >= $doc_length ) { | ||
| $this->parser_state = self::STATE_INCOMPLETE_INPUT; | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.