Skip to content

Commit 51c23b6

Browse files
committed
HTML API: Reliably parse HTML in get_url_in_content()
Trac ticket: Core-63694. This also decodes the URL, whereas the previous code didn’t, so strings like `http&colon;//` will be properly decoded as `http://`.
1 parent 8900f0a commit 51c23b6

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

src/wp-includes/formatting.php

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5803,17 +5803,22 @@ function wp_unslash( $value ) {
58035803
* Extracts and returns the first URL from passed content.
58045804
*
58055805
* @since 3.6.0
5806+
* @since {WP_VERSION} Reliably parses HTML via the HTML API.
58065807
*
5807-
* @param string $content A string which might contain a URL.
5808-
* @return string|false The found URL.
5808+
* @param string $content A string which might contain an `A` element with a non-empty `href` attribute.
5809+
* @return string|false Database-escaped URL via {@see esc_url} if found, otherwise `false`.
58095810
*/
58105811
function get_url_in_content( $content ) {
58115812
if ( empty( $content ) ) {
58125813
return false;
58135814
}
58145815

5815-
if ( preg_match( '/<a\s[^>]*?href=([\'"])(.+?)\1/is', $content, $matches ) ) {
5816-
return sanitize_url( $matches[2] );
5816+
$processor = new WP_HTML_Processor( $content );
5817+
while ( $processor->next_tag( 'A' ) ) {
5818+
$href = $processor->get_attribute( 'href' );
5819+
if ( is_string( $href ) && ! empty( $href ) ) {
5820+
return sanitize_url( $href );
5821+
}
58175822
}
58185823

58195824
return false;

0 commit comments

Comments
 (0)