_CH_informal`, and `ca`.
* @since 4.7.0 Added locale support for `sr_RS`.
* @since 4.8.0 Added locale support for `bs_BA`.
* @since 5.7.0 Added locale support for `de_AT`.
* @since 6.0.0 Added the `$locale` parameter.
* @since 6.1.0 Added Unicode NFC encoding normalization support.
*
* @param string $text Text that might have accent characters.
* @param string $locale Optional. The locale to use for accent removal. Some character
* replacements depend on the locale being used (e.g. 'de_DE').
* Defaults to the current locale.
* @return string Filtered string with replaced "nice" characters.
*/
function remove_accents( $text, $locale = '' ) {
if ( ! preg_match( '/[\x80-\xff]/', $text ) ) {
return $text;
}
if ( seems_utf8( $text ) ) {
/*
* Unicode sequence normalization from NFD (Normalization Form Decomposed)
* to NFC (Normalization Form [Pre]Composed), the encoding used in this function.
*/
if ( function_exists( 'normalizer_is_normalized' )
&& function_exists( 'normalizer_normalize' )
) {
if ( ! normalizer_is_normalized( $text ) ) {
$text = normalizer_normalize( $text );
}
}
$chars = array(
// Decompositions for Latin-1 Supplement.
'ª' => 'a',
'º' => 'o',
'À' => 'A',
'Á' => 'A',
'Â' => 'A',
'Ã' => 'A',
'Ä' => 'A',
'Å' => 'A',
'Æ' => 'AE',
'Ç' => 'C',
'È' => 'E',
'É' => 'E',
'Ê' => 'E',
'Ë' => 'E',
'Ì' => 'I',
'Í' => 'I',
'Î' => 'I',
'Ï' => 'I',
'Ð' => 'D',
'Ñ' => 'N',
'Ò' => 'O',
'Ó' => 'O',
'Ô' => 'O',
'Õ' => 'O',
'Ö' => 'O',
'Ù' => 'U',
'Ú' => 'U',
'Û' => 'U',
'Ü' => 'U',
'Ý' => 'Y',
'Þ' => 'TH',
'ß' => 's',
'à' => 'a',
'á' => 'a',
'â' => 'a',
'ã' => 'a',
'ä' => 'a',
'å' => 'a',
'æ' => 'ae',
'ç' => 'c',
'è' => 'e',
'é' => 'e',
'ê' => 'e',
'ë' => 'e',
'ì' => 'i',
'í' => 'i',
'î' => 'i',
'ï' => 'i',
'ð' => 'd',
'ñ' => 'n',
'ò' => 'o',
'ó' => 'o',
'ô' => 'o',
'õ' => 'o',
'ö' => 'o',
'ø' => 'o',
'ù' => 'u',
'ú' => 'u',
'û' => 'u',
'ü' => 'u',
'ý' => 'y',
'þ' => 'th',
'ÿ' => 'y',
'Ø' => 'O',
// Decompositions for Latin Extended-A.
'Ā' => 'A',
'ā' => 'a',
'Ă' => 'A',
'ă' => 'a',
'Ą' => 'A',
'ą' => 'a',
'Ć' => 'C',
'ć' => 'c',
'Ĉ' => 'C',
'ĉ' => 'c',
'Ċ' => 'C',
'ċ' => 'c',
'Č' => 'C',
'č' => 'c',
'Ď' => 'D',
'ď' => 'd',
'Đ' => 'D',
'đ' => 'd',
'Ē' => 'E',
'ē' => 'e',
'Ĕ' => 'E',
'ĕ' => 'e',
'Ė' => 'E',
'ė' => 'e',
'Ę' => 'E',
'ę' => 'e',
'Ě' => 'E',
'ě' => 'e',
'Ĝ' => 'G',
'ĝ' => 'g',
'Ğ' => 'G',
'ğ' => 'g',
'Ġ' => 'G',
'ġ' => 'g',
'Ģ' => 'G',
'ģ' => 'g',
'Ĥ' => 'H',
'ĥ' => 'h',
'Ħ' => 'H',
'ħ' => 'h',
'Ĩ' => 'I',
'ĩ' => 'i',
'Ī' => 'I',
'ī' => 'i',
'Ĭ' => 'I',
'ĭ' => 'i',
'Į' => 'I',
'į' => 'i',
'İ' => 'I',
'ı' => 'i',
'IJ' => 'IJ',
'ij' => 'ij',
'Ĵ' => 'J',
'ĵ' => 'j',
'Ķ' => 'K',
'ķ' => 'k',
'ĸ' => 'k',
'Ĺ' => 'L',
'ĺ' => 'l',
'Ļ' => 'L',
'ļ' => 'l',
'Ľ' => 'L',
'ľ' => 'l',
'Ŀ' => 'L',
'ŀ' => 'l',
'Ł' => 'L',
'ł' => 'l',
'Ń' => 'N',
'ń' => 'n',
'Ņ' => 'N',
'ņ' => 'n',
'Ň' => 'N',
'ň' => 'n',
'ʼn' => 'n',
'Ŋ' => 'N',
'ŋ' => 'n',
'Ō' => 'O',
'ō' => 'o',
'Ŏ' => 'O',
'ŏ' => 'o',
'Ő' => 'O',
'ő' => 'o',
'Œ' => 'OE',
'œ' => 'oe',
'Ŕ' => 'R',
'ŕ' => 'r',
'Ŗ' => 'R',
'ŗ' => 'r',
'Ř' => 'R',
'ř' => 'r',
'Ś' => 'S',
'ś' => 's',
'Ŝ' => 'S',
'ŝ' => 's',
'Ş' => 'S',
'ş' => 's',
'Š' => 'S',
'š' => 's',
'Ţ' => 'T',
'ţ' => 't',
'Ť' => 'T',
'ť' => 't',
'Ŧ' => 'T',
'ŧ' => 't',
'Ũ' => 'U',
'ũ' => 'u',
'Ū' => 'U',
'ū' => 'u',
'Ŭ' => 'U',
'ŭ' => 'u',
'Ů' => 'U',
'ů' => 'u',
'Ű' => 'U',
'ű' => 'u',
'Ų' => 'U',
'ų' => 'u',
'Ŵ' => 'W',
'ŵ' => 'w',
'Ŷ' => 'Y',
'ŷ' => 'y',
'Ÿ' => 'Y',
'Ź' => 'Z',
'ź' => 'z',
'Ż' => 'Z',
'ż' => 'z',
'Ž' => 'Z',
'ž' => 'z',
'ſ' => 's',
// Decompositions for Latin Extended-B.
'Ə' => 'E',
'ǝ' => 'e',
'Ș' => 'S',
'ș' => 's',
'Ț' => 'T',
'ț' => 't',
// Euro sign.
'€' => 'E',
// GBP (Pound) sign.
'£' => '',
// Vowels with diacritic (Vietnamese). Unmarked.
'Ơ' => 'O',
'ơ' => 'o',
'Ư' => 'U',
'ư' => 'u',
// Grave accent.
'Ầ' => 'A',
'ầ' => 'a',
'Ằ' => 'A',
'ằ' => 'a',
'Ề' => 'E',
'ề' => 'e',
'Ồ' => 'O',
'ồ' => 'o',
'Ờ' => 'O',
'ờ' => 'o',
'Ừ' => 'U',
'ừ' => 'u',
'Ỳ' => 'Y',
'ỳ' => 'y',
// Hook.
'Ả' => 'A',
'ả' => 'a',
'Ẩ' => 'A',
'ẩ' => 'a',
'Ẳ' => 'A',
'ẳ' => 'a',
'Ẻ' => 'E',
'ẻ' => 'e',
'Ể' => 'E',
'ể' => 'e',
'Ỉ' => 'I',
'ỉ' => 'i',
'Ỏ' => 'O',
'ỏ' => 'o',
'Ổ' => 'O',
'ổ' => 'o',
'Ở' => 'O',
'ở' => 'o',
'Ủ' => 'U',
'ủ' => 'u',
'Ử' => 'U',
'ử' => 'u',
'Ỷ' => 'Y',
'ỷ' => 'y',
// Tilde.
'Ẫ' => 'A',
'ẫ' => 'a',
'Ẵ' => 'A',
'ẵ' => 'a',
'Ẽ' => 'E',
'ẽ' => 'e',
'Ễ' => 'E',
'ễ' => 'e',
'Ỗ' => 'O',
'ỗ' => 'o',
'Ỡ' => 'O',
'ỡ' => 'o',
'Ữ' => 'U',
'ữ' => 'u',
'Ỹ' => 'Y',
'ỹ' => 'y',
// Acute accent.
'Ấ' => 'A',
'ấ' => 'a',
'Ắ' => 'A',
'ắ' => 'a',
'Ế' => 'E',
'ế' => 'e',
'Ố' => 'O',
'ố' => 'o',
'Ớ' => 'O',
'ớ' => 'o',
'Ứ' => 'U',
'ứ' => 'u',
// Dot below.
'Ạ' => 'A',
'ạ' => 'a',
'Ậ' => 'A',
'ậ' => 'a',
'Ặ' => 'A',
'ặ' => 'a',
'Ẹ' => 'E',
'ẹ' => 'e',
'Ệ' => 'E',
'ệ' => 'e',
'Ị' => 'I',
'ị' => 'i',
'Ọ' => 'O',
'ọ' => 'o',
'Ộ' => 'O',
'ộ' => 'o',
'Ợ' => 'O',
'ợ' => 'o',
'Ụ' => 'U',
'ụ' => 'u',
'Ự' => 'U',
'ự' => 'u',
'Ỵ' => 'Y',
'ỵ' => 'y',
// Vowels with diacritic (Chinese, Hanyu Pinyin).
'ɑ' => 'a',
// Macron.
'Ǖ' => 'U',
'ǖ' => 'u',
// Acute accent.
'Ǘ' => 'U',
'ǘ' => 'u',
// Caron.
'Ǎ' => 'A',
'ǎ' => 'a',
'Ǐ' => 'I',
'ǐ' => 'i',
'Ǒ' => 'O',
'ǒ' => 'o',
'Ǔ' => 'U',
'ǔ' => 'u',
'Ǚ' => 'U',
'ǚ' => 'u',
// Grave accent.
'Ǜ' => 'U',
'ǜ' => 'u',
);
// Used for locale-specific rules.
if ( empty( $locale ) ) {
$locale = get_locale();
}
/*
* German has various locales (de_DE, de_CH, de_AT, ...) with formal and informal variants.
* There is no 3-letter locale like 'def', so checking for 'de' instead of 'de_' is safe,
* since 'de' itself would be a valid locale too.
*/
if ( str_starts_with( $locale, 'de' ) ) {
$chars['Ä'] = 'Ae';
$chars['ä'] = 'ae';
$chars['Ö'] = 'Oe';
$chars['ö'] = 'oe';
$chars['Ü'] = 'Ue';
$chars['ü'] = 'ue';
$chars['ß'] = 'ss';
} elseif ( 'da_DK' === $locale ) {
$chars['Æ'] = 'Ae';
$chars['æ'] = 'ae';
$chars['Ø'] = 'Oe';
$chars['ø'] = 'oe';
$chars['Å'] = 'Aa';
$chars['å'] = 'aa';
} elseif ( 'ca' === $locale ) {
$chars['l·l'] = 'll';
} elseif ( 'sr_RS' === $locale || 'bs_BA' === $locale ) {
$chars['Đ'] = 'DJ';
$chars['đ'] = 'dj';
}
$text = strtr( $text, $chars );
} else {
$chars = array();
// Assume ISO-8859-1 if not UTF-8.
$chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e"
. "\x9f\xa2\xa5\xb5\xc0\xc1\xc2"
. "\xc3\xc4\xc5\xc7\xc8\xc9\xca"
. "\xcb\xcc\xcd\xce\xcf\xd1\xd2"
. "\xd3\xd4\xd5\xd6\xd8\xd9\xda"
. "\xdb\xdc\xdd\xe0\xe1\xe2\xe3"
. "\xe4\xe5\xe7\xe8\xe9\xea\xeb"
. "\xec\xed\xee\xef\xf1\xf2\xf3"
. "\xf4\xf5\xf6\xf8\xf9\xfa\xfb"
. "\xfc\xfd\xff";
$chars['out'] = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';
$text = strtr( $text, $chars['in'], $chars['out'] );
$double_chars = array();
$double_chars['in'] = array( "\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe" );
$double_chars['out'] = array( 'OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th' );
$text = str_replace( $double_chars['in'], $double_chars['out'], $text );
}
return $text;
}
/**
* Sanitizes a filename, replacing whitespace with dashes.
*
* Removes special characters that are illegal in filenames on certain
* operating systems and special characters requiring special escaping
* to manipulate at the command line. Replaces spaces and consecutive
* dashes with a single dash. Trims period, dash and underscore from beginning
* and end of filename. It is not guaranteed that this function will return a
* filename that is allowed to be uploaded.
*
* @since 2.1.0
*
* @param string $filename The filename to be sanitized.
* @return string The sanitized filename.
*/
function sanitize_file_name( $filename ) {
$filename_raw = $filename;
$filename = remove_accents( $filename );
$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', '’', '«', '»', '”', '“', chr( 0 ) );
// Check for support for utf8 in the installed PCRE library once and store the result in a static.
static $utf8_pcre = null;
if ( ! isset( $utf8_pcre ) ) {
// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
$utf8_pcre = @preg_match( '/^./u', 'a' );
}
if ( ! seems_utf8( $filename ) ) {
$_ext = pathinfo( $filename, PATHINFO_EXTENSION );
$_name = pathinfo( $filename, PATHINFO_FILENAME );
$filename = sanitize_title_with_dashes( $_name ) . '.' . $_ext;
}
if ( $utf8_pcre ) {
$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
}
/**
* Filters the list of characters to remove from a filename.
*
* @since 2.8.0
*
* @param string[] $special_chars Array of characters to remove.
* @param string $filename_raw The original filename to be sanitized.
*/
$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );
$filename = str_replace( $special_chars, '', $filename );
$filename = str_replace( array( '%20', '+' ), '-', $filename );
$filename = preg_replace( '/\.{2,}/', '.', $filename );
$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );
$filename = trim( $filename, '.-_' );
if ( ! str_contains( $filename, '.' ) ) {
$mime_types = wp_get_mime_types();
$filetype = wp_check_filetype( 'test.' . $filename, $mime_types );
if ( $filetype['ext'] === $filename ) {
$filename = 'unnamed-file.' . $filetype['ext'];
}
}
// Split the filename into a base and extension[s].
$parts = explode( '.', $filename );
// Return if only one extension.
if ( count( $parts ) <= 2 ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
}
// Process multiple extensions.
$filename = array_shift( $parts );
$extension = array_pop( $parts );
$mimes = get_allowed_mime_types();
/*
* Loop over any intermediate extensions. Postfix them with a trailing underscore
* if they are a 2 - 5 character long alpha string not in the allowed extension list.
*/
foreach ( (array) $parts as $part ) {
$filename .= '.' . $part;
if ( preg_match( '/^[a-zA-Z]{2,5}\d?$/', $part ) ) {
$allowed = false;
foreach ( $mimes as $ext_preg => $mime_match ) {
$ext_preg = '!^(' . $ext_preg . ')$!i';
if ( preg_match( $ext_preg, $part ) ) {
$allowed = true;
break;
}
}
if ( ! $allowed ) {
$filename .= '_';
}
}
}
$filename .= '.' . $extension;
/**
* Filters a sanitized filename string.
*
* @since 2.8.0
*
* @param string $filename Sanitized filename.
* @param string $filename_raw The filename prior to sanitization.
*/
return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
}
/**
* Sanitizes a username, stripping out unsafe characters.
*
* Removes tags, percent-encoded characters, HTML entities, and if strict is enabled,
* will only keep alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
* raw username (the username in the parameter), and the value of $strict as parameters
* for the {@see 'sanitize_user'} filter.
*
* @since 2.0.0
*
* @param string $username The username to be sanitized.
* @param bool $strict Optional. If set to true, limits $username to specific characters.
* Default false.
* @return string The sanitized username, after passing through filters.
*/
function sanitize_user( $username, $strict = false ) {
$raw_username = $username;
$username = wp_strip_all_tags( $username );
$username = remove_accents( $username );
// Remove percent-encoded characters.
$username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username );
// Remove HTML entities.
$username = preg_replace( '/&.+?;/', '', $username );
// If strict, reduce to ASCII for max portability.
if ( $strict ) {
$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
}
$username = trim( $username );
// Consolidate contiguous whitespace.
$username = preg_replace( '|\s+|', ' ', $username );
/**
* Filters a sanitized username string.
*
* @since 2.0.1
*
* @param string $username Sanitized username.
* @param string $raw_username The username prior to sanitization.
* @param bool $strict Whether to limit the sanitization to specific characters.
*/
return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
}
/**
* Sanitizes a string key.
*
* Keys are used as internal identifiers. Lowercase alphanumeric characters,
* dashes, and underscores are allowed.
*
* @since 3.0.0
*
* @param string $key String key.
* @return string Sanitized key.
*/
function sanitize_key( $key ) {
$sanitized_key = '';
if ( is_scalar( $key ) ) {
$sanitized_key = strtolower( $key );
$sanitized_key = preg_replace( '/[^a-z0-9_\-]/', '', $sanitized_key );
}
/**
* Filters a sanitized key string.
*
* @since 3.0.0
*
* @param string $sanitized_key Sanitized key.
* @param string $key The key prior to sanitization.
*/
return apply_filters( 'sanitize_key', $sanitized_key, $key );
}
/**
* Sanitizes a string into a slug, which can be used in URLs or HTML attributes.
*
* By default, converts accent characters to ASCII characters and further
* limits the output to alphanumeric characters, underscore (_) and dash (-)
* through the {@see 'sanitize_title'} filter.
*
* If `$title` is empty and `$fallback_title` is set, the latter will be used.
*
* @since 1.0.0
*
* @param string $title The string to be sanitized.
* @param string $fallback_title Optional. A title to use if $title is empty. Default empty.
* @param string $context Optional. The operation for which the string is sanitized.
* When set to 'save', the string runs through remove_accents().
* Default 'save'.
* @return string The sanitized string.
*/
function sanitize_title( $title, $fallback_title = '', $context = 'save' ) {
$raw_title = $title;
if ( 'save' === $context ) {
$title = remove_accents( $title );
}
/**
* Filters a sanitized title string.
*
* @since 1.2.0
*
* @param string $title Sanitized title.
* @param string $raw_title The title prior to sanitization.
* @param string $context The context for which the title is being sanitized.
*/
$title = apply_filters( 'sanitize_title', $title, $raw_title, $context );
if ( '' === $title || false === $title ) {
$title = $fallback_title;
}
return $title;
}
/**
* Sanitizes a title with the 'query' context.
*
* Used for querying the database for a value from URL.
*
* @since 3.1.0
*
* @param string $title The string to be sanitized.
* @return string The sanitized string.
*/
function sanitize_title_for_query( $title ) {
return sanitize_title( $title, '', 'query' );
}
/**
* Sanitizes a title, replacing whitespace and a few other characters with dashes.
*
* Limits the output to alphanumeric characters, underscore (_) and dash (-).
* Whitespace becomes a dash.
*
* @since 1.2.0
*
* @param string $title The title to be sanitized.
* @param string $raw_title Optional. Not used. Default empty.
* @param string $context Optional. The operation for which the string is sanitized.
* When set to 'save', additional entities are converted to hyphens
* or stripped entirely. Default 'display'.
* @return string The sanitized title.
*/
function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) {
$title = strip_tags( $title );
// Preserve escaped octets.
$title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title );
// Remove percent signs that are not part of an octet.
$title = str_replace( '%', '', $title );
// Restore octets.
$title = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title );
if ( seems_utf8( $title ) ) {
if ( function_exists( 'mb_strtolower' ) ) {
$title = mb_strtolower( $title, 'UTF-8' );
}
$title = utf8_uri_encode( $title, 200 );
}
$title = strtolower( $title );
if ( 'save' === $context ) {
// Convert , &ndash, and &mdash to hyphens.
$title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title );
// Convert , &ndash, and &mdash HTML entities to hyphens.
$title = str_replace( array( ' ', ' ', '–', '–', '—', '—' ), '-', $title );
// Convert forward slash to hyphen.
$title = str_replace( '/', '-', $title );
// Strip these characters entirely.
$title = str_replace(
array(
// Soft hyphens.
'%c2%ad',
// ¡ and ¿.
'%c2%a1',
'%c2%bf',
// Angle quotes.
'%c2%ab',
'%c2%bb',
'%e2%80%b9',
'%e2%80%ba',
// Curly quotes.
'%e2%80%98',
'%e2%80%99',
'%e2%80%9c',
'%e2%80%9d',
'%e2%80%9a',
'%e2%80%9b',
'%e2%80%9e',
'%e2%80%9f',
// Bullet.
'%e2%80%a2',
// ©, ®, °, &hellip, and &trade.
'%c2%a9',
'%c2%ae',
'%c2%b0',
'%e2%80%a6',
'%e2%84%a2',
// Acute accents.
'%c2%b4',
'%cb%8a',
'%cc%81',
'%cd%81',
// Grave accent, macron, caron.
'%cc%80',
'%cc%84',
'%cc%8c',
// Non-visible characters that display without a width.
'%e2%80%8b', // Zero width space.
'%e2%80%8c', // Zero width non-joiner.
'%e2%80%8d', // Zero width joiner.
'%e2%80%8e', // Left-to-right mark.
'%e2%80%8f', // Right-to-left mark.
'%e2%80%aa', // Left-to-right embedding.
'%e2%80%ab', // Right-to-left embedding.
'%e2%80%ac', // Pop directional formatting.
'%e2%80%ad', // Left-to-right override.
'%e2%80%ae', // Right-to-left override.
'%ef%bb%bf', // Byte order mark.
'%ef%bf%bc', // Object replacement character.
),
'',
$title
);
// Convert non-visible characters that display with a width to hyphen.
$title = str_replace(
array(
'%e2%80%80', // En quad.
'%e2%80%81', // Em quad.
'%e2%80%82', // En space.
'%e2%80%83', // Em space.
'%e2%80%84', // Three-per-em space.
'%e2%80%85', // Four-per-em space.
'%e2%80%86', // Six-per-em space.
'%e2%80%87', // Figure space.
'%e2%80%88', // Punctuation space.
'%e2%80%89', // Thin space.
'%e2%80%8a', // Hair space.
'%e2%80%a8', // Line separator.
'%e2%80%a9', // Paragraph separator.
'%e2%80%af', // Narrow no-break space.
),
'-',
$title
);
// Convert × to 'x'.
$title = str_replace( '%c3%97', 'x', $title );
}
// Remove HTML entities.
$title = preg_replace( '/&.+?;/', '', $title );
$title = str_replace( '.', '-', $title );
$title = preg_replace( '/[^%a-z0-9 _-]/', '', $title );
$title = preg_replace( '/\s+/', '-', $title );
$title = preg_replace( '|-+|', '-', $title );
$title = trim( $title, '-' );
return $title;
}
/**
* Ensures a string is a valid SQL 'order by' clause.
*
* Accepts one or more columns, with or without a sort order (ASC / DESC).
* e.g. 'column_1', 'column_1, column_2', 'column_1 ASC, column_2 DESC' etc.
*
* Also accepts 'RAND()'.
*
* @since 2.5.1
*
* @param string $orderby Order by clause to be validated.
* @return string|false Returns $orderby if valid, false otherwise.
*/
function sanitize_sql_orderby( $orderby ) {
if ( preg_match( '/^\s*(([a-z0-9_]+|`[a-z0-9_]+`)(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_`])|$))+$/i', $orderby ) || preg_match( '/^\s*RAND\(\s*\)\s*$/i', $orderby ) ) {
return $orderby;
}
return false;
}
/**
* Sanitizes an HTML classname to ensure it only contains valid characters.
*
* Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty
* string then it will return the alternative value supplied.
*
* @todo Expand to support the full range of CDATA that a class attribute can contain.
*
* @since 2.8.0
*
* @param string $classname The classname to be sanitized.
* @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
* Default empty string.
* @return string The sanitized value.
*/
function sanitize_html_class( $classname, $fallback = '' ) {
// Strip out any percent-encoded characters.
$sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $classname );
// Limit to A-Z, a-z, 0-9, '_', '-'.
$sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized );
if ( '' === $sanitized && $fallback ) {
return sanitize_html_class( $fallback );
}
/**
* Filters a sanitized HTML class string.
*
* @since 2.8.0
*
* @param string $sanitized The sanitized HTML class.
* @param string $classname HTML class before sanitization.
* @param string $fallback The fallback string.
*/
return apply_filters( 'sanitize_html_class', $sanitized, $classname, $fallback );
}
/**
* Strips out all characters not allowed in a locale name.
*
* @since 6.2.1
*
* @param string $locale_name The locale name to be sanitized.
* @return string The sanitized value.
*/
function sanitize_locale_name( $locale_name ) {
// Limit to A-Z, a-z, 0-9, '_', '-'.
$sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $locale_name );
/**
* Filters a sanitized locale name string.
*
* @since 6.2.1
*
* @param string $sanitized The sanitized locale name.
* @param string $locale_name The locale name before sanitization.
*/
return apply_filters( 'sanitize_locale_name', $sanitized, $locale_name );
}
/**
* Converts lone & characters into `&` (a.k.a. `&`)
*
* @since 0.71
*
* @param string $content String of characters to be converted.
* @param string $deprecated Not used.
* @return string Converted string.
*/
function convert_chars( $content, $deprecated = '' ) {
if ( ! empty( $deprecated ) ) {
_deprecated_argument( __FUNCTION__, '0.71' );
}
if ( str_contains( $content, '&' ) ) {
$content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&$1', $content );
}
return $content;
}
/**
* Converts invalid Unicode references range to valid range.
*
* @since 4.3.0
*
* @param string $content String with entities that need converting.
* @return string Converted string.
*/
function convert_invalid_entities( $content ) {
$wp_htmltranswinuni = array(
'€' => '€', // The Euro sign.
'' => '',
'‚' => '‚', // These are Windows CP1252 specific characters.
'ƒ' => 'ƒ', // They would look weird on non-Windows browsers.
'„' => '„',
'…' => '…',
'†' => '†',
'‡' => '‡',
'ˆ' => 'ˆ',
'‰' => '‰',
'Š' => 'Š',
'‹' => '‹',
'Œ' => 'Œ',
'' => '',
'Ž' => 'Ž',
'' => '',
'' => '',
'‘' => '‘',
'’' => '’',
'“' => '“',
'”' => '”',
'•' => '•',
'–' => '–',
'—' => '—',
'˜' => '˜',
'™' => '™',
'š' => 'š',
'›' => '›',
'œ' => 'œ',
'' => '',
'ž' => 'ž',
'Ÿ' => 'Ÿ',
);
if ( str_contains( $content, '' ) ) {
$content = strtr( $content, $wp_htmltranswinuni );
}
return $content;
}
/**
* Balances tags if forced to, or if the 'use_balanceTags' option is set to true.
*
* @since 0.71
*
* @param string $text Text to be balanced
* @param bool $force If true, forces balancing, ignoring the value of the option. Default false.
* @return string Balanced text
*/
function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid
if ( $force || (int) get_option( 'use_balanceTags' ) === 1 ) {
return force_balance_tags( $text );
} else {
return $text;
}
}
/**
* Balances tags of string using a modified stack.
*
* @since 2.0.4
* @since 5.3.0 Improve accuracy and add support for custom element tags.
*
* @author Leonard Lin
* @license GPL
* @copyright November 4, 2001
* @version 1.1
* @todo Make better - change loop condition to $text in 1.2
* @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
* 1.1 Fixed handling of append/stack pop order of end text
* Added Cleaning Hooks
* 1.0 First Version
*
* @param string $text Text to be balanced.
* @return string Balanced text.
*/
function force_balance_tags( $text ) {
$tagstack = array();
$stacksize = 0;
$tagqueue = '';
$newtext = '';
// Known single-entity/self-closing tags.
$single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track', 'wbr' );
// Tags that can be immediately nested within themselves.
$nestable_tags = array( 'article', 'aside', 'blockquote', 'details', 'div', 'figure', 'object', 'q', 'section', 'span' );
// WP bug fix for comments - in case you REALLY meant to type '< !--'.
$text = str_replace( '< !--', '< !--', $text );
// WP bug fix for LOVE <3 (and other situations with '<' before a number).
$text = preg_replace( '#<([0-9]{1})#', '<$1', $text );
/**
* Matches supported tags.
*
* To get the pattern as a string without the comments paste into a PHP
* REPL like `php -a`.
*
* @see https://html.spec.whatwg.org/#elements-2
* @see https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
*
* @example
* ~# php -a
* php > $s = [paste copied contents of expression below including parentheses];
* php > echo $s;
*/
$tag_pattern = (
'#<' . // Start with an opening bracket.
'(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash.
'(' . // Group 2 - Tag name.
// Custom element tags have more lenient rules than HTML tag names.
'(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' .
'|' .
// Traditional tag rules approximate HTML tag names.
'(?:[\w:]+)' .
')' .
'(?:' .
// We either immediately close the tag with its '>' and have nothing here.
'\s*' .
'(/?)' . // Group 3 - "attributes" for empty tag.
'|' .
// Or we must start with space characters to separate the tag name from the attributes (or whitespace).
'(\s+)' . // Group 4 - Pre-attribute whitespace.
'([^>]*)' . // Group 5 - Attributes.
')' .
'>#' // End with a closing bracket.
);
while ( preg_match( $tag_pattern, $text, $regex ) ) {
$full_match = $regex[0];
$has_leading_slash = ! empty( $regex[1] );
$tag_name = $regex[2];
$tag = strtolower( $tag_name );
$is_single_tag = in_array( $tag, $single_tags, true );
$pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : '';
$attributes = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] );
$has_self_closer = str_ends_with( $attributes, '/' );
$newtext .= $tagqueue;
$i = strpos( $text, $full_match );
$l = strlen( $full_match );
// Clear the shifter.
$tagqueue = '';
if ( $has_leading_slash ) { // End tag.
// If too many closing tags.
if ( $stacksize <= 0 ) {
$tag = '';
// Or close to be safe $tag = '/' . $tag.
// If stacktop value = tag close value, then pop.
} elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag.
$tag = ''; // Close tag.
array_pop( $tagstack );
--$stacksize;
} else { // Closing tag not at top, search for it.
for ( $j = $stacksize - 1; $j >= 0; $j-- ) {
if ( $tagstack[ $j ] === $tag ) {
// Add tag to tagqueue.
for ( $k = $stacksize - 1; $k >= $j; $k-- ) {
$tagqueue .= '';
--$stacksize;
}
break;
}
}
$tag = '';
}
} else { // Begin tag.
if ( $has_self_closer ) {
/*
* If it presents itself as a self-closing tag, but it isn't a known single-entity self-closing tag,
* then don't let it be treated as such and immediately close it with a closing tag.
* The tag will encapsulate no text as a result.
*/
if ( ! $is_single_tag ) {
$attributes = trim( substr( $attributes, 0, -1 ) ) . "> 0 && ! in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) {
$tagqueue = '';
--$stacksize;
}
$stacksize = array_push( $tagstack, $tag );
}
// Attributes.
if ( $has_self_closer && $is_single_tag ) {
// We need some space - avoid
and prefer
.
$pre_attribute_ws = ' ';
}
$tag = '<' . $tag . $pre_attribute_ws . $attributes . '>';
// If already queuing a close tag, then put this tag on too.
if ( ! empty( $tagqueue ) ) {
$tagqueue .= $tag;
$tag = '';
}
}
$newtext .= substr( $text, 0, $i ) . $tag;
$text = substr( $text, $i + $l );
}
// Clear tag queue.
$newtext .= $tagqueue;
// Add remaining text.
$newtext .= $text;
while ( $x = array_pop( $tagstack ) ) {
$newtext .= ''; // Add remaining tags to close.
}
// WP fix for the bug with HTML comments.
$newtext = str_replace( '< !--', '