/**
 * Normalise a raw ID using a fixed "_" separator character.
 *
 * The value is cast to string, trimmed and lower-cased, ';' is mapped to ':'
 * and '/' to the separator, then it is romanized, deaccented, stripped of
 * special characters and passed through utf8_strip(). Finally runs of the
 * separator and of colons are collapsed, and punctuation directly following
 * a colon is dropped.
 *
 * @param mixed $raw_id The ID to clean
 * @return string The cleaned ID
 */
function cleanID($raw_id)
{
    $separator = "_";
    $separatorRun = '#\\' . $separator . '+#';

    $cleaned = utf8_strtolower(trim((string) $raw_id));

    // alternative namespace separator first, then slashes become the separator
    $cleaned = strtr(strtr($cleaned, ';', ':'), '/', $separator);

    $cleaned = utf8_deaccent(utf8_romanize($cleaned), -1);

    // remove specials
    $cleaned = utf8_strip(utf8_stripspecials($cleaned, $separator, '\\*'));

    // the patterns are applied one after another, in the listed order
    return preg_replace(
        array($separatorRun, '#:+#', '#:[:\\._\\-]+#'),
        array($separator, ':', ':'),
        $cleaned
    );
}
/**
 * Encodes an email address header
 *
 * Recipient names containing non-ASCII characters are encoded as
 * quoted-printable for headers (or deaccented and stripped first when
 * MAILHEADER_ASCIIONLY is defined).
 * Addresses may not contain Non-ASCII data! Such addresses, as well as
 * invalid ones, are reported via msg() and skipped.
 *
 * Example:
 *   mail_encode_address("föö <*****@*****.**>, me@somewhere.com","TBcc");
 *
 * @param string  $string Multiple addresses separated by commas
 * @param string  $header Name of the header (To,Bcc,Cc,...)
 * @param boolean $names  Allow named Recipients?
 * @return string|null The encoded address list (prefixed with the header name
 *                     and terminated with MAILHEADER_EOL when $header is
 *                     given), or null when no usable address remained
 */
function mail_encode_address($string, $header = '', $names = true)
{
    $headers = '';
    $parts = explode(',', $string);
    foreach ($parts as $part) {
        $part = trim($part);

        // parse address
        if (preg_match('#(.*?)<(.*?)>#', $part, $matches)) {
            $text = trim($matches[1]);
            $addr = $matches[2];
        } else {
            // bare address: clear the name so that one left over from a
            // previous recipient is not attached to this address
            $text = '';
            $addr = $part;
        }

        // skip empty ones
        if (empty($addr)) {
            continue;
        }

        // FIXME: is there a way to encode the localpart of a emailaddress?
        if (!utf8_isASCII($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not ASCII"), -1);
            continue;
        }

        if (!mail_isvalid($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not valid"), -1);
            continue;
        }

        // text was given
        if (!empty($text) && $names) {
            // add address quotes
            $addr = "<{$addr}>";

            if (defined('MAILHEADER_ASCIIONLY')) {
                $text = utf8_deaccent($text);
                $text = utf8_strip($text);
            }

            if (!utf8_isASCII($text)) {
                // put the quotes outside as in =?UTF-8?Q?"Elan Ruusam=C3=A4e"?= vs "=?UTF-8?Q?Elan Ruusam=C3=A4e?="
                if (preg_match('/^"(.+)"$/', $text, $matches)) {
                    $text = '"=?UTF-8?Q?' . mail_quotedprintable_encode($matches[1], 0) . '?="';
                } else {
                    $text = '=?UTF-8?Q?' . mail_quotedprintable_encode($text, 0) . '?=';
                }
                // additionally the space character should be encoded as =20 (or each
                // word QP encoded separately).
                // however this is needed only in mail headers, not globally in mail_quotedprintable_encode().
                $text = str_replace(" ", "=20", $text);
            }
        } else {
            $text = '';
        }

        // add to header comma separated
        if ($headers != '') {
            $headers .= ',';
            if ($header) {
                // avoid overlong mail headers
                $headers .= MAILHEADER_EOL . ' ';
            }
        }
        $headers .= $text . ' ' . $addr;
    }

    if (empty($headers)) {
        return null;
    }

    // if headername was given add it and close correctly
    if ($header) {
        $headers = $header . ': ' . $headers . MAILHEADER_EOL;
    }

    return $headers;
}
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results are memoised in the global $cache_cleanid array, keyed by the raw
 * ID. Because that key does not include $ascii, only calls with the default
 * $ascii = false read from or write to the cache.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @param string  $raw_id The pageid to clean
 * @param boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache =& $cache_cleanid;
    $cacheKey = (string) $raw_id;

    if ($conf['syslog']) {
        syslog(LOG_WARNING, '[pageutils.php] cleanID: raw_id: ' . $raw_id);
    }

    // check if it's already in the memory cache; forced-ASCII results differ
    // from the default ones and must not be served from (or stored in) it
    if (!$ascii && isset($cache[$cacheKey])) {
        return $cache[$cacheKey];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string) $raw_id);
    if ($conf['mixedcase'] == 0) {
        $id = utf8_strtolower($id);
    }

    // alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = utf8_romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = utf8_deaccent($id, -1);
    }

    // remove specials if specialcharacters is set to 0
    if ($conf['specialcharacters'] == 0) {
        $id = utf8_stripspecials($id, $sepchar, '\\*');
    }

    if ($ascii) {
        $id = utf8_strip($id);
    }

    // clean up
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\\._\\-]+#', ':', $id);
    $id = preg_replace('#[:\\._\\-]+:#', ':', $id);

    if (!$ascii) {
        $cache[$cacheKey] = $id;
    }

    if ($conf['syslog']) {
        syslog(LOG_WARNING, '[pageutils.php] cleanID: id to be returned: ' . $id);
    }
    return $id;
}
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results are memoised in the global $cache_cleanid array, keyed by the raw
 * ID. Because that key includes neither $ascii nor $media, only calls that
 * use the defaults for both read from or write to the cache.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @param string  $raw_id The pageid to clean
 * @param boolean $ascii  Force ASCII
 * @param boolean $media  Allow leading or trailing _ for media files
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false, $media = false)
{
    global $conf;
    static $sepcharpat = null;

    global $cache_cleanid;
    $cache =& $cache_cleanid;
    $cacheKey = (string) $raw_id;

    // $ascii and $media both change the result, so only the default
    // combination may be served from (or stored in) the memory cache
    $cacheable = !$ascii && !$media;

    // check if it's already in the memory cache
    if ($cacheable && isset($cache[$cacheKey])) {
        return $cache[$cacheKey];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string) $raw_id);
    $id = utf8_strtolower($id);

    // alternative namespace separator
    $id = strtr($id, ';', ':');
    if ($conf['useslash']) {
        $id = strtr($id, '/', ':');
    } else {
        $id = strtr($id, '/', $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = utf8_romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = utf8_deaccent($id, -1);
    }

    // remove specials
    $id = utf8_stripspecials($id, $sepchar, '\\*');

    if ($ascii) {
        $id = utf8_strip($id);
    }

    // clean up
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    // media IDs keep leading/trailing underscores
    $id = $media ? trim($id, ':.-') : trim($id, ':._-');
    $id = preg_replace('#:[:\\._\\-]+#', ':', $id);

    if ($cacheable) {
        $cache[$cacheKey] = $id;
    }
    return $id;
}
/**
 * Cleanup and encode the headers array
 *
 * Sets a From header from $conf['mailfrom'] when none is present, runs every
 * address header through cleanAddress(), and, when a Subject is set,
 * prepends the bracketed prefix (unless already there) and encodes it.
 */
protected function cleanHeaders()
{
    global $conf;

    // clean up addresses
    if (empty($this->headers['From'])) {
        $this->from($conf['mailfrom']);
    }
    foreach (array('To', 'From', 'Cc', 'Bcc', 'Reply-To', 'Sender') as $field) {
        if (!isset($this->headers[$field])) {
            continue;
        }
        $this->headers[$field] = $this->cleanAddress($this->headers[$field]);
    }

    // everything below only concerns the subject
    if (!isset($this->headers['Subject'])) {
        return;
    }

    // pick the prefix: explicit mailprefix wins, otherwise the (shortened) title
    if (!empty($conf['mailprefix'])) {
        $prefix = '[' . $conf['mailprefix'] . ']';
    } elseif (utf8_strlen($conf['title']) < 20) {
        $prefix = '[' . $conf['title'] . ']';
    } else {
        $prefix = '[' . utf8_substr($conf['title'], 0, 20) . '...]';
    }

    $subject = $this->headers['Subject'];

    // add prefix to subject unless it already starts with it
    if (substr($subject, 0, strlen($prefix)) != $prefix) {
        $subject = $prefix . ' ' . $subject;
    }

    // encode subject
    if (defined('MAILHEADER_ASCIIONLY')) {
        $subject = utf8_strip(utf8_deaccent($subject));
    }
    if (!utf8_isASCII($subject)) {
        $subject = '=?UTF-8?B?' . base64_encode($subject) . '?=';
    }

    $this->headers['Subject'] = $subject;
}
/**
 * Gets a dynamic redirect target based on a redirect param or the referrer.
 *
 * A candidate is only returned when it contains no CR/LF and, once converted
 * to an absolute URI, has the same host as the current request's full URI.
 * Otherwise the fallback is returned.
 *
 * @param string|false $fallbackUrl Fallback if no redirect or referrer is available; if false, uses index
 * @param boolean $useReferrer True uses the referrer if no redirect param is available
 *
 * @return string
 */
public function getDynamicRedirect($fallbackUrl = false, $useReferrer = true)
{
    $target = $this->_input->filterSingle('redirect', XenForo_Input::STRING);

    // no explicit param: try the AJAX referrer header first, then the plain one
    if (!$target && $useReferrer) {
        $target = $this->_request->getServer('HTTP_X_AJAX_REFERER');
        if (!$target) {
            $target = $this->_request->getServer('HTTP_REFERER');
        }
    }

    if ($target) {
        $target = strval($target);

        // a non-empty string that fails a /u match is passed through utf8_strip()
        if (strlen($target) && !preg_match('/./u', $target)) {
            $target = utf8_strip($target);
        }

        $containsLineBreak = strpos($target, "\n") !== false || strpos($target, "\r") !== false;
        if (!$containsLineBreak) {
            $absoluteTarget = XenForo_Link::convertUriToAbsoluteUri($target, true);
            $targetParts = @parse_url($absoluteTarget);

            if ($targetParts && !empty($targetParts['host'])) {
                $requestPaths = XenForo_Application::get('requestPaths');
                $currentParts = @parse_url($requestPaths['fullUri']);

                // only accept targets on the host of the current request
                if ($currentParts
                    && !empty($currentParts['host'])
                    && $currentParts['host'] == $targetParts['host']
                ) {
                    return $absoluteTarget;
                }
            }
        }
    }

    if ($fallbackUrl !== false) {
        return $fallbackUrl;
    }

    return ($this instanceof XenForo_ControllerAdmin_Abstract)
        ? XenForo_Link::buildAdminLink('index')
        : XenForo_Link::buildPublicLink('index');
}
/**
 * Encodes an email address header
 *
 * Recipient names containing non-ASCII characters are encoded as
 * quoted-printable for headers (or deaccented and stripped first when
 * MAILHEADER_ASCIIONLY is defined).
 * Addresses may not contain Non-ASCII data! Such addresses, as well as
 * invalid ones, are reported via msg() and skipped.
 *
 * Example:
 *   mail_encode_address("föö <*****@*****.**>, me@somewhere.com","TBcc");
 *
 * @param string  $string Multiple addresses separated by commas
 * @param string  $header Name of the header (To,Bcc,Cc,...)
 * @param boolean $names  Allow named Recipients?
 * @return string|null The encoded address list (prefixed with the header name
 *                     and terminated with MAILHEADER_EOL when $header is
 *                     given), or null when no usable address remained
 */
function mail_encode_address($string, $header = '', $names = true)
{
    $headers = '';
    // explode() instead of split(): split() was removed in PHP 7 and the
    // delimiter here is a plain string, not a regular expression
    $parts = explode(',', $string);
    foreach ($parts as $part) {
        $part = trim($part);

        // parse address
        if (preg_match('#(.*?)<(.*?)>#', $part, $matches)) {
            $text = trim($matches[1]);
            $addr = $matches[2];
        } else {
            // bare address: clear the name so that one left over from a
            // previous recipient is not attached to this address
            $text = '';
            $addr = $part;
        }

        // skip empty ones
        if (empty($addr)) {
            continue;
        }

        // FIXME: is there a way to encode the localpart of a emailaddress?
        if (!utf8_isASCII($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not ASCII"), -1);
            continue;
        }

        if (!mail_isvalid($addr)) {
            msg(htmlspecialchars("E-Mail address <{$addr}> is not valid"), -1);
            continue;
        }

        // text was given
        if (!empty($text) && $names) {
            // add address quotes
            $addr = "<{$addr}>";

            if (defined('MAILHEADER_ASCIIONLY')) {
                $text = utf8_deaccent($text);
                $text = utf8_strip($text);
            }

            if (!utf8_isASCII($text)) {
                $text = '=?UTF-8?Q?' . mail_quotedprintable_encode($text, 0) . '?=';
            }
        } else {
            $text = '';
        }

        // add to header comma separated and in new line to avoid too long headers
        if ($headers != '') {
            $headers .= ',' . MAILHEADER_EOL . ' ';
        }
        $headers .= $text . ' ' . $addr;
    }

    if (empty($headers)) {
        return null;
    }

    // if headername was given add it and close correctly
    if ($header) {
        $headers = $header . ': ' . $headers . MAILHEADER_EOL;
    }

    return $headers;
}
/**
 * UTF-8 aware replacement for rtrim().
 *
 * Strips whitespace (or other characters) from the end of a string by
 * delegating to utf8_strip() with the UTF8_STRIP_RIGHT mode.
 *
 * @param string $str        The UTF-8 encoded string
 * @param mixed  $stripchars The stripped characters
 * @return string The stripped string
 */
function utf8_rstrip($str, $stripchars = null)
{
    $stripped = utf8_strip($str, $stripchars, UTF8_STRIP_RIGHT);

    return $stripped;
}
/**
 * Checks utf8_strip() against the cases supplied by the data provider.
 *
 * @dataProvider providerUtf8Strip
 *
 * @param mixed $str        First argument passed to utf8_strip()
 * @param mixed $stripchars Second argument passed to utf8_strip()
 * @param mixed $rv         Expected return value
 */
public function testUtf8Strip($str, $stripchars, $rv)
{
    // assertEquals() takes ($expected, $actual); this order keeps the
    // "expected"/"actual" labels in failure output the right way round
    $this->assertEquals($rv, utf8_strip($str, $stripchars));
}
/**
 * Remove every non-ASCII byte (0x80-0xFF) from a string.
 *
 * The previous body called itself unconditionally and therefore never
 * returned.
 * NOTE(review): the byte-stripping behaviour is inferred from one-argument
 * callers that use this function to force ASCII-only output; confirm this
 * matches the intended implementation.
 *
 * @param string $str The input string
 * @return string The input with all bytes >= 0x80 removed
 */
function utf8_strip($str)
{
    // no /u modifier on purpose: the class matches raw bytes, so invalid
    // UTF-8 input is handled as well
    return preg_replace('/[\x80-\xFF]+/', '', (string) $str);
}