/**
 * Sanitize a variable.
 * Trims surrounding whitespace and strips ASCII control characters. When
 * charset normalization is switched on, the client charset is UTF-8 and the
 * value is not already valid UTF-8, the value is additionally normalized to
 * UTF-8 and its special HTML characters are re-encoded.
 * @param $var string
 * @return string
 */
static function cleanVar($var) {
	// Evaluated left to right with short-circuiting: the validity check only
	// runs when normalization is enabled and the client charset is UTF-8.
	$shouldNormalize = Config::getVar('i18n', 'charset_normalization') == 'On'
		&& strtolower_codesafe(Config::getVar('i18n', 'client_charset')) == 'utf-8'
		&& !String::utf8_is_valid($var);

	if ($shouldNormalize) {
		// Normalize, turn HTML entities into real UTF-8 characters (no
		// transcoding), then drop whatever invalid sequences remain.
		$decoded = String::utf8_bad_strip(
			html_entity_decode(String::utf8_normalize($var), ENT_COMPAT, 'UTF-8')
		);

		// Re-encode special HTML characters; the fourth argument
		// (double_encode = false) is only passed from PHP 5.2.3 onwards.
		$var = checkPhpVersion('5.2.3')
			? htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8', false)
			: htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8');
	}

	// Invalid ASCII control characters are always removed before trimming.
	return trim(String::utf8_strip_ascii_ctrl($var));
}
/**
 * Parse an XML file using the specified handler.
 * If no handler has been specified, XMLParserDOMHandler is used by default, returning a tree structure representing the document.
 * The parser created for this call is passed to destroyParser() on every
 * exit path, and a default handler created by this call has destroy() called
 * on it, whether or not the resource could be opened.
 * @param $file string full path to the XML file
 * @return object actual return type depends on the handler; false if the resource could not be opened
 */
function &parse($file) {
	$parser =& $this->createParser();

	if (!isset($this->handler)) {
		// Use default handler for parsing
		$handler = new XMLParserDOMHandler();
		$this->setHandler($handler);
	}

	xml_set_object($parser, $this->handler);
	xml_set_element_handler($parser, "startElement", "endElement");
	xml_set_character_data_handler($parser, "characterData");

	import('lib.pkp.classes.file.FileWrapper');
	$wrapper =& FileWrapper::wrapper($file);

	// Handle responses of various types. The wrapper is checked before it is
	// dereferenced, so a missing wrapper is reported as a failure instead of
	// triggering a call on a non-object.
	$opened = false;
	while ($wrapper) {
		$newWrapper = $wrapper->open();
		if (is_object($newWrapper)) {
			// Follow a redirect
			unset($wrapper);
			$wrapper =& $newWrapper;
			unset($newWrapper);
		} else {
			// A falsy value means the resource could not be opened;
			// anything else means we've found the end result.
			$opened = (bool) $newWrapper;
			break;
		}
	}

	if (!$opened) {
		// Could not open resource -- release what was allocated above, using
		// the same cleanup calls as the success path, then report the error.
		$this->destroyParser($parser);
		if (isset($handler)) {
			$handler->destroy();
			unset($handler);
		}
		// Returned through a variable because the method returns by reference.
		$result = false;
		return $result;
	}

	while (!$wrapper->eof() && ($data = $wrapper->read()) !== false) {
		// if the string contains non-UTF8 characters, convert it to UTF-8 for parsing
		if (Config::getVar('i18n', 'charset_normalization') == 'On' && !String::utf8_compliant($data)) {
			$utf8_last = String::substr($data, String::strlen($data) - 1);

			// if the string ends in a "bad" UTF-8 character, maybe it's truncated
			while (!$wrapper->eof() && String::utf8_bad_find($utf8_last) === 0) {
				// read another chunk of data
				$data .= $wrapper->read();
				$utf8_last = String::substr($data, String::strlen($data) - 1);
			}

			$data = String::utf8_normalize($data);

			// strip any invalid UTF-8 sequences
			$data = String::utf8_bad_strip($data);

			// convert named entities to numeric entities
			$data = strtr($data, String::getHTMLEntities());
		}

		// strip any invalid ASCII control characters
		$data = String::utf8_strip_ascii_ctrl($data);

		// The third argument flags the final chunk once the wrapper reports EOF.
		// Parse errors are recorded and reading continues with the next chunk.
		if (!xml_parse($parser, $data, $wrapper->eof())) {
			$this->addError(xml_error_string(xml_get_error_code($parser)));
		}
	}

	$wrapper->close();

	// Fetch the result before the parser and any default handler are torn down.
	$result =& $this->handler->getResult();

	$this->destroyParser($parser);
	if (isset($handler)) {
		$handler->destroy();
		unset($handler);
	}

	return $result;
}