/**
 * Render the metadata of a resource as a brace-wrapped property list.
 *
 * Metadata is gathered for "stream", "curl" and "xml" resources; any other
 * resource type (or an empty metadata set) is rendered as "{}".
 *
 * @param resource $value
 *
 * @return string
 */
protected function formatMetadata($value)
{
    $type = get_resource_type($value);

    if ($type === 'stream') {
        $metadata = stream_get_meta_data($value);
    } elseif ($type === 'curl') {
        $metadata = curl_getinfo($value);
    } elseif ($type === 'xml') {
        $metadata = array(
            'current_byte_index'    => xml_get_current_byte_index($value),
            'current_column_number' => xml_get_current_column_number($value),
            'current_line_number'   => xml_get_current_line_number($value),
            'error_code'            => xml_get_error_code($value),
        );
    } else {
        $metadata = array();
    }

    if (empty($metadata)) {
        return '{}';
    }

    // One "name: value" entry per property, each value presented and indented.
    $entries = array();
    foreach ($metadata as $key => $item) {
        $presented = $this->presentSubValue($item);
        $entries[] = sprintf('%s: %s', $key, $this->indentValue($presented));
    }

    // The wrapper is itself a format string: "%%s" survives the first sprintf
    // as "%s" and receives the joined entries in the second one.
    $separator = sprintf(',%s%s', PHP_EOL, self::INDENT);
    $wrapper   = sprintf('{%s%s%%s%s}', PHP_EOL, self::INDENT, PHP_EOL);

    return sprintf($wrapper, implode($separator, $entries));
}
/**
 * Start-element handler for the XML parser.
 *
 * Rejects names without a ":" separator, records xml:base on the rdf:RDF
 * root, detects whether the current element is an empty ("/>") element,
 * tracks xml:lang and finally dispatches to node or property processing.
 *
 * @param mixed  $parser The XML parser invoking the handler.
 * @param string $name   Element name.
 * @param array  $attrs  Element attributes keyed by name.
 *
 * @throws Exception If the element name contains no ":".
 *
 * @return void
 */
protected function _startElement($parser, $name, $attrs)
{
    if (strpos($name, ':') === false) {
        throw new Exception('Invalid element name: ' . $name . '.');
    }

    if ($name === RDF_NS . 'RDF') {
        if (isset($attrs[XML_NS . 'base'])) {
            $this->_baseUri = $attrs[XML_NS . 'base'];
        }
        return;
    }

    // Position of the parser inside the current data buffer.
    // NOTE(review): the 4096 factor presumably matches the chunk size used by
    // the code that feeds the parser — confirm against the caller.
    $idx = xml_get_current_byte_index($parser) - $this->_offset * 4096;

    // Only look at the two bytes when both exist; reading past the end of the
    // buffer used to raise an "uninitialized offset" notice/warning.
    $this->_currentElementIsEmpty = $idx >= 0
        && isset($this->_data[$idx], $this->_data[$idx + 1])
        && ($this->_data[$idx] . $this->_data[$idx + 1]) === '/>';

    if (isset($attrs['http://www.w3.org/XML/1998/namespacelang'])) {
        $this->_currentXmlLang = $attrs['http://www.w3.org/XML/1998/namespacelang'];
    }

    if ($this->_topElemIsProperty()) {
        // The surrounding element is a property, so this element is a
        // subject and/or object node.
        $this->_processNode($name, $attrs);
    } else {
        // This element is a property.
        $this->_processProperty($name, $attrs);
    }
}
/**
 * Adds the current position and error state of an XML parser to a cast array.
 *
 * When the error code is a key of self::$xmlErrors it is wrapped in a
 * ConstStub built from the mapped entry and the numeric code.
 *
 * @param mixed $h        The XML parser.
 * @param array $a        Properties collected so far.
 * @param Stub  $stub     Not used by this caster.
 * @param bool  $isNested Not used by this caster.
 *
 * @return array
 */
public static function castXml($h, array $a, Stub $stub, $isNested)
{
    $state = array(
        'current_byte_index'    => xml_get_current_byte_index($h),
        'current_column_number' => xml_get_current_column_number($h),
        'current_line_number'   => xml_get_current_line_number($h),
        'error_code'            => xml_get_error_code($h),
    );

    foreach ($state as $key => $val) {
        $a[$key] = $val;
    }

    $code = $a['error_code'];
    if (isset(self::$xmlErrors[$code])) {
        $a['error_code'] = new ConstStub(self::$xmlErrors[$code], $code);
    }

    return $a;
}
/**
 * Builds the exception from the current error state of an XML parser.
 *
 * @param string              $message Message; when empty, the parser's own error string is used.
 * @param mixed               $parser  The XML parser that reported the error.
 * @param \SplFileObject|null $file    Source file, if the data came from one.
 *
 * @throws \BadMethodCallException If no error code can be read from $parser.
 */
public function __construct($message, $parser, \SplFileObject $file = NULL)
{
    $errorCode = xml_get_error_code($parser);
    $this->code = $errorCode;

    if (false === $errorCode) {
        throw new \BadMethodCallException('This is not a valid xml_parser resource.');
    }

    if (!$message) {
        $message = xml_error_string($errorCode);
    }
    parent::__construct($message, $errorCode);

    if ($file) {
        $this->file = $file->getPathname();
    } else {
        $this->file = '(data stream)';
    }

    // Position of the error inside the parsed source.
    $this->line = xml_get_current_line_number($parser);
    $this->err['srcColumn'] = xml_get_current_column_number($parser);
    $this->err['srcIndex'] = xml_get_current_byte_index($parser);
}
/**
 * Parses an XML string, dispatching to this object's startElement,
 * endElement and characterData handlers. On failure a diagnostic is echoed.
 *
 * @ignore
 *
 * @param string $xml Complete XML document.
 *
 * @return void
 */
private function parse($xml)
{
    $xml_parser = xml_parser_create();
    xml_set_object($xml_parser, $this);
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
    xml_set_element_handler($xml_parser, array(&$this, "startElement"), array(&$this, "endElement"));
    xml_set_character_data_handler($xml_parser, array(&$this, "characterData"));

    if (!xml_parse($xml_parser, $xml, true)) {
        // xml_error_string() expects an error code, not the parser itself.
        $errorCode = xml_get_error_code($xml_parser);
        echo 'MetaValue error: ' . xml_error_string($errorCode) . '<br>'
            . 'Error line: ' . xml_get_current_line_number($xml_parser) . '<br>'
            . 'Byte number error: ' . xml_get_current_byte_index($xml_parser) . '<br><br>';
    }

    xml_parser_free($xml_parser);
}
/**
 * Fetches an XML document and returns its reorganized structure.
 *
 * A parse failure is reported as an E_USER_WARNING (the message used to be
 * built and then silently discarded); the return value is still whatever
 * reorganize() produces for the values collected so far.
 *
 * @param string $url Location handed to file_get_contents().
 *
 * @return mixed Result of $this->reorganize().
 */
function parse($url)
{
    $values = "";
    $encoding = 'UTF-8';
    $data = file_get_contents($url);

    $parser = xml_parser_create($encoding);
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);

    $ok = xml_parse_into_struct($parser, $data, $values);
    if (!$ok) {
        $code = xml_get_error_code($parser);
        $errmsg = sprintf(
            "XML parse error %d '%s' at line %d, column %d (byte index %d)",
            $code,
            xml_error_string($code),
            xml_get_current_line_number($parser),
            xml_get_current_column_number($parser),
            xml_get_current_byte_index($parser)
        );
        // Surface the failure instead of dropping the message.
        trigger_error($errmsg, E_USER_WARNING);
    }
    xml_parser_free($parser);

    return $this->reorganize($values);
}
/**
 * Import HTML from $source into $target.
 *
 * The source node is serialised, every tag is rewritten with an "xhtml:"
 * prefix by a SAX pass, and the children of the rewritten root are imported
 * into an xhtml:div container appended to $target.
 *
 * @param \DOMElement $source
 * @param \DOMElement $target
 *
 * @since 1.0
 */
public static function import(\DOMElement $source, \DOMElement $target)
{
    /* Container that receives the imported nodes */
    $wrapper = $target->ownerDocument->createElement('xhtml:div');
    $wrapper->setAttributeNS(Atom::XMLNS, 'xmlns:xhtml', Atom::XHTML);
    $target->appendChild($wrapper);

    /* Serialised input and the buffer the handlers write the prefixed XML to */
    $xmlFrom = $source->ownerDocument->saveXML($source);
    $xmlTo = '';
    $xhtmlNsSet = false;

    /* Opening tags: emit "<xhtml:name attr=...>", declaring the namespace on the first one */
    $onOpen = function ($parser, $name, array $attrs) use ($xmlFrom, &$xmlTo, &$xhtmlNsSet) {
        $selfClosing = '/>' === substr($xmlFrom, xml_get_current_byte_index($parser), 2);

        $tag = '<xhtml:' . $name;
        if (!$xhtmlNsSet) {
            $attrs['xmlns:xhtml'] = Atom::XHTML;
            $xhtmlNsSet = true;
        }
        foreach ($attrs as $attr => $value) {
            $tag .= sprintf(' %s="%s"', $attr, htmlspecialchars($value, ENT_COMPAT | ENT_XML1));
        }
        $tag .= $selfClosing ? '/>' : '>';

        $xmlTo .= $tag;
    };

    /* Closing tags: nothing to emit when the element was written as self-closing */
    $onClose = function ($parser, $name) use ($xmlFrom, &$xmlTo) {
        $selfClosing = '/>' === substr($xmlFrom, xml_get_current_byte_index($parser) - 2, 2);
        if (!$selfClosing) {
            $xmlTo .= '</xhtml:' . $name . '>';
        }
    };

    /* Everything else is copied through unchanged */
    $onOther = function ($parser, $data) use (&$xmlTo) {
        $xmlTo .= $data;
    };

    $parser = xml_parser_create('UTF-8');
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_set_element_handler($parser, $onOpen, $onClose);
    xml_set_default_handler($parser, $onOther);
    xml_parse($parser, $xmlFrom, true);
    xml_parser_free($parser);

    /* Import the prefixed XML into the container */
    $tmpDoc = new \DOMDocument('1.0', 'utf-8');
    $tmpDoc->loadXML($xmlTo);
    foreach ($tmpDoc->documentElement->childNodes as $child) {
        $imported = $wrapper->ownerDocument->importNode($child, true);
        $wrapper->appendChild($imported);
    }
}
/**
 * Creates the object tree from the XML held in $this->xml.
 *
 * The document is passed to the parser as the final (and only) chunk so
 * that errors which are only detectable at end of input are reported.
 *
 * @return true|array True on success, otherwise an array with the keys
 *                    err_code, err_string, err_line, err_col and err_byte.
 */
function parse()
{
    $this->parser = xml_parser_create($this->encoding);
    xml_set_object($this->parser, $this);
    xml_set_element_handler($this->parser, "startElement", "endElement");
    xml_set_character_data_handler($this->parser, "characterData");
    xml_set_default_handler($this->parser, "defaultHandler");

    $result = true;
    if (!xml_parse($this->parser, $this->xml, true)) {
        // Error while parsing the document: collect code, text and position.
        $err_code = xml_get_error_code($this->parser);
        $result = array(
            'err_code'   => $err_code,
            'err_string' => xml_error_string($err_code),
            'err_line'   => xml_get_current_line_number($this->parser),
            'err_col'    => xml_get_current_column_number($this->parser),
            'err_byte'   => xml_get_current_byte_index($this->parser),
        );
    }

    xml_parser_free($this->parser);

    return $result;
}
/**
 * Parses a schema document and returns the resulting rules.
 *
 * Every node produced by xml_parse_into_struct() is dispatched to the
 * method named by makeMethod(tag, type). On failure $this->error and the
 * err_* properties describe the problem.
 *
 * @param string $schema Schema source.
 *
 * @return mixed $this->rules on success, false on failure.
 */
function parse($schema)
{
    $this->parser = xml_parser_create($this->encoding);
    if (!$this->parser) {
        $this->error = 'Relax Error: Failed to create an XML parser!';
        return false;
    }

    $foldingDisabled = xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);
    if (!$foldingDisabled) {
        xml_parser_free($this->parser);
        $this->error = 'Relax Error: Failed to disable case folding!';
        return false;
    }

    // Reset per-parse state.
    $this->schema = $schema;
    $this->attrOpen = false;
    $this->definitions = array();
    $this->addUntil = false;
    $this->error = false;
    $this->rule = 'required';

    $parsed = xml_parse_into_struct($this->parser, $schema, $this->nodes, $this->tags);

    if (!$parsed) {
        $this->err_code = xml_get_error_code($this->parser);
        $this->err_line = xml_get_current_line_number($this->parser);
        $this->err_byte = xml_get_current_byte_index($this->parser);
        $this->err_colnum = xml_get_current_column_number($this->parser);
        $this->error = 'Relax Error: ' . xml_error_string($this->err_code);
        xml_parser_free($this->parser);
        return false;
    }

    xml_parser_free($this->parser);

    foreach ($this->nodes as $node) {
        $handler = $this->makeMethod($node['tag'], $node['type']);
        $this->{$handler}($node);
    }

    return $this->rules;
}
/**
 * Records an error and forwards it, with the parser position, to
 * SetErrorPosition().
 *
 * Without an active parser the position defaults to line 1, column 1,
 * byte index 0.
 *
 * @param mixed $error_number Error identifier.
 * @param mixed $error        Error description.
 *
 * @return void
 */
function SetError($error_number, $error)
{
    $this->error_number = $error_number;
    $this->error = $error;

    $line = 1;
    $column = 1;
    $byte_index = 0;

    if ($this->xml_parser) {
        $line = xml_get_current_line_number($this->xml_parser);
        $column = xml_get_current_column_number($this->xml_parser);
        $byte_index = xml_get_current_byte_index($this->xml_parser);
    }

    $this->SetErrorPosition($error_number, $error, $line, $column, $byte_index);
}
/**
 * Parses xml text using Expat
 *
 * After the call the document's errorCode and errorString properties hold
 * the parser's final error state and position (also on success).
 *
 * @param Object  A reference to the DOM document that the xml is to be parsed into
 * @param string  The text to be parsed
 * @param boolean True if CDATA Section nodes are not to be converted into Text nodes
 * @return boolean True if the parsing is successful
 */
function parse(&$myXMLDoc, $xmlText, $preserveCDATA = true)
{
    $this->xmlDoc =& $myXMLDoc;
    $this->lastChild =& $this->xmlDoc;
    $this->preserveCDATA = $preserveCDATA;

    // Create the Expat parser. The result of a function call is not a
    // variable, so it is assigned by value rather than with "=&".
    if (version_compare(phpversion(), '5.0', '<=')) {
        $parser = xml_parser_create('');
    } else {
        $parser = xml_parser_create();
    }

    // Set handlers for SAX events.
    xml_set_object($parser, $this);
    xml_set_element_handler($parser, 'startElement', 'endElement');
    xml_set_character_data_handler($parser, 'dataElement');
    xml_set_default_handler($parser, 'defaultDataElement');
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);

    // Strip whitespace between tags by hand (XML_OPTION_SKIP_WHITE = 1 does
    // not seem to work consistently across versions of PHP and Expat).
    // preg_replace() replaces eregi_replace(), which was removed in PHP 7.
    $xmlText = preg_replace('/>[[:space:]]+</', '><', $xmlText);

    // The whole document is supplied at once, so mark it as the final chunk;
    // otherwise errors detected only at end of input go unreported.
    $success = xml_parse($parser, $xmlText, true);

    $this->xmlDoc->errorCode = xml_get_error_code($parser);
    $this->xmlDoc->errorString = xml_error_string($this->xmlDoc->errorCode)
        . ' at line ' . xml_get_current_line_number($parser)
        . ', column ' . xml_get_current_column_number($parser)
        . ', byte ' . xml_get_current_byte_index($parser);

    xml_parser_free($parser);

    return $success;
}
/**
 * Parses XML data, feeding tag_open(), tag_close() and cdata().
 *
 * A leading byte-order mark is stripped and an existing XML declaration is
 * rewritten to carry $encoding. Parsing uses the expat-style parser when a
 * one-off sanity probe succeeds, otherwise XMLReader. Error code/string and
 * the current position are stored on the object.
 *
 * @param string $data     XML data; modified in place (BOM/declaration handling).
 * @param string $encoding Character encoding of $data.
 *
 * @return bool True on success, false on failure.
 */
public function parse(&$data, $encoding)
{
    // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
    if (strtoupper($encoding) === 'US-ASCII') {
        $this->encoding = 'UTF-8';
    } else {
        $this->encoding = $encoding;
    }

    // Strip BOM. The marks are written as byte escapes so they cannot be
    // damaged by the encoding the source file happens to be saved in.
    if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") {
        // UTF-32 Big Endian BOM
        $data = substr($data, 4);
    } elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") {
        // UTF-32 Little Endian BOM
        $data = substr($data, 4);
    } elseif (substr($data, 0, 2) === "\xFE\xFF") {
        // UTF-16 Big Endian BOM
        $data = substr($data, 2);
    } elseif (substr($data, 0, 2) === "\xFF\xFE") {
        // UTF-16 Little Endian BOM
        $data = substr($data, 2);
    } elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") {
        // UTF-8 BOM
        $data = substr($data, 3);
    }

    // Replace an existing XML declaration with one that states $encoding.
    if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\t\n\r ") && ($pos = strpos($data, '?>')) !== false) {
        $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
        if ($declaration->parse()) {
            $data = substr($data, $pos + 2);
            $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . ($declaration->standalone ? 'yes' : 'no') . '"?>' . $data;
        } else {
            $this->error_string = 'SimplePie bug! Please report this!';
            return false;
        }
    }

    $return = true;

    // Probe once per request whether the xml extension yields a value for a
    // bare "&" inside an element; the result selects the parser below.
    static $xml_is_sane = null;
    if ($xml_is_sane === null) {
        $parser_check = xml_parser_create();
        xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
        xml_parser_free($parser_check);
        $xml_is_sane = isset($values[0]['value']);
    }

    // Create the parser
    if ($xml_is_sane) {
        $xml = xml_parser_create_ns($this->encoding, $this->separator);
        xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
        xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
        xml_set_object($xml, $this);
        xml_set_character_data_handler($xml, 'cdata');
        xml_set_element_handler($xml, 'tag_open', 'tag_close');

        // Parse!
        if (!xml_parse($xml, $data, true)) {
            $this->error_code = xml_get_error_code($xml);
            $this->error_string = xml_error_string($this->error_code);
            $return = false;
        }
        $this->current_line = xml_get_current_line_number($xml);
        $this->current_column = xml_get_current_column_number($xml);
        $this->current_byte = xml_get_current_byte_index($xml);
        xml_parser_free($xml);
        return $return;
    } else {
        // Fallback: walk the document with XMLReader and call the same handlers.
        libxml_clear_errors();
        $xml = new XMLReader();
        $xml->xml($data);
        while (@$xml->read()) {
            switch ($xml->nodeType) {
                case constant('XMLReader::END_ELEMENT'):
                    if ($xml->namespaceURI !== '') {
                        $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                    } else {
                        $tagName = $xml->localName;
                    }
                    $this->tag_close(null, $tagName);
                    break;

                case constant('XMLReader::ELEMENT'):
                    // Must be read before moving onto the attributes.
                    $empty = $xml->isEmptyElement;
                    if ($xml->namespaceURI !== '') {
                        $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                    } else {
                        $tagName = $xml->localName;
                    }
                    $attributes = array();
                    while ($xml->moveToNextAttribute()) {
                        if ($xml->namespaceURI !== '') {
                            $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
                        } else {
                            $attrName = $xml->localName;
                        }
                        $attributes[$attrName] = $xml->value;
                    }
                    $this->tag_open(null, $tagName, $attributes);
                    if ($empty) {
                        $this->tag_close(null, $tagName);
                    }
                    break;

                case constant('XMLReader::TEXT'):
                case constant('XMLReader::CDATA'):
                    $this->cdata(null, $xml->value);
                    break;
            }
        }
        if ($error = libxml_get_last_error()) {
            $this->error_code = $error->code;
            $this->error_string = $error->message;
            $this->current_line = $error->line;
            $this->current_column = $error->column;
            return false;
        } else {
            return true;
        }
    }
}
/**
 * Flags the object as failed and triggers a user-level error whose message
 * carries the parser's current byte index.
 *
 * @param string error message
 * @access private
 * @return void
 */
function raiseError($msg)
{
    $this->isError = TRUE;

    $byteIndex = xml_get_current_byte_index($this->parser);
    $fullMessage = $msg . ' [byte index: ' . $byteIndex . ']';

    trigger_error($fullMessage, E_USER_ERROR);
}
/**
 * Streams the input file through the XML parser in 4096-byte chunks,
 * logging progress as it goes.
 *
 * Progress is based on the number of bytes actually read and capped at
 * 100%. The parser is released before a parse error is thrown.
 *
 * @throws \RuntimeException When the parser reports an error.
 *
 * @return void
 */
public function run()
{
    $xml = xml_parser_create();
    xml_set_element_handler($xml, [&$this, "start"], [&$this, "end"]);
    xml_set_character_data_handler($xml, [&$this, "data"]);
    xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, false);

    $totalFileSize = filesize($this->iFileName);
    $processed = 0;
    $this->log("Processed: 0%\r");

    // Compare explicitly: a chunk consisting of just "0" is falsy and would
    // otherwise end the loop early.
    while (($data = fread($this->iFh, 4096)) !== false && $data !== '') {
        if (!xml_parse($xml, $data, feof($this->iFh))) {
            $code = xml_get_error_code($xml);
            $message = sprintf(
                'XML parser error %s: %s at line %s at column %s (byte index %s)',
                $code,
                xml_error_string($code),
                xml_get_current_line_number($xml),
                xml_get_current_column_number($xml),
                xml_get_current_byte_index($xml)
            );
            // Release the parser before leaving; it used to leak on this path.
            xml_parser_free($xml);
            throw new \RuntimeException($message);
        }

        // Count what was really read; the last chunk is usually shorter.
        $processed += strlen($data);

        // The size may be unavailable (false/0); skip the percentage then
        // rather than dividing by zero.
        if ($totalFileSize > 0) {
            $percentage = round(min(100, $processed / $totalFileSize * 100), 2);
            $this->log("Processed: {$percentage}%\r");
        }
    }

    xml_parser_free($xml);
    $this->log('Processed: 100% ');
}
$rss_parser = new baseParserClass("rss2html"); $rss_parser->noFutureItems = $NoFutureItems; $rss_parser->wholeString = $convertedXML; xml_set_object($xml_parser, $rss_parser); xml_set_element_handler($xml_parser, "startElement", "endElement"); xml_set_character_data_handler($xml_parser, "characterData"); xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1); $parseResult = xml_parse($xml_parser, $convertedXML, TRUE); if ($parseResult == 0) { if (!isset($hideErrors)) { $errorCode = xml_get_error_code($xml_parser); echo "\$errorCode = {$errorCode}<br>\n"; echo "xml_error_string() = " . xml_error_string($errorCode) . "<br>\n"; echo "xml_get_current_line_number() = " . xml_get_current_line_number($xml_parser) . "<br>\n"; echo "xml_get_current_column_number() = " . xml_get_current_column_number($xml_parser) . "<br>\n"; echo "xml_get_current_byte_index() = " . xml_get_current_byte_index($xml_parser) . "<br>\n"; } } else { xml_parser_free($xml_parser); // make sure the channel contentEncoded is not blank if ($rss_parser->FeedContentEncoded == "") { $rss_parser->FeedContentEncoded = $rss_parser->FeedDescription; } $template = FeedForAll_rss2html_str_replace("~~~FeedXMLFilename~~~", FeedForAll_rss2html_EscapeLink($XMLfilename), $template); $template = FeedForAll_rss2html_str_replace("~~~FeedTitle~~~", FeedForAll_rss2html_limitLength($rss_parser->FeedTitle, $limitFeedTitleLength), $template); $template = FeedForAll_rss2html_str_replace("~~~FeedDescription~~~", FeedForAll_rss2html_limitLength($rss_parser->FeedDescription, $limitFeedDescriptionLength), $template); $template = FeedForAll_rss2html_str_replace("~~~FeedContentEncoded~~~", $rss_parser->FeedContentEncoded, $template); $template = FeedForAll_rss2html_str_replace("~~~FeedLink~~~", FeedForAll_rss2html_EscapeLink($rss_parser->FeedLink), $template); $template = FeedForAll_rss2html_str_replace("~~~FeedPubDate~~~", $rss_parser->FeedPubDate, $template); $template = 
FeedForAll_rss2html_str_replace("~~~FeedPubLongDate~~~", date($LongDateFormat, $rss_parser->FeedPubDate_t), $template); $template = FeedForAll_rss2html_str_replace("~~~FeedPubShortDate~~~", date($ShortDateFormat, $rss_parser->FeedPubDate_t), $template);
/**
 * Returns the byte index the XML parser held in $this->parser is currently at.
 *
 * @return int
 */
function getCharacterOffset()
{
    $byteIndex = xml_get_current_byte_index($this->parser);

    return $byteIndex;
}
/**
 * Parse a chunk of data.
 *
 * @param string $data  A UTF-8 string of arbitrary length to parse.
 * @param bool   $final Whether this is the final piece of data.
 *
 * @throws XMLException When the parser reports an error; the exception
 *                      carries the parser's error code.
 *
 * @return XMLHandler $this
 */
public function parse($data, $final = false)
{
    // Simply pass the data to Expat.
    if (xml_parse($this->parser, $data, $final) != 1) {
        // An error occurred. Retrieve info and throw an exception.
        $parser = $this->parser;
        $code = xml_get_error_code($parser);

        throw new XMLException(
            sprintf(
                'XML parsing error %d at %d:%d (byte %d): %s',
                $code,
                xml_get_current_line_number($parser),
                xml_get_current_column_number($parser),
                xml_get_current_byte_index($parser),
                xml_error_string($code)
            ),
            $code
        );
    }

    // Documented as fluent, but nothing was returned before.
    return $this;
}
/**
 * constructor that actually does the parsing
 *
 * The charset announced by the caller is compared with the one in the XML
 * declaration without regard to case, so "utf-8" and "UTF-8" are treated as
 * the same encoding.
 *
 * @param    string $xml SOAP message
 * @param    string $encoding character encoding scheme of message
 * @param    string $method method for which XML is parsed (unused?)
 * @param    string $decode_utf8 whether to decode UTF-8 to ISO-8859-1
 * @access   public
 */
function soap_parser($xml, $encoding = 'UTF-8', $method = '', $decode_utf8 = true)
{
    parent::nusoap_base();
    $this->xml = $xml;
    $this->xml_encoding = $encoding;
    $this->method = $method;
    $this->decode_utf8 = $decode_utf8;

    // Check whether content has been read.
    if (!empty($xml)) {
        // Check XML encoding
        $pos_xml = strpos($xml, '<?xml');
        if ($pos_xml !== FALSE) {
            $xml_decl = substr($xml, $pos_xml, strpos($xml, '?>', $pos_xml + 2) - $pos_xml + 1);
            if (preg_match("/encoding=[\"']([^\"']*)[\"']/", $xml_decl, $res)) {
                $xml_encoding = $res[1];
                // Encoding names are case-insensitive: normalise both sides.
                if (strtoupper($xml_encoding) != strtoupper($encoding)) {
                    $err = "Charset from HTTP Content-Type '" . $encoding . "' does not match encoding from XML declaration '" . $xml_encoding . "'";
                    $this->debug($err);
                    if (strtoupper($encoding) != 'ISO-8859-1' || strtoupper($xml_encoding) != 'UTF-8') {
                        $this->setError($err);
                        return;
                    }
                    // when HTTP says ISO-8859-1 (the default) and XML says UTF-8 (the typical),
                    // assume the other endpoint is just sloppy and proceed
                } else {
                    $this->debug('Charset from HTTP Content-Type matches encoding from XML declaration');
                }
            } else {
                $this->debug('No encoding specified in XML declaration');
            }
        } else {
            $this->debug('No XML declaration');
        }
        $this->debug('Entering soap_parser(), length=' . strlen($xml) . ', encoding=' . $encoding);

        // Create an XML parser - why not xml_parser_create_ns?
        $this->parser = xml_parser_create($this->xml_encoding);

        // Set the options for parsing the XML data.
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, $this->xml_encoding);

        // Set the object for the parser.
        xml_set_object($this->parser, $this);

        // Set the element handlers for the parser.
        xml_set_element_handler($this->parser, 'start_element', 'end_element');
        xml_set_character_data_handler($this->parser, 'character_data');

        // Parse the XML file.
        if (!xml_parse($this->parser, $xml, true)) {
            // Record an error message.
            $err = sprintf(
                'XML error parsing SOAP payload on line %d, byte %d: %s',
                xml_get_current_line_number($this->parser),
                xml_get_current_byte_index($this->parser),
                xml_error_string(xml_get_error_code($this->parser))
            );
            $this->debug($err);
            $this->debug("XML payload:\n" . $xml);
            $this->setError($err);
        } else {
            $this->debug('parsed successfully, found root struct: ' . $this->root_struct . ' of name ' . $this->root_struct_name);

            // get final value
            $this->soapresponse = $this->message[$this->root_struct]['result'];

            // The header value is not extracted here because it is documented
            // as an XML string.

            // resolve hrefs/ids
            if (sizeof($this->multirefs) > 0) {
                foreach ($this->multirefs as $id => $hrefs) {
                    $this->debug('resolving multirefs for id: ' . $id);
                    $idVal = $this->buildVal($this->ids[$id]);
                    unset($idVal['!id']);
                    foreach ($hrefs as $refPos => $ref) {
                        $this->debug('resolving href at pos ' . $refPos);
                        $this->multirefs[$id][$refPos] = $idVal;
                    }
                }
            }
        }
        xml_parser_free($this->parser);
    } else {
        $this->debug('xml was empty, didn\'t parse!');
        $this->setError('xml was empty, didn\'t parse!');
    }
}
/**
 * Main function to call to parse XMP. Use getResults to
 * get results.
 *
 * Also catches any errors during processing, writes them to
 * debug log, blanks result array and returns false.
 *
 * @param string $content XMP data
 * @param bool $allOfIt If this is all the data (true) or if its split up (false). Default true
 * @throws RuntimeException
 * @return bool Success.
 */
public function parse($content, $allOfIt = true)
{
    if (!$this->xmlParser) {
        $this->resetXMLParser();
    }
    try {
        // detect encoding by looking for BOM which is supposed to be in processing instruction.
        // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
        if (!$this->charset) {
            $bom = array();
            if (preg_match('/\\xEF\\xBB\\xBF|\\xFE\\xFF|\\x00\\x00\\xFE\\xFF|\\xFF\\xFE\\x00\\x00|\\xFF\\xFE/', $content, $bom)) {
                // The marks are spelled as byte escapes: written as raw
                // characters they get mangled by the file's own encoding and
                // the UTF-32 labels collapse into the UTF-16 ones.
                switch ($bom[0]) {
                    case "\xFE\xFF":
                        $this->charset = 'UTF-16BE';
                        break;
                    case "\xFF\xFE":
                        $this->charset = 'UTF-16LE';
                        break;
                    case "\x00\x00\xFE\xFF":
                        $this->charset = 'UTF-32BE';
                        break;
                    case "\xFF\xFE\x00\x00":
                        $this->charset = 'UTF-32LE';
                        break;
                    case "\xEF\xBB\xBF":
                        $this->charset = 'UTF-8';
                        break;
                    default:
                        // this should be impossible to get to
                        throw new RuntimeException("Invalid BOM");
                }
            } else {
                // standard specifically says, if no bom assume utf-8
                $this->charset = 'UTF-8';
            }
        }
        if ($this->charset !== 'UTF-8') {
            // don't convert if already utf-8
            MediaWiki\suppressWarnings();
            $content = iconv($this->charset, 'UTF-8//IGNORE', $content);
            MediaWiki\restoreWarnings();
        }

        // Ensure the XMP block does not have an xml doctype declaration, which
        // could declare entities unsafe to parse with xml_parse (T85848/T71210).
        if ($this->parsable !== self::PARSABLE_OK) {
            if ($this->parsable === self::PARSABLE_NO) {
                throw new RuntimeException('Unsafe doctype declaration in XML.');
            }

            $content = $this->xmlParsableBuffer . $content;
            if (!$this->checkParseSafety($content)) {
                if (!$allOfIt && $this->parsable !== self::PARSABLE_NO) {
                    // The parse has not failed yet and more data may follow,
                    // so report success for now.
                    return true;
                }
                $msg = $this->parsable === self::PARSABLE_NO
                    ? 'Unsafe doctype declaration in XML.'
                    : 'No root element found in XML.';
                throw new RuntimeException($msg);
            }
        }

        $ok = xml_parse($this->xmlParser, $content, $allOfIt);
        if (!$ok) {
            $code = xml_get_error_code($this->xmlParser);
            $error = xml_error_string($code);
            $line = xml_get_current_line_number($this->xmlParser);
            $col = xml_get_current_column_number($this->xmlParser);
            $offset = xml_get_current_byte_index($this->xmlParser);

            $this->logger->warning(
                '{method} : Error reading XMP content: {error} ' . '(line: {line} column: {column} byte offset: {offset})',
                array(
                    'method' => __METHOD__,
                    'error_code' => $code,
                    'error' => $error,
                    'line' => $line,
                    'column' => $col,
                    'offset' => $offset,
                    'content' => $content,
                )
            );
            $this->results = array(); // blank if error.
            $this->destroyXMLParser();

            return false;
        }
    } catch (Exception $e) {
        $this->logger->warning(
            '{method} Exception caught while parsing: ' . $e->getMessage(),
            array('method' => __METHOD__, 'exception' => $e, 'content' => $content)
        );
        $this->results = array();

        return false;
    }
    if ($allOfIt) {
        $this->destroyXMLParser();
    }

    return true;
}
/**
 * Executes a template. $obj is an optional object you
 * can pass to the template, which makes its properties immediately
 * available to the template. $carry is used internally to determine
 * whether to reset the object register before executing.
 *
 * Work is done on a clone of this object; afterwards the clone's blocks
 * (those not already present), row count and table of contents are copied
 * back. On failure $this->error and the err_* properties are set.
 *
 * @access public
 * @param string $data
 * @param object $obj
 * @param boolean $carry
 * @return string|false Rendered output, or false on a parser failure.
 *
 */
function fill($data, $obj = '', $carry = false)
{
    $this->error = false;

    // duplicate object for parser isolation
    $tpl = clone $this; // deliberate copy, we want two separate objects here
    $tpl->exp = clone $this->exp;

    if (!$carry) {
        $tpl->exp->resetRegister();
        $tpl->carry = false;
    } else {
        $tpl->carry = true;
    }

    $tpl->output = '';

    if ($obj !== '') {
        $tpl->exp->setObject($obj);
    } else {
        $tpl->exp->setObject(new StdClass());
    }

    // Reset the per-run state of the clone.
    $tpl->sql = array();
    $tpl->loop = array();
    $tpl->if = array();
    $tpl->switch = array();
    $tpl->buffer = array();
    $tpl->open = false;
    $tpl->open_var = false;
    $tpl->toc = array();
    $tpl->rows = 0;
    $tpl->error = false;
    $tpl->err_code = false;
    $tpl->err_byte = false;
    $tpl->err_line = false;
    $tpl->err_colnum = false;

    $data = $tpl->getDoc($data);

    if (is_array($data)) {
        // use nodeCache instead of new xml parser
        foreach ($data as $node) {
            $node = $this->reverseEntities($node);
            $tpl->_output($tpl->{$tpl->makeMethod($node['tag'], $node['type'], $node['level'])}($node));
        }

        // gather blocks from included templates
        foreach ($tpl->block as $key => $block) {
            if (!isset($this->block[$key])) {
                $this->block[$key] =& $tpl->block[$key];
            }
        }

        $this->rows = $tpl->rows;
        $this->toc = $tpl->toc;

        return $tpl->output;
    }

    // create the xml parser now, and declare the handler methods
    $this->parser = xml_parser_create($this->encoding);
    if (!$this->parser) {
        $this->error = 'Template Error: Failed to create an XML parser!';
        return false;
    }
    if (!xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false)) {
        xml_parser_free($this->parser);
        $this->error = 'Template Error: Failed to disable case folding!';
        return false;
    }

    // From here on the parser is known to exist: the former
    // "if ($this->parser) ... else 'No parser available!'" wrapper could
    // never take its else branch and has been removed.
    $data = $this->convertEntities($data);

    if (!xml_parse_into_struct($this->parser, $data, $tpl->vals, $tpl->tags)) {
        $this->err_code = xml_get_error_code($this->parser);
        $this->err_line = xml_get_current_line_number($this->parser);
        $this->err_byte = xml_get_current_byte_index($this->parser);
        $this->err_colnum = xml_get_current_column_number($this->parser);
        $this->error = 'Template Error: ' . xml_error_string($this->err_code);
        xml_parser_free($this->parser);
        return false;
    }

    xml_parser_free($this->parser);

    // cache the node structure
    $this->nodeCache[$data] = $tpl->vals;

    // list of paths for the current tag and its parents
    // takes the form [level] = [path 1, path 2]
    $this->_path_list = array();

    // the current level
    $this->_path_level = 0;

    // mainloop
    foreach ($tpl->vals as $node) {
        $node = $this->reverseEntities($node);
        $norm_tag = str_replace(':', '-', $node['tag']);

        // Character data, ch-/xt- tags and tags without bindings are
        // rendered directly, without any path bookkeeping.
        if ($node['type'] == 'cdata' || strpos($norm_tag, 'ch-') === 0 || strpos($norm_tag, 'xt-') === 0 || !in_array($norm_tag, $tpl->_bind_parts)) {
            $tpl->_output($tpl->{$tpl->makeMethod($node['tag'], $node['type'], $node['level'])}($node));
            continue;
        }

        $node['paths'] = array();

        if ($node['type'] == 'open' || $node['type'] == 'complete') {
            if ($node['level'] > $this->_path_level) {
                // moving up a new level (ie. a sub-node)
                $this->_path_level++;
                $this->_path_list[$this->_path_level] = $node;
            } elseif ($this->_path_level > 0 && $node['level'] == $this->_path_level) {
                // next sibling at the same level
                array_pop($this->_path_list);
                $this->_path_list[$this->_path_level] = $node;
            } elseif ($node['level'] < $this->_path_level) {
                // do nothing...
            } else {
                // moving up a new level
                $this->_path_level++;
                $this->_path_list[$this->_path_level] = $node;
            }

            // compile all variations of this tag's xpath for a match in $this->_bind_list
            $paths = array('//' . $node['tag']);
            $list = $this->_path_list[$this->_path_level - 2]['paths'];
            if (is_array($list)) {
                foreach ($list as $p) {
                    $paths[] = $p . '/' . $node['tag'];
                    $paths[] = $p . '//' . $node['tag'];
                }
            } else {
                $paths[] = '/' . $node['tag'];
            }

            // Add an attribute-qualified variant of every path collected so
            // far ($count is fixed before the list starts growing).
            $count = count($paths);
            $cpl = count($this->_path_list) - 1;
            if (is_array($this->_path_list[$cpl]['attributes'])) {
                foreach ($this->_path_list[$cpl]['attributes'] as $k => $v) {
                    if (strpos($k, 'xt:') !== 0) {
                        for ($i = 0; $i < $count; $i++) {
                            $paths[] = $paths[$i] . '[@' . $k . '="' . $v . '"]';
                        }
                    }
                }
            }

            $this->_path_list[$cpl]['paths'] = $paths;
            $node['paths'] = $paths;

            if ($node['type'] == 'complete') {
                foreach (array_intersect(array_keys($this->_bind_list), $paths) as $key) {
                    $node['value'] .= $this->_bind_list[$key];
                }
            }

            foreach (array_intersect(array_keys($this->_bind_attrs), $paths) as $key) {
                foreach ($this->_bind_attrs[$key] as $k => $v) {
                    $node['attributes'][$k] = $v;
                }
            }

            if ($node['type'] == 'complete') {
                $this->_path_level--;
                array_pop($this->_path_list);
            }
        } elseif ($node['type'] == 'close') {
            if (count($this->_path_list) > 0 && $this->_path_list[count($this->_path_list) - 1] != null) {
                foreach (array_intersect(array_keys($this->_bind_list), $this->_path_list[count($this->_path_list) - 1]['paths']) as $key) {
                    $tpl->_output($this->_bind_list[$key]);
                }
                $this->_path_level--;
                array_pop($this->_path_list);
            }
        }

        $tpl->_output($tpl->{$tpl->makeMethod($node['tag'], $node['type'], $node['level'])}($node));
    }

    // gather blocks from included templates
    foreach ($tpl->block as $key => $block) {
        if (!isset($this->block[$key])) {
            $this->block[$key] =& $tpl->block[$key];
        }
    }

    $this->rows = $tpl->rows;
    $this->toc = $tpl->toc;

    return $tpl->output;
}
/**
 * Records a parser error on this object (only the first one is kept) and
 * returns it.
 *
 * $msg may be either a message or the XML parser itself; in the latter case
 * the parser's own error string is used. Parsers are recognised both as
 * resources and as XMLParser objects (what xml_parser_create() returns
 * since PHP 8.0).
 *
 * @param mixed $msg      Error message, or the XML parser that failed.
 * @param int   $xmlecode XML error code whose description is appended.
 * @param mixed $xp       XML parser used for position info; defaults to $this->parser.
 * @param mixed $ecode    Error code passed on to MDB2_Schema::raiseError().
 *
 * @return mixed The stored error.
 */
function &raiseError($msg = null, $xmlecode = 0, $xp = null, $ecode = MDB2_SCHEMA_ERROR_PARSE)
{
    if (is_null($this->error)) {
        $error = '';
        if (is_resource($msg) || $msg instanceof XMLParser) {
            $error .= 'Parser error: ' . xml_error_string(xml_get_error_code($msg));
            $xp = $msg;
        } else {
            $error .= 'Parser error: ' . $msg;
            if (!is_resource($xp) && !($xp instanceof XMLParser)) {
                $xp = $this->parser;
            }
        }

        if ($error_string = xml_error_string($xmlecode)) {
            $error .= ' - ' . $error_string;
        }

        if (is_resource($xp) || $xp instanceof XMLParser) {
            $byte = @xml_get_current_byte_index($xp);
            $line = @xml_get_current_line_number($xp);
            $column = @xml_get_current_column_number($xp);
            $error .= " - Byte: {$byte}; Line: {$line}; Col: {$column}";
        }

        $error .= "\n";

        $this->error =& MDB2_Schema::raiseError($ecode, null, null, $error);
    }

    return $this->error;
}
/**
 * Parses an XML document and returns a multidimensional array containing
 * the data from the XML source.
 *
 * On a parse error $this->errorMessage is filled with code, message, line
 * and an excerpt around the failing byte, and false is returned.
 *
 * @ingroup input
 *
 * @param xml       the xml-input
 * @param from_html if this is set, the input structure is html and has to be parsed before processing
 * @return the array extracted from the xml, or false on a parse error
 */
function xml2tree($xml, $from_html = false)
{
    if ($from_html) {
        // Replace all < and > that do not belong to a known tag so that
        // markup inside cell values is not interpreted by the parser.

        // get all valid tags
        $tags = array();
        foreach ($this->tagDefinitions as $group => $definitions) {
            foreach ($definitions as $tag => $stuff) {
                if ($tag != "universal") {
                    $tags[] = $tag;
                }
            }
        }

        // extract all tags
        $matches = array();
        $pattern = '&<[^>]+>&is';
        preg_match_all($pattern, $xml, $matches);

        if (is_array($matches[0])) {
            // every match
            foreach ($matches[0] as $match) {
                $tag_match = array();

                // get the tagname (e.g. <table or <tbody>)
                preg_match("/<([^ |>])*/i", $match, $tag_match);

                // remove leading "<"
                $tag_match = substr($tag_match[0], 1);

                // if closingtag remove "/"
                if ($tag_match[0] == "/") {
                    $tag_match = substr($tag_match, 1);
                }

                // check if the tag is a table tag
                if (!in_array($tag_match, $tags)) {
                    // if not replace the brackets with placeholders
                    $replacement = str_replace('<', '###[###', $match);
                    $replacement = str_replace('>', '###]###', $replacement);

                    // replace the original tag with the changed one
                    $xml = str_replace($match, $replacement, $xml);
                }
            }
        }

        // added Jan. 13. 2005: bugfix relating to & in cell content
        $xml = str_replace('&', '###AMP###', $xml);
    }

    $parser = xml_parser_create();
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
    $result = xml_parse_into_struct($parser, $xml, $vals, $index);

    if (!$result) {
        $byte = xml_get_current_byte_index($parser);
        $line = xml_get_current_line_number($parser);
        $code = xml_get_error_code($parser);

        $this->errorMessage = "Error in XML\n";
        $this->errorMessage .= "code " . $code . "\n";
        $this->errorMessage .= "message " . xml_error_string($code) . "\n";
        $this->errorMessage .= "line " . $line . "\n";
        // Clamp the start: a negative offset would make substr() count from
        // the end of the document and show an unrelated excerpt.
        $this->errorMessage .= "content " . substr($xml, max(0, $byte - 10), 20);

        // Release the parser on the error path as well.
        xml_parser_free($parser);

        return false;
    }
    xml_parser_free($parser);

    $stack = array(array());
    $stacktop = 0;
    $parent = array();
    // Current tag under construction; initialised so that the first push
    // below does not read an undefined variable.
    $tagi = array();

    if ($from_html) {
        $cellTags = array_keys($this->tagDefinitions['cell']);
    }

    foreach ($vals as $val) {
        $type = $val["type"];

        if ($type == "open" || $type == "complete") {
            // open tag: remember the enclosing tag and start a new one
            $stack[$stacktop++] = $tagi;
            $tagi = array("tag" => $val["tag"]);
            if (isset($val["attributes"])) {
                $tagi["attrs"] = $val["attributes"];
            }
            if (isset($val["value"])) {
                $tagi["values"][] = $val["value"];
            }
        }

        if ($type == "complete" || $type == "close") {
            // finish tag: attach it to the enclosing tag popped from the stack
            $tags[] = $oldtagi = $tagi;
            $tagi = $stack[--$stacktop];
            $oldtag = $oldtagi["tag"];

            // handle celltags special
            if (!$from_html || $from_html && !in_array($oldtagi["tag"], $cellTags)) {
                unset($oldtagi["tag"]);
                $tagi["children"][$oldtag][] = $oldtagi;
            } else {
                // we save cells in a special field of the row array
                // and hold the tag, this will be parsed later on
                $tagi["children"]['cells'][] = $oldtagi;
            }
            $parent = $tagi;
        }

        if ($type == "cdata") {
            $tagi["values"][] = $val["value"];
        }
    }

    return $parent["children"];
}
/**
 * Parse an XML string and return its reorganized structure.
 *
 * The string is parsed with xml_parse_into_struct() (case folding off,
 * whitespace skipping on); the resulting value list is passed through
 * $this->xml_reorganize() and, unless $resulttype is 'array', additionally
 * through $this->array2object().
 *
 * @param string $data       XML document
 * @param string $resulttype 'array' for the reorganized array; anything else
 *                           (default 'object') for the array2object() result
 * @param string $encoding   encoding handed to xml_parser_create()
 * @return mixed the converted structure, or an error message string when
 *               the document cannot be parsed
 */
function xml_load_data($data, $resulttype = 'object', $encoding = 'UTF-8')
{
    $xmlParser = xml_parser_create($encoding);
    xml_parser_set_option($xmlParser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($xmlParser, XML_OPTION_SKIP_WHITE, 1);

    // Build the failure message while the parser still exists, then free
    // the parser on both the success and the failure path.
    $failure = null;
    if (!xml_parse_into_struct($xmlParser, $data, $struct)) {
        $errorCode = xml_get_error_code($xmlParser);
        $failure = sprintf(
            "XML parse error %d '%s' at line %d, column %d (byte index %d)",
            $errorCode,
            xml_error_string($errorCode),
            xml_get_current_line_number($xmlParser),
            xml_get_current_column_number($xmlParser),
            xml_get_current_byte_index($xmlParser)
        );
    }
    xml_parser_free($xmlParser);

    if ($failure !== null) {
        return $failure;
    }

    $tree = $this->xml_reorganize($struct);
    if ($resulttype == 'array') {
        return $tree;
    }

    // default $resulttype is 'object'
    return $this->array2object($tree);
}
/**
 * Get the XML contents of a file and parse them like SimpleXML.
 *
 * Resets $this->result and $this->evalCode, reads $file, parses it with
 * xml_parse_into_struct() (case folding off, whitespace skipping on) and
 * passes the value list through $this->xml_reorganize() and, unless
 * $resulttype is 'array', through $this->array2object().
 *
 * @param string $file       path or URL handed to file_get_contents()
 * @param string $resulttype 'array' for the reorganized array; anything else
 *                           (default 'object') for the array2object() result
 * @param string $encoding   encoding handed to xml_parser_create()
 * @return array/object the converted structure, or an error message string
 *                      when the file yields no data or cannot be parsed
 */
function xml_load_file($file, $resulttype = 'object', $encoding = 'UTF-8')
{
    $this->result = "";
    $this->evalCode = "";

    // Remember the last error before reading, so that only an error raised
    // by this read is reported ($php_errormsg no longer exists in PHP 8).
    $errorBefore = error_get_last();
    $data = file_get_contents($file);
    if (!$data) {
        $errorAfter = error_get_last();
        if ($errorAfter !== null && $errorAfter !== $errorBefore) {
            $reason = $errorAfter['message'];
        } else {
            // No new error recorded (e.g. the file is empty): name the file.
            $reason = $file;
        }
        return 'Cannot open xml document: ' . $reason;
    }

    $parser = xml_parser_create($encoding);
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
    $ok = xml_parse_into_struct($parser, $data, $values);
    if (!$ok) {
        $errmsg = sprintf(
            "XML parse error %d '%s' at line %d, column %d (byte index %d)",
            xml_get_error_code($parser),
            xml_error_string(xml_get_error_code($parser)),
            xml_get_current_line_number($parser),
            xml_get_current_column_number($parser),
            xml_get_current_byte_index($parser)
        );
    }
    xml_parser_free($parser);
    if (!$ok) {
        return $errmsg;
    }

    if ($resulttype == 'array') {
        return $this->xml_reorganize($values);
    }

    // default $resulttype is 'object'
    return $this->array2object($this->xml_reorganize($values));
}
/**
 * Record and return a parser error. Only the first error is kept: once
 * $this->error is set, later calls return it unchanged.
 *
 * @param mixed $msg      error text, or an XML parser handle whose current
 *                        error code is turned into the text
 * @param int   $xmlecode not used by this method
 * @param mixed $xp       XML parser handle used for the position details;
 *                        falls back to $this->parser when it is not a handle
 * @param int   $ecode    error code passed on to PEAR::raiseError()
 * @return mixed the value stored in $this->error
 */
function raiseError($msg = null, $xmlecode = 0, $xp = null, $ecode = OX_PLUGIN_ERROR_PARSE)
{
    if (!is_null($this->error)) {
        return $this->error;
    }

    // Since PHP 8.0 xml_parser_create() returns an XMLParser object rather
    // than a resource, so both forms are accepted as a parser handle.
    if (is_resource($msg) || $msg instanceof XMLParser) {
        $error = 'Parser error: ' . xml_error_string(xml_get_error_code($msg));
        $xp = $msg;
    } else {
        $error = 'Parser error: ' . $msg;
        if (!(is_resource($xp) || $xp instanceof XMLParser)) {
            $xp = $this->parser;
        }
    }

    if (is_resource($xp) || $xp instanceof XMLParser) {
        $byte = @xml_get_current_byte_index($xp);
        $line = @xml_get_current_line_number($xp);
        $column = @xml_get_current_column_number($xp);
        $error .= " - Byte: {$byte}; Line: {$line}; Col: {$column}";
    }
    $error .= "\n";

    $mode = 0;
    $options = 0;
    $userinfo = '';
    // Pass the caller-supplied $ecode; the original referenced an undefined
    // variable here, so the requested code was never used.
    $this->error = PEAR::raiseError($error, $ecode, $mode, $options, $userinfo);

    return $this->error;
}
/**
 * Parse a template file and return the resulting root node.
 *
 * The file content is wrapped in a <PCROOT> element and parsed as
 * ISO-8859-1 with the HandleOpen / HandleClose / HandleData methods of this
 * object as callbacks. Named HTML entities are first rewritten to numeric
 * character references, because the XML parser only knows the predefined
 * XML entities.
 *
 * @param string $filename file to parse
 * @return mixed FALSE when the file does not exist or is not readable,
 *               otherwise $this->root
 * @throws TPL_ParserException when the content is not well-formed; carries
 *                             the parser message, up to 30 bytes of
 *                             surrounding content, the file name and the line
 */
public function Parse($filename)
{
    $this->Reset();
    if (!file_exists($filename) || !is_readable($filename)) {
        return FALSE;
    }
    $data = file_get_contents($filename);

    // Map every named entity to its numeric reference. The table is requested
    // explicitly in ISO-8859-1 so each character is a single byte and ord()
    // yields its code point. strtr() replaces the former preg_replace() with
    // the /e modifier, which no longer exists in PHP 7+; entities that are not
    // in the table are left untouched.
    $entityMap = array();
    foreach (get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1') as $char => $entity) {
        $entityMap[$entity] = '&#' . ord((string) $char) . ';';
    }
    $data = strtr($data, $entityMap);

    $data = "<PCROOT>{$data}</PCROOT>";
    $parser = xml_parser_create('ISO-8859-1');
    xml_set_object($parser, $this);
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
    xml_set_element_handler($parser, 'HandleOpen', 'HandleClose');
    xml_set_character_data_handler($parser, 'HandleData');
    $result = xml_parse($parser, $data, true);
    if ($result == FALSE) {
        // XML parser error: collect the details, then release the parser
        // before throwing so it is not leaked.
        $error = xml_error_string(xml_get_error_code($parser));
        $offset = xml_get_current_byte_index($parser);
        $line = xml_get_current_line_number($parser);
        xml_parser_free($parser);

        // 30 characters around the location of the error; the start is
        // clamped because a negative offset would read from the end of $data.
        $context = substr($data, max(0, $offset - 15), 30);
        throw new TPL_ParserException("{$error} [{$context}]", $filename, $line);
    }
    xml_parser_free($parser);

    return $this->root;
}
/**
 * Main function to call to parse XMP. Use getResults to
 * get results.
 *
 * Also catches any errors during processing, writes them to
 * debug log, blanks result array and returns false.
 *
 * If $this->charset is not yet set, it is detected from the first byte order
 * mark found anywhere in $content (UTF-8 when none is found). Content in any
 * other charset is converted to UTF-8 before being fed to the parser.
 *
 * @param $content String: XMP data
 * @param $allOfIt Boolean: If this is all the data (true) or if its split up (false). Default true
 * @param $reset Boolean: does xml parser need to be reset. Default false
 * @throws MWException
 * @return Boolean success.
 */
public function parse($content, $allOfIt = true, $reset = false)
{
    if ($reset) {
        $this->resetXMLParser();
    }

    try {
        // detect encoding by looking for BOM which is supposed to be in processing instruction.
        // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf
        if (!$this->charset) {
            $bom = array();
            if (preg_match('/\\xEF\\xBB\\xBF|\\xFE\\xFF|\\x00\\x00\\xFE\\xFF|\\xFF\\xFE\\x00\\x00|\\xFF\\xFE/', $content, $bom)) {
                // The labels are written as byte escapes so they match the
                // raw bytes captured above regardless of how this source
                // file itself is encoded.
                switch ($bom[0]) {
                    case "\xFE\xFF":
                        $this->charset = 'UTF-16BE';
                        break;
                    case "\xFF\xFE":
                        $this->charset = 'UTF-16LE';
                        break;
                    case "\x00\x00\xFE\xFF":
                        $this->charset = 'UTF-32BE';
                        break;
                    case "\xFF\xFE\x00\x00":
                        $this->charset = 'UTF-32LE';
                        break;
                    case "\xEF\xBB\xBF":
                        $this->charset = 'UTF-8';
                        break;
                    default:
                        //this should be impossible to get to
                        throw new MWException("Invalid BOM");
                }
            } else {
                // standard specifically says, if no bom assume utf-8
                $this->charset = 'UTF-8';
            }
        }

        if ($this->charset !== 'UTF-8') {
            //don't convert if already utf-8
            wfSuppressWarnings();
            $content = iconv($this->charset, 'UTF-8//IGNORE', $content);
            wfRestoreWarnings();
        }

        $ok = xml_parse($this->xmlParser, $content, $allOfIt);
        if (!$ok) {
            $error = xml_error_string(xml_get_error_code($this->xmlParser));
            $where = 'line: ' . xml_get_current_line_number($this->xmlParser)
                . ' column: ' . xml_get_current_column_number($this->xmlParser)
                . ' byte offset: ' . xml_get_current_byte_index($this->xmlParser);
            wfDebugLog('XMP', "XMPReader::parse : Error reading XMP content: {$error} ({$where})");
            $this->results = array(); // blank if error.
            return false;
        }
    } catch (MWException $e) {
        wfDebugLog('XMP', 'XMP parse error: ' . $e);
        $this->results = array();
        return false;
    }

    return true;
}
/**
 * Element start callback.
 *
 * Writes the opening tag (or a self-closing tag) of the element to
 * $this->_output, preceded by the configured end-of-line string unless the
 * previous token was text, and records what was written in $this->_prev.
 *
 * @param resource $parser     xml parser
 * @param string   $name       element name
 * @param array    $attributes element attributes
 * @return void
 */
protected function _cbElementStart($parser, $name, array $attributes)
{
    if ($this->_prev != self::PREV_WAS_TEXT) {
        fwrite($this->_output, $this->_options["outputEOL"]);
    }

    // The element counts as empty when the buffered byte at the parser's
    // current position (relative to $this->_offset) is a "/".
    $byteIndex = xml_get_current_byte_index($this->_parser);
    $this->_empty = $this->_buffer[$byteIndex - $this->_offset] == '/';

    // Attribute values are written exactly as they were received.
    $attrString = "";
    foreach ($attributes as $attrName => $attrValue) {
        $attrString .= " {$attrName}=\"{$attrValue}\"";
    }

    $padding = $this->_getPaddingStr();
    $tagEnd = $this->_empty ? ' />' : '>';
    fwrite($this->_output, $padding . "<" . $name . $attrString . $tagEnd);

    if ($this->_empty) {
        $this->_prev = self::PREV_WAS_EMPTY_TAG;
        return;
    }

    // A real opening tag: everything that follows is one level deeper.
    ++$this->_depth;
    $this->_prev = self::PREV_WAS_OPENING_TAG;
}
/**
 * Parse an XML document with a namespace-aware parser, using this object's
 * tag_open / tag_close / cdata methods as callbacks.
 *
 * Before parsing, a leading byte order mark is stripped, the XML declaration
 * (if present) is rewritten to carry $encoding, and the five predefined
 * entities are replaced by numeric character references. After parsing, the
 * parser position is stored in $this->current_line, $this->current_column
 * and $this->current_byte; on failure $this->error_code and
 * $this->error_string are set.
 *
 * @param string $data     document to parse; modified in place
 * @param string $encoding encoding of $data
 * @return bool true on success, false on failure
 */
function parse(&$data, $encoding)
{
    // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
    if (strtoupper($encoding) == 'US-ASCII') {
        $this->encoding = 'UTF-8';
    } else {
        $this->encoding = $encoding;
    }

    // Strip BOM. The marks are written as byte escapes so they do not depend
    // on how this source file is encoded; the four-byte marks are tested
    // first because the UTF-32 LE mark starts with the UTF-16 LE one.
    $boms = array(
        "\x00\x00\xFE\xFF", // UTF-32 Big Endian
        "\xFF\xFE\x00\x00", // UTF-32 Little Endian
        "\xFE\xFF",         // UTF-16 Big Endian
        "\xFF\xFE",         // UTF-16 Little Endian
        "\xEF\xBB\xBF",     // UTF-8
    );
    foreach ($boms as $bom) {
        $length = strlen($bom);
        if (substr($data, 0, $length) === $bom) {
            $data = substr($data, $length);
            break;
        }
    }

    if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\t\n\r ") && ($pos = strpos($data, '?>')) !== false) {
        $declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
        if ($declaration->parse()) {
            // Replace the declaration with one that states $encoding.
            $data = substr($data, $pos + 2);
            $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . ($declaration->standalone ? 'yes' : 'no') . '"?>' . $data;
        } else {
            $this->error_string = 'SimplePie bug! Please report this!';
            return false;
        }
    }

    // Work around libxml bug: hand the predefined entities to the parser as
    // numeric character references.
    $data = str_replace(
        array('&lt;', '&gt;', '&amp;', '&apos;', '&quot;'),
        array('&#60;', '&#62;', '&#38;', '&#39;', '&#34;'),
        $data
    );

    $return = true;

    // Create the parser
    $xml = xml_parser_create_ns($this->encoding, $this->separator);
    xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
    xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
    xml_set_object($xml, $this);
    xml_set_character_data_handler($xml, 'cdata');
    xml_set_element_handler($xml, 'tag_open', 'tag_close');

    // Parse!
    if (!xml_parse($xml, $data, true)) {
        $this->error_code = xml_get_error_code($xml);
        $this->error_string = xml_error_string($this->error_code);
        $return = false;
    }

    // The position is recorded on success and on failure alike.
    $this->current_line = xml_get_current_line_number($xml);
    $this->current_column = xml_get_current_column_number($xml);
    $this->current_byte = xml_get_current_byte_index($xml);
    xml_parser_free($xml);

    return $return;
}
/**
 * Constructor.
 *
 * Parses $xml immediately, using this object's _startElement / _endElement /
 * _characterData methods as callbacks. An encoding named in the XML
 * declaration overrides $encoding. A parse failure is reported through
 * $this->_raiseSoapFault().
 *
 * @param string $xml         XML content.
 * @param string $encoding    Character set encoding, defaults to 'UTF-8'.
 * @param array  $attachments List of attachments.
 */
function SOAP_Parser($xml, $encoding = SOAP_DEFAULT_ENCODING, $attachments = null)
{
    parent::SOAP_Base('Parser');
    $this->_setSchemaVersion(SOAP_XML_SCHEMA_VERSION);

    $this->attachments = $attachments;

    // Check the XML tag for encoding.
    if (preg_match('/<\\?xml[^>]+encoding\\s*?=\\s*?(\'([^\']*)\'|"([^"]*)")[^>]*?[\\?]>/', $xml, $m)) {
        $encoding = strtoupper($m[2] ? $m[2] : $m[3]);
    }

    // Determine where in the message we are (envelope, header, body,
    // method). Check whether content has been read.
    if (!empty($xml)) {
        // Prepare the XML parser.
        $parser = xml_parser_create($encoding);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_set_object($parser, $this);
        xml_set_element_handler($parser, '_startElement', '_endElement');
        xml_set_character_data_handler($parser, '_characterData');

        // Some lame SOAP implementations add nul bytes at the end of the
        // SOAP stream, and expat chokes on that. trim() removes NUL bytes
        // along with surrounding whitespace. It is applied unconditionally:
        // the former "last character == 0" test was true for nearly every
        // document on PHP < 8 but never matches a NUL byte on PHP 8, where
        // a non-numeric string is no longer equal to integer 0.
        $xml = trim($xml);

        // Parse the XML file.
        if (!xml_parse($parser, $xml, true)) {
            $err = sprintf(
                'XML error on line %d col %d byte %d %s',
                xml_get_current_line_number($parser),
                xml_get_current_column_number($parser),
                xml_get_current_byte_index($parser),
                xml_error_string(xml_get_error_code($parser))
            );
            $this->_raiseSoapFault($err, htmlspecialchars($xml));
        }
        xml_parser_free($parser);
    }
}