/**
 * Hands every consecutive `$this->node` element to the callback as a SimpleXMLElement.
 *
 * Starting at the reader's current position, each node whose local name equals
 * `$this->node` is serialised with readOuterXml(), wrapped in a \SimpleXMLElement
 * and passed to `$callback`; the reader then jumps to the next node of that name.
 * The reader is closed when the loop ends, including when an exception escapes.
 *
 * Failure handling:
 *  - a \RuntimeException is re-thrown as a plain \RuntimeException with the same
 *    message and the original attached as "previous";
 *  - any other \Exception is printed when `$this->trace` is set, and is escalated
 *    to a \RuntimeException only when `$this->strict` is set; otherwise the node
 *    is skipped.
 *
 * When `$this->debug` is set, processing stops after the first node.
 *
 * @param \Closure $callback Receives one \SimpleXMLElement per matching node.
 *
 * @throws \RuntimeException
 */
protected function parseFile(\Closure $callback)
{
    try {
        while ($this->xmlr->localName == $this->node) {
            try {
                // The SimpleXMLElement constructor throws on unparsable input,
                // so no instanceof check is needed afterwards.
                $callback(new \SimpleXMLElement($this->xmlr->readOuterXml()));
            } catch (\RuntimeException $e) {
                // Keep the original exception reachable through getPrevious().
                throw new \RuntimeException($e->getMessage(), 0, $e);
            } catch (\Exception $e) {
                if ($this->trace) {
                    echo sprintf(
                        "%s - %s - %s -%s\n",
                        $e->getMessage(),
                        $e->getFile(),
                        $e->getLine(),
                        $e->getTraceAsString()
                    );
                }
                if ($this->strict) {
                    throw new \RuntimeException($e->getMessage(), 0, $e);
                }
            }

            if ($this->debug) {
                break;
            }

            $this->xmlr->next($this->node);
        }
    } finally {
        // Release the underlying stream even when an exception propagates.
        $this->xmlr->close();
    }
}
/**
 * Parses the stream into a JFeed object.
 *
 * Every element the stream is positioned on is expanded to a SimpleXMLElement,
 * paired with the namespace handlers available for the prefixes it uses, and
 * passed to processElement(). The stream is then moved past the element's
 * children before advancing to the next element.
 *
 * @return  JFeed
 *
 * @since   12.3
 */
public function parse()
{
    $feed = new JFeed();

    // Detect the feed version.
    $this->initialise();

    do {
        // Expand the current element so it can be inspected as SimpleXML.
        $element = new SimpleXMLElement($this->stream->readOuterXml());

        // Collect a handler for every non-empty namespace prefix in use.
        $handlers = array();

        foreach (array_keys($element->getNamespaces(true)) as $prefix) {
            if (!empty($prefix)) {
                $handler = $this->fetchNamespace($prefix);

                if ($handler) {
                    $handlers[] = $handler;
                }
            }
        }

        $this->processElement($feed, $element, $handlers);

        // The element has been handled as a whole; skip over its children.
        $this->moveToClosingElement();
    } while ($this->moveToNextElement());

    return $feed;
}
/**
 * Decodes an XML response into the structure produced by XML_Unserializer.
 *
 * The first node of the document is loaded into a DOM. When its local name is
 * the name of an existing class, every element with that tag name is annotated
 * with `_class` / `_type` attributes before unserializing.
 *
 * @param string $XMLResponse Raw XML response.
 * @param bool   $isFault     Set to true when the upper-cased local name of the
 *                            first node equals self::$FaultMessage.
 *
 * @return mixed The unserialized data, or the XML string when
 *               XML_Unserializer::unserialize() returns a falsy value.
 *
 * @throws Exception When the response is empty, is not XML, or decoding fails;
 *                   the underlying cause is available through getPrevious().
 */
public static function Decode($XMLResponse, &$isFault)
{
    $xmlDoc = null;

    try {
        if (empty($XMLResponse)) {
            throw new Exception("Given Response is not a valid SOAP response.");
        }

        $xmlDoc = new XMLReader();
        if (!$xmlDoc->XML($XMLResponse)) {
            throw new Exception("Given Response is not a valid XML response.");
        }

        // Position the reader on the first node and mirror it into a DOM.
        // The reader does not move again, so one load is enough for both the
        // regular and the fault case.
        $xmlDoc->read();
        $xmlDOM = new DOMDocument();
        $xmlDOM->loadXML($xmlDoc->readOuterXml());

        $isFault = trim(strtoupper($xmlDoc->localName)) == self::$FaultMessage;

        if ($xmlDoc->nodeType === XMLReader::ELEMENT) {
            $nodeName = $xmlDoc->localName;

            // Tag the elements so the unserializer builds objects of that class.
            if (class_exists($nodeName)) {
                foreach ($xmlDOM->getElementsByTagName($nodeName) as $xmlNode) {
                    $xmlNode->setAttribute("_class", $nodeName);
                    $xmlNode->setAttribute("_type", "object");
                }
            }
        }

        $responseXML = $xmlDOM->saveXML();

        $unserializer = new XML_Unserializer();
        $unserializer->setOption(XML_UNSERIALIZER_OPTION_COMPLEXTYPE, 'object');
        $res = $unserializer->unserialize($responseXML, false);
        if ($res) {
            $responseXML = $unserializer->getUnserializedData();
        }

        return $responseXML;
    } catch (Exception $ex) {
        // Same message as before, but keep the cause for debugging.
        throw new Exception("Error occurred while XML decoding", 0, $ex);
    } finally {
        if ($xmlDoc !== null) {
            $xmlDoc->close();
        }
    }
}
/**
 * Reads an exported feed from start to end and fails on the first libxml error.
 *
 * Each element named after the type's item node advances the progress bar and
 * is remembered in `$this->currentItem` as its outer XML.
 *
 * @param FeedTypeInterface $type
 * @param OutputInterface   $output
 *
 * @return void No value is returned (the previous docblock declared int).
 *
 * @throws FileNotFoundException When the feed has not been exported yet.
 * @throws \RuntimeException     When the file cannot be opened or libxml reports an error.
 */
protected function validate(FeedTypeInterface $type, OutputInterface $output)
{
    $file = $this->exporter->getFeedFilename($type);

    if (!file_exists($file)) {
        throw new FileNotFoundException(sprintf('<error>Feed "%s" has not yet been exported</error>', $type->getName()));
    }

    // Collect libxml errors instead of emitting PHP warnings. This is enabled
    // before open() so nothing is missed; the previous setting is restored below.
    $previousUseErrors = libxml_use_internal_errors(true);
    libxml_clear_errors();
    $entityLoaderChanged = false;
    $previousEntityLoader = false;

    $this->reader = new \XMLReader();

    try {
        // Parser options belong on open(): the XMLReader constructor takes no
        // parameters. LIBXML_NOERROR / LIBXML_NOWARNING are deliberately not
        // passed because they would hide the errors this method reports, and
        // entity substitution is enabled through SUBST_ENTITIES below.
        if (!$this->reader->open($file, null, LIBXML_COMPACT | LIBXML_PARSEHUGE)) {
            throw new \RuntimeException(sprintf('Unable to open feed file "%s"', $file));
        }

        $this->reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

        // foreach ($type->getNamespaces() as $name => $location) {
        //     $this->reader->setSchema($location);
        // }

        // Disabled only after open(), as in the original statement order.
        $previousEntityLoader = libxml_disable_entity_loader(true);
        $entityLoaderChanged = true;

        $progress = new ProgressBar($output);
        $progress->start();

        // go through the whole thing
        while ($this->reader->read()) {
            if ($this->reader->nodeType === \XMLReader::ELEMENT && $this->reader->name === $type->getItemNode()) {
                $progress->advance();
                $this->currentItem = $this->reader->readOuterXml();
            }

            if ($error = libxml_get_last_error()) {
                throw new \RuntimeException(sprintf(
                    '[%s %s] %s (in %s - line %d, column %d)',
                    LIBXML_ERR_WARNING === $error->level ? 'WARNING' : 'ERROR',
                    $error->code,
                    trim($error->message),
                    $error->file ? $error->file : 'n/a',
                    $error->line,
                    $error->column
                ));
            }
        }

        $progress->finish();
    } finally {
        // Do not leak the stream or the process-wide libxml settings.
        $this->reader->close();
        if ($entityLoaderChanged) {
            libxml_disable_entity_loader($previousEntityLoader);
        }
        libxml_use_internal_errors($previousUseErrors);
    }
}
/**
 * Lazily decodes every `product` element of an XML file.
 *
 * The reader is advanced to the first node named `product`; each such node is
 * decoded with XmlDecoder::decodeProduct() and yielded, then the reader jumps
 * to the next `product` node. The reader is closed when iteration finishes,
 * when the generator is destroyed early, or when decoding throws.
 *
 * @param string $xmlFilePath Path (or URI) of the XML document.
 *
 * @return \Generator|Product[]
 *
 * @throws \RuntimeException When the file cannot be opened (raised on first iteration).
 */
public function readFromPath($xmlFilePath)
{
    $xml = new \XMLReader();

    if (!$xml->open($xmlFilePath)) {
        throw new \RuntimeException(sprintf('Unable to open XML file "%s"', $xmlFilePath));
    }

    try {
        // Skip everything in front of the first product node.
        while ($xml->read() && $xml->name !== 'product') {
        }

        $decoder = new XmlDecoder();

        // readOuterXml() returns an empty string once no node is left.
        while ($xmlData = $xml->readOuterXml()) {
            yield $decoder->decodeProduct(new \SimpleXMLElement($xmlData));
            $xml->next('product');
        }
    } finally {
        $xml->close();
    }
}
/**
 * Decorated XMLReader::readOuterXml() with a DOM-based fallback.
 *
 * When the wrapped reader offers readOuterXml() itself, the call is simply
 * forwarded. Otherwise the current node is expanded into a fresh DOMDocument
 * and serialised from there; a reader whose node type is 0 yields ''.
 *
 * @throws BadMethodCallException in case XMLReader can not expand the node
 * @return string
 */
public function readOuterXml()
{
    $hasNative = method_exists($this->reader, 'readOuterXml');

    // Compatibility: libxml 20620 (2.6.20) or later provides the native method.
    if ($hasNative) {
        return $this->reader->readOuterXml();
    }

    // Node type 0: nothing to serialise.
    if (0 === $this->reader->nodeType) {
        return '';
    }

    $document = new DOMDocument();
    $document->preserveWhiteSpace = false;
    $document->formatOutput = true;

    return $document->saveXML($this->expand($document));
}
/**
 * Lazily yields every element named getElement() from the document at getUrl().
 *
 * Each match is yielded as the result of simplexml_load_string() with
 * LIBXML_NOCDATA. After a match the reader skips the element's subtree with
 * next(); the node it lands on is examined directly, so matching elements that
 * directly follow each other (no whitespace in between) are not skipped.
 *
 * @return \Generator
 *
 * @throws \RuntimeException When the source cannot be opened (raised on first iteration).
 */
public function streamXml()
{
    $reader = new XMLReader();

    if (!$reader->open($this->getUrl())) {
        throw new \RuntimeException('Unable to open XML source');
    }

    try {
        $target = $this->getElement();
        $hasNode = $reader->read();

        while ($hasNode) {
            if ($reader->nodeType == XMLReader::ELEMENT && $reader->name == $target) {
                // Yield one element at a time to keep memory usage flat.
                yield simplexml_load_string($reader->readOuterXml(), null, LIBXML_NOCDATA);

                // Skip the subtree; the next iteration inspects the node we
                // land on instead of advancing a second time.
                $hasNode = $reader->next();
            } else {
                $hasNode = $reader->read();
            }
        }
    } finally {
        $reader->close();
    }
}
/**
 * Downloads the currency feed for a date and parses its rows.
 *
 * The feed is downloaded with cURL into a temporary file, streamed with
 * XMLReader, and every element whose local name equals `$this->xmlElement` is
 * passed to parseRow(). Download or open failures are logged and produce an
 * empty result. The temporary file is always removed.
 *
 * @param mixed $date Passed to getUrl() to build the source URL.
 *
 * @return CurrencyRaw[] Parsed rows indexed by their `num_code`.
 */
public function parse($date)
{
    $url = $this->getUrl($date);
    $data = [];

    $temp_file = tempnam(sys_get_temp_dir(), 'currency-source');
    if ($temp_file === false) {
        Yii::log('Unable to create a temporary file for `' . $url . '`', CLogger::LEVEL_ERROR, 'currency-parser');

        return $data;
    }

    $xml = new XMLReader();

    try {
        $fp = fopen($temp_file, 'w+');
        if ($fp === false) {
            Yii::log('Unable to open temporary file `' . $temp_file . '`', CLogger::LEVEL_ERROR, 'currency-parser');

            return $data;
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
        // NOTE(review): peer verification is disabled, so the feed can be
        // tampered with in transit - confirm this is intentional.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        // Set last so the response body is written to the temporary file.
        curl_setopt($ch, CURLOPT_FILE, $fp);
        $downloaded = curl_exec($ch);
        $curlError = curl_error($ch);
        curl_close($ch);
        fclose($fp);

        if ($downloaded === false) {
            Yii::log('Unable to download XML from `' . $url . '`: ' . $curlError, CLogger::LEVEL_ERROR, 'currency-parser');

            return $data;
        }

        if (!$xml->open($temp_file)) {
            Yii::log('Unable to open downloaded XML from `' . $url . '`', CLogger::LEVEL_ERROR, 'currency-parser');

            return $data;
        }
        $xml->setParserProperty(XMLReader::VALIDATE, false);
        Yii::log('Open XML from `' . $url . '`', CLogger::LEVEL_INFO, 'currency-parser');

        while ($xml->read()) {
            if ($xml->nodeType != XMLReader::ELEMENT || $xml->localName != $this->xmlElement) {
                continue;
            }

            try {
                $xmlRow = new SimpleXMLElement($xml->readOuterXml());
            } catch (Exception $e) {
                // Skip the broken row, but leave a trace of it.
                Yii::log('Invalid XML row skipped: ' . $e->getMessage(), CLogger::LEVEL_WARNING, 'currency-parser');
                continue;
            }

            if ($rowObj = $this->parseRow($xmlRow)) {
                $data[$rowObj->num_code] = $rowObj;
            } else {
                Yii::log('Error parsed XML row', CLogger::LEVEL_WARNING, 'currency-parser');
            }
        }
    } finally {
        // Release the file handle before deleting the file, on every path.
        $xml->close();
        @unlink($temp_file);
    }

    return $data;
}
<?php
/* $Id$ */

// Prints the inner XML, then the outer XML, of the first node of the same
// document, each followed by a newline. A fresh reader is used for each call.
$xmlstring = '<?xml version="1.0" encoding="UTF-8"?> <books><book>test</book></books>';

foreach (array('readInnerXml', 'readOuterXml') as $method) {
    $reader = new XMLReader();
    $reader->XML($xmlstring);
    $reader->read();
    echo $reader->$method();
    echo "\n";
    $reader->close();
}
?> ===DONE===
/**
 * Get the xml content of a record for the specified prefix.
 *
 * The local static repository file is streamed until the `ListRecords`
 * element whose `metadataPrefix` attribute equals the requested prefix; its
 * `oai:record` elements are then checked one by one until the header
 * identifier matches. Records that cannot be parsed are skipped.
 *
 * @see OaiPmhGateway_ResponseGenerator::_getRecord()
 *
 * @param string $identifier
 * @param string $metadataPrefix
 * @return SimpleXMLElement|null|boolean The record if found, null if not
 * found, false if the repository file does not exist or cannot be opened.
 */
protected function _getRecord($identifier, $metadataPrefix)
{
    // Prepare the xml reader for the existing static repository.
    // Don't use a static value to allow tests.
    $localRepositoryFilepath = $this->_folder->getLocalRepositoryFilepath();
    if (!file_exists($localRepositoryFilepath)) {
        return false;
    }

    // Read the xml from the beginning.
    $reader = new XMLReader();
    if (!$reader->open($localRepositoryFilepath, null, LIBXML_NSCLEAN)) {
        return false;
    }

    $record = null;
    while ($reader->read()) {
        $isRequestedList = $reader->nodeType == XMLReader::ELEMENT
            && $reader->name == 'ListRecords'
            && $reader->getAttribute('metadataPrefix') === $metadataPrefix;
        if (!$isRequestedList) {
            continue;
        }

        // Loop on all records of this list until the one of the identifier.
        $hasNode = $reader->read();
        while ($hasNode) {
            if ($reader->nodeType == XMLReader::ELEMENT && $reader->name === 'oai:record') {
                // XMLReader is forward only and the identifier is not the
                // first element, so the whole record is parsed temporarily.
                $recordXml = @simplexml_load_string($reader->readOuterXml(), 'SimpleXMLElement', 0, 'oai', true);

                if ($recordXml !== false && (string) $recordXml->header->identifier === $identifier) {
                    $record = $recordXml;
                    break 2;
                }

                // Skip the record's subtree. The node reached is examined on
                // the next pass without another read(), otherwise a record
                // that directly follows this one would be stepped over.
                $hasNode = $reader->next();
                continue;
            }

            // Don't continue to list records with another prefix.
            if ($reader->nodeType == XMLReader::END_ELEMENT && $reader->name == 'ListRecords') {
                break 2;
            }

            $hasNode = $reader->read();
        }
    }

    $reader->close();
    return $record;
}
/**
 * Process the XML
 *
 * Streams the uploaded Blogger export and dispatches every `entry` element:
 * entries carrying the "kind#post" category go to processXMLAsPost(), entries
 * carrying the "kind#comment" category go to processXMLAsComment(). Only the
 * first of these two categories found on an entry is acted upon.
 */
private function processXML()
{
    // init object
    $reader = new XMLReader();

    // open the file
    $reader->open(FRONTEND_FILES_PATH . '/blogger.xml');

    // Loop through the document. The reader is advanced by the loop condition,
    // so every "continue" below moves on to the next node instead of spinning
    // on the current one forever.
    while ($reader->read()) {
        // start tag for entry?
        if ($reader->name != 'entry') {
            continue;
        }

        // end tag?
        if ($reader->nodeType == XMLReader::END_ELEMENT) {
            $reader->next();
        }

        // get the raw XML
        $xmlString = $reader->readOuterXml();

        // is it really an entry?
        if (substr($xmlString, 0, 6) != '<entry') {
            continue;
        }

        // read the XML as an SimpleXML-object
        $xml = @simplexml_load_string($xmlString);

        // skip element if it isn't a valid SimpleXML-object
        if ($xml === false) {
            continue;
        }

        // loop the categories
        foreach ($xml->category as $category) {
            // post
            if ($category['term'] == 'http://schemas.google.com/blogger/2008/kind#post') {
                $this->processXMLAsPost($xml);

                // stop looping
                break;
            }

            // comment
            if ($category['term'] == 'http://schemas.google.com/blogger/2008/kind#comment') {
                $this->processXMLAsComment($xml);

                // stop looping
                break;
            }
        }
    }

    // close
    $reader->close();
}
/**
 * Loads the reader's current node as a SimpleXML element.
 *
 * The node's outer XML is prefixed with a UTF-8 XML declaration before it is
 * handed to simplexml_load_string().
 *
 * @return \SimpleXMLElement
 */
protected function loadElementXml()
{
    $outerXml = $this->xmlReader->readOuterXml();
    $document = '<?xml version="1.0" encoding="UTF-8"?>' . $outerXml;

    return simplexml_load_string($document);
}
/**
 * Decodes the payload of a SOAP response.
 *
 * The reader is advanced to the node whose upper-cased local name equals
 * self::$SOAPBody; the inner XML of that node is loaded into a DOM. When the
 * first node inside it is a fault (self::$SOAPFault), the reader is advanced to
 * self::$SOAPFaultMessage and that node replaces the DOM content. If the
 * current element's local name is an existing class, matching elements are
 * annotated with `_class` / `_type` before XML_Unserializer runs.
 *
 * @param string $SOAPResponse Raw SOAP response.
 * @param bool   $isSOAPFault  Set to true when a fault was detected or when no
 *                             body node could be found.
 *
 * @return mixed The unserialized data, the XML string when unserialize()
 *               returns a falsy value, or a FaultMessage when the response
 *               contains no body node.
 *
 * @throws Exception When the response is empty or cannot be loaded as XML.
 */
public static function Decode($SOAPResponse, &$isSOAPFault)
{
    if (empty($SOAPResponse)) {
        throw new Exception("Given Response is not a valid SOAP response.");
    }

    $xmlDoc = new XMLReader();

    try {
        if (!$xmlDoc->XML($SOAPResponse)) {
            throw new Exception("Given Response is not a valid SOAP response.");
        }

        // Advance to the body node.
        $isNotEnd = true;
        while (trim(strtoupper($xmlDoc->localName)) != self::$SOAPBody) {
            $isNotEnd = $xmlDoc->read();
            if (!$isNotEnd) {
                break;
            }
        }

        // Reached the end of the document without finding a body.
        if (!$isNotEnd) {
            $isSOAPFault = true;
            $soapFault = new FaultMessage();
            $errorData = new ErrorData();
            $errorData->errorId = 'Given Response is not a valid SOAP response.';
            $errorData->message = 'Given Response is not a valid SOAP response.';
            $soapFault->error = $errorData;

            return $soapFault;
        }

        $xmlDOM = new DOMDocument();
        $xmlDOM->loadXML($xmlDoc->readInnerXml());

        // Step onto the first node inside the body.
        $xmlDoc->read();
        $isSOAPFault = trim(strtoupper($xmlDoc->localName)) == self::$SOAPFault;

        if ($isSOAPFault) {
            while (trim(strtoupper($xmlDoc->localName)) != self::$SOAPFaultMessage) {
                if (!$xmlDoc->read()) {
                    break;
                }
            }

            // An empty string means no fault message node was found; keep the
            // body content instead of feeding an empty document to loadXML().
            $faultXml = $xmlDoc->readOuterXml();
            if ($faultXml !== '') {
                $xmlDOM->loadXML($faultXml);
            }
        }

        if ($xmlDoc->nodeType === XMLReader::ELEMENT) {
            $nodeName = $xmlDoc->localName;

            // Tag the elements so the unserializer builds objects of that class.
            if (class_exists($nodeName)) {
                foreach ($xmlDOM->getElementsByTagName($nodeName) as $xmlNode) {
                    $xmlNode->setAttribute("_class", $nodeName);
                    $xmlNode->setAttribute("_type", "object");
                }
            }
        }

        $responseXML = $xmlDOM->saveXML();

        $unserializer = new XML_Unserializer();
        $unserializer->setOption(XML_UNSERIALIZER_OPTION_COMPLEXTYPE, 'object');
        $res = $unserializer->unserialize($responseXML, false);
        if ($res) {
            $responseXML = $unserializer->getUnserializedData();
        }

        return $responseXML;
    } finally {
        // Exceptions propagate unchanged; the reader is released on every path.
        $xmlDoc->close();
    }
}
/**
 * {@inheritdoc}
 *
 * Streams the response with XMLReader and decodes it in a single pass.
 *
 * Each known tag name has a bit flag; `$nextExpectedElements` holds the bitmask
 * of tags allowed to come next, and a tag whose flag is not in the mask raises
 * an UnexpectedTagException. Decoded values are collected in `$aggregates`,
 * indexed by nesting depth: scalars are staged at `$depth + 1` and appended to
 * the collection at `$depth` when the enclosing `value` element closes.
 *
 * The method returns '' as soon as an entity reference node is met. When a
 * `fault` element was seen, the decoded result is turned into a FaultException
 * and thrown instead of returned.
 *
 * NOTE(review): the early `return ''` and the thrown UnexpectedTagException
 * leave the function before libxml_use_internal_errors() is restored.
 */
public function parse($xmlString)
{
    if ($this->validateResponse) {
        XmlChecker::isValid($xmlString);
    }

    // Remember the current libxml error mode so it can be restored at the end.
    $useErrors = libxml_use_internal_errors(true);

    $xml = new \XMLReader();
    $xml->xml($xmlString, 'UTF-8', LIBXML_COMPACT | LIBXML_NOCDATA | LIBXML_NOBLANKS | LIBXML_PARSEHUGE);
    $xml->setParserProperty(\XMLReader::VALIDATE, false);
    $xml->setParserProperty(\XMLReader::LOADDTD, false);

    // The following assignments are auto-generated using
    // Fxmlrpc\Serialization\CodeGenerator\XmlReaderParserBitmaskGenerator
    // Do not edit manually.
    // One bit per tag name; the variables are looked up as ${'flag' . $tagName}.
    static $flagmethodResponse = 0b1;
    static $flagparams = 0b10;
    static $flagfault = 0b100;
    static $flagparam = 0b1000;
    static $flagvalue = 0b10000;
    static $flagarray = 0b100000;
    static $flagmember = 0b1000000;
    static $flagname = 0b10000000;
    // Names that are not valid identifiers are plain (non-static) variable variables.
    ${'flag#text'} = 0b100000000;
    static $flagstring = 0b1000000000;
    static $flagstruct = 0b10000000000;
    static $flagint = 0b100000000000;
    static $flagbiginteger = 0b1000000000000;
    static $flagi8 = 0b10000000000000;
    static $flagi4 = 0b100000000000000;
    static $flagi2 = 0b1000000000000000;
    static $flagi1 = 0b10000000000000000;
    static $flagboolean = 0b100000000000000000;
    static $flagdouble = 0b1000000000000000000;
    static $flagfloat = 0b10000000000000000000;
    static $flagbigdecimal = 0b100000000000000000000;
    ${'flagdateTime.iso8601'} = 0b1000000000000000000000;
    static $flagdateTime = 0b10000000000000000000000;
    static $flagbase64 = 0b100000000000000000000000;
    static $flagnil = 0b1000000000000000000000000;
    static $flagdom = 0b10000000000000000000000000;
    static $flagdata = 0b100000000000000000000000000;
    // End of auto-generated code

    $aggregates = [];
    $depth = 0;
    // Only `methodResponse` is accepted as the first tag.
    $nextExpectedElements = 0b1;
    $i = 0;
    $isFault = false;

    while ($xml->read()) {
        ++$i;
        $nodeType = $xml->nodeType;

        // Skip comments and doctype nodes; skip significant whitespace unless
        // a #text node is currently allowed.
        if ($nodeType === \XMLReader::COMMENT || $nodeType === \XMLReader::DOC_TYPE || $nodeType === \XMLReader::SIGNIFICANT_WHITESPACE && ($nextExpectedElements & 0b100000000) !== 0b100000000) {
            continue;
        }

        // Entity references abort parsing with an empty string.
        if ($nodeType === \XMLReader::ENTITY_REF) {
            return '';
        }

        $tagName = $xml->localName;

        // Tags without a flag variable get -1, which no expected-mask can contain,
        // so they are rejected whenever type checking is active (mask !== null).
        if ($nextExpectedElements !== null && ($flag = isset(${'flag' . $tagName}) ? ${'flag' . $tagName} : -1) && ($nextExpectedElements & $flag) !== $flag) {
            throw new UnexpectedTagException($tagName, $nextExpectedElements, get_defined_vars(), $xml->depth, $xml->readOuterXml());
        }

        // Re-entered via goto below to replay an empty element as its own end tag.
        processing:
        switch ($nodeType) {
            case \XMLReader::ELEMENT:
                switch ($tagName) {
                    case 'methodResponse':
                        // Next: params, fault
                        $nextExpectedElements = 0b110;
                        break;
                    case 'params':
                        // Next: param
                        $nextExpectedElements = 0b1000;
                        $aggregates[$depth] = [];
                        break;
                    case 'fault':
                        $isFault = true;
                        // Break intentionally omitted
                    case 'param':
                        // Next: value
                        $nextExpectedElements = 0b10000;
                        break;
                    case 'array':
                        $aggregates[++$depth] = [];
                        // Break intentionally omitted
                    case 'data':
                        // Next: array, data, value
                        $nextExpectedElements = 0b100000000000000000000110000;
                        break;
                    case 'struct':
                        // Next: struct, member, value
                        $nextExpectedElements = 0b10001010000;
                        $aggregates[++$depth] = [];
                        break;
                    case 'member':
                        // Next: name, value
                        $nextExpectedElements = 0b10010000;
                        $aggregates[++$depth] = [];
                        break;
                    case 'name':
                        // Next: #text
                        $nextExpectedElements = 0b100000000;
                        $type = 'name';
                        break;
                    case 'value':
                        $nextExpectedElements = 0b11111111111111111100110000;
                        $type = 'value';
                        // Default for a value without content: empty string.
                        $aggregates[$depth + 1] = '';
                        break;
                    case 'base64':
                    case 'string':
                    case 'biginteger':
                    case 'i8':
                    case 'dateTime.iso8601':
                    case 'dateTime':
                        // Next: value, $tagName, #text
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = '';
                        break;
                    case 'nil':
                        // Next: value, $tagName
                        $nextExpectedElements = 0b1000000000000000000010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = null;
                        break;
                    case 'int':
                    case 'i4':
                    case 'i2':
                    case 'i1':
                        // Next: value, #text, $tagName
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = 0;
                        break;
                    case 'boolean':
                        // Next: value, #text, $tagName
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = 'boolean';
                        $aggregates[$depth + 1] = false;
                        break;
                    case 'double':
                    case 'float':
                    case 'bigdecimal':
                        // Next: value, #text, $tagName
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = 0.0;
                        break;
                    case 'dom':
                        $type = 'dom';
                        // Disable type checking
                        $nextExpectedElements = null;
                        // Stage the raw inner XML; it is loaded into a DOMDocument
                        // when a text node is processed with $type === 'dom'.
                        $aggregates[$depth + 1] = $xml->readInnerXml();
                        break;
                }
                break;
            case \XMLReader::END_ELEMENT:
                switch ($tagName) {
                    case 'params':
                    case 'fault':
                        // Leaves both switches and the read loop: parsing is done.
                        break 3;
                    case 'param':
                        // Next: params, param
                        $nextExpectedElements = 0b1010;
                        break;
                    case 'value':
                        $nextExpectedElements = 0b100100000011100100011011100;
                        // Move the staged value into the collection of the current depth.
                        $aggregates[$depth][] = $aggregates[$depth + 1];
                        break;
                    case 'array':
                    case 'struct':
                        --$depth;
                        // Break intentionally omitted
                    case 'string':
                    case 'int':
                    case 'biginteger':
                    case 'i8':
                    case 'i4':
                    case 'i2':
                    case 'i1':
                    case 'boolean':
                    case 'double':
                    case 'float':
                    case 'bigdecimal':
                    case 'dateTime.iso8601':
                    case 'dateTime':
                    case 'base64':
                    case 'nil':
                        // Next: value
                        $nextExpectedElements = 0b10000;
                        break;
                    case 'data':
                        // Next: array
                        $nextExpectedElements = 0b100000;
                        break;
                    case 'name':
                        // Next: value, member
                        $nextExpectedElements = 0b1010000;
                        $aggregates[$depth]['name'] = $aggregates[$depth + 1];
                        break;
                    case 'member':
                        // Next: struct, member
                        $nextExpectedElements = 0b10001000000;
                        // Store the member's value in the parent struct under the member's name.
                        $aggregates[$depth - 1][$aggregates[$depth]['name']] = $aggregates[$depth][0];
                        unset($aggregates[$depth], $aggregates[$depth + 1]);
                        --$depth;
                        break;
                }
                break;
            case \XMLReader::TEXT:
            case \XMLReader::SIGNIFICANT_WHITESPACE:
                // Convert the text according to the type set by the enclosing element.
                switch ($type) {
                    case 'int':
                    case 'i4':
                    case 'i2':
                    case 'i1':
                        $value = (int) $xml->value;
                        break;
                    case 'boolean':
                        $value = $xml->value === '1';
                        break;
                    case 'double':
                    case 'float':
                    case 'bigdecimal':
                        $value = (double) $xml->value;
                        break;
                    case 'dateTime.iso8601':
                        // The UTC timezone object is created on first use and reused afterwards.
                        $value = \DateTime::createFromFormat('Ymd\\TH:i:s', $xml->value, isset($timezone) ? $timezone : ($timezone = new \DateTimeZone('UTC')));
                        break;
                    case 'dateTime':
                        $value = \DateTime::createFromFormat('Y-m-d\\TH:i:s.uP', $xml->value, isset($timezone) ? $timezone : ($timezone = new \DateTimeZone('UTC')));
                        break;
                    case 'base64':
                        $value = Base64Value::deserialize($xml->value);
                        break;
                    case 'dom':
                        $doc = new \DOMDocument('1.0', 'UTF-8');
                        $doc->loadXML($aggregates[$depth + 1]);
                        $value = $doc;
                        break;
                    default:
                        $value =& $xml->value;
                        break;
                }
                $aggregates[$depth + 1] = $value;
                // Type checking stays disabled while inside a `dom` element.
                if ($nextExpectedElements === null) {
                    break;
                }
                // Next: any
                $nextExpectedElements = 0b111111111111111111111111111;
                break;
        }

        // An empty element (<tag/>) produces no END_ELEMENT node, so its
        // end-tag handling is replayed here.
        if ($xml->isEmptyElement && $nodeType !== \XMLReader::END_ELEMENT) {
            $nodeType = \XMLReader::END_ELEMENT;
            goto processing;
        }
    }

    libxml_use_internal_errors($useErrors);

    // The decoded payload is the last entry collected at depth 0.
    $result = $aggregates ? array_pop($aggregates[0]) : null;

    if ($isFault) {
        throw FaultException::createFromResponse($result);
    }

    return $result;
}
/**
 * Imports VTM or VMP entries from an XML file into medication_drug.
 *
 * Table keys are disabled for the duration of the import and re-enabled
 * afterwards, also when the import aborts with an exception. Rows are
 * collected as quoted, comma-joined value lists and handed to importMD() every
 * `$this->import_size` rows, plus once more for the remainder.
 *
 * For type `vmp`, nodes that have a VTMID or whose NM matches one of the
 * patterns in `$this->filter_list` are skipped.
 *
 * @param string      $filename        Path of the XML file to import.
 * @param string|null $type            Overrides `$this->type` when given ('vtm' or 'vmp').
 * @param mixed       $external_source Overrides `$this->external_source` when given.
 * @param int|null    $import_size     Overrides `$this->import_size` when given.
 */
public function import($filename, $type = null, $external_source = null, $import_size = null)
{
    if (!$filename) {
        $this->usageError('Import filename required');
    }
    if (!file_exists($filename)) {
        $this->usageError("Cannot find import file " . $filename);
    }

    if ($type) {
        $this->type = $type;
    }
    if ($external_source) {
        $this->external_source = $external_source;
    }
    if ($import_size) {
        $this->import_size = $import_size;
    }

    $connection = Yii::app()->db;
    $connection->createCommand('ALTER TABLE medication_drug DISABLE KEYS;')->execute();

    $xr = new XMLReader();

    try {
        $xr->open($filename);
        $count = 0;
        $rows = array();
        $filter_regex = "/" . join('|', $this->filter_list) . "/i";

        // NOTE(review): the batching below presumably relies on importMD()
        // emptying $rows (by reference) - confirm, otherwise rows are re-sent.
        switch ($this->type) {
            case 'vtm':
                // get to the start
                while ($xr->read() && $xr->name !== 'VTM') {
                }
                // iterate through
                while ($xr->name === 'VTM') {
                    $node = new SimpleXMLElement($xr->readOuterXml());
                    $rows[] = implode(",", array(
                        $connection->quoteValue($node->NM),
                        $connection->quoteValue($node->VTMID),
                        $connection->quoteValue('DMD-VTM'),
                    ));
                    $xr->next('VTM');
                    if (++$count % $this->import_size == 0) {
                        $this->importMD($rows);
                    }
                }
                break;
            case 'vmp':
                // get to the start
                while ($xr->read() && $xr->name !== 'VMP') {
                }
                // iterate through
                while ($xr->name === 'VMP') {
                    $node = new SimpleXMLElement($xr->readOuterXml());
                    if ($node->VTMID || preg_match($filter_regex, $node->NM)) {
                        $xr->next('VMP');
                        continue;
                    }
                    $rows[] = implode(',', array(
                        $connection->quoteValue($node->NM),
                        $connection->quoteValue($node->VPID),
                        $connection->quoteValue('DMD-VMP'),
                    ));
                    $xr->next('VMP');
                    if (++$count % $this->import_size == 0) {
                        $this->importMD($rows);
                    }
                }
                break;
            default:
                echo "Unrecognised format " . $this->type . "\n\n";
                echo $this->getHelp();
        }

        // import remainder
        if (count($rows)) {
            $this->importMD($rows);
        }
    } finally {
        // be good
        $xr->close();

        // Turn the indexes back on, whatever happened above.
        $connection->createCommand('ALTER TABLE medication_drug ENABLE KEYS;')->execute();
    }
}
/**
 * Opens `$this->file`, attaches the GPX schema and loads the `gpx` element.
 *
 * The document is read until the first element named `gpx`; that element is
 * stored in `$this->xml` as a SimpleXMLElement. Nodes in front of it (for
 * example a comment) are skipped instead of ending the search.
 *
 * NOTE(review): the result of setSchema() and XMLReader::isValid() are not
 * inspected, so schema violations do not cause a failure here - confirm
 * whether stricter validation is wanted.
 *
 * @return bool Always TRUE on success.
 *
 * @throws InvalidFileException When the file cannot be opened, cannot be
 *                              parsed, or contains no `gpx` element.
 */
public function isValidXML()
{
    $xml = new \XMLReader();

    try {
        if (!$xml->open($this->file)) {
            throw new \RuntimeException('Unable to open file');
        }

        //Check if it is the right schema
        $xml->setSchema(__DIR__ . '/../gpx.xsd');
        $this->xml = $xml;

        //Read the file to GPX element and open it with simpleXML
        $found = false;
        while ($xml->read()) {
            if ($xml->nodeType === \XMLReader::ELEMENT && $xml->name === 'gpx') {
                $this->xml = new SimpleXMLElement($xml->readOuterXml());
                $found = true;
                break;
            }
        }

        if (!$found) {
            throw new \RuntimeException('No gpx element found');
        }
    } catch (\Exception $e) {
        throw new InvalidFileException("ERROR invalid XML file!", InvalidFileException::FileXMLInvalid);
    }

    return TRUE;
}
/**
 * Streams the downloaded XML and processes every main node.
 *
 * The source is the temporary file when one is set, otherwise the in-memory
 * XML string. Each element named getNodeName() is converted with
 * simplexml_load_string(), passed to processElement() together with a running
 * index, and the result is forwarded to the callback when one is configured.
 * Nodes that yield no XML or a falsy SimpleXML value are skipped without
 * consuming an index.
 *
 * @throws \RuntimeException When the download has not finished yet.
 */
protected function processFile()
{
    if (!$this->downloadFinished) {
        throw new \RuntimeException("File has not been downloaded yet.");
    }

    $reader = new \XMLReader();
    $targetName = $this->getNodeName();

    if ($this->tempFile) {
        $reader->open($this->tempFile);
    } else {
        $reader->XML($this->xml);
    }

    $index = 0;

    while ($reader->read()) {
        $currentName = $reader->name;
        $currentType = $reader->nodeType;

        // Only start tags of the main node are of interest.
        if (!($currentType === \XMLReader::ELEMENT && $currentName === $targetName)) {
            continue;
        }

        $outerXml = $reader->readOuterXml();
        if (!$outerXml) {
            continue;
        }

        $element = simplexml_load_string($outerXml);
        if (!$element) {
            continue;
        }

        $review = $this->processElement($element, $index);

        if ($this->callback) {
            call_user_func($this->callback, $review);
        }

        ++$index;
    }
}
/**
 * Process the xml
 *
 * Streams the uploaded WordPress export. `item` elements are dispatched by
 * their `wp:post_type` to processPost() or processAttachment(); `wp:author`
 * elements are collected in `$this->authors`, keyed by author login.
 *
 * The reader is advanced only by the loop condition, so no node is stepped
 * over after an element has been handled.
 */
private function processXML()
{
    $reader = new \XMLReader();
    $reader->open(FRONTEND_FILES_PATH . '/wordpress.xml');

    // Loop through the document
    while ($reader->read()) {
        // Start tag for item?
        if ($reader->name != 'item' && $reader->name != 'wp:author') {
            continue;
        }

        // End tag?
        if ($reader->nodeType == \XMLReader::END_ELEMENT) {
            continue;
        }

        // Get the raw XML
        $xmlString = $reader->readOuterXml();

        // Read the XML as an SimpleXML-object
        /* @var \SimpleXMLElement $xml */
        $xml = @simplexml_load_string($xmlString);

        // Skip element if it isn't a valid SimpleXML-object
        if ($xml === false) {
            continue;
        }

        // Children in the "wp" namespace, used by both branches below.
        $wp = $xml->children('wp', true);

        // Is it really an item?
        if (substr($xmlString, 0, 5) == '<item') {
            // What type of content are we dealing with?
            switch ((string) $wp->post_type) {
                case 'post':
                    $this->processPost($xml);
                    break;
                case 'attachment':
                    $this->processAttachment($xml);
                    break;
                default:
                    // Don't do anything
                    break;
            }
        } elseif (substr($xmlString, 0, 10) == '<wp:author') {
            // Process the authors
            $this->authors[(string) $wp->author_login] = array(
                'id' => (string) $wp->author_id,
                'login' => (string) $wp->author_login,
                'email' => (string) $wp->author_email,
                'display_name' => (string) $wp->author_display_name,
                'first_name' => (string) $wp->author_first_name,
                'last_name' => (string) $wp->author_last_name,
            );
        }
    }

    // close
    $reader->close();
}
/**
 * Process the records xml
 *
 * Advances the reader to the first node named `$this->recordElem`, then hands
 * each such node to processRecord() as a SimpleXML element together with its
 * 1-based position. Nodes that cannot be expanded are reported through
 * message(); a progress message is emitted every 1000 processed records.
 *
 * @param XMLReader $xml XML File of records
 *
 * @return void
 */
protected function processRecords(&$xml)
{
    // Move forward until the first record (or the end of the document).
    do {
        $hasNode = $xml->read();
    } while ($hasNode && $xml->name !== $this->recordElem);

    // Target document for the expanded nodes.
    $doc = new DOMDocument();

    for ($count = 1; $xml->name == $this->recordElem; ++$count) {
        $node = $xml->expand();

        if ($node !== false) {
            $record = simplexml_import_dom($doc->importNode($node, true));
            $this->processRecord($record, $count);

            if ($count % 1000 == 0) {
                $this->message("{$count} records processed", true);
            }
        } else {
            $this->message('Failed to expand node: ' . $xml->readOuterXml(), false, Logger::ERROR);
        }

        $xml->next($this->recordElem);
    }
}
/**
 * Validate the form
 *
 * When the form has been submitted with a valid .xml upload, the file is moved
 * to the frontend files folder and streamed: every `entry` element is
 * dispatched to processXMLAsPost() or processXMLAsComment() depending on the
 * first matching category term. Afterwards the comment counts are
 * recalculated, the file is removed and the user is redirected to the overview.
 *
 * @return void
 */
private function validateForm()
{
    // nothing to do until the form is submitted
    if (!$this->frm->isSubmitted()) {
        return;
    }

    // cleanup the submitted fields, ignore fields that were added by hackers
    $this->frm->cleanupFields();

    // XML provided?
    if ($this->frm->getField('blogger')->isFilled()) {
        $this->frm->getField('blogger')->isAllowedExtension(array('xml'), BL::err('XMLFilesOnly'));
    } else {
        $this->frm->getField('blogger')->addError(BL::err('FieldIsRequired'));
    }

    // stop on errors
    if (!$this->frm->isCorrect()) {
        return;
    }

    $importPath = FRONTEND_FILES_PATH . '/blogger.xml';

    // move the file
    $this->frm->getField('blogger')->moveFile($importPath);

    // open the file and position the reader on its first node
    $reader = new XMLReader();
    $reader->open($importPath);
    $reader->read();

    // walk the document; the loop condition advances the reader
    do {
        // only entries are of interest
        if ($reader->name != 'entry') {
            continue;
        }

        // on an end tag, move on to the following node
        if ($reader->nodeType == XMLReader::END_ELEMENT) {
            $reader->next();
        }

        // get the raw XML and make sure it really is an entry
        $entryXml = $reader->readOuterXml();
        if (substr($entryXml, 0, 6) != '<entry') {
            continue;
        }

        // read the XML as an SimpleXML-object
        $entry = @simplexml_load_string($entryXml);
        if ($entry === false) {
            continue;
        }

        // the first post/comment category decides how the entry is handled
        foreach ($entry->category as $category) {
            $term = $category['term'];

            if ($term == 'http://schemas.google.com/blogger/2008/kind#post') {
                $this->processXMLAsPost($entry);
                break;
            } elseif ($term == 'http://schemas.google.com/blogger/2008/kind#comment') {
                $this->processXMLAsComment($entry);
                break;
            }
        }
    } while ($reader->read());

    // close
    $reader->close();

    // recalculate the comments
    BackendBlogModel::reCalculateCommentCount($this->newIds);

    // remove the file
    SpoonFile::delete($importPath);

    // everything is saved, so redirect to the overview
    $this->redirect(BackendModel::createURLForAction('index') . '&report=imported');
}
/**
 * Extracts all the shared strings of the worksheet.
 * The strings live in a XML file located at 'xl/sharedStrings.xml' and are
 * referenced by the worksheet data through their index.
 *
 * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
 *
 * The XML file can be really big, so it is streamed with XMLReader. Each 'si'
 * node is small, which is why it is handed to SimpleXML for the actual
 * extraction: superfluous text nodes are removed, the remaining 't' nodes are
 * concatenated (trimmed unless whitespace must be preserved), unescaped,
 * line-feed encoded and written to a temp file together with the string index.
 *
 * NOTE(review): the file is opened with LIBXML_NOENT, which substitutes
 * entities - confirm this is acceptable for untrusted workbooks.
 *
 * @return void
 * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
 */
public function extractSharedStrings()
{
    $reader = new \XMLReader();
    $stringIndex = 0;
    $this->tempFilePointer = null;
    $escaper = new \Box\Spout\Common\Escaper\XLSX();

    $filePath = $this->getSharedStringsFilePath();
    $opened = $reader->open($filePath, null, LIBXML_NOENT | LIBXML_NONET);
    if ($opened === false) {
        throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
    }

    // fast-forward to the first 'si' tag
    do {
        $hasNode = $reader->read();
    } while ($hasNode && $reader->name !== 'si');

    for (; $reader->name === 'si'; $reader->next('si')) {
        $siNode = new \SimpleXMLElement($reader->readOuterXml());
        $siNode->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);

        // removes nodes that should not be read, like the pronunciation of the Kanji characters
        $cleanNode = $this->removeSuperfluousTextNodes($siNode);

        // there can be several 't' nodes when the cell contains formatting
        $combinedText = '';
        foreach ($cleanNode->xpath('//ns:t') as $textNode) {
            $keepWhitespace = $this->shouldPreserveWhitespace($textNode);
            $text = $textNode->__toString();
            $combinedText .= $keepWhitespace ? $text : trim($text);
        }

        // The shared string retrieval logic expects each cell data to be on one line only;
        // encoding the line feed character preserves this assumption
        $encodedText = $this->escapeLineFeed($escaper->unescape($combinedText));
        $this->writeSharedStringToTempFile($encodedText, $stringIndex);

        $stringIndex++;
    }

    // close pointer to the last temp file that was written
    if ($this->tempFilePointer) {
        fclose($this->tempFilePointer);
    }

    $reader->close();
}
/**
 * Returns the result of the parent class's readOuterXml() unchanged.
 *
 * The override adds no behaviour of its own; it only forwards the call.
 *
 * @return mixed Whatever parent::readOuterXml() returns.
 */
public function readOuterXml()
{
    return parent::readOuterXml();
}