/**
 * Streams through an exported feed file and fails on the first libxml error.
 *
 * Every element whose name equals the feed type's item node advances the
 * progress bar, and its outer XML is remembered in $this->currentItem.
 *
 * @param FeedTypeInterface $type
 * @param OutputInterface   $output
 *
 * @return void
 *
 * @throws FileNotFoundException When the feed file does not exist
 * @throws \RuntimeException     When the file cannot be opened or libxml reports a problem
 */
protected function validate(FeedTypeInterface $type, OutputInterface $output)
{
    $file = $this->exporter->getFeedFilename($type);

    if (!file_exists($file)) {
        throw new FileNotFoundException(sprintf('<error>Feed "%s" has not yet been exported</error>', $type->getName()));
    }

    // XMLReader's constructor takes no arguments, so the libxml flags that were
    // previously handed to "new \XMLReader(...)" never took effect.
    // NOTE(review): they are deliberately not forwarded to open() here, because
    // LIBXML_NOERROR / LIBXML_NOWARNING could hide the errors this method looks for.
    $this->reader = new \XMLReader();

    if (!$this->reader->open($file)) {
        throw new \RuntimeException(sprintf('Unable to open feed file "%s"', $file));
    }

    $this->reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

    // Schema validation per namespace is currently disabled:
    // foreach ($type->getNamespaces() as $name => $location) {
    //     $this->reader->setSchema($location);
    // }

    libxml_clear_errors();
    libxml_use_internal_errors(true);
    libxml_disable_entity_loader(true);

    $progress = new ProgressBar($output);
    $progress->start();

    // Go through the whole document.
    while ($this->reader->read()) {
        if ($this->reader->nodeType === \XMLReader::ELEMENT && $this->reader->name === $type->getItemNode()) {
            $progress->advance();
            $this->currentItem = $this->reader->readOuterXml();
        }

        if ($error = libxml_get_last_error()) {
            throw new \RuntimeException(
                sprintf(
                    '[%s %s] %s (in %s - line %d, column %d)',
                    LIBXML_ERR_WARNING === $error->level ? 'WARNING' : 'ERROR',
                    $error->code,
                    trim($error->message),
                    $error->file ? $error->file : 'n/a',
                    $error->line,
                    $error->column
                )
            );
        }
    }

    $progress->finish();
}
/**
 * Opens the given file through the underlying process object.
 *
 * @param string $file Path of the file to open
 * @return boolean Whatever the process object's open() call returns
 * @throws Exception When the file does not exist
 */
public function open($file)
{
    if (file_exists($file)) {
        return $this->process->open($file);
    }

    throw new Exception(sprintf('Unable to open file %s', $file));
}
/**
 * Lazily creates the XMLReader for $this->xmlFilePath.
 *
 * Does nothing when a reader already exists. The reader is stored on the
 * instance only after the file was opened successfully, so a failed attempt
 * is not cached.
 *
 * @return void
 * @throws \RuntimeException When the XML file cannot be opened
 */
private function open()
{
    if ($this->reader) {
        return;
    }

    $reader = new \XMLReader();
    if (!$reader->open($this->xmlFilePath)) {
        throw new \RuntimeException(sprintf('Unable to open XML file "%s"', $this->xmlFilePath));
    }

    $this->reader = $reader;
}
/**
 * Scans the dump for pages matching each configured query.
 *
 * @throws RuntimeException When the dump cannot be opened
 * @return array of arrays, keyed by query key, holding the titles of matching pages.
 *         array( 'dumpKey' => array( 'match1', 'match2' ) )
 */
public function scan()
{
    if (!$this->reader->open($this->dumpLocation)) {
        throw new RuntimeException('Failed to open XML: ' . $this->dumpLocation);
    }

    // Seed every query key so it is present in the result even without matches.
    $matches = array();
    foreach ($this->query as $key => $unused) {
        $matches[$key] = array();
    }

    // Advance to the first "page" node, or to the end of the document.
    while ($this->reader->read()) {
        if ($this->reader->name === 'page') {
            break;
        }
    }

    while ($this->reader->name === 'page') {
        $page = $this->getPageFromElement(new SimpleXMLElement($this->reader->readOuterXML()));

        foreach ($this->query as $key => $criteria) {
            if ($this->matchPage($page, $criteria)) {
                // TODO allow the user to choose what to return
                $matches[$key][] = $page->getTitle()->getTitle();
            }
        }

        $this->reader->next('page');
    }

    $this->reader->close();

    return $matches;
}
/**
 * Instantiate our GPX handler, and load the specified filename
 *
 * @param string $gpxFilename Load the specified filename into our GPX handler
 * @throws \Exception When the file does not exist or cannot be opened
 **/
public function __construct($gpxFilename)
{
    if (!file_exists($gpxFilename)) {
        throw new \Exception(sprintf('File "%s" does not exist', $gpxFilename));
    }

    $this->gpxReader = new \XMLReader();

    // open() reports failure through its return value; surface it as the
    // exception this constructor documents.
    if (!$this->gpxReader->open($gpxFilename)) {
        throw new \Exception(sprintf('Unable to load file "%s"', $gpxFilename));
    }
}
/**
 * Re-opens the file and advances until valid() reports true or the
 * document ends.
 *
 * @return void
 */
public function rewind()
{
    // LIBXML_COMPACT is applied only when the constant is defined.
    $options = defined('LIBXML_COMPACT') ? constant('LIBXML_COMPACT') : 0;
    $this->reader->open($this->file, 'utf-8', $options);

    do {
        $hasNode = $this->reader->read();
    } while ($hasNode && !$this->valid());
}
/**
 * Creates the XMLReader for the given file and stores it on the instance.
 *
 * @param string $pathToFile Path of the XML file to read
 * @return void
 * @throws ImporterException When the reader cannot open the file
 */
private function initializeReader($pathToFile)
{
    FileHelper::ensureIsReadable($pathToFile);

    $this->reader = new \XMLReader();

    if ($this->reader->open($pathToFile)) {
        return;
    }

    throw new ImporterException('Ошибка открытия XML файла по адресу: ' . $pathToFile);
}
/**
 * Builds the reader from either an XML string or a file path/URI, then parses it.
 *
 * Input whose trimmed content starts with an XML declaration is treated as
 * XML source; anything else is handed to XMLReader::open().
 *
 * @param string $xml XML source, or the location of an XML document
 */
public function __construct($xml)
{
    $this->xml = new XMLReader();

    $trimmed = trim($xml);

    if (preg_match('/^<\\?xml/', $trimmed)) {
        // Parse the trimmed string: an XML declaration is only allowed at the
        // very start of a document, so leading whitespace must not be passed on.
        $this->xml->XML($trimmed);
    } else {
        $this->xml->open($xml);
    }

    $this->parse();
}
/**
 * Constructor
 *
 * Creates an SVGReader drawing from the source provided
 *
 * @param string $source URI from which to read
 * @throws MWException|Exception
 */
function __construct($source)
{
    global $wgSVGMetadataCutoff;
    $this->reader = new XMLReader();

    // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
    $size = filesize($source);
    if ($size === false) {
        throw new MWException("Error getting filesize of SVG.");
    }

    if ($size > $wgSVGMetadataCutoff) {
        $this->debug("SVG is {$size} bytes, which is bigger than {$wgSVGMetadataCutoff}. Truncating.");
        // Read from offset 0: a negative offset counts from the end of the
        // file on PHP >= 7.1, so -1 would return only the final byte.
        $contents = file_get_contents($source, false, null, 0, $wgSVGMetadataCutoff);
        if ($contents === false) {
            throw new MWException('Error reading SVG file.');
        }
        $this->reader->XML($contents, null, LIBXML_NOERROR | LIBXML_NOWARNING);
    } else {
        $this->reader->open($source, null, LIBXML_NOERROR | LIBXML_NOWARNING);
    }

    // Expand entities, since Adobe Illustrator uses them for xmlns
    // attributes (bug 31719). Note that libxml2 has some protection
    // against large recursive entity expansions so this is not as
    // insecure as it might appear to be. However, it is still extremely
    // insecure. It's necessary to wrap any read() calls with
    // libxml_disable_entity_loader() to avoid arbitrary local file
    // inclusion, or even arbitrary code execution if the expect
    // extension is installed (bug 46859).
    $oldDisable = libxml_disable_entity_loader(true);
    $this->reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

    $this->metadata['width'] = self::DEFAULT_WIDTH;
    $this->metadata['height'] = self::DEFAULT_HEIGHT;

    // The size in the units specified by the SVG file
    // (for the metadata box)
    // Per the SVG spec, if unspecified, default to '100%'
    $this->metadata['originalWidth'] = '100%';
    $this->metadata['originalHeight'] = '100%';

    // Warnings are suppressed because cutting off the end of the SVG makes an
    // invalid document. The finally block guarantees that warnings and the
    // entity-loader setting are restored on success and on any throwable.
    MediaWiki\suppressWarnings();
    try {
        // Note, if this throws, the width/height will be taken to be 0x0.
        // Should we consider it the default 512x512 instead?
        $this->read();
    } finally {
        MediaWiki\restoreWarnings();
        libxml_disable_entity_loader($oldDisable);
    }
}
/**
 * {@inheritdoc}
 *
 * Resets the position, re-opens the file with a fresh reader, steps over a
 * fixed sequence of leading nodes and then reads forward until a node named
 * "product" is reached or the document ends.
 */
public function rewind()
{
    $this->position = 0;

    $this->reader = new XMLReader();
    $this->reader->open($this->file);

    // Fixed hop sequence over the start of the document.
    // NOTE(review): which nodes these calls skip depends on the file layout — confirm.
    $this->reader->read();
    $this->reader->next();
    $this->reader->read();
    $this->reader->next();
    $this->reader->next();

    do {
        $hasNode = $this->reader->read();
    } while ($hasNode && $this->reader->name !== 'product');
}
/**
 * Opens the given XML file and positions the reader on the "roboml_toc" element.
 *
 * The first node is consumed unconditionally; reading then continues to the
 * next element node, which must be named "roboml_toc".
 * NOTE(review): if the first node of the document is already the root element
 * it is skipped by the initial read() — confirm this matches the file layout.
 *
 * @param string $inFile Path of the XML file
 * @throws ErrorException When the file cannot be opened or the expected element is not found
 */
public function __construct($inFile)
{
    $this->_reader = new XMLReader();

    if (!$this->_reader->open($inFile)) {
        throw new ErrorException("Unable to open XML file: {$inFile}");
    }

    $this->_reader->read();

    // Check that we are in the right place
    while ($this->_reader->read()) {
        if ($this->_reader->nodeType == XMLReader::ELEMENT) {
            break;
        }
    }

    if (!($this->_reader->nodeType == XMLReader::ELEMENT && $this->_reader->name == 'roboml_toc')) {
        throw new ErrorException("Unexpected XML node: {$this->_reader->name}");
    }
}
/**
 * Imports address objects with IFNSFL 8603 from the FIAS XML dump.
 *
 * Each matching "Object" node is copied into a Fias model and saved when it
 * validates; otherwise its attributes and validation errors are dumped.
 * Prints "ok" when the whole file has been read.
 *
 * @return void
 * @throws \RuntimeException When the dump file cannot be opened
 */
public function actionFias()
{
    $file = 'AS_ADDROBJ_20160609_c5080ba4-9f46-4b6e-aecc-72a630730b3a.XML';

    $xmlObject = new \XMLReader();
    if (!$xmlObject->open($file)) {
        throw new \RuntimeException(sprintf('Unable to open FIAS file "%s"', $file));
    }

    header('Content-Type: text/html; charset=utf-8');

    while ($xmlObject->read()) {
        if ($xmlObject->name != 'Object') {
            continue;
        }

        // Alternative filter kept for reference:
        // if (($xmlObject->getAttribute('PARENTGUID') == '0bf0f4ed-13f8-446e-82f6-325498808076' && $xmlObject->getAttribute('AOLEVEL') == '7') || $xmlObject->getAttribute('AOGUID') == '0bf0f4ed-13f8-446e-82f6-325498808076') {
        if ($xmlObject->getAttribute('IFNSFL') != '8603') {
            continue;
        }

        $fias = new Fias();
        $fias->AOGUID = $xmlObject->getAttribute('AOGUID');
        $fias->OFFNAME = $xmlObject->getAttribute('OFFNAME');
        $fias->SHORTNAME = $xmlObject->getAttribute('SHORTNAME');
        $fias->IFNSFL = $xmlObject->getAttribute('IFNSFL');
        $fias->AOLEVEL = $xmlObject->getAttribute('AOLEVEL');
        $fias->PARENTGUID = $xmlObject->getAttribute('PARENTGUID');

        if ($fias->validate()) {
            $fias->save();
        } else {
            var_dump($fias->attributes);
            var_dump($fias->getErrors());
        }
    }

    echo 'ok';
    $xmlObject->close();
}
/**
 * Method to load a URI into the feed reader for parsing.
 *
 * The URI is first opened directly by XMLReader. When that fails, the feed is
 * fetched through JHttpFactory instead and the response body is parsed.
 *
 * @param   string  $uri  The URI of the feed to load. Idn uris must be passed already converted to punycode.
 *
 * @return  JFeedReader
 *
 * @since   12.3
 * @throws  InvalidArgumentException
 * @throws  RuntimeException
 */
public function getFeed($uri)
{
    $libxmlFlags = LIBXML_NOERROR | LIBXML_ERR_NONE | LIBXML_NOWARNING;

    // Create the XMLReader object and open the URI within the stream reader.
    $reader = new XMLReader();
    $opened = @$reader->open($uri, null, $libxmlFlags);

    if (!$opened) {
        // Retry with JHttpFactory, which can use CURL and sockets as alternative
        // transports when available. A valid user agent string is sent because
        // some feed servers return an error without one.
        $options = new \joomla\Registry\Registry();
        $options->set('userAgent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0');

        $feed = JHttpFactory::getHttp($options)->get($uri);

        if ($feed->code != 200) {
            throw new RuntimeException('Unable to open the feed.');
        }

        // Hand the downloaded body to the XMLReader parser.
        if (!$reader->xml($feed->body, null, $libxmlFlags)) {
            throw new RuntimeException('Unable to parse the feed.');
        }
    }

    try {
        // Skip ahead to the root node.
        while ($reader->read() && $reader->nodeType != XMLReader::ELEMENT) {
            // keep reading
        }
    } catch (Exception $e) {
        throw new RuntimeException('Error reading feed.', $e->getCode(), $e);
    }

    // Set up the appropriate feed parser for the feed.
    return $this->_fetchFeedParser($reader->name, $reader)->parse();
}
/**
 * Parses a specific XML file, yielding one data array per "item" node that
 * has children in the WordPress export namespace.
 *
 * Each yielded array has the keys type, post_date, title, content, tags and
 * categories.
 *
 * @param string $inputFile File to parse
 * @return \Generator
 */
public function parse($inputFile)
{
    $contentNs = 'http://purl.org/rss/1.0/modules/content/';
    $wordpressNs = 'http://wordpress.org/export/1.2/';

    $reader = new \XMLReader();
    $document = new \DOMDocument('1.0', 'UTF-8');
    $reader->open($inputFile);

    // Move to the first "item" node, or to the end of the document.
    do {
        $hasNode = $reader->read();
    } while ($hasNode && $reader->name !== 'item');

    while ($reader->name == 'item') {
        $item = simplexml_import_dom($document->importNode($reader->expand(), true));
        $wpItems = $item->children($wordpressNs);
        $content = $item->children($contentNs)->encoded;

        // Split the <category> entries by their "domain" attribute.
        $categories = [];
        $tags = [];
        foreach ($item->category as $category) {
            $domain = $category->attributes()->domain;
            if ('category' == $domain) {
                $categories[] = (string) $category;
            }
            if ('post_tag' == $domain) {
                $tags[] = (string) $category;
            }
        }

        if ($wpItems) {
            yield [
                'type' => (string) $wpItems->post_type,
                'post_date' => new \DateTime((string) $wpItems->post_date),
                'title' => (string) $item->title,
                'content' => (string) $content,
                'tags' => $tags,
                'categories' => $categories,
            ];
        }

        $reader->next('item');
    }
}
/**
 * Builds the "Trends XSL" page: the raw XML, three XSL transformations of it
 * and the result of validating the XML against its XSD schema.
 *
 * @return void
 */
function xsl()
{
    $xmlFile = './data/xml/energy.xml';

    $this->data['title'] = 'Trends XSL';
    $this->data['pagebody'] = 'vtrendsxsl'; // view template to render

    $this->load->helper('display'); // loads the helper functionality

    // Contents of the XML file for the "myxml" placeholder
    $this->data['myxml'] = display_file($xmlFile);
    $this->data['xmltable'] = xsl_transform($xmlFile, './data/xml/energy.xsl');
    $this->data['eproduced'] = xsl_transform($xmlFile, './data/xml/energy2.xsl');
    $this->data['eused'] = xsl_transform($xmlFile, './data/xml/energy3.xsl');

    // Collect libxml errors ourselves, starting from a clean slate.
    libxml_use_internal_errors(true);
    libxml_clear_errors();

    $xml = XMLReader::open($xmlFile);
    $xml->setSchema('./data/xml/energy.xsd');

    // XMLReader validates while it reads, so the document has to be read to
    // the end before its validity is known; isValid() before the first read()
    // has nothing to report.
    // NOTE(review): the XMLReader::VALIDATE property (DTD validation) is no
    // longer set, since the check here is against the XSD — confirm that no
    // DTD validation is expected.
    $valid = true;
    while ($xml->read()) {
        if (!$xml->isValid()) {
            $valid = false;
        }
    }

    $errors = libxml_get_errors();
    libxml_clear_errors();
    $xml->close();

    if ($valid && count($errors) === 0) {
        $this->data['validatedxml'] = '<br/>XML Valid <br/><br/>';
    } else {
        $result = "<b>ERROR</b><br/>";
        foreach ($errors as $error) {
            // Messages are placed into HTML, so escape them.
            $result .= htmlspecialchars($error->message, ENT_QUOTES, 'UTF-8') . '<br/>';
        }
        $result .= '<br/>';
        $this->data['validatedxml'] = $result;
    }

    $this->render(); // renders the page
}
/**
 * Opens a file with the instance's reader.
 *
 * @param string   $file    Location of the XML document
 * @param int|null $options libxml option flags; when null, a default set of
 *                          flags is used
 */
protected function open($file, $options = null)
{
    $flags = null === $options
        ? LIBXML_NOENT | LIBXML_NONET | LIBXML_COMPACT | LIBXML_PARSEHUGE | LIBXML_NOERROR | LIBXML_NOWARNING
        : $options;

    $this->reader->open($file, null, $flags);
}
/**
 * Builds a map of acronym => description from an XML file.
 *
 * The text following each "term" element becomes a key; the text following
 * the next "simpara" element becomes its value. The result is cached in a
 * static variable, so later calls return the first result regardless of
 * $filename.
 *
 * @param string $filename Path of the acronym XML file
 * @return array Map of term text to description text
 * @throws \Exception When the file cannot be opened
 */
public static function generateAcronymInfo($filename)
{
    static $info;
    // is_array() so that an empty result is cached too and not re-parsed.
    if (is_array($info)) {
        return $info;
    }

    $r = new \XMLReader();
    if (!$r->open($filename)) {
        throw new \Exception("Could not open file for accessing acronym information: {$filename}");
    }

    $acronyms = array();
    $k = null; // most recently seen term; null until the first "term" element

    while ($r->read()) {
        if ($r->nodeType != \XMLReader::ELEMENT) {
            continue;
        }

        if ($r->name == "term") {
            $r->read();
            $k = $r->value;
            $acronyms[$k] = "";
        } elseif ($r->name == "simpara" && $k !== null) {
            // A description is only meaningful once a term has been seen.
            $r->read();
            $acronyms[$k] = $r->value;
        }
    }

    $r->close();

    $info = $acronyms;

    return $acronyms;
}
/**
 * Builds a map of acronym => description from an XML file.
 *
 * The text following each "term" element becomes a key; the text following
 * the next "simpara" element becomes its value. A successful result is cached
 * in a static variable, so later calls return it regardless of $filename.
 * Problems are reported through v(); a missing or unopenable file yields an
 * empty array that is not cached.
 *
 * @param string $filename Path of the acronym XML file
 * @return array Map of term text to description text
 */
public static function generateAcronymInfo($filename)
{
    static $info;
    // is_array() so that an empty result is cached too and not re-parsed.
    if (is_array($info)) {
        return $info;
    }

    if (!is_file($filename)) {
        v("Can't find acronym file (%s), skipping", $filename, E_USER_WARNING);

        return array();
    }

    $r = new \XMLReader();
    if (!$r->open($filename)) {
        v("Could not open file for accessing acronym information (%s)", $filename, E_USER_ERROR);

        // Only reached if v() does not abort; never read from an unopened reader.
        return array();
    }

    $acronyms = array();
    $k = null; // most recently seen term; null until the first "term" element

    while ($r->read()) {
        if ($r->nodeType != \XMLReader::ELEMENT) {
            continue;
        }

        if ($r->name == "term") {
            $r->read();
            $k = $r->value;
            $acronyms[$k] = "";
        } elseif ($r->name == "simpara" && $k !== null) {
            // A description is only meaningful once a term has been seen.
            $r->read();
            $acronyms[$k] = $r->value;
        }
    }

    $r->close();

    $info = $acronyms;

    return $acronyms;
}
/**
 * Tells whether the given file can be read as a well-formed XML document.
 *
 * XMLReader only parses while it reads, so the whole document is read before
 * the verdict is given. libxml's error-handling mode is restored afterwards.
 *
 * @param string $filename Path (or URI) of the file to check
 * @return bool True when the file opens and parses without libxml errors
 */
function isXml($filename)
{
    $previousMode = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $xml = new XMLReader();

    try {
        // "@": open()/read() raise PHP warnings on failure in addition to the
        // libxml errors that are inspected below.
        if (!@$xml->open($filename)) {
            return false;
        }

        while (@$xml->read()) {
            // Reading to the end is what actually parses the document.
        }

        foreach (libxml_get_errors() as $error) {
            // Plain warnings do not make a document malformed.
            if ($error->level >= LIBXML_ERR_ERROR) {
                return false;
            }
        }

        return true;
    } finally {
        $xml->close();
        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);
    }
}
/**
 * Restores structure and data from a gzip-compressed XML backup.
 *
 * The archive is first decompressed into system/tmp/<basename>.xml, then the
 * XML is streamed: "structure" elements go to doRestoreStructure() and "data"
 * elements to doRestoreData(). The temporary file is deleted afterwards.
 *
 * @param string $strRestoreFile Archive path relative to TL_ROOT
 * @return mixed Result of the last doRestoreStructure() call, or null when
 *               the file holds no "structure" element
 * @throws Exception When the archive cannot be opened
 */
protected function runRestoreFromXML($strRestoreFile)
{
    $strTempPath = "system/tmp/" . basename($strRestoreFile) . ".xml";

    // Unzip XML
    $objGzFile = gzopen(TL_ROOT . "/" . $strRestoreFile, "r");
    if ($objGzFile === false) {
        throw new Exception("Could not open restore file " . $strRestoreFile);
    }

    $objXMLFile = new File($strTempPath);
    $objXMLFile->write("");
    $objXMLFile->close();

    while (true) {
        $strConten = gzread($objGzFile, 500000);

        // false or an empty string marks the end of the archive
        if ($strConten === false || $strConten === '') {
            break;
        }

        $objXMLFile->append($strConten, "");
        $objXMLFile->close();
    }

    gzclose($objGzFile);

    // Read XML
    $arrRestoreTables = null;

    $this->objXMLReader = new XMLReader();
    $this->objXMLReader->open(TL_ROOT . "/" . $strTempPath);

    while ($this->objXMLReader->read()) {
        if ($this->objXMLReader->nodeType != XMLReader::ELEMENT) {
            continue;
        }

        switch ($this->objXMLReader->localName) {
            case "structure":
                $arrRestoreTables = $this->doRestoreStructure();
                break;

            case "data":
                $this->doRestoreData();
                break;
        }
    }

    // Release the file handle before the temporary file is removed.
    $this->objXMLReader->close();
    $objXMLFile->delete();

    return $arrRestoreTables;
}
/**
 * Prepares the importer for the given XML file.
 *
 * Nothing is initialised when $filename is not a file. When the file cannot
 * be opened, only $this->filename is set. Otherwise the reader, the root
 * category id, the shared properties cache and the product object are set up.
 * If no root category exists and none can be created, the reader is closed
 * and $this->xml is reset to null.
 *
 * @param string $filename Path of the XML file
 */
function __construct($filename)
{
    if (!is_file($filename)) {
        return;
    }

    $this->filename = $filename;

    $xml = new \XMLReader();
    if (false === $xml->open($filename)) {
        return;
    }
    $this->xml = $xml;

    $rootCategory = Category::findOne(['parent_id' => 0]);
    if (empty($rootCategory)) {
        $rootCategory = Category::createEmptyCategory(0, null, 'Каталог');
        if (null === $rootCategory) {
            $this->xml->close();
            $this->xml = null;
        }
    }

    // Guard the dereference: $rootCategory is null when creation failed.
    $this->rootCategoryCache = is_object($rootCategory) ? $rootCategory->id : null;

    if (empty(static::$propertiesCache)) {
        // Index the known properties by their GUID.
        static::$propertiesCache = array_reduce(
            CommercemlGuid::find([['>', 'model_id', 0], ['type' => 'PROPERTY']])->all(),
            function ($result, $item) {
                $result[$item['guid']] = $item->property;

                return $result;
            },
            []
        );
    }

    $this->objectProduct = Object::getForClass(Product::className());
}
/**
 * Method to load a URI into the feed reader for parsing.
 *
 * The URI is first requested through $this->http to check that it is
 * reachable, then opened with XMLReader and advanced to the first element.
 *
 * @param   string  $uri  The URI of the feed to load.
 *
 * @return  JFeedReader
 *
 * @since   3.0
 * @throws  InvalidArgumentException  When the HTTP request for the URI fails
 * @throws  RuntimeException          When the feed cannot be opened or read
 */
public function getFeed($uri)
{
    // Make sure the file exists.
    try {
        $this->http->get($uri);
    } catch (RuntimeException $e) {
        // Keep the original failure attached for debugging.
        throw new InvalidArgumentException('The file ' . $uri . ' does not exist.', 0, $e);
    }

    // Create the XMLReader object.
    $reader = new XMLReader();

    // Open the URI within the stream reader.
    if (!@$reader->open($uri, null, LIBXML_NOERROR | LIBXML_ERR_NONE | LIBXML_NOWARNING)) {
        throw new RuntimeException('Unable to open the feed.');
    }

    try {
        // Skip ahead to the root node.
        while ($reader->read() && $reader->nodeType !== XMLReader::ELEMENT) {
            // keep reading
        }
    } catch (Exception $e) {
        throw new RuntimeException('Error reading feed.', 0, $e);
    }

    // Set up the appropriate feed parser for the feed.
    $parser = $this->_fetchFeedParser($reader->name, $reader);

    return $parser->parse();
}
/**
 * Starts over: opens the input file with a fresh reader and forgets the
 * current element and its id.
 *
 * @see $inputFile
 */
public function rewind()
{
    $freshReader = new XMLReader();
    $freshReader->open($this->inputFile);
    $this->xmlReader = $freshReader;

    $this->currentElementId = null;
    $this->currentElement = null;
}
/**
 * Validates $this->fileName against the XSD $this->xsdFilename (both located
 * in $this->directory) and records the outcome on $this->testProperty.
 *
 * @return void
 */
public function runTest()
{
    libxml_use_internal_errors(true);
    // Drop errors left over from earlier libxml work so they cannot fail this test.
    libxml_clear_errors();

    $xml = XMLReader::open(join(DIRECTORY_SEPARATOR, array($this->directory, $this->fileName)));
    $xml->setSchema(join(DIRECTORY_SEPARATOR, array($this->directory, $this->xsdFilename)));

    $this->logger->trace(__METHOD__);
    $this->logger->info(' XML file to test validity is ' . $this->fileName . ' using XSD file ' . $this->xsdFilename);

    // You have to parse the XML-file if you want it to be validated
    $currentReadCount = 1;
    $validationFailed = false;

    while ($xml->read() && $validationFailed == false) {
        // I want to break as soon as file is shown not to be valid
        // We could allow it to collect a few messages, but I think it's best
        // to do a manual check once we have discovered the file is not
        // correct. Speed is really what we want here!
        if ($currentReadCount++ % Constants::XML_PROCESSESING_CHECK_ERROR_COUNT == 0) {
            if (count(libxml_get_errors()) > 0) {
                $validationFailed = true;
            }
        }
    }

    $errorCount = count(libxml_get_errors());
    $xml->close();

    if ($errorCount == 0) {
        $this->testProperty->addTestResult(true);
        $this->logger->info(' RESULT Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] succeeded');
        $this->testProperty->addTestResultDescription('Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] succeeded');
        $this->testProperty->addTestResultReportDescription('Filen ' . $this->fileName . ' validerer mot filen ' . $this->xsdFilename);
    } else {
        $this->testProperty->addTestResult(false);
        $this->logger->error(' RESULT Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] failed');
        $this->testProperty->addTestResultDescription('Validation of [' . $this->fileName . '] against [' . $this->xsdFilename . '] failed');
        $this->testProperty->addTestResultReportDescription('Filen ' . $this->fileName . ' validerer ikke mot filen ' . $this->xsdFilename);
    }

    libxml_clear_errors();
}
/**
 * Processes given xml file by iterating over product nodes and extracting data into array
 *
 * Errors reported by the product builder are forwarded to the message handler
 * together with the file name and the position of the product node.
 *
 * @param string $filePath
 * @return array Product data rows of all products that could be built; empty
 *               when the file cannot be opened
 */
public function processFile($filePath)
{
    $messageHandler = Mage::getSingleton('xmlimport/messageHandler');
    /* @var $productBuilder C4B_XmlImport_Model_Products_ProductBuilder */
    $productBuilder = Mage::getModel('xmlimport/products_productBuilder');

    $products = array();
    $productNodePosition = 0;

    $xmlReader = new XMLReader();
    if (!$xmlReader->open($filePath)) {
        $messageHandler->addError("Could not open file {$filePath}");

        return $products;
    }

    while ($xmlReader->read()) {
        if ($xmlReader->nodeType != XMLReader::ELEMENT || $xmlReader->name != self::XML_NODE_NAME_PRODUCT) {
            continue;
        }

        $productNodePosition++;
        $productData = $productBuilder->getProductData($xmlReader->expand());

        if (count($productBuilder->getErrors()) > 0) {
            $messageHandler->addError("Product at position {$productNodePosition} has errors:");
        }

        if ($productData == null) {
            $messageHandler->addError('Product will not be imported');
        } else {
            foreach ($productData as $productDataRow) {
                $products[] = $productDataRow;
            }
        }

        $messageHandler->addErrorsForFile(basename($filePath), $productNodePosition, $productBuilder->getErrors());
    }

    $xmlReader->close();

    return $products;
}
/**
 * Method to load a URI into the feed reader for parsing.
 *
 * The URI is opened directly by XMLReader. If that fails while
 * allow_url_fopen is disabled, the feed is fetched through JHttpFactory and
 * the response body is parsed instead.
 *
 * @param   string  $uri  The URI of the feed to load. Idn uris must be passed already converted to punycode.
 *
 * @return  JFeedReader
 *
 * @since   12.3
 * @throws  InvalidArgumentException
 * @throws  RuntimeException
 */
public function getFeed($uri)
{
    // Create the XMLReader object.
    $reader = new XMLReader();

    // Open the URI within the stream reader.
    if (!@$reader->open($uri, null, LIBXML_NOERROR | LIBXML_ERR_NONE | LIBXML_NOWARNING)) {
        // If allow_url_fopen is enabled
        if (ini_get('allow_url_fopen')) {
            // This is an error
            throw new RuntimeException('Unable to open the feed.');
        }

        // Retry with JHttpFactory that allow using CURL and Sockets as alternative method when available
        $connector = JHttpFactory::getHttp();
        $feed = $connector->get($uri);

        // Do not try to parse the body of an error response.
        if ($feed->code != 200) {
            throw new RuntimeException('Unable to open the feed.');
        }

        // Set the value to the XMLReader parser
        if (!$reader->xml($feed->body, null, LIBXML_NOERROR | LIBXML_ERR_NONE | LIBXML_NOWARNING)) {
            throw new RuntimeException('Unable to parse the feed.');
        }
    }

    try {
        // Skip ahead to the root node.
        while ($reader->read()) {
            if ($reader->nodeType == XMLReader::ELEMENT) {
                break;
            }
        }
    } catch (Exception $e) {
        // Keep the original failure attached for debugging.
        throw new RuntimeException('Error reading feed.', 0, $e);
    }

    // Set up the appropriate feed parser for the feed.
    $parser = $this->_fetchFeedParser($reader->name, $reader);

    return $parser->parse();
}
/**
 * Prints distribution statistics for the taxa in resource 218.
 *
 * Every "taxon" element is re-parsed with SimpleXML (after the "dc:" and
 * "dwc:" prefixes are rewritten to "dc_" / "dwc_"); its identifier is printed,
 * and its data objects with the Distribution subject are counted, together
 * with the number of taxa having at least one such object.
 */
function eol_xml_stats()
{
    $path = "http://localhost/eol_php_code/applications/content_server/resources/218.xml";
    $distribution_subject = "http://rs.tdwg.org/ontology/voc/SPMInfoItems#Distribution";

    $reader = new \XMLReader();
    $reader->open($path);

    $dist_count = 0;
    $taxa_count = 0;

    while (@$reader->read()) {
        if ($reader->nodeType != \XMLReader::ELEMENT || $reader->name != "taxon") {
            continue;
        }

        $string = str_ireplace("dc:", "dc_", $reader->readOuterXML());
        $string = str_ireplace("dwc:", "dwc_", $string);

        $xml = simplexml_load_string($string);
        if (!$xml) {
            continue;
        }

        $taxon_id = (string) $xml->dc_identifier;
        print "[{$taxon_id}]";

        $taxa_with_dist = false;
        foreach ($xml->dataObject as $o) {
            if (@$o->subject == $distribution_subject) {
                $dist_count++;
                $taxa_with_dist = true;
            }
        }

        if ($taxa_with_dist) {
            $taxa_count++;
        }
    }

    print "\n\n";
    print "\n distribution: [{$dist_count}]";
    print "\n taxa with dist: [" . $taxa_count . "]";
    print "\n\n";
}
/**
 * Function takes path to SVG font (local path) and processes its xml
 * to get path representation of every character and additional
 * font parameters
 *
 * @param string $filename Path of the SVG font file
 */
public function load($filename)
{
    $this->glyphs = array();

    $z = new XMLReader();
    $z->open($filename);

    // Walk every node; only <font>, <font-face> and <glyph> elements matter.
    while ($z->read()) {
        if ($z->nodeType != XMLReader::ELEMENT) {
            continue;
        }

        switch ($z->name) {
            case 'font':
                $this->id = $z->getAttribute('id');
                $this->horizAdvX = $z->getAttribute('horiz-adv-x');
                break;

            case 'font-face':
                $this->unitsPerEm = $z->getAttribute('units-per-em');
                $this->ascent = $z->getAttribute('ascent');
                $this->descent = $z->getAttribute('descent');
                break;

            case 'glyph':
                // Glyphs are keyed by the first element returned for the "unicode" attribute.
                $codepoints = $this->utf8ToUnicode($z->getAttribute('unicode'));
                $key = $codepoints[0];

                $glyph = new stdClass();
                $glyph->horizAdvX = $z->getAttribute('horiz-adv-x');
                if (empty($glyph->horizAdvX)) {
                    // Fall back to the font-wide advance width.
                    $glyph->horizAdvX = $this->horizAdvX;
                }
                $glyph->d = $z->getAttribute('d');

                $this->glyphs[$key] = $glyph;
                break;
        }
    }
}
/**
 * Reads the configuration file and creates the class attributes
 *
 * The file is validated against the RelaxNG schema in self::WURFL_CONF_SCHEMA
 * while it is read; element starts, text nodes and element ends are passed to
 * the corresponding _handle*() methods.
 *
 * @throws Exception When the file cannot be opened or fails validation
 */
protected function initialize()
{
    $configFilePath = parent::getConfigFilePath();

    $reader = new XMLReader();
    if (!$reader->open($configFilePath)) {
        throw new Exception('Unable to open configuration file: ' . $configFilePath);
    }

    $reader->setRelaxNGSchemaSource(self::WURFL_CONF_SCHEMA);
    libxml_use_internal_errors(TRUE);

    while ($reader->read()) {
        if (!$reader->isValid()) {
            // libxml_get_last_error() returns false when no error was recorded.
            $error = libxml_get_last_error();
            $reader->close();

            throw new Exception($error ? $error->message : 'Invalid configuration file');
        }

        $name = $reader->name;

        switch ($reader->nodeType) {
            case XMLReader::ELEMENT:
                $this->_handleStartElement($name);
                break;

            case XMLReader::TEXT:
                $this->_handleTextElement($reader->value);
                break;

            case XMLReader::END_ELEMENT:
                $this->_handleEndElement($name);
                break;
        }
    }

    $reader->close();

    if (isset($this->cache["dir"])) {
        $this->logDir = $this->cache["dir"];
    }
}
/**
 * Opens an XMLReader on a URI created by Splunk_StreamStream for a live
 * HTTP stream. The test makes no assertions.
 */
public function testParseXmlFromHttpStream()
{
    $xmlReader = new XMLReader();

    // The stream stays in a local variable for the duration of the test.
    $stream = fopen('http://www.splunk.com/', 'rb');

    $xmlReader->open(Splunk_StreamStream::createUriForStream($stream));
}