/**
 * Streams through the exported feed of the given type and fails on the first
 * error libxml reports.
 *
 * The outer XML of the most recently visited item node is kept in
 * $this->currentItem while reading.
 *
 * @param FeedTypeInterface $type
 * @param OutputInterface   $output
 *
 * @return int 0 once the whole feed has been read without libxml reporting an error
 *
 * @throws FileNotFoundException when the feed file does not exist
 * @throws \RuntimeException     when the feed cannot be opened or libxml reports an error
 */
protected function validate(FeedTypeInterface $type, OutputInterface $output)
{
    $file = $this->exporter->getFeedFilename($type);

    if (!file_exists($file)) {
        throw new FileNotFoundException(sprintf('<error>Feed "%s" has not yet been exported</error>', $type->getName()));
    }

    // Parser flags belong to open(); XMLReader's constructor takes no arguments.
    // LIBXML_NOERROR / LIBXML_NOWARNING are deliberately not applied: the loop
    // below depends on libxml_get_last_error(), and output is already silenced
    // by libxml_use_internal_errors(true).
    $options = LIBXML_NOENT | LIBXML_COMPACT | LIBXML_PARSEHUGE;

    $this->reader = new \XMLReader();
    if (!$this->reader->open($file, null, $options)) {
        throw new \RuntimeException(sprintf('Unable to open feed file "%s"', $file));
    }
    $this->reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

    // foreach ($type->getNamespaces() as $name => $location) {
    //     $this->reader->setSchema($location);
    // }

    // The entity loader is disabled only after open(), as in the original order.
    libxml_clear_errors();
    $previousInternalErrors = libxml_use_internal_errors(true);
    $previousEntityLoader = libxml_disable_entity_loader(true);

    $progress = new ProgressBar($output);
    $progress->start();

    try {
        // go through the whole thing; the error check also runs after the final
        // read(), because a fatal parse error makes read() return false
        do {
            $hasNode = $this->reader->read();

            if ($hasNode && $this->reader->nodeType === \XMLReader::ELEMENT && $this->reader->name === $type->getItemNode()) {
                $progress->advance();
                $this->currentItem = $this->reader->readOuterXml();
            }

            if ($error = libxml_get_last_error()) {
                throw new \RuntimeException(sprintf('[%s %s] %s (in %s - line %d, column %d)', LIBXML_ERR_WARNING === $error->level ? 'WARNING' : 'ERROR', $error->code, trim($error->message), $error->file ? $error->file : 'n/a', $error->line, $error->column));
            }
        } while ($hasNode);
    } catch (\Exception $e) {
        // put the process-wide libxml settings back before propagating
        libxml_use_internal_errors($previousInternalErrors);
        libxml_disable_entity_loader($previousEntityLoader);

        throw $e;
    }

    libxml_use_internal_errors($previousInternalErrors);
    libxml_disable_entity_loader($previousEntityLoader);

    $progress->finish();

    return 0;
}
/**
 * Opens the given file with XMLReader, switches on validation and reports
 * XMLReader::isValid().
 *
 * NOTE(review): nothing is read before isValid() is queried, so the result may
 * only reflect what the parser has consumed so far rather than the whole
 * document — confirm this is the intended check.
 *
 * @param string $filename URI of the document to check
 * @return bool false when the source cannot be opened, otherwise the isValid() result
 */
function isXml($filename)
{
    $xml = new XMLReader();

    // open() returns false when the source cannot be read; the calls below
    // require a loaded document
    if (!$xml->open($filename)) {
        return false;
    }

    $xml->setParserProperty(XMLReader::VALIDATE, true);
    $valid = $xml->isValid();

    // release the underlying file handle
    $xml->close();

    return $valid;
}
/**
 * Constructor
 *
 * Creates an SVGReader drawing from the source provided. Files larger than
 * $wgSVGMetadataCutoff are only read up to that many bytes.
 *
 * @param string $source URI from which to read
 * @throws MWException|Exception
 */
function __construct($source)
{
    global $wgSVGMetadataCutoff;
    $this->reader = new XMLReader();

    // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
    $size = filesize($source);
    if ($size === false) {
        throw new MWException("Error getting filesize of SVG.");
    }

    if ($size > $wgSVGMetadataCutoff) {
        $this->debug("SVG is {$size} bytes, which is bigger than {$wgSVGMetadataCutoff}. Truncating.");
        // Read from the start of the file: a negative offset counts from the
        // END of the stream on PHP >= 7.1, which would return only the last byte.
        $contents = file_get_contents($source, false, null, 0, $wgSVGMetadataCutoff);
        if ($contents === false) {
            throw new MWException('Error reading SVG file.');
        }
        $this->reader->XML($contents, null, LIBXML_NOERROR | LIBXML_NOWARNING);
    } else {
        $this->reader->open($source, null, LIBXML_NOERROR | LIBXML_NOWARNING);
    }

    // Expand entities, since Adobe Illustrator uses them for xmlns
    // attributes (bug 31719). Note that libxml2 has some protection
    // against large recursive entity expansions so this is not as
    // insecure as it might appear to be. However, it is still extremely
    // insecure. It's necessary to wrap any read() calls with
    // libxml_disable_entity_loader() to avoid arbitrary local file
    // inclusion, or even arbitrary code execution if the expect
    // extension is installed (bug 46859).
    $oldDisable = libxml_disable_entity_loader(true);
    $this->reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

    $this->metadata['width'] = self::DEFAULT_WIDTH;
    $this->metadata['height'] = self::DEFAULT_HEIGHT;

    // The size in the units specified by the SVG file
    // (for the metadata box)
    // Per the SVG spec, if unspecified, default to '100%'
    $this->metadata['originalWidth'] = '100%';
    $this->metadata['originalHeight'] = '100%';

    // Because we cut off the end of the svg making an invalid one. Complicated
    // try catch thing to make sure warnings get restored. Seems like there should
    // be a better way.
    MediaWiki\suppressWarnings();
    try {
        $this->read();
    } catch (Exception $e) {
        // Note, if this happens, the width/height will be taken to be 0x0.
        // Should we consider it the default 512x512 instead?
        MediaWiki\restoreWarnings();
        libxml_disable_entity_loader($oldDisable);
        throw $e;
    }
    MediaWiki\restoreWarnings();
    libxml_disable_entity_loader($oldDisable);
}
/**
 * Downloads the XML document for the given date into a temporary file and
 * turns every element named $this->xmlElement into a row via parseRow().
 *
 * Download or open failures are logged and yield an empty result; the
 * temporary file is removed on every exit path.
 *
 * @param $date value handed to getUrl() to build the source URL
 * @return CurrencyRaw[] parsed rows keyed by their num_code
 */
public function parse($date)
{
    $url = $this->getUrl($date);

    $temp_file = tempnam(sys_get_temp_dir(), 'currency-source');
    $fp = $temp_file !== false ? fopen($temp_file, 'w+') : false;
    if ($fp === false) {
        Yii::log('Unable to create a temporary file for `' . $url . '`', CLogger::LEVEL_WARNING, 'currency-parser');
        if ($temp_file !== false) {
            @unlink($temp_file);
        }

        return [];
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // NOTE(review): peer verification is switched off, so the TLS peer is not
    // authenticated — confirm this is intentional before relying on the data.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // The response body is streamed straight into the temporary file.
    curl_setopt($ch, CURLOPT_FILE, $fp);
    $downloaded = curl_exec($ch);
    $curl_error = curl_error($ch);
    curl_close($ch);
    fclose($fp);

    if ($downloaded === false) {
        Yii::log('Unable to download XML from `' . $url . '`: ' . $curl_error, CLogger::LEVEL_WARNING, 'currency-parser');
        @unlink($temp_file);

        return [];
    }

    $xml = new XMLReader();
    if (!$xml->open($temp_file)) {
        Yii::log('Unable to open downloaded XML from `' . $url . '`', CLogger::LEVEL_WARNING, 'currency-parser');
        @unlink($temp_file);

        return [];
    }
    $xml->setParserProperty(XMLReader::VALIDATE, false);
    Yii::log('Open XML from `' . $url . '`', CLogger::LEVEL_INFO, 'currency-parser');

    $data = [];
    while ($xml->read()) {
        if ($xml->nodeType != XMLReader::ELEMENT || $xml->localName != $this->xmlElement) {
            continue;
        }

        try {
            $xmlRow = new SimpleXMLElement($xml->readOuterXml());
        } catch (Exception $e) {
            // a row that cannot be parsed is skipped, but no longer silently
            Yii::log('Error parsed XML row', CLogger::LEVEL_WARNING, 'currency-parser');
            continue;
        }

        if ($rowObj = $this->parseRow($xmlRow)) {
            $data[$rowObj->num_code] = $rowObj;
        } else {
            Yii::log('Error parsed XML row', CLogger::LEVEL_WARNING, 'currency-parser');
        }
    }

    $xml->close();
    @unlink($temp_file);

    return $data;
}
/**
 * Reads a feed file and collects every "listing" element for which the given
 * XPath expression evaluates to a non-empty result.
 *
 * @param OutputInterface $output
 * @param \SplFileInfo    $feed
 * @param string          $filterExpression XPath expression evaluated against each listing node
 *
 * @return array a two-element list: the matching DOM nodes and the total number of listings read
 */
protected function inspect(OutputInterface $output, \SplFileInfo $feed, $filterExpression)
{
    $options = LIBXML_NOENT | LIBXML_NONET | LIBXML_COMPACT | LIBXML_PARSEHUGE | LIBXML_NOERROR | LIBXML_NOWARNING;

    // Parser flags belong to open(); XMLReader's constructor takes no arguments.
    $this->reader = new \XMLReader();
    if (!$this->reader->open($feed->getPathname(), null, $options)) {
        $output->writeln(sprintf('<error>Unable to open %s</error>', $feed->getFilename()));

        return [[], 0];
    }
    $this->reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

    // The entity loader is disabled only after open(), as in the original order.
    libxml_clear_errors();
    $previousInternalErrors = libxml_use_internal_errors(true);
    $previousEntityLoader = libxml_disable_entity_loader(true);

    $total = 0;
    $results = [];

    $output->writeln(sprintf('Reading <comment>%s</comment>', $feed->getFilename()));
    if ($filterExpression) {
        $output->writeln(sprintf('Filtering nodes with expression "<info>%s</info>"', $filterExpression));
    }

    $progress = new ProgressBar($output);
    $progress->start();

    // go through the whole thing
    while ($this->reader->read()) {
        if ($this->reader->nodeType !== \XMLReader::ELEMENT || $this->reader->name !== 'listing') {
            continue;
        }

        $progress->advance();
        ++$total;

        // expand() hands back a node owned by a different document; it has to
        // be imported before it can be attached to $doc (appending it directly
        // raises a "Wrong Document" DOMException).
        $doc = new \DOMDocument();
        $node = $doc->importNode($this->reader->expand(), true);
        $doc->appendChild($node);

        $xpath = new \DOMXPath($doc);
        $xpath->registerNamespace('x', $doc->lookupNamespaceUri($doc->namespaceURI));

        // NOTE(review): an empty $filterExpression is still passed to evaluate(),
        // as before — confirm whether "no filter" should match every listing.
        $query = $xpath->evaluate($filterExpression, $node);
        $result = $query instanceof \DOMNodeList ? $query->length : !empty($query);

        if ($result) {
            $results[] = $node;
        }
    }

    // put the process-wide libxml settings back
    libxml_use_internal_errors($previousInternalErrors);
    libxml_disable_entity_loader($previousEntityLoader);

    $progress->finish();
    $output->writeln('');

    return [$results, $total];
}
/**
 * Loads XML either from a file or from a string and runs validate() on it.
 *
 * $this->wellFormed is set to false when the input cannot be loaded or when
 * validate() throws; the exception is rethrown after cleanup.
 *
 * @param string $xml    a filename when $isFile is true, otherwise the XML markup itself
 * @param bool   $isFile whether $xml is opened as a file rather than parsed as a string
 * @throws Exception whatever validate() throws
 */
private function validateFromInput($xml, $isFile)
{
    $reader = new XMLReader();
    $flags = LIBXML_NOERROR | LIBXML_NOWARNING;

    $loaded = $isFile
        ? $reader->open($xml, null, $flags)
        : $reader->XML($xml, null, $flags);

    if ($loaded !== true) {
        // Couldn't open the XML
        $this->wellFormed = false;

        return;
    }

    $previousLoaderState = libxml_disable_entity_loader(true);
    $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

    try {
        $this->validate($reader);
    } catch (Exception $failure) {
        // Treated as malformed because the document was not parsed to the
        // end; the cause may be no more than an external entity reference.
        $this->wellFormed = false;
        $reader->close();
        libxml_disable_entity_loader($previousLoaderState);

        throw $failure;
    }

    $reader->close();
    libxml_disable_entity_loader($previousLoaderState);
}
/**
 * Regenerates the localisation artefacts for this topic under "<topic dir>/l10n/".
 *
 * Steps, in order:
 *  1. Walk $this->html and rewrite span-level markup (code, em, a) into
 *     markdown-like text, keeping other tags.
 *  2. Walk that result again, convert block-level tags, split text into
 *     segments and replace each segment by a "[START|<id>|END]" placeholder
 *     backed by a translation unit.
 *  3. Write the skeleton (".skl.md") and the source XLIFF (".src.xlf").
 *  4. For every locale, merge the source units into "<topic>.<locale>.xlf".
 *  5. For every locale, fill the skeleton from that XLIFF and write
 *     "<topic>.<locale>.md"; untranslated units are emitted as "@source@".
 *
 * NOTE(review): the two echo statements and the dump to /tmp/html.txt look like
 * debugging leftovers; they are kept so that observable output is unchanged.
 */
public function updateL10N()
{
    // PHASE1: Convert span-level elements
    $depth = 1;
    $text = "";
    echo $this->filename;
    $src = "<html>" . $this->html . "</html>";

    /* Drop stupid ms-word quotes */
    $src = str_replace("‘", "'", $src);
    $src = str_replace("’", "'", $src);

    $extractmode = true;

    $tf = new LinkORB_TranslationFile();
    $tf->name = "markdown-topic:" . $this->filename;
    $tf->datatype = "x-linkorb-markdown-topic";
    $tf->sourcelang = "en_US";
    $tf->targetlang = "en_US";
    $tf->tool = "x-lt-ldoc-updatel10n";

    $l10npath = dirname($this->filename) . "/l10n/";
    if (!file_exists($l10npath)) {
        mkdir($l10npath);
    }

    // Load previous trans-unit's if available
    if (file_exists($l10npath . basename($this->filename) . ".src.xlf")) {
        $tf->LoadXLIFF($l10npath . basename($this->filename) . ".src.xlf", true);
        // Set all transunit's to 'translate=false'
        foreach ($tf->transunit as $tu) {
            $tu->translate = "no";
            $tu->comment = array();
            $tu->filename = array();
        }
    }

    $xhtml = new XMLReader();
    $xhtml->xml($src);
    // Parser properties take the class constant and can only be set once a
    // document is loaded; entities are left as-is.
    $xhtml->setParserProperty(XMLReader::SUBST_ENTITIES, false);

    $skiptext = false;
    $inpre = false;
    while ($xhtml->read() && $depth != 0) {
        if (in_array($xhtml->nodeType, array(XMLReader::TEXT, XMLReader::CDATA, XMLReader::WHITESPACE, XMLReader::SIGNIFICANT_WHITESPACE))) {
            if (!$skiptext) {
                // Leaving entities as-is now
                $text .= $xhtml->value;
            }
        }

        // OPENING TAG
        if ($xhtml->nodeType == XMLReader::ELEMENT) {
            switch ($xhtml->name) {
                case "code":
                    if (!$inpre) {
                        $text .= "`";
                    }
                    break;
                case "pre":
                    $inpre = true;
                    $text .= "<" . $xhtml->name . ">";
                    break;
                case "em":
                    $text .= "*";
                    break;
                case "a":
                    // link text is replaced by a fixed placeholder and skipped
                    $text .= "[linktext](http://www.example.com)";
                    $skiptext = true;
                    break;
                default:
                    $text .= "<" . $xhtml->name . ">";
                    break;
            }
            $depth++;
        }

        // CLOSING TAG
        if ($xhtml->nodeType == XMLReader::END_ELEMENT) {
            switch ($xhtml->name) {
                case "code":
                    if (!$inpre) {
                        $text .= "`";
                    }
                    break;
                case "pre":
                    $inpre = false;
                    $text .= "</" . $xhtml->name . ">";
                    break;
                case "em":
                    $text .= "*";
                    break;
                case "a":
                    $text .= "";
                    $skiptext = false;
                    break;
                default:
                    $text .= "</" . $xhtml->name . ">";
                    break;
            }
            $depth--;
        }
    }

    // PHASE2: Convert block-level elements
    $depth = 1;
    $xhtml = new XMLReader();
    $text = str_replace("<br>", "<br />", $text);
    $xhtml->xml($text);
    file_put_contents("/tmp/html.txt", $text);
    echo $text;

    $text = "";
    $firstliparagraph = false;
    $tagstack = array();
    $liststack = array();
    $inpre = false;
    $inblockquote = false;

    while ($xhtml->read() && $depth != 0) {
        if (in_array($xhtml->nodeType, array(XMLReader::TEXT, XMLReader::CDATA, XMLReader::WHITESPACE, XMLReader::SIGNIFICANT_WHITESPACE))) {
            if (!$skiptext) {
                $string = $xhtml->value;
                $indent = str_repeat("\t", count($liststack));

                if ($inpre) {
                    // Add indention, and insert text as-is (no translation etc)
                    $string = trim(str_replace("\n", "\n\t" . $indent, $string));
                    $text .= $string;
                } else {
                    // Remove linebreaks for all paragraphs, blockquotes, etc (all except pre)
                    $string = str_replace("\n", " ", $string);
                    // Remove redundant spaces: collapse every run of spaces to one
                    $string = preg_replace('/ {2,}/', ' ', $string);
                    $string = trim($string);

                    if (trim($string, " \t\n") != "") {
                        if (!$firstliparagraph) {
                            $text .= $indent;
                        }
                        if ($inblockquote) {
                            $text .= "> ";
                        }

                        // SEGMENTOR: split after runs of "." / "!" followed by
                        // whitespace; delimiters are captured so they can be
                        // glued back onto the preceding segment.
                        $segment = preg_split("/([.!]+\\s)/", $string . " ", -1, PREG_SPLIT_DELIM_CAPTURE);
                        $si = 0;
                        while ($si < count($segment)) {
                            $s = $segment[$si];
                            if ($si + 1 < count($segment)) {
                                // Re-add closing punctuation
                                $si++;
                                $s .= $segment[$si];
                            }
                            $firstliparagraph = false;
                            $s = trim($s);
                            if ($s) {
                                $extractmode = true;
                                if ($extractmode) {
                                    $tu = $tf->GetTranslationUnit(null, $s);
                                    $tu->addComment("Topic: " . basename($this->filename));
                                    $tu->addFilename("../" . basename($this->filename));
                                    $text .= "[START|" . $tu->id . "|END]";
                                } else {
                                    $text .= "@" . $s . "@";
                                }
                            }
                            $si++;
                        }
                    }
                }
            }
        }

        // OPENING TAG
        if ($xhtml->nodeType == XMLReader::ELEMENT) {
            array_push($tagstack, array("name" => $xhtml->name));
            switch ($xhtml->name) {
                case "h0":
                case "h1":
                case "h2":
                case "h3":
                case "h4":
                case "h5":
                case "h6":
                    // the digit of the tag name gives the number of "#" marks
                    $text .= "\n" . str_repeat("#", (int) $xhtml->name[1]) . " ";
                    break;
                case "p":
                    break;
                case "pre":
                    $inpre = true;
                    $text .= "\t";
                    break;
                case "blockquote":
                    $inblockquote = true;
                    break;
                case "ul":
                case "ol":
                    array_push($liststack, array("name" => $xhtml->name));
                    break;
                case "li":
                    $firstliparagraph = true;
                    $text .= str_repeat("\t", count($liststack) - 1);
                    if ($liststack[count($liststack) - 1]['name'] == "ol") {
                        $text .= "#\t";
                    } else {
                        $text .= "*\t";
                    }
                    break;
                default:
                    break;
            }
            $depth++;
        }

        // CLOSING TAG
        if ($xhtml->nodeType == XMLReader::END_ELEMENT) {
            array_pop($tagstack);
            switch ($xhtml->name) {
                case "h0":
                case "h1":
                case "h2":
                case "h3":
                case "h4":
                case "h5":
                case "h6":
                    $text .= "\n\n";
                    break;
                case "p":
                    $text .= "\n\n";
                    break;
                case "pre":
                    $inpre = false;
                    $text .= "\n\n";
                    break;
                case "blockquote":
                    $inblockquote = false;
                    break;
                case "li":
                    $text .= "\n";
                    break;
                case "ul":
                case "ol":
                    array_pop($liststack);
                    break;
                default:
                    break;
            }
            $depth--;
        }
    }

    // Strip redundant linebreaks
    $text = str_replace("\n\n\n\n", "\n\n", $text);
    $text = str_replace("\n\n\n", "\n\n", $text);

    file_put_contents($l10npath . basename($this->filename) . ".skl.md", $text);
    file_put_contents($l10npath . basename($this->filename) . ".src.xlf", $tf->ToXLIFF());

    $locale = array();
    $locale[] = "nl-NL";
    $locale[] = "zh-CN";
    $locale[] = "fr-FR";
    $locale[] = "es-ES";
    $locale[] = "ru-RU";
    $locale[] = "de-DE";
    $locale[] = "jp-JP";
    $locale[] = "ar-EG"; // arabic Egypt
    $locale[] = "hi-IN"; // hindi India

    foreach ($locale as $l) {
        $tf = new LinkORB_TranslationFile();
        $tf->name = "markdown-topic:" . $this->filename;
        $tf->datatype = "x-linkorb-markdown-topic";
        $tf->sourcelang = "en_US";
        $tf->targetlang = $l;
        $tf->tool = "x-lt-ldoc-updatel10n";

        // Pretranslation
        $filename = $l10npath . basename($this->filename) . "." . $l . ".xlf";
        if (file_exists($filename)) {
            $tf->LoadXLIFF($filename, true, dirname($filename));
            foreach ($tf->transunit as $tu) {
                $tu->translate = "no";
                $tu->comment = array();
                $tu->filename = array();
                if ($tu->target == "") {
                    $tf->transunit[$tu->id]->export = false;
                }
            }
        }

        // Load new src trans-units on top
        $tf->LoadXLIFF($l10npath . basename($this->filename) . ".src.xlf", true, dirname($filename));

        // Save
        file_put_contents($filename, $tf->ToXLIFF());
    }

    // -------------
    foreach ($locale as $l) {
        $o = file_get_contents($l10npath . basename($this->filename) . ".skl.md");
        $tf = new LinkORB_TranslationFile();
        $filename = $l10npath . basename($this->filename) . "." . $l . ".xlf";
        if (file_exists($filename)) {
            $tf->LoadXLIFF($filename, true, dirname($filename));
            foreach ($tf->transunit as $tu) {
                $tag = "[START|" . $tu->id . "|END]";
                $target = trim($tu->target);
                if ($target == "") {
                    // no translation yet: emit the source text between "@" marks
                    $target = "@" . $tu->src . "@";
                }
                $o = str_replace($tag, $target, $o);
            }
        }
        file_put_contents($l10npath . basename($this->filename) . "." . $l . ".md", $o);
    }
}
/**
 * Checks an uploaded matching file (XML or CSV) and removes it when it is
 * unusable.
 *
 * XML files must pass the XMLReader validity check, must be loadable by
 * SimpleXML, and their second-level element names (upper-cased) must include
 * every entry of $this->mandatory_data. CSV files must have a non-numeric
 * header row whose upper-cased columns include every mandatory entry.
 *
 * On failure the file is deleted through the model, a warning text is stored
 * in the session and false is returned.
 *
 * NOTE(review): isValid() is queried without reading the document, so it may
 * not reflect the whole file — confirm the intended strictness.
 *
 * @param string $file_name file name relative to DIR_FILE_FOR_MATCHING; the
 *                          part between the first "_" and the first "." is
 *                          used as the file id
 * @return bool true when the file is acceptable, false otherwise (including
 *              any extension other than "xml" or "csv")
 */
private function validateFile($file_name)
{
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $file_info = explode(".", $file_name);
    $file_info1 = explode("_", $file_info[0]);
    // a name without "_" has no id segment
    $file_id = isset($file_info1[1]) ? $file_info1[1] : null;

    // check the file according to its extension
    if ($extension == "xml") {
        $reader = new XMLReader();
        $is_valid = false;
        if ($reader->open(DIR_FILE_FOR_MATCHING . $file_name)) {
            // Set parser options - you must set this in order to use isValid method
            $reader->setParserProperty(XMLReader::VALIDATE, TRUE);
            $is_valid = $reader->isValid();
            $reader->close();
        }

        // a document SimpleXML cannot load is treated like an invalid one
        $xml = $is_valid ? simplexml_load_file(DIR_FILE_FOR_MATCHING . $file_name) : false;

        if ($xml === false) {
            // remove it from the database and from the server
            $this->model_account_customer_file_for_matching->deleteFile($file_id);
            $this->session->data['warning'] = $this->language->get('text_xml_file_is_not_valid');
            return false;
        }

        // the XML is valid: check that it contains the mandatory data required by the webservice
        foreach ($xml->children()->children() as $child) {
            $this->xml_node[] = strtoupper(trim($child->getName()));
        }

        foreach ($this->mandatory_data as $value) {
            if (!in_array($value, $this->xml_node)) {
                // remove it from the database and from the server
                $this->model_account_customer_file_for_matching->deleteFile($file_id);
                $this->session->data['warning'] = $this->language->get('text_xml_file_not_contain_mandatory_data');
                return false;
            }
        }

        return true;
    }

    if ($extension == "csv") {
        $csv_delimiter = $this->model_account_customer_file_for_matching->getCsvDelimiter($file_id);
        $filerow = @file(DIR_FILE_FOR_MATCHING . $file_name);

        if ($filerow === false || !isset($filerow[0])) {
            // unreadable or empty file: there is no header to inspect
            $this->model_account_customer_file_for_matching->deleteFile($file_id);
            $this->session->data['warning'] = $this->language->get('text_csv_file_not_contain_mandatory_data');
            return false;
        }

        $csv_header = explode($csv_delimiter, $filerow[0]);

        // check whether the csv file has a header: a numeric cell means the first row is data
        foreach ($csv_header as $value) {
            if (is_numeric(trim($value))) {
                // remove it from the database and from the server
                $this->model_account_customer_file_for_matching->deleteFile($file_id);
                $this->session->data['warning'] = $this->language->get('text_csv_file_without_header');
                return false;
            }
        }

        // convert to upper case
        $header = array();
        foreach ($csv_header as $value) {
            $header[] = strtoupper(trim($value));
        }

        // the csv has a header: check that it contains the mandatory data required by the webservice
        foreach ($this->mandatory_data as $value) {
            if (!in_array($value, $header)) {
                // remove it from the database and from the server
                $this->model_account_customer_file_for_matching->deleteFile($file_id);
                $this->session->data['warning'] = $this->language->get('text_csv_file_not_contain_mandatory_data');
                return false;
            }
        }

        return true;
    }

    // neither xml nor csv
    return false;
}
/**
 * Collects, for every gnm:Sheet element of the compressed workbook, its name,
 * last column letter, last column index, total rows and total columns.
 *
 * @param string $pFilename path of the file to inspect
 * @return array one associative array per sheet, in document order
 * @throws PHPExcel_Reader_Exception when the file does not exist
 */
public function listWorksheetInfo($pFilename)
{
    // Check if file exists
    if (!file_exists($pFilename)) {
        throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
    }

    $reader = new XMLReader();
    $reader->open('compress.zlib://' . realpath($pFilename));
    // parser property 2 is XMLReader::DEFAULTATTRS
    $reader->setParserProperty(XMLReader::DEFAULTATTRS, true);

    $sheets = array();
    while ($reader->read()) {
        if ($reader->nodeType != XMLReader::ELEMENT || $reader->name != 'gnm:Sheet') {
            continue;
        }

        $sheet = array(
            'worksheetName' => '',
            'lastColumnLetter' => 'A',
            'lastColumnIndex' => 0,
            'totalRows' => 0,
            'totalColumns' => 0,
        );

        // Scan forward until gnm:MaxRow has been consumed (or the input ends)
        while ($reader->read()) {
            if ($reader->nodeType != XMLReader::ELEMENT) {
                continue;
            }

            $tag = $reader->name;
            if ($tag == 'gnm:Name') {
                $reader->read(); // Move onto the value node
                $sheet['worksheetName'] = (string) $reader->value;
                continue;
            }
            if ($tag == 'gnm:MaxCol') {
                $reader->read(); // Move onto the value node
                $maxColumn = (int) $reader->value;
                $sheet['lastColumnIndex'] = $maxColumn;
                $sheet['totalColumns'] = $maxColumn + 1;
                continue;
            }
            if ($tag == 'gnm:MaxRow') {
                $reader->read(); // Move onto the value node
                $sheet['totalRows'] = (int) $reader->value + 1;
                break;
            }
        }

        $sheet['lastColumnLetter'] = PHPExcel_Cell::stringFromColumnIndex($sheet['lastColumnIndex']);
        $sheets[] = $sheet;
    }

    return $sheets;
}
/**
 * Decide whether a block of XML may be handed to xml_parse: it is safe when
 * its first element is reached without a doctype declaration, since a doctype
 * could carry a DoS attack once internal entities are expanded (T85848).
 *
 * Updates $this->parsable, and when neither an element nor a doctype was seen,
 * stores $content in $this->xmlParsableBuffer.
 *
 * @param string $content xml string to check for parse safety
 * @return bool true if the xml is safe to parse, false otherwise
 */
private function checkParseSafety($content)
{
    $reader = new XMLReader();

    // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed
    // instead of using XML().
    $dataUri = 'data://text/plain,' . urlencode($content);
    $reader->open($dataUri, null, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET);

    $oldDisable = libxml_disable_entity_loader(true);
    // Held in a variable for the rest of the function; presumably restores the
    // previous entity-loader setting when it goes out of scope — TODO confirm
    // against ScopedCallback.
    /** @noinspection PhpUnusedLocalVariableInspection */
    $reset = new ScopedCallback('libxml_disable_entity_loader', array($oldDisable));
    $reader->setParserProperty(XMLReader::SUBST_ENTITIES, false);

    $verdict = null;

    // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning
    // when parsing truncated XML, which causes unit tests to fail.
    MediaWiki\suppressWarnings();
    while ($verdict === null && $reader->read()) {
        $nodeType = $reader->nodeType;
        if ($nodeType === XMLReader::ELEMENT) {
            // Reached the first element without hitting a doctype declaration
            $this->parsable = self::PARSABLE_OK;
            $verdict = true;
        } elseif ($nodeType === XMLReader::DOC_TYPE) {
            $this->parsable = self::PARSABLE_NO;
            $verdict = false;
        }
    }
    MediaWiki\restoreWarnings();

    if ($verdict !== null) {
        return $verdict;
    }

    // Reached the end of the parsable xml without finding an element
    // or doctype. Buffer and try again.
    $this->parsable = self::PARSABLE_BUFFERING;
    $this->xmlParsableBuffer = $content;

    return false;
}
/**
 * End-to-end check of scraping, serialising and saving for the four scrapers
 * built from $this->engines[0..3] (labelled google, ask, bing and yahoo).
 *
 * Every scraper goes through the same sequence: calls asserted to be rejected,
 * single-keyword scrapes, scrapeAll() after adding keywords, serialisation,
 * save(), and finally a check of each saved file — JSON output is linted with
 * JsonLint, XML output is checked with XMLReader::isValid().
 *
 * The statements depend on each other's side effects (fetched pages, remaining
 * keywords, serialised pages), so their order matters.
 */
public function testScrape()
{
    $parser = new \Seld\JsonLint\JsonParser();

    // ---- engine 0: google ----
    $googleScraper = Builder::create($this->engines[0], array(array('foo', 'baz'), 'google'));
    $outDir = $googleScraper->getOutDir();
    // Calls asserted to return false — presumably invalid argument
    // combinations; confirm against the scraper API.
    $this->assertFalse($googleScraper->scrape('bar'));
    $this->assertFalse($googleScraper->scrape('baz', 100));
    $this->assertFalse($googleScraper->scrape('baz', 1, 'baz'));
    $this->assertFalse($googleScraper->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($googleScraper->scrape('baz', 1, true, 'UTC', 'faz'));
    // serialising before anything has been fetched is asserted to fail
    $this->assertFalse($googleScraper->serialize('json'));
    // each accepted scrape is asserted to add pages and use up one keyword
    $this->assertTrue($googleScraper->scrape('foo', 2, true, 'Europe/Berlin'));
    $this->assertCount(2, $googleScraper->getFetchedPages());
    $this->assertCount(1, $googleScraper->getKeywords());
    $this->assertTrue($googleScraper->scrape('baz', 2, true));
    $this->assertCount(4, $googleScraper->getFetchedPages());
    $this->assertCount(0, $googleScraper->getKeywords());
    // scrapeAll() with no keywords left is asserted to fail
    $this->assertFalse($googleScraper->scrapeAll());
    $this->assertTrue($googleScraper->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($googleScraper->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $googleScraper->getFetchedPages());
    $this->assertCount(0, $googleScraper->getKeywords());
    $this->assertFalse($googleScraper->serialize('baz'));
    // a successful serialize() is asserted to move pages from fetched to serialised
    $this->assertTrue($googleScraper->serialize('json', true));
    $this->assertCount(0, $googleScraper->getFetchedPages());
    $this->assertCount(8, $googleScraper->getSerializedPages());
    // file names are derived from the serialised-page keys before save()
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($googleScraper->getSerializedPages()));
    $this->assertTrue($googleScraper->save(true));
    // every saved JSON file must lint cleanly (lint() returning null)
    for ($i = 0; $i < count($toCheck); $i++) {
        $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $this->assertNull($parser->lint($json));
    }
    // second round for google, this time serialised as XML
    $this->assertTrue($googleScraper->addKeywords(array('foo bad')));
    $this->assertTrue($googleScraper->scrapeAll(3, true));
    $this->assertCount(3, $googleScraper->getFetchedPages());
    $this->assertTrue($googleScraper->serialize('xml', true));
    $this->assertCount(0, $googleScraper->getFetchedPages());
    $this->assertCount(3, $googleScraper->getSerializedPages());
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($googleScraper->getSerializedPages()));
    $this->assertTrue($googleScraper->save(true));
    // every saved XML file must be reported valid by XMLReader
    for ($i = 0; $i < count($toCheck); $i++) {
        $xml = new \XMLReader();
        $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $xml->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($xml->isValid());
    }

    // ---- engine 1: ask (XML first, then JSON) ----
    $askScraper = Builder::create($this->engines[1], array(array('foo', 'baz'), 'ask'));
    $outDir = $askScraper->getOutDir();
    $this->assertFalse($askScraper->scrape('bar'));
    $this->assertFalse($askScraper->scrape('baz', 100));
    $this->assertFalse($askScraper->scrape('baz', 1, 'baz'));
    $this->assertFalse($askScraper->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($askScraper->scrape('baz', 1, true, 'UTC', 'faz'));
    $this->assertTrue($askScraper->scrape('foo', 2, true, 'Europe/Rome'));
    $this->assertCount(2, $askScraper->getFetchedPages());
    $this->assertCount(1, $askScraper->getKeywords());
    $this->assertTrue($askScraper->scrape('baz', 2, true));
    $this->assertCount(4, $askScraper->getFetchedPages());
    $this->assertCount(0, $askScraper->getKeywords());
    $this->assertFalse($askScraper->scrapeAll());
    $this->assertTrue($askScraper->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($askScraper->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $askScraper->getFetchedPages());
    $this->assertCount(0, $askScraper->getKeywords());
    $this->assertFalse($askScraper->serialize('baz'));
    $this->assertTrue($askScraper->serialize('xml', true));
    $this->assertCount(0, $askScraper->getFetchedPages());
    $this->assertCount(8, $askScraper->getSerializedPages());
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($askScraper->getSerializedPages()));
    $this->assertTrue($askScraper->save(true));
    // save() is asserted to leave no serialised pages behind
    $this->assertCount(0, $askScraper->getSerializedPages());
    for ($i = 0; $i < count($toCheck); $i++) {
        $xml = new \XMLReader();
        $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $xml->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($xml->isValid());
    }
    $this->assertTrue($askScraper->addKeywords(array('foobaz')));
    $this->assertTrue($askScraper->scrapeAll(3, true));
    $this->assertTrue($askScraper->serialize('json', true));
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($askScraper->getSerializedPages()));
    $this->assertTrue($askScraper->save(true));
    for ($i = 0; $i < count($toCheck); $i++) {
        $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $this->assertNull($parser->lint($json));
    }

    // ---- engine 2: bing (JSON first, then XML) ----
    $bingScraper = Builder::create($this->engines[2], array(array('foo', 'baz'), 'bing'));
    $outDir = $bingScraper->getOutDir();
    $this->assertFalse($bingScraper->scrape('bar'));
    $this->assertFalse($bingScraper->scrape('baz', 100));
    $this->assertFalse($bingScraper->scrape('baz', 1, 'baz'));
    $this->assertFalse($bingScraper->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($bingScraper->scrape('baz', 1, true, 'UTC', 'faz'));
    $this->assertFalse($bingScraper->serialize('json'));
    $this->assertTrue($bingScraper->scrape('foo', 2, true, 'Europe/Berlin'));
    $this->assertCount(2, $bingScraper->getFetchedPages());
    $this->assertCount(1, $bingScraper->getKeywords());
    $this->assertTrue($bingScraper->scrape('baz', 2, true));
    $this->assertCount(4, $bingScraper->getFetchedPages());
    $this->assertCount(0, $bingScraper->getKeywords());
    $this->assertFalse($bingScraper->scrapeAll());
    $this->assertTrue($bingScraper->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($bingScraper->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $bingScraper->getFetchedPages());
    $this->assertCount(0, $bingScraper->getKeywords());
    $this->assertFalse($bingScraper->serialize('baz'));
    $this->assertTrue($bingScraper->serialize('json', true));
    $this->assertCount(0, $bingScraper->getFetchedPages());
    $this->assertCount(8, $bingScraper->getSerializedPages());
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($bingScraper->getSerializedPages()));
    $this->assertTrue($bingScraper->save(true));
    for ($i = 0; $i < count($toCheck); $i++) {
        $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $this->assertNull($parser->lint($json));
    }
    $this->assertTrue($bingScraper->addKeywords(array('foo bad')));
    $this->assertTrue($bingScraper->scrapeAll(2, true));
    $this->assertCount(2, $bingScraper->getFetchedPages());
    $this->assertTrue($bingScraper->serialize('xml', true));
    $this->assertCount(0, $bingScraper->getFetchedPages());
    $this->assertCount(2, $bingScraper->getSerializedPages());
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($bingScraper->getSerializedPages()));
    $this->assertTrue($bingScraper->save(true));
    for ($i = 0; $i < count($toCheck); $i++) {
        $xml = new \XMLReader();
        $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $xml->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($xml->isValid());
    }

    // ---- engine 3: yahoo (XML first, then JSON) ----
    $yahooScraper = Builder::create($this->engines[3], array(array('foo', 'baz'), 'yahoo'));
    $outDir = $yahooScraper->getOutDir();
    $this->assertFalse($yahooScraper->scrape('bar'));
    $this->assertFalse($yahooScraper->scrape('baz', 100));
    $this->assertFalse($yahooScraper->scrape('baz', 1, 'baz'));
    $this->assertFalse($yahooScraper->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($yahooScraper->scrape('baz', 1, true, 'UTC', 'faz'));
    $this->assertTrue($yahooScraper->scrape('foo', 2, true, 'Europe/Rome'));
    $this->assertCount(2, $yahooScraper->getFetchedPages());
    $this->assertCount(1, $yahooScraper->getKeywords());
    $this->assertTrue($yahooScraper->scrape('baz', 2, true));
    $this->assertCount(4, $yahooScraper->getFetchedPages());
    $this->assertCount(0, $yahooScraper->getKeywords());
    $this->assertFalse($yahooScraper->scrapeAll());
    $this->assertTrue($yahooScraper->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($yahooScraper->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $yahooScraper->getFetchedPages());
    $this->assertCount(0, $yahooScraper->getKeywords());
    $this->assertFalse($yahooScraper->serialize('baz'));
    $this->assertTrue($yahooScraper->serialize('xml', true));
    $this->assertCount(0, $yahooScraper->getFetchedPages());
    $this->assertCount(8, $yahooScraper->getSerializedPages());
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($yahooScraper->getSerializedPages()));
    $this->assertTrue($yahooScraper->save(true));
    $this->assertCount(0, $yahooScraper->getSerializedPages());
    for ($i = 0; $i < count($toCheck); $i++) {
        $xml = new \XMLReader();
        $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $xml->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($xml->isValid());
    }
    $this->assertTrue($yahooScraper->addKeywords(array('foobaz')));
    $this->assertTrue($yahooScraper->scrapeAll(3, true));
    $this->assertTrue($yahooScraper->serialize('json', true));
    $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($yahooScraper->getSerializedPages()));
    $this->assertTrue($yahooScraper->save(true));
    for ($i = 0; $i < count($toCheck); $i++) {
        $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
        $this->assertNull($parser->lint($json));
    }
}
/**
 * __construct
 *
 * Builds the Chunk object.
 *
 * When the file starts with "<!DOCTYPE" it is treated as HTML and the
 * constructor returns without opening a reader. Otherwise, if no element is
 * configured (or the 'get_cloud' option is set), the file is scanned once to
 * count element names into $this->cloud, and a missing 'element' option is
 * filled in from those counts. Finally $this->reader is opened on the file.
 *
 * @param string $file    The filename to work with
 * @param array  $options The options with which to parse the file
 * @author Dom Hastings
 * @access public
 */
public function __construct($file, $options = array())
{
    // merge the options together
    $this->options = array_merge($this->options, is_array($options) ? $options : array());
    $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');

    // set the filename
    $this->file = $file;

    // Sniff the first 1024 bytes; a file that cannot be opened is simply not
    // treated as HTML (feof()/fread() must not be handed a failed handle).
    $is_html = false;
    $f = @fopen($file, "rb");
    if ($f !== false) {
        $chunk = @fread($f, 1024);
        $is_html = is_string($chunk) && strpos($chunk, "<!DOCTYPE") === 0;
        @fclose($f);
    }

    if ($is_html) {
        return;
    }

    // Resolve the path once: route reads through the preprocessing stream
    // filter when it is available and enabled.
    if (function_exists('stream_filter_register') && !empty($this->options['filter'])) {
        stream_filter_register('preprocessxml', 'preprocessXml_filter');
        $path = 'php://filter/read=preprocessxml/resource=' . $this->file;
    } else {
        $path = $this->file;
    }

    if (empty($this->options['element']) || !empty($this->options['get_cloud'])) {
        // Count how often each element name occurs (":" replaced by "_")
        $reader = new XMLReader();
        if ($reader->open($path)) {
            $reader->setParserProperty(XMLReader::VALIDATE, false);
            while (@$reader->read()) {
                if ($reader->nodeType != XMLReader::ELEMENT) {
                    continue;
                }
                $tag = str_replace(":", "_", $reader->localName);
                if (array_key_exists($tag, $this->cloud)) {
                    $this->cloud[$tag]++;
                } else {
                    $this->cloud[$tag] = 1;
                }
            }
            $reader->close();
        }
        unset($reader);

        if (!empty($this->cloud) && empty($this->options['element'])) {
            // most frequent names first
            arsort($this->cloud);
            $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book');

            // prefer the most frequent well-known record name ...
            foreach ($this->cloud as $element_name => $value) {
                if (in_array(strtolower($element_name), $main_elements, true)) {
                    $this->options['element'] = $element_name;
                    break;
                }
            }

            // ... otherwise fall back to the most frequent element overall
            if (empty($this->options['element'])) {
                foreach ($this->cloud as $el => $count) {
                    $this->options['element'] = $el;
                    break;
                }
            }
        }
    }

    $this->reader = new XMLReader();
    // only configure the parser when the source could actually be opened
    if (@$this->reader->open($path)) {
        @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
    }
}
/**
 * Stores the raw request body (php://input) as a file in the 1C exchange directory.
 *
 * For schema version 2.07, an already existing file with the same name is
 * checked with XMLReader first and removed when it is not reported as valid.
 * Image files (as decided by fn_exim_1c_file_is_image()) are written to the
 * images directory instead of the base directory.
 *
 * NOTE(review): $filename is concatenated into the target path as-is; confirm
 * that callers sanitise it, since it presumably originates from the request.
 *
 * @param string $filename File name relative to the 1C exchange directory
 *
 * @return bool True when the file was opened and written, false otherwise
 */
function fn_exim_1c_get_external_file($filename)
{
    list($dir_1c, , $dir_1c_images) = fn_rus_exim_1c_get_dir_1c();

    if (!is_dir($dir_1c)) {
        fn_mkdir($dir_1c);
    }

    $file_path = $dir_1c . $filename;

    if (Registry::get('addons.rus_exim_1c.exim_1c_schema_version') == '2.07') {
        if (file_exists($file_path) && extension_loaded('XMLReader')) {
            $is_valid = false;
            $xml = new XMLReader();
            if ($xml->open($file_path)) {
                $xml->setParserProperty(XMLReader::VALIDATE, true);
                $is_valid = $xml->isValid();
                // Release the file before trying to delete it.
                $xml->close();
            }

            if (!$is_valid) {
                @unlink($file_path);
            }
        }
    }

    if (fn_exim_1c_file_is_image($filename)) {
        if (!is_dir($dir_1c_images)) {
            fn_mkdir($dir_1c_images);
        }
        $file_path = $dir_1c_images . $filename;
    }

    $file = @fopen($file_path, 'w');
    if (!$file) {
        return false;
    }

    // Report a failed write instead of claiming success.
    $written = fwrite($file, fn_get_contents('php://input'));
    fclose($file);

    return $written !== false;
}
/**
 * __construct
 *
 * Builds the Chunk object.
 *
 * Steps performed:
 *  1. Merges the caller's options and scales 'chunkSize' by the plugin's
 *     'chunk_size' option.
 *  2. Reads the first 1024 bytes of the file; when they start with
 *     "<!DOCTYPE" the object is flagged via $this->is_404, a reader is
 *     still opened, and the constructor returns.
 *  3. When no 'element' option is set (or 'get_cloud' is requested), walks
 *     the whole document once and counts element names into $this->cloud,
 *     then picks the element to iterate over.
 *  4. Opens the XMLReader that is stored in $this->reader.
 *
 * @param string $file    The filename to work with
 * @param array  $options The options with which to parse the file
 * @param bool   $debug   Not used by this constructor
 * @author Dom Hastings
 * @access public
 */
public function __construct($file, $options = array(), $debug = false)
{
    // merge the options together
    $this->options = array_merge($this->options, is_array($options) ? $options : array());
    $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');

    // set the filename
    $this->file = $file;

    // Sniff the head of the file. fopen() may fail (missing/unreadable file);
    // in that case the handle must not be handed to fread()/fclose().
    $is_html = false;
    $f = @fopen($file, "rb");
    if ($f !== false) {
        $chunk = @fread($f, 1024);
        if (is_string($chunk) and strpos($chunk, "<!DOCTYPE") === 0) {
            $is_html = true;
        }
        @fclose($f);
    }

    if ($is_html) {
        $path = $this->get_file_path();
        $this->is_404 = true;
        $this->reader = new XMLReader();
        @$this->reader->open($path);
        @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
        return;
    }

    if (empty($this->options['element']) or !empty($this->options['get_cloud'])) {
        $path = $this->get_file_path();

        // Count how often every element name occurs in the document.
        $reader = new XMLReader();
        if ($reader->open($path)) {
            $reader->setParserProperty(XMLReader::VALIDATE, false);
            while (@$reader->read()) {
                if ($reader->nodeType !== XMLReader::ELEMENT) {
                    continue;
                }
                // "_colon_" placeholders are mapped back to ":" and every ":"
                // is then replaced by "_" to form the cloud key.
                $localName = str_replace("_colon_", ":", $reader->localName);
                $tag = str_replace(":", "_", $localName);
                $this->cloud[$tag] = isset($this->cloud[$tag]) ? $this->cloud[$tag] + 1 : 1;
            }
            $reader->close();
        }
        unset($reader);

        if (!empty($this->cloud) and empty($this->options['element'])) {
            // Most frequent elements first.
            arsort($this->cloud);

            // Prefer a well-known record-like element name if one is present.
            $main_elements = array(
                'node', 'product', 'job', 'deal', 'entry', 'item', 'property',
                'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0',
            );
            foreach ($this->cloud as $element_name => $value) {
                if (in_array(strtolower($element_name), $main_elements)) {
                    $this->options['element'] = $element_name;
                    break;
                }
            }

            // Otherwise fall back to the most frequent element.
            if (empty($this->options['element'])) {
                foreach ($this->cloud as $el => $count) {
                    $this->options['element'] = $el;
                    break;
                }
            }
        }
    }

    // Reader used for the actual chunked parsing; failures are suppressed
    // here exactly as before and surface when the reader is used.
    $path = $this->get_file_path();
    $this->reader = new XMLReader();
    @$this->reader->open($path);
    @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
}
<?php
/*
 * Walks dtdexample.xml with DTD loading and validation enabled, printing every
 * node name indented by its depth, followed by the overall validity.
 */

/* Number of spaces to indent per level */
$indent = 5;

$xml = new XMLReader();
$xml->open("dtdexample.xml");
$xml->setParserProperty(XMLReader::LOADDTD, true);
$xml->setParserProperty(XMLReader::VALIDATE, true);

while ($xml->read()) {
    /* Indentation for the current node, based on depth and $indent */
    $padding = str_repeat(" ", $xml->depth * $indent);

    /* Print node name */
    print $padding . $xml->name . "\n";

    if ($xml->hasAttributes) {
        $attCount = $xml->attributeCount;
        print $padding . " Number of Attributes: " . $attCount . "\n";
    }
}

print "\n\nValid:\n";
var_dump($xml->isValid());
// Case 1: document supplied as a string, default attributes enabled.
$reader->XML($xmlstring);
$reader->setParserProperty(XMLReader::DEFAULTATTRS, true);
// Advance to the first element node (or to the end of the document).
while ($reader->read()) {
    if ($reader->nodeType == XMLReader::ELEMENT) {
        break;
    }
}
foreach (array('bar', 'baz') as $attribute) {
    var_dump($reader->getAttribute($attribute));
}
$reader->close();

echo "\nUsing URI:\n";

// Case 2: document opened from a file, default attributes NOT enabled
// (the setParserProperty() call is intentionally left out here).
$reader = new XMLReader();
$file = dirname(__FILE__) . '/012.xml';
$file = (DIRECTORY_SEPARATOR == '\\') ? str_replace('\\', "/", $file) : $file;
$reader->open($file);
while ($reader->read()) {
    if ($reader->nodeType == XMLReader::ELEMENT) {
        break;
    }
}
foreach (array('bar', 'baz') as $attribute) {
    var_dump($reader->getAttribute($attribute));
}
$reader->close();

// Case 3: document opened from a file, default attributes enabled.
$reader = new XMLReader();
$reader->open(dirname(__FILE__) . '/012.xml');
$reader->setParserProperty(XMLReader::DEFAULTATTRS, true);
while ($reader->read()) {
    if ($reader->nodeType == XMLReader::ELEMENT) {
        break;
    }
}
foreach (array('bar', 'baz') as $attribute) {
    var_dump($reader->getAttribute($attribute));
}
$reader->close();
?>
===DONE===
/**
 * Creates an XMLReader for the given content.
 *
 * Leading whitespace is stripped first. When isXmlDocument() reports the
 * content as an XML document it is loaded from the string; otherwise the
 * content is treated as a URI/filename and opened. Blank nodes are dropped,
 * the DTD is loaded and entity substitution is switched on.
 *
 * @param string $content XML markup or the location of an XML file
 *
 * @return \XMLReader
 *
 * @throws ParseException When the content is treated as a file and cannot be opened
 */
protected function createReader($content)
{
    $libxmlOptions = LIBXML_NOBLANKS | LIBXML_DTDLOAD;
    $xmlReader = new \XMLReader();
    $content = ltrim($content);

    if (!$this->isXmlDocument($content)) {
        // Not inline markup: interpret the content as a path/URI.
        $opened = @$xmlReader->open($content, null, $libxmlOptions);
        if (!$opened) {
            throw new ParseException(sprintf('File "%s" doesn\'t exist or is unreadable', $content));
        }
    } else {
        $xmlReader->XML($content, null, $libxmlOptions);
    }

    $xmlReader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

    return $xmlReader;
}
/**
 * __construct
 *
 * Builds the Chunk object.
 *
 * Steps performed:
 *  1. Merges the caller's options and scales 'chunkSize' by the plugin's
 *     'chunk_size' option.
 *  2. Reads the first 1024 bytes of the file; when they start with
 *     "<!DOCTYPE" the object is flagged via $this->is_404, an XMLReader is
 *     still opened, and the constructor returns.
 *  3. Decides the parser engine ('xmlreader' or 'xmlstreamer') from the
 *     plugin's 'force_stream_reader' option, the import record referenced by
 *     the 'id' / 'import_id' input, or the 'wpai_parser_type' option.
 *  4. When no 'element' option is set (or 'get_cloud' is requested), walks
 *     the whole document once to count element names into $this->cloud and
 *     picks the element to iterate over.
 *  5. Creates the reader stored in $this->reader for the chosen engine.
 *
 * @param string      $file        The filename to work with
 * @param array       $options     The options with which to parse the file
 * @param string|bool $parser_type Preferred parser engine; empty means "not specified"
 * @author Dom Hastings
 * @access public
 */
public function __construct($file, $options = array(), $parser_type = false)
{
    // merge the options together
    $this->options = array_merge($this->options, is_array($options) ? $options : array());
    $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');

    // set the filename
    $this->file = $file;
    $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;

    // Sniff the head of the file. fopen() may fail (missing/unreadable file);
    // in that case the handle must not be handed to fread()/fclose().
    $is_html = false;
    $f = @fopen($file, "rb");
    if ($f !== false) {
        $chunk = @fread($f, 1024);
        if (is_string($chunk) and strpos($chunk, "<!DOCTYPE") === 0) {
            $is_html = true;
        }
        @fclose($f);
    }

    if ($is_html) {
        $path = $this->get_file_path();
        $this->is_404 = true;
        $this->reader = new XMLReader();
        @$this->reader->open($path);
        @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
        return;
    }

    // Choose the parser engine.
    if (PMXI_Plugin::getInstance()->getOption('force_stream_reader')) {
        $this->parser_type = 'xmlstreamer';
    } else {
        $input = new PMXI_Input();
        $import_id = $input->get('id', 0);
        if (empty($import_id)) {
            $import_id = $input->get('import_id', 0);
        }

        if (!empty($import_id)) {
            // $this->parser_type already holds the caller's preference (or
            // 'xmlreader'); an existing import record overrides it.
            $import = new PMXI_Import_Record();
            $import->getById($import_id);
            if (!$import->isEmpty()) {
                $this->parser_type = empty($import->options['xml_reader_engine']) ? 'xmlreader' : 'xmlstreamer';
            }
        } else {
            $this->parser_type = empty($parser_type) ? get_option('wpai_parser_type', 'xmlreader') : $parser_type;
        }
    }

    if (empty($this->options['element']) or !empty($this->options['get_cloud'])) {
        $path = $this->get_file_path();

        if ($this->parser_type == 'xmlreader') {
            // Count how often every element name occurs in the document.
            $reader = new XMLReader();
            if ($reader->open($path)) {
                $reader->setParserProperty(XMLReader::VALIDATE, false);
                while (@$reader->read()) {
                    if ($reader->nodeType !== XMLReader::ELEMENT) {
                        continue;
                    }
                    // "_colon_" placeholders are mapped back to ":" and every
                    // ":" is then replaced by "_" to form the cloud key.
                    $localName = str_replace("_colon_", ":", $reader->localName);
                    $tag = str_replace(":", "_", $localName);
                    $this->cloud[$tag] = isset($this->cloud[$tag]) ? $this->cloud[$tag] + 1 : 1;
                }
                $reader->close();
            }
            unset($reader);
        } else {
            $CHUNK_SIZE = 1024;
            $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
            $parseroptions = array("extractContainer" => false);
            // Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
            $parser = new Parser\StringWalker($parseroptions);
            // Create the streamer
            $streamer = new XmlStringStreamer($parser, $streamProvider);
            // Drain the stream; the nodes themselves are not needed, only the
            // counts the parser exposes through its ->cloud property.
            while ($node = $streamer->getNode()) {
            }
            $this->cloud = $parser->cloud;
        }

        if (!empty($this->cloud) and empty($this->options['element'])) {
            // Most frequent elements first.
            arsort($this->cloud);

            // Prefer a well-known record-like element name if one is present.
            $main_elements = array(
                'node', 'product', 'job', 'deal', 'entry', 'item', 'property',
                'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0',
            );
            foreach ($this->cloud as $element_name => $value) {
                if (in_array(strtolower($element_name), $main_elements)) {
                    $this->options['element'] = $element_name;
                    break;
                }
            }

            // Otherwise fall back to the most frequent element.
            if (empty($this->options['element'])) {
                foreach ($this->cloud as $el => $count) {
                    $this->options['element'] = $el;
                    break;
                }
            }
        }
    }

    // Reader used for the actual chunked parsing.
    $path = $this->get_file_path();
    if ($this->parser_type == 'xmlreader') {
        $this->reader = new XMLReader();
        @$this->reader->open($path);
        @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
    } else {
        $parseroptions = array("uniqueNode" => $this->options['element']);
        $CHUNK_SIZE = 1024;
        $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
        $parser = new Parser\UniqueNode($parseroptions);
        $this->reader = new XmlStringStreamer($parser, $streamProvider);
    }
}
/**
 * Performs XML transformation of the string given as argument
 *
 * @param string                $xml       Well-formed XML string to transform
 * @param string|array|\Closure $callback  Name of either a callback function or
 *      an array with indexes 0: class and 1: method that returns transformation
 *      info for this tag. (As the function is called for each opening or
 *      closing tag, it has to be efficient!) Function / method must accept 3
 *      arguments:
 *        1. Tag name
 *        2. Attributes as associative array (also provided for closing tags)
 *        3. One of the XMLTransformer::EL* constants to indicate the node type
 *      The function must either return false (in which case the tag itself and
 *      anything inside it is completely ignored) or an array with 0 or more of
 *      these keys:
 *        - "tag" can be a new tag name that will be used instead of the
 *          original one. If false, the tag will be removed, but its child
 *          nodes will be preserved.
 *        - "@<name>" (where <name> is an attribute name) may be false (will
 *          remove the attribute) or a string, either starting with "@" (will
 *          rename the attribute) or not starting with "@" (literal attr. value)
 *        - "insbefore" inserts PCDATA before the opening tag
 *        - "insstart" inserts PCDATA after the opening tag (i.e.: as a
 *          new first child)
 *        - "insend" inserts PCDATA directly before the closing tag
 *        - "insafter" inserts PCDATA after the closing tag
 *        - "transformOuter" This can be a closure that is passed the
 *          transformed element including all contained elements as a string.
 *        - "transformInner" This can be a closure that is passed the
 *          transformed element's content as a string.
 *      Anything for which neither false nor an appropriate array
 *      value is returned, is left unmodified.
 * @param bool                  $keepCData If false (default: true), CDATA content
 *      is not retained as CDATA, but as PCDATA with < and > and & escaped
 *
 * @return string XML string
 * @throws \InvalidArgumentException When the callback is not accepted by checkCallback()
 * @throws \RuntimeException
 */
public static function transformString($xml, $callback, $keepCData = true)
{
    $transformer = new static();

    if (!self::checkCallback($callback)) {
        throw new \InvalidArgumentException('Callback must be function, method or closure');
    }

    $transformer->callback = $callback;
    $transformer->keepCData = (bool) $keepCData;

    $reader = new \XMLReader();
    $reader->XML($xml);
    $reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

    // Dispatch every node to the matching handler of the transformer.
    while ($reader->read()) {
        $nodeType = $reader->nodeType;

        if ($nodeType == \XMLReader::ELEMENT) {
            $transformer->nodeOpen($reader);
        } elseif ($nodeType == \XMLReader::END_ELEMENT) {
            $transformer->nodeClose($reader);
        } elseif ($nodeType == \XMLReader::SIGNIFICANT_WHITESPACE || $nodeType == \XMLReader::WHITESPACE) {
            // Whitespace is passed through untouched.
            $transformer->nodeContent($reader->value);
        } elseif ($nodeType == \XMLReader::CDATA) {
            $transformer->cDataNodeContent($reader->value);
        } elseif ($nodeType == \XMLReader::TEXT) {
            // Text was entity-decoded by the reader, so escape it again.
            $transformer->nodeContent(htmlspecialchars($reader->value));
        }
    }

    $reader->close();

    return $transformer->content;
}
/**
 * Parses the input code and returns the OPT XML tree.
 *
 * The code is read with XMLReader (entity substitution enabled) and turned
 * into Opt_Xml_* nodes. libxml errors collected during the parse are echoed
 * as "<message> (<line>)<br/>" lines; the caller's libxml error-handling mode
 * is restored before returning.
 *
 * @param String $filename The file name (for debug purposes; not used here)
 * @param String &$code The code to parse
 * @return Opt_Xml_Root
 */
public function parse($filename, &$code)
{
    // Collect libxml errors internally, remembering the previous mode, and
    // drop errors left over from earlier operations so that only problems of
    // this document are reported below.
    $previousErrorMode = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $reader = new XMLReader();
    $reader->xml($code);
    $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

    $root = $current = new Opt_Xml_Root();
    $firstElementMatched = false;
    $depth = 0;

    // read() raises PHP warnings on malformed input; they are suppressed
    // because the errors are fetched through libxml_get_errors() afterwards.
    while (@$reader->read()) {
        // Follow the reader's depth: going up returns to the parent node,
        // going down descends into the most recently created node.
        if ($reader->depth < $depth) {
            $current = $current->getParent();
        } elseif ($reader->depth > $depth) {
            $current = $optNode;
        }

        switch ($reader->nodeType) {
            // XML elements
            case XMLReader::ELEMENT:
                $optNode = new Opt_Xml_Element($reader->name);

                // Parse element attributes, if there are any
                if ($reader->moveToFirstAttribute()) {
                    do {
                        // Attributes in the special "xmlns" namespace register
                        // a namespace on the root node.
                        if ($reader->prefix == 'xmlns') {
                            $ns = str_replace('xmlns:', '', $reader->name);
                            $root->addNamespace($ns, $reader->value);

                            // Let this attribute appear, if it does not
                            // represent an OPT special namespace
                            if (!$this->_compiler->isNamespace($ns)) {
                                $optAttribute = new Opt_Xml_Attribute($reader->name, $reader->value);
                                $optNode->addAttribute($optAttribute);
                            }
                        } else {
                            $optAttribute = new Opt_Xml_Attribute($reader->name, $reader->value);
                            $optNode->addAttribute($optAttribute);
                        }
                    } while ($reader->moveToNextAttribute());

                    // Reposition on the owning element before reading
                    // element-level properties.
                    $reader->moveToElement();
                }

                // Set "rootNode" flag
                if (!$firstElementMatched) {
                    $optNode->set('rootNode', true);
                    $firstElementMatched = true;
                }

                // Set "single" flag
                if ($reader->isEmptyElement) {
                    $optNode->set('single', true);
                }

                $current->appendChild($optNode);
                break;

            case XMLReader::TEXT:
                $this->_treeTextCompile($current, $reader->value);
                break;

            case XMLReader::COMMENT:
                $optNode = new Opt_Xml_Comment($reader->value);
                $current->appendChild($optNode);
                break;

            case XMLReader::CDATA:
                $cdata = new Opt_Xml_Cdata($reader->value);
                $cdata->set('cdata', true);
                if ($current instanceof Opt_Xml_Text) {
                    $current->appendChild($cdata);
                } else {
                    // Wrap the CDATA section in a text node, which becomes
                    // the current node.
                    $text = new Opt_Xml_Text();
                    $text->appendChild($cdata);
                    $current->appendChild($text);
                    $current = $text;
                }
                break;
        }

        $depth = $reader->depth;
    }

    $reader->close();

    // Error checking
    $errors = libxml_get_errors();
    if (sizeof($errors) > 0) {
        libxml_clear_errors();
        foreach ($errors as $error) {
            echo $error->message . ' (' . $error->line . ')<br/>';
        }
    }

    // Do not leak the changed error-handling mode to the rest of the request.
    libxml_use_internal_errors($previousErrorMode);

    return $root;
}
/**
 * Process a node from the XML Map
 * It is permitted for the XML to just define one or more tables without fields (when the 'use headers' option is used)
 *
 * Note: Calls itself recursively to process a tree
 *
 * Results are stored in $this->_xml_records (record names) and
 * $this->_xml_schema (per-record attributes and nodes). Problems are reported
 * through the 'csvilog' object taken from the application input.
 *
 * @param string $node_content   XML fragment to process (wrapped in a <da_root> element internally)
 * @param string $current_record Name of the record being processed, empty when at record level
 * @param mixed  $xml_path       Path stack handed to the _getXmlNodePath/_pushXmlNodePath/_popXmlNodePath helpers
 *
 * @return bool returns true if all fine else false
 */
private function _getXmlNode($node_content, $current_record, $xml_path)
{
    $jinput = JFactory::getApplication()->input;
    $csvilog = $jinput->get('csvilog', null, null);
    $current_node = '';
    $xml_schema = new XMLReader();

    /**
     * Add a wrapper to make the XML viable and ensure that self closing tags contain a space before the '/>'
     * The XML may still be invalid but that's down to what the user entered
     */
    $node_content = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<da_root>" . $node_content . '</da_root>';
    $xml_schema->XML($node_content);

    // XML file to table map is valid XML - construct the arrays used in file extraction
    $use_read = true;

    // The XML could only be validated against a DTD if the syntax of the XML used for the map is made more complex
    $validate_xml = false;

    if ($validate_xml == true) {
        // Note: When the DTD is external, the property value must be set before the first read()
        $xml_schema->setParserProperty(XMLReader::VALIDATE, true);
    }

    // read() steps to the very next node; next() skips the current subtree
    // (used after the subtree was handled by the recursive call).
    while ($use_read ? $xml_schema->read() : $xml_schema->next()) {
        // Validation checking disabled because a DTD (or RELAX NG) schema is required.
        if ($validate_xml == true) {
            if ($xml_schema->isValid() == false) {
                $xml_schema->close();
                return false;
            }
        }

        // Default to a reading a single node in the next loop
        $use_read = true;

        // Ignore any node associated with the root
        if ($xml_schema->name == 'da_root') {
            continue;
        }

        // Process start elements
        if ($xml_schema->nodeType == XMLReader::ELEMENT) {
            // Captured now: it is read before the cursor moves to the attributes
            $self_closing = $xml_schema->isEmptyElement;

            // Ready to add a new node - but only if the last node was closed
            if (!empty($current_node)) {
                $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_NODE_UNCLOSED', $current_node));
                $xml_schema->close();
                return false;
            }

            // A new node was found - Check whether this is a new record type
            if (empty($current_record)) {
                // New record type
                $current_record = strtolower($xml_schema->name);
                $this->_xml_records[] = strtolower($current_record);

                // Store any attributes
                while ($xml_schema->moveToNextAttribute()) {
                    // Note1: $xml_schema->hasValue only indicates whether the element can have a value, not whether it does
                    // Note2: empty($xml_schema->value) always return true, regardless of the actual value
                    $value = $xml_schema->value;

                    if (!empty($value)) {
                        if ($this->_isXmlFieldNameValid($xml_schema->value)) {
                            $this->_xml_schema[$current_record]['attrs'][strtolower($xml_schema->name)] = trim($xml_schema->value);
                        } else {
                            $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_FILE_MAP_NO_REFERENCE', $xml_schema->value));
                            $xml_schema->close();
                            return false;
                        }
                    }
                }

                // Check for a self-closing node
                if ($self_closing == true) {
                    $current_record = '';
                }
            } else {
                // New field type
                $current_node = strtolower($xml_schema->name);
                $current_path = $this->_getXmlNodePath($xml_path, $current_node);

                // Store any attributes
                while ($xml_schema->moveToNextAttribute()) {
                    // Note1: $xml_schema->hasValue only indicates whether the element can have a value, not whether it does
                    // Note2: empty($xml_schema->value) always return true, regardless of the actual value
                    $value = $xml_schema->value;

                    if (!empty($value)) {
                        if ($this->_isXmlFieldNameValid($xml_schema->value)) {
                            $this->_xml_schema[$current_record]['nodes'][$current_path]['attrs'][strtolower($xml_schema->name)] = trim($xml_schema->value);
                        } else {
                            // sprintf() is required so the offending value is
                            // substituted into the message, as in the
                            // record-level branch above.
                            $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_FILE_MAP_NO_REFERENCE', $xml_schema->value));
                            $xml_schema->close();
                            return false;
                        }
                    }
                }

                $sub_node_content = $xml_schema->readInnerXML();

                // Check whether there are any lower level nodes
                if (strstr($sub_node_content, '<') === false) {
                    /**
                     * Content has no embedded nodes - Assume a field name
                     * Note: An empty node gives a blank field name which indicates an unwanted node
                     * that is being mapped to prevent errors when processing the file
                     */
                    if ($this->_isXmlFieldNameValid($sub_node_content)) {
                        $this->_xml_schema[$current_record]['nodes'][$current_path]['field'] = trim($sub_node_content);
                    } else {
                        $this->_xml_schema[$current_record]['nodes'][$current_path]['field'] = '';
                    }
                } else {
                    // There are embedded nodes - go down another level
                    // Indicate a 'group' node by storing an empty field name
                    $this->_xml_schema[$current_record]['nodes'][$current_path]['field'] = '';

                    // Push the node name to the path stack
                    $this->_pushXmlNodePath($xml_path, $current_node);

                    if ($this->_getXmlNode($sub_node_content, $current_record, $xml_path) == false) {
                        $xml_schema->close();
                        return false;
                    }

                    // At the next read, skip to the next node at this level
                    $use_read = false;

                    // Close the node
                    $current_node = '';

                    // Pop the last item off the path stack
                    $this->_popXmlNodePath($xml_path);
                }

                // Check for a self-closing node
                if ($self_closing == true) {
                    $current_node = '';
                }
            }
        } else {
            if ($xml_schema->nodeType == XMLReader::END_ELEMENT) {
                // End of node found
                // Check for end of record
                if (!empty($current_record) && strtolower($xml_schema->name) == $current_record) {
                    // End of record detected
                    $current_record = '';
                } else {
                    if (!empty($current_node) && strtolower($xml_schema->name) == $current_node) {
                        // End of current node detected
                        $current_node = '';
                    }
                }
            }
        }
    }

    $xml_schema->close();

    // Node not terminated
    if (!empty($current_node)) {
        $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_NODE_NOT_CLOSED', $current_node));
        return false;
    }

    if (empty($this->_xml_records)) {
        $csvilog->AddStats('incorrect', JText::_('COM_CSVI_XML_NO_RECORDS_DEFINED'));
        return false;
    }

    return true;
}
/**
 * {@inheritdoc}
 *
 * Streams the XML-RPC response through XMLReader and rebuilds the PHP value
 * it encodes. Every known tag name owns one bit (the $flag* variables);
 * $nextExpectedElements is the bitmask of tags that may legally appear next,
 * and a tag whose bit is missing from that mask raises UnexpectedTagException.
 * Values are collected in $aggregates, which is indexed by nesting depth.
 *
 * NOTE(review): the early `return ''` for entity references and the thrown
 * exceptions leave without the libxml_use_internal_errors($useErrors) call
 * made at the end of the method - confirm whether that is intended.
 *
 * @throws UnexpectedTagException When a tag is not allowed at its position
 * @throws FaultException         Created via createFromResponse() when the response contained a <fault> element
 */
public function parse($xmlString)
{
    if ($this->validateResponse) {
        XmlChecker::isValid($xmlString);
    }

    // Collect libxml errors internally; the previous setting is kept in
    // $useErrors and handed back at the end of the method.
    $useErrors = libxml_use_internal_errors(true);

    $xml = new \XMLReader();
    $xml->xml($xmlString, 'UTF-8', LIBXML_COMPACT | LIBXML_NOCDATA | LIBXML_NOBLANKS | LIBXML_PARSEHUGE);
    $xml->setParserProperty(\XMLReader::VALIDATE, false);
    $xml->setParserProperty(\XMLReader::LOADDTD, false);

    // This following assignments are auto-generated using Fxmlrpc\Serialization\CodeGenerator\XmlReaderParserBitmaskGenerator
    // Don’t edit manually
    static $flagmethodResponse = 0b1;
    static $flagparams = 0b10;
    static $flagfault = 0b100;
    static $flagparam = 0b1000;
    static $flagvalue = 0b10000;
    static $flagarray = 0b100000;
    static $flagmember = 0b1000000;
    static $flagname = 0b10000000;
    ${'flag#text'} = 0b100000000;
    static $flagstring = 0b1000000000;
    static $flagstruct = 0b10000000000;
    static $flagint = 0b100000000000;
    static $flagbiginteger = 0b1000000000000;
    static $flagi8 = 0b10000000000000;
    static $flagi4 = 0b100000000000000;
    static $flagi2 = 0b1000000000000000;
    static $flagi1 = 0b10000000000000000;
    static $flagboolean = 0b100000000000000000;
    static $flagdouble = 0b1000000000000000000;
    static $flagfloat = 0b10000000000000000000;
    static $flagbigdecimal = 0b100000000000000000000;
    ${'flagdateTime.iso8601'} = 0b1000000000000000000000;
    static $flagdateTime = 0b10000000000000000000000;
    static $flagbase64 = 0b100000000000000000000000;
    static $flagnil = 0b1000000000000000000000000;
    static $flagdom = 0b10000000000000000000000000;
    static $flagdata = 0b100000000000000000000000000;
    // End of auto-generated code

    // Values under construction, indexed by nesting depth
    $aggregates = [];
    $depth = 0;
    // Only <methodResponse> (bit 0b1) is allowed as the first element
    $nextExpectedElements = 0b1;
    $i = 0;
    $isFault = false;

    while ($xml->read()) {
        ++$i;
        $nodeType = $xml->nodeType;

        // Skip comments and doctype nodes, and significant whitespace unless
        // a #text node (bit 0b100000000) is currently expected
        if ($nodeType === \XMLReader::COMMENT || $nodeType === \XMLReader::DOC_TYPE || $nodeType === \XMLReader::SIGNIFICANT_WHITESPACE && ($nextExpectedElements & 0b100000000) !== 0b100000000) {
            continue;
        }

        // An entity reference aborts parsing with an empty string result
        if ($nodeType === \XMLReader::ENTITY_REF) {
            return '';
        }

        $tagName = $xml->localName;

        // Look up the flag variable named after the tag (-1 when there is
        // none) and reject the tag if its bits are not all contained in the
        // mask of expected elements. A null mask disables the check.
        if ($nextExpectedElements !== null && ($flag = isset(${'flag' . $tagName}) ? ${'flag' . $tagName} : -1) && ($nextExpectedElements & $flag) !== $flag) {
            throw new UnexpectedTagException($tagName, $nextExpectedElements, get_defined_vars(), $xml->depth, $xml->readOuterXml());
        }

        // Jump target: empty elements are run through this switch a second
        // time as END_ELEMENT (see the goto below)
        processing:
        switch ($nodeType) {
            case \XMLReader::ELEMENT:
                switch ($tagName) {
                    case 'methodResponse':
                        // Next: params, fault
                        $nextExpectedElements = 0b110;
                        break;
                    case 'params':
                        // Next: param
                        $nextExpectedElements = 0b1000;
                        $aggregates[$depth] = [];
                        break;
                    case 'fault':
                        $isFault = true;
                        // Break intentionally omitted
                    case 'param':
                        // Next: value
                        $nextExpectedElements = 0b10000;
                        break;
                    case 'array':
                        $aggregates[++$depth] = [];
                        // Break intentionally omitted
                    case 'data':
                        // Next: array, data, value
                        $nextExpectedElements = 0b100000000000000000000110000;
                        break;
                    case 'struct':
                        // Next: struct, member, value
                        $nextExpectedElements = 0b10001010000;
                        $aggregates[++$depth] = [];
                        break;
                    case 'member':
                        // Next: name, value
                        $nextExpectedElements = 0b10010000;
                        $aggregates[++$depth] = [];
                        break;
                    case 'name':
                        // Next: #text
                        $nextExpectedElements = 0b100000000;
                        $type = 'name';
                        break;
                    case 'value':
                        $nextExpectedElements = 0b11111111111111111100110000;
                        $type = 'value';
                        // Default for a <value> without content: empty string
                        $aggregates[$depth + 1] = '';
                        break;
                    case 'base64':
                    case 'string':
                    case 'biginteger':
                    case 'i8':
                    case 'dateTime.iso8601':
                    case 'dateTime':
                        // Next: value, $tagName, #text
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = '';
                        break;
                    case 'nil':
                        // Next: value, $tagName
                        $nextExpectedElements = 0b1000000000000000000010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = null;
                        break;
                    case 'int':
                    case 'i4':
                    case 'i2':
                    case 'i1':
                        // Next: value, #text, $tagName
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = 0;
                        break;
                    case 'boolean':
                        // Next: value, #text, $tagName
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = 'boolean';
                        $aggregates[$depth + 1] = false;
                        break;
                    case 'double':
                    case 'float':
                    case 'bigdecimal':
                        // Next: value, #text, $tagName
                        $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                        $type = $tagName;
                        $aggregates[$depth + 1] = 0.0;
                        break;
                    case 'dom':
                        $type = 'dom';
                        // Disable type checking
                        $nextExpectedElements = null;
                        $aggregates[$depth + 1] = $xml->readInnerXml();
                        break;
                }
                break;

            case \XMLReader::END_ELEMENT:
                switch ($tagName) {
                    case 'params':
                    case 'fault':
                        // Leaves both switch statements and the read loop
                        break 3;
                    case 'param':
                        // Next: params, param
                        $nextExpectedElements = 0b1010;
                        break;
                    case 'value':
                        $nextExpectedElements = 0b100100000011100100011011100;
                        // Append the finished value to its container
                        $aggregates[$depth][] = $aggregates[$depth + 1];
                        break;
                    case 'array':
                    case 'struct':
                        --$depth;
                        // Break intentionally omitted
                    case 'string':
                    case 'int':
                    case 'biginteger':
                    case 'i8':
                    case 'i4':
                    case 'i2':
                    case 'i1':
                    case 'boolean':
                    case 'double':
                    case 'float':
                    case 'bigdecimal':
                    case 'dateTime.iso8601':
                    case 'dateTime':
                    case 'base64':
                    case 'nil':
                        // Next: value
                        $nextExpectedElements = 0b10000;
                        break;
                    case 'data':
                        // Next: array
                        $nextExpectedElements = 0b100000;
                        break;
                    case 'name':
                        // Next: value, member
                        $nextExpectedElements = 0b1010000;
                        $aggregates[$depth]['name'] = $aggregates[$depth + 1];
                        break;
                    case 'member':
                        // Next: struct, member
                        $nextExpectedElements = 0b10001000000;
                        // Store the member's value in the enclosing struct
                        // under the member's name, then drop the scratch slots
                        $aggregates[$depth - 1][$aggregates[$depth]['name']] = $aggregates[$depth][0];
                        unset($aggregates[$depth], $aggregates[$depth + 1]);
                        --$depth;
                        break;
                }
                break;

            case \XMLReader::TEXT:
            case \XMLReader::SIGNIFICANT_WHITESPACE:
                // Convert the text according to the type of the element that
                // was opened last
                switch ($type) {
                    case 'int':
                    case 'i4':
                    case 'i2':
                    case 'i1':
                        $value = (int) $xml->value;
                        break;
                    case 'boolean':
                        $value = $xml->value === '1';
                        break;
                    case 'double':
                    case 'float':
                    case 'bigdecimal':
                        $value = (double) $xml->value;
                        break;
                    case 'dateTime.iso8601':
                        // The UTC timezone object is created once and reused
                        $value = \DateTime::createFromFormat('Ymd\\TH:i:s', $xml->value, isset($timezone) ? $timezone : ($timezone = new \DateTimeZone('UTC')));
                        break;
                    case 'dateTime':
                        $value = \DateTime::createFromFormat('Y-m-d\\TH:i:s.uP', $xml->value, isset($timezone) ? $timezone : ($timezone = new \DateTimeZone('UTC')));
                        break;
                    case 'base64':
                        $value = Base64Value::deserialize($xml->value);
                        break;
                    case 'dom':
                        // Build a DOM document from the inner XML captured
                        // when the <dom> element was opened
                        $doc = new \DOMDocument('1.0', 'UTF-8');
                        $doc->loadXML($aggregates[$depth + 1]);
                        $value = $doc;
                        break;
                    default:
                        $value =& $xml->value;
                        break;
                }
                $aggregates[$depth + 1] = $value;
                if ($nextExpectedElements === null) {
                    break;
                }
                // Next: any
                $nextExpectedElements = 0b111111111111111111111111111;
                break;
        }

        // Self-closing elements produce no END_ELEMENT node, so process the
        // same node again as if its end tag had been read
        if ($xml->isEmptyElement && $nodeType !== \XMLReader::END_ELEMENT) {
            $nodeType = \XMLReader::END_ELEMENT;
            goto processing;
        }
    }

    libxml_use_internal_errors($useErrors);

    // The result is the last value collected at depth 0
    $result = $aggregates ? array_pop($aggregates[0]) : null;

    if ($isFault) {
        throw FaultException::createFromResponse($result);
    }

    return $result;
}
/**
 * Creates an XMLReader that reads the given XML string.
 *
 * The document is loaded with DTD loading enabled and the reader is set up
 * to substitute entities.
 *
 * @param string $content XML markup
 *
 * @return \XMLReader
 */
protected function createReader($content)
{
    $xmlReader = new \XMLReader();

    $encoding = null;
    $libxmlOptions = LIBXML_DTDLOAD;
    $xmlReader->XML($content, $encoding, $libxmlOptions);

    $xmlReader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

    return $xmlReader;
}
// Export the exercise as a QTI2 package: the XML is written to a temporary
// directory, checked with XMLReader and, when reported valid, zipped and sent
// to the browser. On failure $errorXmlExport holds the message to display.
$errorXmlExport = null;

if ($is_allowedToEdit && !empty($choice) && $choice == 'exportqti2') {
    require_once api_get_path(SYS_CODE_PATH) . 'exercice/export/qti2/qti2_export.php';

    $export = export_exercise_to_qti($exerciseId, true);
    $archive_path = api_get_path(SYS_ARCHIVE_PATH);
    $temp_dir_short = api_get_unique_id();
    $temp_zip_dir = $archive_path . $temp_dir_short;
    if (!is_dir($temp_zip_dir)) {
        mkdir($temp_zip_dir, api_get_permissions_for_new_directories());
    }
    $temp_zip_file = $temp_zip_dir . "/" . api_get_unique_id() . ".zip";
    $temp_xml_file = $temp_zip_dir . "/qti2export_" . $exerciseId . '.xml';
    file_put_contents($temp_xml_file, $export);

    $xmlReader = new XMLReader();
    $xmlReader->open($temp_xml_file);
    $xmlReader->setParserProperty(XMLReader::VALIDATE, true);
    $isValid = $xmlReader->isValid();
    // Release the XML file before it is zipped and deleted below.
    $xmlReader->close();

    if ($isValid) {
        $zip_folder = new PclZip($temp_zip_file);
        $zip_folder->add($temp_xml_file, PCLZIP_OPT_REMOVE_ALL_PATH);
        $name = 'qti2_export_' . $exerciseId . '.zip';
        DocumentManager::file_send_for_download($temp_zip_file, true, $name);
        unlink($temp_zip_file);
        unlink($temp_xml_file);
        rmdir($temp_zip_dir);
        exit; //otherwise following clicks may become buggy
    } else {
        // Best-effort cleanup so failed exports do not pile up in the
        // archive directory.
        @unlink($temp_xml_file);
        @rmdir($temp_zip_dir);
        $errorXmlExport = Display::return_message(get_lang('ErrorWritingXMLFile'), 'error');
    }
}
<?php
/* $Id$ */

/*
 * Checks that setParserProperty() and getParserProperty() agree: LOADDTD is
 * switched off (both calls are expected to yield a falsy value), then
 * SUBST_ENTITIES is switched on (both calls are expected to yield a truthy
 * value). "ok" is printed for each expectation that holds.
 */
$xmlstring = '<?xml version="1.0" encoding="UTF-8"?> <books></books>';

$reader = new XMLReader();
$reader->XML($xmlstring);

$setResult = $reader->setParserProperty(XMLReader::LOADDTD, false);
$getResult = $reader->getParserProperty(XMLReader::LOADDTD);
if (!($setResult || $getResult)) {
    echo "ok\n";
}

$setResult = $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);
$getResult = $reader->getParserProperty(XMLReader::SUBST_ENTITIES);
if ($setResult && $getResult) {
    echo "ok\n";
}

// Only go through the document; nothing is done with the nodes
while ($reader->read()) {
    continue;
}

$reader->close();
?>
===DONE===
/**
 * Reads xxe.xml with entity substitution enabled while the libxml entity
 * loader is disabled, and collects the text of the "data" element.
 *
 * @expectedException PHPUnit_Framework_Error
 * XMLReader::read(): I/O warning : failed to load external entity "file:///C:/Christopher_Spaeth/code/xml_files_windows/xxe.txt"
 */
public function testXXE_setParserProperty_SUBST_ENTITIES_disable_entity_loader()
{
    $reader = new XMLReader();

    // use setParserProperty
    $reader->open("../../xml_files_windows/xxe/xxe.xml");
    $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

    libxml_disable_entity_loader(true);

    while ($reader->read()) {
        $isDataElement = $reader->nodeType == XMLReader::ELEMENT && $reader->name == 'data';
        if (!$isDataElement) {
            continue;
        }
        $content = $reader->readString();
    }

    // Strip all whitespace before comparing
    $content = preg_replace('/\\s+/', '', $content);

    $this->assertEquals("", $content);
}
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
 *
 * The information is gathered by streaming content.xml from inside the
 * archive with XMLReader and counting table rows and cells.
 *
 * @param string $pFilename
 * @return array One entry per table:table element with the keys worksheetName,
 *               lastColumnLetter, lastColumnIndex, totalRows and totalColumns
 * @throws PHPExcel_Reader_Exception
 */
public function listWorksheetInfo($pFilename)
{
    // Check if file exists
    if (!file_exists($pFilename)) {
        throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
    }

    $worksheetInfo = array();

    // Probe that the file can be opened as a zip archive. ZipArchive::open()
    // returns TRUE on success and a (truthy) integer error code on failure,
    // so the result has to be compared against TRUE explicitly.
    $zipClass = PHPExcel_Settings::getZipClass();
    $zip = new $zipClass();
    if ($zip->open($pFilename) !== true) {
        throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! Error opening file.");
    }
    // The archive object is only needed for the check above; content.xml is
    // read through the zip:// stream wrapper below.
    $zip->close();

    $xml = new XMLReader();
    $xml->open('zip://' . realpath($pFilename) . '#content.xml', null, PHPExcel_Settings::getLibXmlLoaderOptions());
    $xml->setParserProperty(XMLReader::DEFAULTATTRS, true);

    // Step into the first level of content of the XML
    $xml->read();
    while ($xml->read()) {
        // Quickly jump through to the office:body node
        while ($xml->name !== 'office:body') {
            if ($xml->isEmptyElement) {
                $xml->read();
            } else {
                $xml->next();
            }
        }

        // Now read each node until we find our first table:table node
        while ($xml->read()) {
            if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) {
                $tmpInfo = array(
                    'worksheetName' => $xml->getAttribute('table:name'),
                    'lastColumnLetter' => 'A',
                    'lastColumnIndex' => 0,
                    'totalRows' => 0,
                    'totalColumns' => 0,
                );

                // Loop through each child node of the table:table element reading
                $currCells = 0;
                do {
                    $xml->read();
                    if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) {
                        $rowspan = $xml->getAttribute('table:number-rows-repeated');
                        $rowspan = empty($rowspan) ? 1 : $rowspan;
                        $tmpInfo['totalRows'] += $rowspan;
                        // $currCells still holds the count of the previous row here
                        $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                        $currCells = 0;

                        // Step into the row
                        $xml->read();
                        do {
                            if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
                                if (!$xml->isEmptyElement) {
                                    // Only cells with content are counted;
                                    // next() skips over the cell's subtree
                                    $currCells++;
                                    $xml->next();
                                } else {
                                    $xml->read();
                                }
                            } elseif ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
                                $mergeSize = $xml->getAttribute('table:number-columns-repeated');
                                $currCells += $mergeSize;
                                $xml->read();
                            }
                        } while ($xml->name != 'table:table-row');
                    }
                } while ($xml->name != 'table:table');

                // Account for the last row of the table
                $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
                $tmpInfo['lastColumnLetter'] = PHPExcel_Cell::stringFromColumnIndex($tmpInfo['lastColumnIndex']);
                $worksheetInfo[] = $tmpInfo;
            }
        }
    }

    return $worksheetInfo;
}
<?php
// Reads thedata.xml (located next to this script) with validation enabled and
// prints whether the reader considers the document valid.
$reader = new XMLReader();

/* load xml document as a string */
$reader->XML(file_get_contents(__DIR__ . '/thedata.xml'));

/* tell the parser to perform validation; the property is the class constant
   XMLReader::VALIDATE (a bare XMLREADER_VALIDATE constant is not defined) */
$reader->setParserProperty(XMLReader::VALIDATE, true);

// loop until end of document
// @ blocks the parser errors (missing DTD in this case)
while (@$reader->read()) {
}

echo "XML document is: " . ($reader->isValid() ? '' : ' not') . " valid.";
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
 *
 * Resolves the workbook part through the package relationships, then streams
 * every worksheet part with XMLReader, counting <c> elements per <row> to find
 * the widest row and tracking the index of the last row seen.
 *
 * @param   string     $pFilename
 * @return  array      One entry per worksheet, with the keys worksheetName,
 *                     lastColumnLetter, lastColumnIndex, totalRows and totalColumns
 * @throws  PHPExcel_Reader_Exception   If the file is missing, is not a readable zip
 *                                      archive, or a worksheet part cannot be opened
 */
public function listWorksheetInfo($pFilename)
{
    // Check if file exists
    if (!file_exists($pFilename)) {
        throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
    }

    $worksheetInfo = array();

    // ZipArchive::open() returns true on success and an integer error code otherwise
    $zip = new ZipArchive();
    if ($zip->open($pFilename) !== true) {
        throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! Error opening file.");
    }

    // The worksheet parts are read through the zip:// wrapper; the prefix is the same for all of them
    $zipUri = 'zip://' . PHPExcel_Shared_File::realpath($pFilename) . '#';

    $rels = simplexml_load_string($this->_getFromZipArchive($zip, "_rels/.rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
    foreach ($rels->Relationship as $rel) {
        if ($rel["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") {
            $target = (string) $rel["Target"];
            $dir = dirname($target);

            $relsWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "{$dir}/_rels/" . basename($target) . ".rels")); //~ http://schemas.openxmlformats.org/package/2006/relationships");
            $relsWorkbook->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");

            // Map relationship id => worksheet part path (relative to the workbook directory)
            $worksheets = array();
            foreach ($relsWorkbook->Relationship as $ele) {
                if ($ele["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet") {
                    $worksheets[(string) $ele["Id"]] = $ele["Target"];
                }
            }

            $xmlWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, $target)); //~ http://schemas.openxmlformats.org/spreadsheetml/2006/main");
            if ($xmlWorkbook->sheets) {
                foreach ($xmlWorkbook->sheets->sheet as $eleSheet) {
                    $tmpInfo = array(
                        'worksheetName' => (string) $eleSheet["name"],
                        'lastColumnLetter' => 'A',
                        'lastColumnIndex' => 0,
                        'totalRows' => 0,
                        'totalColumns' => 0,
                    );

                    $fileWorksheet = $worksheets[(string) self::array_item($eleSheet->attributes("http://schemas.openxmlformats.org/officeDocument/2006/relationships"), "id")];

                    $xml = new XMLReader();
                    if (!$xml->open($zipUri . "{$dir}/{$fileWorksheet}")) {
                        $zip->close();
                        throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! Error opening file.");
                    }
                    $xml->setParserProperty(XMLReader::DEFAULTATTRS, true);

                    $currCells = 0;
                    while ($xml->read()) {
                        if ($xml->name == 'row' && $xml->nodeType == XMLReader::ELEMENT) {
                            // The r attribute is optional; without it the row directly
                            // follows the previous one
                            $row = $xml->getAttribute('r');
                            if ($row === null || $row === '') {
                                $tmpInfo['totalRows']++;
                            } else {
                                $tmpInfo['totalRows'] = (int) $row;
                            }
                            // A new row starts: fold the cell count of the previous row into the maximum
                            $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                            $currCells = 0;
                        } elseif ($xml->name == 'c' && $xml->nodeType == XMLReader::ELEMENT) {
                            $currCells++;
                        }
                    }
                    // Account for the cells of the final row
                    $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                    $xml->close();

                    $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
                    $tmpInfo['lastColumnLetter'] = PHPExcel_Cell::stringFromColumnIndex($tmpInfo['lastColumnIndex']);

                    $worksheetInfo[] = $tmpInfo;
                }
            }
        }
    }

    $zip->close();

    return $worksheetInfo;
}
/**
 * __construct
 *
 * Builds the Chunk object.
 *
 * When no 'element' option is supplied, the first elements of the file (the scan
 * stops once more than 100 have been collected) are read to build a tag
 * frequency cloud; the element is then taken from a list of well-known record
 * names or, failing that, is the most frequent tag.
 *
 * @param string $file The filename to work with
 * @param array $options The options with which to parse the file
 * @author Dom Hastings
 * @access public
 */
public function __construct($file, $options = array())
{
    // merge the options together
    $this->options = array_merge($this->options, is_array($options) ? $options : array());

    $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');

    // set the filename
    $this->file = $file;

    // Read through the preprocessing stream filter when stream filters are available.
    // The same path serves both the detection pass and the main reader, so the
    // filter is registered once.
    if (function_exists('stream_filter_register')) {
        stream_filter_register('preprocessxml', 'preprocessXml_filter');
        $path = 'php://filter/read=preprocessxml/resource=' . $this->file;
    } else {
        $path = $this->file;
    }

    if (empty($this->options['element'])) {
        $founded_tags = array();

        $reader = new XMLReader();
        $reader->open($path);
        $reader->setParserProperty(XMLReader::VALIDATE, false);
        while (@$reader->read()) {
            if ($reader->nodeType === XMLReader::ELEMENT) {
                $founded_tags[] = str_replace(":", "_", $reader->localName);
                if (count($founded_tags) > 100) {
                    break;
                }
            }
        }
        unset($reader);

        $element_counts = array();
        if (!empty($founded_tags)) {
            $element_counts = array_count_values($founded_tags);
            // Colons were already replaced above, so every tag goes into the cloud
            foreach ($element_counts as $tag => $count) {
                $this->cloud[$tag] = $count;
            }
            // Most frequent tag first, for the fallback below
            arsort($element_counts);
        }

        if (!empty($this->cloud)) {
            $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post');

            // Prefer the first tag whose name is one of the well-known record names
            foreach (array_keys($this->cloud) as $element_name) {
                if (in_array(strtolower($element_name), $main_elements, true)) {
                    $this->options['element'] = $element_name;
                    break;
                }
            }

            // Otherwise fall back to the most frequent tag
            if (empty($this->options['element']) && !empty($element_counts)) {
                reset($element_counts);
                $this->options['element'] = key($element_counts);
            }
        }
    }

    $this->reader = new XMLReader();
    $this->reader->open($path);
    $this->reader->setParserProperty(XMLReader::VALIDATE, false);
}