/**
  * Streams through the exported feed of the given type and fails on the
  * first problem libxml reports.
  *
  * Every element whose name equals $type->getItemNode() advances the
  * progress bar and has its outer XML stored in $this->currentItem.
  *
  * @param FeedTypeInterface $type
  * @param OutputInterface   $output
  *
  * @return void
  *
  * @throws FileNotFoundException when the feed file has not been exported yet
  * @throws \RuntimeException     when the file cannot be opened or libxml reports a warning/error
  */
 protected function validate(FeedTypeInterface $type, OutputInterface $output)
 {
     $file = $this->exporter->getFeedFilename($type);
     if (!file_exists($file)) {
         throw new FileNotFoundException(sprintf('<error>Feed "%s" has not yet been exported</error>', $type->getName()));
     }

     // Turns the most recent libxml problem, if any, into an exception.
     $failOnLibxmlError = function () {
         $error = libxml_get_last_error();
         if ($error) {
             throw new \RuntimeException(sprintf('[%s %s] %s (in %s - line %d, column %d)', LIBXML_ERR_WARNING === $error->level ? 'WARNING' : 'ERROR', $error->code, trim($error->message), $error->file ? $error->file : 'n/a', $error->line, $error->column));
         }
     };

     // Collect libxml problems instead of emitting PHP warnings; the previous
     // mode is restored on the way out because this is process-wide state.
     $previousErrorMode = libxml_use_internal_errors(true);
     libxml_clear_errors();
     $previousLoaderState = null;
     $loaderChanged = false;

     $this->reader = new \XMLReader();
     try {
         // XMLReader's constructor takes no arguments, so libxml flags have to
         // be given to open(). LIBXML_NOERROR / LIBXML_NOWARNING are left out on
         // purpose: this method depends on libxml recording every problem.
         if (!$this->reader->open($file, null, LIBXML_NOENT | LIBXML_COMPACT | LIBXML_PARSEHUGE)) {
             throw new \RuntimeException(sprintf('Unable to open feed file "%s"', $file));
         }
         $this->reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

         // Entities are substituted, so external entity loading is switched off
         // while reading. Done after open(), preserving the original ordering.
         // NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0.
         $previousLoaderState = libxml_disable_entity_loader(true);
         $loaderChanged = true;

         $progress = new ProgressBar($output);
         $progress->start();
         // go through the whole thing
         while ($this->reader->read()) {
             if ($this->reader->nodeType === \XMLReader::ELEMENT && $this->reader->name === $type->getItemNode()) {
                 $progress->advance();
                 $this->currentItem = $this->reader->readOuterXml();
             }
             $failOnLibxmlError();
         }
         // read() also returns false when parsing aborts, so the error state has
         // to be checked once more after the loop; otherwise a fatal parse error
         // would look like a clean end of file.
         $failOnLibxmlError();
         $progress->finish();
     } finally {
         $this->reader->close();
         if ($loaderChanged) {
             libxml_disable_entity_loader($previousLoaderState);
         }
         libxml_use_internal_errors($previousErrorMode);
     }
 }
Exemplo n.º 2
0
/**
 * Tells whether the given file can be parsed as XML from start to end.
 *
 * The whole document is read: XMLReader parses lazily, so asking for the
 * validity state right after open() says nothing about the content.
 *
 * @param string $filename Path or URI of the file to check
 *
 * @return bool true when the file opens and parses without a libxml error,
 *              false otherwise (including when it cannot be opened)
 */
function isXml($filename)
{
    // Collect parser problems silently and restore the caller's mode afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $xml = new XMLReader();
    try {
        if (!$xml->open($filename)) {
            return false;
        }
        while ($xml->read()) {
            // Nothing to do per node; reading is what drives the parser.
        }
        foreach (libxml_get_errors() as $error) {
            // Warnings do not make a document unparsable; errors and fatals do.
            if ($error->level >= LIBXML_ERR_ERROR) {
                return false;
            }
        }

        return true;
    } finally {
        $xml->close();
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
}
 /**
  * Constructor
  *
  * Creates an SVGReader drawing from the source provided and immediately
  * reads its metadata via $this->read(). Files larger than
  * $wgSVGMetadataCutoff are only read up to that many bytes.
  *
  * @param string $source URI from which to read
  * @throws MWException|Exception
  */
 function __construct($source)
 {
     global $wgSVGMetadataCutoff;
     $this->reader = new XMLReader();
     // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
     $size = filesize($source);
     if ($size === false) {
         throw new MWException("Error getting filesize of SVG.");
     }
     if ($size > $wgSVGMetadataCutoff) {
         $this->debug("SVG is {$size} bytes, which is bigger than {$wgSVGMetadataCutoff}. Truncating.");
         // The offset must be 0: since PHP 7.1 a negative offset counts from the
         // end of the stream, so -1 would return only the last byte of the file
         // instead of its first $wgSVGMetadataCutoff bytes.
         $contents = file_get_contents($source, false, null, 0, $wgSVGMetadataCutoff);
         if ($contents === false) {
             throw new MWException('Error reading SVG file.');
         }
         $this->reader->XML($contents, null, LIBXML_NOERROR | LIBXML_NOWARNING);
     } else {
         $this->reader->open($source, null, LIBXML_NOERROR | LIBXML_NOWARNING);
     }
     // Expand entities, since Adobe Illustrator uses them for xmlns
     // attributes (bug 31719). Note that libxml2 has some protection
     // against large recursive entity expansions so this is not as
     // insecure as it might appear to be. However, it is still extremely
     // insecure. It's necessary to wrap any read() calls with
     // libxml_disable_entity_loader() to avoid arbitrary local file
     // inclusion, or even arbitrary code execution if the expect
     // extension is installed (bug 46859).
     $oldDisable = libxml_disable_entity_loader(true);
     $this->reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);
     $this->metadata['width'] = self::DEFAULT_WIDTH;
     $this->metadata['height'] = self::DEFAULT_HEIGHT;
     // The size in the units specified by the SVG file
     // (for the metadata box)
     // Per the SVG spec, if unspecified, default to '100%'
     $this->metadata['originalWidth'] = '100%';
     $this->metadata['originalHeight'] = '100%';
     // Because we cut off the end of the svg making an invalid one. Complicated
     // try catch thing to make sure warnings get restored. Seems like there should
     // be a better way.
     MediaWiki\suppressWarnings();
     try {
         $this->read();
     } catch (Exception $e) {
         // Note, if this happens, the width/height will be taken to be 0x0.
         // Should we consider it the default 512x512 instead?
         MediaWiki\restoreWarnings();
         libxml_disable_entity_loader($oldDisable);
         throw $e;
     }
     MediaWiki\restoreWarnings();
     libxml_disable_entity_loader($oldDisable);
 }
Exemplo n.º 4
0
 /**
  * Downloads the currency feed for the given date into a temporary file and
  * parses every element named $this->xmlElement into a row object.
  *
  * Download or open failures are logged and yield an empty result. The
  * temporary file is removed on every exit path, including exceptions
  * raised by parseRow().
  *
  * @param mixed $date Passed through to getUrl() to build the feed URL
  * @return CurrencyRaw[] Parsed rows keyed by their num_code
  */
 public function parse($date)
 {
     $data = [];
     $url = $this->getUrl($date);
     $temp_file = tempnam(sys_get_temp_dir(), 'currency-source');
     if ($temp_file === false) {
         Yii::log('Unable to create a temporary file for `' . $url . '`', CLogger::LEVEL_ERROR, 'currency-parser');
         return $data;
     }
     $xml = new XMLReader();
     try {
         $fp = fopen($temp_file, 'w+');
         if ($fp === false) {
             Yii::log('Unable to open temporary file `' . $temp_file . '` for writing', CLogger::LEVEL_ERROR, 'currency-parser');
             return $data;
         }
         $ch = curl_init();
         curl_setopt($ch, CURLOPT_URL, $url);
         // NOTE(review): peer verification is disabled, so the feed's origin is
         // not authenticated. Kept as found; enable it once a CA bundle is confirmed.
         curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
         curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
         curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
         // The body is streamed straight into the temp file; the former
         // RETURNTRANSFER/BINARYTRANSFER options were overridden by this one.
         curl_setopt($ch, CURLOPT_FILE, $fp);
         $downloaded = curl_exec($ch);
         $curl_error = curl_error($ch);
         curl_close($ch);
         fclose($fp);
         if ($downloaded === false) {
             Yii::log('Failed to download `' . $url . '`: ' . $curl_error, CLogger::LEVEL_ERROR, 'currency-parser');
             return $data;
         }
         if (!$xml->open($temp_file)) {
             Yii::log('Failed to open downloaded XML from `' . $url . '`', CLogger::LEVEL_ERROR, 'currency-parser');
             return $data;
         }
         $xml->setParserProperty(XMLReader::VALIDATE, false);
         Yii::log('Open XML from `' . $url . '`', CLogger::LEVEL_INFO, 'currency-parser');
         while ($xml->read()) {
             if ($xml->nodeType == XMLReader::ELEMENT && $xml->localName == $this->xmlElement) {
                 try {
                     $xmlRow = new SimpleXMLElement($xml->readOuterXml());
                 } catch (Exception $e) {
                     // A row that cannot be turned into SimpleXML is skipped.
                     continue;
                 }
                 if ($rowObj = $this->parseRow($xmlRow)) {
                     $data[$rowObj->num_code] = $rowObj;
                 } else {
                     Yii::log('Error parsed XML row', CLogger::LEVEL_WARNING, 'currency-parser');
                 }
             }
         }
     } finally {
         // Release the file handle before deleting the file.
         $xml->close();
         if (is_file($temp_file)) {
             unlink($temp_file);
         }
     }
     return $data;
 }
 /**
  * Reads a feed file and collects the "listing" elements matched by an
  * XPath expression.
  *
  * Each listing is expanded into its own DOMDocument; the prefix "x" is
  * registered for the namespace looked up on that document, and the
  * expression is evaluated with the listing as context node. A listing
  * counts as a match when the expression yields a non-empty node list or
  * any other non-empty value. Without an expression nothing can match, so
  * the listings are only counted.
  *
  * @param OutputInterface $output
  * @param \SplFileInfo    $feed
  * @param string          $filterExpression
  *
  * @return array Two-element list: the matching \DOMNode objects, then the total number of listings read
  *
  * @throws \RuntimeException when the feed file cannot be opened
  */
 protected function inspect(OutputInterface $output, \SplFileInfo $feed, $filterExpression)
 {
     $options = LIBXML_NOENT | LIBXML_NONET | LIBXML_COMPACT | LIBXML_PARSEHUGE | LIBXML_NOERROR | LIBXML_NOWARNING;

     // libxml's error mode and entity loader are process-wide settings, so the
     // previous values are remembered and put back before returning.
     $previousErrorMode = libxml_use_internal_errors(true);
     libxml_clear_errors();

     $this->reader = new \XMLReader();
     // XMLReader's constructor takes no arguments; libxml flags belong to open().
     if (!$this->reader->open($feed->getPathname(), null, $options)) {
         libxml_use_internal_errors($previousErrorMode);
         throw new \RuntimeException(sprintf('Unable to open feed file "%s"', $feed->getPathname()));
     }
     $this->reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);
     // NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0.
     $previousLoaderState = libxml_disable_entity_loader(true);

     $total = 0;
     $results = [];
     try {
         $output->writeln(sprintf('Reading <comment>%s</comment>', $feed->getFilename()));
         if ($filterExpression) {
             $output->writeln(sprintf('Filtering nodes with expression "<info>%s</info>"', $filterExpression));
         }
         $progress = new ProgressBar($output);
         $progress->start();
         // go through the whole thing
         while ($this->reader->read()) {
             if ($this->reader->nodeType !== \XMLReader::ELEMENT || $this->reader->name !== 'listing') {
                 continue;
             }
             $progress->advance();
             ++$total;
             if (!$filterExpression) {
                 // Evaluating an empty expression only produces a warning and
                 // never matches, so the DOM work is skipped altogether.
                 continue;
             }
             $node = $this->reader->expand();
             $doc = new \DOMDocument();
             $doc->appendChild($node);
             $xpath = new \DOMXPath($doc);
             $xpath->registerNamespace('x', $doc->lookupNamespaceUri($doc->namespaceURI));
             $query = $xpath->evaluate($filterExpression, $node);
             $result = $query instanceof \DOMNodeList ? $query->length : !empty($query);
             if ($result) {
                 $results[] = $node;
             }
         }
         $progress->finish();
         $output->writeln('');
     } finally {
         $this->reader->close();
         libxml_disable_entity_loader($previousLoaderState);
         libxml_use_internal_errors($previousErrorMode);
     }
     return [$results, $total];
 }
Exemplo n.º 6
0
 /**
  * Runs the validation pass over XML given either as a file or as a string.
  *
  * When the input cannot be opened, or when validation throws, the document
  * is flagged as not well-formed. The reader is closed and the previous
  * entity-loader state restored before any exception is passed on.
  *
  * @param string $xml    Filename when $isFile is truthy, otherwise the XML text itself
  * @param bool   $isFile Chooses XMLReader::open() (truthy) or XMLReader::XML() (falsy)
  * @throws Exception Anything thrown by $this->validate(), rethrown after cleanup
  */
 private function validateFromInput($xml, $isFile)
 {
     $xmlReader = new XMLReader();
     $libxmlFlags = LIBXML_NOERROR | LIBXML_NOWARNING;
     $opened = $isFile
         ? $xmlReader->open($xml, null, $libxmlFlags)
         : $xmlReader->XML($xml, null, $libxmlFlags);

     if ($opened !== true) {
         // Couldn't open the XML
         $this->wellFormed = false;
         return;
     }

     $previousLoaderState = libxml_disable_entity_loader(true);
     $xmlReader->setParserProperty(XMLReader::SUBST_ENTITIES, true);

     $failure = null;
     try {
         $this->validate($xmlReader);
     } catch (Exception $e) {
         // Treated as malformed because the document was not parsed to the
         // end; the cause may be no more than an external entity reference.
         $this->wellFormed = false;
         $failure = $e;
     }

     $xmlReader->close();
     libxml_disable_entity_loader($previousLoaderState);

     if ($failure !== null) {
         throw $failure;
     }
 }
Exemplo n.º 7
0
Arquivo: L10n.php Projeto: doxedo/core
 /**
  * Rebuilds the localisation artefacts for this topic from $this->html.
  *
  * Phase 1 rewrites span-level markup (code, em, a) into Markdown-like text.
  * Phase 2 walks the block-level markup, splits running text into sentence
  * segments and replaces each one with a "[START|<id>|END]" placeholder
  * that is registered as a translation unit. The results are written to the
  * "l10n/" directory next to $this->filename: a skeleton (".skl.md"), the
  * source XLIFF (".src.xlf"), and one ".xlf" plus one ".md" per locale.
  *
  * NOTE(review): the method echoes the filename and the intermediate markup
  * and dumps the latter to /tmp/html.txt. This looks like debugging residue
  * but is kept, since callers may depend on the output.
  *
  * NOTE(review): phase 1 copies decoded text values into markup that phase 2
  * parses again, so text containing "&" or "<" may not survive; confirm.
  *
  * @return void
  */
 public function updateL10N()
 {
     // PHASE1: Convert span-level elements
     $depth = 1;
     $text = "";
     echo $this->filename;
     $src = "<html>" . $this->html . "</html>";
     /* Drop stupid ms-word quotes */
     $src = str_replace("‘", "'", $src);
     $src = str_replace("’", "'", $src);
     $tf = new LinkORB_TranslationFile();
     $tf->name = "markdown-topic:" . $this->filename;
     $tf->datatype = "x-linkorb-markdown-topic";
     $tf->sourcelang = "en_US";
     $tf->targetlang = "en_US";
     $tf->tool = "x-lt-ldoc-updatel10n";
     $l10npath = dirname($this->filename) . "/l10n/";
     if (!file_exists($l10npath)) {
         mkdir($l10npath);
     }
     // Common prefix of every artefact written for this topic
     $base = $l10npath . basename($this->filename);
     // Load previous trans-unit's if available
     if (file_exists($base . ".src.xlf")) {
         $tf->LoadXLIFF($base . ".src.xlf", true);
         // Set all transunit's to 'translate=false'
         foreach ($tf->transunit as $tu) {
             $tu->translate = "no";
             $tu->comment = array();
             $tu->filename = array();
         }
     }
     $xhtml = new XMLReader();
     $xhtml->xml($src);
     // Parser properties can only be set once a document is loaded, and the
     // property is selected by the XMLReader constant rather than its name.
     $xhtml->setParserProperty(XMLReader::SUBST_ENTITIES, false);
     $skiptext = false;
     $inpre = false;
     while ($xhtml->read() && $depth != 0) {
         if (in_array($xhtml->nodeType, array(XMLReader::TEXT, XMLReader::CDATA, XMLReader::WHITESPACE, XMLReader::SIGNIFICANT_WHITESPACE))) {
             if (!$skiptext) {
                 // Entities are left as-is
                 $text .= $xhtml->value;
             }
         }
         // OPENING TAG
         if ($xhtml->nodeType == XMLReader::ELEMENT) {
             switch ($xhtml->name) {
                 case "code":
                     // Inside <pre> the code markers are dropped
                     if (!$inpre) {
                         $text .= "`";
                     }
                     break;
                 case "pre":
                     $inpre = true;
                     $text .= "<" . $xhtml->name . ">";
                     break;
                 case "em":
                     $text .= "*";
                     break;
                 case "a":
                     // Links are replaced by a fixed placeholder and their text is skipped
                     $text .= "[linktext](http://www.example.com)";
                     $skiptext = true;
                     break;
                 default:
                     $text .= "<" . $xhtml->name . ">";
                     break;
             }
             $depth++;
         }
         // CLOSING TAG
         if ($xhtml->nodeType == XMLReader::END_ELEMENT) {
             switch ($xhtml->name) {
                 case "code":
                     if (!$inpre) {
                         $text .= "`";
                     }
                     break;
                 case "pre":
                     $inpre = false;
                     $text .= "</" . $xhtml->name . ">";
                     break;
                 case "em":
                     $text .= "*";
                     break;
                 case "a":
                     $skiptext = false;
                     break;
                 default:
                     $text .= "</" . $xhtml->name . ">";
                     break;
             }
             $depth--;
         }
     }
     // PHASE2: Convert block-level elements
     $depth = 1;
     $xhtml = new XMLReader();
     $text = str_replace("<br>", "<br />", $text);
     $xhtml->xml($text);
     file_put_contents("/tmp/html.txt", $text);
     echo $text;
     $text = "";
     $firstliparagraph = false;
     $tagstack = array();
     $liststack = array();
     $inpre = false;
     $inblockquote = false;
     while ($xhtml->read() && $depth != 0) {
         if (in_array($xhtml->nodeType, array(XMLReader::TEXT, XMLReader::CDATA, XMLReader::WHITESPACE, XMLReader::SIGNIFICANT_WHITESPACE))) {
             // $skiptext still holds whatever phase 1 left it at
             if (!$skiptext) {
                 $string = $xhtml->value;
                 $indent = str_repeat("\t", count($liststack));
                 if ($inpre) {
                     // Add indention, and insert text as-is (no translation etc)
                     $string = trim(str_replace("\n", "\n\t" . $indent, $string));
                     $text .= $string;
                 } else {
                     // Remove linebreaks for all paragraphs, blockquotes, etc (all except pre)
                     $string = str_replace("\n", " ", $string);
                     // Remove redundant spaces
                     $string = str_replace("   ", " ", $string);
                     $string = str_replace("   ", " ", $string);
                     $string = str_replace("  ", " ", $string);
                     $string = trim($string);
                     if (trim($string, " \t\n") != "") {
                         if (!$firstliparagraph) {
                             $text .= $indent;
                         }
                         if ($inblockquote) {
                             $text .= "> ";
                         }
                         // SEGMENTOR: split after runs of "." / "!" followed by
                         // whitespace, keeping the delimiters as separate entries.
                         // -1 means "no limit".
                         $segment = preg_split("/([.!]+\\s)/", $string . " ", -1, PREG_SPLIT_DELIM_CAPTURE);
                         $si = 0;
                         while ($si < count($segment)) {
                             $s = $segment[$si];
                             if ($si + 1 < count($segment)) {
                                 // Re-attach the captured punctuation to its sentence
                                 $si++;
                                 $s .= $segment[$si];
                             }
                             $firstliparagraph = false;
                             $s = trim($s);
                             if ($s) {
                                 $tu = $tf->GetTranslationUnit(null, $s);
                                 $tu->addComment("Topic: " . basename($this->filename));
                                 $tu->addFilename("../" . basename($this->filename));
                                 $text .= "[START|" . $tu->id . "|END]";
                             }
                             $si++;
                         }
                     }
                 }
             }
         }
         // OPENING TAG
         if ($xhtml->nodeType == XMLReader::ELEMENT) {
             array_push($tagstack, array("name" => $xhtml->name));
             switch ($xhtml->name) {
                 case "h0":
                 case "h1":
                 case "h2":
                 case "h3":
                 case "h4":
                 case "h5":
                 case "h6":
                     // The digit of the tag name gives the number of "#" marks
                     $text .= "\n" . str_repeat("#", (int) $xhtml->name[1]) . " ";
                     break;
                 case "p":
                     break;
                 case "pre":
                     $inpre = true;
                     $text .= "\t";
                     break;
                 case "blockquote":
                     $inblockquote = true;
                     break;
                 case "ul":
                 case "ol":
                     array_push($liststack, array("name" => $xhtml->name));
                     break;
                 case "li":
                     $firstliparagraph = true;
                     $listdepth = count($liststack);
                     // A stray <li> outside any list gets no indent and a plain
                     // bullet instead of a negative repeat count.
                     $text .= str_repeat("\t", max(0, $listdepth - 1));
                     if ($listdepth > 0 && $liststack[$listdepth - 1]['name'] == "ol") {
                         $text .= "#\t";
                     } else {
                         $text .= "*\t";
                     }
                     break;
                 default:
                     break;
             }
             $depth++;
         }
         // CLOSING TAG
         if ($xhtml->nodeType == XMLReader::END_ELEMENT) {
             array_pop($tagstack);
             switch ($xhtml->name) {
                 case "h0":
                 case "h1":
                 case "h2":
                 case "h3":
                 case "h4":
                 case "h5":
                 case "h6":
                     $text .= "\n\n";
                     break;
                 case "p":
                     $text .= "\n\n";
                     break;
                 case "pre":
                     $inpre = false;
                     $text .= "\n\n";
                     break;
                 case "blockquote":
                     $inblockquote = false;
                     break;
                 case "li":
                     $text .= "\n";
                     break;
                 case "ul":
                 case "ol":
                     array_pop($liststack);
                     break;
                 default:
                     break;
             }
             $depth--;
         }
     }
     // Strip redundant linebreaks
     $text = str_replace("\n\n\n\n", "\n\n", $text);
     $text = str_replace("\n\n\n", "\n\n", $text);
     file_put_contents($base . ".skl.md", $text);
     file_put_contents($base . ".src.xlf", $tf->ToXLIFF());
     $locale = array();
     $locale[] = "nl-NL";
     $locale[] = "zh-CN";
     $locale[] = "fr-FR";
     $locale[] = "es-ES";
     $locale[] = "ru-RU";
     $locale[] = "de-DE";
     $locale[] = "jp-JP";
     $locale[] = "ar-EG";
     // arabic Egypt
     $locale[] = "hi-IN";
     // hindi India
     // Refresh every locale's XLIFF: keep existing targets, then merge the new source units
     foreach ($locale as $l) {
         $tf = new LinkORB_TranslationFile();
         $tf->name = "markdown-topic:" . $this->filename;
         $tf->datatype = "x-linkorb-markdown-topic";
         $tf->sourcelang = "en_US";
         $tf->targetlang = $l;
         $tf->tool = "x-lt-ldoc-updatel10n";
         // Pretranslation
         $filename = $base . "." . $l . ".xlf";
         if (file_exists($filename)) {
             $tf->LoadXLIFF($filename, true, dirname($filename));
             foreach ($tf->transunit as $tu) {
                 $tu->translate = "no";
                 $tu->comment = array();
                 $tu->filename = array();
                 if ($tu->target == "") {
                     $tf->transunit[$tu->id]->export = false;
                 }
             }
         }
         // Load new src trans-units on top
         $tf->LoadXLIFF($base . ".src.xlf", true, dirname($filename));
         // Save
         file_put_contents($filename, $tf->ToXLIFF());
     }
     // Render one Markdown file per locale by filling the skeleton's placeholders
     foreach ($locale as $l) {
         $o = file_get_contents($base . ".skl.md");
         $tf = new LinkORB_TranslationFile();
         $filename = $base . "." . $l . ".xlf";
         if (file_exists($filename)) {
             $tf->LoadXLIFF($filename, true, dirname($filename));
             foreach ($tf->transunit as $tu) {
                 $tag = "[START|" . $tu->id . "|END]";
                 $target = trim($tu->target);
                 if ($target == "") {
                     // Untranslated units fall back to the source text wrapped in "@"
                     $target = "@" . $tu->src . "@";
                 }
                 $o = str_replace($tag, $target, $o);
             }
         }
         file_put_contents($base . "." . $l . ".md", $o);
     }
 }
Exemplo n.º 8
0
 /**
  * Checks an uploaded matching file (XML or CSV) against the mandatory
  * field names in $this->mandatory_data.
  *
  * A rejected file is removed through the model's deleteFile() and a
  * warning message is stored in the session.
  *
  * @param string $file_name File name relative to DIR_FILE_FOR_MATCHING; the
  *                          part between the first "_" and the first "." is
  *                          used as the file id
  *
  * @return bool|null true when accepted, false when rejected, null when the
  *                   extension is neither "xml" nor "csv"
  */
 private function validateFile($file_name)
 {
     $extension = pathinfo($file_name, PATHINFO_EXTENSION);
     $file_info = explode(".", $file_name);
     $file_info1 = explode("_", $file_info[0]);
     $file_id = isset($file_info1[1]) ? $file_info1[1] : null;
     $path = DIR_FILE_FOR_MATCHING . $file_name;

     // The validity check depends on the extension
     if ($extension == "xml") {
         // Parse the whole document. XMLReader::isValid() asked before any
         // read() reports nothing about the content, and a failed
         // simplexml_load_file() must not be dereferenced.
         $previous_error_mode = libxml_use_internal_errors(true);
         $xml = simplexml_load_file($path);
         libxml_clear_errors();
         libxml_use_internal_errors($previous_error_mode);
         if ($xml === false) {
             return $this->rejectFile($file_id, 'text_xml_file_is_not_valid');
         }
         // The XML is well-formed: collect the field names of the first record
         $records = $xml->children();
         if (count($records) > 0) {
             foreach ($records->children() as $child) {
                 $this->xml_node[] = strtoupper(trim($child->getName()));
             }
         }
         if (!$this->hasMandatoryData($this->xml_node)) {
             return $this->rejectFile($file_id, 'text_xml_file_not_contain_mandatory_data');
         }
         return true;
     }

     if ($extension == "csv") {
         $csv_delimiter = $this->model_account_customer_file_for_matching->getCsvDelimiter($file_id);
         $filerow = is_readable($path) ? file($path) : false;
         // An unreadable or empty file is treated as having an empty header line
         $first_line = is_array($filerow) && isset($filerow[0]) ? $filerow[0] : '';
         // explode() rejects an empty delimiter; treat that as "no header columns"
         $csv_header = (string) $csv_delimiter === '' ? array() : explode($csv_delimiter, $first_line);
         // A numeric cell in the first line means the CSV has no header row
         foreach ($csv_header as $value) {
             if (is_numeric(trim($value))) {
                 return $this->rejectFile($file_id, 'text_csv_file_without_header');
             }
         }
         // Normalise the header names to upper case
         $header = array();
         foreach ($csv_header as $value) {
             $header[] = strtoupper(trim($value));
         }
         if (!$this->hasMandatoryData($header)) {
             return $this->rejectFile($file_id, 'text_csv_file_not_contain_mandatory_data');
         }
         return true;
     }

     // Any other extension is neither accepted nor rejected
     return null;
 }

 /**
  * Removes a rejected file through the model and stores the warning for the user.
  *
  * @param mixed  $file_id      Id handed to the model's deleteFile()
  * @param string $language_key Language key of the warning message
  *
  * @return bool always false, so callers can return the result directly
  */
 private function rejectFile($file_id, $language_key)
 {
     $this->model_account_customer_file_for_matching->deleteFile($file_id);
     $this->session->data['warning'] = $this->language->get($language_key);
     return false;
 }

 /**
  * Tells whether every entry of $this->mandatory_data occurs in $names.
  *
  * @param array $names Upper-cased field names found in the file
  *
  * @return bool
  */
 private function hasMandatoryData(array $names)
 {
     foreach ($this->mandatory_data as $value) {
         if (!in_array($value, $names)) {
             return false;
         }
     }
     return true;
 }
Exemplo n.º 9
0
 /**
  * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
  *
  * The file is read through the compress.zlib:// wrapper. For every
  * gnm:Sheet element the following gnm:Name, gnm:MaxCol and gnm:MaxRow
  * values are collected; gnm:MaxRow ends the scan of that sheet.
  *
  * @param   string     $pFilename
  * @return  array      One entry per sheet with the keys worksheetName,
  *                     lastColumnLetter, lastColumnIndex, totalRows, totalColumns
  * @throws   PHPExcel_Reader_Exception
  */
 public function listWorksheetInfo($pFilename)
 {
     // Check if file exists
     if (!file_exists($pFilename)) {
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
     }
     $xml = new XMLReader();
     if (!$xml->open('compress.zlib://' . realpath($pFilename))) {
         // Report an unreadable file through the documented exception type
         // instead of failing later on an unopened reader.
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading!");
     }
     // Named constant for the property previously passed as the literal 2
     $xml->setParserProperty(XMLReader::DEFAULTATTRS, true);
     $worksheetInfo = array();
     while ($xml->read()) {
         if ($xml->name == 'gnm:Sheet' && $xml->nodeType == XMLReader::ELEMENT) {
             $tmpInfo = array('worksheetName' => '', 'lastColumnLetter' => 'A', 'lastColumnIndex' => 0, 'totalRows' => 0, 'totalColumns' => 0);
             while ($xml->read()) {
                 if ($xml->name == 'gnm:Name' && $xml->nodeType == XMLReader::ELEMENT) {
                     $xml->read();
                     //	Move onto the value node
                     $tmpInfo['worksheetName'] = (string) $xml->value;
                 } elseif ($xml->name == 'gnm:MaxCol' && $xml->nodeType == XMLReader::ELEMENT) {
                     $xml->read();
                     //	Move onto the value node
                     $tmpInfo['lastColumnIndex'] = (int) $xml->value;
                     $tmpInfo['totalColumns'] = (int) $xml->value + 1;
                 } elseif ($xml->name == 'gnm:MaxRow' && $xml->nodeType == XMLReader::ELEMENT) {
                     $xml->read();
                     //	Move onto the value node
                     $tmpInfo['totalRows'] = (int) $xml->value + 1;
                     // Last value needed for this sheet
                     break;
                 }
             }
             $tmpInfo['lastColumnLetter'] = PHPExcel_Cell::stringFromColumnIndex($tmpInfo['lastColumnIndex']);
             $worksheetInfo[] = $tmpInfo;
         }
     }
     $xml->close();
     return $worksheetInfo;
 }
Exemplo n.º 10
0
 /**
  * Decide whether a chunk of XML may be handed to xml_parse.
  *
  * The chunk is unsafe when it carries a doctype declaration, since
  * parsing it with internal entity expansion could be used for a DoS
  * attack (T85848). The outcome is also recorded in $this->parsable; when
  * neither an element nor a doctype is reached, the content is stored in
  * $this->xmlParsableBuffer so the check can be retried with more data.
  *
  * @param string $content xml string to check for parse safety
  * @return bool true if the xml is safe to parse, false otherwise
  */
 private function checkParseSafety($content)
 {
     $xmlReader = new XMLReader();
     // XMLReader only copes with incomplete/invalid XML when the data is
     // open()'ed as a stream rather than loaded through XML().
     $xmlReader->open('data://text/plain,' . urlencode($content), null, LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET);
     $previousLoaderState = libxml_disable_entity_loader(true);
     // Held only so the loader state is put back when this variable goes out of scope.
     /** @noinspection PhpUnusedLocalVariableInspection */
     $restoreLoader = new ScopedCallback('libxml_disable_entity_loader', array($previousLoaderState));
     $xmlReader->setParserProperty(XMLReader::SUBST_ENTITIES, false);

     $verdict = null;
     // XMLReader::read warns on truncated XML even with LIBXML_NOWARNING,
     // which would make unit tests fail.
     MediaWiki\suppressWarnings();
     while ($verdict === null && $xmlReader->read()) {
         switch ($xmlReader->nodeType) {
             case XMLReader::ELEMENT:
                 // First element reached and no doctype declaration seen before it
                 $this->parsable = self::PARSABLE_OK;
                 $verdict = true;
                 break;
             case XMLReader::DOC_TYPE:
                 $this->parsable = self::PARSABLE_NO;
                 $verdict = false;
                 break;
         }
     }
     MediaWiki\restoreWarnings();

     if ($verdict !== null) {
         return $verdict;
     }

     // The parsable part ended without an element or a doctype:
     // buffer the content and try again later.
     $this->parsable = self::PARSABLE_BUFFERING;
     $this->xmlParsableBuffer = $content;
     return false;
 }
Exemplo n.º 11
0
 public function testScrape()
 {
     $parser = new \Seld\JsonLint\JsonParser();
     $googleScraper = Builder::create($this->engines[0], array(array('foo', 'baz'), 'google'));
     $outDir = $googleScraper->getOutDir();
     $this->assertFalse($googleScraper->scrape('bar'));
     $this->assertFalse($googleScraper->scrape('baz', 100));
     $this->assertFalse($googleScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($googleScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($googleScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertFalse($googleScraper->serialize('json'));
     $this->assertTrue($googleScraper->scrape('foo', 2, true, 'Europe/Berlin'));
     $this->assertCount(2, $googleScraper->getFetchedPages());
     $this->assertCount(1, $googleScraper->getKeywords());
     $this->assertTrue($googleScraper->scrape('baz', 2, true));
     $this->assertCount(4, $googleScraper->getFetchedPages());
     $this->assertCount(0, $googleScraper->getKeywords());
     $this->assertFalse($googleScraper->scrapeAll());
     $this->assertTrue($googleScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($googleScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $googleScraper->getFetchedPages());
     $this->assertCount(0, $googleScraper->getKeywords());
     $this->assertFalse($googleScraper->serialize('baz'));
     $this->assertTrue($googleScraper->serialize('json', true));
     $this->assertCount(0, $googleScraper->getFetchedPages());
     $this->assertCount(8, $googleScraper->getSerializedPages());
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($googleScraper->getSerializedPages()));
     $this->assertTrue($googleScraper->save(true));
     for ($i = 0; $i < count($toCheck); $i++) {
         $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $this->assertNull($parser->lint($json));
     }
     $this->assertTrue($googleScraper->addKeywords(array('foo bad')));
     $this->assertTrue($googleScraper->scrapeAll(3, true));
     $this->assertCount(3, $googleScraper->getFetchedPages());
     $this->assertTrue($googleScraper->serialize('xml', true));
     $this->assertCount(0, $googleScraper->getFetchedPages());
     $this->assertCount(3, $googleScraper->getSerializedPages());
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($googleScraper->getSerializedPages()));
     $this->assertTrue($googleScraper->save(true));
     for ($i = 0; $i < count($toCheck); $i++) {
         $xml = new \XMLReader();
         $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $xml->setParserProperty(\XMLReader::VALIDATE, true);
         $this->assertTrue($xml->isValid());
     }
     $askScraper = Builder::create($this->engines[1], array(array('foo', 'baz'), 'ask'));
     $outDir = $askScraper->getOutDir();
     $this->assertFalse($askScraper->scrape('bar'));
     $this->assertFalse($askScraper->scrape('baz', 100));
     $this->assertFalse($askScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($askScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($askScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertTrue($askScraper->scrape('foo', 2, true, 'Europe/Rome'));
     $this->assertCount(2, $askScraper->getFetchedPages());
     $this->assertCount(1, $askScraper->getKeywords());
     $this->assertTrue($askScraper->scrape('baz', 2, true));
     $this->assertCount(4, $askScraper->getFetchedPages());
     $this->assertCount(0, $askScraper->getKeywords());
     $this->assertFalse($askScraper->scrapeAll());
     $this->assertTrue($askScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($askScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $askScraper->getFetchedPages());
     $this->assertCount(0, $askScraper->getKeywords());
     $this->assertFalse($askScraper->serialize('baz'));
     $this->assertTrue($askScraper->serialize('xml', true));
     $this->assertCount(0, $askScraper->getFetchedPages());
     $this->assertCount(8, $askScraper->getSerializedPages());
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($askScraper->getSerializedPages()));
     $this->assertTrue($askScraper->save(true));
     $this->assertCount(0, $askScraper->getSerializedPages());
     for ($i = 0; $i < count($toCheck); $i++) {
         $xml = new \XMLReader();
         $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $xml->setParserProperty(\XMLReader::VALIDATE, true);
         $this->assertTrue($xml->isValid());
     }
     $this->assertTrue($askScraper->addKeywords(array('foobaz')));
     $this->assertTrue($askScraper->scrapeAll(3, true));
     $this->assertTrue($askScraper->serialize('json', true));
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($askScraper->getSerializedPages()));
     $this->assertTrue($askScraper->save(true));
     for ($i = 0; $i < count($toCheck); $i++) {
         $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $this->assertNull($parser->lint($json));
     }
     $bingScraper = Builder::create($this->engines[2], array(array('foo', 'baz'), 'bing'));
     $outDir = $bingScraper->getOutDir();
     $this->assertFalse($bingScraper->scrape('bar'));
     $this->assertFalse($bingScraper->scrape('baz', 100));
     $this->assertFalse($bingScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($bingScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($bingScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertFalse($bingScraper->serialize('json'));
     $this->assertTrue($bingScraper->scrape('foo', 2, true, 'Europe/Berlin'));
     $this->assertCount(2, $bingScraper->getFetchedPages());
     $this->assertCount(1, $bingScraper->getKeywords());
     $this->assertTrue($bingScraper->scrape('baz', 2, true));
     $this->assertCount(4, $bingScraper->getFetchedPages());
     $this->assertCount(0, $bingScraper->getKeywords());
     $this->assertFalse($bingScraper->scrapeAll());
     $this->assertTrue($bingScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($bingScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $bingScraper->getFetchedPages());
     $this->assertCount(0, $bingScraper->getKeywords());
     $this->assertFalse($bingScraper->serialize('baz'));
     $this->assertTrue($bingScraper->serialize('json', true));
     $this->assertCount(0, $bingScraper->getFetchedPages());
     $this->assertCount(8, $bingScraper->getSerializedPages());
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($bingScraper->getSerializedPages()));
     $this->assertTrue($bingScraper->save(true));
     for ($i = 0; $i < count($toCheck); $i++) {
         $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $this->assertNull($parser->lint($json));
     }
     $this->assertTrue($bingScraper->addKeywords(array('foo bad')));
     $this->assertTrue($bingScraper->scrapeAll(2, true));
     $this->assertCount(2, $bingScraper->getFetchedPages());
     $this->assertTrue($bingScraper->serialize('xml', true));
     $this->assertCount(0, $bingScraper->getFetchedPages());
     $this->assertCount(2, $bingScraper->getSerializedPages());
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($bingScraper->getSerializedPages()));
     $this->assertTrue($bingScraper->save(true));
     for ($i = 0; $i < count($toCheck); $i++) {
         $xml = new \XMLReader();
         $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $xml->setParserProperty(\XMLReader::VALIDATE, true);
         $this->assertTrue($xml->isValid());
     }
     $yahooScraper = Builder::create($this->engines[3], array(array('foo', 'baz'), 'yahoo'));
     $outDir = $yahooScraper->getOutDir();
     $this->assertFalse($yahooScraper->scrape('bar'));
     $this->assertFalse($yahooScraper->scrape('baz', 100));
     $this->assertFalse($yahooScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($yahooScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($yahooScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertTrue($yahooScraper->scrape('foo', 2, true, 'Europe/Rome'));
     $this->assertCount(2, $yahooScraper->getFetchedPages());
     $this->assertCount(1, $yahooScraper->getKeywords());
     $this->assertTrue($yahooScraper->scrape('baz', 2, true));
     $this->assertCount(4, $yahooScraper->getFetchedPages());
     $this->assertCount(0, $yahooScraper->getKeywords());
     $this->assertFalse($yahooScraper->scrapeAll());
     $this->assertTrue($yahooScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($yahooScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $yahooScraper->getFetchedPages());
     $this->assertCount(0, $yahooScraper->getKeywords());
     $this->assertFalse($yahooScraper->serialize('baz'));
     $this->assertTrue($yahooScraper->serialize('xml', true));
     $this->assertCount(0, $yahooScraper->getFetchedPages());
     $this->assertCount(8, $yahooScraper->getSerializedPages());
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($yahooScraper->getSerializedPages()));
     $this->assertTrue($yahooScraper->save(true));
     $this->assertCount(0, $yahooScraper->getSerializedPages());
     for ($i = 0; $i < count($toCheck); $i++) {
         $xml = new \XMLReader();
         $xml->open($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $xml->setParserProperty(\XMLReader::VALIDATE, true);
         $this->assertTrue($xml->isValid());
     }
     $this->assertTrue($yahooScraper->addKeywords(array('foobaz')));
     $this->assertTrue($yahooScraper->scrapeAll(3, true));
     $this->assertTrue($yahooScraper->serialize('json', true));
     $toCheck = array_map('Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName', array_keys($yahooScraper->getSerializedPages()));
     $this->assertTrue($yahooScraper->save(true));
     for ($i = 0; $i < count($toCheck); $i++) {
         $json = file_get_contents($outDir . DIRECTORY_SEPARATOR . $toCheck[$i]);
         $this->assertNull($parser->lint($json));
     }
 }
Exemplo n.º 12
0
 /**
  * __construct
  *
  * Builds the Chunk object.
  *
  * Steps, in order:
  *  1. Merge the caller's options into the defaults and scale 'chunkSize' by
  *     the plugin's 'chunk_size' option.
  *  2. Sniff the first 1024 bytes of the file; when they start with
  *     "<!DOCTYPE" the file is treated as HTML and construction stops without
  *     creating a reader.
  *  3. When no 'element' option is set (or 'get_cloud' is set), scan the whole
  *     file once, counting element names into $this->cloud, and pick the
  *     element to iterate over from those counts.
  *  4. Open the XMLReader stored in $this->reader.
  *
  * @param string $file The filename to work with
  * @param array $options The options with which to parse the file
  * @author Dom Hastings
  * @access public
  */
 public function __construct($file, $options = array())
 {
     // merge the options together
     $this->options = array_merge($this->options, is_array($options) ? $options : array());
     $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
     // set the filename
     $this->file = $file;
     // Sniff the head of the file. fopen() returns false on failure and that
     // value must not reach feof()/fread()/fclose(): on PHP 8 they throw a
     // TypeError for a non-resource argument, which "@" cannot suppress.
     $is_html = false;
     $f = @fopen($file, "rb");
     if ($f !== false) {
         if (!feof($f)) {
             $chunk = @fread($f, 1024);
             if (is_string($chunk) and strpos($chunk, "<!DOCTYPE") === 0) {
                 $is_html = true;
             }
         }
         fclose($f);
     }
     if ($is_html) {
         return;
     }
     // Read through the 'preprocessxml' stream filter when stream filters are
     // available and the 'filter' option is enabled; otherwise read the file
     // directly. The same path serves both the scan and the final reader.
     if (function_exists('stream_filter_register') and $this->options['filter']) {
         stream_filter_register('preprocessxml', 'preprocessXml_filter');
         $path = 'php://filter/read=preprocessxml/resource=' . $this->file;
     } else {
         $path = $this->file;
     }
     if (empty($this->options['element']) or $this->options['get_cloud']) {
         $reader = new XMLReader();
         // Only walk the document when it could actually be opened; calling
         // read() on a reader without data raises an Error on PHP 8.
         if ($reader->open($path)) {
             $reader->setParserProperty(XMLReader::VALIDATE, false);
             while (@$reader->read()) {
                 if ($reader->nodeType !== XMLReader::ELEMENT) {
                     continue;
                 }
                 $tag = str_replace(":", "_", $reader->localName);
                 if (array_key_exists($tag, $this->cloud)) {
                     $this->cloud[$tag]++;
                 } else {
                     $this->cloud[$tag] = 1;
                 }
             }
             $reader->close();
         }
         unset($reader);
         if (!empty($this->cloud) and empty($this->options['element'])) {
             // Most frequent element names first.
             arsort($this->cloud);
             $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book');
             // Prefer the most frequent element whose name is a well-known record name.
             foreach ($this->cloud as $element_name => $value) {
                 if (in_array(strtolower($element_name), $main_elements, true)) {
                     $this->options['element'] = $element_name;
                     break;
                 }
             }
             // Otherwise fall back to the most frequent element overall.
             if (empty($this->options['element'])) {
                 foreach ($this->cloud as $el => $count) {
                     $this->options['element'] = $el;
                     break;
                 }
             }
         }
     }
     $this->reader = new XMLReader();
     @$this->reader->open($path);
     @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
 }
Exemplo n.º 13
0
/**
 * Writes the raw body of the current request (php://input) to a file inside
 * the 1C exchange directory, or inside the 1C images directory when the file
 * name is recognised as an image by fn_exim_1c_file_is_image().
 *
 * For schema version 2.07, a previously stored file of the same name is first
 * checked with XMLReader and deleted when the reader reports it as not valid.
 *
 * NOTE(review): $filename is concatenated into the target path unchanged; this
 * function does not guard against "../" segments. Confirm callers sanitise it.
 * NOTE(review): isValid() is queried without any read() call, so it may not
 * reflect the whole document; behaviour kept as in the original, confirm intent.
 *
 * @param string $filename File name relative to the 1C exchange directory
 *
 * @return bool true when the request body was written, false when the target
 *              file could not be opened or the write failed
 */
function fn_exim_1c_get_external_file($filename)
{
    list($dir_1c, $dir_1c_url, $dir_1c_images) = fn_rus_exim_1c_get_dir_1c();
    if (!is_dir($dir_1c)) {
        fn_mkdir($dir_1c);
    }
    $file_path = $dir_1c . $filename;
    if (Registry::get('addons.rus_exim_1c.exim_1c_schema_version') == '2.07') {
        if (file_exists($file_path) && extension_loaded('XMLReader')) {
            $xml = new XMLReader();
            $xml->open($file_path);
            $xml->setParserProperty(XMLReader::VALIDATE, true);
            $is_valid = $xml->isValid();
            // Release the reader's handle on the file before trying to delete it;
            // an open handle can make unlink() fail on some platforms.
            $xml->close();
            if (!$is_valid) {
                @unlink($file_path);
            }
        }
    }
    if (fn_exim_1c_file_is_image($filename)) {
        if (!is_dir($dir_1c_images)) {
            fn_mkdir($dir_1c_images);
        }
        $file_path = $dir_1c_images . $filename;
    }
    $file = @fopen($file_path, 'w');
    if (!$file) {
        return false;
    }
    // fwrite() returns false on failure; report that instead of claiming success.
    $written = fwrite($file, fn_get_contents('php://input'));
    fclose($file);
    return $written !== false;
}
Exemplo n.º 14
0
 /**
  * __construct
  *
  * Builds the Chunk object.
  *
  * Steps, in order:
  *  1. Merge the caller's options into the defaults and scale 'chunkSize' by
  *     the plugin's 'chunk_size' option.
  *  2. Sniff the first 1024 bytes of the file; when they start with
  *     "<!DOCTYPE" the object is flagged via $this->is_404, a reader is still
  *     opened on the file, and construction stops.
  *  3. When no 'element' option is set (or 'get_cloud' is set), scan the whole
  *     file once, counting element names into $this->cloud, and pick the
  *     element to iterate over from those counts.
  *  4. Open the XMLReader stored in $this->reader.
  *
  * @param string $file The filename to work with
  * @param array $options The options with which to parse the file
  * @param bool $debug Not used by the constructor body
  * @author Dom Hastings
  * @access public
  */
 public function __construct($file, $options = array(), $debug = false)
 {
     // merge the options together
     $this->options = array_merge($this->options, is_array($options) ? $options : array());
     $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
     // set the filename
     $this->file = $file;
     // Sniff the head of the file. fopen() returns false on failure and that
     // value must not reach feof()/fread()/fclose(): on PHP 8 they throw a
     // TypeError for a non-resource argument, which "@" cannot suppress.
     $is_html = false;
     $f = @fopen($file, "rb");
     if ($f !== false) {
         if (!feof($f)) {
             $chunk = @fread($f, 1024);
             if (is_string($chunk) and strpos($chunk, "<!DOCTYPE") === 0) {
                 $is_html = true;
             }
         }
         fclose($f);
     }
     if ($is_html) {
         $path = $this->get_file_path();
         $this->is_404 = true;
         $this->reader = new XMLReader();
         @$this->reader->open($path);
         @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
         return;
     }
     if (empty($this->options['element']) or $this->options['get_cloud']) {
         $path = $this->get_file_path();
         $reader = new XMLReader();
         // Only walk the document when it could actually be opened; calling
         // read() on a reader without data raises an Error on PHP 8.
         if ($reader->open($path)) {
             $reader->setParserProperty(XMLReader::VALIDATE, false);
             while (@$reader->read()) {
                 if ($reader->nodeType !== XMLReader::ELEMENT) {
                     continue;
                 }
                 // "_colon_" placeholders are turned back into ":" and every ":"
                 // then becomes "_" to form the key used in the cloud.
                 $localName = str_replace("_colon_", ":", $reader->localName);
                 $tag = str_replace(":", "_", $localName);
                 if (array_key_exists($tag, $this->cloud)) {
                     $this->cloud[$tag]++;
                 } else {
                     $this->cloud[$tag] = 1;
                 }
             }
             $reader->close();
         }
         unset($reader);
         if (!empty($this->cloud) and empty($this->options['element'])) {
             // Most frequent element names first.
             arsort($this->cloud);
             $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0');
             // Prefer the most frequent element whose name is a well-known record name.
             foreach ($this->cloud as $element_name => $value) {
                 if (in_array(strtolower($element_name), $main_elements, true)) {
                     $this->options['element'] = $element_name;
                     break;
                 }
             }
             // Otherwise fall back to the most frequent element overall.
             if (empty($this->options['element'])) {
                 foreach ($this->cloud as $el => $count) {
                     $this->options['element'] = $el;
                     break;
                 }
             }
         }
     }
     $path = $this->get_file_path();
     $this->reader = new XMLReader();
     @$this->reader->open($path);
     @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
 }
Exemplo n.º 15
0
<?php

// Prints the name of every node in dtdexample.xml, indented by its depth, and
// finally dumps whether the reader considers the document valid.
$indent = 5;
/* Number of spaces to indent per level */
$xml = new XMLReader();
$xml->open("dtdexample.xml");
// Both properties are set before the first read().
$xml->setParserProperty(XMLReader::LOADDTD, true);
$xml->setParserProperty(XMLReader::VALIDATE, true);
while ($xml->read()) {
    /* Indentation for this node, based on depth and $indent var */
    $padding = str_repeat(" ", $xml->depth * $indent);
    print $padding . $xml->name . "\n";
    if ($xml->hasAttributes) {
        print $padding . " Number of Attributes: " . $xml->attributeCount . "\n";
    }
}
print "\n\nValid:\n";
var_dump($xml->isValid());
$xml->close();
Exemplo n.º 16
0
// Fragment: $reader and $xmlstring are not defined in this excerpt; they are
// expected to have been created earlier in the script.
//
// Case 1: parse from a string with DEFAULTATTRS enabled.
$reader->XML($xmlstring);
$reader->setParserProperty(XMLReader::DEFAULTATTRS, true);
// Empty-bodied loop: advance until the reader rests on the first element node
// (or input is exhausted).
while ($reader->read() && $reader->nodeType != XMLReader::ELEMENT) {
}
var_dump($reader->getAttribute('bar'));
var_dump($reader->getAttribute('baz'));
$reader->close();
echo "\nUsing URI:\n";
// Case 2: parse 012.xml from a path, with DEFAULTATTRS deliberately left off
// (the setParserProperty call below is commented out).
$reader = new XMLReader();
$file = dirname(__FILE__) . '/012.xml';
// On platforms whose directory separator is a backslash, use forward slashes
// in the path passed to open().
if (DIRECTORY_SEPARATOR == '\\') {
    $file = str_replace('\\', "/", $file);
}
$reader->open($file);
//$reader->setParserProperty(XMLReader::DEFAULTATTRS, true);
// Advance to the first element node, as above.
while ($reader->read() && $reader->nodeType != XMLReader::ELEMENT) {
}
var_dump($reader->getAttribute('bar'));
var_dump($reader->getAttribute('baz'));
$reader->close();
// Case 3: parse 012.xml from a path again, this time with DEFAULTATTRS enabled.
$reader = new XMLReader();
$reader->open(dirname(__FILE__) . '/012.xml');
$reader->setParserProperty(XMLReader::DEFAULTATTRS, true);
// Advance to the first element node, as above.
while ($reader->read() && $reader->nodeType != XMLReader::ELEMENT) {
}
var_dump($reader->getAttribute('bar'));
var_dump($reader->getAttribute('baz'));
$reader->close();
?>
===DONE===
Exemplo n.º 17
0
 /**
  * Creates an XMLReader for the given input.
  *
  * Leading whitespace is stripped from the input first. When
  * isXmlDocument() accepts the result it is parsed as an XML string;
  * otherwise it is used as the location of a file to open. In both cases
  * blank nodes are dropped, the DTD is loaded and entity substitution is
  * switched on.
  *
  * NOTE(review): DTD loading plus entity substitution can be risky when the
  * input is untrusted; confirm where $content comes from.
  *
  * @param string $content XML markup or the path of an XML file
  *
  * @return \XMLReader
  *
  * @throws ParseException when the input is treated as a file and cannot be opened
  */
 protected function createReader($content)
 {
     $libxmlFlags = LIBXML_NOBLANKS | LIBXML_DTDLOAD;
     $xmlReader = new \XMLReader();
     $source = ltrim($content);
     if (!$this->isXmlDocument($source)) {
         // Not recognised as markup: treat the input as a file location.
         $opened = @$xmlReader->open($source, null, $libxmlFlags);
         if (!$opened) {
             throw new ParseException(sprintf('File "%s" doesn\'t exist or is unreadable', $source));
         }
     } else {
         $xmlReader->XML($source, null, $libxmlFlags);
     }
     $xmlReader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);
     return $xmlReader;
 }
Exemplo n.º 18
0
 /**
  * __construct
  *
  * Builds the Chunk object.
  *
  * Steps, in order:
  *  1. Merge the caller's options into the defaults and scale 'chunkSize' by
  *     the plugin's 'chunk_size' option.
  *  2. Sniff the first 1024 bytes of the file; when they start with
  *     "<!DOCTYPE" the object is flagged via $this->is_404, an XMLReader is
  *     still opened on the file, and construction stops.
  *  3. Decide the parser type ('xmlreader' or 'xmlstreamer') from the plugin's
  *     'force_stream_reader' option, the import record identified by the
  *     request's 'id' / 'import_id' input, the 'wpai_parser_type' option, or
  *     the $parser_type argument.
  *  4. When no 'element' option is set (or 'get_cloud' is set), scan the whole
  *     file once to fill $this->cloud with element counts and pick the element
  *     to iterate over from those counts.
  *  5. Create $this->reader using the chosen parser type.
  *
  * @param string $file The filename to work with
  * @param array $options The options with which to parse the file
  * @param string|bool $parser_type Parser to use; an empty value means 'xmlreader'
  *                                 unless one of the settings above overrides it
  * @author Dom Hastings
  * @access public
  */
 public function __construct($file, $options = array(), $parser_type = false)
 {
     // merge the options together
     $this->options = array_merge($this->options, is_array($options) ? $options : array());
     $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
     // set the filename
     $this->file = $file;
     $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
     // Sniff the head of the file. fopen() returns false on failure and that
     // value must not reach feof()/fread()/fclose(): on PHP 8 they throw a
     // TypeError for a non-resource argument, which "@" cannot suppress.
     $is_html = false;
     $f = @fopen($file, "rb");
     if ($f !== false) {
         if (!feof($f)) {
             $chunk = @fread($f, 1024);
             if (is_string($chunk) and strpos($chunk, "<!DOCTYPE") === 0) {
                 $is_html = true;
             }
         }
         fclose($f);
     }
     if ($is_html) {
         $path = $this->get_file_path();
         $this->is_404 = true;
         $this->reader = new XMLReader();
         @$this->reader->open($path);
         @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
         return;
     }
     if (PMXI_Plugin::getInstance()->getOption('force_stream_reader')) {
         $this->parser_type = 'xmlstreamer';
     } else {
         $input = new PMXI_Input();
         $import_id = $input->get('id', 0);
         if (empty($import_id)) {
             $import_id = $input->get('import_id', 0);
         }
         if (!empty($import_id)) {
             $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
             $import = new PMXI_Import_Record();
             $import->getById($import_id);
             // An existing import record decides the engine through its
             // 'xml_reader_engine' option.
             if (!$import->isEmpty()) {
                 $this->parser_type = empty($import->options['xml_reader_engine']) ? 'xmlreader' : 'xmlstreamer';
             }
         } else {
             $this->parser_type = empty($parser_type) ? get_option('wpai_parser_type', 'xmlreader') : $parser_type;
         }
     }
     if (empty($this->options['element']) or $this->options['get_cloud']) {
         $path = $this->get_file_path();
         if ($this->parser_type == 'xmlreader') {
             $reader = new XMLReader();
             // Only walk the document when it could actually be opened; calling
             // read() on a reader without data raises an Error on PHP 8.
             if ($reader->open($path)) {
                 $reader->setParserProperty(XMLReader::VALIDATE, false);
                 while (@$reader->read()) {
                     if ($reader->nodeType !== XMLReader::ELEMENT) {
                         continue;
                     }
                     // "_colon_" placeholders are turned back into ":" and every
                     // ":" then becomes "_" to form the key used in the cloud.
                     $localName = str_replace("_colon_", ":", $reader->localName);
                     $tag = str_replace(":", "_", $localName);
                     if (array_key_exists($tag, $this->cloud)) {
                         $this->cloud[$tag]++;
                     } else {
                         $this->cloud[$tag] = 1;
                     }
                 }
                 $reader->close();
             }
             unset($reader);
         } else {
             $CHUNK_SIZE = 1024;
             $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
             $parseroptions = array("extractContainer" => false);
             // Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
             $parser = new Parser\StringWalker($parseroptions);
             // Create the streamer
             $streamer = new XmlStringStreamer($parser, $streamProvider);
             // Drain the stream; the nodes themselves are discarded, only the
             // parser's 'cloud' property is read afterwards.
             while ($node = $streamer->getNode()) {
             }
             $this->cloud = $parser->cloud;
         }
         if (!empty($this->cloud) and empty($this->options['element'])) {
             // Most frequent element names first.
             arsort($this->cloud);
             $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0');
             // Prefer the most frequent element whose name is a well-known record name.
             foreach ($this->cloud as $element_name => $value) {
                 if (in_array(strtolower($element_name), $main_elements, true)) {
                     $this->options['element'] = $element_name;
                     break;
                 }
             }
             // Otherwise fall back to the most frequent element overall.
             if (empty($this->options['element'])) {
                 foreach ($this->cloud as $el => $count) {
                     $this->options['element'] = $el;
                     break;
                 }
             }
         }
     }
     $path = $this->get_file_path();
     if ($this->parser_type == 'xmlreader') {
         $this->reader = new XMLReader();
         @$this->reader->open($path);
         @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
     } else {
         $parseroptions = array("uniqueNode" => $this->options['element']);
         $CHUNK_SIZE = 1024;
         $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
         $parser = new Parser\UniqueNode($parseroptions);
         $this->reader = new XmlStringStreamer($parser, $streamProvider);
     }
 }
Exemplo n.º 19
0
 /**
  * Transforms the XML string given as argument.
  *
  * The string is read node by node; opening and closing tags are handed to
  * nodeOpen() / nodeClose(), whitespace and text to nodeContent() (text is
  * passed through htmlspecialchars() first) and CDATA sections to
  * cDataNodeContent(). The accumulated content of the transformer instance
  * is returned.
  *
  * @param string                $xml      Well-formed XML string to transform
  * @param string|array|\Closure $callback Either the name of a callback function,
  *        an array with indexes 0: class and 1: method, or a closure. It
  *        returns the transformation info for a tag and is called for every
  *        opening or closing tag, so it has to be efficient. It must accept
  *        3 arguments:
  *          1. Tag name
  *          2. Attributes as associative array (also provided for closing tags)
  *          3. One of the XMLTransformer::EL* constants to indicate the node type
  *        It must return either false (the tag itself and anything inside it
  *        is then completely ignored) or an array with 0 or more of these keys:
  *          - "tag": a new tag name that will be used instead of the original
  *            one. If false, the tag will be removed, but its child nodes
  *            will be preserved.
  *          - "@<name>" (where <name> is an attribute name): false, or a
  *            string starting with "@" (renames the attribute), or a string
  *            not starting with "@" (literal attribute value).
  *            NOTE(review): the original text says false "will return the
  *            attribute"; presumably "remove" was meant, confirm.
  *          - "insbefore": inserts PCDATA before the opening tag
  *          - "insstart": inserts PCDATA after the opening tag (i.e. as a
  *            new first child)
  *          - "insend": inserts PCDATA directly before the closing tag
  *          - "insafter": inserts PCDATA after the closing tag
  *          - "transformOuter": a closure that is passed the transformed
  *            element including all contained elements as a string
  *          - "transformInner": a closure that is passed the transformed
  *            element's content as a string
  *        Anything for which neither false nor an appropriate array value is
  *        returned is left unmodified.
  * @param bool                  $keepCData If false (default: true), CDATA content
  *                                         is not retained as CDATA, but as PCDATA
  *                                         with < and > and & escaped
  *
  * @return string XML string
  * @throws \InvalidArgumentException When checkCallback() rejects $callback
  * @throws \RuntimeException Declared by the original documentation; not
  *                           thrown directly in this method body
  */
 public static function transformString($xml, $callback, $keepCData = true)
 {
     $transformer = new static();
     if (!self::checkCallback($callback)) {
         throw new \InvalidArgumentException('Callback must be function, method or closure');
     }
     $transformer->callback = $callback;
     $transformer->keepCData = (bool) $keepCData;

     $reader = new \XMLReader();
     $reader->XML($xml);
     $reader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

     while ($reader->read()) {
         $type = $reader->nodeType;
         if ($type == \XMLReader::ELEMENT) {
             $transformer->nodeOpen($reader);
         } elseif ($type == \XMLReader::END_ELEMENT) {
             $transformer->nodeClose($reader);
         } elseif ($type == \XMLReader::SIGNIFICANT_WHITESPACE || $type == \XMLReader::WHITESPACE) {
             // Whitespace is forwarded unescaped.
             $transformer->nodeContent($reader->value);
         } elseif ($type == \XMLReader::CDATA) {
             $transformer->cDataNodeContent($reader->value);
         } elseif ($type == \XMLReader::TEXT) {
             // Text was entity-substituted by the reader, so escape it again.
             $transformer->nodeContent(htmlspecialchars($reader->value));
         }
     }
     $reader->close();

     return $transformer->content;
 }
Exemplo n.º 20
0
 /**
  * Parses the input code and returns the OPT XML tree.
  *
  * The code is read with XMLReader node by node. Elements, comments, text and
  * CDATA sections are turned into Opt_Xml_* nodes and attached to the tree;
  * other node types are ignored. libxml errors are collected internally and,
  * if any occurred, echoed as HTML lines after parsing; the tree built so far
  * is returned in either case.
  *
  * @param String $filename The file name (for debug purposes); not referenced
  *                         in this method body
  * @param String &$code The code to parse
  * @return Opt_Xml_Root
  */
 public function parse($filename, &$code)
 {
     // Node type => readable name. Only referenced by the commented-out
     // Opl_Debug::write() call inside the loop.
     $debug = array(XMLReader::NONE => 'NONE', XMLReader::ELEMENT => 'ELEMENT', XMLReader::ATTRIBUTE => 'ATTRIBUTE', XMLReader::TEXT => 'TEXT', XMLReader::CDATA => 'CDATA', XMLReader::ENTITY_REF => 'ENTITY_REF', XMLReader::ENTITY => 'ENTITY', XMLReader::PI => 'PI', XMLReader::COMMENT => 'COMMENT', XMLReader::DOC => 'DOC', XMLReader::DOC_TYPE => 'DOC_TYPE', XMLReader::DOC_FRAGMENT => 'DOC_FRAGMENT', XMLReader::NOTATION => 'NOTATION', XMLReader::WHITESPACE => 'WHITESPACE', XMLReader::SIGNIFICANT_WHITESPACE => 'SIGNIFICANT_WHITESPACE', XMLReader::END_ELEMENT => 'END_ELEMENT', XMLReader::END_ENTITY => 'END_ENTITY', XMLReader::XML_DECLARATION => 'XML_DECLARATION');
     // Collect libxml errors internally; they are fetched with
     // libxml_get_errors() after the loop.
     libxml_use_internal_errors(true);
     $reader = new XMLReader();
     $reader->xml($code);
     //	$reader->setParserProperty(XMLReader::LOADDTD, true);
     //	$reader->setParserProperty(XMLReader::VALIDATE, true);
     $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);
     // $root is the tree being built; $current is the node new children are
     // appended to.
     $root = $current = new Opt_Xml_Root();
     $firstElementMatched = false;
     // Depth of the node handled in the previous iteration.
     $depth = 0;
     // read() is called with "@" to silence the warnings it emits; errors are
     // reported from the libxml error list after the loop instead.
     while (@$reader->read()) {
         // Follow the reader's depth: shallower than the previous node means
         // stepping up to the parent, deeper means descending into the node
         // created most recently ($optNode).
         if ($reader->depth < $depth) {
             $current = $current->getParent();
         } elseif ($reader->depth > $depth) {
             $current = $optNode;
         }
         //	Opl_Debug::write($debug[$reader->nodeType].': '.$reader->name.', '.$reader->value);
         switch ($reader->nodeType) {
             // XML elements
             case XMLReader::ELEMENT:
                 $optNode = new Opt_Xml_Element($reader->name);
                 // Parse element attributes, if there are any
                 if ($reader->moveToFirstAttribute()) {
                     do {
                         // Attributes with the "xmlns" prefix declare namespaces and
                         // are handled separately.
                         if ($reader->prefix == 'xmlns') {
                             $ns = str_replace('xmlns:', '', $reader->name);
                             $root->addNamespace($ns, $reader->value);
                             // Keep the declaration as a regular attribute only when the
                             // compiler does not treat it as one of its own namespaces.
                             if (!$this->_compiler->isNamespace($ns)) {
                                 $optAttribute = new Opt_Xml_Attribute($reader->name, $reader->value);
                                 $optNode->addAttribute($optAttribute);
                             }
                         } else {
                             $optAttribute = new Opt_Xml_Attribute($reader->name, $reader->value);
                             $optNode->addAttribute($optAttribute);
                         }
                     } while ($reader->moveToNextAttribute());
                     // Move the cursor back from the attributes to the element itself.
                     $reader->moveToElement();
                 }
                 // Set "rootNode" flag on the first element encountered
                 if (!$firstElementMatched) {
                     $optNode->set('rootNode', true);
                     $firstElementMatched = true;
                 }
                 // Set "single" flag on empty elements
                 if ($reader->isEmptyElement) {
                     $optNode->set('single', true);
                 }
                 $current->appendChild($optNode);
                 break;
             case XMLReader::TEXT:
                 // Text is handed to _treeTextCompile() together with the current node.
                 $this->_treeTextCompile($current, $reader->value);
                 break;
             case XMLReader::COMMENT:
                 $optNode = new Opt_Xml_Comment($reader->value);
                 $current->appendChild($optNode);
                 break;
             case XMLReader::CDATA:
                 $cdata = new Opt_Xml_Cdata($reader->value);
                 $cdata->set('cdata', true);
                 // CDATA goes into a text node: reuse the current node when it is
                 // one, otherwise wrap it in a new Opt_Xml_Text which then becomes
                 // the current node.
                 if ($current instanceof Opt_Xml_Text) {
                     $current->appendChild($cdata);
                 } else {
                     $text = new Opt_Xml_Text();
                     $text->appendChild($cdata);
                     $current->appendChild($text);
                     $current = $text;
                 }
                 break;
                 /*		Disabled: SIGNIFICANT_WHITESPACE handled like CDATA.
                 		case XMLReader::SIGNIFICANT_WHITESPACE:
                 					$cdata = new Opt_Xml_Cdata($reader->value);
                 					$cdata->set('cdata', true);
                 
                 					if($current instanceof Opt_Xml_Text)
                 					{
                 						$current->appendChild($cdata);
                 					}
                 					else
                 					{
                 						$text = new Opt_Xml_Text();
                 						$text->appendChild($cdata);
                 						$current->appendChild($text);
                 						$current = $text;
                 					}
                 					break;
                 		 */
         }
         // Remember this node's depth for the comparison in the next iteration.
         $depth = $reader->depth;
     }
     // Error checking: collected libxml errors are cleared and echoed as
     // "<message> (<line>)<br/>" lines; the tree is returned regardless.
     $errors = libxml_get_errors();
     if (sizeof($errors) > 0) {
         libxml_clear_errors();
         foreach ($errors as $error) {
             echo $error->message . ' (' . $error->line . ')<br/>';
         }
     }
     return $root;
 }
Exemplo n.º 21
0
 /**
  * Process a node from the XML map and record its structure.
  *
  * The fragment is wrapped in a synthetic <da_root> element and walked with XMLReader. Record
  * names are appended to $this->_xml_records; record attributes, field nodes and field
  * attributes are stored in $this->_xml_schema.
  * It is permitted for the XML to just define one or more tables without fields (when the 'use headers' option is used)
  *
  * Note: Calls itself recursively to process a tree
  *
  * @param string $node_content   XML fragment to process (no XML declaration, no single root required)
  * @param string $current_record Lower-cased name of the record currently open, or '' when none is open
  * @param mixed  $xml_path       Path stack handed to the _getXmlNodePath/_pushXmlNodePath/_popXmlNodePath helpers
  *
  * @return bool returns true if all fine else false
  */
 private function _getXmlNode($node_content, $current_record, $xml_path)
 {
     $jinput = JFactory::getApplication()->input;
     $csvilog = $jinput->get('csvilog', null, null);
     $current_node = '';
     $xml_schema = new XMLReader();
     /**
      * Add a declaration and a single wrapper element to make the XML fragment viable.
      * The XML may still be invalid but that's down to what the user entered
      */
     $node_content = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<da_root>" . $node_content . '</da_root>';
     $xml_schema->XML($node_content);
     // true: advance with read() (next node); false: advance with next() (skip the current subtree)
     $use_read = true;
     // The XML could only be validated against a DTD if the syntax of the XML used for the map is made more complex
     $validate_xml = false;
     if ($validate_xml == true) {
         // Note: When the DTD is external, the property value must be set before the first read()
         $xml_schema->setParserProperty(XMLReader::VALIDATE, true);
     }
     while ($use_read ? $xml_schema->read() : $xml_schema->next()) {
         // Validation checking disabled because a DTD (or RELAX NG) schema is required.
         if ($validate_xml == true) {
             if ($xml_schema->isValid() == false) {
                 $xml_schema->close();
                 return false;
             }
         }
         // Default to a reading a single node in the next loop
         $use_read = true;
         // Ignore any node associated with the root
         if ($xml_schema->name == 'da_root') {
             continue;
         }
         // Process start elements
         if ($xml_schema->nodeType == XMLReader::ELEMENT) {
             // Captured before the attribute loops below move the cursor off the element
             $self_closing = $xml_schema->isEmptyElement;
             // Ready to add a new node - but only if the last node was closed
             if (!empty($current_node)) {
                 $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_NODE_UNCLOSED', $current_node));
                 // Release the reader on this exit too, like every other failure path
                 $xml_schema->close();
                 return false;
             }
             // A new node was found - Check whether this is a new record type
             if (empty($current_record)) {
                 // New record type
                 $current_record = strtolower($xml_schema->name);
                 $this->_xml_records[] = $current_record;
                 // Store any attributes
                 while ($xml_schema->moveToNextAttribute()) {
                     // Note1: $xml_schema->hasValue only indicates whether the element can have a value, not whether it does
                     // Note2: empty($xml_schema->value) always return true, regardless of the actual value
                     $value = $xml_schema->value;
                     if (!empty($value)) {
                         if ($this->_isXmlFieldNameValid($value)) {
                             $this->_xml_schema[$current_record]['attrs'][strtolower($xml_schema->name)] = trim($value);
                         } else {
                             $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_FILE_MAP_NO_REFERENCE', $value));
                             $xml_schema->close();
                             return false;
                         }
                     }
                 }
                 // A self-closing record has no matching end element, so close it here
                 if ($self_closing) {
                     $current_record = '';
                 }
             } else {
                 // New field type
                 $current_node = strtolower($xml_schema->name);
                 $current_path = $this->_getXmlNodePath($xml_path, $current_node);
                 // Store any attributes
                 while ($xml_schema->moveToNextAttribute()) {
                     // Note1: $xml_schema->hasValue only indicates whether the element can have a value, not whether it does
                     // Note2: empty($xml_schema->value) always return true, regardless of the actual value
                     $value = $xml_schema->value;
                     if (!empty($value)) {
                         if ($this->_isXmlFieldNameValid($value)) {
                             $this->_xml_schema[$current_record]['nodes'][$current_path]['attrs'][strtolower($xml_schema->name)] = trim($value);
                         } else {
                             // sprintf (not _) so the offending value is substituted into the message
                             $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_FILE_MAP_NO_REFERENCE', $value));
                             $xml_schema->close();
                             return false;
                         }
                     }
                 }
                 $sub_node_content = $xml_schema->readInnerXML();
                 // Check whether there are any lower level nodes
                 if (strpos($sub_node_content, '<') === false) {
                     /**
                      * Content has no embedded nodes - Assume a field name
                      * Note: An empty node gives a blank field name which indicates an unwanted node
                      * that is being mapped to prevent errors when processing the file
                      */
                     if ($this->_isXmlFieldNameValid($sub_node_content)) {
                         $this->_xml_schema[$current_record]['nodes'][$current_path]['field'] = trim($sub_node_content);
                     } else {
                         $this->_xml_schema[$current_record]['nodes'][$current_path]['field'] = '';
                     }
                 } else {
                     // There are embedded nodes - go down another level
                     // Indicate a 'group' node by storing an empty field name
                     $this->_xml_schema[$current_record]['nodes'][$current_path]['field'] = '';
                     // Push the node name to the path stack
                     $this->_pushXmlNodePath($xml_path, $current_node);
                     if (!$this->_getXmlNode($sub_node_content, $current_record, $xml_path)) {
                         $xml_schema->close();
                         return false;
                     }
                     // At the next read, skip to the next node at this level
                     $use_read = false;
                     // Close the node
                     $current_node = '';
                     // Pop the last item off the path stack
                     $this->_popXmlNodePath($xml_path);
                 }
                 // A self-closing field has no matching end element, so close it here
                 if ($self_closing) {
                     $current_node = '';
                 }
             }
         } elseif ($xml_schema->nodeType == XMLReader::END_ELEMENT) {
             // End of node found
             $closed_name = strtolower($xml_schema->name);
             if (!empty($current_record) && $closed_name == $current_record) {
                 // End of record detected
                 $current_record = '';
             } elseif (!empty($current_node) && $closed_name == $current_node) {
                 // End of current node detected
                 $current_node = '';
             }
         }
     }
     $xml_schema->close();
     // Node not terminated
     if (!empty($current_node)) {
         $csvilog->AddStats('incorrect', JText::sprintf('COM_CSVI_XML_NODE_NOT_CLOSED', $current_node));
         return false;
     }
     if (empty($this->_xml_records)) {
         $csvilog->AddStats('incorrect', JText::_('COM_CSVI_XML_NO_RECORDS_DEFINED'));
         return false;
     }
     return true;
 }
Exemplo n.º 22
0
 /**
  * {@inheritdoc}
  *
  * Walks the XML-RPC response with XMLReader. Each node is checked against a bitmask of the
  * tags allowed next ($nextExpectedElements) and decoded values are collected per nesting
  * level in $aggregates.
  *
  * libxml's internal error collection is switched on for the duration of the call and the
  * previous setting is restored on every exit from the read loop (normal return, early
  * return, and the unexpected-tag exception).
  *
  * Returns '' as soon as an entity reference node is encountered; throws UnexpectedTagException
  * for a tag that is not allowed at the current position; throws the exception built by
  * FaultException::createFromResponse() when a <fault> element was seen.
  */
 public function parse($xmlString)
 {
     if ($this->validateResponse) {
         XmlChecker::isValid($xmlString);
     }
     $useErrors = libxml_use_internal_errors(true);
     $xml = new \XMLReader();
     $xml->xml($xmlString, 'UTF-8', LIBXML_COMPACT | LIBXML_NOCDATA | LIBXML_NOBLANKS | LIBXML_PARSEHUGE);
     $xml->setParserProperty(\XMLReader::VALIDATE, false);
     $xml->setParserProperty(\XMLReader::LOADDTD, false);
     // This following assignments are auto-generated using Fxmlrpc\Serialization\CodeGenerator\XmlReaderParserBitmaskGenerator
     // Don’t edit manually
     static $flagmethodResponse = 0b1;
     static $flagparams = 0b10;
     static $flagfault = 0b100;
     static $flagparam = 0b1000;
     static $flagvalue = 0b10000;
     static $flagarray = 0b100000;
     static $flagmember = 0b1000000;
     static $flagname = 0b10000000;
     ${'flag#text'} = 0b100000000;
     static $flagstring = 0b1000000000;
     static $flagstruct = 0b10000000000;
     static $flagint = 0b100000000000;
     static $flagbiginteger = 0b1000000000000;
     static $flagi8 = 0b10000000000000;
     static $flagi4 = 0b100000000000000;
     static $flagi2 = 0b1000000000000000;
     static $flagi1 = 0b10000000000000000;
     static $flagboolean = 0b100000000000000000;
     static $flagdouble = 0b1000000000000000000;
     static $flagfloat = 0b10000000000000000000;
     static $flagbigdecimal = 0b100000000000000000000;
     ${'flagdateTime.iso8601'} = 0b1000000000000000000000;
     static $flagdateTime = 0b10000000000000000000000;
     static $flagbase64 = 0b100000000000000000000000;
     static $flagnil = 0b1000000000000000000000000;
     static $flagdom = 0b10000000000000000000000000;
     static $flagdata = 0b100000000000000000000000000;
     // End of auto-generated code
     $aggregates = [];
     $depth = 0;
     // Only <methodResponse> is acceptable as the first tag
     $nextExpectedElements = 0b1;
     $i = 0;
     $isFault = false;
     while ($xml->read()) {
         ++$i;
         $nodeType = $xml->nodeType;
         // Skip comments and doctypes; skip significant whitespace unless #text (0b100000000) is expected
         if ($nodeType === \XMLReader::COMMENT || $nodeType === \XMLReader::DOC_TYPE || ($nodeType === \XMLReader::SIGNIFICANT_WHITESPACE && ($nextExpectedElements & 0b100000000) !== 0b100000000)) {
             continue;
         }
         if ($nodeType === \XMLReader::ENTITY_REF) {
             // Restore the caller's libxml error mode before leaving early
             libxml_use_internal_errors($useErrors);
             return '';
         }
         $tagName = $xml->localName;
         // Look up the flag for this tag through its variable name; unknown tags get -1, which never passes the mask test
         if ($nextExpectedElements !== null && ($flag = isset(${'flag' . $tagName}) ? ${'flag' . $tagName} : -1) && ($nextExpectedElements & $flag) !== $flag) {
             // Build the exception first (it reads from the reader), then restore the libxml error mode
             $unexpectedTag = new UnexpectedTagException($tagName, $nextExpectedElements, get_defined_vars(), $xml->depth, $xml->readOuterXml());
             libxml_use_internal_errors($useErrors);
             throw $unexpectedTag;
         }
         // Re-entered via goto below so that an empty element is also processed as its own end tag
         processing:
         switch ($nodeType) {
             case \XMLReader::ELEMENT:
                 switch ($tagName) {
                     case 'methodResponse':
                         // Next: params, fault
                         $nextExpectedElements = 0b110;
                         break;
                     case 'params':
                         // Next: param
                         $nextExpectedElements = 0b1000;
                         $aggregates[$depth] = [];
                         break;
                     case 'fault':
                         $isFault = true;
                         // Break intentionally omitted
                     case 'param':
                         // Next: value
                         $nextExpectedElements = 0b10000;
                         break;
                     case 'array':
                         $aggregates[++$depth] = [];
                         // Break intentionally omitted
                     case 'data':
                         // Next: array, data, value
                         $nextExpectedElements = 0b100000000000000000000110000;
                         break;
                     case 'struct':
                         // Next: struct, member, value
                         $nextExpectedElements = 0b10001010000;
                         $aggregates[++$depth] = [];
                         break;
                     case 'member':
                         // Next: name, value
                         $nextExpectedElements = 0b10010000;
                         $aggregates[++$depth] = [];
                         break;
                     case 'name':
                         // Next: #text
                         $nextExpectedElements = 0b100000000;
                         $type = 'name';
                         break;
                     case 'value':
                         $nextExpectedElements = 0b11111111111111111100110000;
                         $type = 'value';
                         $aggregates[$depth + 1] = '';
                         break;
                     case 'base64':
                     case 'string':
                     case 'biginteger':
                     case 'i8':
                     case 'dateTime.iso8601':
                     case 'dateTime':
                         // Next: value, $tagName, #text
                         $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                         $type = $tagName;
                         $aggregates[$depth + 1] = '';
                         break;
                     case 'nil':
                         // Next: value, $tagName
                         $nextExpectedElements = 0b1000000000000000000010000 | ${'flag' . $tagName};
                         $type = $tagName;
                         $aggregates[$depth + 1] = null;
                         break;
                     case 'int':
                     case 'i4':
                     case 'i2':
                     case 'i1':
                         // Next: value, #text, $tagName
                         $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                         $type = $tagName;
                         $aggregates[$depth + 1] = 0;
                         break;
                     case 'boolean':
                         // Next: value, #text, $tagName
                         $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                         $type = 'boolean';
                         $aggregates[$depth + 1] = false;
                         break;
                     case 'double':
                     case 'float':
                     case 'bigdecimal':
                         // Next: value, #text, $tagName
                         $nextExpectedElements = 0b100010000 | ${'flag' . $tagName};
                         $type = $tagName;
                         $aggregates[$depth + 1] = 0.0;
                         break;
                     case 'dom':
                         $type = 'dom';
                         // Disable type checking
                         $nextExpectedElements = null;
                         $aggregates[$depth + 1] = $xml->readInnerXml();
                         break;
                 }
                 break;
             case \XMLReader::END_ELEMENT:
                 switch ($tagName) {
                     case 'params':
                     case 'fault':
                         // Leaves both switches and the read loop: the payload is complete
                         break 3;
                     case 'param':
                         // Next: params, param
                         $nextExpectedElements = 0b1010;
                         break;
                     case 'value':
                         $nextExpectedElements = 0b100100000011100100011011100;
                         $aggregates[$depth][] = $aggregates[$depth + 1];
                         break;
                     case 'array':
                     case 'struct':
                         --$depth;
                         // Break intentionally omitted
                     case 'string':
                     case 'int':
                     case 'biginteger':
                     case 'i8':
                     case 'i4':
                     case 'i2':
                     case 'i1':
                     case 'boolean':
                     case 'double':
                     case 'float':
                     case 'bigdecimal':
                     case 'dateTime.iso8601':
                     case 'dateTime':
                     case 'base64':
                     case 'nil':
                         // Next: value
                         $nextExpectedElements = 0b10000;
                         break;
                     case 'data':
                         // Next: array
                         $nextExpectedElements = 0b100000;
                         break;
                     case 'name':
                         // Next: value, member
                         $nextExpectedElements = 0b1010000;
                         $aggregates[$depth]['name'] = $aggregates[$depth + 1];
                         break;
                     case 'member':
                         // Next: struct, member
                         $nextExpectedElements = 0b10001000000;
                         // Store the member's value in the enclosing struct under the member's name
                         $aggregates[$depth - 1][$aggregates[$depth]['name']] = $aggregates[$depth][0];
                         unset($aggregates[$depth], $aggregates[$depth + 1]);
                         --$depth;
                         break;
                 }
                 break;
             case \XMLReader::TEXT:
             case \XMLReader::SIGNIFICANT_WHITESPACE:
                 // Convert the text according to the most recently opened typed element
                 switch ($type) {
                     case 'int':
                     case 'i4':
                     case 'i2':
                     case 'i1':
                         $value = (int) $xml->value;
                         break;
                     case 'boolean':
                         $value = $xml->value === '1';
                         break;
                     case 'double':
                     case 'float':
                     case 'bigdecimal':
                         $value = (double) $xml->value;
                         break;
                     case 'dateTime.iso8601':
                         $value = \DateTime::createFromFormat('Ymd\\TH:i:s', $xml->value, isset($timezone) ? $timezone : ($timezone = new \DateTimeZone('UTC')));
                         break;
                     case 'dateTime':
                         $value = \DateTime::createFromFormat('Y-m-d\\TH:i:s.uP', $xml->value, isset($timezone) ? $timezone : ($timezone = new \DateTimeZone('UTC')));
                         break;
                     case 'base64':
                         $value = Base64Value::deserialize($xml->value);
                         break;
                     case 'dom':
                         $doc = new \DOMDocument('1.0', 'UTF-8');
                         $doc->loadXML($aggregates[$depth + 1]);
                         $value = $doc;
                         break;
                     default:
                         $value =& $xml->value;
                         break;
                 }
                 $aggregates[$depth + 1] = $value;
                 if ($nextExpectedElements === null) {
                     break;
                 }
                 // Next: any
                 $nextExpectedElements = 0b111111111111111111111111111;
                 break;
         }
         // An empty element (<tag/>) produces no END_ELEMENT node, so run the end-tag handling for it now
         if ($xml->isEmptyElement && $nodeType !== \XMLReader::END_ELEMENT) {
             $nodeType = \XMLReader::END_ELEMENT;
             goto processing;
         }
     }
     libxml_use_internal_errors($useErrors);
     $result = $aggregates ? array_pop($aggregates[0]) : null;
     if ($isFault) {
         throw FaultException::createFromResponse($result);
     }
     return $result;
 }
Exemplo n.º 23
0
 /**
  * Build an XMLReader over an in-memory XML document.
  *
  * The document is loaded with LIBXML_DTDLOAD and the reader is told to substitute entities.
  * NOTE(review): if $content can come from an untrusted source this combination permits
  * entity expansion - confirm against the callers.
  *
  * @param string $content XML source to read
  *
  * @return \XMLReader the configured reader; no node has been read yet
  */
 protected function createReader($content)
 {
     $xmlReader = new \XMLReader();
     // No explicit encoding: pass null for the second argument
     $xmlReader->XML($content, null, LIBXML_DTDLOAD);
     $xmlReader->setParserProperty(\XMLReader::SUBST_ENTITIES, true);

     return $xmlReader;
 }
Exemplo n.º 24
0
// Export the exercise as a QTI2 XML file, validate it, and send it to the browser as a zip.
// $errorXmlExport stays null unless the generated XML fails validation.
$errorXmlExport = null;
if ($is_allowedToEdit && !empty($choice) && $choice == 'exportqti2') {
    require_once api_get_path(SYS_CODE_PATH) . 'exercice/export/qti2/qti2_export.php';
    $export = export_exercise_to_qti($exerciseId, true);
    $archive_path = api_get_path(SYS_ARCHIVE_PATH);
    $temp_dir_short = api_get_unique_id();
    $temp_zip_dir = $archive_path . $temp_dir_short;
    if (!is_dir($temp_zip_dir)) {
        mkdir($temp_zip_dir, api_get_permissions_for_new_directories());
    }
    $temp_zip_file = $temp_zip_dir . "/" . api_get_unique_id() . ".zip";
    $temp_xml_file = $temp_zip_dir . "/qti2export_" . $exerciseId . '.xml';
    file_put_contents($temp_xml_file, $export);
    // Validate the file that was just written; an unreadable file counts as invalid
    $isValid = false;
    $xmlReader = new XMLReader();
    if ($xmlReader->open($temp_xml_file)) {
        $xmlReader->setParserProperty(XMLReader::VALIDATE, true);
        $isValid = $xmlReader->isValid();
        // Release the file before it is zipped and deleted below
        $xmlReader->close();
    }
    if ($isValid) {
        $zip_folder = new PclZip($temp_zip_file);
        $zip_folder->add($temp_xml_file, PCLZIP_OPT_REMOVE_ALL_PATH);
        $name = 'qti2_export_' . $exerciseId . '.zip';
        DocumentManager::file_send_for_download($temp_zip_file, true, $name);
        unlink($temp_zip_file);
        unlink($temp_xml_file);
        rmdir($temp_zip_dir);
        exit;
        //otherwise following clicks may become buggy
    } else {
        // Nothing will be downloaded: remove the temporary export instead of leaving it in the archive directory
        if (is_file($temp_xml_file)) {
            unlink($temp_xml_file);
        }
        if (is_dir($temp_zip_dir)) {
            rmdir($temp_zip_dir);
        }
        $errorXmlExport = Display::return_message(get_lang('ErrorWritingXMLFile'), 'error');
    }
}
Exemplo n.º 25
0
<?php

/* $Id$ */
// Minimal document: only the parser-property setters/getters are exercised, not the content.
$xmlstring = '<?xml version="1.0" encoding="UTF-8"?>
<books></books>';
$reader = new XMLReader();
$reader->XML($xmlstring);

// LOADDTD switched off: prints "ok" when both the setter result and the value read back are falsy.
$loadDtdSet = $reader->setParserProperty(XMLReader::LOADDTD, false);
$loadDtdNow = $reader->getParserProperty(XMLReader::LOADDTD);
if (!($loadDtdSet || $loadDtdNow)) {
    echo "ok\n";
}

// SUBST_ENTITIES switched on: prints "ok" when both the setter result and the value read back are truthy.
$substSet = $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);
$substNow = $reader->getParserProperty(XMLReader::SUBST_ENTITIES);
if (!(!$substSet || !$substNow)) {
    echo "ok\n";
}

// Drain the document; the nodes themselves are not inspected.
while ($reader->read()) {
}
$reader->close();
?>
===DONE===
Exemplo n.º 26
0
 /**
 * * @expectedException PHPUnit_Framework_Error
 * XMLReader::read(): I/O warning : failed to load
 external entity "file:///C:/Christopher_Spaeth/code/xml_files_windows/xxe.txt"
 */
 public function testXXE_setParserProperty_SUBST_ENTITIES_disable_entity_loader()
 {
     // Open the XXE fixture and ask the reader to substitute entities...
     $reader = new XMLReader();
     $reader->open("../../xml_files_windows/xxe/xxe.xml");
     $reader->setParserProperty(XMLReader::SUBST_ENTITIES, true);
     // ...while the libxml entity loader is disabled.
     libxml_disable_entity_loader(true);
     while ($reader->read()) {
         $isDataElement = $reader->nodeType == XMLReader::ELEMENT && $reader->name == 'data';
         if (!$isDataElement) {
             continue;
         }
         // Remember the text content of the <data> element
         $node = $reader->name;
         $content = $reader->readString();
     }
     // With whitespace stripped, the element must have produced no content
     $stripped = preg_replace('/\\s+/', '', $content);
     $this->assertEquals("", $stripped);
 }
Exemplo n.º 27
0
 /**
  * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
  *
  * content.xml is streamed out of the archive with XMLReader; for every table:table element
  * the rows (honouring table:number-rows-repeated) and the widest row are counted.
  *
  * @param   string     $pFilename
  * @return  array      One entry per table:table element, each with the keys worksheetName,
  *                     lastColumnLetter, lastColumnIndex, totalRows and totalColumns
  * @throws   PHPExcel_Reader_Exception
  */
 public function listWorksheetInfo($pFilename)
 {
     // Check if file exists
     if (!file_exists($pFilename)) {
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
     }
     $worksheetInfo = array();
     $zipClass = PHPExcel_Settings::getZipClass();
     $zip = new $zipClass();
     // ZipArchive::open() returns TRUE on success but a (truthy) integer error code on failure,
     // so the result must be compared with TRUE instead of being negated
     if ($zip->open($pFilename) !== true) {
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! Error opening file.");
     }
     // The archive object was only needed for the check above; the content is read through the zip:// wrapper
     $zip->close();
     $xml = new XMLReader();
     if (!$xml->open('zip://' . realpath($pFilename) . '#content.xml', null, PHPExcel_Settings::getLibXmlLoaderOptions())) {
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! Error opening file.");
     }
     $xml->setParserProperty(XMLReader::DEFAULTATTRS, true);
     //	Step into the first level of content of the XML
     $xml->read();
     while ($xml->read()) {
         //	Quickly jump through to the office:body node
         while ($xml->name !== 'office:body') {
             $advanced = $xml->isEmptyElement ? $xml->read() : $xml->next();
             if (!$advanced) {
                 //	End of document without an office:body node - nothing (more) to report
                 break 2;
             }
         }
         //	Now read each node until we find our first table:table node
         while ($xml->read()) {
             if ($xml->name == 'table:table' && $xml->nodeType == XMLReader::ELEMENT) {
                 $tmpInfo = array('worksheetName' => $xml->getAttribute('table:name'), 'lastColumnLetter' => 'A', 'lastColumnIndex' => 0, 'totalRows' => 0, 'totalColumns' => 0);
                 $currCells = 0;
                 //	A self-closing table has no children and no end tag to scan for
                 $hasChildren = !$xml->isEmptyElement;
                 //	Loop through each child node of the table:table element, stopping at its end tag or at the end of the document
                 while ($hasChildren && $xml->read() && $xml->name != 'table:table') {
                     if ($xml->name == 'table:table-row' && $xml->nodeType == XMLReader::ELEMENT) {
                         $rowspan = $xml->getAttribute('table:number-rows-repeated');
                         $rowspan = empty($rowspan) ? 1 : (int) $rowspan;
                         $tmpInfo['totalRows'] += $rowspan;
                         $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                         $currCells = 0;
                         if ($xml->isEmptyElement) {
                             //	A self-closing row has no cells; stepping into it would swallow the next sibling
                             continue;
                         }
                         //	Step into the row
                         $more = $xml->read();
                         //	Every pass moves the cursor, so the loop ends at the row's end tag or at the end of the document
                         while ($more && $xml->name != 'table:table-row') {
                             if ($xml->name == 'table:table-cell' && $xml->nodeType == XMLReader::ELEMENT && !$xml->isEmptyElement) {
                                 //	Only cells with content are counted; skip over that content
                                 $currCells++;
                                 $more = $xml->next();
                             } else {
                                 if ($xml->name == 'table:covered-table-cell' && $xml->nodeType == XMLReader::ELEMENT) {
                                     $mergeSize = $xml->getAttribute('table:number-columns-repeated');
                                     $currCells += (int) $mergeSize;
                                 }
                                 //	Empty cells, covered cells and any other node: just move on
                                 $more = $xml->read();
                             }
                         }
                     }
                 }
                 $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                 $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
                 $tmpInfo['lastColumnLetter'] = PHPExcel_Cell::stringFromColumnIndex($tmpInfo['lastColumnIndex']);
                 $worksheetInfo[] = $tmpInfo;
             }
         }
     }
     $xml->close();
     return $worksheetInfo;
 }
Exemplo n.º 28
0
<?php

// Validate thedata.xml (located next to this script) and report the result.
$reader = new XMLReader();
/* load xml document as a string */
$reader->XML(file_get_contents(dirname(__FILE__) . '/thedata.xml'));
/* tell the parser to validate the document while reading */
$reader->setParserProperty(XMLReader::VALIDATE, true);
// collect parser errors (missing DTD in this case) internally instead of emitting warnings
$previousErrorMode = libxml_use_internal_errors(true);
// loop until end of document
while ($reader->read()) {
}
echo "XML document is: " . ($reader->isValid() ? '' : ' not') . " valid.";
// hand the libxml error mode back unchanged
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);
Exemplo n.º 29
0
 /**
  * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
  *
  * Opens the workbook as a zip archive, follows the package relationships to the
  * workbook part and then streams each worksheet part through XMLReader, looking
  * only at <row> and <c> elements:
  *  - totalRows is the "r" attribute of the last <row> element encountered;
  *  - totalColumns is the largest number of <c> elements found in a single row.
  *
  * @param   string     $pFilename
  * @return  array      One entry per sheet, each an array with the keys worksheetName,
  *                     lastColumnLetter, lastColumnIndex, totalRows and totalColumns
  * @throws   PHPExcel_Reader_Exception   When the file does not exist, or the archive or one of
  *                                       its worksheet parts cannot be opened
  */
 public function listWorksheetInfo($pFilename)
 {
     // Check if file exists
     if (!file_exists($pFilename)) {
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
     }
     $worksheetInfo = array();
     $zip = new ZipArchive();
     // ZipArchive::open() returns true on success and an integer error code otherwise
     if ($zip->open($pFilename) !== true) {
         throw new PHPExcel_Reader_Exception("Could not open " . $pFilename . " for reading! Error opening file.");
     }
     // Package-level relationships: used to locate the workbook part
     $rels = simplexml_load_string($this->_getFromZipArchive($zip, "_rels/.rels"));
     foreach ($rels->Relationship as $rel) {
         if ($rel["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") {
             // Worksheet targets are resolved relative to the workbook's directory
             $dir = dirname($rel["Target"]);
             $relsWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "{$dir}/_rels/" . basename($rel["Target"]) . ".rels"));
             $relsWorkbook->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
             // Map relationship id => worksheet part target
             $worksheets = array();
             foreach ($relsWorkbook->Relationship as $ele) {
                 if ($ele["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet") {
                     $worksheets[(string) $ele["Id"]] = $ele["Target"];
                 }
             }
             $xmlWorkbook = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}"));
             if ($xmlWorkbook->sheets) {
                 foreach ($xmlWorkbook->sheets->sheet as $eleSheet) {
                     $tmpInfo = array('worksheetName' => (string) $eleSheet["name"], 'lastColumnLetter' => 'A', 'lastColumnIndex' => 0, 'totalRows' => 0, 'totalColumns' => 0);
                     // The sheet's relationship id attribute selects its part from the map built above
                     $fileWorksheet = $worksheets[(string) self::array_item($eleSheet->attributes("http://schemas.openxmlformats.org/officeDocument/2006/relationships"), "id")];
                     $worksheetPath = 'zip://' . PHPExcel_Shared_File::realpath($pFilename) . '#' . "{$dir}/{$fileWorksheet}";
                     $xml = new XMLReader();
                     // Configuring or reading a reader that failed to open is an error, so fail
                     // with the reader exception this method already documents.
                     if (!$xml->open($worksheetPath)) {
                         $zip->close();
                         throw new PHPExcel_Reader_Exception("Could not open " . $worksheetPath . " for reading!");
                     }
                     // Same parser property the literal 2 selected before
                     $xml->setParserProperty(XMLReader::DEFAULTATTRS, true);
                     $currCells = 0;
                     while ($xml->read()) {
                         if ($xml->name == 'row' && $xml->nodeType == XMLReader::ELEMENT) {
                             // A new row starts: record its index and fold in the cell count of the previous row
                             $row = $xml->getAttribute('r');
                             $tmpInfo['totalRows'] = $row;
                             $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                             $currCells = 0;
                         } elseif ($xml->name == 'c' && $xml->nodeType == XMLReader::ELEMENT) {
                             $currCells++;
                         }
                     }
                     // The cells of the final row are only accounted for once the stream ends
                     $tmpInfo['totalColumns'] = max($tmpInfo['totalColumns'], $currCells);
                     $xml->close();
                     $tmpInfo['lastColumnIndex'] = $tmpInfo['totalColumns'] - 1;
                     $tmpInfo['lastColumnLetter'] = PHPExcel_Cell::stringFromColumnIndex($tmpInfo['lastColumnIndex']);
                     $worksheetInfo[] = $tmpInfo;
                 }
             }
         }
     }
     $zip->close();
     return $worksheetInfo;
 }
Exemplo n.º 30
0
 /**
  * __construct
  *
  * Builds the Chunk object
  *
  * Merges the caller's options into the defaults, scales the chunk size by the
  * plugin's 'chunk_size' option and opens the file for streaming. When no
  * 'element' option is supplied, the first elements of the file are sampled to
  * pick one: a tag matching a list of well-known record names wins, otherwise
  * the most frequent sampled tag is used.
  *
  * @param string $file The filename to work with
  * @param array $options The options with which to parse the file
  * @author Dom Hastings
  * @access public
  */
 public function __construct($file, $options = array())
 {
     // merge the options together
     $this->options = array_merge($this->options, is_array($options) ? $options : array());
     $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
     // set the filename
     $this->file = $file;
     // Resolve the stream path once; both the sampling pass and the main reader use it.
     // When user stream filters are available the file is read through the
     // 'preprocessxml' filter (its class, preprocessXml_filter, is defined elsewhere).
     if (function_exists('stream_filter_register')) {
         stream_filter_register('preprocessxml', 'preprocessXml_filter');
         $path = 'php://filter/read=preprocessxml/resource=' . $this->file;
     } else {
         $path = $this->file;
     }
     if (empty($this->options['element'])) {
         // Sample element names from the start of the document
         $founded_tags = array();
         $reader = new XMLReader();
         // Only configure and read the reader when the source actually opened
         if ($reader->open($path)) {
             $reader->setParserProperty(XMLReader::VALIDATE, false);
             // @ keeps parser errors in malformed input from surfacing during sampling
             while (@$reader->read()) {
                 if ($reader->nodeType !== XMLReader::ELEMENT) {
                     continue;
                 }
                 $founded_tags[] = str_replace(":", "_", $reader->localName);
                 // Stop sampling once more than 100 element names have been collected
                 if (count($founded_tags) > 100) {
                     break;
                 }
             }
         }
         unset($reader);
         // Always defined (an empty array when nothing was sampled) so the
         // fallback below never touches an unset variable.
         $element_counts = array_count_values($founded_tags);
         // Colons were already replaced above, so every sampled tag goes into the cloud
         foreach ($element_counts as $tag => $count) {
             $this->cloud[$tag] = $count;
         }
         // Most frequent tag first; the cloud itself keeps document order
         arsort($element_counts);
         if (!empty($this->cloud)) {
             $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post');
             foreach ($this->cloud as $element_name => $value) {
                 if (in_array(strtolower($element_name), $main_elements, true)) {
                     $this->options['element'] = $element_name;
                     break;
                 }
             }
             // No well-known name found: fall back to the most frequent sampled tag
             if (empty($this->options['element']) && !empty($element_counts)) {
                 reset($element_counts);
                 $this->options['element'] = key($element_counts);
             }
         }
     }
     $this->reader = new XMLReader();
     if ($this->reader->open($path)) {
         $this->reader->setParserProperty(XMLReader::VALIDATE, false);
     }
 }