/**
 * The streamer must return exactly the node string produced by its parser.
 *
 * The parser mock expects a single getNodeFrom() call that receives a
 * StreamInterface instance and answers with a fixed XML string.
 */
public function test_getNode()
 {
     $expected = "<node><a>lorem</a><b>ipsum</b></node>";
     $parserMock = Mockery::mock("\\Prewk\\XmlStringStreamer\\ParserInterface");
     $parserMock
         ->shouldReceive("getNodeFrom")
         ->with(Mockery::type("\\Prewk\\XmlStringStreamer\\StreamInterface"))
         ->once()
         ->andReturn($expected);
     $streamMock = Mockery::mock("\\Prewk\\XmlStringStreamer\\StreamInterface");
     $subject = new XmlStringStreamer($parserMock, $streamMock);
     $actual = $subject->getNode();
     $this->assertEquals($expected, $actual, "Node received from the parser should be what was expected");
 }
예제 #2
0
 /**
  * Streams the XML found at $url and hands every extracted node to $handler.
  *
  * Each node string returned by the streamer is converted with
  * simplexml_load_string() and passed to XMLHandler::perform().
  *
  * @param string $url location given to the Guzzle stream
  * @param XMLHandler $handler receives each converted node
  * @return int number of nodes passed to the handler
  */
 public function parse($url, XMLHandler $handler)
 {
     $source = new Stream\Guzzle($url, self::CHUNK_SIZE);
     $walker = new Parser\StringWalker();
     $streamer = new XmlStringStreamer($walker, $source);
     // The loop condition fetches the next node; the counter advances once per handled node.
     for ($handled = 0; $rawNode = $streamer->getNode(); $handled++) {
         $handler->perform(simplexml_load_string($rawNode));
     }
     return $handled;
 }
예제 #3
0
 /**
  * Empties the Countrycode table and refills it from countrycodes.xml.
  *
  * The XML file located next to this source file is streamed in 1024-byte
  * chunks; every <row> node becomes one Countrycode record built from its
  * first two <field> children.
  */
 public function run()
 {
     Countrycode::truncate();
     $chunkBytes = 1024;
     $xmlPath = __DIR__ . "/countrycodes.xml";
     $source = new Stream\File($xmlPath, $chunkBytes);
     $rowParser = new Parser\UniqueNode(array("uniqueNode" => "row"));
     $streamer = new XmlStringStreamer($rowParser, $source);
     while ($rowXml = $streamer->getNode()) {
         $row = simplexml_load_string($rowXml);
         $attributes = [
             'countrycode' => $row->field[0],
             'country' => $row->field[1],
         ];
         Countrycode::create($attributes);
     }
     $this->command->info('Countrycode table seeded!');
 }
 /**
  * {@inheritdoc}
  *
  * Streams the XML file at $this->file node by node and reports whether the
  * text of any node is identical to the URL's registerable domain, host or
  * public suffix. Returns false when the URL lacks any of those three parts.
  */
 public function isSpamReferrer(Url $url)
 {
     $url = $url->toArray();
     if (!isset($url['registerableDomain'], $url['host'], $url['publicSuffix'])) {
         return false;
     }
     // Built once: the candidates do not change while the list is streamed.
     $candidates = [
         (string) $url['registerableDomain'],
         (string) $url['host'],
         (string) $url['publicSuffix'],
     ];
     $provider = new File($this->file, 1024);
     $parser = new XmlStringStreamer\Parser\StringWalker();
     $streamer = new XmlStringStreamer($parser, $provider);
     while ($node = $streamer->getNode()) {
         // A node that fails to parse casts to '' (simplexml_load_string() returns false).
         $domain = (string) simplexml_load_string($node);
         if ($domain === '') {
             // An empty entry must never match an empty URL component.
             continue;
         }
         // Strict comparison: loose == treats numeric-looking strings such as "1e1" and "10" as equal.
         if (in_array($domain, $candidates, true)) {
             return true;
         }
     }
     return false;
 }
예제 #5
0
 /**
  * Hydrates one object of the configured class per XML node.
  *
  * When given an XmlStringStreamer, every node it yields is converted with
  * simplexml_load_string() and hydrated. When given a string, the document
  * is loaded at once and the child elements sharing the name reported by
  * the children collection are hydrated.
  *
  * @param string|XmlStringStreamer $xml
  * @return object[]
  */
 public function unserialize($xml)
 {
     $hydrator = $this->buildHydrator('xml', 'hydrate');
     $class = $this->getOptions()->getClass();
     $objects = array();

     if (!($xml instanceof XmlStringStreamer)) {
         // Whole-document path: load everything, then walk the same-named elements.
         $children = simplexml_load_string($xml)->children();
         $childName = $children->getName();
         foreach ($children->{$childName} as $element) {
             $objects[] = $hydrator->hydrate((array) $element, new $class());
         }
         return $objects;
     }

     // Streaming path: one hydrated object per node handed out by the streamer.
     while ($rawNode = $xml->getNode()) {
         $element = simplexml_load_string($rawNode);
         $objects[] = $hydrator->hydrate((array) $element, new $class());
     }
     return $objects;
 }
예제 #6
0
 /**
  * Experimental: yields the nodes of a product page one by one instead of
  * returning the response.
  *
  * The body of the response from getProductPage() is streamed and every
  * node is wrapped in a SimpleXMLElement created with LIBXML_NOERROR.
  * Because this is a generator, nothing runs until iteration starts.
  *
  * @TODO Use XML instead
  * @TODO Maybe use this https://github.com/prewk/xml-string-streamer-guzzle
  * @TODO Or this http://dk2.php.net/manual/en/function.xml-parse.php
  * @TODO Maybe create my own parser: http://php.net/manual/en/example.xml-structure.php
  *
  * @param int $page
  * @param int $pageSize
  * @return \Generator yields \SimpleXMLElement instances
  */
 public function getProductPageAsEntities($page, $pageSize)
 {
     $pageResponse = $this->getProductPage($page, $pageSize);
     // The stream is created with an empty URL and then fed the response body directly.
     $source = new Stream\Guzzle('');
     $source->setGuzzleStream($pageResponse->getBody());
     $streamer = new XmlStringStreamer(new Parser\StringWalker(), $source);
     while ($rawNode = $streamer->getNode()) {
         yield new \SimpleXMLElement($rawNode, LIBXML_NOERROR);
     }
 }
예제 #7
0
 /**
  * Converts a pending XML file into a file holding one JSON string per line.
  *
  * Streams DATA_PENDING_DIR . $filename in 16 KB chunks, turns every node
  * into a JSON string via Utils::getBookJSONFromXMLNode() and writes it,
  * followed by PHP_EOL, to DATA_OUTPUT_DIR . "{$filename}.json". Progress is
  * logged each time another 10% of the input has been read.
  *
  * @param string $filename name of the XML file inside DATA_PENDING_DIR
  * @param object $logger logger providing log() and error()
  * @return int number of nodes written to the output file
  */
 public static function convertPendingXMLtoJSON($filename, $logger)
 {
     // Number of processed nodes (books)
     $count = 0;
     // Path of the local input file
     $local_file = DATA_PENDING_DIR . $filename;
     // Path of the output file
     $json_file = DATA_OUTPUT_DIR . "{$filename}.json";
     // Size of the local XML file, used to compute progress
     $totalSize = filesize($local_file);
     // Set up streaming and progress monitoring with a 16 KB buffer
     $last_progress = 0;
     $stream = new File($local_file, 16384, function ($chunk, $readBytes) use (&$last_progress, $totalSize, $logger) {
         // An empty file (0) or a failed filesize() (false) would make the division below fail.
         if (!$totalSize) {
             return;
         }
         $progress = $readBytes / $totalSize;
         // Report every 10%
         if ($progress >= $last_progress + 0.1) {
             $logger->log("Progress: {$progress}");
             // Snap to the 10% step just reached so one large jump is reported only once.
             $last_progress = floor($progress * 10) / 10;
         }
     });
     // Configure the parser
     $parser = new StringWalker();
     // Configure the streamer
     $streamer = new XmlStringStreamer($parser, $stream);
     // Create the output file
     $file = fopen($json_file, "w") or die(json_encode("Could not open {$json_file} for writing"));
     $logger->log("Convirtiendo {$local_file} a {$json_file}...");
     try {
         // Process the nodes
         while ($node = $streamer->getNode()) {
             // Build the JSON string ready for mongo insertion
             $json_string = Utils::getBookJSONFromXMLNode($node);
             // Append the string to the output file
             fputs($file, $json_string . PHP_EOL);
             $count++;
         }
     } finally {
         // Close the output file even when converting a node throws
         fclose($file);
     }
     if ($count == 0) {
         $logger->error("0 Records converted");
     } else {
         $logger->log("{$count} Records converted");
     }
     // Clear the file status cache
     clearstatcache();
     return $count;
 }
 /**
  * A file smaller than the 1024-byte read buffer must still yield its nodes.
  *
  * Collects the text of the <node> child of every streamed node and expects
  * "foo" followed by "bar".
  */
 public function test_StringWalker_parser_with_file_shorter_than_buffer()
 {
     $path = __DIR__ . "/../../xml/short.xml";
     $source = new XmlStringStreamer\Stream\File($path, 1024);
     $walker = new XmlStringStreamer\Parser\StringWalker();
     $streamer = new XmlStringStreamer($walker, $source);
     $actual = array();
     for ($raw = $streamer->getNode(); $raw; $raw = $streamer->getNode()) {
         $actual[] = (string) simplexml_load_string($raw)->node;
     }
     $this->assertEquals(array("foo", "bar"), $actual, "The found nodes should equal the expected nodes");
 }
 /**
  * UniqueNode must still capture nodes when data sits in the final chunk.
  *
  * Streams short_last_chunk.xml in 200-byte chunks and expects exactly two
  * <capture> nodes to be returned.
  */
 public function test_UniqueNode_parser_with_file_with_data_in_last_chunk()
 {
     $file = __DIR__ . "/../../xml/short_last_chunk.xml";
     $stream = new XmlStringStreamer\Stream\File($file, 200);
     $parser = new UniqueNode(array("uniqueNode" => 'capture'));
     $streamer = new XmlStringStreamer($parser, $stream);
     $foundNodes = 0;
     // Only the number of nodes matters here, so their content is not kept.
     while ($streamer->getNode()) {
         $foundNodes++;
     }
     $this->assertEquals(2, $foundNodes, "The found nodes should equal the expected nodes number.");
 }
예제 #10
0
     //report every 10%
     if ($progress >= $last_progress + 0.1) {
         $logger->log("Progress: {$progress}");
         $last_progress = $last_progress + 0.1;
     }
 });
 $start_timestamp = date('Y-m-d H:i:s');
 //Configura el parser
 $parser = new StringWalker();
 //Configura el streamer
 $streamer = new XmlStringStreamer($parser, $stream);
 //Creación del archivo final
 $file = fopen($json_file, "w") or die(json_encode("Could not open {$json_file} for writing"));
 $logger->log("Convirtiendo {$local_file} a {$json_file}...");
 //Procesamiento de nodos
 while ($node = $streamer->getNode()) {
     //Set json string ready for mongo insertion
     $json_string = Utils::getBookJSONFromXMLNode2($node);
     //Inserta la cadena en el archivo final
     fputs($file, $json_string . PHP_EOL);
     $count++;
 }
 //Cierra la edición del archivo final
 fclose($file);
 //Elimina la cache del proceso
 clearstatcache();
 $logger->log("{$json_file} creado.");
 //Termina el proceso del archivo y lo reporta en el log
 $logger->log("Resultado almacenado en {$json_file}");
 $end_timestap = date('Y-m-d H:i:s');
 //Send response*/
예제 #11
0
 /**
  * __construct
  *
  * Builds the Chunk object: merges the options, selects the parser engine,
  * optionally counts element names to choose a default element, and opens
  * the reader that later extracts the chunks.
  *
  * @param string $file The filename to work with
  * @param array $options The options with which to parse the file
  * @param string|bool $parser_type Parser engine ('xmlreader' or 'xmlstreamer'); an empty
  *                                 value lets the plugin/import settings decide
  * @author Dom Hastings
  * @access public
  */
 public function __construct($file, $options = array(), $parser_type = false)
 {
     // merge the options together
     $this->options = array_merge($this->options, is_array($options) ? $options : array());
     $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
     // set the filename
     $this->file = $file;
     $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
     // Sniff the first 1024 bytes: a file that starts with "<!DOCTYPE" is treated as HTML.
     $is_html = false;
     $f = @fopen($file, "rb");
     // fopen() returns false on failure; feof()/fread()/fclose() must only receive a real handle.
     if ($f !== false) {
         if (!@feof($f)) {
             $chunk = @fread($f, 1024);
             if (is_string($chunk) && strpos($chunk, "<!DOCTYPE") === 0) {
                 $is_html = true;
             }
         }
         @fclose($f);
     }
     if ($is_html) {
         // HTML instead of XML: flag it as a 404 and open a plain XMLReader without further analysis.
         $path = $this->get_file_path();
         $this->is_404 = true;
         $this->reader = new XMLReader();
         @$this->reader->open($path);
         @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
         return;
     }
     // Choose the parser engine: a forced plugin setting wins, then the import's own
     // setting, then the explicit argument or the stored 'wpai_parser_type' option.
     if (PMXI_Plugin::getInstance()->getOption('force_stream_reader')) {
         $this->parser_type = 'xmlstreamer';
     } else {
         $input = new PMXI_Input();
         $import_id = $input->get('id', 0);
         if (empty($import_id)) {
             $import_id = $input->get('import_id', 0);
         }
         if (!empty($import_id)) {
             $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
             $import = new PMXI_Import_Record();
             $import->getById($import_id);
             if (!$import->isEmpty()) {
                 $this->parser_type = empty($import->options['xml_reader_engine']) ? 'xmlreader' : 'xmlstreamer';
             }
         } else {
             $this->parser_type = empty($parser_type) ? get_option('wpai_parser_type', 'xmlreader') : $parser_type;
         }
     }
     // Build the element-name "cloud" (name => occurrence count) when no element is
     // configured or when the cloud was requested explicitly.
     if (empty($this->options['element']) or $this->options['get_cloud']) {
         $path = $this->get_file_path();
         if ($this->parser_type == 'xmlreader') {
             $reader = new XMLReader();
             $reader->open($path);
             $reader->setParserProperty(XMLReader::VALIDATE, false);
             while (@$reader->read()) {
                 if ($reader->nodeType === XMLReader::ELEMENT) {
                     $localName = str_replace("_colon_", ":", $reader->localName);
                     // Cloud keys use "_" in place of ":"; compute the key once per element.
                     $key = str_replace(":", "_", $localName);
                     if (array_key_exists($key, $this->cloud)) {
                         $this->cloud[$key]++;
                     } else {
                         $this->cloud[$key] = 1;
                     }
                 }
             }
             unset($reader);
         } else {
             $CHUNK_SIZE = 1024;
             $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
             $parseroptions = array("extractContainer" => false);
             // Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
             $parser = new Parser\StringWalker($parseroptions);
             // Create the streamer
             $streamer = new XmlStringStreamer($parser, $streamProvider);
             // The nodes themselves are discarded; the loop only drives the parser through the file.
             // NOTE(review): presumably the parser fills its public $cloud while walking - confirm.
             while ($node = $streamer->getNode()) {
             }
             $this->cloud = $parser->cloud;
         }
         if (!empty($this->cloud) and empty($this->options['element'])) {
             // Most frequent names first.
             arsort($this->cloud);
             $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0');
             // Prefer the most frequent name that is a well-known record element.
             foreach ($this->cloud as $element_name => $value) {
                 if (in_array(strtolower($element_name), $main_elements)) {
                     $this->options['element'] = $element_name;
                     break;
                 }
             }
             // Otherwise fall back to the most frequent name overall.
             if (empty($this->options['element'])) {
                 foreach ($this->cloud as $el => $count) {
                     $this->options['element'] = $el;
                     break;
                 }
             }
         }
     }
     // Open the reader used for the actual chunk extraction.
     $path = $this->get_file_path();
     if ($this->parser_type == 'xmlreader') {
         $this->reader = new XMLReader();
         @$this->reader->open($path);
         @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
     } else {
         $parseroptions = array("uniqueNode" => $this->options['element']);
         $CHUNK_SIZE = 1024;
         $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
         $parser = new Parser\UniqueNode($parseroptions);
         $this->reader = new XmlStringStreamer($parser, $streamProvider);
     }
 }