/**
 * The streamer must return, unchanged, whatever node string the parser
 * extracts from the stream.
 */
public function test_getNode()
{
    $expected = "<node><a>lorem</a><b>ipsum</b></node>";

    // The parser mock must be asked exactly once, with a stream instance.
    $parserMock = Mockery::mock("\\Prewk\\XmlStringStreamer\\ParserInterface");
    $parserMock
        ->shouldReceive("getNodeFrom")
        ->with(Mockery::type("\\Prewk\\XmlStringStreamer\\StreamInterface"))
        ->once()
        ->andReturn($expected);

    $streamMock = Mockery::mock("\\Prewk\\XmlStringStreamer\\StreamInterface");

    $subject = new XmlStringStreamer($parserMock, $streamMock);
    $actual = $subject->getNode();

    $this->assertEquals($expected, $actual, "Node received from the parser should be what was expected");
}
/**
 * Streams the XML document at $url node by node and passes every node,
 * loaded through simplexml_load_string(), to the handler.
 *
 * @param string $url location handed to the Guzzle stream
 * @param XMLHandler $handler receives each parsed node via perform()
 * @return int number of nodes handed to the handler
 */
public function parse($url, XMLHandler $handler)
{
    $source = new Stream\Guzzle($url, self::CHUNK_SIZE);
    $walker = new Parser\StringWalker();
    $reader = new XmlStringStreamer($walker, $source);

    // The counter only advances after the handler has processed the node.
    for ($handled = 0; $rawNode = $reader->getNode(); $handled++) {
        $handler->perform(simplexml_load_string($rawNode));
    }

    return $handled;
}
/**
 * Empties the Countrycode table and refills it from the countrycodes.xml
 * file located in this file's directory, one record per <row> node.
 */
public function run()
{
    Countrycode::truncate();

    $bufferBytes = 1024;
    $xmlStream = new Stream\File(__DIR__ . "/countrycodes.xml", $bufferBytes);
    $rowParser = new Parser\UniqueNode(array("uniqueNode" => "row"));
    $rows = new XmlStringStreamer($rowParser, $xmlStream);

    for ($rowXml = $rows->getNode(); $rowXml; $rowXml = $rows->getNode()) {
        $row = simplexml_load_string($rowXml);

        // First <field> holds the code, second <field> the country name.
        Countrycode::create([
            'countrycode' => $row->field[0],
            'country' => $row->field[1],
        ]);
    }

    $this->command->info('Countrycode table seeded!');
}
/**
 * {@inheritdoc}
 *
 * Streams the XML list in $this->file and reports whether any listed domain
 * equals the URL's registerable domain, host or public suffix.
 *
 * @param Url $url URL whose toArray() parts are compared against the list
 * @return bool true on the first exact match, false otherwise (including
 *              when any of the three URL parts is not set)
 */
public function isSpamReferrer(Url $url)
{
    $parts = $url->toArray();

    if (!isset($parts['registerableDomain'], $parts['host'], $parts['publicSuffix'])) {
        return false;
    }

    // Loop-invariant: build the comparison set once instead of per node.
    $candidates = [$parts['registerableDomain'], $parts['host'], $parts['publicSuffix']];

    $provider = new File($this->file, 1024);
    $parser = new XmlStringStreamer\Parser\StringWalker();
    $streamer = new XmlStringStreamer($parser, $provider);

    while ($node = $streamer->getNode()) {
        $element = simplexml_load_string($node);

        // An unparsable node would cast to "" and could spuriously match an
        // empty URL part, so it is skipped.
        if ($element === false) {
            continue;
        }

        // Strict comparison: loose == treats numeric-looking strings such as
        // "1e3" and "1000" as equal.
        if (in_array((string) $element, $candidates, true)) {
            return true;
        }
    }

    return false;
}
/**
 * Hydrates one object of the configured class per XML node.
 *
 * When given a streamer, every node it yields is hydrated. When given an XML
 * string, the children of the root element that share the first child's name
 * are hydrated; a root without children yields an empty array.
 *
 * @param string|XmlStringStreamer $xml
 * @return object[]
 * @throws \InvalidArgumentException when the XML string, or a streamed node,
 *                                   is not well-formed
 */
public function unserialize($xml)
{
    $hydrator = $this->buildHydrator('xml', 'hydrate');
    $class = $this->getOptions()->getClass();
    $classes = array();

    if ($xml instanceof XmlStringStreamer) {
        while ($node = $xml->getNode()) {
            $element = simplexml_load_string($node);

            // Previously a failed parse was hydrated as array(false).
            if ($element === false) {
                throw new \InvalidArgumentException('Streamed node is not well-formed XML');
            }

            $classes[] = $hydrator->hydrate((array) $element, new $class());
        }

        return $classes;
    }

    $document = simplexml_load_string($xml);

    // Previously this ended in a fatal "member function on bool" error.
    if ($document === false) {
        throw new \InvalidArgumentException('Given string is not well-formed XML');
    }

    $docElement = $document->children();
    $name = $docElement->getName();

    // No children: getName() is empty and there is nothing to hydrate.
    if ($name === '') {
        return $classes;
    }

    foreach ($docElement->{$name} as $node) {
        $classes[] = $hydrator->hydrate((array) $node, new $class());
    }

    return $classes;
}
/**
 * Experimental: fetches one product page and lazily yields each XML node of
 * the response body as a SimpleXMLElement instead of returning the response.
 *
 * @TODO Use XML instead
 * @TODO Maybe use this https://github.com/prewk/xml-string-streamer-guzzle
 * @TODO Or this http://dk2.php.net/manual/en/function.xml-parse.php
 * @TODO Maybe create my own parser: http://php.net/manual/en/example.xml-structure.php
 *
 * @param int $page
 * @param int $pageSize
 * @return \Generator yields one \SimpleXMLElement per streamed node
 */
public function getProductPageAsEntities($page, $pageSize)
{
    $response = $this->getProductPage($page, $pageSize);

    // The stream is created without a URL and fed the response body directly.
    $bodyStream = new Stream\Guzzle('');
    $bodyStream->setGuzzleStream($response->getBody());

    $walker = new Parser\StringWalker();
    $reader = new XmlStringStreamer($walker, $bodyStream);

    for ($raw = $reader->getNode(); $raw; $raw = $reader->getNode()) {
        yield new \SimpleXMLElement($raw, LIBXML_NOERROR);
    }
}
/**
 * Converts a pending XML file into a file with one JSON string per line.
 *
 * Reads DATA_PENDING_DIR . $filename through a streaming parser, converts
 * every node with Utils::getBookJSONFromXMLNode() and appends the result to
 * DATA_OUTPUT_DIR . "{$filename}.json". Read progress is logged in steps of
 * roughly 10%.
 *
 * If the output file cannot be opened the script terminates via die().
 *
 * @param string $filename name of the XML file inside DATA_PENDING_DIR
 * @param object $logger   logger providing log() and error()
 * @return int number of nodes converted
 */
public static function convertPendingXMLtoJSON($filename, $logger)
{
    // Number of processed nodes (books)
    $count = 0;

    // Local source file and final output file
    $local_file = DATA_PENDING_DIR . $filename;
    $json_file = DATA_OUTPUT_DIR . "{$filename}.json";

    // Size of the local XML file, used to compute read progress
    $totalSize = filesize($local_file);

    // Stream the file with a 16 KB buffer and report progress while reading
    $last_progress = 0;
    $stream = new File($local_file, 16384, function ($chunk, $readBytes) use (&$last_progress, $totalSize, $logger) {
        // filesize() yields 0 for an empty file and false on failure; either
        // would make the division below fail.
        if (!$totalSize) {
            return;
        }

        $progress = $readBytes / $totalSize;

        // Report every 10%
        if ($progress >= $last_progress + 0.1) {
            $logger->log("Progress: {$progress}");
            $last_progress = $last_progress + 0.1;
        }
    });

    $parser = new StringWalker();
    $streamer = new XmlStringStreamer($parser, $stream);

    // Create the output file
    $file = fopen($json_file, "w") or die(json_encode("Could not open {$json_file} for writing"));
    $logger->log("Convirtiendo {$local_file} a {$json_file}...");

    try {
        // Process the nodes
        while ($node = $streamer->getNode()) {
            // JSON string ready for mongo insertion
            $json_string = Utils::getBookJSONFromXMLNode($node);

            // Append the string to the output file
            fputs($file, $json_string . PHP_EOL);
            $count++;
        }
    } finally {
        // Close the output file even when a conversion throws
        fclose($file);
    }

    if ($count == 0) {
        $logger->error("0 Records converted");
    } else {
        $logger->log("{$count} Records converted");
    }

    // Clear the file status cache
    clearstatcache();

    return $count;
}
/**
 * A file smaller than the read buffer must still yield all of its nodes,
 * in document order.
 */
public function test_StringWalker_parser_with_file_shorter_than_buffer()
{
    $path = __DIR__ . "/../../xml/short.xml";

    $fileStream = new XmlStringStreamer\Stream\File($path, 1024);
    $walker = new XmlStringStreamer\Parser\StringWalker();
    $subject = new XmlStringStreamer($walker, $fileStream);

    $collected = array();
    for ($raw = $subject->getNode(); $raw; $raw = $subject->getNode()) {
        // Each streamed node wraps a <node> child holding the text of interest.
        $collected[] = (string) simplexml_load_string($raw)->node;
    }

    $this->assertEquals(array("foo", "bar"), $collected, "The found nodes should equal the expected nodes");
}
/**
 * The UniqueNode parser must still find nodes whose data sits in the final
 * chunk read from the file (200-byte buffer).
 */
public function test_UniqueNode_parser_with_file_with_data_in_last_chunk()
{
    $file = __DIR__ . "/../../xml/short_last_chunk.xml";

    $stream = new XmlStringStreamer\Stream\File($file, 200);
    $parser = new UniqueNode(array("uniqueNode" => 'capture'));
    $streamer = new XmlStringStreamer($parser, $stream);

    // Only the number of captured nodes matters, not their content.
    $foundNodes = 0;
    while ($streamer->getNode()) {
        $foundNodes++;
    }

    $this->assertEquals(2, $foundNodes, "The found nodes should equal the expected nodes number.");
}
//report every 10% if ($progress >= $last_progress + 0.1) { $logger->log("Progress: {$progress}"); $last_progress = $last_progress + 0.1; } }); $start_timestamp = date('Y-m-d H:i:s'); //Configura el parser $parser = new StringWalker(); //Configura el streamer $streamer = new XmlStringStreamer($parser, $stream); //Creación del archivo final $file = fopen($json_file, "w") or die(json_encode("Could not open {$json_file} for writing")); $logger->log("Convirtiendo {$local_file} a {$json_file}..."); //Procesamiento de nodos while ($node = $streamer->getNode()) { //Set json string ready for mongo insertion $json_string = Utils::getBookJSONFromXMLNode2($node); //Inserta la cadena en el archivo final fputs($file, $json_string . PHP_EOL); $count++; } //Cierra la edición del archivo final fclose($file); //Elimina la cache del proceso clearstatcache(); $logger->log("{$json_file} creado."); //Termina el proceso del archivo y lo reporta en el log $logger->log("Resultado almacenado en {$json_file}"); $end_timestap = date('Y-m-d H:i:s'); //Send response*/
/**
 * __construct
 *
 * Builds the Chunk object: merges options, decides which parser engine to
 * use ('xmlreader' or 'xmlstreamer'), optionally scans the file to tally
 * element names and pick the element to iterate over, and finally opens the
 * reader.
 *
 * @param string $file The filename to work with
 * @param array $options The options with which to parse the file
 * @param string|false $parser_type Requested parser engine; an empty value
 *                                  falls back to a default (see below)
 * @author Dom Hastings
 * @access public
 */
public function __construct($file, $options = array(), $parser_type = false)
{
    // Merge the caller's options over the defaults; non-array input is ignored.
    $this->options = array_merge($this->options, is_array($options) ? $options : array());
    // The configured chunk size is scaled by the plugin's 'chunk_size' option.
    $this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption('chunk_size');
    // Set the filename.
    $this->file = $file;
    $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
    // Sniff the first 1024 bytes: a file starting with "<!DOCTYPE" is treated
    // as an HTML page rather than an XML feed.
    $is_html = false;
    // NOTE(review): if fopen() fails, $f is false and is still passed to
    // feof()/fread()/fclose(); on PHP 8 that raises a TypeError, which "@"
    // does not suppress — confirm the file is always readable here.
    $f = @fopen($file, "rb");
    // The unconditional break makes this loop run at most once.
    while (!@feof($f)) {
        $chunk = @fread($f, 1024);
        if (strpos($chunk, "<!DOCTYPE") === 0) {
            $is_html = true;
        }
        break;
    }
    @fclose($f);
    if ($is_html) {
        // Flag the file as a 404 and stop after opening a plain XMLReader;
        // no engine selection or element detection happens in this case.
        $path = $this->get_file_path();
        $this->is_404 = true;
        $this->reader = new XMLReader();
        @$this->reader->open($path);
        @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
        return;
    }
    // Engine selection: the plugin-wide 'force_stream_reader' option wins;
    // otherwise the import record (if an id is in the request) decides, and
    // without an import id the 'wpai_parser_type' option is the fallback for
    // an empty $parser_type.
    if (PMXI_Plugin::getInstance()->getOption('force_stream_reader')) {
        $this->parser_type = 'xmlstreamer';
    } else {
        $input = new PMXI_Input();
        // The import id may arrive as either 'id' or 'import_id'.
        $import_id = $input->get('id', 0);
        if (empty($import_id)) {
            $import_id = $input->get('import_id', 0);
        }
        if (!empty($import_id)) {
            $this->parser_type = empty($parser_type) ? 'xmlreader' : $parser_type;
            $import = new PMXI_Import_Record();
            $import->getById($import_id);
            if (!$import->isEmpty()) {
                // An existing import overrides $parser_type with its own setting.
                $this->parser_type = empty($import->options['xml_reader_engine']) ? 'xmlreader' : 'xmlstreamer';
            }
        } else {
            $this->parser_type = empty($parser_type) ? get_option('wpai_parser_type', 'xmlreader') : $parser_type;
        }
    }
    // Build the element "cloud" (element name => occurrence count) when no
    // element was configured or the cloud was explicitly requested.
    if (empty($this->options['element']) or $this->options['get_cloud']) {
        $path = $this->get_file_path();
        if ($this->parser_type == 'xmlreader') {
            $reader = new XMLReader();
            $reader->open($path);
            $reader->setParserProperty(XMLReader::VALIDATE, false);
            while (@$reader->read()) {
                switch ($reader->nodeType) {
                    case XMLREADER::ELEMENT:
                        // "_colon_" is mapped back to ":" and then ":" to "_"
                        // to form the cloud key.
                        $localName = str_replace("_colon_", ":", $reader->localName);
                        if (array_key_exists(str_replace(":", "_", $localName), $this->cloud)) {
                            $this->cloud[str_replace(":", "_", $localName)]++;
                        } else {
                            $this->cloud[str_replace(":", "_", $localName)] = 1;
                        }
                        break;
                    default:
                        break;
                }
            }
            unset($reader);
        } else {
            $CHUNK_SIZE = 1024;
            $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
            $parseroptions = array("extractContainer" => false);
            // Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
            $parser = new Parser\StringWalker($parseroptions);
            // Create the streamer
            $streamer = new XmlStringStreamer($parser, $streamProvider);
            // The loop body is intentionally empty: the stream is only drained.
            // Presumably the parser tallies element names into its $cloud
            // property while walking — verify against Parser\StringWalker.
            while ($node = $streamer->getNode()) {
                // $simpleXmlNode = simplexml_load_string($node);
                // echo (string)$simpleXmlNode->firstName;
            }
            $this->cloud = $parser->cloud;
        }
        // Pick the element to iterate over when none was configured.
        if (!empty($this->cloud) and empty($this->options['element'])) {
            // Most frequent element names first.
            arsort($this->cloud);
            $main_elements = array('node', 'product', 'job', 'deal', 'entry', 'item', 'property', 'listing', 'hotel', 'record', 'article', 'post', 'book', 'item_0');
            // Prefer the most frequent element whose name is a well-known record name.
            foreach ($this->cloud as $element_name => $value) {
                if (in_array(strtolower($element_name), $main_elements)) {
                    $this->options['element'] = $element_name;
                    break;
                }
            }
            // Otherwise fall back to the most frequent element overall.
            if (empty($this->options['element'])) {
                foreach ($this->cloud as $el => $count) {
                    $this->options['element'] = $el;
                    break;
                }
            }
        }
    }
    // Open the reader that is used for the actual iteration.
    $path = $this->get_file_path();
    if ($this->parser_type == 'xmlreader') {
        $this->reader = new XMLReader();
        @$this->reader->open($path);
        @$this->reader->setParserProperty(XMLReader::VALIDATE, false);
    } else {
        // The stream reader captures only nodes named after the chosen element.
        $parseroptions = array("uniqueNode" => $this->options['element']);
        $CHUNK_SIZE = 1024;
        $streamProvider = new Prewk\XmlStringStreamer\Stream\File($path, $CHUNK_SIZE);
        $parser = new Parser\UniqueNode($parseroptions);
        $this->reader = new XmlStringStreamer($parser, $streamProvider);
    }
}