/**
 * Benchmarks several HTML parsers on the same two tasks and logs the results.
 *
 * Every parser performs the same work inside a Ubench run:
 *   1. parse the cached test page and replace the content of <title>;
 *   2. parse the template file and prefix the href of every <link> and the
 *      src of every <img>/<script> with a fixed base URL.
 * The serialized documents are returned as [1 => page, 2 => template] so the
 * template outputs can be compared with similar_text() afterwards.
 *
 * Fixes over the previous revision:
 *   - the Ganon run now builds its template with Ganon (str_get_dom) instead
 *     of XNode, so it no longer measures XParser by accident;
 *   - the Symfony/DOM run no longer overwrites $resultsXParser, which made
 *     the "... vs XParser" comparisons use the wrong data;
 *   - the Symfony/DOM run sets the title through nodeValue (the former
 *     "innserHTML" property does not exist on DOMElement), loads the template
 *     with the HTML loader, and queries the template through an XPath object
 *     bound to the template document rather than to the first document.
 *
 * @return void
 */
protected function compare()
{
    $bench = new Ubench();
    $url = 'tests/templated-retrospect/index.html';
    $file = 'test.html';
    $baseUrl = '//localhost/xparser/tests/templated-retrospect/';

    // Cache the page once so that every parser is fed identical input.
    if (!file_exists($file)) {
        $htmlstr = file_get_contents($url);
        file_put_contents($file, $htmlstr);
    }
    $htmlstr = file_get_contents($file);

    $this->log('', true);
    $this->log('Measuring Simple HTML DOM Parser...');
    $resultsSimpleHtmlDomParser = $bench->run(function ($htmlstr) use ($url, $baseUrl) {
        $results = [];

        $html = HtmlDomParser::str_get_html($htmlstr);
        $html->find('title', 0)->innertext('New Title');
        $results[1] = $html->__toString();

        $tpl = HtmlDomParser::str_get_html(file_get_contents($url));
        foreach ($tpl->find('link') as $elem) {
            $elem->href = $baseUrl . $elem->href;
        }
        foreach ($tpl->find('img, script') as $elem) {
            $elem->src = $baseUrl . $elem->src;
        }
        $results[2] = $tpl->__toString();

        return $results;
    }, $htmlstr);
    $this->logBench($bench);

    $this->log('', true);
    $this->log('Measuring XParser...');
    $resultsXParser = $bench->run(function ($htmlstr) use ($url, $baseUrl) {
        $results = [];

        $html = new XNode($htmlstr);
        $html->find('title')->inner('New Title');
        $results[1] = $html->__toString();

        $tpl = new XNode(file_get_contents($url));
        foreach ($tpl('link') as $elem) {
            $elem->href = $baseUrl . $elem->href;
        }
        foreach ($tpl('img, script') as $elem) {
            $elem->src = $baseUrl . $elem->src;
        }
        $results[2] = $tpl->__toString();

        return $results;
    }, $htmlstr);
    $this->logBench($bench);

    $this->log('', true);
    $this->log('Measuring Ganon...');
    $resultsGanon = $bench->run(function ($htmlstr) use ($url, $baseUrl) {
        $results = [];

        $html = str_get_dom($htmlstr);
        foreach ($html('title') as $title) {
            $title->setInnerText('New Title');
        }
        $results[1] = $html->__toString();

        // Must be parsed by Ganon as well, otherwise this run times XParser.
        $tpl = str_get_dom(file_get_contents($url));
        foreach ($tpl('link') as $elem) {
            $elem->href = $baseUrl . $elem->href;
        }
        foreach ($tpl('img, script') as $elem) {
            $elem->src = $baseUrl . $elem->src;
        }
        $results[2] = $tpl->__toString();

        return $results;
    }, $htmlstr);
    $this->logBench($bench);

    $this->log('', true);
    $this->log('Symfony CSS Selector combined with DOMDocument and DOMXPath...');
    // The return value is intentionally not stored: it is not part of the
    // distance report below and must not overwrite $resultsXParser.
    $bench->run(function ($htmlstr) use ($url, $baseUrl) {
        $results = [];
        // Real-world HTML is rarely valid for libxml; keep its warnings out of the output.
        libxml_use_internal_errors(true);
        $converter = new CssSelectorConverter();

        $html = new DOMDocument();
        $html->loadHTML($htmlstr);
        $xpath = new DOMXPath($html);
        foreach ($xpath->query($converter->toXPath('title')) as $element) {
            $element->nodeValue = 'New Title';
        }
        $results[1] = $html->saveHTML();

        $tpl = new DOMDocument();
        $tpl->loadHTMLFile($url);
        // A DOMXPath instance is bound to one document, so the template needs its own.
        $tplXpath = new DOMXPath($tpl);
        foreach ($tplXpath->query($converter->toXPath('link')) as $elem) {
            $elem->setAttribute('href', $baseUrl . $elem->getAttribute('href'));
        }
        foreach ($tplXpath->query($converter->toXPath('img, script')) as $elem) {
            $elem->setAttribute('src', $baseUrl . $elem->getAttribute('src'));
        }
        $results[2] = $tpl->saveHTML();

        return $results;
    }, $htmlstr);
    $this->logBench($bench);

    $this->log('', true);
    // similar_text() returns the number of matching characters, so higher means more alike.
    $this->log('Simple HTML DOM Parser vs Ganon distance: ' . similar_text($resultsSimpleHtmlDomParser[2], $resultsGanon[2]));
    $this->log('Simple HTML DOM Parser vs XParser distance: ' . similar_text($resultsSimpleHtmlDomParser[2], $resultsXParser[2]));
    $this->log('Ganon vs XParser distance: ' . similar_text($resultsGanon[2], $resultsXParser[2]));
    $this->log('', true);
    $this->log('', true);
}
/**
 * Finds elements matching a CSS-like selector.
 *
 * Selectors made only of word characters, whitespace, '.', '#' and ',' are
 * resolved here; anything else is delegated to findViaSymfony().
 *
 * The selector is split on commas; each part is split on whitespace into
 * words. The first word is resolved against this node and, when further
 * words follow, the remainder is resolved recursively inside each match.
 *
 * Fixes over the previous revision:
 *   - the per-class result list is reset for every word, so classes from an
 *     earlier comma-separated selector no longer leak into a later one;
 *   - array_intersect is called with a plain list (string keys would be
 *     treated as named arguments by call_user_func_array on PHP 8);
 *   - "#id.class1.class2" now requires every class, matching the behaviour
 *     of the class-only branch;
 *   - a multi-word selector no longer returns early, so later
 *     comma-separated selectors and $index are honoured;
 *   - the unreachable "neither of the four id/class cases" branch is gone.
 *
 * @param string   $select Selector string.
 * @param int|null $index  When not null, the result of
 *                         XNodeList::getElement($index) is returned instead of the list.
 *
 * @return mixed The XNodeList of matches, or whatever getElement()/findViaSymfony() return.
 */
public function find($select, $index = null)
{
    $ret = new XNodeList([], $this);

    if (!preg_match('/^[\\.\\#\\w\\s\\,]+$/is', $select)) {
        return $this->findViaSymfony($select, $index);
    }

    foreach (preg_split('/\\s*,\\s*/', $select) as $selector) {
        $words = preg_split('/\\s+/', trim($selector));
        $founds = [];

        foreach ($words as $wkey => $word) {
            // $tag, $id and $classes are filled in by reference.
            self::parseSelectorWord($word, $tag, $id, $classes);

            if (!$id && !$classes) {
                $founds = $this->getElementsArray($tag);
            } elseif (!$classes) {
                $founds = array_merge($founds, $this->getElementsArray($tag, 'id', $id));
            } elseif (!$id) {
                // Collect one match list per class, then keep what all of them share.
                $foundsByClass = [];
                foreach ($classes as $class) {
                    $foundsByClass[] = $this->getElementsByTagAndClassArray($tag, $class);
                }
                $matched = count($foundsByClass) > 1
                    ? call_user_func_array('array_intersect', $foundsByClass)
                    : $foundsByClass[0];
                $founds = array_merge($founds, $matched);
            } else {
                // Id and classes: start from the id matches and narrow by every class.
                $matched = array_merge([], $this->getElementsArray($tag, 'id', $id));
                foreach ($classes as $class) {
                    $matched = array_intersect($matched, $this->getElementsByClassArray($class));
                }
                $founds = $matched;
            }

            if (!$founds) {
                break;
            }

            if (isset($words[$wkey + 1])) {
                // Descendant selector: resolve the remaining words inside each match.
                $rest = implode(' ', array_slice($words, $wkey + 1));
                foreach ($founds as $found) {
                    $innerElement = new XNode(self::getInner($found), $this);
                    foreach ($innerElement->find($rest) as $restElement) {
                        $ret->addElement($restElement);
                    }
                }

                // The recursion consumed the rest of this selector; $founds holds only
                // intermediate ancestors and must not be added to the result.
                continue 2;
            }
        }

        $ret->addElementsArray($founds, $this);
    }

    if (!is_null($index)) {
        return $ret->getElement($index);
    }

    return $ret;
}
/**
 * Exercises XNode on a small inline HTML fixture.
 *
 * Checks, in order:
 *   - reading and replacing inner content through a combined id/class selector;
 *   - reading and replacing an attribute through a descendant selector;
 *   - that querying an id which only occurs inside an HTML comment yields no
 *     elements and leaves the document's outer markup unchanged;
 *   - that getCount() agrees with the number of elements returned.
 */
public function mainTest()
{
    $markup = '<html> <head> <title>Test page</title> </head> <body> <h1>Lorem ipsum</h1> <div /> <div>asd</div> <div id= "hello01" asdasdw /> <!-- <div id= "hello02" asdasdw class="message" asdasd /> --> <hr> <div id="hello1" class="message"> Hello World! </div> <hr> <div id="hello2" class="message selected"> before <span>Hello World!</span> after </div> <hr> <div id="hello3" class="message"> before <div>Hello <span>here</span> World!</div> after </div> <hr> <input type="text" id="myinput1" value="my value here.."> </body> </html>';
    $tpl = new XNode($markup);

    // Inner content: read, overwrite, read again.
    $divSelector = 'div#hello2.selected.message, div#hello1';
    $innerBefore = $tpl->find($divSelector)->inner();
    $tpl->find($divSelector)->inner('yupeeee!');
    $innerAfter = $tpl->find($divSelector)->inner();
    $this->equ($innerBefore, ' Hello World! ');
    $this->equ($innerAfter, 'yupeeee!');

    // Attribute value: read via a descendant selector, overwrite via the tag name.
    $inputSelector = 'html body input';
    $valueBefore = $tpl->find($inputSelector)->attr('value');
    $tpl->find('input')->attr('value', 'elembe!');
    $valueAfter = $tpl->find($inputSelector)->attr('value');
    $this->equ($valueBefore, 'my value here..');
    $this->equ($valueAfter, 'elembe!');

    // "#hello02" appears only inside a comment in the fixture.
    $commentedId = '#hello02';
    $outerBefore = $tpl->outer();
    $matches = $tpl($commentedId)->getElements();
    $this->equ(count($matches), 0);
    $outerAfter = $tpl->outer();
    $this->equ($outerBefore, $outerAfter);

    $elementCount = count($tpl($commentedId)->getElements());
    $this->equ($elementCount, $tpl->getCount($commentedId));
}