find() публичный Метод

Searches the DOM tree for nodes matching a given XPath expression or CSS selector.
public find ( string $expression, string $type = Query::TYPE_CSS, boolean $wrapElement = true ) : Element[] | DOMElement[]
$expression string XPath expression or a CSS selector
$type string The type of the expression
$wrapElement boolean Returns array of \DiDom\Element if true, otherwise array of \DOMElement
Результат Element[] | DOMElement[]
Пример #1
0
 /**
  * Verifies that find() returns an array holding the expected number of matches.
  *
  * @dataProvider findProvider
  */
 public function testFind($html, $selector, $type, $count)
 {
     // Build the document and run the query in one expression.
     $matches = (new Document($html, false))->find($selector, $type);

     $this->assertTrue(is_array($matches));
     $this->assertEquals($count, count($matches));
 }
Пример #2
0
 /**
  * Scrapes the fixture table and stores the recent games in $this->gamesFromWeb.
  *
  * Only rows dated within the last two weeks are taken into account. Both team
  * names of such a row must be present in the alias map, otherwise parsing aborts.
  *
  * @param array $teamTournament source rows, mapped with ArrayHelper::map() to alias => id
  *
  * @throws \Exception when a team name of a recent row has no known alias
  */
 protected function getGamesFromWeb($teamTournament)
 {
     // Team alias => participant id.
     $aliases = ArrayHelper::map($teamTournament, 'alias', 'id');
     // NOTE(review): the page is read from a hard-coded local file; the original had the
     // tournament URL ($this->tournament->autoProcessURL) commented out next to it.
     // This looks like a debugging leftover — confirm which source is intended.
     $html = new Document('italy.htm', true);
     $table = $html->find('table.table.b-table-sortlist tbody')[0];
     $gamesFromWeb = [];
     foreach ($table->find('tr') as $row) {
         // Parsed once: used for the age filter and stored with the game.
         $time = $this->autoTimeToUnix($row->find('td.sport__calendar__table__date')[0]->text());
         if (!($time > time() - 60 * 60 * 24 * 7 * 2)) {
             continue;
         }
         // Both team links come from a single query; index 0 is home, index 1 is guest.
         $teams = $row->find('td.sport__calendar__table__teams a.sport__calendar__table__team');
         $home = $teams[0]->text();
         $guest = $teams[1]->text();
         if (!isset($aliases[$home]) || !isset($aliases[$guest])) {
             throw new \Exception('Error during alias parsing ' . $home . ' or ' . $guest);
         }
         $tour = $row->find('td.sport__calendar__table__tour')[0]->text();
         $homeScore = $row->find('td.sport__calendar__table__result span.sport__calendar__table__result__left')[0]->text();
         $guestScore = $row->find('td.sport__calendar__table__result span.sport__calendar__table__result__right')[0]->text();
         $gamesFromWeb[] = [
             'tour' => $tour,
             'date_time_game' => $time,
             'id_team_home' => (int) $aliases[$home],
             'id_team_guest' => (int) $aliases[$guest],
             'score_home' => $this->calculateHomeScore($homeScore),
             // NOTE(review): the guest score is also passed through calculateHomeScore();
             // kept as in the original — confirm this is intended.
             'score_guest' => $this->calculateHomeScore($guestScore),
         ];
     }
     $this->gamesFromWeb = $gamesFromWeb;
 }
Пример #3
0
 /**
  * Reads one approved-event page and collects its details into $this->data.
  *
  * @param  string $url handed to the Document constructor
  * @return array the populated $this->data
  */
 function parse_approvedevent($url)
 {
     $page = new Document($url, true);

     // Runs of two or more whitespace characters are removed from the title.
     $rawTitle = $page->find('.descr_approvedevent')[0]->text();
     $this->data['title'] = trim(preg_replace('/\\s{2,}/', '', $rawTitle));

     // Panels whose value is the text of their first matching child:
     // panel id => [child selector, key in $this->data].
     $textPanels = [
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_OrganiserLocation_Panel' => ['span', 'orginiser_location'],
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_BusinessSectors_Panel' => ['span', 'business_sectors'],
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Frequency_Panel' => ['span', 'frequency'],
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_EventOpenTo_Panel' => ['span', 'event_open_to'],
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Email_Panel' => ['a', 'email'],
     ];

     foreach ($page->find('div.caracs_approvedevent div') as $panel) {
         $panelId = $panel->attr('id');
         if (!is_string($panelId)) {
             // A panel without a string id matches none of the known panels.
             continue;
         }

         if (isset($textPanels[$panelId])) {
             list($childSelector, $dataKey) = $textPanels[$panelId];
             $this->data[$dataKey] = $panel->find($childSelector)[0]->text();
         } elseif ($panelId === 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_OrganizingCompany_Panel') {
             // The company name gets the same whitespace clean-up as the title.
             $company = $panel->find('a')[0]->text();
             $this->data['orginizing_company'] = trim(preg_replace('/\\s{2,}/', '', $company));
         } elseif ($panelId === 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Website_Panel') {
             $this->data['website'] = $panel->find('a')[0]->attr('href');
         } elseif ($panelId === 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Links_Panel') {
             foreach ($panel->find('a') as $index => $anchor) {
                 $this->data['links'][$index] = $anchor->attr('href');
             }
         }
     }

     $this->data['session'] = $this->parse_session($page);

     return $this->data;
 }
Пример #4
0
 /**
  * Verifies that find() called with false as its third argument yields DOMElement instances.
  *
  * @dataProvider findProvider
  */
 public function testReturnDomElent($html, $selector, $type, $count)
 {
     $rawNodes = (new Document($html, false))->find($selector, $type, false);

     $this->assertTrue(is_array($rawNodes));
     $this->assertEquals($count, count($rawNodes));

     // The type of the first match can only be checked when a match is expected.
     if ($count > 0) {
         $firstNode = $rawNodes[0];
         $this->assertInstanceOf('DOMElement', $firstNode);
     }
 }
Пример #5
0
 /**
  * Collects the text of every ".single-comment" node in the document.
  *
  * @param  \DiDom\Document $dom
  * @return array list of ['text' => string] entries, one per comment node
  */
 public function extractComments($dom)
 {
     $extracted = [];

     foreach ($dom->find('.single-comment') as $commentNode) {
         // The first ".text" descendant carries the comment body.
         $textNodes = $commentNode->find('.text');
         $extracted[] = ['text' => $textNodes[0]->text()];
     }

     return $extracted;
 }
Пример #6
0
 /**
  * Verifies class selectors: a single class and a compound of two classes.
  */
 public function testClass()
 {
     $html = '
         <span class="odd first">Lorem ipsum dolor.</span>
         <span class="even second">Tenetur totam, nostrum.</span>
         <span class="odd third">Iste, doloremque, praesentium.</span>
     ';
     $document = new Document($html);

     // Gathers the text of every element matched by the selector, in match order.
     $textsFor = function ($selector) use ($document) {
         $texts = [];
         foreach ($document->find($selector) as $matched) {
             $texts[] = $matched->text();
         }

         return $texts;
     };

     $this->assertEquals(
         ['Lorem ipsum dolor.', 'Iste, doloremque, praesentium.'],
         $textsFor('.odd')
     );
     $this->assertEquals(
         ['Iste, doloremque, praesentium.'],
         $textsFor('.odd.third')
     );
 }
Пример #7
0
 /**
  * Scrapes the per-tour result tables and stores the recent games in $this->gamesFromWeb.
  *
  * At most $this->tournament->num_tours tables are read. A row is used when its date is
  * within the last two weeks, its tour number does not exceed the number of tours and
  * both player links are present.
  *
  * @param array $teamTournament source rows, mapped with ArrayHelper::map() to alias => id
  *
  * @throws Exception when a team name of a usable row has no known alias
  */
 protected function getGamesFromWeb($teamTournament)
 {
     // Team alias => participant id.
     $aliases = ArrayHelper::map($teamTournament, 'alias', 'id');
     $count = $this->tournament->num_tours;
     $html = new Document($this->tournament->autoProcessURL, true);
     $results = $html->find('div.mainPart')[0];
     $gamesFromWeb = [];
     if (!(0 < $count)) {
         // The loop below would not run at all; skip the DOM queries as well.
         $this->gamesFromWeb = $gamesFromWeb;
         return;
     }
     // The DOM is not modified while iterating, so these loop-invariant queries run once
     // instead of being repeated for every tour.
     $tables = $results->find('div.stat.mB15 table.stat-table');
     $headings = $html->find('h3.titleH3.bordered.mB10');
     for ($i = 0; $i < $count; $i++) {
         if (!isset($tables[$i])) {
             continue;
         }
         // The i-th heading belongs to the i-th table.
         $tour = $this->getTour($headings[$i]->text());
         foreach ($tables[$i]->find('tbody tr') as $one) {
             // Parsed once: used for the age filter and stored with the game.
             $gameTime = $this->autoTimeToUnix($one->find('td.name-td')[0]->text());
             if (!($gameTime > time() - 60 * 60 * 24 * 7 * 2 && $tour <= $count)) {
                 continue;
             }
             $owners = $one->find('td.owner-td a.player');
             $guests = $one->find('td.guests-td a.player');
             if (!isset($owners[0]) || !isset($guests[0])) {
                 // Rows without both player links are skipped.
                 continue;
             }
             $owner = $owners[0]->text();
             $guest = $guests[0]->text();
             if (!isset($aliases[$owner]) || !isset($aliases[$guest])) {
                 // NOTE(review): Exception is unqualified, as in the original; inside a
                 // namespace this relies on a matching `use` import — confirm.
                 throw new Exception('Error during alias parsing ' . $owner . ' or ' . $guest);
             }
             // One score cell feeds both the home and the guest score helpers.
             $score = $one->find('td.score-td noindex')[0]->text();
             $gamesFromWeb[] = [
                 'id_team_home' => (int) $aliases[$owner],
                 'id_team_guest' => (int) $aliases[$guest],
                 'date_time_game' => (int) $gameTime,
                 'tour' => $tour,
                 'score_home' => $this->calculateHomeScore($score),
                 'score_guest' => $this->calculateGuestScore($score),
             ];
         }
     }
     $this->gamesFromWeb = $gamesFromWeb;
 }
Пример #8
0
 /**
  * Loads the HTML into the crawler and collects its named meta tags and followable links.
  *
  * Meta tags are keyed by their lower-cased `name` attribute; 'canonical' and 'robots'
  * default to ''. Meta tags without a name are ignored. Anchors without an href, and
  * anchors whose `rel` contains the `nofollow` token, are skipped.
  *
  * $this->links receives the de-duplicated links, while the returned 'links' entry keeps
  * duplicates (as in the original implementation).
  *
  * @param mixed $html cast to string before it is loaded
  * @return array ['links' => array, 'meta' => array]
  */
 public function load($html)
 {
     $metaTags = ['canonical' => '', 'robots' => ''];
     $this->crawler->loadHtml((string) $html);
     foreach ($this->crawler->find('meta') as $meta) {
         /** @var Element $meta */
         // Cast first: the attribute may be absent, and passing null to strtolower()
         // is deprecated since PHP 8.1.
         $name = strtolower((string) $meta->attr('name'));
         if ($name === '') {
             // charset / http-equiv / property metas have no name; previously they
             // all overwrote a meaningless '' key.
             continue;
         }
         $metaTags[$name] = $meta->attr('content');
     }
     $links = [];
     foreach ($this->crawler->find('a') as $link) {
         /** @var Element $link */
         $href = $link->attr('href');
         if ($href === null) {
             // Presumably attr() yields null for a missing attribute; an anchor
             // without href is not a link.
             continue;
         }
         // rel is a whitespace-separated token list, so "nofollow noopener" must be
         // treated as nofollow too.
         $relTokens = preg_split('/\s+/', strtolower((string) $link->attr('rel')), -1, PREG_SPLIT_NO_EMPTY);
         if (in_array('nofollow', $relTokens, true)) {
             continue;
         }
         $links[] = $href;
     }
     $this->links = array_unique($links);
     $this->metaTags = $metaTags;
     return ['links' => $links, 'meta' => $metaTags];
 }
Пример #9
0
 /**
  * Re-fetches every item returned by the repository's findTimed() and refreshes its
  * title, price, currency, last-check time and stored HTML from the page's <title>.
  *
  * Items whose page has no <title> element are left untouched.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  *
  * @return int 0 on completion
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $container = $this->getContainer();
     $em = $container->get('doctrine')->getManager();
     $items = $em->getRepository("AppBundle:Item")->findTimed();
     $total = count($items);
     $output->writeln('Total count items ' . $total);
     $progress = new ProgressBar($output, $total);
     $progress->start();
     foreach ($items as $item) {
         $progress->advance();
         $document = new Document($item->getUrl(), true);
         $titleNodes = $document->find('title');
         if (!isset($titleNodes[0])) {
             // Without a <title> there is nothing to parse; skip this item instead of
             // aborting the whole run with a fatal error.
             continue;
         }
         // TODO: fragile parsing. NOTE(review): judging by the splitting below, titles
         // are expected to look like "<name>: <price> <currency> - <rest>" — confirm.
         $piecesTitle = explode(":", $titleNodes[0]->text());
         $piecesTitle2 = explode('-', trim(end($piecesTitle)));
         $priceString = trim($piecesTitle2[0]);
         $pricePieces = explode(' ', $priceString);
         // The last space-separated token is taken as the currency.
         $currency = trim(end($pricePieces));
         // Price = price string without the currency token and without any spaces.
         $price = trim(str_replace($currency, '', $priceString));
         $price = trim(str_replace(' ', '', $price));
         $item->setTitle($piecesTitle[0]);
         // Loose comparison kept on purpose: $price is a string, the stored price may not be.
         if ($price != $item->getPrice()) {
             $item->setPriceOld($item->getPrice());
         }
         $item->setPrice($price);
         $item->setCurrency($currency);
         $item->setLastCheck(new \DateTime());
         $item->setHtmlDocument($document->html());
         $em->persist($item);
     }
     $progress->finish();
     $em->flush();
     $output->writeln('Ok');

     // Explicit success exit code for the console runner.
     return 0;
 }
Пример #10
0
 /**
  * Parses a kat listing (search results, user uploads, ...) into BasicTorrent objects.
  *
  * @param Document $html
  * @param int $maxAge timestamp; when non-empty, rows dated before it are skipped
  * @return array list of BasicTorrent
  */
 public static function parseList($html, $maxAge = null)
 {
     $torrents = [];

     foreach ($html->find("tr.odd, tr.even") as $row) {
         $centerCells = $row->find("td.center");
         $createdAt = strtotime($row->find(".center[title]")[0]->attr("title"));
         if (!empty($maxAge) && $createdAt < $maxAge) {
             continue;
         }

         $mainLink = $row->find("a.cellMainLink")[0];

         // Anonymous uploads carry no user link, so the creator stays null.
         $creator = null;
         if ($row->has(".lightgrey.block a.plain")) {
             $userLink = $row->find(".lightgrey.block a.plain")[0];
             $creator = new User([
                 "nick" => $userLink->text(),
                 "link" => Kat::DOMAIN . $userLink->attr("href"),
             ]);
         }

         $torrents[] = new BasicTorrent([
             "name" => $mainLink->text(),
             "size" => $row->find(".nobr.center")[0]->text(),
             "created" => date("Y-m-d H:i:s", $createdAt),
             "magnet" => $row->find("a[data-nop]")[0]->attr("href"),
             "torrent" => "https:" . $row->find("a[data-download]")[0]->attr("href"),
             "creator" => $creator,
             // Centered cells are addressed by position: 1 = files, 3 = seeders, 4 = leechers.
             "files" => $centerCells[1]->text(),
             "seeders" => $centerCells[3]->text(),
             "leechers" => $centerCells[4]->text(),
             "link" => self::DOMAIN . $mainLink->attr("href"),
             "comments" => (int) $row->find('div.iaconbox.center.floatright a')[0]->text(),
         ]);
     }

     return $torrents;
 }
Пример #11
0
 /**
  * Searches the DOM tree and, by default, wraps the matches in a NodeList.
  *
  * The parent implementation is always called with false as its third argument.
  *
  * @param string $expression XPath expression or CSS selector
  * @param string $type       the type of the expression
  * @param bool   $wrapList   when truthy the matches are wrapped in a NodeList,
  *                           otherwise the parent's result is returned as is
  *
  * @return NodeList|array a NodeList of the matches, or the unwrapped result of parent::find()
  */
 public function find($expression, $type = Query::TYPE_CSS, $wrapList = true)
 {
     $matches = parent::find($expression, $type, false);

     if ($wrapList) {
         return new NodeList($matches);
     }

     return $matches;
 }
Пример #12
0
/**
 * Returns the text of the second ".game-info p" element of the document built from $url.
 *
 * @param string $url handed to the Document constructor
 * @return string
 */
function data($url)
{
    $paragraphs = (new Document($url, true))->find('.game-info p');

    // Index 1: the second matching paragraph.
    $second = $paragraphs[1];

    return $second->text();
}
Пример #13
0
 /**
  * Gets the full movie crew grouped by department.
  *
  * @return array department key => list of ['id' => string, 'name' => string]
  */
 public function getCastCredits()
 {
     $content = new Document($this->getCredits());
     $headings = $content->find("#fullcredits_content h4");
     $persons = $content->find(".simpleCreditsTable tbody");

     // Keep only the h4 elements without an id or name attribute, re-indexed from zero,
     // so that their positions can be matched against the credits tables below.
     $titles = [];
     foreach ($headings as $heading) {
         if ($heading->hasAttribute("id") || $heading->hasAttribute("name")) {
             continue;
         }
         $titles[] = $heading;
     }

     // Heading text => key used in the returned array.
     $departments = [
         "Directed by" => "director",
         "Music by" => "music",
         "Cinematography by" => "cinematography",
         "Film Editing by" => "editing",
         "Casting By" => "casting",
         "Production Design by" => "production_design",
         "Art Direction by" => "art_direction",
         "Set Decoration by" => "set_decoration",
         "Costume Design by" => "costume_design",
         "Makeup Department" => "makeup_department",
         "Production Management" => "production_management",
         "Art Department" => "art_department",
         "Sound Department" => "sound_department",
         "Special Effects by" => "special_effects",
         "Visual Effects by" => "visual_effects",
         "Stunts" => "stunts",
         "Camera and Electrical Department" => "camera_department",
         "Animation Department" => "animation_department",
         "Casting Department" => "casting_department",
         "Costume and Wardrobe Department" => "wardrobe_department",
         "Editorial Department" => "editorial_department",
         "Location Management" => "location_management",
         "Music Department" => "music_department",
         "Transportation Department" => "transportation_department",
         "Storyline" => "storyline",
         "Photo & Video" => "photo",
     ];

     $personIdPattern = "/name\\/nm(\\d+)\\/(?:.*)/";
     $crew = [];

     foreach ($titles as $pos => $h4) {
         // NOTE(review): the last character of the trim list could not be identified
         // unambiguously (possibly a non-breaking space) — verify it against the original file.
         $title = trim($h4->text(), " \t\n\r\v ");
         if (!isset($departments[$title])) {
             // Unknown heading: not a department we report.
             continue;
         }
         $name = $departments[$title];

         // The department key is registered even when no table matches this position.
         if (!isset($crew[$name])) {
             $crew[$name] = [];
         }
         if (!isset($persons[$pos])) {
             continue;
         }

         foreach ($persons[$pos]->find("a") as $person) {
             preg_match($personIdPattern, $person->attr("href"), $matches);
             // Links without a usable person id are ignored.
             if (empty($matches[1])) {
                 continue;
             }
             $crew[$name][] = ["id" => $matches[1], "name" => trim($person->text())];
         }
     }

     return $crew;
 }
Пример #14
0
 /**
  * Verifies that first() returns the first match, supports "::text", and yields null
  * on an empty document.
  */
 public function testFirst()
 {
     $document = new Document('<ul><li>One</li><li>Two</li><li>Three</li></ul>', false);

     $allItems = $document->find('ul > li');
     $expectedNode = $allItems[0]->getNode();
     $this->assertEquals($expectedNode, $document->first('ul > li')->getNode());

     $this->assertEquals('One', $document->first('ul > li::text'));

     // A document created without content has nothing to match.
     $emptyDocument = new Document();
     $this->assertNull($emptyDocument->first('ul > li'));
 }
Пример #15
0
 /**
  * Verifies replace(): the replaced element is returned, the replacement takes its place,
  * and the item count afterwards depends on the second argument.
  */
 public function testReplace()
 {
     $markup = '<ul><li>One</li><li>Two</li><li>Three</li></ul>';

     // replace() with one argument: three items remain in the list.
     $document = new Document($markup, false);
     $firstItem = $document->find('li')[0];
     $thirdItem = $document->find('li')[2];
     // Capture the expected node before replacing, matching the original evaluation order.
     $expectedNode = $firstItem->getNode();
     $this->assertEquals($expectedNode, $firstItem->replace($thirdItem)->getNode());
     $this->assertEquals($thirdItem->getNode(), $document->find('li')[0]->getNode());
     $this->assertCount(3, $document->find('li'));

     // replace() with false as second argument: only two items remain.
     $document = new Document($markup, false);
     $firstItem = $document->find('li')[0];
     $thirdItem = $document->find('li')[2];
     $expectedNode = $firstItem->getNode();
     $this->assertEquals($expectedNode, $firstItem->replace($thirdItem, false)->getNode());
     $this->assertEquals($thirdItem->getNode(), $document->find('li')[0]->getNode());
     $this->assertCount(2, $document->find('li'));
 }
Пример #16
0
 /**
  * Verifies getLineNo(): 0 for a freshly created element, the source line for a parsed one.
  */
 public function testGetLineNo()
 {
     // An element created directly reports line 0.
     $detached = new Element('div');
     $this->assertEquals(0, $detached->getLineNo());

     // The third <li> sits on the fourth line of the markup.
     $html = '<ul>
         <li>One</li>
         <li>Two</li>
         <li>Three</li>
     </ul>';
     $items = (new Document($html, false))->find('li');
     $this->assertEquals(4, $items[2]->getLineNo());
 }
Пример #17
0
<?php

require_once 'vendor/autoload.php';
use DiDom\Document;
// Fetch the extension listing and print the name of every extension, one per line.
$document = new Document('http://www.opencart.com/index.php?route=extension/extension&filter_license=0', true);
$extensions = $document->find('#content > div.extension-grid > div > div.name > a');
foreach ($extensions as $extension) {
    // The text comes from a remote page: escape it before emitting it as HTML so that
    // characters such as "&" or "<" cannot break or inject markup.
    echo htmlspecialchars($extension->text(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), "<br>";
}