コード例 #1
0
 /**
  * Read the calendar table and store the recent games in $this->gamesFromWeb.
  *
  * A row is used only when its date is newer than two weeks before now. Both team
  * names of a used row must be present in the alias map built from $teamTournament.
  *
  * @param mixed $teamTournament data handed to ArrayHelper::map() with 'alias' as key and 'id' as value
  * @return void the collected games are written to $this->gamesFromWeb
  * @throws \Exception when the home or guest name of a used row has no alias
  */
 protected function getGamesFromWeb($teamTournament)
 {
     // alias => participant id
     $aliases = ArrayHelper::map($teamTournament, 'alias', 'id');
     // NOTE(review): the page is read from a local 'italy.htm' while the tournament URL
     // variant below is disabled — confirm this is intended outside of debugging.
     //$html = new Document($this->tournament->autoProcessURL, true);
     $html = new Document('italy.htm', true);
     $table = $html->find('table.table.b-table-sortlist tbody')[0];
     // One cutoff for the whole run, so every row is judged against the same moment.
     $cutoff = time() - 60 * 60 * 24 * 7 * 2;
     $gamesFromWeb = [];
     foreach ($table->find('tr') as $row) {
         // Parsed once and reused for both the age check and the stored value.
         $time = $this->autoTimeToUnix($row->find('td.sport__calendar__table__date')[0]->text());
         if (!($time > $cutoff)) {
             continue;
         }
         $teams = $row->find('td.sport__calendar__table__teams a.sport__calendar__table__team');
         $home = $teams[0]->text();
         $guest = $teams[1]->text();
         if (!isset($aliases[$home]) || !isset($aliases[$guest])) {
             throw new \Exception('Error during alias parsing ' . $home . ' or ' . $guest);
         }
         $tour = $row->find('td.sport__calendar__table__tour')[0]->text();
         $homeScore = $row->find('td.sport__calendar__table__result span.sport__calendar__table__result__left')[0]->text();
         $guestScore = $row->find('td.sport__calendar__table__result span.sport__calendar__table__result__right')[0]->text();
         $gamesFromWeb[] = [
             'tour' => $tour,
             'date_time_game' => $time,
             'id_team_home' => (int) $aliases[$home],
             'id_team_guest' => (int) $aliases[$guest],
             'score_home' => $this->calculateHomeScore($homeScore),
             // NOTE(review): the guest value also goes through calculateHomeScore();
             // presumably fine because each side is read from its own span — confirm.
             'score_guest' => $this->calculateHomeScore($guestScore),
         ];
     }
     $this->gamesFromWeb = $gamesFromWeb;
 }
コード例 #2
0
ファイル: Dummy.php プロジェクト: kminek/comment-scraper
 /**
  * Collect the text of every comment node found in the document.
  *
  * @param  \DiDom\Document $dom document searched for '.single-comment' nodes
  * @return array list of ['text' => ...] entries, one per comment node, in document order
  */
 public function extractComments($dom)
 {
     $result = [];
     foreach ($dom->find('.single-comment') as $commentNode) {
         // the first '.text' descendant carries the comment body
         $textNode = $commentNode->find('.text')[0];
         $result[] = ['text' => $textNode->text()];
     }
     return $result;
 }
コード例 #3
0
ファイル: ufi.php プロジェクト: eok8177/parser
 /**
  * Scrape one approved-event page into $this->data.
  *
  * Fills 'title', the per-panel fields found under 'div.caracs_approvedevent div'
  * and finally 'session' (via parse_session()).
  *
  * @param  string $url handed to Document together with `true`
  * @return array $this->data after all fields have been written
  */
 function parse_approvedevent($url)
 {
     // Panels whose value is just the text of their first <span>: panel id => data key.
     $spanPanels = [
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_OrganiserLocation_Panel' => 'orginiser_location',
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_BusinessSectors_Panel' => 'business_sectors',
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Frequency_Panel' => 'frequency',
         'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_EventOpenTo_Panel' => 'event_open_to',
     ];
     $page = new Document($url, true);
     $rawTitle = $page->find('.descr_approvedevent')[0]->text();
     // strip every run of two or more whitespace characters, then trim the ends
     $this->data['title'] = trim(preg_replace('/\\s{2,}/', '', $rawTitle));
     foreach ($page->find('div.caracs_approvedevent div') as $panel) {
         $panelId = $panel->attr('id');
         if (is_string($panelId) && isset($spanPanels[$panelId])) {
             $this->data[$spanPanels[$panelId]] = $panel->find('span')[0]->text();
             continue;
         }
         switch ($panelId) {
             case 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_OrganizingCompany_Panel':
                 $company = $panel->find('a')[0]->text();
                 $this->data['orginizing_company'] = trim(preg_replace('/\\s{2,}/', '', $company));
                 break;
             case 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Email_Panel':
                 $this->data['email'] = $panel->find('a')[0]->text();
                 break;
             case 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Website_Panel':
                 $this->data['website'] = $panel->find('a')[0]->attr('href');
                 break;
             case 'ctl00_ContentPlaceHolder1_UFIApprovedEventsDetails1_Links_Panel':
                 // keep every link target, indexed by its position inside the panel
                 foreach ($panel->find('a') as $position => $anchor) {
                     $this->data['links'][$position] = $anchor->attr('href');
                 }
                 break;
         }
     }
     $this->data['session'] = $this->parse_session($page);
     return $this->data;
 }
コード例 #4
0
 /**
  * Re-download every item returned by findTimed() and refresh its stored data.
  *
  * For each item the page <title> is split into a title, a price and a currency;
  * the previous price is kept as "old price" when it differs. All items are
  * flushed in one go after the loop.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $entityManager = $this->getContainer()->get('doctrine')->getManager();
     $timedItems = $entityManager->getRepository("AppBundle:Item")->findTimed();
     $output->writeln('Total count items ' . count($timedItems));
     $bar = new ProgressBar($output, count($timedItems));
     $bar->start();
     foreach ($timedItems as $item) {
         $bar->advance();
         $page = new Document($item->getUrl(), true);
         $titleNodes = $page->find('title');
         // TODO: fragile parsing — presumably the title looks like
         // "<name>: <amount> <currency> - <rest>"; verify against real pages.
         $titleParts = explode(":", $titleNodes[0]->text());
         $afterColon = explode('-', trim(end($titleParts)));
         $priceText = trim($afterColon[0]);
         $priceWords = explode(' ', $priceText);
         // the last word of the price text is taken as the currency
         $currency = trim(end($priceWords));
         // drop the currency, then every remaining space, to leave the bare amount
         $amount = trim(str_replace($currency, '', $priceText));
         $amount = trim(str_replace(' ', '', $amount));
         $item->setTitle($titleParts[0]);
         if ($amount != $item->getPrice()) {
             $item->setPriceOld($item->getPrice());
         }
         $item->setPrice($amount);
         $item->setCurrency($currency);
         $item->setLastCheck(new \DateTime());
         $item->setHtmlDocument($page->html());
         $entityManager->persist($item);
     }
     $bar->finish();
     $entityManager->flush();
     $output->writeln('Ok');
 }
コード例 #5
0
ファイル: Kat.php プロジェクト: tetreum/kat
 /**
  * Turn the rows of a kat listing (search results, user uploads, ...) into torrents.
  *
  * @param Document $html   page containing 'tr.odd' / 'tr.even' rows
  * @param int      $maxAge timestamp; rows dated before it are skipped (ignored when empty)
  * @return array list of BasicTorrent objects, one per kept row
  */
 public static function parseList($html, $maxAge = null)
 {
     $torrents = [];
     foreach ($html->find("tr.odd, tr.even") as $row) {
         $centerCells = $row->find("td.center");
         $createdAt = strtotime($row->find(".center[title]")[0]->attr("title"));
         $tooOld = !empty($maxAge) && $createdAt < $maxAge;
         if ($tooOld) {
             continue;
         }
         $mainLink = $row->find("a.cellMainLink")[0];
         // anonymous uploads have no user link, in which case the creator stays null
         $uploader = null;
         if ($row->has(".lightgrey.block a.plain")) {
             $userLink = $row->find(".lightgrey.block a.plain")[0];
             $uploader = new User([
                 "nick" => $userLink->text(),
                 "link" => Kat::DOMAIN . $userLink->attr("href"),
             ]);
         }
         $torrents[] = new BasicTorrent([
             "name" => $mainLink->text(),
             "size" => $row->find(".nobr.center")[0]->text(),
             "created" => date("Y-m-d H:i:s", $createdAt),
             "magnet" => $row->find("a[data-nop]")[0]->attr("href"),
             "torrent" => "https:" . $row->find("a[data-download]")[0]->attr("href"),
             "creator" => $uploader,
             "files" => $centerCells[1]->text(),
             "seeders" => $centerCells[3]->text(),
             "leechers" => $centerCells[4]->text(),
             "link" => self::DOMAIN . $mainLink->attr("href"),
             "comments" => (int) $row->find('div.iaconbox.center.floatright a')[0]->text(),
         ]);
     }
     return $torrents;
 }
コード例 #6
0
ファイル: SelectorTest.php プロジェクト: imangazaliev/didom
 /**
  * Class selectors: a single class matches every element carrying it,
  * a chained class selector only the elements carrying all of them.
  */
 public function testClass()
 {
     $markup = '
         <span class="odd first">Lorem ipsum dolor.</span>
         <span class="even second">Tenetur totam, nostrum.</span>
         <span class="odd third">Iste, doloremque, praesentium.</span>
     ';
     $document = new Document($markup);
     // collects the text of every element matched by the selector
     $textsOf = function ($selector) use ($document) {
         $texts = [];
         foreach ($document->find($selector) as $match) {
             $texts[] = $match->text();
         }
         return $texts;
     };
     $this->assertEquals(['Lorem ipsum dolor.', 'Iste, doloremque, praesentium.'], $textsOf('.odd'));
     $this->assertEquals(['Iste, doloremque, praesentium.'], $textsOf('.odd.third'));
 }
コード例 #7
0
 /**
  * Read the per-tour result tables of the tournament page and store the recent
  * games in $this->gamesFromWeb.
  *
  * The i-th result table is paired with the i-th tour heading. A row is used only
  * when its date is newer than two weeks before now, its tour does not exceed the
  * tournament's num_tours and it has both an owner and a guest player link.
  *
  * @param mixed $teamTournament data handed to ArrayHelper::map() with 'alias' as key and 'id' as value
  * @return void the collected games are written to $this->gamesFromWeb
  * @throws Exception when the owner or guest name of a used row has no alias
  */
 protected function getGamesFromWeb($teamTournament)
 {
     // alias => participant id
     $aliases = ArrayHelper::map($teamTournament, 'alias', 'id');
     $count = $this->tournament->num_tours;
     $html = new Document($this->tournament->autoProcessURL, true);
     //$html = new Document('pl.htm', true);
     $results = $html->find('div.mainPart')[0];
     // Both lookups give the same answer on every iteration, so run them once.
     $tables = $results->find('div.stat.mB15 table.stat-table');
     $headings = $html->find('h3.titleH3.bordered.mB10');
     // One cutoff for the whole run, so every row is judged against the same moment.
     $cutoff = time() - 60 * 60 * 24 * 7 * 2;
     $gamesFromWeb = [];
     for ($i = 0; $i < $count; $i++) {
         if (!isset($tables[$i])) {
             continue;
         }
         $tour = $this->getTour($headings[$i]->text());
         foreach ($tables[$i]->find('tbody tr') as $one) {
             // Parsed once and reused for both the age check and the stored value.
             $time = $this->autoTimeToUnix($one->find('td.name-td')[0]->text());
             if (!($time > $cutoff && $tour <= $count)) {
                 continue;
             }
             $ownerLinks = $one->find('td.owner-td a.player');
             $guestLinks = $one->find('td.guests-td a.player');
             // rows without both player links are silently skipped
             if (!isset($ownerLinks[0]) || !isset($guestLinks[0])) {
                 continue;
             }
             $owner = $ownerLinks[0]->text();
             $guest = $guestLinks[0]->text();
             if (!isset($aliases[$owner]) || !isset($aliases[$guest])) {
                 throw new Exception('Error during alias parsing ' . $owner . ' or ' . $guest);
             }
             // both scores are derived from the single combined score cell
             $score = $one->find('td.score-td noindex')[0]->text();
             $gamesFromWeb[] = [
                 'id_team_home' => (int) $aliases[$owner],
                 'id_team_guest' => (int) $aliases[$guest],
                 'date_time_game' => (int) $time,
                 'tour' => $tour,
                 'score_home' => $this->calculateHomeScore($score),
                 'score_guest' => $this->calculateGuestScore($score),
             ];
         }
     }
     $this->gamesFromWeb = $gamesFromWeb;
 }
コード例 #8
0
ファイル: Document.php プロジェクト: thcolin/senscritique-api
 /**
  * Find the first non-blank child that follows a child whose text matches $expression.
  *
  * Children of $element are scanned in order. Once a child matches, the next child
  * whose trimmed text is non-empty is appended to a fresh document that is returned.
  *
  * @param DiDomElement $element    element whose direct children are scanned
  * @param string       $expression text to compare with (loose ==), or a regex when $grep is set
  * @param bool         $grep       treat $expression as a preg_match() pattern
  * @return DiDomDocument|null document holding the found node, null when nothing follows a match
  */
 public function findNextNodeByText(DiDomElement $element, $expression, $grep = false)
 {
     $markerSeen = false;
     foreach ($element->getNode()->childNodes as $child) {
         if ($markerSeen && trim($child->textContent)) {
             $wrapper = new DiDomDocument();
             $wrapper->appendChild($child);
             return $wrapper;
         }
         $isMarker = $grep
             ? preg_match($expression, $child->textContent)
             : $expression == $child->textContent;
         if ($isMarker) {
             $markerSeen = true;
         }
     }
     return null;
 }
コード例 #9
0
ファイル: DiDomCrawler.php プロジェクト: vedebel/sitemap
 /**
  * Load an HTML string into the crawler and collect its meta tags and links.
  *
  * Side effects: $this->metaTags receives the meta map and $this->links the
  * de-duplicated link list. The returned 'links' entry is NOT de-duplicated.
  *
  * @param mixed $html markup; cast to string before loading
  * @return array ['links' => href of every <a> not marked rel="nofollow", in document order,
  *                'meta'  => lower-cased meta name => content, pre-seeded with empty 'canonical' and 'robots']
  */
 public function load($html)
 {
     $metaTags = ['canonical' => '', 'robots' => ''];
     $this->crawler->loadHtml((string) $html);
     foreach ($this->crawler->find('meta') as $meta) {
         /** @var Element $meta */
         // attr() may give null for an absent attribute (DiDom default — confirm); passing
         // null to strtolower() is deprecated since PHP 8.1, so cast first. Result is unchanged.
         $name = strtolower((string) $meta->attr('name'));
         $metaTags[$name] = $meta->attr('content');
     }
     $links = [];
     foreach ($this->crawler->find('a') as $link) {
         /** @var Element $link */
         $rel = strtolower((string) $link->attr('rel'));
         if ('nofollow' === $rel) {
             continue;
         }
         $links[] = $link->attr('href');
     }
     $this->links = array_unique($links);
     $this->metaTags = $metaTags;
     // NOTE(review): the return value keeps duplicates while $this->links does not — confirm intended.
     return ['links' => $links, 'meta' => $metaTags];
 }
コード例 #10
0
ファイル: ElementTest.php プロジェクト: imangazaliev/didom
 /**
  * An element created by a document reports a document equal to its creator's.
  */
 public function testGetDocument()
 {
     $document = new Document($this->loadFixture('posts.html'), false);
     $span = $document->createElement('span', 'value');
     $expectedDom = $document->getDocument();
     $actualDom = $span->getDocument()->getDocument();
     $this->assertEquals($expectedDom, $actualDom);
 }
コード例 #11
0
ファイル: Element.php プロジェクト: nik4152/DiDOM
 /**
  * Build a new document and append the current element to it.
  *
  * @return \DiDom\Document the freshly created document
  */
 public function toDocument()
 {
     $wrapper = new Document();
     $wrapper->appendChild($this->domElement);

     return $wrapper;
 }
コード例 #12
0
ファイル: Document.php プロジェクト: imangazaliev/didom
 /**
  * Tell whether the given document is the very same document as this one.
  *
  * The comparison is made on the root elements; a compared document without
  * a root element is never "the same".
  *
  * @param Document|\DOMDocument $document The compared document
  *
  * @return bool
  *
  * @throws \InvalidArgumentException if the provided argument is not an instance of \DOMDocument or \DiDom\Document
  */
 public function is($document)
 {
     $isWrapper = $document instanceof self;
     if (!$isWrapper && !$document instanceof DOMDocument) {
         $given = is_object($document) ? get_class($document) : gettype($document);
         throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMDocument, %s given', __METHOD__, __CLASS__, $given));
     }
     $root = $isWrapper ? $document->getElement() : $document->documentElement;
     if ($root === null) {
         return false;
     }
     return $this->getElement()->isSameNode($root);
 }
コード例 #13
0
ファイル: ElementTest.php プロジェクト: ixtel/DiDOM
 /**
  * replace() returns the replaced element and puts the new one in its place;
  * the total number of <li> afterwards depends on the second argument.
  */
 public function testReplace()
 {
     $markup = '<ul><li>One</li><li>Two</li><li>Three</li></ul>';

     // Default second argument: three <li> remain after the replacement.
     $list = new Document($markup, false);
     $items = $list->find('li');
     $head = $items[0];
     $tail = $list->find('li')[2];
     $this->assertEquals($head->getNode(), $head->replace($tail)->getNode());
     $this->assertEquals($tail->getNode(), $list->find('li')[0]->getNode());
     $this->assertCount(3, $list->find('li'));

     // Second argument false: only two <li> remain after the replacement.
     $list = new Document($markup, false);
     $items = $list->find('li');
     $head = $items[0];
     $tail = $list->find('li')[2];
     $this->assertEquals($head->getNode(), $head->replace($tail, false)->getNode());
     $this->assertEquals($tail->getNode(), $list->find('li')[0]->getNode());
     $this->assertCount(2, $list->find('li'));
 }
コード例 #14
0
ファイル: DocumentTest.php プロジェクト: imangazaliev/didom
 /**
  * For a document loaded as XML, __toString() equals xml().
  */
 public function testToStringXml()
 {
     $document = new Document($this->loadFixture('books.xml'), false, 'UTF-8', 'xml');
     $expected = $document->xml();
     $this->assertEquals($expected, $document->__toString());
 }
コード例 #15
0
ファイル: parse.php プロジェクト: Cnfc19932/KHL-Telegram-bot
/**
 * Load a page and return the text of the second '.game-info p' match.
 *
 * @param string $url handed to Document together with `true`
 * @return string text of the match at index 1
 */
function data($url)
{
    $page = new Document($url, true);
    $paragraphs = $page->find('.game-info p');
    // index 1: the second matching paragraph
    $second = $paragraphs[1];

    return $second->text();
}
コード例 #16
0
ファイル: Document.php プロジェクト: nik4152/DiDOM
 /**
  * Tell whether the given document is the very same document as this one.
  *
  * The comparison is made on the root elements. A compared document that has
  * no root element (e.g. an empty DOMDocument, whose documentElement is null)
  * is reported as not the same instead of failing inside isSameNode().
  *
  * @param  Document|\DOMDocument $document the compared document
  * @return bool
  * @throws \InvalidArgumentException if the argument is neither a Document nor a DOMDocument
  */
 public function is($document)
 {
     if ($document instanceof Document) {
         $element = $document->getElement();
     } else {
         if (!$document instanceof DOMDocument) {
             throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or %s, %s given', __METHOD__, __CLASS__, 'DOMDocument', gettype($document)));
         }
         $element = $document->documentElement;
     }
     // isSameNode() needs a node; without a root element there is nothing to compare.
     if ($element === null) {
         return false;
     }
     return $this->getElement()->isSameNode($element);
 }
コード例 #17
0
ファイル: ElementTest.php プロジェクト: pronskiy/DiDOM
 /**
  * The parent of an element created by a document is a Document with the same root element.
  */
 public function testParent()
 {
     $document = new Document($this->loadFixture('posts.html'), false);
     $parent = $document->createElement('span', 'value')->parent();
     $this->assertInstanceOf('DiDom\\Document', $parent);
     $sameRoot = $document->getElement()->isSameNode($parent->getElement());
     $this->assertTrue($sameRoot);
 }
コード例 #18
0
ファイル: IMDB.php プロジェクト: kingio/php-imdb-grabber
 /**
  * Gets the full movie crew grouped by department.
  *
  * The n-th kept <h4> heading of the credits page is paired with the n-th
  * '.simpleCreditsTable tbody'. Headings that are not in the department table
  * below are ignored; a known heading always gets an entry, possibly empty.
  *
  * @return array department key => list of ['id' => digits taken from the person link, 'name' => trimmed link text]
  */
 public function getCastCredits()
 {
     // heading text => key used in the returned array
     $departments = [
         "Directed by" => "director",
         "Music by" => "music",
         "Cinematography by" => "cinematography",
         "Film Editing by" => "editing",
         "Casting By" => "casting",
         "Production Design by" => "production_design",
         "Art Direction by" => "art_direction",
         "Set Decoration by" => "set_decoration",
         "Costume Design by" => "costume_design",
         "Makeup Department" => "makeup_department",
         "Production Management" => "production_management",
         "Art Department" => "art_department",
         "Sound Department" => "sound_department",
         "Special Effects by" => "special_effects",
         "Visual Effects by" => "visual_effects",
         "Stunts" => "stunts",
         "Camera and Electrical Department" => "camera_department",
         "Animation Department" => "animation_department",
         "Casting Department" => "casting_department",
         "Costume and Wardrobe Department" => "wardrobe_department",
         "Editorial Department" => "editorial_department",
         "Location Management" => "location_management",
         "Music Department" => "music_department",
         "Transportation Department" => "transportation_department",
         "Storyline" => "storyline",
         "Photo & Video" => "photo",
     ];
     $page = new Document($this->getCredits());
     $headings = $page->find("#fullcredits_content h4");
     $tables = $page->find(".simpleCreditsTable tbody");
     // Drop the h4s that carry an id or name attribute (filtered by hand instead of
     // a :not() selector), then re-index so positions line up with $tables.
     $headings = array_values(array_filter($headings, function ($h4) {
         return !($h4->hasAttribute("id") || $h4->hasAttribute("name"));
     }));
     $personPattern = "/name\\/nm(\\d+)\\/(?:.*)/";
     $crew = [];
     foreach ($headings as $index => $h4) {
         $heading = trim($h4->text(), " \t\n\r\v ");
         if (!isset($departments[$heading])) {
             continue;
         }
         $key = $departments[$heading];
         if (!isset($crew[$key])) {
             $crew[$key] = [];
         }
         if (!isset($tables[$index])) {
             continue;
         }
         foreach ($tables[$index]->find("a") as $anchor) {
             preg_match($personPattern, $anchor->attr("href"), $hit);
             // links that do not point to a person page are skipped
             if (empty($hit[1])) {
                 continue;
             }
             $crew[$key][] = ["id" => $hit[1], "name" => trim($anchor->text())];
         }
     }
     return $crew;
 }
コード例 #19
0
ファイル: teste_crawl.php プロジェクト: newcart/ecommerce
<?php

require_once 'vendor/autoload.php';
use DiDom\Document;
// Fetch the OpenCart extension listing and print each extension name followed by <br>.
$document = new Document('http://www.opencart.com/index.php?route=extension/extension&filter_license=0', true);
// anchors inside the name cell of every grid entry
$extensions = $document->find('#content > div.extension-grid > div > div.name > a');
foreach ($extensions as $nameLink) {
    echo $nameLink->text() . "<br>";
}
コード例 #20
0
ファイル: DocumentTest.php プロジェクト: nik4152/DiDOM
 /**
  * toElement() yields a DiDom\Element.
  */
 public function testToElement()
 {
     $document = new Document($this->loadFixture('posts.html'), false);
     $root = $document->toElement();
     $this->assertInstanceOf('DiDom\\Element', $root);
 }
コード例 #21
0
ファイル: DocumentTest.php プロジェクト: ixtel/DiDOM
 /**
  * __toString() equals html() for a document built from the posts fixture.
  */
 public function testToString()
 {
     $document = new Document($this->loadFixture('posts.html'), false);
     $expected = $document->html();
     $this->assertEquals($expected, $document->__toString());
 }
コード例 #22
0
ファイル: ElementTest.php プロジェクト: nik4152/DiDOM
 /**
  * The parent of an element created by a document is a DiDom\Document.
  */
 public function testParent()
 {
     $document = new Document('', true);
     $parent = $document->createElement('span', 'value')->parent();
     $this->assertInstanceOf('DiDom\\Document', $parent);
 }
コード例 #23
0
ファイル: Crawler.php プロジェクト: Imangazaliev/Tweezers
 /**
  * Searches for the element in the DOM tree.
  *
  * The parent lookup is always asked for the unwrapped result (third argument
  * false); wrapping into a NodeList happens here.
  *
  * @param string $expression XPath expression or CSS selector
  * @param string $type       the type of the expression
  * @param bool   $wrapList   when truthy the matches are returned inside a NodeList
  *
  * @return NodeList|DiDom\Element[]
  */
 public function find($expression, $type = Query::TYPE_CSS, $wrapList = true)
 {
     $found = parent::find($expression, $type, false);
     if ($wrapList) {
         return new NodeList($found);
     }
     return $found;
 }
コード例 #24
0
ファイル: Element.php プロジェクト: ixtel/DiDOM
 /**
  * Build a new document with the given encoding and append the current node to it.
  *
  * @param  string $encoding The document encoding
  *
  * @return \DiDom\Document the freshly created document
  */
 public function toDocument($encoding = 'UTF-8')
 {
     $wrapper = new Document(null, false, $encoding);
     $wrapper->appendChild($this->node);

     return $wrapper;
 }
コード例 #25
0
ファイル: Curl.php プロジェクト: proclnas/curl-rox
 /**
  * Request callback
  *
  * Validates the callback and invokes it immediately with the raw HTTP response,
  * the result of loading that response into a new Document, and this Curl instance.
  *
  * @param callable $callback receives ($http_response, $dom, $this)
  * @throws \Exception when $callback is not callable
  * @return Curl
  */
 public function setCallback($callback)
 {
     if (!is_callable($callback)) {
         // %s cannot format arrays or arbitrary objects (the typical non-callable
         // inputs), so describe those by class or type instead of formatting them.
         if (is_scalar($callback)) {
             $shown = $callback;
         } elseif (is_object($callback)) {
             $shown = get_class($callback);
         } else {
             $shown = gettype($callback);
         }
         throw new \Exception(sprintf('Error: %s is not a valid callable', $shown));
     }
     $http_response = $this->getHttpResponse();
     $didom = new Document();
     $dom = $didom->loadHtml($http_response);
     call_user_func_array($callback, [$http_response, $dom, $this]);
     return $this;
 }