/**
  * Search the feed for torrents.
  *
  * Builds the request URL with makeUrl(), downloads it with a browser-like
  * User-Agent header and maps every `//channel/item` element of the response
  * to a Torrent.
  *
  * @param string $query    Search terms, forwarded to makeUrl().
  * @param int    $category Category identifier, forwarded to makeUrl().
  * @return array Array of torrents. Empty when the request or parsing fails,
  *               or when no usable item was found.
  */
 public function search($query, $category)
 {
     // Send a browser-like User-Agent with the request.
     $requestOptions = ['headers' => ['User-Agent' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36']];
     try {
         $url = $this->makeUrl($query, $category);
         $response = $this->httpClient->get($url, $requestOptions);
         $crawler = new Crawler((string) $response->getBody());
     } catch (\Exception $e) {
         return [];
     }
     $torrents = [];
     foreach ($crawler->filterXpath('//channel/item') as $item) {
         $itemCrawler = new Crawler($item);
         try {
             $torrent = new Torrent();
             // Set details for torrent.
             $torrent->setSite($this->tag);
             $torrent->setTitle($itemCrawler->filterXpath('//title')->text());
             $torrent->setSeeders((int) $itemCrawler->filterXpath('//torrent:seeds')->text());
             $torrent->setLeechers((int) $itemCrawler->filterXpath('//torrent:peers')->text());
             $torrent->setMagnet($itemCrawler->filterXpath('//torrent:magnetURI')->text());
             $torrent->setSize($this->formatBytes((int) $itemCrawler->filterXPath('//torrent:contentLength')->text()));
             $torrent->setAge($itemCrawler->filterXPath('//pubDate')->text());
             $torrent->setCategory($itemCrawler->filterXPath('//category')->text());
         } catch (\InvalidArgumentException $e) {
             // Crawler::text() throws when the selected node list is empty.
             // An item lacking one of the expected elements is skipped so a
             // single malformed entry cannot abort the whole search.
             continue;
         }
         $torrents[] = $torrent;
     }
     return $torrents;
 }
 /**
  * Read the text of the first `//title` match, or fall back to an empty string.
  *
  * @param Crawler $crawler Crawler to search for a title element.
  * @return string Title text, or '' when no title element matches.
  */
 public function getTitleFallback(Crawler $crawler)
 {
     $titleNodes = $crawler->filterXpath('//title');

     // text() throws on an empty node list, hence the count() guard.
     return $titleNodes->count() ? $titleNodes->text() : '';
 }
 /**
  * Query the search page and scrape the `#searchResult` table.
  *
  * @param string $query Search terms; URL-encoded into the request path.
  * @return SearchResult[] One entry per data row. Empty when the HTTP client
  *                        raises a ClientException.
  */
 public function search($query)
 {
     $searchUrl = 'https://thepiratebay.se/search/' . urlencode($query) . '/0/7/0';
     try {
         $response = $this->httpClient->get($searchUrl);
     } catch (ClientException $e) {
         return [];
     }
     $page = new Crawler((string) $response->getBody());
     $results = [];
     foreach ($page->filter('#searchResult tr') as $position => $row) {
         // Row 0 is the table header, not a result.
         if (0 === $position) {
             continue;
         }
         $result = new SearchResult();
         $rowCrawler = new Crawler($row);
         $cells = $rowCrawler->filter('td');
         $result->setName(trim($rowCrawler->filter('.detName')->text()));
         $result->setSeeders((int) $cells->eq(2)->text());
         $result->setLeechers((int) $cells->eq(3)->text());
         // First anchor that is a direct child of a cell in this row.
         $result->setMagnetUrl($rowCrawler->filterXpath('//tr/td/a')->attr('href'));
         $results[] = $result;
     }
     return $results;
 }
Exemple #4
0
 /**
  * Fetch $url, pass its body to $afterCrawl, then recurse into the links it contains.
  *
  * The md5 of the URL is appended to $this->crawledUrls before the request is
  * made. The running request count is echoed after every request. Any
  * exception raised while processing the page (including exceptions from
  * nested crawl() calls that escape them) records $url in $this->errorUrls.
  *
  * @param string        $url         Address to request.
  * @param callable|null $afterCrawl  Called as $afterCrawl($url, $html) after the body is read.
  * @param callable|null $beforeCrawl Called as $beforeCrawl($normalizedUrl); a falsy
  *                                   return value skips that link.
  */
 function crawl($url, $afterCrawl = null, $beforeCrawl = null)
 {
     $urlHash = md5($url);
     try {
         $this->crawledUrls[] = $urlHash;
         $response = $this->client->request("GET", $url);
         $body = $response->getBody()->getContents();
         $this->requestCount++;
         echo $this->requestCount . "\n";
         // Release large objects early; this method recurses per link.
         unset($response);
         if ($afterCrawl !== null) {
             $afterCrawl($url, $body);
         }
         $linkCrawler = new DomCrawler($body);
         unset($body);
         $hrefs = array_unique($linkCrawler->filterXpath('//a')->extract(['href']));
         unset($linkCrawler);
         foreach ($hrefs as $href) {
             $target = $this->normalizeUrl($url, $href);
             // A maxRequestcount of 0 means "no limit".
             $limitReached = $this->maxRequestcount != 0 && $this->requestCount >= $this->maxRequestcount;
             if ($limitReached) {
                 return;
             }
             if (!$this->isCrawlable($url, $target)) {
                 continue;
             }
             if ($beforeCrawl !== null && !$beforeCrawl($target)) {
                 continue;
             }
             $this->crawl($target, $afterCrawl, $beforeCrawl);
         }
     } catch (\Exception $e) {
         $this->errorUrls[] = $url;
     }
 }
Exemple #5
0
 /**
  * Load the deadline and fixtures of one gameweek.
  *
  * Requests the fixtures page through getDom(), stores the parsed deadline in
  * $this->deadline_time and appends one GameweekMatch per `.ismFixture`
  * element to $this->matches.
  *
  * @param int|string $gameweek Gameweek identifier, interpolated into the fixtures URL.
  */
 public function load($gameweek)
 {
     $this->gameweek = $gameweek;
     $crawler = $this->getDom("http://fantasy.premierleague.com/fixtures/{$gameweek}/", ['X-Requested-With' => 'XMLHttpRequest']);
     $deadline = $crawler->filter('.ismStrongCaption')->text();
     // array_pop() takes its argument by reference, so the exploded parts
     // must live in a variable; passing explode() directly raises
     // "Only variables should be passed by reference".
     $deadlineParts = explode(' - ', $deadline);
     $this->deadline_time = $this->parseDate(array_pop($deadlineParts));
     $games = $crawler->filter('.ismFixture');
     foreach ($games as $g) {
         $gc = new Crawler($g);
         $match = new GameweekMatch();
         // The team id is the number following "badge_" in the badge image URL.
         $home_team_id = (int) preg_replace('~^.+badge_(\\d+).+$~', '$1', $gc->filterXpath('//td[3]/img')->attr('src'));
         $away_team_id = (int) preg_replace('~^.+badge_(\\d+).+$~', '$1', $gc->filterXpath('//td[5]/img')->attr('src'));
         $match->home_team = new TeamSimple();
         $match->home_team->load($home_team_id);
         $match->away_team = new TeamSimple();
         $match->away_team->load($away_team_id);
         $match->start_time = $this->parseDate($gc->filterXpath('//td[1]')->text());
         $this->matches[] = $match;
     }
 }
 /**
  * {@inheritdoc}
  *
  * Fetches the Google Play details page for $id and scrapes name, owner,
  * description, cover, category tag and screenshots into an AppInfo.
  *
  * @throws RuntimeException  When symfony/dom-crawler is not installed.
  * @throws NotFoundException When the details page answers with HTTP 404.
  */
 public function lookup($id)
 {
     if (!class_exists('Symfony\\Component\\DomCrawler\\Crawler')) {
         throw new RuntimeException('symfony/dom-crawler is required.');
     }
     $url = sprintf('https://play.google.com/store/apps/details?id=%s', $id);
     $response = $this->exec($url);
     // Read the status first, then always call close(): previously the 404
     // path threw before close() ran, so that cleanup was skipped.
     $statusCode = $this->getInfo(CURLINFO_HTTP_CODE);
     $this->close();
     if (404 === $statusCode) {
         throw new NotFoundException($id);
     }
     $crawler = new Crawler($response);
     $name = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' document-title ')]/descendant::div")->text();
     $owner = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' document-subtitle ') and (contains(concat(' ', normalize-space(@class), ' '), ' primary '))]/descendant::span")->text();
     $description = $crawler->filterXpath('//*[@id="body-content"]/div/div/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div[1]')->text();
     $screenshots = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' details-section ') and (contains(concat(' ', normalize-space(@class), ' '), ' screenshots '))]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' screenshot-container ')]/descendant::img")->each(function ($node, $i) {
         // The first image is deliberately left out; returning nothing
         // yields null, which array_filter() removes below.
         if (0 === $i) {
             return;
         }
         return $node->attr('src');
     });
     $screenshots = array_filter($screenshots);
     // text() yields a single string; the cast turns it into a one-element tag list.
     $tags = array_map('strtolower', (array) $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' document-subtitle ') and (contains(concat(' ', normalize-space(@class), ' '), ' category '))]/descendant::span")->text());
     $cover = $crawler->filterXpath("descendant-or-self::*[contains(concat(' ', normalize-space(@class), ' '), ' details-info ')]/descendant::*[contains(concat(' ', normalize-space(@class), ' '), ' cover-container ')]/descendant::img")->attr('src');
     return new AppInfo($id, $name, $owner, $description, $cover, $tags, $screenshots, AppInfo::PLATFORM_ANDROID);
 }
Exemple #7
0
 /**
  * Build UserLeague objects from the league tables of a page.
  *
  * League types are taken from the table headings ending in " leagues"
  * (suffix removed, lower-cased). The n-th league table is assigned the n-th
  * collected type. Every `tbody/tr` row becomes one UserLeague appended to
  * $this->leagues.
  *
  * @param Crawler $crawler Crawler positioned on the page to scan.
  */
 public function populateLeagues($crawler)
 {
     $leagueTypes = [];
     $headings = $crawler->filterXPath('//*[@class="ismSecondary"]/*[@class="ismTableHeading"]');
     foreach ($headings as $headingNode) {
         $headingText = (new Crawler($headingNode))->text();
         if (!preg_match('~ leagues$~', $headingText)) {
             continue;
         }
         $leagueTypes[] = strtolower(preg_replace('~ leagues$~', '', $headingText));
     }

     $tables = $crawler->filterXPath('//*[@class="ismTable ismLeagueTable"]');
     foreach ($tables as $tableIndex => $tableNode) {
         $rows = (new Crawler($tableNode))->filterXpath('//tbody/tr');
         foreach ($rows as $rowNode) {
             $rowCrawler = new Crawler($rowNode);
             $league = new UserLeague();
             // Tables and headings are matched purely by position.
             $league->type = $leagueTypes[$tableIndex];
             $league->populate($rowCrawler);
             $this->leagues[] = $league;
         }
     }
 }
 /**
  * Search for torrents by scraping the `#searchResult` table.
  *
  * Builds the request URL with makeUrl(), downloads the page and turns every
  * table row after the header into a Torrent.
  *
  * @param string $query    Search terms, forwarded to makeUrl().
  * @param int    $category Category identifier, forwarded to makeUrl().
  * @return array Array of torrents. Either empty or filled.
  */
 public function search($query, $category)
 {
     // Browser-like User-Agent plus the 'lw' => 's' cookie, which according
     // to the original author's note selects the single-cell result view.
     $requestOptions = ['headers' => ['User-Agent' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'], 'cookies' => ['lw' => 's']];
     try {
         $url = $this->makeUrl($query, $category);
         $response = $this->httpClient->get($url, $requestOptions);
         $crawler = new Crawler((string) $response->getBody());
     } catch (\Exception $e) {
         // TODO: Log error. Some error has occurred.
         return [];
     }
     $items = $crawler->filter('#searchResult tr');
     $torrents = [];
     $firstRow = true;
     foreach ($items as $item) {
         // Ignore the first row.
         if ($firstRow) {
             $firstRow = false;
             continue;
         }
         $torrent = new Torrent();
         // All fields are read by 0-based cell position. The previous XPath
         // forms ('/td[3]/a[0]', '/td[2]', '/td[0]') could never match:
         // XPath positions are 1-based and a leading '/' is not relative to
         // the row, so attr()/text() threw on the first data row.
         $cells = (new Crawler($item))->filter('td');
         // Set details for torrent.
         $torrent->setSite($this->tag);
         $torrent->setTitle(trim($cells->eq(1)->text()));
         $torrent->setSeeders((int) $cells->eq(5)->text());
         $torrent->setLeechers((int) $cells->eq(6)->text());
         $torrent->setMagnet($cells->eq(3)->filter('a')->first()->attr('href'));
         $torrent->setSize($cells->eq(4)->text());
         $torrent->setAge($cells->eq(2)->text());
         $torrent->setCategory($cells->eq(0)->text());
         $torrents[] = $torrent;
     }
     return $torrents;
 }
Exemple #9
0
 /**
  * Collect the href of every anchor in the given markup as Url objects.
  *
  * @param string $html Markup to scan for `<a>` elements.
  *
  * @return \Spatie\Crawler\Url[] Result of mapping Url::create() over the
  *                               collected href values (a collect() collection).
  */
 protected function getAllLinks($html)
 {
     $hrefs = (new DomCrawler($html))->filterXpath('//a')->extract(['href']);

     $toUrl = function ($href) {
         return Url::create($href);
     };

     return collect($hrefs)->map($toUrl);
 }
Exemple #10
0
 /**
  * Create an Asset for every element matched by an XPath selector that
  * carries a non-empty URL attribute.
  *
  * @param string $html         Markup to search.
  * @param string $selector     XPath expression selecting candidate elements.
  * @param string $urlAttribute Name of the attribute holding the asset URL.
  * @param mixed  $type         Passed through to the Asset constructor.
  * @param mixed  $parentPage   Passed through to the Asset constructor.
  * @return array List of Asset objects, in document order of the matches.
  */
 protected function createAssetsFromDOMElements($html, $selector, $urlAttribute, $type, $parentPage)
 {
     $matches = (new Crawler($html))->filterXpath($selector);
     $assets = [];
     /** @var \DOMElement $node */
     foreach ($matches as $node) {
         $url = $node->getAttribute($urlAttribute);
         // Elements without the attribute (or with an empty() value) yield no asset.
         if (empty($url)) {
             continue;
         }
         if ($this->config->ignoreWhiteSpaces) {
             $url = trim($url);
         }
         $assets[] = new Asset($url, $parentPage, $node->ownerDocument->saveHTML($node), $type);
     }
     return $assets;
 }
Exemple #11
0
 /**
  * Store the URI of the first `<img>` found in $this->html.
  *
  * The first `src` value is passed through getUri() and assigned to
  * $this->image_uri.
  *
  * @return $this|null The instance itself, or null when the markup has no image.
  */
 public function findImage()
 {
     $sources = (new Crawler($this->html))->filterXpath('//img')->extract(['src']);

     if (!empty($sources)) {
         $this->image_uri = $this->getUri($sources[0]);

         return $this;
     }

     return null;
 }
Exemple #12
0
 /**
  * Read the text of every `//body/p` paragraph and group it into pairs.
  *
  * Example: paragraphs "A", "B", "C", "D", "E" produce
  * [["A", "B"], ["C", "D"], ["E"]].
  *
  * @param string $html Markup added to the crawler.
  * @return array List of rows, each holding up to two consecutive paragraph texts.
  */
 public function extractAction($html)
 {
     $crawler = new Crawler();
     $crawler->add($html);

     // Each extracted entry is array(text, class); only the text is used.
     $paragraphs = $crawler->filterXpath('//body/p')->extract(array('_text', 'class'));
     $texts = array();
     foreach ($paragraphs as $paragraph) {
         $texts[] = $paragraph[0];
     }

     // Consecutive texts form one row: (0, 1), (2, 3), ...
     return array_chunk($texts, 2);
 }
Exemple #13
0
 /**
  * Scrape a table nested in a form and return its entries as rows of
  * array(id, title, description, image URL).
  *
  * Despite its name, $url is handed straight to Crawler::add(), i.e. it is
  * treated as document content, not fetched.
  *
  * Columns are gathered independently and joined by position:
  *  - id:          text of every other `th` cell (even positions),
  *  - title:       `value` attribute of the inputs inside the `th` cells,
  *  - description: text of every other `td` cell (even positions),
  *  - image URL:   `src` of the images inside `td/a`.
  * The number of ids determines the number of rows; a column that is shorter
  * contributes null for the missing positions.
  *
  * @param string $url Markup of the page to scrape.
  * @return array List of array(0 => id, 1 => title, 2 => description, 3 => image URL).
  */
 public function extractAction($url)
 {
     $pageCrawler = new Crawler();
     $pageCrawler->add($url);

     // Serialize the form container and re-parse it as its own document, so
     // the table XPaths below are rooted at that fragment.
     $containerHtml = '';
     foreach ($pageCrawler->filterXpath('//html/body/div/div/form/div/div') as $containerNode) {
         $containerHtml .= $containerNode->ownerDocument->saveHTML($containerNode);
     }
     $tableCrawler = new Crawler();
     $tableCrawler->add($containerHtml);

     // IDs: text of the th cells at even positions.
     $headerCells = $tableCrawler->filterXpath('//html/body/div/table/tr/th');
     $ids = array();
     foreach ($headerCells->extract(array('_text', 'class')) as $position => $cell) {
         if ($position % 2 == 0) {
             $ids[] = $cell[0];
         }
     }

     // Descriptions: text of the td cells at even positions.
     $descriptions = array();
     $dataCells = $tableCrawler->filterXpath('//html/body/div/table/tr/td');
     foreach ($dataCells->extract(array('_text', 'class')) as $position => $cell) {
         if ($position % 2 == 0) {
             $descriptions[] = $cell[0];
         }
     }

     // Image URLs: src of every linked image. A second attribute is requested
     // so each extracted entry stays an array; only the src is used.
     $imageUrls = array();
     $images = $tableCrawler->filterXpath('//html/body/div/table/tr/td/a/img');
     foreach ($images->extract(array('src', 'img')) as $image) {
         $imageUrls[] = $image[0];
     }

     // Titles: re-parse the th cells on their own and read the value of the
     // inputs they contain.
     $headerHtml = '';
     foreach ($headerCells as $headerNode) {
         $headerHtml .= $headerNode->ownerDocument->saveHTML($headerNode);
     }
     $headerCrawler = new Crawler();
     $headerCrawler->add($headerHtml);
     $titles = array();
     $inputs = $headerCrawler->filterXpath('//html/body/th/input');
     foreach ($inputs->extract(array('value', 'input')) as $input) {
         $titles[] = $input[0];
     }

     // Join the columns by position. isset() keeps the previous outcome
     // (null) for missing positions without triggering undefined-offset
     // warnings.
     $rows = array();
     foreach ($ids as $index => $id) {
         $rows[$index] = array(
             0 => $id,
             1 => isset($titles[$index]) ? $titles[$index] : null,
             2 => isset($descriptions[$index]) ? $descriptions[$index] : null,
             3 => isset($imageUrls[$index]) ? $imageUrls[$index] : null,
         );
     }
     return $rows;
 }
Exemple #14
0
 /**
  * Extract title, link and price of every product matched by PRODUCT_XPATH.
  *
  * @param string $html Markup of the product listing page.
  * @return array List of array('title' => string, 'link' => string|null,
  *               'price' => string|null), one per product node. 'price' is
  *               the first number found in the price text, or null when the
  *               text contains no digits.
  */
 private function getFirstPageAttributes($html)
 {
     $crawler = new DomCrawler\Crawler($html);
     $descXpath = '//div[contains(concat(" ", normalize-space(@class), " "), " productInfo ")]/h3/a';
     $priceXpath = '//p[contains(concat(" ", normalize-space(@class), " "), " pricePerUnit ")]';
     // Digits with an optional decimal part of any length. The previous
     // pattern's first alternative accepted a single decimal digit only, so
     // "1.85" was captured as "1.8"; its [.|,] class also accepted a literal
     // "|" as separator.
     $priceRegEx = '/[0-9]+(?:[.,][0-9]+)?/';
     $nodeValues = $crawler->filterXpath(self::PRODUCT_XPATH)->each(function (DomCrawler\Crawler $node, $i) use ($descXpath, $priceXpath, $priceRegEx) {
         $thisLink = $node->filterXPath($descXpath)->first();
         $thisPriceText = trim($node->filterXPath($priceXpath)->first()->text());
         // Guard against price texts without any digits instead of reading
         // an undefined offset.
         $price = preg_match($priceRegEx, $thisPriceText, $priceMatch) ? $priceMatch[0] : null;
         return array('title' => trim($thisLink->text()), 'link' => $thisLink->attr('href'), 'price' => $price);
     });
     return $nodeValues;
 }