file_get_html() public static method

public static file_get_html ( ) : simple_html_dom
return simplehtmldom_1_5\simple_html_dom
コード例 #1
0
 /**
  * Collects the URLs listed in a sitemap.
  *
  * Loads the document at $sitemapLocation and returns the inner text of
  * every <loc> element, in the order find() reports them.
  *
  * @param string $sitemapLocation Location handed to HtmlDomParser::file_get_html().
  *
  * @return array The <loc> values; an empty array when the sitemap could not be loaded.
  */
 protected static function getUrlsFromSitemap($sitemapLocation)
 {
     $sitemap = HtmlDomParser::file_get_html($sitemapLocation);
     // A failed load yields an empty (falsy) value; calling find() on it
     // would be a fatal error, so report "no URLs" instead.
     if (empty($sitemap)) {
         return [];
     }
     $urls = [];
     foreach ($sitemap->find('loc') as $loc) {
         $urls[] = $loc->innertext;
     }
     return $urls;
 }
コード例 #2
0
ファイル: dom.php プロジェクト: moeinrahimi/beheshtinotifier
/**
 * Prints the plain text of the "#simple-list_11643" element of the
 * http://p-karaj.tvu.ac.ir/ home page.
 *
 * Nothing is printed when the page cannot be loaded or the element is absent.
 *
 * @return void
 */
function get_news()
{
    $news_page = HtmlDomParser::file_get_html("http://p-karaj.tvu.ac.ir/");
    if (empty($news_page)) {
        return;
    }
    // Passing index 0 makes find() return a single node (or null), not an
    // array. The previous code went on to index that node with [0], which is
    // a fatal error; the link it tried to build was never used, so that dead
    // code has been removed.
    $elems = $news_page->find("#simple-list_11643 ", 0);
    if ($elems === null) {
        return;
    }
    echo $elems->plaintext;
}
コード例 #3
0
ファイル: SiteScraper.php プロジェクト: volrac/scraper
 /**
  * Downloads and parses the document at $url.
  *
  * The parser's own loader is tried first; when its result compares equal to
  * an empty string, the page is fetched with the `wget` command-line tool
  * and the captured output is parsed instead.
  *
  * @param string $url Address of the page to download.
  *
  * @return mixed Whatever HtmlDomParser::file_get_html() / str_get_html() produced.
  */
 protected function downloadUrl($url)
 {
     $html = HtmlDomParser::file_get_html($url);
     // Loose comparison kept on purpose: it treats a failed load (false) as empty.
     if ($html == '') {
         $wget_result = [];
         // $url is escaped so it cannot inject extra shell commands or options.
         exec('wget -qO- ' . escapeshellarg($url) . ' 2>&1', $wget_result);
         // exec() fills an array with one entry per output line; the parser
         // needs a single string.
         $html = HtmlDomParser::str_get_html(implode("\n", $wget_result));
     }
     return $html;
 }
コード例 #4
0
ファイル: Fetcher.php プロジェクト: etd-framework/fetcher
 /**
  * Fetches the information of a page from its URL.
  *
  * @param string $url The full address of the page.
  *
  * @return array An array with the keys 'title', 'text', 'images' and 'metas'.
  *
  * @throws \InvalidArgumentException If the address fails the testUrl() check.
  * @throws \RuntimeException         If the page cannot be loaded.
  */
 public function fetch($url)
 {
     // Make sure we were really given a URL.
     if (!$this->testUrl($url)) {
         throw new \InvalidArgumentException($this->text->sprintf('APP_ERROR_BAD_URL', $url));
     }
     // Load the content behind the address.
     $dom = @HtmlDomParser::file_get_html($url);
     // An empty result means the load failed.
     if (empty($dom)) {
         throw new \RuntimeException($this->text->sprintf('APP_ERROR_UNABLE_TO_LOAD_URL', $url));
     }
     // Page title; an empty string when the document has no <title>.
     $titleNode = $dom->find('title', 0);
     $page_title = $titleNode ? $titleNode->text() : '';
     // Meta tags, matched against the configured key/tag pairs.
     $metas = [];
     $headNode = $dom->find('head', 0);
     $metaElements = $headNode ? $headNode->find('meta') : [];
     foreach ($metaElements as $element) {
         foreach ($this->meta as $meta) {
             if ($element->hasAttribute($meta['key'])) {
                 if (strtolower($element->getAttribute($meta['key'])) == $meta['tag']) {
                     $content = $element->getAttribute('content');
                     if (!empty($content)) {
                         $metas[$meta['name']] = $content;
                     }
                 }
             }
         }
     }
     // Page text, whitespace-collapsed.
     $bodyNode = $dom->find('body', 0);
     $body = $bodyNode ? trim($bodyNode->plaintext) : '';
     $body = preg_replace('/\\s+/', ' ', $body);
     // Keep an excerpt that ends at the first space at or after offset 200.
     // Searching with an offset beyond the string length is an error, and a
     // missing space must not wipe the text, so truncate only when both the
     // length and the search allow it.
     if (strlen($body) > 200) {
         $pos = strpos($body, ' ', 200);
         if ($pos !== false) {
             $body = substr($body, 0, $pos);
         }
     }
     // Images.
     $images = [];
     foreach ($dom->find('img') as $element) {
         // Only keep sources that are valid URLs.
         if ($this->testUrl($element->src)) {
             // Only keep image file extensions; a URL without a path has no extension.
             $parts = UriHelper::parse_url($element->src);
             if (isset($parts['path']) && in_array($this->file_ext($parts['path']), $this->image_extensions)) {
                 $images[] = $parts['scheme'] . '://' . $parts['host'] . $parts['path'];
             }
         }
     }
     // Reaching this point means everything went well.
     return ['title' => $page_title, 'text' => $body, 'images' => $images, 'metas' => $metas];
 }
コード例 #5
0
ファイル: RabotaOlx.php プロジェクト: Sywooch/find-parser
 /**
  * Extracts the price shown on a listing page.
  *
  * Reads the first "div.pricelabel strong" element, strips spaces from its
  * text and collects every run of digits.
  *
  * @param string $url Address of the listing page.
  *
  * @return array|string The list of digit runs found in the price text, or the
  *                      string "0" when the page, the price element or the
  *                      digits are missing.
  */
 public static function getPrice($url)
 {
     $parser = new HtmlDomParser();
     $dom = $parser->file_get_html($url);
     // Failed load: there is nothing to read a price from.
     if (empty($dom)) {
         return "0";
     }
     // Index 0 yields the first match or null, avoiding an undefined-offset
     // access when the page has no price label.
     $priceNode = $dom->find('div.pricelabel strong', 0);
     $price = $priceNode !== null ? $priceNode->plaintext : null;
     unset($dom);
     if (empty($price)) {
         return "0";
     }
     preg_match_all("/(\\d+)/", str_replace(" ", "", $price), $matches);
     // $matches[0] holds every full match; callers receive that list as-is.
     if (!empty($matches[0])) {
         return $matches[0];
     }
     return "0";
 }
コード例 #6
0
 /**
  * Loads and parses an HTML page.
  *
  * @param mixed $page Location passed to HTMLDomParser::file_get_html().
  *
  * @return mixed|null The value produced by file_get_html(), or null when
  *                    that value is empty.
  */
 public function ReadPage($page)
 {
     $document = HTMLDomParser::file_get_html($page);
     // Anything empty() treats as blank is reported to the caller as null.
     return empty($document) ? null : $document;
 }
コード例 #7
0
ファイル: Parser.php プロジェクト: balatsky/futhead
 /**
  * Get players info
  *
  * Walks the pages from $offset to $limit, turns every ".player-row" into a
  * data record via player(), and applies the optional "before" filter and
  * "after" transformer stored on this object.
  *
  * @param int $offset First page number.
  * @param int|null $limit Last page number; pages() is used when null.
  * @return array
  */
 public function players($offset = 1, $limit = null)
 {
     if (is_null($limit)) {
         $limit = $this->pages();
     }
     $collected = [];
     foreach (range($offset, $limit) as $pageNumber) {
         $document = Html::file_get_html($this->url . $pageNumber);
         foreach ($document->find('.player-row') as $row) {
             $data = $this->player($row);
             // A configured "before" callback acts as a filter: skip the
             // record when it returns a falsy value.
             if (null != $this->before && !call_user_func($this->before, $data)) {
                 continue;
             }
             // A configured "after" callback replaces the stored record.
             if (null != $this->after) {
                 $collected[] = call_user_func($this->after, $data);
             } else {
                 $collected[] = $data;
             }
         }
     }
     return $collected;
 }
コード例 #8
0
 /**
  * Reads the RSS feed of a request and returns its listings keyed by id.
  *
  * The id is cut out of each item's link; items whose id is not numeric are
  * skipped, as are duplicates (by id or title) when remove_duplicates is set.
  * When the request asks to follow links, each listing page is loaded and
  * additional fields plus the request's custom selectors are extracted.
  *
  * @param CraigslistRequest $request Request providing url(), follow() and selectors.
  *
  * @return array Listings indexed by id; empty when the feed cannot be
  *               downloaded or parsed.
  */
 private function getRSS(CraigslistRequest $request)
 {
     $body = file_get_contents($request->url());
     if ($body === false) {
         // Download failed: there is no feed to parse.
         return [];
     }
     $listings = simplexml_load_string(utf8_encode($body));
     if ($listings === false) {
         // Not well-formed XML; iterating over false would only raise a warning.
         return [];
     }
     $results = [];
     foreach ($listings as $item) {
         // Work with plain strings rather than SimpleXMLElement objects.
         $link = (string) $item->link;
         $title = (string) $item->title;
         $id = substr($link, -15, -5);
         if (!is_numeric($id)) {
             continue;
         }
         if ($this->remove_duplicates) {
             if (in_array($id, $this->ids) || in_array($title, $this->titles)) {
                 continue;
             }
             $this->ids[] = $id;
             $this->titles[] = $title;
         }
         $results[$id] = ['id' => $id, 'link' => $link, 'title' => $title, 'description' => (string) $item->description];
         if ($request->follow()) {
             $results[$id]['content'] = [];
             $dom = HtmlDomParser::file_get_html($link);
             if (empty($dom)) {
                 // Listing page could not be loaded; keep the feed data only.
                 continue;
             }
             // Each lookup may find nothing; the resulting warning is
             // silenced and the field is then stored as null.
             @($results[$id]['date'] = $dom->find('time', 0)->datetime);
             @($results[$id]['page_title'] = $dom->find('.postingtitletext', 0)->innertext);
             @($results[$id]['location'] = str_replace(['(', ')'], '', $dom->find('.postingtitletext small', 0)->innertext));
             @($results[$id]['price'] = $dom->find('.price', 0)->innertext);
             @($results[$id]['body'] = $dom->find('.postingbody, #postingbody, #postingBody', 0)->innertext);
             foreach ($dom->find('.attrgroup span') as $attr) {
                 $results[$id]['attributes'][] = $attr->innertext;
             }
             foreach ($request->selectors as $selector) {
                 $target = $selector['target'];
                 foreach ($dom->find($selector['element']) as $k => $attr) {
                     // Matches are numbered from 0, so everything past the
                     // limit can be dropped at once.
                     if (isset($selector['limit']) && $k > $selector['limit'] - 1) {
                         break;
                     }
                     $results[$id][$selector['label']][] = $attr->{$target};
                 }
             }
         }
     }
     return $results;
 }
コード例 #9
0
ファイル: Crawler.php プロジェクト: enrike1983/telegram_bot
 /**
  * Lists the ".name" entries of every theater whose heading matches a cinema.
  *
  * Inside each ".movie_results" block, every child is treated as a theater.
  * For each <h2> under a ".desc" element whose lower-cased text equals
  * $cinema_name, the text of all ".name" elements of that theater is collected.
  *
  * @param string $str         Location of the page to load.
  * @param string $cinema_name Cinema name to compare against the lower-cased heading.
  *
  * @return array List of one-element arrays, each holding a ".name" text.
  */
 public static function findMovies($str, $cinema_name)
 {
     $matches = array();
     $document = HtmlDomParser::file_get_html($str);
     foreach ($document->find(".movie_results") as $resultBlock) {
         foreach ($resultBlock->children() as $theater) {
             foreach ($theater->find('.desc') as $description) {
                 foreach ($description->find('h2') as $heading) {
                     // Only headings naming the requested cinema are of interest.
                     if (strtolower($heading->text()) != $cinema_name) {
                         continue;
                     }
                     foreach ($theater->find('.name') as $nameNode) {
                         $matches[] = array($nameNode->text());
                     }
                 }
             }
         }
     }
     return $matches;
 }
コード例 #10
0
 /**
  * Downloads the torrent file linked from a torrent card page.
  *
  * The card page is parsed for the "a[id=telecharger]" link, the linked file
  * is downloaded with cURL and written into the temporary folder. The local
  * file name is the last path segment of the card URL with its final five
  * characters replaced by ".torrent".
  *
  * @param string $torrentUrlCard Address of the torrent card page.
  *
  * @return string Path of the file written to the temporary folder.
  */
 public function downloadTorrentFile($torrentUrlCard)
 {
     $cardPage = HtmlDomParser::file_get_html($torrentUrlCard);
     $relativeLink = $cardPage->find('a[id=telecharger]', 0)->href;
     $downloadUrl = self::CPASBIEN_BASE_URL . $relativeLink;

     $handle = curl_init($downloadUrl);
     curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
     curl_setopt($handle, CURLOPT_COOKIESESSION, true);
     $payload = curl_exec($handle);
     curl_close($handle);

     // Keep only what follows the last slash of the card URL.
     $lastSlash = strrpos($torrentUrlCard, '/');
     if ($lastSlash === false) {
         $segment = $torrentUrlCard;
     } else {
         $segment = substr($torrentUrlCard, $lastSlash + 1);
     }
     // Swap the trailing five characters for the torrent extension.
     $localName = substr($segment, 0, strlen($segment) - 5) . '.torrent';
     $target = $this->tmpFolder . '/' . $localName;
     file_put_contents($target, $payload);
     return $target;
 }
コード例 #11
0
ファイル: mp3with.php プロジェクト: skipperbent/mp3vibez
 /**
  * Searches the service for songs matching a query.
  *
  * Every "ul.songs li" element of the result page becomes an mp3withResult
  * carrying the id and MP3 URL from its data attributes and, when a ".song"
  * element is present, the trimmed title and artist.
  *
  * @param string $query Search terms; URL-encoded before being placed in SERVICE_URL.
  *
  * @return array List of mp3withResult objects, empty when nothing matched.
  */
 public function search($query)
 {
     $serviceUrl = sprintf(self::SERVICE_URL, urlencode($query));
     $document = HtmlDomParser::file_get_html($serviceUrl);
     $items = $document->find('ul.songs li');
     $results = array();
     if (!$items) {
         return $results;
     }
     /* @var $item \simple_html_dom_node */
     foreach ($items as $item) {
         $entry = new mp3withResult();
         $entry->id = $item->attr['data-id'];
         $entry->url = 'http://mp3with.co' . $item->attr['data-mp3'];
         // Title and artist live inside the optional ".song" element.
         $details = $item->find('.song', 0);
         if ($details) {
             $entry->title = trim($details->find('strong', 0)->innertext);
             $entry->artist = trim($details->find('strong.artist', 0)->innertext);
         }
         $results[] = $entry;
     }
     return $results;
 }
コード例 #12
0
 /**
  * Get party information from URL
  *
  * Each ".borderbox1" element of the page describes one party. The name (and
  * website, when the title contains a link) comes from "h3.partytitle"; the
  * remaining fields are read from the successive <p> elements of the
  * "div.colun" and "div.coldeux" columns.
  *
  * @param $url
  * @return array of party information, indexed like the ".borderbox1" matches
  */
 protected function getPartiesInfo($url)
 {
     $document = HtmlDomParser::file_get_html($url);
     $party = [];
     foreach ($document->find('.borderbox1') as $index => $box) {
         $entry = [];
         $title = $box->find('h3.partytitle', 0);
         // If name section has website link
         if (strpos($title, 'href')) {
             $entry['name'] = $title->find('a', 0)->innertext;
             $entry['website'] = $title->find('a', 0)->href;
         } else {
             $entry['name'] = $title->innertext;
         }
         // Left column: three paragraphs in a fixed order.
         $leftColumn = $box->find('div.colun', 0);
         foreach (['short_name', 'leader', 'headquarters'] as $position => $field) {
             $paragraph = $leftColumn->find('p', $position)->innertext;
             $entry[$field] = $this->getAfterFirstBr($paragraph);
         }
         // Right column: the paragraph position shifts by one when a
         // deregistration entry is present, hence the running cursor.
         $rightColumn = $box->find('div.coldeux', 0);
         $cursor = 0;
         $eligible = $rightColumn->find('p', $cursor)->innertext;
         $cursor += 1;
         $entry['eligible_date'] = $this->getAfterSpan($eligible);
         $registered = $rightColumn->find('p', $cursor)->innertext;
         $cursor += 1;
         $entry['registered_date'] = $this->getAfterSpan($registered);
         if (strpos($rightColumn->innertext, 'Deregistered')) {
             $deregistered = $rightColumn->find('p', $cursor)->innertext;
             $cursor += 1;
             $entry['deregistered_date'] = $this->getAfterSpan($deregistered);
         }
         $chefAgent = $rightColumn->find('p', $cursor)->innertext;
         $cursor += 1;
         $entry['chef_agent'] = $this->getAfterFirstBr($chefAgent);
         $auditor = $rightColumn->find('p', $cursor)->innertext;
         $entry['auditor'] = $this->getAfterFirstBr($auditor);
         $party[$index] = $entry;
     }
     return $party;
 }
コード例 #13
0
ファイル: SearchController.php プロジェクト: tuytoosh/search
 /**
  * Crawls one not-yet-checked Film URL and stores the links found on it.
  *
  * The newest unchecked Film row is marked as checked. When its URL is on
  * filmiha.com and is not a tag page, every link of that page which is not
  * stored yet is saved as a new Film row; links to other hosts or to tag
  * pages are saved as already checked. Ends by dumping a success message
  * through dd().
  */
 public function getFind()
 {
     $pending = App\Film::where('check', '0')->orderBy('id', 'desc')->first();
     $pending->check = 1;
     $pending->save();

     $pendingParts = parse_url($pending->url);
     $isFilmPage = $pendingParts['host'] == 'filmiha.com' && substr($pendingParts['path'], 1, 3) != 'tag';
     if ($isFilmPage) {
         $document = HtmlDomParser::file_get_html($pending->url);
         foreach ($document->find('a') as $anchor) {
             $href = $anchor->href;
             // Links that are already stored are left alone.
             if (App\Film::where('url', $href)->count() != 0) {
                 continue;
             }
             $film = new App\Film();
             $hrefParts = parse_url($href);
             // Foreign hosts and tag pages are never crawled later on.
             if ($hrefParts['host'] != 'filmiha.com' || substr($hrefParts['path'], 1, 3) == 'tag') {
                 $film->check = 1;
             }
             $film->url = $href;
             $film->save();
         }
     }
     dd("Operation Was Successful! :) ");
 }
コード例 #14
0
ファイル: ParserOlx.php プロジェクト: Sywooch/find-parser
 /**
  * Retrieves the contact phone shown for a listing.
  *
  * The listing page is searched for "div.rel ul.brbott-12 li"; the id
  * embedded in the class attribute of the first match is posted to the
  * phone endpoint, and the "value" field of the JSON answer is returned,
  * unwrapped from its <span class="block"> element when it has one.
  *
  * @param string $url Address of the listing page.
  *
  * @return string|false The phone text, or false when the list element or
  *                      the phone value is missing.
  */
 protected static function getPhoneNumber($url)
 {
     $response = null;
     $parser = new HtmlDomParser();
     $page = $parser->file_get_html($url);
     $items = $page->find('div.rel ul.brbott-12 li');
     unset($page);
     if (empty($items)) {
         return false;
     }

     // The class attribute carries a fragment of the form 'id':'<value>'.
     preg_match("/\\'id\\'\\:\\'(.*?)\\'/", $items[0]->class, $idMatch);
     $pair = explode(':', $idMatch[0]);
     $adId = str_replace("'", "", $pair[1]);

     $curl = curl_init();
     if ($curl) {
         curl_setopt($curl, CURLOPT_URL, 'http://olx.ua/ajax/misc/contact/phone/' . $adId . '/white/');
         curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($curl, CURLOPT_POST, true);
         curl_setopt($curl, CURLOPT_POSTFIELDS, "");
         $response = curl_exec($curl);
         curl_close($curl);
     }

     $decoded = json_decode($response);
     if (empty($decoded->value)) {
         return false;
     }
     // Plain values are returned untouched.
     if (!preg_match("/<span\\sclass=\"block\">(.*)<\\/span\\>/", $decoded->value)) {
         return $decoded->value;
     }
     // Wrapped values are reduced to the content of the first span.
     $fragment = $parser->str_get_html($decoded->value);
     $number = $fragment->find('span[class=block]')[0]->innertext;
     unset($fragment);
     return $number;
 }
コード例 #15
0
ファイル: Pinterest.php プロジェクト: krike/crawler-tool
 /**
  * Finds the main image of a posted URL plus links to further sources.
  *
  * Reads 'url' and 'crawler' from $_POST, resolves the crawler's source
  * definition and uses its 'main_image' selector on the loaded page. The
  * first ".domainLinkWrapper" link and every ".boardLinkWrapper" link of the
  * first ".PagedCollection" are gathered as additional sources.
  *
  * @param bool $returnJson When true the result is echoed as JSON and the
  *                         script exits; otherwise the result is returned.
  *
  * @return array|false Array with 'image' and 'more-sources', or false when
  *                     the URL or crawler id is empty.
  */
 public function getImage($returnJson = true)
 {
     $url = $_POST['url'];
     $crawlerId = $_POST['crawler'];
     $crawlerModel = new CrawlerModel();
     $crawlerInfo = $crawlerModel->getCrawlerInfo($crawlerId);
     if (empty($url) || empty($crawlerId)) {
         return false;
     }
     $crawlerModel = new CrawlerModel();
     $source = $crawlerModel->getSourceById($crawlerInfo['source_type']);
     $dom = HtmlDomParser::file_get_html($url);
     $mainImages = $dom->find($source['main_image']);
     $imgSrc = $mainImages[0]->src;

     // Collect links to additional sources.
     $moreSources = array();
     $domainLinks = $dom->find('.domainLinkWrapper');
     if (isset($domainLinks[0])) {
         $moreSources[] = $domainLinks[0]->href;
     }
     // Paged collection (suggested boards under the pin).
     $collections = $dom->find('.PagedCollection');
     if (!empty($collections[0])) {
         foreach ($collections[0]->find('.boardLinkWrapper') as $boardLink) {
             $moreSources[] = $boardLink->href;
         }
     }

     $imgData = array('image' => $imgSrc, 'more-sources' => $moreSources);
     if (!$returnJson) {
         return $imgData;
     }
     echo json_encode($imgData);
     exit;
 }
コード例 #16
0
 /**
  * Stores the link and loads its document.
  *
  * @param mixed $link Location to remember and to load.
  * @param mixed $dom  Not used by this method.
  */
 public function init($link = null, $dom = null)
 {
     $this->link = $link;
     // The document is always loaded from the link that was just stored.
     $loaded = HtmlDomParser::file_get_html($this->link);
     $this->dom = $loaded;
 }
コード例 #17
0
ファイル: Rst.php プロジェクト: Sywooch/find-parser
 /**
  * Walks the rst.ua result pages of a search and stores every new listing.
  *
  * Result pages are requested with "&start=1" through "&start=1000" appended
  * to $baseUrl. Each "div[class=rst-ocb-i]" entry is parsed for link, year,
  * fuel, city, title and price, its detail page is loaded for the phone, and
  * an Items record is saved when productUnique() accepts the link.
  *
  * @param $baseUrl     Search URL the "&start=" page parameter is appended to.
  * @param $subcategory Value stored as subcategory_id on every saved item.
  */
 private function saveParseRst($baseUrl, $subcategory)
 {
     // No execution time limit; notices are hidden for the rest of the request.
     set_time_limit(0);
     error_reporting(E_ALL & ~E_NOTICE);
     $j = 1;
     while ($j <= 1000) {
         $url = $baseUrl . '&start=' . $j;
         $parser = new HtmlDomParser();
         // The loaded page is converted from windows-1251 to UTF-8 and parsed again.
         // NOTE(review): iconv() receives the return value of file_get_html();
         // presumably this relies on that value converting to a string - confirm.
         $html = iconv('windows-1251', 'UTF-8//IGNORE', $parser->file_get_html($url));
         $dom = $parser->str_get_html($html);
         $year = null;
         $fuel = null;
         $price = null;
         $link = null;
         $product = null;
         $city = null;
         $phone = null;
         // NOTE(review): the "- 1" bound leaves the last matched entry unprocessed,
         // and find() is re-run for every iteration - confirm both are intended.
         for ($i = 0; $i < count($dom->find('div[class=rst-ocb-i]')) - 1; $i++) {
             $dparser = new HtmlDomParser();
             // Each entry is re-parsed on its own so the selectors below only see this listing.
             $ddom = $dparser->str_get_html($dom->find('div[class=rst-ocb-i]')[$i]->innertext);
             $link = 'http://rst.ua' . $ddom->find('a.rst-ocb-i-a')[0]->href;
             // "/(.*)/" copies the text into element 0 of the match array.
             preg_match("/(.*)/", $ddom->find('li[class=rst-ocb-i-d-l-i]')[1]->plaintext, $year);
             preg_match("/(.*)/", $ddom->find('li[class=rst-ocb-i-d-l-i]')[2]->plaintext, $fuel);
             preg_match("/(.*)/", $ddom->find('li[class=rst-ocb-i-d-l-j]')[0]->plaintext, $city);
             $product = $ddom->find('h3[class=rst-ocb-i-h]')[0]->plaintext;
             // The phone is only available on the listing's detail page.
             $phone_parser = $dparser->file_get_html($link);
             $phone = utf8_encode($phone_parser->find('p[class=rst-page-oldcars-item-option-block-container]')[0]->plaintext);
             if (isset($phone) && !empty($phone)) {
                 // Keep the first run of digits.
                 preg_match("/\\d+/", $phone, $phone);
                 $phone = $phone[0];
             } else {
                 // Fall back to the first table cell of the option block.
                 $phone = utf8_encode($phone_parser->find('div.rst-page-oldcars-item-option-block-container td')[0]->plaintext);
             }
             unset($phone_parser);
             $price = str_replace("'", "", $ddom->find('span[class=rst-ocb-i-d-l-i-s rst-ocb-i-d-l-i-s-p]')[0]->plaintext);
             unset($dparser);
             if ($this->productUnique($link)) {
                 // $running and $transmission are taken from the parenthesised parts
                 // of the year and fuel texts before those variables are overwritten.
                 preg_match("/\\((\\d+).*?\\)/", $year[0], $running);
                 preg_match("/\\((.*?)\\)/", $fuel[0], $transmission);
                 preg_match("/\\d+/", $year[0], $year);
                 // NOTE(review): $phone is a string at this point, so $phone[0] is its
                 // first character, and the match array (not a string) ends up in
                 // $model->phone below - looks unintended, confirm.
                 preg_match("/\\d+/", $phone[0], $phone);
                 preg_match("/\\-?\\d+(\\.\\d{0,})?(.*?)\\(/", $fuel[0], $fuel);
                 preg_match("/(\\d+)/", $price, $price);
                 $city = explode(":", $city[0]);
                 $model = new Items();
                 $model->product = $product;
                 if (!empty($price)) {
                     $model->price = $price[0];
                 } else {
                     // No digits in the price text: a marker string is stored and the price is "0".
                     $model->options = "договорная";
                     $model->price = "0";
                 }
                 $model->url = $link;
                 $model->store = 'Rst';
                 $model->phone = $phone;
                 $model->subcategory_id = $subcategory;
                 // The JSON-like option string is appended to whatever options already
                 // holds (the marker above, when it was set).
                 $model->options .= '{"year":"' . trim($year[0]) . '","fuel":"' . trim($fuel[2]) . '","transmission":"' . trim($transmission[1]) . '","running":"' . trim($running[1]) . '","city":"' . trim($city[1]) . '","b/u":"1"}';
                 $model->save();
             }
         }
         $j++;
     }
 }
コード例 #18
0
 /**
  * Stores the property URL and loads its document.
  *
  * @param mixed $propertyUrl Location to remember and to load.
  * @param mixed $dom         Not used by this method.
  */
 public function init($propertyUrl = null, $dom = null)
 {
     $this->propertyUrl = $propertyUrl;
     // The document is loaded straight from the argument, not from the property.
     $loaded = HtmlDomParser::file_get_html($propertyUrl);
     $this->dom = $loaded;
 }
コード例 #19
0
ファイル: index.php プロジェクト: sinsery/demo
<?php

require "vendor/autoload.php";
use Sunra\PhpSimple\HtmlDomParser;
// Load the page and dump the inner text of its first <title> element
// inside a <pre> block.
$dom = HtmlDomParser::file_get_html("http://bilibili.com");
$titleNode = $dom->find('title', 0);
$elems = $titleNode->innertext;
echo "<pre>";
var_dump($elems);
コード例 #20
0
 /**
  * Collects call and put option prices of a symbol for one month.
  *
  * Loads the option table page and the futures data page, reads the futures
  * price with its 52-week high/low and the expiration date, makes sure a
  * Futures record exists for that expiration, and turns every price line
  * with a positive call or put price into an OptionPrice.
  *
  * @param Symbol $symbol
  * @param int    $monthNumber Month, 1 through 12.
  * @throws WrongMonth When $monthNumber is outside 1..12.
  * @return OptionPrice[] Empty when either page cannot be loaded or reports an error.
  */
 private function collectOptionPrices(Symbol $symbol, int $monthNumber) : array
 {
     if ($monthNumber < 1 || $monthNumber > 12) {
         throw new WrongMonth();
     }
     $optionTableUrl = $this->createOptionTableUrl($symbol, $monthNumber);
     /** @var simple_html_dom|false $optionHtml */
     $optionHtml = HtmlDomParser::file_get_html($optionTableUrl);
     if (!$optionHtml) {
         // Page could not be loaded: nothing to collect.
         return [];
     }
     if (count($optionHtml->find('span.error'))) {
         // Release the parsed document before leaving, as the regular path does.
         $optionHtml->clear();
         return [];
     }
     $futuresDataUrl = $this->createFuturesDataUrl($symbol, $monthNumber);
     /** @var simple_html_dom|false $futuresDataHtml */
     $futuresDataHtml = HtmlDomParser::file_get_html($futuresDataUrl);
     if (!$futuresDataHtml) {
         $optionHtml->clear();
         return [];
     }
     if (count($futuresDataHtml->find('span.error'))) {
         $futuresDataHtml->clear();
         $optionHtml->clear();
         return [];
     }
     // The expiration date is the second cell matched in the option page's content table.
     $futuresExpirationDate = strip_tags($optionHtml->find('#divContent table table tr td')[1]->text());
     $futuresPrice = $this->makeFloat($futuresDataHtml->find('#dtaLast')[0]->text());
     $futuresPrice52WeekNode = $futuresDataHtml->find('#main-content tr td td span strong');
     $futuresPrice52WeekHigh = $this->makeFloat($futuresPrice52WeekNode[0]->text());
     $futuresPrice52WeekLow = $this->makeFloat($futuresPrice52WeekNode[1]->text());
     // The futures page is no longer needed.
     $futuresDataHtml->clear();
     $futuresDataHtml = null;
     $priceLines = $optionHtml->find('.datatable_simple tr');
     $priceLinesCount = count($priceLines);
     $futures = $this->futuresRepository->findOneBySymbolAndExpiration($symbol, $futuresExpirationDate);
     if (!$futures) {
         $futures = new Futures($symbol, $futuresExpirationDate);
         $this->futuresRepository->persist($futures);
         $this->futuresRepository->flush();
     }
     $prices = [];
     foreach ($priceLines as $priceLineKey => $priceLine) {
         // Skip the first two rows and the last four rows of the table.
         if ($priceLineKey < 2 || $priceLineKey > $priceLinesCount - 5) {
             continue;
         }
         $priceLineNodes = $priceLine->children;
         // Cell 0 holds the call price, cell 3 the strike, cell 4 the put price.
         $callPriceNode = $priceLineNodes[0];
         $strikeNode = $priceLineNodes[3];
         $putPriceNode = $priceLineNodes[4];
         $strike = $this->makeFloat($strikeNode->text());
         $callPrice = $this->makeFloat($callPriceNode->text());
         $putPrice = $this->makeFloat($putPriceNode->text());
         if ($callPrice > 0) {
             $prices[] = $this->createOptionPrice(OptionContract::TYPE_CALL, $callPrice, $strike, $futuresPrice, $futuresPrice52WeekHigh, $futuresPrice52WeekLow, $futures);
         }
         if ($putPrice > 0) {
             $prices[] = $this->createOptionPrice(OptionContract::TYPE_PUT, $putPrice, $strike, $futuresPrice, $futuresPrice52WeekHigh, $futuresPrice52WeekLow, $futures);
         }
     }
     $optionHtml->clear();
     $optionHtml = null;
     return $prices;
 }
コード例 #21
0
     $elems = $news_page->find("#simple-list_11643 ", 0);
     $message = $elems->plaintext;
     $response = $client->sendMessage(array('chat_id' => $chatid, 'text' => $message, 'reply_to_message_id' => $messageid));
     break;
 case 'اخبار':
 case '/akhbar':
 case '/akhbar@BeheshtiNotifierBot':
     $params = array('chat_id' => $chatid, 'action' => 'typing');
     $response = $client->sendChatAction($params);
     $news_page = HtmlDomParser::file_get_html("http://p-karaj.tvu.ac.ir/");
     $elems = $news_page->find(".full-list article header a ");
     $link = $elems[0]->href;
     $fixLink = str_replace('./', '/', $link);
     $behe = "http://p-karaj.tvu.ac.ir";
     $url = $behe . $fixLink;
     $dom = HtmlDomParser::file_get_html($url);
     $elems = $dom->find("#content article div", 0);
     $message = $elems->plaintext;
     $response = $client->sendMessage(array('chat_id' => $chatid, 'text' => $message, 'reply_to_message_id' => $messageid));
     break;
 case 'کلاس جبرانی':
 case '/jobrani':
 case '/jobrani@BeheshtiNotifierBot':
     // $params  = array('chat_id' => $chatid, 'action' => 'typing');
     // $response   = $client -> sendChatAction($params);
     // $news_page =HtmlDomParser::file_get_html( "http://p-karaj.tvu.ac.ir/" );
     // $elems = $news_page->find("#simple-list_12031",0);
     // $message   =$elems->plaintext;
     $response = $client->sendMessage(array('chat_id' => $chatid, 'text' => 'این بخش غیر فعال است', 'reply_to_message_id' => $messageid));
     break;
 default:
コード例 #22
0
 public function actionRenderMetrics()
 {
     if (!$this->parsingDom) {
         $this->parsingDom = true;
         $oldPath = method_exists(craft()->templates, 'getTemplatesPath') ? craft()->templates->getTemplatesPath() : craft()->path->getTemplatesPath();
         $newPath = craft()->path->getPluginsPath() . 'seomatic/templates';
         method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($newPath) : craft()->path->setTemplatesPath($newPath);
         /* -- Render the SEOmatic display preview template */
         $url = urldecode(craft()->request->getParam('url'));
         if (UrlHelper::isAbsoluteUrl($url)) {
             $urlParts = parse_url($url);
             if (isset($urlParts['scheme'])) {
                 $rootUrl = $urlParts['scheme'] . "://" . $urlParts['host'];
             } else {
                 $rootUrl = "http" . "://" . $urlParts['host'];
             }
             if (isset($urlParts['port'])) {
                 $rootUrl .= $urlParts['port'] . "/";
             } else {
                 $rootUrl .= "/";
             }
             $keywordsParam = urldecode(craft()->request->getParam('keywords'));
             $keywordsKeys = explode(",", $keywordsParam);
             $keywords = array();
             /* -- Silly work-around for what appears to be a file_get_contents bug with https -> http://stackoverflow.com/questions/10524748/why-im-getting-500-error-when-using-file-get-contents-but-works-in-a-browser */
             $opts = array('http' => array('header' => "User-Agent:Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13\r\n"));
             $context = stream_context_create($opts);
             $dom = HtmlDomParser::file_get_html($url, false, $context);
             if ($dom) {
                 $textStatistics = new TS\TextStatistics();
                 /* -- See if robots.txt exists */
                 $hasRobotsTxt = false;
                 $hasSitemap = false;
                 $sitemapUrl = rtrim($rootUrl, '/') . "/sitemap.xml";
                 $foundSitemapUrl = "";
                 $robotsUrl = rtrim($rootUrl, '/') . "/robots.txt";
                 $robots = @file_get_contents($robotsUrl, false, $context);
                 if ($robots !== false) {
                     $hasRobotsTxt = true;
                     $lines = explode("\n", $robots);
                     foreach ($lines as $line) {
                         $line = ltrim($line);
                         $searchStr = 'Sitemap';
                         $pos = strpos($line, $searchStr);
                         if ($pos !== false) {
                             $pos += strlen($searchStr);
                             $foundSitemapUrl = substr($line, $pos);
                             $foundSitemapUrl = trim($sitemapUrl, ':');
                             $foundSitemapUrl = trim($sitemapUrl);
                         }
                     }
                 }
                 /* -- Check to see if a sitemap exists */
                 if ($foundSitemapUrl) {
                     $siteMapContents = "";
                     $siteMapContents = @file_get_contents($sitemapUrl, false, $context, 0, 1);
                     if ($siteMapContents !== false) {
                         $hasSitemap = true;
                     }
                 }
                 $siteMapContents = "";
                 $siteMapContents = @file_get_contents($sitemapUrl, false, $context, 0, 1);
                 if ($siteMapContents !== false) {
                     $hasSitemap = true;
                 }
                 /* -- See if the site is https */
                 $sslReturnCode = 0;
                 $sslUrl = "https" . "://" . $urlParts['host'];
                 if (isset($urlParts['port'])) {
                     $sslUrl .= $sslUrl['port'] . '/';
                 } else {
                     $sslUrl .= '/';
                 }
                 $ch = curl_init($sslUrl);
                 curl_setopt($ch, CURLOPT_NOBODY, true);
                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                 $open_basedir = ini_get('open_basedir');
                 if (empty($open_basedir)) {
                     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                 }
                 curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
                 curl_exec($ch);
                 $sslReturnCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                 curl_close($ch);
                 /* -- Check to see if the page is valid */
                 $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url) . "&output=json";
                 $ch = curl_init();
                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                 curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                 curl_setopt($ch, CURLOPT_URL, $validatorUrl);
                 $validatorResult = curl_exec($ch);
                 curl_close($ch);
                 $validatorStatus = $validatorErrors = $validatorWarnings = "";
                 if ($validatorResult) {
                     $searchStr = "X-W3C-Validator-Status: ";
                     $pos = strpos($validatorResult, $searchStr);
                     if ($pos !== false) {
                         $pos += strlen($searchStr);
                         $validatorStatus = substr($validatorResult, $pos, strpos($validatorResult, PHP_EOL, $pos) - $pos);
                     }
                     $searchStr = "X-W3C-Validator-Errors: ";
                     $pos = strpos($validatorResult, $searchStr);
                     if ($pos !== false) {
                         $pos += strlen($searchStr);
                         $validatorErrors = substr($validatorResult, $pos, strpos($validatorResult, PHP_EOL, $pos) - $pos);
                     }
                     $searchStr = "X-W3C-Validator-Warnings: ";
                     $pos = strpos($validatorResult, $searchStr);
                     if ($pos !== false) {
                         $pos += strlen($searchStr);
                         $validatorWarnings = substr($validatorResult, $pos, strpos($validatorResult, PHP_EOL, $pos) - $pos);
                     }
                 }
                 $validatorUrl = "https://validator.w3.org/check?uri=" . urlencode($url);
                 /* -- Check Google Pagespeed insights for desktop */
                 $pagespeedDesktopScore = "";
                 $pagespeedDesktopUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=desktop";
                 $ch = curl_init();
                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                 curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                 curl_setopt($ch, CURLOPT_URL, $pagespeedDesktopUrl);
                 $pagespeedDesktopResult = curl_exec($ch);
                 curl_close($ch);
                 $pageSpeedPageStats = array();
                 if ($pagespeedDesktopResult) {
                     $pagespeedJson = json_decode($pagespeedDesktopResult, true);
                     if ($pagespeedJson) {
                         if (!empty($pagespeedJson['pageStats'])) {
                             $pageSpeedPageStats = $pagespeedJson['pageStats'];
                             if (empty($pageSpeedPageStats['htmlResponseBytes'])) {
                                 $pageSpeedPageStats['htmlResponseBytes'] = 0;
                             }
                             if (empty($pageSpeedPageStats['cssResponseBytes'])) {
                                 $pageSpeedPageStats['cssResponseBytes'] = 0;
                             }
                             if (empty($pageSpeedPageStats['imageResponseBytes'])) {
                                 $pageSpeedPageStats['imageResponseBytes'] = 0;
                             }
                             if (empty($pageSpeedPageStats['javascriptResponseBytes'])) {
                                 $pageSpeedPageStats['javascriptResponseBytes'] = 0;
                             }
                             if (empty($pageSpeedPageStats['otherResponseBytes'])) {
                                 $pageSpeedPageStats['otherResponseBytes'] = 0;
                             }
                             $pageSpeedPageStats['totalResponseBytes'] = $pageSpeedPageStats['htmlResponseBytes'] + $pageSpeedPageStats['cssResponseBytes'] + $pageSpeedPageStats['imageResponseBytes'] + $pageSpeedPageStats['javascriptResponseBytes'] + $pageSpeedPageStats['otherResponseBytes'];
                         }
                         if (isset($pagespeedJson['responseCode']) && ($pagespeedJson['responseCode'] == "200" || $pagespeedJson['responseCode'] == "301" || $pagespeedJson['responseCode'] == "302")) {
                             if (isset($pagespeedJson['ruleGroups']['SPEED']['score'])) {
                                 $pagespeedDesktopScore = intval($pagespeedJson['ruleGroups']['SPEED']['score']);
                             }
                         }
                     }
                 }
                 $pagespeedDesktopUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=desktop";
                 /* -- Check Google Pagespeed insights for desktop */
                 $pagespeedMobileScore = "";
                 $pagespeedMobileUsability = "";
                 $pagespeedMobileUrl = "https://www.googleapis.com/pagespeedonline/v2/runPagespeed?url=" . urlencode($url) . "&strategy=mobile";
                 $ch = curl_init();
                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                 curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
                 curl_setopt($ch, CURLOPT_URL, $pagespeedMobileUrl);
                 $pagespeedMobileResult = curl_exec($ch);
                 curl_close($ch);
                 if ($pagespeedMobileResult) {
                     $pagespeedJson = json_decode($pagespeedMobileResult, true);
                     if ($pagespeedJson) {
                         if (isset($pagespeedJson['responseCode']) && ($pagespeedJson['responseCode'] == "200" || $pagespeedJson['responseCode'] == "301" || $pagespeedJson['responseCode'] == "302")) {
                             if (isset($pagespeedJson['ruleGroups']['SPEED']['score'])) {
                                 $pagespeedMobileScore = intval($pagespeedJson['ruleGroups']['SPEED']['score']);
                             }
                             if (isset($pagespeedJson['ruleGroups']['USABILITY']['score'])) {
                                 $pagespeedMobileUsability = intval($pagespeedJson['ruleGroups']['USABILITY']['score']);
                             }
                         }
                     }
                 }
                 $pagespeedMobileUrl = "https://developers.google.com/speed/pagespeed/insights/?url=" . urlencode($url) . "&tab=mobile";
                 /* -- Scrape for JSON-LD before we remove the <script> tags */
                 $jsonLdTypes = array();
                 foreach ($dom->find('script[type=application/ld+json]') as $elem) {
                     $jsonArray = json_decode($elem->innertext, true);
                     if (isset($jsonArray['@type'])) {
                         array_push($jsonLdTypes, $jsonArray['@type']);
                     }
                 }
                 $jsonLdTypes = array_unique($jsonLdTypes);
                 /* -- Remove inline <script> and <style> tags, and then strip the DOM down */
                 foreach ($dom->find('style') as $element) {
                     $element->outertext = '';
                 }
                 foreach ($dom->find('script') as $element) {
                     $element->outertext = '';
                 }
                 $strippedDom = html_entity_decode($dom->plaintext);
                 //                    $strippedDom = preg_replace('@[^0-9a-z\.\!]+@i', ', ', $strippedDom);
                 $strippedDom = stripslashes($strippedDom);
                 $htmlDom = html_entity_decode($dom->outertext);
                 //                    $htmlDom = preg_replace('@[^0-9a-z\.\!]+@i', '', $htmlDom);
                 /* -- SEO statistics */
                 $titleTag = html_entity_decode($dom->find('title', 0)->plaintext);
                 $titleLength = strlen($titleTag);
                 $metaDescriptionTag = "";
                 $metaDescriptionLength = 0;
                 $elem = $dom->find('meta[name=description]', 0);
                 if ($elem) {
                     $metaDescriptionTag = html_entity_decode($elem->content);
                     $metaDescriptionLength = strlen($metaDescriptionTag);
                 }
                 $metaTwitterTag = "";
                 $elem = $dom->find('meta[name=twitter:card],meta[property=twitter:card]', 0);
                 if ($elem) {
                     $metaTwitterTag = html_entity_decode($elem->content);
                 }
                 $metaOpenGraphTag = "";
                 $elem = $dom->find('meta[property=og:type],meta[property=og:url],meta[property=og:title]', 0);
                 if ($elem) {
                     $metaOpenGraphTag = html_entity_decode($elem->content);
                 }
                 $hasRelPublisherTag = false;
                 $elem = $dom->find('link[rel=publisher]', 0);
                 if ($elem) {
                     $hasRelPublisherTag = true;
                 }
                 $emptyImageAlts = count($dom->find('img[!alt]'));
                 $h1Tags = count($dom->find('h1'));
                 $h2Tags = count($dom->find('h2'));
                 $h3Tags = count($dom->find('h3'));
                 $h4Tags = count($dom->find('h4'));
                 $h5Tags = count($dom->find('h5'));
                 $totalHTags = $h1Tags + $h2Tags + $h3Tags + $h4Tags + $h5Tags;
                 $effectiveHTags = true;
                 if ($h1Tags != 1) {
                     $effectiveHTags = false;
                 }
                 if ($totalHTags < 3) {
                     $effectiveHTags = false;
                 }
                 if ($h2Tags == 0 && ($h3Tags || $h4Tags || $h5Tags)) {
                     $effectiveHTags = false;
                 }
                 if ($h3Tags == 0 && ($h4Tags || $h5Tags)) {
                     $effectiveHTags = false;
                 }
                 if ($h4Tags == 0 && $h5Tags) {
                     $effectiveHTags = false;
                 }
                 $textToHtmlRatio = strlen($strippedDom) / (strlen($htmlDom) - strlen($strippedDom)) * 100;
                 $strippedDom = preg_replace('/\\s+/', ' ', $strippedDom);
                 /* -- Extract the page keywords, and clean them up a bit */
                 $pageKeywords = craft()->seomatic->extractKeywords($strippedDom);
                 $pageKeywords = str_replace(",,", ",", $pageKeywords);
                 $pageKeywords = str_replace(" ,", ",", $pageKeywords);
                 $pageKeywords = str_replace(" .", ".", $pageKeywords);
                 $pageKeywords = preg_replace('/\\.+/', '.', $pageKeywords);
                 $pageKeywords = preg_replace('/,+/', ',', $pageKeywords);
                 $pageKeywords = str_replace(",.,", ",", $pageKeywords);
                 $pageKeywords = html_entity_decode($pageKeywords, ENT_COMPAT, 'UTF-8');
                 /* -- Focus keywords */
                 foreach ($keywordsKeys as $keywordsKey) {
                     $keywordsKey = trim($keywordsKey);
                     if (strlen($keywordsKey)) {
                         $appearsInH1Tag = 0;
                         foreach ($dom->find('h1') as $element) {
                             $appearsInH1Tag += substr_count(strtolower($element->plaintext), strtolower($keywordsKey));
                         }
                         foreach ($dom->find('h2') as $element) {
                             $appearsInH1Tag += substr_count(strtolower($element->plaintext), strtolower($keywordsKey));
                         }
                         $appearsInImgTag = 0;
                         foreach ($dom->find('img') as $element) {
                             $appearsInImgTag += substr_count(strtolower($element->alt), strtolower($keywordsKey));
                         }
                         $appearsInAhrefTag = 0;
                         foreach ($dom->find('a') as $element) {
                             $appearsInAhrefTag += substr_count(strtolower($element->plaintext), strtolower($keywordsKey));
                         }
                         $keywords[$keywordsKey] = array('appearsInTitleTag' => substr_count(strtolower($titleTag), strtolower($keywordsKey)), 'appearsInUrl' => substr_count(strtolower($url), strtolower($keywordsKey)), 'appearsInMetaDescriptionTag' => substr_count(strtolower($metaDescriptionTag), strtolower($keywordsKey)), 'appearsInH1Tag' => $appearsInH1Tag, 'appearsInAhrefTag' => $appearsInAhrefTag, 'appearsInImgTag' => $appearsInImgTag, 'appearsInPageKeywords' => substr_count(strtolower($pageKeywords), strtolower($keywordsKey)), 'appearsOnWebPage' => substr_count(strtolower($strippedDom), strtolower($keywordsKey)));
                     }
                 }
                 /* -- Text statistics */
                 $wordCount = $textStatistics->wordCount($strippedDom);
                 $readingTime = floor($wordCount / 200);
                 if ($readingTime === 0) {
                     $readingTime = 1;
                 }
                 $fleschKincaidReadingEase = $textStatistics->fleschKincaidReadingEase($strippedDom);
                 $fleschKincaidGradeLevel = $textStatistics->fleschKincaidGradeLevel($strippedDom);
                 $gunningFogScore = $textStatistics->gunningFogScore($strippedDom);
                 $colemanLiauIndex = $textStatistics->colemanLiauIndex($strippedDom);
                 $smogIndex = $textStatistics->smogIndex($strippedDom);
                 $automatedReadabilityIndex = $textStatistics->automatedReadabilityIndex($strippedDom);
                 $vars = array('titleTag' => $titleTag, 'titleLength' => $titleLength, 'metaDescriptionTag' => $metaDescriptionTag, 'metaDescriptionLength' => $metaDescriptionLength, 'metaTwitterTag' => $metaTwitterTag, 'metaOpenGraphTag' => $metaOpenGraphTag, 'hasRelPublisherTag' => $hasRelPublisherTag, 'jsonLdTypes' => $jsonLdTypes, 'hasRobotsTxt' => $hasRobotsTxt, 'hasSitemap' => $hasSitemap, 'emptyImageAlts' => $emptyImageAlts, 'validatorUrl' => $validatorUrl, 'validatorStatus' => $validatorStatus, 'validatorErrors' => $validatorErrors, 'validatorWarnings' => $validatorWarnings, 'pageSpeedPageStats' => $pageSpeedPageStats, 'pagespeedDesktopScore' => $pagespeedDesktopScore, 'pagespeedDesktopUrl' => $pagespeedDesktopUrl, 'pagespeedMobileScore' => $pagespeedMobileScore, 'pagespeedMobileUsability' => $pagespeedMobileUsability, 'pagespeedMobileUrl' => $pagespeedMobileUrl, 'sslReturnCode' => $sslReturnCode, 'h1Tags' => $h1Tags, 'h2Tags' => $h2Tags, 'h3Tags' => $h3Tags, 'h4Tags' => $h4Tags, 'h5Tags' => $h5Tags, 'effectiveHTags' => $effectiveHTags, 'textToHtmlRatio' => $textToHtmlRatio, 'wordCount' => $wordCount, 'readingTime' => $readingTime, 'pageKeywords' => $pageKeywords, 'keywords' => $keywords, 'fleschKincaidReadingEase' => $fleschKincaidReadingEase, 'fleschKincaidGradeLevel' => $fleschKincaidGradeLevel, 'gunningFogScore' => $gunningFogScore, 'colemanLiauIndex' => $colemanLiauIndex, 'smogIndex' => $smogIndex, 'automatedReadabilityIndex' => $automatedReadabilityIndex);
                 //$htmlText = craft()->templates->render('_seo_metrics.twig', $vars);
                 $this->renderTemplate('_seo_metrics.twig', $vars);
             } else {
                 $this->renderTemplate('_error', array('errorMessage' => "Error parsing the DOM.  Is this a valid, publicly accessible URL?"));
             }
         } else {
             $this->renderTemplate('_error', array('errorMessage' => "Error loading the webpage. Is this a valid, publicly accessible URL?"));
         }
         method_exists(craft()->templates, 'setTemplatesPath') ? craft()->templates->setTemplatesPath($oldPath) : craft()->path->setTemplatesPath($oldPath);
     }
     $this->parsingDom = false;
 }
コード例 #23
0
ファイル: import_posts.php プロジェクト: ahocquard/vdm
// SQLite connection settings (driver, database file, empty table prefix).
// NOTE(review): $settings_db is defined here, but the connection below is built
// from $settings['database'] — confirm which of the two is intended.
$settings_db = array('driver' => 'sqlite', 'database' => 'vdm_posts_db.sqlite', 'prefix' => '');
// Bootstrap Eloquent ORM: build a connection through the factory, register it
// as the 'default' connection and hand the resolver to the Eloquent models.
$container = new Container();
$connFactory = new \Illuminate\Database\Connectors\ConnectionFactory($container);
$conn = $connFactory->make($settings['database']);
$resolver = new \Illuminate\Database\ConnectionResolver();
$resolver->addConnection('default', $conn);
$resolver->setDefaultConnection('default');
\Illuminate\Database\Eloquent\Model::setConnectionResolver($resolver);
// Counters for the import loop that follows: posts stored so far and the
// page number appended to the scraped URL.
$numberPostsAdded = 0;
$page = 0;
// Delete all rows in the posts table before starting
\Post::truncate();
while ($numberPostsAdded < $nbPostMax) {
    // get page
    $dom = HtmlDomParser::file_get_html($settings['script']['url'] . '?page=' . $page);
    // parse all posts in page
    foreach ($dom->find('div.post') as $post) {
        if ($numberPostsAdded < $nbPostMax) {
            // simple php parser can't get directly an object associated with two classes
            // then we have to filter on article directly after
            if ($post->class == "post article") {
                // get id
                $id = $post->id;
                // get content which is in multiple tags
                $content = '';
                foreach ($post->find('p', 0)->find('a') as $sentence) {
                    $content .= $sentence->plaintext;
                }
                // get author and date whick are not in separated field
                $authorDate = $post->find('.right_part', 0)->find('p', 1);
コード例 #24
0
    $elems = $news_page->find(".full-list article header a ");
    $link = $elems[0]->href;
    $fixLink = str_replace('./', '/', $link);
    //echo $fixLink;
    $behe = "http://p-karaj.tvu.ac.ir";
    $url = $behe . $fixLink;
    //echo $url;
    $dom = HtmlDomParser::file_get_html($url);
    $elems = $dom->find("#content article div", 0);
    $message = $elems->plaintext;
    $params = array('chat_id' => $chatid, 'action' => 'typing');
    $response = $client->sendChatAction($params);
    $response = $client->sendMessage(array('chat_id' => $chatid, 'text' => $message, 'reply_to_message_id' => $messageid));
}
// Handle the "/jobrani" command (plain or addressed to the bot by name):
// scrape one list from the college home page and send its text back as a
// reply to the requesting message.
if ($text == '/jobrani' || $text == '/jobrani@BeheshtiNotifierBot') {
    $news_page = HtmlDomParser::file_get_html("http://p-karaj.tvu.ac.ir/");
    // First element matching the list id.
    // NOTE(review): the result is not checked before ->plaintext is read;
    // presumably find(..., 0) yields null when nothing matches — confirm.
    $elems = $news_page->find("#simple-list_12031", 0);
    $message = $elems->plaintext;
    // Send a "typing" chat action before the message itself.
    $params = array('chat_id' => $chatid, 'action' => 'typing');
    $response = $client->sendChatAction($params);
    $response = $client->sendMessage(array('chat_id' => $chatid, 'text' => $message, 'reply_to_message_id' => $messageid));
}
//switch($text){
//
//    case '/hi':
//
//
//
//
//
//
コード例 #25
0
ファイル: Scraper.php プロジェクト: Kitchup/vdm-scraper
 /**
  * Retrieves the last $postCount posts from vdm.
  *
  * Pages are fetched one after another, starting at page 0, until enough
  * posts have been collected. Fetching stops early when a page cannot be
  * loaded or contains no '.article' element, so the result may hold fewer
  * than $postCount entries instead of looping forever.
  *
  * @param  int $postCount : the number of entries to retrieve
  * @return mixed the collected posts, sorted by descending 'date' and
  *               limited to the first $postCount entries
  */
 protected function getLatestPosts($postCount)
 {
     // posts array is empty
     $posts = [];
     // starting at page 0 (source architecture)
     $pageId = 0;
     // fetching until enough posts are retrieved
     while (Arrays::size($posts) < $postCount) {
         $dom = HtmlDomParser::file_get_html(sprintf($this->getBaseUrl(), $pageId));
         // file_get_html() returns false when the page could not be loaded;
         // calling find() on it would fail, so stop with what we have.
         if (!$dom) {
             break;
         }
         $domPosts = $dom->find('.article');
         // A page without posts means the source is exhausted; without this
         // guard the loop would never reach $postCount and never terminate.
         if (empty($domPosts)) {
             break;
         }
         $posts = Arrays::merge($posts, $domPosts);
         $pageId++;
     }
     // sorting by descending date and keeping only the $postCount first entries
     return Arrays::from($posts)->sort('date', 'desc')->first($postCount)->obtain();
 }
コード例 #26
0
 /**
  * Scrapes the current futures symbols from the "all futures" page.
  *
  * For each commodity the second cell of a fixed row in a fixed table is
  * read, its first space-separated token is kept, and the second-to-last
  * character of that token is removed.
  *
  * @return array symbol strings keyed by commodity name
  */
 public function parseActualFutures()
 {
     // Table id => (commodity name => row index inside that table's tbody)
     $layout = [
         'dt2' => ['CrudeOil' => 1, 'NaturalGas' => 4],
         'dt4' => ['Wheat' => 1, 'Corn' => 2, 'Soybeans' => 3],
         'dt5' => ['Emini' => 1, 'DJMini' => 3],
         'dt7' => ['Gold' => 1, 'Silver' => 2],
     ];
     $page = HtmlDomParser::file_get_html($this->config["url"]["futuresall"]);
     // Process the futures tables
     $symbols = [];
     foreach ($layout as $tableId => $rows) {
         $tbody = $page->find('table#' . $tableId . ' tbody', 0);
         foreach ($rows as $name => $rowIndex) {
             $symbols[$name] = $tbody->find('tr', $rowIndex)->find('td', 1)->plaintext;
         }
     }
     // Bring the symbols to the required form
     foreach ($symbols as $name => $cellText) {
         $pieces = explode(' ', $cellText);
         $ticker = $pieces[0];
         // Everything but the last two characters, then only the final one
         $symbols[$name] = substr($ticker, 0, -2) . substr(substr($ticker, -2), 1);
     }
     return $symbols;
 }
コード例 #27
0
 /**
  * Stores the target URL and loads its DOM.
  *
  * The page is fetched and parsed immediately through
  * HtmlDomParser::file_get_html() and the result is kept in $this->dom.
  *
  * @param string $url address passed verbatim to HtmlDomParser::file_get_html()
  */
 public function setUrl($url)
 {
     $this->url = $url;
     // NOTE(review): the result is stored unchecked; presumably
     // file_get_html() can return false on failure — confirm that code
     // reading $this->dom handles that.
     $this->dom = HtmlDomParser::file_get_html($url);
 }
コード例 #28
0
ファイル: spider.php プロジェクト: slpi1/phpSpider
 /**
  * Fetch the DOM object for a document.
  *
  * @param  array $file document model data; its 'url' entry is the address to load
  * @return simple_html_dom|false the parsed DOM, or false when the page could
  *                               not be loaded or the parsed DOM has no root node
  */
 public function get_file($file)
 {
     $html = HtmlDomParser::file_get_html($file['url']);
     // file_get_html() returns false on a failed load; check that first so
     // ->root is never read from a non-object.
     if (!$html || !$html->root) {
         return false;
     }
     return $html;
 }
コード例 #29
0
        # Fundamento
        $infraccion['sancion'] = parseSancion(html_entity_decode($tablaInfrancciones->find('td', 9)->plaintext));
        #Sanción
        $GLOBALS['sumaAdeudos'] += $infraccion['sancion']['monto'];
    } else {
        $infraccion['motivo'] = html_entity_decode($tablaInfrancciones->find('td', 4)->plaintext);
        # Motivo
        $infraccion['fundamento'] = parseFundamento($tablaInfrancciones->find('td', 6)->plaintext);
        # Fundamento
        $infraccion['sancion'] = parseSancion(html_entity_decode($tablaInfrancciones->find('td', 8)->plaintext));
        #Sanción
    }
    $infracciones[] = $infraccion;
}
# Vehicle-tax ("tenencia") debts: load the page for the given plates
# NOTE(review): the result of file_get_html() is used unchecked; presumably it
# can be false when the page fails to load — confirm.
$dom = HtmlDomParser::file_get_html(URL_TENENCIA . $placas);
# Every cell of the #tabla500 table
$tdAdeudos = $dom->find('#tabla500 td');
$aniosAdeudosTenencia = array();
foreach ($tdAdeudos as $tdAdeudo) {
    # Keep only cells whose text starts with four digits followed by a word
    # boundary (presumably the year owed — confirm against the page markup)
    if (preg_match('/^\\d{4}\\b/', $tdAdeudo->plaintext)) {
        $aniosAdeudosTenencia[] = $tdAdeudo->plaintext;
    }
}
# Si no tiene adeudos de tenencia, busca info general
# TODO: Refactor se repite abajo
if (empty($aniosAdeudosTenencia)) {
    $curl = new Curl();
    $curl->post(URL_CALCULO_TENENCIA, array('placa' => $placas, 'ejercicio' => 2015));
    $jsonCalculoTenencia = json_decode($curl->response, true);
    $infoAuto['modelo'] = (int) $jsonCalculoTenencia['modelo'];
    if ($jsonCalculoTenencia['procedencia'] == 'N') {