PHP PHPHtmlParser Dom::loadFromUrl Examples

Programming Language: PHP

Namespace/Package Name: PHPHtmlParser

Class/Type: Dom

Method/Function: loadFromUrl

Examples at hotexamples.com: 11

PHP PHPHtmlParser Dom::loadFromUrl - 11 examples found. These are the top rated real world PHP examples of PHPHtmlParser\Dom::loadFromUrl extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

find(30)

load(30)

loadFromUrl(11)

loadFromFile(9)

getElementsByTag(5)

setOptions(5)

getElementById(3)

getElementsByClass(2)

addSelfClosingTag(1)

clearSelfClosingTags(1)

firstChild(1)

getAttribute(1)

lastChild(1)

loadStr(1)

removeSelfClosingTag(1)

loadFromUrl() public method

Use a curl interface implementation to attempt to load the content from a url.

public loadFromUrl ( string $url, array $options = [], PHPHtmlParser\CurlInterface $curl = null )
$url	string
$options	array
$curl	PHPHtmlParser\CurlInterface

Dom Class Documentation

Example #1

Show file

File: Craigslist.php Project: nnrudakov/glabs

 /**
  * Collects listing objects from the results page at $url and keeps following
  * pagination until isEnoughCollect() reports that enough were gathered.
  *
  * A request whose curl error message contains "timed out" is retried on the
  * same URL; any other curl failure is rethrown as a CurlException, as is a
  * page announcing that the IP has been blocked.
  *
  * @inheritdoc
  */
 protected function collectObjects($url)
 {
     $alreadyTracked = array_key_exists($url, $this->collectedCount);
     if (!$alreadyTracked) {
         $this->collectedCount[$url] = 0;
     }
     $page = new Dom();
     try {
         $page->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $failure) {
         $timedOut = false !== strpos($failure->getMessage(), 'timed out');
         if (!$timedOut) {
             throw new CurlException($failure->getMessage());
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     if (false !== strpos($page, 'This IP has been automatically blocked.')) {
         throw new CurlException('IP has been blocked.');
     }
     // A "#moon" element marks the end of the results: nothing left to collect.
     if ($page->find('#moon')[0]) {
         return true;
     }
     $this->checkTotalObjects($page);
     // Scheme + host, handed to checkObjectLink() together with each href.
     $host = 'http://' . parse_url($url, PHP_URL_HOST);
     /* @var \PHPHtmlParser\Dom\AbstractNode $row */
     foreach ($page->find('.txt') as $row) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $anchor */
         $anchor = $row->find('a')[0];
         if (!$anchor) {
             continue;
         }
         $href = $this->checkObjectLink($host, $anchor->getAttribute('href'));
         if (false === $href) {
             continue;
         }
         // Fall back to the tag-stripped inner HTML when the anchor has no direct text.
         $title = $anchor->text();
         if (!$title) {
             $title = strip_tags($anchor->innerHtml());
         }
         try {
             $object = $this->getObjectModel($url, $href, $title, $this->categoryId, $this->type);
             $object->setPrice($row);
         } catch (ObjectException $ignored) {
             // Objects that cannot be built or priced are skipped.
             continue;
         }
         $this->collected[] = $href;
         $this->objects[] = $object;
         $this->collectedCount[$url]++;
         BaseSite::$doneObjects++;
         BaseSite::progress();
     }
     if ($this->isEnoughCollect()) {
         return true;
     }
     // Not enough yet: send the current page as referer and move to the next page.
     $curl = GlabsController::$curl;
     $curl::$referer = $url;
     $unpagedUrl = str_replace([self::$pageParam . self::$page, '#list'], '', $url);
     self::$page += 100;
     return $this->collectObjects($this->getPagedUrl($unpagedUrl));
 }

Example #2

Show file

File: Backpage.php Project: nnrudakov/glabs

 /**
  * Collects listing objects from the results page at $url and keeps following
  * pagination until isEnoughCollect() reports that enough were gathered.
  *
  * A request is retried on the same URL when the curl error message mentions
  * either "timed out" or "525"; any other curl failure is rethrown as a
  * CurlException.
  *
  * @inheritdoc
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         $message = $e->getMessage();
         // Either marker alone makes the failure retryable. Requiring both
         // (two consecutive "throw unless present" guards) would rethrow
         // ordinary timeouts and leave the retry below practically unreachable.
         $retryable = false !== strpos($message, 'timed out')
             || false !== strpos($message, '525');
         if (!$retryable) {
             throw new CurlException($message);
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     // end collect. no results
     if (false !== strpos($dom, 'No matches found.')) {
         return true;
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.summaryHeader') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a', 0)) {
             $href = $link->getAttribute('href');
             // Skip links that were already collected earlier.
             if (in_array($href, $this->collected, true)) {
                 continue;
             }
             $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
             try {
                 $object->setPrice();
             } catch (ObjectException $e) {
                 // Objects whose price cannot be set are skipped.
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // Not enough yet: send the current page as referer and move to the next page.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         // A falsy page counter jumps straight to 2; otherwise advance by one.
         self::$page += self::$page ? 1 : 2;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }

Example #3

Show file

File: StaticDom.php Project: ericmorandbi/php-html-parser

 /**
  * Static shortcut: builds a fresh Dom, remembers it in self::$dom and
  * delegates to its loadFromUrl().
  *
  * NOTE(review): $curl is forwarded as the second argument; confirm that the
  * Dom::loadFromUrl() of this code base expects the curl object (and not an
  * options array) in that position.
  *
  * @param string $url
  * @param CurlInterface $curl Optional; a default Curl is created when omitted.
  * @return mixed Whatever Dom::loadFromUrl() returns for the new instance.
  */
 public static function loadFromUrl($url, CurlInterface $curl = null)
 {
     self::$dom = new Dom();
     if (null === $curl) {
         // No client supplied: fall back to the default curl interface.
         $curl = new Curl();
     }
     return self::$dom->loadFromUrl($url, $curl);
 }

Example #4

Show file

File: Backpage.php Project: nnrudakov/glabs

 /**
  * Collects listing objects from the results page at $url and keeps following
  * pagination until isEnoughCollect() reports that enough were gathered.
  *
  * A request is retried on the same URL when the curl error message mentions
  * either "timed out" or "525"; any other curl failure is rethrown as a
  * CurlException. Collection stops as soon as the page contains one of the
  * known localized "no matches" messages.
  *
  * @inheritdoc
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         $message = $e->getMessage();
         // Either marker alone makes the failure retryable. Requiring both
         // (two consecutive "throw unless present" guards) would rethrow
         // ordinary timeouts and leave the retry below practically unreachable.
         $retryable = false !== strpos($message, 'timed out')
             || false !== strpos($message, '525');
         if (!$retryable) {
             throw new CurlException($message);
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     // end collect. no results
     $noResultMarkers = [
         'No matches found',
         'Keine Entsprechungen gefunden',
         'No hay resultados',
         'Nessuna corrispondenza trovata',
         'Aucune correspondance n&#146;a &eacute;t&eacute; trouv&eacute;e',
         'Nenhuma correspondência encontrada',
         'Совпадений нет',
         'Ingen match fundet',
         'Nebyly nalezeny žádné shody',
         'Ingen match funnet',
         'Nie znaleziono',
         'Eşleşme bulunamadı',
         'Eredmény nem található',
         'Δεν βρέθηκαν εγγραφές',
         'Aucune correspondance n’a été trouvée',
     ];
     // Convert the document to a string once instead of once per marker.
     $html = (string) $dom;
     foreach ($noResultMarkers as $marker) {
         if (false !== strpos($html, $marker)) {
             return true;
         }
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.cat') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a', 0)) {
             $href = $link->getAttribute('href');
             // Skip links that were already collected earlier.
             if (in_array($href, $this->collected, true)) {
                 continue;
             }
             try {
                 $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
             } catch (ObjectException $e) {
                 // Objects that cannot be constructed are skipped.
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // Not enough yet: send the current page as referer and move to the next page.
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         // A falsy page counter jumps straight to 2; otherwise advance by one.
         self::$page += self::$page ? 1 : 2;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }

Example #5

Show file

File: BaseObject.php Project: nnrudakov/glabs

 /**
  * Loads $this->url into the shared DOM, sending the category URL as referer.
  *
  * A curl failure whose message contains "timed out" triggers another
  * attempt; every other curl failure, and an EmptyCollectionException, is
  * converted into an ObjectException carrying the same message.
  *
  * @return bool Always true when loading succeeded.
  *
  * @throws ObjectException
  */
 protected function loadDom()
 {
     try {
         $client = GlabsController::$curl;
         $client::$referer = $this->categoryUrl;
         self::$dom->loadFromUrl($this->url, [], $client);
     } catch (CurlException $failure) {
         $message = $failure->getMessage();
         if (false === strpos($message, 'timed out')) {
             throw new ObjectException($message);
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->loadDom();
     } catch (EmptyCollectionException $failure) {
         throw new ObjectException($failure->getMessage());
     }
     return true;
 }

Example #6

Show file

File: DomTest.php Project: cybrox/php-html-parser

 /**
  * loadFromUrl() must fetch the page through the injected curl object
  * (exactly one get() call for the URL) and parse the markup it returns.
  */
 public function testLoadFromUrl()
 {
     $fixture = file_get_contents('tests/files/small.html');
     $curl = Mockery::mock('PHPHtmlParser\\CurlInterface');
     $curl->shouldReceive('get')
         ->once()
         ->with('http://google.com')
         ->andReturn($fixture);

     $dom = new Dom();
     $dom->loadFromUrl('http://google.com', [], $curl);

     $userNode = $dom->find('.post-row div .post-user font', 0);
     $this->assertEquals('VonBurgermeister', $userNode->text);
 }

Example #7

Show file

File: yle_episode_downloader.php Project: jarkko-hautakorpi/yle-areena-episode-downloader

<?php

/*
 * YLE Areena video crawler
 * Find video links from a episode listing page and download them using yle-dl.
 * http://aajanki.github.io/yle-dl/
 * Run as a cron job to download episodes.
 * 0 1 * * * php -f /home/john/.cronscripts/yle_episode_downloader.php >> /home/john/.cronscripts/download.log
 *
 */
require __DIR__ . '/vendor/autoload.php';
use PHPHtmlParser\Dom;
$page_URL = "http://areena.yle.fi/1-2540138";
$saved_videos_folder = "/home/john/Videos/YLE/Yle_uutiset/";
$dom = new Dom();
$dom->loadFromUrl($page_URL);
// Find the dom element with videos and loop them through
$newslist = $dom->find('ul.program-list li');
if (count($newslist) >= 1) {
    foreach ($newslist as $news) {
        // Get video ID
        $data_item_id = $news->getAttribute('data-item-id');
        /* <time itemprop="startDate" datetime="2015-07-10T20:30:00.000+03:00"> */
        $timestamp_dom = $news->find('time[itemprop=startDate]');
        $timestamp = $timestamp_dom->getAttribute('datetime');
        $weekday = "_" . date('l', strtotime($timestamp));
        $pubDate = strftime("%Y-%m-%d_klo_%H.%M", strtotime($timestamp));
        $filename = "Yle_uutiset_" . $pubDate . $weekday . ".flv";
        $url = "http://areena.yle.fi/" . $data_item_id;
        if (!file_exists($saved_videos_folder . $filename)) {
            echo "\nDownloading video: " . $filename . "\n";

Example #8

Show file

File: ImportController.php Project: charlesportwoodii/galnet-api

 /**
  * Imports a single news entry unless a row with the same uid already exists.
  *
  * The uid is the link's href with the "/galnet/uid/" prefix removed. The
  * article page is then fetched, its title and first paragraph extracted,
  * and a News record saved.
  *
  * @param mixed $html Node exposing getAttribute('href') for the article link
  *                    (presumably an anchor node - confirm against the caller)
  * @param string $origin Date string passed to strtotime() for the published_at fields
  * @return null Nothing is returned, whether the entry is skipped or saved
  */
 private function importNewsEntry($html, $origin)
 {
     $uri = $html->getAttribute('href');
     $uid = str_replace('/galnet/uid/', '', $uri);

     $existing = (int) (new \yii\db\Query())
         ->from('news')
         ->where(['uid' => $uid])
         ->count();
     if (0 !== $existing) {
         $this->stdOut("    - {$uid} :: Already Imported...\n");
         return;
     }

     $dom = new Dom();
     $dom->loadFromUrl(Yii::$app->params['galnet']['url'] . $uri);

     $titleHtml = $dom->find('h3.galnetNewsArticleTitle a')[0]->innerHtml;
     $title = trim(strip_tags($titleHtml));

     // Double line breaks become newlines before the remaining tags are stripped.
     $bodyHtml = $dom->find('div.article p')[0]->innerHtml;
     $content = trim(strip_tags(str_replace('<br /><br /> ', "\n", $bodyHtml)));

     // Early Galnet posts are empty, so grab the first line from the article
     if (empty($title)) {
         $title = strtok($content, "\n");
     }

     $news = new News();
     $news->attributes = [
         'uid' => $uid,
         'title' => $title,
         'content' => $content,
         'created_at' => time(),
         'updated_at' => time(),
         'published_at_native' => strtotime($origin),
         'published_at' => strtotime($origin . "-1286 years"),
     ];
     $this->stdOut("    - {$uid}\n");
     $news->save();
 }

Example #9

Show file

File: index.php Project: Lisss13/prog

<?php

include "vendor/autoload.php";
use PHPHtmlParser\Dom;
// Scrape listing pages 1 and 2 and copy every row of the results table into
// index.html as a plain HTML table.
//
// The markup is assembled in memory and written with a single
// file_put_contents() call, instead of reopening and appending to the file
// for every row and cell. A failure while fetching or parsing therefore no
// longer leaves a half-written index.html behind.
$head = "<!DOCTYPE html>\n <html><head>\n<meta charset='utf-8'>\n</head><body><table>";
$html = $head;
for ($i = 1; $i <= 2; $i++) {
    $url = "http://www.emls.ru/flats/page{$i}.html?query=s/1/place/address/reg/2/dept/2/sort1/1/dir1/2/sort2/3/dir2/1/interval/3";
    $dom = new Dom();
    $dom->loadFromUrl($url);
    $trs = $dom->find("table.html_table_1 tr");
    foreach ($trs as $tr) {
        $html .= "<tr>";
        $tds = $tr->find("td");
        foreach ($tds as $td) {
            $html .= "<td>" . $td->text() . "</td>";
        }
        $html .= "</tr>";
    }
}
$footer = "</table></body></html>";
$html .= $footer;
file_put_contents("index.html", $html);

Example #10

Show file

File: render.php Project: nickel715/fb-page-to-rss

<?php

require_once 'vendor/autoload.php';
use PHPHtmlParser\Dom;
// The Facebook page name is taken from the first command-line argument.
$pageId = isset($argv[1]) ? $argv[1] : null;
if (empty($pageId)) {
    // Usage errors go to stderr with a non-zero exit status: stdout carries
    // the generated feed, and die() with a string would exit with status 0.
    fwrite(STDERR, 'Usage `php render.php GitHub` to generate rss feed for `https://www.facebook.com/GitHub`' . PHP_EOL);
    exit(1);
}
// Build the page URL; the page name is URL-encoded before being inserted.
$url = sprintf('https://www.facebook.com/%s?_fb_noscript=1', urlencode($pageId));
$dom = new Dom();
$dom->loadFromUrl($url, ['whitespaceTextNode' => false], new \FbPageToRSS\Curl());
// Parse the loaded page and render it as a feed on stdout.
$parser = new \FbPageToRSS\FbParser($dom);
$feedGenerator = new \FbPageToRSS\FeedGenerator($parser);
$feedGenerator->setSkipFixedPost(true);
echo $feedGenerator->render();

Example #11

Show file

File: script.php Project: salmander/process_webpage

// Load and parse Sainsbury's Ripe Fruits webpage
$dom->loadFromUrl(URL, [], new \Curl());
// In order to find all the urls to the product pages, we need to
// find all the divs with the .product class
$products = $dom->find('#productsContainer .product');
echo "Total products: " . count($products) . PHP_EOL;
$product_response = new ProductResponse();
foreach ($products as $p) {
    // Get the URL to the products page
    $href = $p->find('.productInfo a')->getAttribute('href');
    echo 'Get content for: ' . $href . PHP_EOL;
    // Instantiate new DOM for product page.
    $pp_dom = new Dom();
    // Load and parse product page HTML
    $product_page_curl = new \Curl();
    $pp_dom->loadFromUrl($href, [], $product_page_curl);
    // Instantiate new Product
    $product = new Product();
    // Find and assign title to the Product
    $product->title = $pp_dom->find('.productSummary h1')->text();
    // Assign page size to the Product
    $product->size = $product_page_curl->getSize('kb');
    // Find unit_price then, remove £ and any whitespace
    // And assign it to the Product
    $product->unit_price = preg_replace('/£|\\s/', '', $pp_dom->find('.productSummary p.pricePerUnit')->text());
    // First we try extracting description from the div.productText next to
    // the h3.productDataItemHeader (with text = "Description").
    // If this fails (on some product pages) we get description
    // from the div.longTextItems within
    // div.itemTypeGroupContainer
    $div_description = $pp_dom->find('.productDataItemHeader');