load() public method

Attempts to load the DOM from any supported resource: a string, a file, or a URL.
public load ( string $str, array $options = [] )
$str string
$options array
Example #1
  * Gets departures from the given station starting at the given time.
  * @param int $stationID
  * @param Carbon $time
  * @return array
  * @throws ApiException
 public static function getDepartures(int $stationID, Carbon $time, int $maxJourneys = 10)
     // $maxJourneys is forwarded unchanged as the 'maxJourneys' query parameter (default 10).
     // prepare parameters for our request
     $query = ['input' => $stationID, 'boardType' => 'dep', 'time' => $time->format('H:i'), 'date' => $time->format('d.m.y'), 'maxJourneys' => $maxJourneys, 'start' => 'yes'];
     // send it to the bvg mobile site
     $response = \Requests::get(self::getApiEndpoint() . '?' . http_build_query($query));
     if ($response->status_code == 200) {
         // our results array
         $departures = [];
         // prepare document
         // NOTE(review): no load() of the response body is visible in this excerpt
         // before the find() calls below - confirm against the full source.
         $dom = new Dom();
         // get date from API
         $date = $dom->find('#ivu_overview_input');
         // keep only what follows the first ':' in the element's text
         $date = trim(substr($date->text, strpos($date->text, ':') + 1));
         $date = Carbon::createFromFormat('d.m.y', $date, 'Europe/Berlin');
         // get table data without the first line (header)
         $rows = $dom->find('.ivu_result_box .ivu_table tbody tr');
         // loop through each departure in the table
         foreach ($rows as $row) {
             // get columns
             $columns = $row->find('td');
             // explode time into two parts
             // NOTE: this overwrites the $time parameter with an [hour, minute] array.
             $time = explode(':', strip_tags($columns[0]));
             // push the departure onto our results array
             // (each entry: the API date with the row's hour/minute applied, the line name, the direction)
             $departures[] = ['time' => $date->copy()->hour($time[0])->minute($time[1])->second(0), 'line' => trim(strip_tags($columns[1]->find('a')[0])), 'direction' => trim(strip_tags($columns[2]))];
         // return results
         return $departures;
     } else {
         // any status code other than 200 is reported as an API failure
         throw new ApiException('Failed getting station data from BVG API');
 /**
  * Checks that an option passed directly to load() overrides the same option
  * previously set through setOptions().
  */
 public function testConfigLocalOverride()
     $dom = new Dom();
     // Globally disable whitespace text nodes ...
     $dom->setOptions(['whitespaceTextNode' => false]);
     // ... then re-enable them for this single load() call.
     $dom->load('<div><p id="hey">Hey you</p> <p id="ya">Ya you!</p></div>', ['whitespaceTextNode' => true]);
     // The single space between the two <p> elements must be present as a sibling node.
     $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text);
 /**
  * Adds a photo to the "<session>.html" page stored in the repository,
  * updating the page when it already exists and creating it otherwise.
  *
  * @param mixed $session used to build the page path ($session . '.html')
  * @param File  $file    photo file; passed to getPhotoFilename() and appended to the commit message
  * @param mixed $number  forwarded to addSession() when a new page is created
  */
 public function addPhoto($session, File $file, $number)
     $path = $session . '.html';
     // $update is truthy when the page already exists on the branch.
     $update = $this->client->api('repo')->contents()->exists($this->user, $this->repo, $path, $this->branch);
     if ($update) {
         $set = new \PHPHtmlParser\Dom();
         $set->load($this->client->api('repo')->contents()->download($this->user, $this->repo, $path, $this->branch));
         // $info['sha'] is needed further down for the update() call.
         $info = $this->client->api('repo')->contents()->show($this->user, $this->repo, $path, $this->branch);
     } else {
         // NOTE(review): in this excerpt the new Dom is never loaded with a template - confirm.
         $set = new \PHPHtmlParser\Dom();
     $div = $set->find('#photos')[0];
     // NOTE(review): $photo is queried for an <img> without a visible load() - confirm.
     $photo = new \PHPHtmlParser\Dom();
     $img = $photo->find('img')[0];
     $img->setAttribute('src', $this->getPhotoFilename($file));
     $images = $div->find('img');
     $count = count($images);
     if ($count <= 4) {
         $meta = new Dom();
         // NOTE: --$count pre-decrements, so the meta name suffix is (number of images - 1)
         // and $count itself is modified.
         $meta->load('<meta name="twitter:image' . --$count . '" content="' . $this->web . $this->getPhotoFilename($file, 'th') . '">');
         $set->find('head', 0)->addChild($meta->root);
     $content = \Mihaeu\HtmlFormatter::format((string) $set);
     // Collapse whitespace-only blank lines left by the formatter into single newlines.
     $content = preg_replace("#\n\\s*\n#", "\n", $content);
     if ($update) {
         $response = $this->client->api('repo')->contents()->update($this->user, $this->repo, $path, $content, 'Adding Photo ' . PHP_EOL . $file, $info['sha'], $this->branch);
     } else {
         $response = $this->client->api('repo')->contents()->create($this->user, $this->repo, $path, $content, 'Adding Page and Photo ' . PHP_EOL . $file, $this->branch);
         // Only a freshly created page registers a new session.
         $this->addSession($session, $number, $file);
  * setOpenGraph
  * @param string $context
  * @param object $article
  * @return  void
 public static function setOpenGraph($context, $article)
     $es = \Ezset::getInstance();
     $input = \JFactory::getApplication()->input;
     $view = $input->get('view');
     // NOTE(review): the bodies of the next two guards are not visible in this excerpt;
     // presumably they return early - confirm.
     if (empty($article->id)) {
     if (!$es->params->get('ogGetInnerPageImage', 1)) {
     if ('article' == $view) {
         $images = new \JRegistry($article->images);
         $ignoreFirst = false;
         $imgs = array();
         // Prefer the full-text image and fall back to the intro image.
         $img = $images->get('image_fulltext', $images->get('image_intro'));
         if ($img) {
             $imgs[] = $img;
         if ($imgs) {
             $ignoreFirst = true;
         // NOTE(review): no load() of the article text is visible before find() - confirm.
         $dom = new Dom();
         // If first image = main image, delete this paragraph.
         // NOTE: $images is reused here; it now holds the <img> nodes, not the JRegistry.
         $images = $dom->find('img');
         foreach ($images as $image) {
             if ($ignoreFirst) {
             $imgs[] = $image->src;
         // Fall back to the category image when the article supplied none.
         if (!$imgs && isset($article->catid)) {
             $cat = \JTable::getInstance('category');
             $cat->params = new \JRegistry($cat->params);
             $imgs[] = $cat->params->get('image');
         // Last resort: the configured default image, unless it is limited to the front page.
         if (!$imgs && !$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
             $imgs[] = UriHelper::pathAddHost($es->params->get('ogDefaultImage'));
         $es->data->ogImages = $imgs;
     } elseif ('category' == $view) {
         // static::$once is cleared below, so the category image is resolved on the first call only.
         if (static::$once) {
             $cat = \JTable::getInstance('category');
             $cat->params = new \JRegistry($cat->params);
             $img = $cat->params->get('image');
             if ($img) {
                 $es->ogImage = $img;
             } elseif (!$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
                 $es->ogImage = $es->params->get('ogDefaultImage');
             // NOTE(review): written to $es->ogImage above but read from $es->data->ogImage here - confirm which is intended.
             $es->ogImage = UriHelper::pathAddHost($es->data->ogImage);
         static::$once = 0;
Example #5
  * Constructor
  * @param string $url
  * @throws \InvalidArgumentException
 public function __construct($url)
     // _loadUrl() yields null when nothing could be fetched; that case throws below.
     $html = $this->_loadUrl($url);
     if ($html !== null) {
         // NOTE(review): $html is not visibly loaded into $dom in this excerpt - confirm.
         $dom = new Dom();
         $this->data = $this->_extractData($dom);
     } else {
         throw new \InvalidArgumentException('This recipe does not exists.');
Example #6
 /**
  * Adds the 'additionalText' and 'photos' entries to the given result array.
  *
  * @param array $result modified in place (passed by reference)
  */
 protected function addAdditionalInfo(&$result)
     // NOTE(review): no load() call is visible in this excerpt before find() - confirm.
     $dom = new Dom();
     $additionalInfoTag = $dom->find('#textContent p');
     $result['additionalText'] = $additionalInfoTag->text;
     // Collect the src attribute of every image inside an .img-item element.
     $photoTags = $dom->find('.img-item img');
     $photos = [];
     foreach ($photoTags as $photoTag) {
         $photos[] = $photoTag->getAttribute('src');
     $result['photos'] = $photos;
 /**
  * Checks that strict mode rejects an attribute without a value ("block")
  * with a StrictException carrying the expected message.
  */
 public function testConfigStrictMissingAttribute()
     $dom = new Dom();
     $dom->setOptions(['strict' => true]);
     try {
         // should throw an exception
         $dom->load('<div><p id="hey" block>Hey you</p> <p id="ya">Ya you!</p></div>');
         // we should not get here
         // NOTE(review): no explicit failure is visible here if load() does not throw - confirm.
     } catch (StrictException $e) {
         $this->assertEquals("Tag 'p' has an attribute 'block' with out a value! (character #22)", $e->getMessage());
  * saveFirstImage
  * @param string  $context
  * @param \JTable $article
  * @return  void
 public static function saveFirstImage($context, $article)
     // NOTE(review): this guard fires only when BOTH conditions hold (no 'images' property
     // AND a different context). If either one alone should stop processing, '||' is needed - confirm.
     // The guard's body is not visible in this excerpt.
     if (!property_exists($article, 'images') && $context != 'com_content.article') {
     $image = new \JRegistry($article->images);
     $dom = new Dom();
     // Parse intro and full text together so the first <img> of the whole article is found.
     $dom->load($article->introtext . $article->fulltext);
     $imgs = $dom->find('img');
     $imageSrc = null;
     if ($imgs->count()) {
         $imageSrc = $imgs[0]->src;
     // NOTE: 'image_intro' is always overwritten, with null when the article has no image.
     $image->set('image_intro', $imageSrc);
     $article->images = $image->toString();
 /**
  * Looks up the album cover for a song on slothradio.com.
  *
  * @param string $songPath path handed to SongReader to obtain author and album
  * @return string the cover image's src attribute, or '' when no matching image is found
  */
 public function getAlbumCoverURL(string $songPath) : string
     $songReader = new \SongReader($songPath);
     $client = new Client(['base_uri' => 'http://www.slothradio.com/', 'timeout' => 2.0]);
     $response = $client->request('GET', 'covers/', ['query' => ['artist' => $songReader->getAuthor(), 'album' => $songReader->getAlbum()]]);
     $html = $response->getBody()->getContents();
     // NOTE(review): $html is not visibly loaded into $dom in this excerpt - confirm.
     $dom = new Dom();
     $images = $dom->find('#content > div.album0 > img');
     if (count($images) > 0) {
         /** @var Dom\HtmlNode $image */
         $image = $images[0];
         $albumURL = $image->getAttribute('src');
     } else {
         $albumURL = '';
     return $albumURL;
Example #10
 /**
  * Collects the href of the <a> elements found for each entry of $this->urls.
  *
  * @return Result object whose result["urls"] holds the de-duplicated, re-indexed hrefs
  */
 public function scan()
     $crawlerResult = new Result(array());
     foreach ($this->urls as $url) {
         // NOTE(review): no load() of $url is visible in this excerpt before find() - confirm.
         $dom = new Dom();
         $aTags = $dom->find("a");
         foreach ($aTags as $a) {
             $href = $a->href;
             // strpos() returns 0 only when $href begins with 'http', so this condition
             // is true for hrefs that do NOT start with 'http'.
             if (0 !== strpos($href, 'http')) {
                 // NOTE(review): the branch body is not visible here; presumably such hrefs are skipped - confirm.
             $crawlerResult->result["urls"][] = $a->href;
     // Drop duplicate hrefs and re-index the list from 0.
     $crawlerResult->result["urls"] = array_values(array_unique($crawlerResult->result["urls"]));
     return $crawlerResult;
Example #11
 /**
  * Formats every node matching $this->selector and returns the page with the
  * resulting HTML as its body.
  *
  * @param Page $page page to process
  * @return mixed whatever $page->withBody() returns
  */
 public function run(Page $page)
     // Parse the HTML
     // NOTE(review): no load() of the page body is visible in this excerpt - confirm.
     $dom = new Dom();
     // Keep scripts, styles and line breaks so the output stays as close to the input as possible.
     $dom->setOptions(array('removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true));
     // Format
     foreach ($dom->find($this->selector) as $node) {
         // Format the node
         $formattedNode = $this->format($node);
         // Remove all children
         // NOTE(review): the loop body is not visible in this excerpt.
         foreach ($node->find('*') as $child) {
         // Add the new node
     // Set and return
     return $page->withBody($dom->root->outerHtml());
  * @param  string $url
  * @param  string $userAgent
  * @return string $redurectUrl
 private function getRedirectUrl($url, $userAgent)
     $curlInfo = $this->getCurlInfo($url, $userAgent);
     // '@' suppresses the notice raised when 'redirect_url' is missing from the cURL info.
     $redurectUrl = $this->removeQueryString(@$curlInfo['redirect_url']);
     // A redirect to the same address (ignoring leading/trailing slashes) is treated as no redirect.
     if (trim($url, '/') === trim($redurectUrl, '/')) {
         $redurectUrl = '';
     // look for meta http-equiv="refresh"
     if (!$redurectUrl) {
         // NOTE(review): no load() of the page is visible in this excerpt before find() - confirm.
         $dom = new Dom();
         $metaTags = $dom->find('meta');
         foreach ($metaTags as $meta) {
             if ($meta->getAttribute('http-equiv') === 'refresh') {
                 // Extract the quoted target from a content value such as: 5; url='http://...'
                 // NOTE: when the pattern does not match (e.g. an unquoted URL), preg_replace()
                 // returns the content attribute unchanged.
                 $redurectUrl = preg_replace('/\\s*\\d+\\s*;\\s*url\\s*=\\s*(\'|\\")(.+)(\'|\\")/i', '$2', $meta->getAttribute('content'));
     return $redurectUrl;
Example #13
  * Attempts to get the URL of a given profile's
  * photo. This method returns the URL, or boolean
  * false if the profile photo could not be scraped.
  * @param string $profileURL 	The URL to the profile
  * @return string|boolean
 public function profilePhoto($profileURL)
     // '@' hides the warning file_get_contents() emits on failure; the falsy result is handled below.
     $contents = @file_get_contents($profileURL . '/posts');
     if (!$contents) {
         return false;
     // NOTE(review): stripos() returns false when the marker is missing; none of the three
     // lookups below checks for that - confirm whether that case needs handling.
     $position = stripos($contents, 'dkb photo');
     /* Chop off all of the string before this position */
     $contents = substr($contents, $position);
     $position = stripos($contents, '<img');
     $contents = substr($contents, $position);
     $position = stripos($contents, '>');
     /* Chop off everything after the position */
     $contents = substr($contents, 0, $position + 1);
     // At this point $contents is meant to hold just the first <img ...> tag after the marker.
     // NOTE(review): $contents is not visibly loaded into $dom in this excerpt - confirm.
     $dom = new Dom();
     $img = $dom->find('img', 0);
     $src = $img->getAttribute('src');
     // Turn a protocol-relative URL ('//host/...') into an https URL.
     if (substr($src, 0, 2) == '//') {
         $src = 'https:' . $src;
     return $src;
Example #14
 /**
  * Builds a Page from the template file that corresponds to the given path.
  *
  * @param string $path    slash-separated page path
  * @param array  $context not referenced in the visible code - presumably consumed by the included file; confirm
  * @return Page
  * @throws PageNotFoundException when the resolved template file does not exist
  */
 public function page($path, array $context = array())
     // Split on '/', drop empty segments, and join with '.' to form the template name.
     // NOTE: from here on $path is an array of segments, not the original string.
     $path = array_filter(explode('/', $path), 'strlen');
     $pathname = implode('.', $path);
     // $this->path is used as a sprintf() format that receives the template name.
     $file = sprintf($this->path, $pathname);
     if (!file_exists($file)) {
         throw new PageNotFoundException($path);
     // Parse the HTML
     $dom = new Dom();
     $dom->setOptions(array('removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true));
     // NOTE(review): the included file name is derived from the caller-supplied $path -
     // make sure callers cannot reach arbitrary files.
     include $file;
     // Create a new page
     $page = new Page($path);
     // Title
     if (($title = $dom->find('title', 0)) !== null) {
     // Header
     if (($head = $dom->find('head', 0)) !== null) {
         foreach ($head->getChildren() as $child) {
             // Copy every <head> child except <title> and the <meta charset> tag.
             if ($child->getTag()->name() !== 'title' && !($child->getTag()->name() === 'meta' && $child->getAttribute('charset') !== null)) {
                 // NOTE(review): the return value of withHeader() is discarded; if Page's with*()
                 // methods return a new instance, this line has no effect - confirm.
                 $page->withHeader($page->header() . $child->outerHtml());
     // Body
     if (($body = $dom->find('body', 0)) !== null) {
     // Return page
     return $page;
Example #15
  * Gets the URLs of all images on the page; image URLs that were already saved are not stored again.
  * @param int $page page number
  * @return array image URLs
 protected function getRawImgsrcs($page)
     // Prepare the DOM
     $html = $this->getHtml($page);
     // NOTE(review): $html is not visibly loaded into $dom in this excerpt - confirm.
     $dom = new Dom();
     $imgsrcs = [];
     // Add every img on the page to the array
     foreach ($dom->find('img') as $img) {
         $imgsrcs[] = $img->getAttribute('src');
     // Inspect every iframe
     foreach ($dom->find('iframe') as $iframe) {
         $id = $iframe->getAttribute('id');
         // Matches iframes whose id does not contain 'photoset'.
         // NOTE(review): the branch body is not visible here; presumably these are skipped - confirm.
         if (!strstr($id, 'photoset')) {
         // Fetch the iframe's document and collect its images as well.
         $src = $iframe->getAttribute('src');
         $imgHtml = $this->requestHtml($src);
         // NOTE(review): $imgHtml is not visibly loaded into $imgDom in this excerpt - confirm.
         $imgDom = new Dom();
         foreach ($imgDom->find('img') as $img) {
             $imgsrcs[] = $img->getAttribute('src');
     return $imgsrcs;
  * This command echoes what you have entered as the message.
  * @param int $start
  * @internal param string $message the message to be echoed.
 public function actionIndex($start = 111111111111)
     // Resume from the persisted 'lastParseID' value, creating the record on first run.
     $lastID = SavedVariable::findOne('lastParseID');
     if (!isset($lastID)) {
         $lastID = new SavedVariable();
         $lastID->name = 'lastParseID';
         $lastID->value = (string) (int) $start;
     // Never start below the smallest 12-digit candidate.
     $ii = max(111111111111, (int) $lastID->value);
     while ($ii <= 999999999999) {
         for ($i = $ii; $i <= 999999999999; $i++) {
             $lastID->value = (string) $i;
             // Build the barcode: the 12 digits of $i plus one appended check digit.
             // The callback weights digits at even indices by 1 and at odd indices by 3;
             // the check digit is (10 - weightedSum % 10) % 10. This matches the EAN-13
             // check-digit scheme - presumably intended; confirm.
             // NOTE(review): $barcode is assigned inside the right-hand side of its own '.='
             // expression, so the result depends on PHP's evaluation order - confirm.
             $barcode .= (10 - array_sum(ArrayHelperAdvanced::array_walk_r(str_split($barcode = (string) $i), function (&$v, $k) {
                 $v *= $k % 2 * 2 + 1;
             })) % 10) % 10;
             $dom = new Dom();
             try {
                 $string = file_get_contents('http://www.barcode-list.ru/barcode/RU/Поиск.htm?barcode=' . $barcode);
             // NOTE(review): file_get_contents() signals failure with a warning and a false return,
             // not an exception, unless an error handler converts warnings - confirm this catch is reachable.
             } catch (\Exception $e) {
             /** @var HtmlNode $table */
             // NOTE(review): $string is not visibly loaded into $dom in this excerpt - confirm.
             $tables = $dom->find('.randomBarcodes');
             if (count($tables) === 0) {
                 echo 'not found ' . $i . ' of ' . $barcode . "\n";
             $table = $tables[0];
             // debug output: number of children of the matched table
             echo '$table->countChildren() = ' . $table->countChildren() . "\n";
             // debug output: tag name of the matched table
             echo '$table->getTag()->name() = ' . $table->getTag()->name() . "\n";
             /** @var HtmlNode $tr */
             /** @noinspection LoopWhichDoesNotLoopInspection */
             foreach ($table->find('tr') as $tr) {
                 echo '  $tr->getTag()->name() = ' . $tr->getTag()->name() . "\n";
                 $tds = $tr->find('td');
                 if (count($tds) === 0) {
                 // The third cell's text is stored as the product name.
                 $td = $tds[2];
                 /** @var HtmlNode $td */
                 echo '    $td->getTag()->name() = ' . $td->getTag()->name() . "\n";
                 echo '      $td->text = ' . $td->text . "\n";
                 // save(false) is called with false as its argument; presumably this skips validation - confirm.
                 (new ParsedProduct(['barcode' => $i, 'name' => $td->text]))->save(false);
         // Remember where the inner loop stopped so the outer loop resumes from there.
         $ii = $i;
         echo 'break ' . $i . "\n";
         $startTimestamp = time();
         echo date('Y.m.d h:i:s', $startTimestamp) . "\n";
         // Busy-wait (no sleep) until 10 seconds have elapsed before the next pass.
         do {
             $currentTimestamp = time();
         } while ($currentTimestamp - $startTimestamp < 10);
         echo date('Y.m.d h:i:s', $currentTimestamp) . "\n";
     //while ($ii <= 999999999999) {
     echo $ii . "\n";
Example #17
 /**
  * Fetches the HTML for a URL and returns a Dom for it, remembering the Dom
  * in $this->doms under the URL.
  *
  * @param string $url
  * @return Dom
  */
 protected function requestDom($url)
     $html = $this->requestHtml($url);
     // NOTE(review): $html is not visibly loaded into $dom in this excerpt - confirm.
     $dom = new Dom();
     $this->doms[$url] = $dom;
     return $dom;
Example #18

// Script: builds a Baidu search URL for a community name and echoes the parsed page's outer HTML.
include_once "../vendor/autoload.php";
$basUrl = 'http://www.baidu.com/s?wd=';
$communityName = '毕加索小镇';
// NOTE(review): the query value is appended without URL-encoding - confirm this is acceptable.
$queryUrl = $basUrl . $communityName;
use PHPHtmlParser\Dom;
// NOTE(review): $queryUrl is not visibly loaded into $dom in this excerpt - confirm.
$dom = new Dom();
$html = $dom->outerHtml;
echo $html;
  * Creates a new dom object and calls load() on the
  * new object.
  * @param string $str
  * @return $this
 public static function load($str)
     $dom = new Dom();
     // Keep the new instance in the static property so it remains reachable after this call.
     self::$dom = $dom;
     // The value returned is whatever the instance-level load() returns.
     return $dom->load($str);
Example #20
 /**
  * Returns an HTML-entity-encoded description taken from the <p> elements
  * of a freshly created Dom.
  *
  * @return string
  */
 public function getDescription()
     $intro = $this->dom->getElementById('intro');
     // NOTE(review): $intro is not visibly loaded into the new $dom in this excerpt - confirm.
     $dom = new Dom();
     $p = $dom->getElementsByTag('p');
     // With more than one paragraph the second one is used; otherwise the whole result is encoded.
     if (count($p) > 1) {
         return htmlentities($p[1]);
     return htmlentities($p);
Example #21
  * @param null $filter
  * @return File[]
 public function getFiles($filter = null)
     $files = [];
     foreach ($this->getDirectories() as $path) {
         // Request the listing of this directory under /DCIM/ on the device.
         $result = $this->getHttpClient()->get('http://' . $this->ip . ':' . $this->web . '/DCIM/' . $path);
         // NOTE(review): $result is not visibly loaded into $dom in this excerpt - confirm.
         $dom = new Dom();
         foreach ($dom->find('tbody a') as $link) {
             $file = new File($path, $link->getAttribute('href'));
             // True when a filter is given and the file's type differs from it.
             // NOTE(review): the branch body is not visible here; presumably such files are skipped - confirm.
             if ($filter and $filter !== $file->getType()) {
             $files[] = $file;
     return $files;
 // '@' suppresses the notice raised when $send_to is undefined.
 if (@$send_to && is_array($send_to)) {
     // NOTE(review): the loop body is not visible in this excerpt.
     foreach ($send_to as $address) {
 // Set the subject line
 // The md5 of the current timestamp makes the subject differ from one send to the next.
 $mail->Subject = 'Newsletter Tester [' . md5(time()) . ']';
 // Read an HTML message body from an external file, convert referenced images to embedded,
 // convert HTML into a basic plain-text alternative body
 if (@$content) {
     $newslettersPath = 'newsletters/' . $content . '/';
     $contentHtml = file_get_contents($newslettersPath . 'index.html');
     $contentHtmlOriginal = $contentHtml;
     // NOTE(review): $contentHtml is not visibly loaded into $dom in this excerpt - confirm.
     $dom = new Dom();
     $images = $dom->find('img');
     foreach ($images as $image) {
         $imageSrc = $image->getAttribute('src');
         // Embed the image and point the mailed HTML at its cid: reference ...
         $mail->AddEmbeddedImage($newslettersPath . $imageSrc, slugify($imageSrc));
         $contentHtml = str_replace($imageSrc, 'cid:' . slugify($imageSrc), $contentHtml);
         // ... while the copy kept for output gets the newsletter path prefixed instead.
         $contentHtmlOriginal = str_replace($imageSrc, $newslettersPath . $imageSrc, $contentHtmlOriginal);
     $mail->msgHTML($contentHtml, dirname(__FILE__));
     $output['content'] = $contentHtmlOriginal;
 // Replace the plain text body with one created manually
 $mail->AltBody = 'Test with Newsletter Tester tool by Syaiful Shah Zinan';
 // send the message, check for errors
 if (!$mail->send()) {
     $output['status'] = 'error';
 /**
  * Returns the latest tweets as JSON, serving a cached copy when one exists.
  *
  * @param mixed $count  number of posts requested; part of the cache key and the remote query
  * @param mixed $option output variant; the visible code distinguishes 'html' and 'plain'
  */
 private function handle($count, $option)
     $key = "tweet.{$option}.{$count}";
     $tag = "tweet";
     // Serve the cached payload when present.
     if (Cache::tags($tag)->has($key)) {
         return response(Cache::tags($tag)->get($key))->header('Content-Type', 'application/json')->header('Access-Control-Allow-Origin', '*');
     $rep = file_get_contents("https://t.kcwiki.moe/?json=1&count={$count}");
     if ($rep) {
         $result = json_decode($rep, true);
         $posts = $result['posts'];
         $output = [];
         foreach ($posts as $post) {
             // NOTE(review): the post content is not visibly loaded into $dom in this excerpt - confirm.
             $dom = new Dom();
             $new_post = [];
             // Use the first 'ozh_ta_id' custom field as the id, or '' when it is absent.
             if (array_key_exists('ozh_ta_id', $post['custom_fields']) && is_array($post['custom_fields']['ozh_ta_id'])) {
                 $new_post['id'] = $post['custom_fields']['ozh_ta_id'][0];
             } else {
                 $new_post['id'] = '';
             $img = $dom->find('img');
             // For non-'html' output, expose the first image's src separately.
             if (count($img) > 0 && $option != 'html') {
                 $new_post['img'] = $img[0]->getAttribute('src');
                 foreach ($img as $x) {
                     $parent = $x->getParent();
                     $parentTagName = $parent->getTag()->name();
                     // NOTE(review): both branch bodies are not visible in this excerpt.
                     if ($parentTagName == 'a') {
                     } else {
             } else {
                 if ($option != 'html') {
                     $new_post['img'] = '';
             $p = $dom->find('p, div');
             $new_post['jp'] = '';
             $new_post['zh'] = '';
             // detect() supplies the index of the last block that goes into 'jp';
             // every block after it goes into 'zh'.
             $n = $this->detect($p);
             for ($i = 0; $i <= $n; $i++) {
                 $new_post['jp'] .= $p[$i]->innerHtml;
             for ($i = $n + 1; $i < count($p); $i++) {
                 $new_post['zh'] .= $p[$i]->innerHtml;
             $new_post['date'] = $post['date'];
             // 'plain' output: expand URLs first, then strip all markup.
             if ($option == 'plain') {
                 $new_post['zh'] = strip_tags($this->expandUrl($new_post['zh']));
                 $new_post['jp'] = strip_tags($this->expandUrl($new_post['jp']));
             array_push($output, $new_post);
         // NOTE(review): the unit of the TTL value 5 depends on the cache library version - confirm.
         Cache::tags($tag)->put($key, $output, 5);
         return response($output)->header('Content-Type', 'application/json')->header('Access-Control-Allow-Origin', '*');
     } else {
         return response()->json(['result' => 'error', 'reason' => 'Getting tweets failed.']);
 /**
  * Checks the outer HTML of the first <p> against markup whose link text reads 'gogogo'.
  */
 public function testChangeContent()
     $dom = new Dom();
     // The input lacks a closing </p>; the expected output below includes it.
     $dom->load('<div class="all"><p>Hey bro, <a href="google.com" id="78">click here</a></div><br />');
     $links = $dom->find('a');
     // NOTE(review): the statement that changes the link text from 'click here' to 'gogogo'
     // is not visible in this excerpt - confirm.
     $this->assertEquals('<p>Hey bro, <a href="google.com" id="78">gogogo</a></p>', $dom->getElementsByTag('p')[0]->outerHtml);
Example #25
 /**
  * Checks that, with 'enforceEncoding' set to UTF-8, the second 'table input' element
  * of the fixture does NOT serialise to the given Cyrillic-valued markup.
  */
 public function testEnforceEncoding()
     $dom = new Dom();
     $dom->load('tests/files/horrible.html', ['enforceEncoding' => 'UTF-8']);
     // find(..., 1) selects the match at index 1, i.e. the second one.
     $this->assertNotEquals('<input type="submit" tabindex="0" name="submit" value="Информации" />', $dom->find('table input', 1)->outerHtml);
Example #26
  * @return Dom
  * @throws LoginException  Thrown if access to the wallpaper was denied.
  * @throws NotFoundException Thrown if the wallpaper was not found.
 private function getDom()
     // Reuse the cached Dom when caching is enabled and one was already built.
     if ($this->cacheEnabled && $this->dom !== null) {
         return $this->dom;
     try {
         $response = $this->client->get(Wallhaven::URL_WALLPAPER . '/' . $this->id)->getBody()->getContents();
     } catch (RequestException $e) {
         // Translate HTTP 403 and 404 into domain exceptions; rethrow anything else unchanged.
         $code = $e->getCode();
         if ($code == 403) {
             throw new LoginException("Access to wallpaper is forbidden.");
         } else {
             if ($code == 404) {
                 throw new NotFoundException("Wallpaper not found.");
             } else {
                 throw $e;
     // NOTE(review): $response is not visibly loaded into $dom in this excerpt - confirm.
     $dom = new Dom();
     // Store the Dom only when caching is enabled; otherwise the cached reference is cleared.
     $this->dom = $this->cacheEnabled ? $dom : null;
     return $dom;
Example #27
 /**
  * Checks that a single-quoted attribute value containing an apostrophe
  * ("Ain't this the best") is read back in full.
  */
 public function testMultipleSingleQuotes()
     $dom = new Dom();
     // The title value is delimited by single quotes and also contains one.
     $dom->load("<a title='Ain't this the best' href=\"http://www.example.com\">Hello</a>");
     $this->assertEquals("Ain't this the best", $dom->getElementsByTag('a')[0]->title);
Example #28
    public function testScriptCleanerScriptTag()
        $dom = new Dom();
		Some code ... 
		document.write("<script src=\'some script\'><\\/script>") 
		Some code ... 
        $this->assertEquals('....', $dom->getElementsByTag('p')[1]->innerHtml);
Example #29
// Script: reads the title, heading, date and paragraph blocks from a tini.maiige.hu page.
require_once 'vendor/autoload.php';
use PHPHtmlParser\Dom;
$url = "http://tini.maiige.hu";
// An optional ?date=... request parameter is forwarded to the remote site.
// NOTE(review): the value is appended without validation or URL-encoding - confirm this is acceptable.
if (isset($_GET['date'])) {
    $date = $_GET['date'];
    $url .= "/?date=" . $date;
// NOTE(review): $url is not visibly loaded into $dom in this excerpt - confirm.
$dom = new Dom();
// Each lookup takes the first match ([0]) and will fail if the selector matches nothing.
$page_title = $dom->find('title')[0]->innerHtml;
$text_title = $dom->find('span.cimsor_cim')[0]->innerHtml;
$text_date = $dom->find('span.cimsor_datum')[0]->innerHtml;
$text = $dom->find('div.gondolatok_box')[0]->find('p');
$andnow = $dom->find('div.esmost_box')[0]->find('p');

<!DOCTYPE html>
echo $page_title;
</title> <!-- TODO: MySQL Query -->
Example #30
 /**
  * Creates a Dom for the given URL.
  *
  * @param string $url passed to self::getHtml()
  * @return Dom
  */
 public static function getDom($url)
     $dom = new Dom();
     $html = self::getHtml($url);
     // NOTE(review): $html is not visibly loaded into $dom in this excerpt - confirm.
     return $dom;