public function testConfigLocalOverride()
 {
     // Instance-wide option: whitespace text nodes are switched off ...
     $parser = new Dom();
     $parser->setOptions(['whitespaceTextNode' => false]);
     // ... while this single load() call switches them back on.
     $markup = '<div><p id="hey">Hey you</p> <p id="ya">Ya you!</p></div>';
     $parser->load($markup, ['whitespaceTextNode' => true]);
     // Expectation: the space between the two paragraphs is the next sibling of #hey
     $sibling = $parser->getElementById('hey')->nextSibling();
     $this->assertEquals(' ', $sibling->text);
 }
Exemple #2
0
 /**
  * Gets departures from the given station starting at the given time.
  *
  * Sends a GET request to the endpoint returned by self::getApiEndpoint()
  * and turns the rows of the returned HTML result table into departures.
  *
  * @param int $stationID station identifier, sent as the "input" query parameter
  * @param Carbon $time requested time; its H:i and d.m.y parts are sent to the API
  * @param int $maxJourneys value of the "maxJourneys" query parameter
  * @return array list of ['time' => Carbon, 'line' => string, 'direction' => string]
  * @throws ApiException when the response status code is not 200
  */
 public static function getDepartures(int $stationID, Carbon $time, int $maxJourneys = 10)
 {
     // prepare parameters for our request
     $query = ['input' => $stationID, 'boardType' => 'dep', 'time' => $time->format('H:i'), 'date' => $time->format('d.m.y'), 'maxJourneys' => $maxJourneys, 'start' => 'yes'];
     // send it to the bvg mobile site
     $response = \Requests::get(self::getApiEndpoint() . '?' . http_build_query($query));
     if ((int) $response->status_code !== 200) {
         throw new ApiException('Failed getting station data from BVG API');
     }
     // our results array
     $departures = [];
     // prepare document
     $dom = new Dom();
     $dom->load($response->body);
     // the date is taken from the text after the first ':' of #ivu_overview_input
     $dateNode = $dom->find('#ivu_overview_input');
     $dateText = trim(substr($dateNode->text, strpos($dateNode->text, ':') + 1));
     $date = Carbon::createFromFormat('d.m.y', $dateText, 'Europe/Berlin');
     // loop through each departure in the table body
     foreach ($dom->find('.ivu_result_box .ivu_table tbody tr') as $row) {
         $columns = $row->find('td');
         // rows without the three expected cells (time, line, direction) carry no departure
         if (count($columns) < 3) {
             continue;
         }
         // split "HH:MM" into its two parts; skip cells that are not in that form
         $clock = explode(':', trim(strip_tags($columns[0])));
         if (count($clock) < 2) {
             continue;
         }
         // push the departure onto our results array
         $departures[] = [
             'time' => $date->copy()->hour((int) $clock[0])->minute((int) $clock[1])->second(0),
             'line' => trim(strip_tags($columns[1]->find('a')[0])),
             'direction' => trim(strip_tags($columns[2])),
         ];
     }
     return $departures;
 }
 /**
  * Parses the given file and returns a Game object
  *
  * The non-empty cell texts of every "tr.evenColor" row are grouped six at a
  * time; each complete group is passed to createParsedEvent() and the
  * resulting event, if any, is added to the game.
  *
  * @param string $filename
  *
  * @return Game
  */
 protected function processFile($filename)
 {
     $this->command->out("Processing " . $filename);
     // The game the parsed events are attached to
     $game = $this->createGameWithInfo($filename);
     $document = new Dom();
     $document->loadFromFile($filename);
     $eventRows = [];
     /** @var AbstractNode $row */
     foreach ($document->find('tr.evenColor') as $row) {
         // Cell texts gathered so far for the event currently being assembled
         $cells = [];
         /** @var AbstractNode $cell */
         foreach ($row->getChildren() as $cell) {
             $text = $this->cleanUpLine($cell->text);
             if (!$text) {
                 continue;
             }
             $cells[] = $text;
             // Each event is made of six values; flush once a group is complete
             if (count($cells) === 6) {
                 $eventRows[] = $cells;
                 $cells = [];
             }
         }
     }
     // Add each successfully parsed event to the game log
     foreach ($eventRows as $eventRow) {
         $event = $this->createParsedEvent($eventRow);
         if ($event) {
             $game->addEvent($event);
         }
     }
     return $game;
 }
 /**
  * Adds a photo entry to the session's HTML page in the repository.
  *
  * If "<session>.html" already exists on the branch it is downloaded and
  * updated; otherwise the page is built from the $this->page template,
  * created in the repository and registered through addSession().
  *
  * @param mixed $session session identifier; "<session>.html" is the page path
  * @param File $file photo file (also used, cast to string, in the commit message)
  * @param mixed $number forwarded to addSession() when a new page is created
  */
 public function addPhoto($session, File $file, $number)
 {
     $path = $session . '.html';
     $pageExists = $this->client->api('repo')->contents()->exists($this->user, $this->repo, $path, $this->branch);
     $set = new \PHPHtmlParser\Dom();
     if ($pageExists) {
         $set->load($this->client->api('repo')->contents()->download($this->user, $this->repo, $path, $this->branch));
         // the current blob sha is required for the update call further down
         $info = $this->client->api('repo')->contents()->show($this->user, $this->repo, $path, $this->branch);
     } else {
         $set->loadFromFile($this->page);
     }
     $gallery = $set->find('#photos')[0];
     // Build the photo fragment from its template and point its <img> at the new file
     $photo = new \PHPHtmlParser\Dom();
     $photo->loadFromFile($this->photo);
     $photo->find('img')[0]->setAttribute('src', $this->getPhotoFilename($file));
     $gallery->addChild($photo->root);
     // The first four photos additionally get a twitter:imageN meta tag (N = 0..3)
     $photoCount = count($gallery->find('img'));
     if ($photoCount <= 4) {
         $metaIndex = $photoCount - 1;
         $meta = new Dom();
         $meta->load('<meta name="twitter:image' . $metaIndex . '" content="' . $this->web . $this->getPhotoFilename($file, 'th') . '">');
         $set->find('head', 0)->addChild($meta->root);
     }
     // Pretty-print and collapse blank lines
     $content = preg_replace("#\n\\s*\n#", "\n", \Mihaeu\HtmlFormatter::format((string) $set));
     if ($pageExists) {
         $this->client->api('repo')->contents()->update($this->user, $this->repo, $path, $content, 'Adding Photo ' . PHP_EOL . $file, $info['sha'], $this->branch);
         return;
     }
     $this->client->api('repo')->contents()->create($this->user, $this->repo, $path, $content, 'Adding Page and Photo ' . PHP_EOL . $file, $this->branch);
     $this->addSession($session, $number, $file);
 }
 /**
  * Fills the status and status message of every entry in $this->lineas.
  *
  * Cached data (as reported by isCached()) is used when available; otherwise
  * the page at $this->sourceURL is parsed, and the result is passed on to
  * updateStatusInfo() and cacheLines().
  *
  * @return bool|null true when cached data was used; no value is returned after a fresh parse
  */
 private function getAndParseSubteInfo()
 {
     $cached = $this->isCached();
     if ($cached) {
         $this->lineas = $cached;
         return true;
     }
     $page = new Dom();
     $page->loadFromFile($this->sourceURL);
     foreach ($this->lineas as $linea => $info) {
         $container = $page->find("#status-line-{$linea}-container")[0];
         $cssClasses = $container->getAttribute('class');
         // The checks run in sequence, so a later match overrides an earlier one
         if (false !== strpos($cssClasses, 'suspendido')) {
             $this->lineas[$linea]->status = 'CANCELLED';
         }
         if (false !== strpos($cssClasses, 'demorado')) {
             $this->lineas[$linea]->status = 'DELAYED';
         }
         if ($this->isSleepingTime()) {
             $this->lineas[$linea]->status = 'SLEEPING';
         }
         // raw status text, with HTML entities decoded
         $rawMessage = $container->find("#status-line-{$linea}")->text;
         $this->lineas[$linea]->statusMessage = html_entity_decode($rawMessage, ENT_QUOTES, 'ISO-8859-1');
     }
     $this->updateStatusInfo();
     $this->cacheLines();
 }
 public function testRemoveScriptsFalse()
 {
     // With 'removeScripts' disabled the <script> element is expected to remain in the tree
     $parser = new Dom();
     $parser->setOptions(['removeScripts' => false]);
     $parser->loadFromFile('tests/files/horrible.html');
     $scripts = $parser->find('script');
     $this->assertEquals(1, count($scripts));
     $this->assertEquals('text/JavaScript', $scripts->getAttribute('type'));
 }
 /**
  * setOpenGraph
  *
  * Collects Open Graph image candidates for the current view and stores them
  * on the Ezset instance. Does nothing when the article has no id or the
  * 'ogGetInnerPageImage' parameter is disabled.
  *
  * Article view: the full-text/intro image comes first, followed by the
  * images found in the article text (the first inline image is skipped when
  * a main image exists), then the category image and the default image as
  * fallbacks. The list is written to $es->data->ogImages.
  *
  * @param string $context
  * @param object $article
  *
  * @return  void
  */
 public static function setOpenGraph($context, $article)
 {
     $es = \Ezset::getInstance();
     $input = \JFactory::getApplication()->input;
     $view = $input->get('view');
     if (empty($article->id)) {
         return;
     }
     if (!$es->params->get('ogGetInnerPageImage', 1)) {
         return;
     }
     if ('article' == $view) {
         $images = new \JRegistry($article->images);
         $imgs = [];
         $img = $images->get('image_fulltext', $images->get('image_intro'));
         if ($img) {
             $imgs[] = $img;
         }
         // When a main image exists, only the FIRST inline image is skipped
         // (it is assumed to duplicate the main image).
         $ignoreFirst = (bool) $imgs;
         $dom = new Dom();
         $dom->load($article->text);
         foreach ($dom->find('img') as $image) {
             if ($ignoreFirst) {
                 // clear the flag so the remaining inline images are still collected
                 $ignoreFirst = false;
                 continue;
             }
             $imgs[] = $image->src;
         }
         if (!$imgs && isset($article->catid)) {
             $cat = \JTable::getInstance('category');
             $cat->load($article->catid);
             $cat->params = new \JRegistry($cat->params);
             // only keep a category image that is actually set, so the
             // default-image fallback below can still apply
             $catImage = $cat->params->get('image');
             if ($catImage) {
                 $imgs[] = $catImage;
             }
         }
         if (!$imgs && !$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
             $imgs[] = UriHelper::pathAddHost($es->params->get('ogDefaultImage'));
         }
         $es->data->ogImages = $imgs;
     } elseif ('category' == $view) {
         // static::$once limits the category lookup to the first call
         if (static::$once) {
             $cat = \JTable::getInstance('category');
             $cat->load($input->get('id'));
             $cat->params = new \JRegistry($cat->params);
             $img = $cat->params->get('image');
             if ($img) {
                 $es->ogImage = $img;
             } elseif (!$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
                 $es->ogImage = $es->params->get('ogDefaultImage');
             }
             // NOTE(review): this reads $es->data->ogImage although the lines above
             // write $es->ogImage — confirm which property is intended.
             $es->ogImage = UriHelper::pathAddHost($es->data->ogImage);
         }
         static::$once = 0;
     }
 }
Exemple #8
0
 /**
  * Loads the listing page at $url, turns every usable ".txt" entry into an
  * object and follows the next page until enough objects are collected.
  *
  * @inheritdoc
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $host = 'http://' . parse_url($url, PHP_URL_HOST);
     $page = new Dom();
     try {
         $page->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $error) {
         // Only time-outs are retried; every other failure is re-raised
         $timedOut = strpos($error->getMessage(), 'timed out') !== false;
         if (!$timedOut) {
             throw new CurlException($error->getMessage());
         }
         GlabsController::showMessage(' ...trying again', false);
         return $this->collectObjects($url);
     }
     if (strpos($page, 'This IP has been automatically blocked.') !== false) {
         throw new CurlException('IP has been blocked.');
     }
     // a "#moon" element marks a page without results: collecting ends here
     if ($page->find('#moon')[0]) {
         return true;
     }
     $this->checkTotalObjects($page);
     /* @var \PHPHtmlParser\Dom\AbstractNode $entry */
     foreach ($page->find('.txt') as $entry) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $anchor */
         $anchor = $entry->find('a')[0];
         if (!$anchor) {
             continue;
         }
         $href = $this->checkObjectLink($host, $anchor->getAttribute('href'));
         if (false === $href) {
             continue;
         }
         // fall back to the tag-stripped inner HTML when the link has no direct text
         $title = $anchor->text();
         if (!$title) {
             $title = strip_tags($anchor->innerHtml());
         }
         try {
             $object = $this->getObjectModel($url, $href, $title, $this->categoryId, $this->type);
             $object->setPrice($entry);
         } catch (ObjectException $skipped) {
             continue;
         }
         $this->collected[] = $href;
         $this->objects[] = $object;
         $this->collectedCount[$url]++;
         BaseSite::$doneObjects++;
         BaseSite::progress();
     }
     if ($this->isEnoughCollect()) {
         return true;
     }
     // Not enough yet: move on to the next page, using the current URL as referer
     $curl = GlabsController::$curl;
     $curl::$referer = $url;
     $baseUrl = str_replace([self::$pageParam . self::$page, '#list'], '', $url);
     self::$page += 100;
     return $this->collectObjects($this->getPagedUrl($baseUrl));
 }
Exemple #9
0
 /**
  * Loads the listing page at $url, turns every new ".summaryHeader" entry
  * into an object and follows the next page until enough objects are collected.
  *
  * A request that fails with a time-out or a "525" error is retried; any
  * other curl failure is re-raised.
  *
  * @inheritdoc
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         $message = $e->getMessage();
         // Retry when EITHER marker is present. Checking them one after the
         // other (as before) only retried when the message contained both.
         $retryable = false !== strpos($message, 'timed out') || false !== strpos($message, '525');
         if (!$retryable) {
             throw new CurlException($message);
         }
         GlabsController::showMessage(' ...trying again', false);
         // NOTE(review): the retry is unbounded — consider a maximum attempt count
         return $this->collectObjects($url);
     }
     // end collect. no results
     if (false !== strpos($dom, 'No matches found.')) {
         return true;
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.summaryHeader') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a', 0)) {
             $href = $link->getAttribute('href');
             // skip links that were already collected
             if (in_array($href, $this->collected, true)) {
                 continue;
             }
             $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
             try {
                 $object->setPrice();
             } catch (ObjectException $e) {
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // continue with the next page, using the current URL as referer
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         // the page counter goes 0 -> 2 -> 3 -> ...
         self::$page += self::$page ? 1 : 2;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }
 /**
  * Creates a fresh Dom, remembers it in self::$dom and delegates to its
  * loadFromUrl() method.
  *
  * @param string $url
  * @param CurlInterface $curl optional; a default Curl instance is used when omitted
  * @return mixed the value returned by Dom::loadFromUrl()
  */
 public static function loadFromUrl($url, CurlInterface $curl = null)
 {
     self::$dom = $instance = new Dom();
     // fall back to the default curl interface when none was supplied
     $transport = $curl === null ? new Curl() : $curl;
     return $instance->loadFromUrl($url, $transport);
 }
Exemple #11
0
 /**
  * Loads the listing page at $url, turns every new ".cat" entry into an
  * object and follows the next page until enough objects are collected.
  *
  * A request that fails with a time-out or a "525" error is retried; any
  * other curl failure is re-raised. Collecting stops when the page contains
  * one of the known localized "no matches" phrases.
  *
  * @inheritdoc
  */
 protected function collectObjects($url)
 {
     if (!array_key_exists($url, $this->collectedCount)) {
         $this->collectedCount[$url] = 0;
     }
     $dom = new Dom();
     try {
         $dom->loadFromUrl($url, [], GlabsController::$curl);
     } catch (CurlException $e) {
         $message = $e->getMessage();
         // Retry when EITHER marker is present. Checking them one after the
         // other (as before) only retried when the message contained both.
         $retryable = false !== strpos($message, 'timed out') || false !== strpos($message, '525');
         if (!$retryable) {
             throw new CurlException($message);
         }
         GlabsController::showMessage(' ...trying again', false);
         // NOTE(review): the retry is unbounded — consider a maximum attempt count
         return $this->collectObjects($url);
     }
     // end collect. no results (the message is localized by the site)
     $noResultPhrases = [
         'No matches found',
         'Keine Entsprechungen gefunden',
         'No hay resultados',
         'Nessuna corrispondenza trovata',
         'Aucune correspondance n&#146;a &eacute;t&eacute; trouv&eacute;e',
         'Nenhuma correspondência encontrada',
         'Совпадений нет',
         'Ingen match fundet',
         'Nebyly nalezeny žádné shody',
         'Ingen match funnet',
         'Nie znaleziono',
         'Eşleşme bulunamadı',
         'Eredmény nem található',
         'Δεν βρέθηκαν εγγραφές',
         'Aucune correspondance n’a été trouvée',
     ];
     // render the document once instead of once per phrase
     $html = (string) $dom;
     foreach ($noResultPhrases as $phrase) {
         if (false !== strpos($html, $phrase)) {
             return true;
         }
     }
     $this->checkTotalObjects($dom);
     /* @var \PHPHtmlParser\Dom\AbstractNode $span */
     foreach ($dom->find('.cat') as $span) {
         if ($this->isEnoughCollect()) {
             break;
         }
         /* @var \PHPHtmlParser\Dom\AbstractNode $link */
         if ($link = $span->find('a', 0)) {
             $href = $link->getAttribute('href');
             // skip links that were already collected
             if (in_array($href, $this->collected, true)) {
                 continue;
             }
             try {
                 $object = new Object($url, $href, $link->text(), $this->categoryId, $this->type);
             } catch (ObjectException $e) {
                 continue;
             }
             $this->collected[] = $href;
             $this->objects[] = $object;
             $this->collectedCount[$url]++;
             BaseSite::$doneObjects++;
             BaseSite::progress();
         }
     }
     if (!$this->isEnoughCollect()) {
         // continue with the next page, using the current URL as referer
         $curl = GlabsController::$curl;
         $curl::$referer = $url;
         $url = str_replace(self::$pageParam . self::$page, '', $url);
         // the page counter goes 0 -> 2 -> 3 -> ...
         self::$page += self::$page ? 1 : 2;
         return $this->collectObjects($this->getPagedUrl($url));
     }
     return true;
 }
Exemple #12
0
 /**
  * Extracts the <span> elements inside the first ".caixacampobranco" element
  * of the given HTML.
  *
  * @param string $response HTML markup to parse
  * @return mixed the result of find('span') on that element, or false when it is absent
  */
 private function generateHtmlFields($response)
 {
     $parser = new Dom();
     $parser->loadStr($response, []);
     $box = $parser->find('.caixacampobranco')[0];
     return $box === null ? false : $box->find('span');
 }
Exemple #13
0
 /**
  * Constructor
  *
  * Loads the page behind $url and stores the data extracted from it in
  * $this->data.
  *
  * @param string $url
  * @throws \InvalidArgumentException when _loadUrl() returns null for the URL
  */
 public function __construct($url)
 {
     $markup = $this->_loadUrl($url);
     if ($markup === null) {
         throw new \InvalidArgumentException('This recipe does not exists.');
     }
     $document = new Dom();
     $document->load($markup);
     $this->data = $this->_extractData($document);
 }
Exemple #14
0
 public function testConfigStrictMissingAttribute()
 {
     $parser = new Dom();
     $parser->setOptions(['strict' => true]);
     // the 'block' attribute on the first <p> carries no value
     $markup = '<div><p id="hey" block>Hey you</p> <p id="ya">Ya you!</p></div>';
     $expectedMessage = "Tag 'p' has an attribute 'block' with out a value! (character #22)";
     try {
         // expected to throw
         $parser->load($markup);
         // reaching this line means nothing was thrown: force a failure
         $this->assertTrue(false);
     } catch (StrictException $exception) {
         $this->assertEquals($expectedMessage, $exception->getMessage());
     }
 }
Exemple #15
0
 /**
  * Enriches a result entry (passed by reference) with data from its page.
  *
  * Loads $result['href'] and sets $result['additionalText'] (text of
  * "#textContent p") and $result['photos'] (src of every ".img-item img").
  *
  * @param array $result entry to enrich; must contain 'href'
  */
 protected function addAdditionalInfo(&$result)
 {
     $page = new Dom();
     $page->load($result['href']);
     $result['additionalText'] = $page->find('#textContent p')->text;
     $sources = [];
     foreach ($page->find('.img-item img') as $image) {
         $sources[] = $image->getAttribute('src');
     }
     $result['photos'] = $sources;
 }
 /**
  * saveFirstImage
  *
  * Stores the src of the first <img> found in the article's intro + full
  * text as the 'image_intro' entry of the article's images registry (null
  * when the text contains no image).
  *
  * @param string  $context
  * @param \JTable $article
  *
  * @return  void
  */
 public static function saveFirstImage($context, $article)
 {
     // Proceed when the article has an 'images' property OR the context is an article.
     // NOTE(review): if both conditions were meant to be required, this guard is too lax — confirm.
     if (!(property_exists($article, 'images') || $context == 'com_content.article')) {
         return;
     }
     $registry = new \JRegistry($article->images);
     $document = new Dom();
     $document->load($article->introtext . $article->fulltext);
     $found = $document->find('img');
     $firstSrc = $found->count() ? $found[0]->src : null;
     $registry->set('image_intro', $firstSrc);
     $article->images = $registry->toString();
 }
 /**
  * Looks up the album cover for a song on slothradio.com.
  *
  * @param string $songPath path handed to SongReader to obtain artist and album
  * @return string src of the first matching cover image, or '' when none is found
  */
 public function getAlbumCoverURL(string $songPath) : string
 {
     $songReader = new \SongReader($songPath);
     $client = new Client(['base_uri' => 'http://www.slothradio.com/', 'timeout' => 2.0]);
     $search = ['artist' => $songReader->getAuthor(), 'album' => $songReader->getAlbum()];
     $response = $client->request('GET', 'covers/', ['query' => $search]);
     $document = new Dom();
     $document->load($response->getBody()->getContents());
     $covers = $document->find('#content > div.album0 > img');
     if (count($covers) === 0) {
         return '';
     }
     /** @var Dom\HtmlNode $cover */
     $cover = $covers[0];
     return $cover->getAttribute('src');
 }
Exemple #18
0
 /**
  * Loads every URL in $this->urls and collects the href of each link that
  * starts with "http".
  *
  * @return Result result object whose result["urls"] holds the unique,
  *                re-indexed list of collected links (empty when none were found)
  */
 public function scan()
 {
     $crawlerResult = new Result(array());
     // Start from whatever the result already holds so that "urls" is always
     // defined, even when no link is found at all.
     $urls = isset($crawlerResult->result["urls"]) ? $crawlerResult->result["urls"] : array();
     foreach ($this->urls as $url) {
         $dom = new Dom();
         $dom->load($url);
         foreach ($dom->find("a") as $a) {
             $href = $a->href;
             // keep only links that have an href starting with 'http'
             if (!is_string($href) || 0 !== strpos($href, 'http')) {
                 continue;
             }
             $urls[] = $href;
         }
     }
     $crawlerResult->result["urls"] = array_values(array_unique($urls));
     return $crawlerResult;
 }
Exemple #19
0
 /**
  * Re-formats every node of the page body that matches $this->selector.
  *
  * For each match the node is passed to format(), everything matched by
  * '*' below the node is deleted, and the formatted node is added as a child.
  *
  * @param Page $page
  * @return mixed the value returned by $page->withBody() for the rewritten markup
  */
 public function run(Page $page)
 {
     // Parse the body while keeping scripts, styles and line breaks intact
     $options = ['removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true];
     $document = new Dom();
     $document->setOptions($options);
     $document->load($page->body());
     foreach ($document->find($this->selector) as $target) {
         // format first, while the original content is still in place
         $replacement = $this->format($target);
         foreach ($target->find('*') as $descendant) {
             $descendant->delete();
         }
         $target->addChild($replacement);
     }
     $html = $document->root->outerHtml();
     return $page->withBody($html);
 }
 /**
  * Determines where the given URL redirects to.
  *
  * The 'redirect_url' entry of getCurlInfo() is used first (with its query
  * string removed); when that yields nothing, or points back to $url, the
  * page is parsed for a <meta http-equiv="refresh"> tag.
  *
  * @param  string $url
  * @param  string $userAgent
  *
  * @return string the redirect target, or an empty value when none was found
  */
 private function getRedirectUrl($url, $userAgent)
 {
     $curlInfo = $this->getCurlInfo($url, $userAgent);
     $redirectUrl = $this->removeQueryString(isset($curlInfo['redirect_url']) ? $curlInfo['redirect_url'] : null);
     // a "redirect" to the same address (ignoring slashes at either end) is not a redirect
     if (trim($url, '/') === trim($redirectUrl, '/')) {
         $redirectUrl = '';
     }
     // look for meta http-equiv="refresh"
     if (!$redirectUrl) {
         $dom = new Dom();
         $dom->load($url);
         foreach ($dom->find('meta') as $meta) {
             if ($meta->getAttribute('http-equiv') !== 'refresh') {
                 continue;
             }
             // content looks like "5; url=http://example.com/", where the URL may or
             // may not be quoted. Only a real match is used; the previous
             // preg_replace() returned the whole attribute when nothing matched.
             $content = (string) $meta->getAttribute('content');
             if (preg_match('/^\s*[\d.]+\s*;\s*url\s*=\s*([\'"]?)(.+?)\1\s*$/i', $content, $match)) {
                 $redirectUrl = $match[2];
             }
             // only the first refresh tag is considered
             break;
         }
     }
     return $redirectUrl;
 }
Exemple #21
0
 /**
  * Attempts to get the URL to a given profiles
  * photo. This method will return the URL or will return
  * boolean false if the profile photo could not be scraped.
  *
  * The photo is taken from the first <img> tag that follows the text
  * "dkb photo" on the profile's "/posts" page.
  *
  * @param string $profileURL 	The URL to the profile
  *
  * @return string|boolean
  */
 public function profilePhoto($profileURL)
 {
     // best effort: a failed download simply yields false
     $contents = @file_get_contents($profileURL . '/posts');
     if (!$contents) {
         return false;
     }
     // Every lookup below is checked: passing a false position on to substr()
     // would silently cut at offset 0 and pick up an unrelated tag.
     $marker = stripos($contents, 'dkb photo');
     if ($marker === false) {
         return false;
     }
     $tagStart = stripos($contents, '<img', $marker);
     if ($tagStart === false) {
         return false;
     }
     $tagEnd = strpos($contents, '>', $tagStart);
     if ($tagEnd === false) {
         return false;
     }
     // isolate just the <img ...> tag and let the parser read its attributes
     $dom = new Dom();
     $dom->load(substr($contents, $tagStart, $tagEnd - $tagStart + 1));
     $img = $dom->find('img', 0);
     if (!$img) {
         return false;
     }
     $src = $img->getAttribute('src');
     if (!$src) {
         return false;
     }
     // protocol-relative URLs are completed with https
     if (substr($src, 0, 2) == '//') {
         $src = 'https:' . $src;
     }
     return $src;
 }
Exemple #22
0
 /**
  * Sets $this->phone to true when something that looks like a phone number
  * is found in the description or, failing that, in the first
  * ".metaInfoDisplay" element of self::$dom.
  *
  * @return bool true when a phone-number pattern matched, false otherwise
  */
 protected function setPhone()
 {
     $patterns = ['/\\d+-\\d+-\\d+/', '/\\d{10}/', '/\\d{3}\\s+\\d{7}/', '/\\(\\d+\\)\\s?[\\d+-]+/', '/\\d+\\.+\\s?\\d+\\.+\\d+\\.+\\d+\\.+/', '/\\d{3}\\s\\d{3}\\s\\d{4}/', '/\\d+\\s+-\\d+-\\s+\\d+/', '/\\d+--\\d+--\\d+/'];
     // true as soon as any of the patterns matches the given text
     $looksLikePhone = static function ($text) use ($patterns) {
         foreach ($patterns as $pattern) {
             if (preg_match($pattern, $text)) {
                 return true;
             }
         }
         return false;
     };
     if ($looksLikePhone((string) $this->description)) {
         $this->phone = true;
         return true;
     }
     // the DOM is only consulted when the description did not match
     /* @var \PHPHtmlParser\Dom\AbstractNode $contacts */
     $contacts = self::$dom->find('.metaInfoDisplay', 0);
     if ($contacts && $looksLikePhone((string) $contacts)) {
         $this->phone = true;
         return true;
     }
     // explicit result for the "nothing found" case (previously fell through with null)
     return false;
 }
Exemple #23
0
 /**
  * Renders the template belonging to $path and builds a Page from its output.
  *
  * The path segments are joined with '.' and inserted into the $this->path
  * format string to locate the template file. The template is included with
  * the entries of $context available as variables; its output is parsed and
  * the title, the remaining <head> children and the <body> content are
  * handed to the Page.
  *
  * @param string $path slash-separated page path
  * @param array $context variables made available to the template
  * @return Page
  * @throws PageNotFoundException when the template file does not exist
  */
 public function page($path, array $context = array())
 {
     $path = array_filter(explode('/', $path), 'strlen');
     $pathname = implode('.', $path);
     $file = sprintf($this->path, $pathname);
     if (!file_exists($file)) {
         throw new PageNotFoundException($path);
     }
     // Parse the HTML
     $dom = new Dom();
     $dom->setOptions(array('removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true));
     // EXTR_SKIP: a context key must never overwrite this method's own
     // variables ($file, $dom, $path, ...), least of all the path that is included next.
     extract($context, EXTR_SKIP);
     ob_start();
     try {
         include $file;
     } catch (\Exception $e) {
         // do not leave the output buffer open when the template fails
         ob_end_clean();
         throw $e;
     } catch (\Throwable $e) {
         ob_end_clean();
         throw $e;
     }
     $dom->load(ob_get_clean());
     // Create a new page
     // NOTE(review): the with*() return values are discarded below, which assumes
     // that Page::with*() modifies the page in place — confirm.
     $page = new Page($path);
     // Title
     if (($title = $dom->find('title', 0)) !== null) {
         $page->withTitle($title->text());
     }
     // Header: every <head> child except <title> and <meta charset=...>
     if (($head = $dom->find('head', 0)) !== null) {
         foreach ($head->getChildren() as $child) {
             if ($child->getTag()->name() !== 'title' && !($child->getTag()->name() === 'meta' && $child->getAttribute('charset') !== null)) {
                 $page->withHeader($page->header() . $child->outerHtml());
             }
         }
     }
     // Body
     if (($body = $dom->find('body', 0)) !== null) {
         $page->withBody($body->innerHtml());
     }
     // Return page
     return $page;
 }
Exemple #24
0
<?php

include "vendor/autoload.php";
use PHPHtmlParser\Dom;
?>
    <!doctype html>
    <html lang="ru">
    <head>
        <meta charset="UTF-8">
        <title>Таблица</title>
    </head>
    <body>
        <table>

<?php
// Walk (at most) the first 100 table cells of index.html and print them.
$dom = new Dom();
$dom->loadFromFile('index.html');
// Select the cells once instead of re-running the query on every iteration,
// and never step past the last cell that actually exists.
$cells = $dom->find("tr td");
$limit = min(100, count($cells));
for ($i = 0; $i < $limit; $i++) {
    $text = $cells[$i]->text;
    if ($text == "Адрес") {
        // skip this cell and the five that follow (the loop's own $i++ adds the sixth step)
        $i += 5;
        continue;
    }
    // purely numeric cells are additionally emitted as a table row
    if (preg_match("|^[\\d]+\$|", $text)) {
        echo "<tr><td>" . $text . "</td></tr>";
    }
    echo "<br/>";
    echo $text;
}
?>
        </table>
Exemple #25
0
 public function testMultipleSingleQuotes()
 {
     // the single-quoted title value itself contains a single quote
     $markup = "<a title='Ain't this the best' href=\"http://www.example.com\">Hello</a>";
     $parser = new Dom();
     $parser->load($markup);
     $anchor = $parser->getElementsByTag('a')[0];
     $this->assertEquals("Ain't this the best", $anchor->title);
 }
Exemple #26
0
 public function testEnforceEncoding()
 {
     $parser = new Dom();
     $parser->load('tests/files/horrible.html', ['enforceEncoding' => 'UTF-8']);
     // with the encoding forced, the second table input must NOT render as this markup
     $unexpected = '<input type="submit" tabindex="0" name="submit" value="Информации" />';
     $secondInput = $parser->find('table input', 1);
     $this->assertNotEquals($unexpected, $secondInput->outerHtml);
 }
Exemple #27
0
 /**
  * Collects the src attribute of every image on the given page.
  *
  * Besides the page's own <img> tags, every <iframe> whose id contains
  * "photoset" is fetched through requestHtml() and its <img> tags are
  * included as well.
  * NOTE(review): the previous comment also stated that already stored image
  * addresses are not stored again; no such filtering happens in this method.
  *
  * @param int $page page number
  * @return array image addresses
  */
 protected function getRawImgsrcs($page)
 {
     // prepare the DOM
     $markup = $this->getHtml($page);
     $document = new Dom();
     $document->load($markup);
     $sources = [];
     // every <img> of the page itself
     foreach ($document->find('img') as $image) {
         $sources[] = $image->getAttribute('src');
     }
     // inspect every <iframe>; only "photoset" frames are followed
     foreach ($document->find('iframe') as $frame) {
         if (false === strstr($frame->getAttribute('id'), 'photoset')) {
             continue;
         }
         $frameMarkup = $this->requestHtml($frame->getAttribute('src'));
         $frameDocument = new Dom();
         $frameDocument->load($frameMarkup);
         foreach ($frameDocument->find('img') as $image) {
             $sources[] = $image->getAttribute('src');
         }
     }
     return $sources;
 }
 /**
  * Walks through 12-digit codes, looks each one up on barcode-list.ru and
  * saves the name found in the first result row as a ParsedProduct.
  *
  * Progress is kept in the 'lastParseID' SavedVariable, so a later run
  * continues where the previous one stopped. For every code a digit computed
  * from its digits (weights 1 and 3 alternating) is appended before the
  * lookup. When a download fails, the command pauses for ten seconds and
  * then continues from the code that failed.
  *
  * @param int $start first code to try when no progress has been saved yet
  */
 public function actionIndex($start = 111111111111)
 {
     $lastID = SavedVariable::findOne('lastParseID');
     if (!isset($lastID)) {
         $lastID = new SavedVariable();
         $lastID->name = 'lastParseID';
         $lastID->value = (string) (int) $start;
         $lastID->save();
     }
     $ii = max(111111111111, (int) $lastID->value);
     while ($ii <= 999999999999) {
         for ($i = $ii; $i <= 999999999999; $i++) {
             // persist the position before doing any work for this code
             $lastID->value = (string) $i;
             $lastID->save();
             // $barcode is first set to the 12 digits (inside the expression), then the
             // computed digit is appended. Presumably array_walk_r() returns the walked
             // array — TODO confirm.
             $barcode .= (10 - array_sum(ArrayHelperAdvanced::array_walk_r(str_split($barcode = (string) $i), function (&$v, $k) {
                 $v *= $k % 2 * 2 + 1;
             })) % 10) % 10;
             $dom = new Dom();
             try {
                 $string = file_get_contents('http://www.barcode-list.ru/barcode/RU/Поиск.htm?barcode=' . $barcode);
             } catch (\Exception $e) {
                 break;
             }
             // a failed download that did not raise an exception is handled the same way
             if ($string === false) {
                 break;
             }
             $dom->load($string);
             /** @var HtmlNode $table */
             $tables = $dom->find('.randomBarcodes');
             if (count($tables) === 0) {
                 echo 'not found ' . $i . ' of ' . $barcode . "\n";
                 continue;
             }
             $table = $tables[0];
             echo '$table->countChildren() = ' . $table->countChildren() . "\n";
             echo '$table->getTag()->name() = ' . $table->getTag()->name() . "\n";
             /** @var HtmlNode $tr */
             /** @noinspection LoopWhichDoesNotLoopInspection */
             foreach ($table->find('tr') as $tr) {
                 echo '  $tr->getTag()->name() = ' . $tr->getTag()->name() . "\n";
                 $tds = $tr->find('td');
                 // the name is read from the third cell, so rows with fewer cells are skipped
                 if (count($tds) < 3) {
                     continue;
                 }
                 $td = $tds[2];
                 /** @var HtmlNode $td */
                 echo '    $td->getTag()->name() = ' . $td->getTag()->name() . "\n";
                 echo '      $td->text = ' . $td->text . "\n";
                 (new ParsedProduct(['barcode' => $i, 'name' => $td->text]))->save(false);
                 // only the first usable row is stored
                 break;
             }
         }
         // resume from the code that was being processed when the inner loop ended
         $ii = $i;
         echo 'break ' . $i . "\n";
         echo date('Y.m.d h:i:s', time()) . "\n";
         // pause before the next attempt without spinning the CPU
         sleep(10);
         echo date('Y.m.d h:i:s', time()) . "\n";
     }
     echo $ii . "\n";
 }
Exemple #29
0
    public function testScriptCleanerScriptTag()
    {
        // The script body itself contains a "<script ...>" string inside document.write()
        $markup = '
		<p>.....</p>
		<script>
		Some code ... 
		document.write("<script src=\'some script\'><\\/script>") 
		Some code ... 
		</script>
		<p>....</p>';
        $parser = new Dom();
        $parser->load($markup);
        // the paragraph after the script must still be parsed as the second <p>
        $paragraphs = $parser->getElementsByTag('p');
        $this->assertEquals('....', $paragraphs[1]->innerHtml);
    }
Exemple #30
-1
 /**
  * Fetches the markup for $url through self::getHtml() and returns it parsed.
  *
  * @param string $url
  * @return Dom
  */
 public static function getDom($url)
 {
     $document = new Dom();
     $document->load(self::getHtml($url));
     return $document;
 }