Dom::find, PHPHtmlParser PHP代码示例

示例#1

0

显示文件

文件： Station.php 项目： mkerix/php-bvg

 /**
  * Fetches the departures of a station from the BVG mobile site.
  *
  * @param int    $stationID   Station identifier, sent as the "input" query parameter.
  * @param Carbon $time        Moment from which departures are requested.
  * @param int    $maxJourneys Number of journeys asked for (defaults to 10).
  * @return array One entry per result row with the keys "time", "line" and "direction".
  * @throws ApiException When the endpoint does not answer with status code 200.
  */
 public static function getDepartures(int $stationID, Carbon $time, int $maxJourneys = 10)
 {
     // query parameters understood by the endpoint
     $params = [
         'input' => $stationID,
         'boardType' => 'dep',
         'time' => $time->format('H:i'),
         'date' => $time->format('d.m.y'),
         'maxJourneys' => $maxJourneys,
         'start' => 'yes',
     ];
     $response = \Requests::get(self::getApiEndpoint() . '?' . http_build_query($params));
     // anything but a 200 is treated as a failed lookup
     if ($response->status_code != 200) {
         throw new ApiException('Failed getting station data from BVG API');
     }
     $dom = new Dom();
     $dom->load($response->body);
     // the overview element holds the date after its first colon
     $overview = $dom->find('#ivu_overview_input');
     $rawDate = trim(substr($overview->text, strpos($overview->text, ':') + 1));
     $day = Carbon::createFromFormat('d.m.y', $rawDate, 'Europe/Berlin');
     $departures = [];
     // every body row of the result table describes one departure
     foreach ($dom->find('.ivu_result_box .ivu_table tbody tr') as $row) {
         $cells = $row->find('td');
         // first cell is "HH:MM"
         $clock = explode(':', strip_tags($cells[0]));
         $departures[] = [
             'time' => $day->copy()->hour($clock[0])->minute($clock[1])->second(0),
             'line' => trim(strip_tags($cells[1]->find('a')[0])),
             'direction' => trim(strip_tags($cells[2])),
         ];
     }
     return $departures;
 }

示例#2

0

显示文件

文件： CleanupTest.php 项目： cybrox/php-html-parser

 public function testRemoveScriptsFalse()
 {
     // With "removeScripts" switched off the fixture must still contain its <script> element.
     $dom = new Dom();
     $dom->setOptions(['removeScripts' => false]);
     $dom->loadFromFile('tests/files/horrible.html');
     $scriptCount = count($dom->find('script'));
     $this->assertEquals(1, $scriptCount);
     // ...and that element keeps its original "type" attribute.
     $scriptType = $dom->find('script')->getAttribute('type');
     $this->assertEquals('text/JavaScript', $scriptType);
 }

示例#3

0

显示文件

文件： ParseCommand.php 项目： pechenyipavel/flat

 protected function addAdditionalInfo(&$result)
 {
     // Load the document referenced by the entry's "href" value.
     $document = new Dom();
     $document->load($result['href']);
     // Text of the paragraph match inside #textContent.
     $result['additionalText'] = $document->find('#textContent p')->text;
     // "src" attribute of every image found inside an .img-item element.
     $sources = [];
     foreach ($document->find('.img-item img') as $imageNode) {
         $sources[] = $imageNode->getAttribute('src');
     }
     $result['photos'] = $sources;
 }

示例#4

0

显示文件

文件： Subtes.php 项目： HaySubtes/hay-subtes-website

 private function getAndParseSubteInfo()
 {
     // A cache hit replaces the line data and skips parsing altogether.
     $cached = $this->isCached();
     if ($cached) {
         $this->lineas = $cached;
         return true;
     }
     $document = new Dom();
     $document->loadFromFile($this->sourceURL);
     // CSS class fragment => status; a later match overwrites an earlier one.
     $statusByMarker = ['suspendido' => 'CANCELLED', 'demorado' => 'DELAYED'];
     foreach ($this->lineas as $linea => $unused) {
         $container = $document->find("#status-line-{$linea}-container")[0];
         $cssClasses = $container->getAttribute('class');
         foreach ($statusByMarker as $marker => $status) {
             if (strpos($cssClasses, $marker) !== false) {
                 $this->lineas[$linea]->status = $status;
             }
         }
         // Sleeping time takes precedence over whatever the page reported.
         if ($this->isSleepingTime()) {
             $this->lineas[$linea]->status = 'SLEEPING';
         }
         // Raw status message, decoded from HTML entities.
         $rawMessage = $container->find("#status-line-{$linea}")->text;
         $this->lineas[$linea]->statusMessage = html_entity_decode($rawMessage, ENT_QUOTES, 'ISO-8859-1');
     }
     $this->updateStatusInfo();
     $this->cacheLines();
     // NOTE(review): only the cache-hit path returns true; this path ends without a return value.
 }

示例#5

0

显示文件

文件： PlayByPlayParserParser.php 项目： gnumast/nhl

 /**
  * Parses the given file and returns a Game object
  *
  * @param string $filename
  *
  * @return Game
  */
 protected function processFile($filename)
 {
     $this->command->out("Processing " . $filename);
     // Game object built from the file before the events are attached
     $game = $this->createGameWithInfo($filename);
     $document = new Dom();
     $document->loadFromFile($filename);
     $events = [];
     /** @var AbstractNode $row */
     foreach ($document->find('tr.evenColor') as $row) {
         $cells = [];
         /** @var AbstractNode $cell */
         foreach ($row->getChildren() as $cell) {
             $text = $this->cleanUpLine($cell->text);
             if (!$text) {
                 continue;
             }
             $cells[] = $text;
             // Six non-empty cells make up one event; start collecting the next one
             if (count($cells) == 6) {
                 $events[] = $cells;
                 $cells = [];
             }
         }
     }
     // Attach every event that could be parsed to the game log
     foreach ($events as $cells) {
         $event = $this->createParsedEvent($cells);
         if ($event) {
             $game->addEvent($event);
         }
     }
     return $game;
 }

示例#6

0

显示文件

文件： OpenGraph.php 项目： lyrasoft/lyrasoft.github.io

 /**
  * Collects Open Graph image candidates for the current view.
  *
  * For the "article" view the list starts with the article's full-text (or
  * intro) image, followed by the images found in the article text; when a
  * main image exists, the first inline image is skipped. If nothing was
  * found, the category image and then the configured default image are used.
  * For the "category" view a single image is resolved once per request.
  *
  * Does nothing when the article has no id or when the
  * "ogGetInnerPageImage" parameter is disabled.
  *
  * @param string $context Not used by this method.
  * @param object $article Object read for id, images, text and catid.
  *
  * @return  void
  */
 public static function setOpenGraph($context, $article)
 {
     $es = \Ezset::getInstance();
     $input = \JFactory::getApplication()->input;
     $view = $input->get('view');
     if (empty($article->id)) {
         return;
     }
     if (!$es->params->get('ogGetInnerPageImage', 1)) {
         return;
     }
     if ('article' == $view) {
         $images = new \JRegistry($article->images);
         $imgs = array();
         $img = $images->get('image_fulltext', $images->get('image_intro'));
         if ($img) {
             $imgs[] = $img;
         }
         // With a main image present, only the FIRST inline image is skipped.
         // The flag used to stay set for the whole loop, which dropped every
         // inline image instead of just the first one.
         $ignoreFirst = (bool) $imgs;
         $dom = new Dom();
         $dom->load($article->text);
         foreach ($dom->find('img') as $image) {
             if ($ignoreFirst) {
                 $ignoreFirst = false;
                 continue;
             }
             $imgs[] = $image->src;
         }
         // Fall back to the category image when the article provided none
         if (!$imgs && isset($article->catid)) {
             $cat = \JTable::getInstance('category');
             $cat->load($article->catid);
             $cat->params = new \JRegistry($cat->params);
             $imgs[] = $cat->params->get('image');
         }
         // Last resort: the configured default image, unless it is reserved for the front page
         if (!$imgs && !$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
             $imgs[] = UriHelper::pathAddHost($es->params->get('ogDefaultImage'));
         }
         $es->data->ogImages = $imgs;
     } elseif ('category' == $view) {
         // static::$once limits the category lookup to the first call
         if (static::$once) {
             $cat = \JTable::getInstance('category');
             $cat->load($input->get('id'));
             $cat->params = new \JRegistry($cat->params);
             $img = $cat->params->get('image');
             if ($img) {
                 $es->ogImage = $img;
             } elseif (!$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
                 $es->ogImage = $es->params->get('ogDefaultImage');
             }
             // NOTE(review): reads $es->data->ogImage although the lines above write
             // $es->ogImage — confirm which property is intended before changing it.
             $es->ogImage = UriHelper::pathAddHost($es->data->ogImage);
         }
         static::$once = 0;
     }
 }

示例#7

0

显示文件

文件： Correios.php 项目： upnid/consulta-cep

 private function generateHtmlFields($response)
 {
     // Parse the response string, passing an empty options array.
     $document = new Dom();
     $document->loadStr($response, array());
     // First element matching the result-box class.
     $box = $document->find('.caixacampobranco')[0];
     // No such element: report failure with false; otherwise hand back its <span> matches.
     return $box === null ? false : $box->find('span');
 }

示例#8

0

显示文件

文件： FileBackend.php 项目： dengsn/quill

 /**
  * Renders the page file that belongs to a path and splits the resulting
  * HTML into title, header and body of a new Page object.
  *
  * @param string $path    Slash-separated page path; empty segments are dropped.
  * @param array  $context Variables extracted into the local scope before the file is included.
  *
  * @return Page
  * @throws PageNotFoundException When the resolved file does not exist.
  */
 public function page($path, array $context = array())
 {
     // "a//b/" becomes ['a', 'b'] (keys preserved by array_filter), then "a.b"
     $path = array_filter(explode('/', $path), 'strlen');
     $pathname = implode('.', $path);
     // $this->path is used as a sprintf format that receives the dotted name
     $file = sprintf($this->path, $pathname);
     if (!file_exists($file)) {
         throw new PageNotFoundException($path);
     }
     // Parse the HTML
     $dom = new Dom();
     $dom->setOptions(array('removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true));
     // NOTE(review): extract() can overwrite the locals above ($dom, $file, $path, ...)
     // when $context contains those keys, and the included file sees every local
     // variable of this method — do not rename locals without checking the templates.
     extract($context);
     // Capture everything the included file prints and feed it to the parser
     ob_start();
     include $file;
     $dom->load(ob_get_clean());
     // Create a new page
     $page = new Page($path);
     // Title
     if (($title = $dom->find('title', 0)) !== null) {
         // NOTE(review): the return value of withTitle() is discarded here and for the
         // other with*() calls below — confirm that Page mutates itself.
         $page->withTitle($title->text());
     }
     // Header
     if (($head = $dom->find('head', 0)) !== null) {
         foreach ($head->getChildren() as $child) {
             // Copy every <head> child except <title> and a <meta> that has a charset attribute
             if ($child->getTag()->name() !== 'title' && !($child->getTag()->name() === 'meta' && $child->getAttribute('charset') !== null)) {
                 $page->withHeader($page->header() . $child->outerHtml());
             }
         }
     }
     // Body
     if (($body = $dom->find('body', 0)) !== null) {
         $page->withBody($body->innerHtml());
     }
     // Return page
     return $page;
 }

示例#9

0

显示文件

文件： Content.php 项目： lyrasoft/lyrasoft.github.io

 /**
  * saveFirstImage
  *
  * @param string  $context
  * @param \JTable $article
  *
  * @return  void
  */
 public static function saveFirstImage($context, $article)
 {
     // NOTE(review): returns early only when BOTH conditions hold — confirm "&&" (not "||") is intended.
     $hasImagesProperty = property_exists($article, 'images');
     if (!$hasImagesProperty && $context != 'com_content.article') {
         return;
     }
     $registry = new \JRegistry($article->images);
     $document = new Dom();
     $document->load($article->introtext . $article->fulltext);
     $found = $document->find('img');
     // Source of the first <img> in intro + full text, or null when there is none
     $firstSrc = $found->count() ? $found[0]->src : null;
     $registry->set('image_intro', $firstSrc);
     $article->images = $registry->toString();
 }

示例#10

0

显示文件

文件： AlbumCoverProvider.php 项目： ParalelniPolis/bitcoinJukebox

 public function getAlbumCoverURL(string $songPath) : string
 {
     // SongReader supplies the artist and album that form the lookup query.
     $songReader = new \SongReader($songPath);
     $client = new Client(['base_uri' => 'http://www.slothradio.com/', 'timeout' => 2.0]);
     $lookup = ['artist' => $songReader->getAuthor(), 'album' => $songReader->getAlbum()];
     $response = $client->request('GET', 'covers/', ['query' => $lookup]);
     $document = new Dom();
     $document->load($response->getBody()->getContents());
     $matches = $document->find('#content > div.album0 > img');
     // No matching image: answer with an empty string.
     if (count($matches) < 1) {
         return '';
     }
     /** @var Dom\HtmlNode $cover */
     $cover = $matches[0];
     return $cover->getAttribute('src');
 }

示例#11

0

显示文件

文件： BaseObject.php 项目： nnrudakov/glabs

 /**
  * Sets $this->phone to true when the description, or the first
  * ".metaInfoDisplay" element of the shared document, matches one of the
  * phone-number patterns.
  *
  * @return bool True when a pattern matched, false otherwise.
  */
 protected function setPhone()
 {
     $patterns = [
         '/\\d+-\\d+-\\d+/',
         '/\\d{10}/',
         '/\\d{3}\\s+\\d{7}/',
         '/\\(\\d+\\)\\s?[\\d+-]+/',
         '/\\d+\\.+\\s?\\d+\\.+\\d+\\.+\\d+\\.+/',
         '/\\d{3}\\s\\d{3}\\s\\d{4}/',
         '/\\d+\\s+-\\d+-\\s+\\d+/',
         '/\\d+--\\d+--\\d+/',
     ];
     // Shared matcher so both sources are checked the same way
     $matchesAny = function ($subject) use ($patterns) {
         foreach ($patterns as $pattern) {
             if (preg_match($pattern, $subject)) {
                 return true;
             }
         }
         return false;
     };
     if ($matchesAny((string) $this->description)) {
         $this->phone = true;
         return true;
     }
     // With an index, find() yields a single node (or a falsy value when nothing
     // matched); the node is matched through its string form.
     /* @var \PHPHtmlParser\Dom\AbstractNode|null $contacts */
     $contacts = self::$dom->find('.metaInfoDisplay', 0);
     if ($contacts && $matchesAny((string) $contacts)) {
         $this->phone = true;
         return true;
     }
     // Previously the method fell through and returned null despite "@return bool"
     return false;
 }

示例#12

0

显示文件

文件： Scanner.php 项目： Sohib/Crawler

 /**
  * Loads every configured URL and collects the distinct link targets that
  * start with "http".
  *
  * @return Result Result object whose "urls" entry is a re-indexed list of unique links
  *                (an empty list when nothing was found).
  */
 public function scan()
 {
     $crawlerResult = new Result(array());
     // Collected locally so the "urls" entry exists even when no link qualifies;
     // before, array_unique() received an undefined entry in that case.
     $collected = array();
     foreach ($this->urls as $url) {
         $dom = new Dom();
         $dom->load($url);
         foreach ($dom->find("a") as $a) {
             // An anchor without href yields null; normalise before the string check
             $href = (string) $a->href;
             // Skip everything that does NOT start with 'http'
             if (0 !== strpos($href, 'http')) {
                 continue;
             }
             $collected[] = $href;
         }
     }
     $crawlerResult->result["urls"] = array_values(array_unique($collected));
     return $crawlerResult;
 }

示例#13

0

显示文件

文件： Formatter.php 项目： dengsn/quill

 public function run(Page $page)
 {
     // Parse the page body without stripping scripts, styles or line breaks
     $document = new Dom();
     $document->setOptions(array('removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true));
     $document->load($page->body());
     foreach ($document->find($this->selector) as $target) {
         // Build the replacement first, while the node still has its content
         $replacement = $this->format($target);
         // Empty the node...
         foreach ($target->find('*') as $descendant) {
             $descendant->delete();
         }
         // ...and put the formatted node in its place
         $target->addChild($replacement);
     }
     $html = $document->root->outerHtml();
     return $page->withBody($html);
 }

示例#14

0

显示文件

文件： SpamLinkAnalyser.php 项目： limonte/spam-link-analyser

 /**
  * Determines where a URL redirects to, first from the cURL info and then
  * from a meta http-equiv="refresh" tag in the document.
  *
  * @param  string $url
  * @param  string $userAgent
  *
  * @return string Redirect target, or an empty value when none was found.
  */
 private function getRedirectUrl($url, $userAgent)
 {
     $curlInfo = $this->getCurlInfo($url, $userAgent);
     $redirectUrl = $this->removeQueryString(@$curlInfo['redirect_url']);
     // A "redirect" to the same address (ignoring surrounding slashes) does not count
     if (trim($url, '/') === trim($redirectUrl, '/')) {
         $redirectUrl = '';
     }
     if ($redirectUrl) {
         return $redirectUrl;
     }
     // Nothing from cURL: look for the first meta http-equiv="refresh"
     $document = new Dom();
     $document->load($url);
     foreach ($document->find('meta') as $metaTag) {
         if ($metaTag->getAttribute('http-equiv') !== 'refresh') {
             continue;
         }
         // Reduce '<seconds>; url="<target>"' to the quoted target
         $redirectUrl = preg_replace('/\\s*\\d+\\s*;\\s*url\\s*=\\s*(\'|\\")(.+)(\'|\\")/i', '$2', $metaTag->getAttribute('content'));
         break;
     }
     return $redirectUrl;
 }

示例#15

0

显示文件

文件： GoogleScraper.php 项目： Vicimus/Gicimus

 /**
  * Attempts to get the URL to a given profiles
  * photo. This method will return the URL or will return
  * boolean false if the profile photo could not be scraped.
  *
  * @param string $profileURL 	The URL to the profile
  *
  * @return string|boolean
  */
 public function profilePhoto($profileURL)
 {
     $contents = @file_get_contents($profileURL . '/posts');
     if (!$contents) {
         return false;
     }
     // Each search step returns false as soon as its marker is missing, so the
     // method keeps its documented "URL or false" contract instead of calling
     // a method on a missing node.
     $position = stripos($contents, 'dkb photo');
     if ($position === false) {
         return false;
     }
     /* Chop off all of the string before this position */
     $contents = substr($contents, $position);
     $position = stripos($contents, '<img');
     if ($position === false) {
         return false;
     }
     $contents = substr($contents, $position);
     $position = stripos($contents, '>');
     if ($position === false) {
         return false;
     }
     /* Chop off everything after the position */
     $contents = substr($contents, 0, $position + 1);
     $dom = new Dom();
     $dom->load($contents);
     $img = $dom->find('img', 0);
     if (!$img) {
         return false;
     }
     $src = $img->getAttribute('src');
     if ($src === null || $src === '') {
         return false;
     }
     // Protocol-relative sources are completed with https
     if (substr($src, 0, 2) == '//') {
         $src = 'https:' . $src;
     }
     return $src;
 }

示例#16

0

显示文件

文件： Wallhaven.php 项目： ivkos/wallhaven

 /**
  * Search for wallpapers.
  *
  * @param string   $query       What to search for. Searching for specific tags can be done with #tagname, e.g.
  *                              <samp>#cars</samp>
  * @param int      $categories  Categories to include. This is a bit field, e.g.: <samp>Category::GENERAL |
  *                              Category::PEOPLE</samp>
  * @param int      $purity      Purity of wallpapers. This is a bit field, e.g.: <samp>Purity::SFW |
  *                              Purity::NSFW</samp>
  * @param string   $sorting     Sorting, e.g. <samp>Sorting::RELEVANCE</samp>
  * @param string   $order       Order of results. Can be <samp>Order::ASC</samp> or <samp>Order::DESC</samp>
  * @param string[] $resolutions Array of resolutions in the format of WxH, e.g.: <samp>['1920x1080',
  *                              '1280x720']</samp>
  * @param string[] $ratios      Array of ratios in the format of WxH, e.g.: <samp>['16x9', '4x3']</samp>
  * @param int      $page        The id of the page to fetch. This is <em>not</em> a total number of pages to
  *                              fetch.
  *
  * @return WallpaperList Wallpapers.
  */
 public function search($query, $categories = Category::ALL, $purity = Purity::SFW, $sorting = Sorting::RELEVANCE, $order = Order::DESC, $resolutions = [], $ratios = [], $page = 1)
 {
     // Query parameters of the search request; bit fields are sent in binary form
     $params = [
         'q' => $query,
         'categories' => self::getBinary($categories),
         'purity' => self::getBinary($purity),
         'sorting' => $sorting,
         'order' => $order,
         'resolutions' => implode(',', $resolutions),
         'ratios' => implode(',', $ratios),
         'page' => $page,
     ];
     $result = $this->client->get(self::URL_SEARCH, ['query' => $params, 'headers' => ['X-Requested-With' => 'XMLHttpRequest']]);
     $document = new Dom();
     $document->load($result->getBody()->getContents());
     $figures = $document->find('figure.thumb');
     $wallpapers = new WallpaperList();
     foreach ($figures as $figure) {
         // The wallpaper id is whatever follows the wallpaper URL prefix in the preview link
         $previewHref = $figure->find('a.preview')->getAttribute('href');
         $id = preg_split('#' . self::URL_HOME . self::URL_WALLPAPER . '/#', $previewHref)[1];
         // Purity and category are encoded in the figure's class attribute
         preg_match("/thumb thumb-(sfw|sketchy|nsfw) thumb-(general|anime|people)/", $figure->getAttribute('class'), $classMatches);
         $thumbPurity = constant('Wallhaven\\Purity::' . strtoupper($classMatches[1]));
         $thumbCategory = constant('Wallhaven\\Category::' . strtoupper($classMatches[2]));
         $thumbResolution = str_replace(' ', '', trim($figure->find('span.wall-res')->text));
         $thumbFavorites = (int) $figure->find('.wall-favs')->text;
         $wallpaper = new Wallpaper($id, $this->client);
         $wallpaper->setProperties([
             'purity' => $thumbPurity,
             'category' => $thumbCategory,
             'resolution' => $thumbResolution,
             'favorites' => $thumbFavorites,
         ]);
         $wallpapers[] = $wallpaper;
     }
     return $wallpapers;
 }

示例#17

0

显示文件

文件： DomTest.php 项目： lyrasoft/lyrasoft.github.io

 public function testChangeContent()
 {
     $dom = new Dom();
     $dom->load('<div class="all"><p>Hey bro, <a href="google.com" id="78">click here</a></div><br />');
     // Replace the anchor's inner HTML, then check that the enclosing paragraph renders the new text.
     $anchor = $dom->find('a')[0];
     $anchor->setInnerHtml('gogogo');
     $paragraph = $dom->getElementsByTag('p')[0];
     $this->assertEquals('<p>Hey bro, <a href="google.com" id="78">gogogo</a></p>', $paragraph->outerHtml);
 }

示例#18

0

显示文件

文件： TumblrUser.php 项目： roslairy/tumfetch

 /**
  * Collects the "src" of every image on a page, including images inside
  * iframes whose id contains "photoset".
  *
  * NOTE(review): the original description said already-stored addresses are not
  * stored again; no such filtering happens in this method — presumably the
  * caller takes care of it.
  *
  * @param int $page Page number
  * @return array Image addresses
  */
 protected function getRawImgsrcs($page)
 {
     // Prepare the DOM of the page itself
     $markup = $this->getHtml($page);
     $pageDom = new Dom();
     $pageDom->load($markup);
     $sources = [];
     // Images that sit directly in the page
     foreach ($pageDom->find('img') as $image) {
         $sources[] = $image->getAttribute('src');
     }
     // Photoset iframes: fetch the framed document and harvest its images too
     foreach ($pageDom->find('iframe') as $frame) {
         if (strpos($frame->getAttribute('id'), 'photoset') === false) {
             continue;
         }
         $frameDom = new Dom();
         $frameDom->load($this->requestHtml($frame->getAttribute('src')));
         foreach ($frameDom->find('img') as $image) {
             $sources[] = $image->getAttribute('src');
         }
     }
     return $sources;
 }

示例#19

0

显示文件

文件： ParseController.php 项目： kadanin-at-work/parser

 /**
  * Walks through 12-digit numbers, looks each one up (with an appended check
  * digit) on barcode-list.ru and stores the product name found for it.
  *
  * Progress is persisted in the "lastParseID" SavedVariable before every
  * lookup, so a later run resumes from the stored value.
  *
  * @param int $start Initial value written to "lastParseID" when that variable does not exist yet.
  */
 public function actionIndex($start = 111111111111)
 {
     // Create the progress marker on first use
     $lastID = SavedVariable::findOne('lastParseID');
     if (!isset($lastID)) {
         $lastID = new SavedVariable();
         $lastID->name = 'lastParseID';
         $lastID->value = (string) (int) $start;
         $lastID->save();
     }
     // Never start below the smallest number of interest
     $ii = max(111111111111, (int) $lastID->value);
     // Outer loop: restarts the scan at $ii after the inner loop was left via break
     while ($ii <= 999999999999) {
         for ($i = $ii; $i <= 999999999999; $i++) {
             // Persist the number being processed before doing any work on it
             $lastID->value = (string) $i;
             $lastID->save();
             // $barcode is first set to the digits of $i (inner assignment), then a check
             // digit is appended: digits at even indexes weigh 1, at odd indexes 3, and
             // the digit is (10 - sum % 10) % 10.
             // NOTE(review): relies on the inner assignment being evaluated before the
             // append, and on array_walk_r returning the walked array — confirm both.
             $barcode .= (10 - array_sum(ArrayHelperAdvanced::array_walk_r(str_split($barcode = (string) $i), function (&$v, $k) {
                 $v *= $k % 2 * 2 + 1;
             })) % 10) % 10;
             $dom = new Dom();
             try {
                 $string = file_get_contents('http://www.barcode-list.ru/barcode/RU/Поиск.htm?barcode=' . $barcode);
             } catch (\Exception $e) {
                 // NOTE(review): file_get_contents() reports failure with a warning and a false
                 // return value; this branch only runs if an error handler turns that into an exception.
                 break;
             }
             $dom->load($string);
             /** @var HtmlNode $table */
             $tables = $dom->find('.randomBarcodes');
             if (count($tables) === 0) {
                 echo 'not found ' . $i . ' of ' . $barcode . "\n";
                 continue;
             }
             $table = $tables[0];
             $table->countChildren();
             // Debug output describing the table that was found
             echo '$table->countChildren() = ' . $table->countChildren() . "\n";
             echo '$table->getTag()->name() = ' . $table->getTag()->name() . "\n";
             /** @var HtmlNode $tr */
             /** @noinspection LoopWhichDoesNotLoopInspection */
             // Only the first row that has <td> cells is used; the loop ends right after it
             foreach ($table->find('tr') as $tr) {
                 echo '  $tr->getTag()->name() = ' . $tr->getTag()->name() . "\n";
                 $tds = $tr->find('td');
                 if (count($tds) === 0) {
                     continue;
                 }
                 // Third cell is taken as the product name
                 // NOTE(review): assumes the row has at least three cells — confirm.
                 $td = $tds[2];
                 /** @var HtmlNode $td */
                 echo '    $td->getTag()->name() = ' . $td->getTag()->name() . "\n";
                 echo '      $td->text = ' . $td->text . "\n";
                 // save(false) is called with false as its only argument
                 (new ParsedProduct(['barcode' => $i, 'name' => $td->text]))->save(false);
                 break;
             }
         }
         // Resume from the number that was being processed when the inner loop ended
         $ii = $i;
         echo 'break ' . $i . "\n";
         $startTimestamp = time();
         echo date('Y.m.d h:i:s', $startTimestamp) . "\n";
         // Busy-wait until 10 seconds have passed before retrying
         do {
             $currentTimestamp = time();
         } while ($currentTimestamp - $startTimestamp < 10);
         echo date('Y.m.d h:i:s', $currentTimestamp) . "\n";
     }
     //while ($ii <= 999999999999) {
     echo $ii . "\n";
 }

示例#20

0

显示文件

文件： TFSniffer.php 项目： roslairy/tumfetch

 protected function sniffVideoOfDom(Dom $dom)
 {
     foreach ($dom->find('iframe') as $frame) {
         // Address the iframe points to
         $frameSrc = $frame->getAttribute('src');
         // Ignore frames without an address or without the marker string that identifies a video
         if (empty($frameSrc) || !strstr($frameSrc, 'https://www.tumblr.com/video/')) {
             continue;
         }
         try {
             // File name: fourth "_"-separated part of the parent's id plus ".mp4"
             $idParts = explode('_', $frame->getParent()->getAttribute('id'));
             $name = $idParts[3] . '.mp4';
             // This video has been recorded before: move on
             if (TFFile::where('name', '=', $name)->count() != 0) {
                 continue;
             }
             // Load the framed document and pick the first <source> of its first <video>
             $videoDom = $this->requestDom($frameSrc);
             $videoSource = $videoDom->find('video')[0]->find('source')[0];
         } catch (\Exception $e) {
             // Log the failure and carry on with the next frame
             Log::error(sprintf('TFSniffer: Failed to parse video element, url is %s, msg is %s.', $frameSrc, $e->getMessage()));
             continue;
         }
         // Record the sniffed video
         $video = new TFFile();
         $video->type = 'video';
         $video->state = 'sniffed';
         $video->error = '';
         $video->name = $name;
         $video->source = $videoSource->getAttribute('src');
         $video->save();
     }
 }

示例#21

0

显示文件

文件： prob.php 项目： Lisss13/prog

use PHPHtmlParser\Dom;
?>
    <!doctype html>
    <html lang="ru">
    <head>
        <meta charset="UTF-8">
        <title>Таблица</title>
    </head>
    <body>
        <table>

<?php 
// Prints the text of the first (at most) 100 table cells of index.html;
// purely numeric cells are additionally wrapped in a table row.
$dom = new Dom();
$dom->loadFromFile('index.html');
// Run the selector once instead of on every loop iteration.
$cells = $dom->find("tr td");
// Never read past the end of the collection when the document has fewer than 100 cells.
$limit = min(100, count($cells));
for ($i = 0; $i < $limit; $i++) {
    $text = $cells[$i]->text;
    if ($text == "Адрес") {
        // Skip this cell and the five cells after it
        $i += 5;
        continue;
    }
    if (preg_match("|^[\\d]+\$|", $text)) {
        echo "<tr><td>" . $text . "</td></tr>";
    }
    echo "<br/>";
    echo $text;
}
?>
        </table>
    </body>
</html>

示例#22

0

显示文件

文件： DomTest.php 项目： cybrox/php-html-parser

 public function testEnforceEncoding()
 {
     // Load the fixture with the encoding forced to UTF-8 ...
     $options = ['enforceEncoding' => 'UTF-8'];
     $dom = new Dom();
     $dom->load('tests/files/horrible.html', $options);
     // ... and expect the second "table input" match to differ from this markup.
     $secondInput = $dom->find('table input', 1);
     $this->assertNotEquals('<input type="submit" tabindex="0" name="submit" value="Информации" />', $secondInput->outerHtml);
 }

示例#23

0

显示文件

文件： ImportController.php 项目： charlesportwoodii/galnet-api

 /**
  * Imports a single news entry unless its uid is already present in the "news" table.
  *
  * @param object $html   Node whose "href" attribute points at the entry
  *                       (presumably an anchor node — confirm against the caller).
  * @param string $origin Date string understood by strtotime(); stored as-is and shifted by "-1286 years".
  * @return void
  */
 private function importNewsEntry($html, $origin)
 {
     $dom = new Dom();
     $uri = $html->getAttribute('href');
     $uid = str_replace('/galnet/uid/', '', $uri);
     // Entries that were imported before are reported and skipped
     $existing = (new \yii\db\Query())->from('news')->where(['uid' => $uid])->count();
     if ((int) $existing != 0) {
         $this->stdOut("    - {$uid} :: Already Imported...\n");
         return;
     }
     $dom->loadFromUrl(Yii::$app->params['galnet']['url'] . $uri);
     $titleHtml = $dom->find('h3.galnetNewsArticleTitle a')[0]->innerHtml;
     $title = trim(strip_tags($titleHtml));
     // Double line breaks become newlines before the remaining tags are stripped
     $bodyHtml = $dom->find('div.article p')[0]->innerHtml;
     $content = trim(strip_tags(str_replace('<br /><br /> ', "\n", $bodyHtml)));
     // Early Galnet posts are empty, so grab the first line from the article
     if (empty($title)) {
         $title = strtok($content, "\n");
     }
     $news = new News();
     $news->attributes = [
         'uid' => $uid,
         'title' => $title,
         'content' => $content,
         'created_at' => time(),
         'updated_at' => time(),
         'published_at_native' => strtotime($origin),
         'published_at' => strtotime($origin . "-1286 years"),
     ];
     $this->stdOut("    - {$uid}\n");
     $news->save();
 }

示例#24

0

显示文件

文件： yle_episode_downloader.php 项目： jarkko-hautakorpi/yle-areena-episode-downloader

/*
 * YLE Areena video crawler
 * Find video links from a episode listing page and download them using yle-dl.
 * http://aajanki.github.io/yle-dl/
 * Run as a cron job to download episodes.
 * 0 1 * * * php -f /home/john/.cronscripts/yle_episode_downloader.php >> /home/john/.cronscripts/download.log
 *
 */
require __DIR__ . '/vendor/autoload.php';
use PHPHtmlParser\Dom;
$page_URL = "http://areena.yle.fi/1-2540138";
$saved_videos_folder = "/home/john/Videos/YLE/Yle_uutiset/";
$dom = new Dom();
$dom->loadFromUrl($page_URL);
// Find the dom element with videos and loop them through
$newslist = $dom->find('ul.program-list li');
if (count($newslist) >= 1) {
    foreach ($newslist as $news) {
        // Get video ID
        $data_item_id = $news->getAttribute('data-item-id');
        /* <time itemprop="startDate" datetime="2015-07-10T20:30:00.000+03:00"> */
        $timestamp_dom = $news->find('time[itemprop=startDate]');
        $timestamp = $timestamp_dom->getAttribute('datetime');
        $weekday = "_" . date('l', strtotime($timestamp));
        $pubDate = strftime("%Y-%m-%d_klo_%H.%M", strtotime($timestamp));
        $filename = "Yle_uutiset_" . $pubDate . $weekday . ".flv";
        $url = "http://areena.yle.fi/" . $data_item_id;
        if (!file_exists($saved_videos_folder . $filename)) {
            echo "\nDownloading video: " . $filename . "\n";
            $current_folder = getcwd();
            chdir($saved_videos_folder);

示例#25

0

显示文件

文件： Client.php 项目： nexmo-community/javafoto

 /**
  * Lists the files linked from the listing of every directory below /DCIM/.
  *
  * @param mixed|null $filter When truthy, only files whose getType() is identical to this value are returned.
  * @return File[]
  */
 public function getFiles($filter = null)
 {
     $files = [];
     foreach ($this->getDirectories() as $path) {
         $listing = $this->getHttpClient()->get('http://' . $this->ip . ':' . $this->web . '/DCIM/' . $path);
         $dom = new Dom();
         $dom->load($listing->getBody()->getContents());
         // Every link inside the table body stands for one file
         foreach ($dom->find('tbody a') as $link) {
             $file = new File($path, $link->getAttribute('href'));
             // Keep the file when no filter is set or when its type matches the filter
             if (!$filter || $filter === $file->getType()) {
                 $files[] = $file;
             }
         }
     }
     return $files;
 }

示例#26

0

显示文件

文件： index.php 项目： kislukacs/schedule

require_once 'vendor/autoload.php';
use PHPHtmlParser\Dom;
// Address of the page to scrape; an optional ?date=... request parameter is passed through.
$url = "http://tini.maiige.hu";
if (isset($_GET['date'])) {
    $date = $_GET['date'];
    // The value comes straight from the request: only accept a plain string and
    // encode it, so it cannot inject further query parameters or break the URL.
    if (is_string($date)) {
        $url .= "/?date=" . urlencode($date);
    }
}
/*$html=file_get_html($url);
	$page_title=$html->find('title',0)->plaintext;
	$text_title=$html->find('span.cimsor_cim',0)->plaintext;
	$textdate=$html->find('span.cimsor_datum',0)->plaintext;
	$text=$html->find('div.gondolatok_box',0)->find('p');
	$andnow=$html->find('div.esmost_box',0)->find('p');*/
$dom = new Dom();
$dom->load($url);
// Pieces of the remote page that the template below prints
$page_title = $dom->find('title')[0]->innerHtml;
$text_title = $dom->find('span.cimsor_cim')[0]->innerHtml;
$text_date = $dom->find('span.cimsor_datum')[0]->innerHtml;
$text = $dom->find('div.gondolatok_box')[0]->find('p');
$andnow = $dom->find('div.esmost_box')[0]->find('p');
?>

<!DOCTYPE html>
<html>
<head>
    <title><?php 
echo $page_title;
?>
</title> <!-- TODO: MySQL Query -->
	
	<script type="text/javascript" src="js/jquery.min.js"></script>

示例#27

0

显示文件

文件： TweetController.php 项目： kcwikizh/kcwiki-api

 private function handle($count, $option)
 {
     $tag = "tweet";
     $key = "tweet.{$option}.{$count}";
     // Serve the cached payload when one exists for this option/count pair
     if (Cache::tags($tag)->has($key)) {
         return response(Cache::tags($tag)->get($key))->header('Content-Type', 'application/json')->header('Access-Control-Allow-Origin', '*');
     }
     $rep = file_get_contents("https://t.kcwiki.moe/?json=1&count={$count}");
     if (!$rep) {
         return response()->json(['result' => 'error', 'reason' => 'Getting tweets failed.']);
     }
     $decoded = json_decode($rep, true);
     $output = [];
     foreach ($decoded['posts'] as $post) {
         $dom = new Dom();
         $dom->load($post['content']);
         $fields = $post['custom_fields'];
         $new_post = [];
         // First "ozh_ta_id" custom field value, or an empty string when it is missing
         $hasId = array_key_exists('ozh_ta_id', $fields) && is_array($fields['ozh_ta_id']);
         $new_post['id'] = $hasId ? $fields['ozh_ta_id'][0] : '';
         $img = $dom->find('img');
         // Every option except 'html' reports the first image and strips images from the markup
         if ($option != 'html') {
             if (count($img) > 0) {
                 $new_post['img'] = $img[0]->getAttribute('src');
                 foreach ($img as $imageNode) {
                     $parent = $imageNode->getParent();
                     // An image wrapped in a link is removed together with that link
                     if ($parent->getTag()->name() == 'a') {
                         $parent->delete();
                     } else {
                         $imageNode->delete();
                     }
                 }
             } else {
                 $new_post['img'] = '';
             }
         }
         $p = $dom->find('p, div');
         $new_post['jp'] = '';
         $new_post['zh'] = '';
         // Blocks up to index $n go to "jp", the remaining ones to "zh"
         $n = $this->detect($p);
         $total = count($p);
         for ($index = 0; $index <= $n; $index++) {
             $new_post['jp'] .= $p[$index]->innerHtml;
         }
         for ($index = $n + 1; $index < $total; $index++) {
             $new_post['zh'] .= $p[$index]->innerHtml;
         }
         $new_post['date'] = $post['date'];
         // Plain output: expand URLs, then drop all markup
         if ($option == 'plain') {
             $new_post['zh'] = strip_tags($this->expandUrl($new_post['zh']));
             $new_post['jp'] = strip_tags($this->expandUrl($new_post['jp']));
         }
         $output[] = $new_post;
     }
     Cache::tags($tag)->put($key, $output, 5);
     return response($output)->header('Content-Type', 'application/json')->header('Access-Control-Allow-Origin', '*');
 }

示例#28

0

显示文件

文件： newsletter-tester.php 项目： syaifulsz/newsletter-tester

 if (@$send_to && is_array($send_to)) {
     foreach ($send_to as $address) {
         $mail->addAddress($address);
     }
 }
 // Set the subject line
 $mail->Subject = 'Newsletter Tester [' . md5(time()) . ']';
 // Read an HTML message body from an external file, convert referenced images to embedded,
 // convert HTML into a basic plain-text alternative body
 if (@$content) {
     // Folder of the selected newsletter; its index.html is the message body
     $newslettersPath = 'newsletters/' . $content . '/';
     $contentHtml = file_get_contents($newslettersPath . 'index.html');
     // Second copy for the preview output: image paths are prefixed instead of embedded
     $contentHtmlOriginal = $contentHtml;
     $dom = new Dom();
     $dom->load($contentHtml);
     $images = $dom->find('img');
     // Sources already handled. str_replace() rewrites EVERY occurrence of a source, so
     // processing a repeated source again would prefix the preview path a second time
     // and attach the same image twice.
     $handledSources = [];
     foreach ($images as $image) {
         $imageSrc = $image->getAttribute('src');
         if (isset($handledSources[$imageSrc])) {
             continue;
         }
         $handledSources[$imageSrc] = true;
         // Embed the image and reference it in the mail body through its content id
         $mail->AddEmbeddedImage($newslettersPath . $imageSrc, slugify($imageSrc));
         $contentHtml = str_replace($imageSrc, 'cid:' . slugify($imageSrc), $contentHtml);
         $contentHtmlOriginal = str_replace($imageSrc, $newslettersPath . $imageSrc, $contentHtmlOriginal);
     }
     $mail->msgHTML($contentHtml, dirname(__FILE__));
     $output['content'] = $contentHtmlOriginal;
 }
 // Replace the plain text body with one created manually
 $mail->AltBody = 'Test with Newsletter Tester tool by Syaiful Shah Zinan';
 // send the message, check for errors
 if (!$mail->send()) {
     $output['status'] = 'error';
     $output['message'] = $mail->ErrorInfo;

示例#29

0

显示文件

文件： GlabsController.php 项目： nnrudakov/glabs

 private function collectSites()
 {
     // Previous state: provides the running total and, optionally, the exclusion list
     $previous = json_decode(file_get_contents(Yii::getAlias('@runtime/data/chatapp.json')), true);
     $dom = new Dom();
     $dom->loadFromFile(Yii::getAlias('@runtime/sites.html'));
     $defaultExclude = ['auburn.craigslist.org', 'bham.craigslist.org', 'dothan.craigslist.org', 'shoals.craigslist.org', 'gadsden.craigslist.org', 'huntsville.craigslist.org'];
     $exclude = array_key_exists('exclude', $previous) ? $previous['exclude'] : $defaultExclude;
     // First list: protocol-relative links ("//host/...") from sites.html, slashes removed
     $sitesHosts = [];
     /* @var Dom\AbstractNode $link */
     foreach ($dom->find('a') as $link) {
         $href = $link->getAttribute('href');
         if (0 !== strpos($href, '//')) {
             continue;
         }
         $host = str_replace('/', '', $href);
         if (!in_array($host, $exclude, true)) {
             $sitesHosts[] = $host;
         }
     }
     shuffle($sitesHosts);
     // Second list: every link from backpage.html with "http:" and slashes removed
     $dom->loadFromFile(Yii::getAlias('@runtime/backpage.html'));
     $backpageHosts = [];
     /* @var Dom\AbstractNode $link */
     foreach ($dom->find('a') as $link) {
         $host = str_replace(['http:', '/'], '', $link->getAttribute('href'));
         if (!in_array($host, $exclude, true)) {
             $backpageHosts[] = $host;
         }
     }
     shuffle($backpageHosts);
     // Write the new state: both shuffled lists in sequence, current site reset
     $data = [
         'total_count' => (int) $previous['total_count'],
         'current_site' => '',
         'sites' => array_merge($sitesHosts, $backpageHosts),
         'exclude' => $exclude,
     ];
     file_put_contents(Yii::getAlias('@runtime/data/chatapp.json'), json_encode($data));
 }

示例#30

0

显示文件

文件： index.php 项目： Lisss13/prog

<?php

include "vendor/autoload.php";
use PHPHtmlParser\Dom;
// Copies the cells of the listing tables on pages 1 and 2 into a local index.html.
$outputFile = "index.html";
// Start the file over with the document head and the opening <table>
file_put_contents($outputFile, "<!DOCTYPE html>\n <html><head>\n<meta charset='utf-8'>\n</head><body><table>");
foreach (range(1, 2) as $pageNumber) {
    $pageUrl = "http://www.emls.ru/flats/page{$pageNumber}.html?query=s/1/place/address/reg/2/dept/2/sort1/1/dir1/2/sort2/3/dir2/1/interval/3";
    $dom = new Dom();
    $dom->loadFromUrl($pageUrl);
    // One output row per source row, one output cell per source cell
    foreach ($dom->find("table.html_table_1 tr") as $row) {
        file_put_contents($outputFile, "<tr>", FILE_APPEND);
        foreach ($row->find("td") as $cell) {
            file_put_contents($outputFile, "<td>" . $cell->text() . "</td>", FILE_APPEND);
        }
        file_put_contents($outputFile, "</tr>", FILE_APPEND);
    }
}
// Close the table and the document
file_put_contents($outputFile, "</table></body></html>", FILE_APPEND);

public find ( string $selector, integer $nth = null ) : array
$selector	string
$nth	integer
return	array

PHP PHPHtmlParser Dom::find示例

find() public method