/**
 * Gets departures from the given station starting at the given time.
 *
 * Sends a GET request to the endpoint returned by self::getApiEndpoint() and
 * scrapes the departure table out of the returned HTML document.
 *
 * @param int    $stationID   Station identifier, sent as the "input" query parameter
 * @param Carbon $time        Moment from which departures are requested
 * @param int    $maxJourneys Value sent as the "maxJourneys" query parameter
 * @return array List of ['time' => Carbon, 'line' => string, 'direction' => string]
 * @throws ApiException When the endpoint does not answer with HTTP status 200
 */
public static function getDepartures(int $stationID, Carbon $time, int $maxJourneys = 10)
{
    // prepare parameters for our request
    $query = [
        'input' => $stationID,
        'boardType' => 'dep',
        'time' => $time->format('H:i'),
        'date' => $time->format('d.m.y'),
        'maxJourneys' => $maxJourneys,
        'start' => 'yes',
    ];

    // send it to the bvg mobile site
    $response = \Requests::get(self::getApiEndpoint() . '?' . http_build_query($query));

    if ($response->status_code != 200) {
        throw new ApiException('Failed getting station data from BVG API');
    }

    // prepare document
    $dom = new Dom();
    $dom->load($response->body);

    // the overview element carries the date of the result table after a colon
    $overview = $dom->find('#ivu_overview_input');
    $dateText = trim(substr($overview->text, strpos($overview->text, ':') + 1));
    $date = Carbon::createFromFormat('d.m.y', $dateText, 'Europe/Berlin');

    // our results array
    $departures = [];

    // loop through each departure in the table body
    foreach ($dom->find('.ivu_result_box .ivu_table tbody tr') as $row) {
        $columns = $row->find('td');

        // rows without the three expected cells (time, line, direction) are not departures
        if (count($columns) < 3) {
            continue;
        }

        // split "HH:MM" into its two parts; kept in its own variable so the
        // $time parameter is not overwritten
        $clock = explode(':', trim(strip_tags((string) $columns[0])));
        if (count($clock) < 2) {
            continue;
        }

        // the line name is the text of the first link in the second cell
        $lineLinks = $columns[1]->find('a');

        $departures[] = [
            'time' => $date->copy()->hour($clock[0])->minute($clock[1])->second(0),
            'line' => trim(strip_tags((string) ($lineLinks[0] ?? ''))),
            'direction' => trim(strip_tags((string) $columns[2])),
        ];
    }

    return $departures;
}
/**
 * With the "removeScripts" option switched off, the script tag of the
 * fixture document must still be present after parsing.
 */
public function testRemoveScriptsFalse()
{
    $parser = new Dom();
    $parser->setOptions(['removeScripts' => false]);
    $parser->loadFromFile('tests/files/horrible.html');

    $scriptCount = count($parser->find('script'));
    $this->assertEquals(1, $scriptCount);

    $scriptType = $parser->find('script')->getAttribute('type');
    $this->assertEquals('text/JavaScript', $scriptType);
}
/**
 * Enriches a result entry with data scraped from the page it links to.
 *
 * Loads $result['href'] into a Dom and stores the text of "#textContent p"
 * under 'additionalText' and the src of every ".img-item img" under 'photos'.
 *
 * @param array $result Result entry, modified in place
 */
protected function addAdditionalInfo(&$result)
{
    $page = new Dom();
    $page->load($result['href']);

    $result['additionalText'] = $page->find('#textContent p')->text;

    $photoUrls = [];
    foreach ($page->find('.img-item img') as $imageNode) {
        $photoUrls[] = $imageNode->getAttribute('src');
    }

    $result['photos'] = $photoUrls;
}
/**
 * Loads the status of every line, from the cache when possible, otherwise by
 * parsing the document at $this->sourceURL.
 *
 * For each key of $this->lineas the element "#status-line-{key}-container" is
 * inspected: a class containing "suspendido" sets status CANCELLED, one
 * containing "demorado" sets DELAYED, and isSleepingTime() overrides both with
 * SLEEPING. The raw status message is stored entity-decoded.
 *
 * @return bool|null true when cached data was used; no value is returned after
 *                   a fresh parse (kept as-is for existing callers)
 */
private function getAndParseSubteInfo()
{
    if ($cachedData = $this->isCached()) {
        $this->lineas = $cachedData;
        return true;
    }

    $dom = new Dom();
    $dom->loadFromFile($this->sourceURL);

    foreach ($this->lineas as $linea => $info) {
        $lineInfo = $dom->find("#status-line-{$linea}-container")[0];

        // The container is absent from the document: leave this line untouched
        // instead of dereferencing null.
        if ($lineInfo === null) {
            continue;
        }

        // getAttribute() yields null when there is no class attribute
        $lineStatusClass = (string) $lineInfo->getAttribute('class');

        if (strpos($lineStatusClass, 'suspendido') !== false) {
            $this->lineas[$linea]->status = 'CANCELLED';
        }

        if (strpos($lineStatusClass, 'demorado') !== false) {
            $this->lineas[$linea]->status = 'DELAYED';
        }

        // checked last so that it takes precedence over the scraped status
        if ($this->isSleepingTime()) {
            $this->lineas[$linea]->status = 'SLEEPING';
        }

        // get raw status msg
        $status_msg = $lineInfo->find("#status-line-{$linea}")->text;
        $this->lineas[$linea]->statusMessage = html_entity_decode($status_msg, ENT_QUOTES, 'ISO-8859-1');
    }

    $this->updateStatusInfo();
    $this->cacheLines();
}
/**
 * Builds a Game from a report file.
 *
 * The game shell comes from createGameWithInfo(); the events come from the
 * "tr.evenColor" rows of the file, whose non-empty cleaned-up cell texts are
 * grouped six at a time and handed to createParsedEvent().
 *
 * @param string $filename
 *
 * @return Game
 */
protected function processFile($filename)
{
    $this->command->out("Processing " . $filename);

    // Game object carrying home/away teams and other info
    $game = $this->createGameWithInfo($filename);

    $document = new Dom();
    $document->loadFromFile($filename);

    $eventRows = [];

    /** @var AbstractNode $row */
    foreach ($document->find('tr.evenColor') as $row) {
        $cells = [];

        /** @var AbstractNode $cell */
        foreach ($row->getChildren() as $cell) {
            $text = $this->cleanUpLine($cell->text);

            if (!$text) {
                continue;
            }

            $cells[] = $text;

            // An event spans six values; flush the group once it is complete
            if (count($cells) === 6) {
                $eventRows[] = $cells;
                $cells = [];
            }
        }
    }

    // Turn every complete group into an event of the game log
    foreach ($eventRows as $eventRow) {
        $event = $this->createParsedEvent($eventRow);

        if ($event) {
            $game->addEvent($event);
        }
    }

    return $game;
}
/**
 * Collects Open Graph image candidates for the current view.
 *
 * For the "article" view the list is built from the article's main image
 * (image_fulltext, falling back to image_intro), the <img> tags of the
 * article text, the category image and finally the configured default image,
 * and stored in $es->data->ogImages. For the "category" view a single image is
 * resolved once per request.
 *
 * @param string $context
 * @param object $article
 *
 * @return void
 */
public static function setOpenGraph($context, $article)
{
    $es = \Ezset::getInstance();
    $input = \JFactory::getApplication()->input;
    $view = $input->get('view');

    if (empty($article->id)) {
        return;
    }

    if (!$es->params->get('ogGetInnerPageImage', 1)) {
        return;
    }

    if ('article' == $view) {
        $images = new \JRegistry($article->images);
        $imgs = [];

        $img = $images->get('image_fulltext', $images->get('image_intro'));

        if ($img) {
            $imgs[] = $img;
        }

        // When a main image exists, the first inline image is skipped
        $ignoreFirst = (bool) $imgs;

        $dom = new Dom();
        $dom->load($article->text);

        foreach ($dom->find('img') as $image) {
            if ($ignoreFirst) {
                // Skip only the first image; previously the flag was never
                // reset, which silently dropped every inline image.
                $ignoreFirst = false;
                continue;
            }

            $imgs[] = $image->src;
        }

        if (!$imgs && isset($article->catid)) {
            $cat = \JTable::getInstance('category');
            $cat->load($article->catid);
            $cat->params = new \JRegistry($cat->params);

            $imgs[] = $cat->params->get('image');
        }

        if (!$imgs && !$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
            $imgs[] = UriHelper::pathAddHost($es->params->get('ogDefaultImage'));
        }

        $es->data->ogImages = $imgs;
    } elseif ('category' == $view) {
        if (static::$once) {
            $cat = \JTable::getInstance('category');
            $cat->load($input->get('id'));
            $cat->params = new \JRegistry($cat->params);

            $img = $cat->params->get('image');

            if ($img) {
                $es->ogImage = $img;
            } elseif (!$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
                $es->ogImage = $es->params->get('ogDefaultImage');
            }

            // NOTE(review): this reads $es->data->ogImage although the lines
            // above write $es->ogImage - confirm which property is intended.
            $es->ogImage = UriHelper::pathAddHost($es->data->ogImage);
        }

        static::$once = 0;
    }
}
/**
 * Extracts the <span> elements of the first ".caixacampobranco" element.
 *
 * @param string $response HTML to parse
 * @return mixed The result of find('span') on that element, or false when no
 *               such element exists
 */
private function generateHtmlFields($response)
{
    $parser = new Dom();
    $parser->loadStr($response, []);

    $container = $parser->find('.caixacampobranco')[0];

    return $container === null ? false : $container->find('span');
}
/**
 * Renders the template for the given path and wraps the result in a Page.
 *
 * The path is turned into a dotted name ("a/b" => "a.b") that is inserted
 * into the $this->path pattern to locate the template file. The template is
 * executed with the context entries available as variables, and the produced
 * HTML is split into title, header and body.
 *
 * @param string $path    Slash separated page path
 * @param array  $context Variables exposed to the template
 * @return Page
 * @throws PageNotFoundException When the template file does not exist
 */
public function page($path, array $context = array())
{
    $path = array_filter(explode('/', $path), 'strlen');
    $pathname = implode('.', $path);
    $file = sprintf($this->path, $pathname);

    if (!file_exists($file)) {
        throw new PageNotFoundException($path);
    }

    // Run the template in a scope of its own: extract() in this method's scope
    // let a context key such as "file", "dom" or "path" overwrite the locals
    // used here (including the file about to be included). Templates still see
    // $this and every context variable, but no longer this method's locals.
    $render = function ($__file, array $__context) {
        extract($__context, EXTR_SKIP);
        ob_start();

        try {
            include $__file;
        } catch (\Throwable $e) {
            // do not leave the output buffer open when the template fails
            ob_end_clean();
            throw $e;
        }

        return ob_get_clean();
    };

    // Parse the HTML
    $dom = new Dom();
    $dom->setOptions([
        'removeScripts' => false,
        'removeStyles' => false,
        'preserveLineBreaks' => true,
    ]);
    $dom->load($render($file, $context));

    // Create a new page
    $page = new Page($path);

    // Title
    if (($title = $dom->find('title', 0)) !== null) {
        $page->withTitle($title->text());
    }

    // Header: everything inside <head> except <title> and <meta charset>
    if (($head = $dom->find('head', 0)) !== null) {
        foreach ($head->getChildren() as $child) {
            $tagName = $child->getTag()->name();
            $isCharsetMeta = $tagName === 'meta' && $child->getAttribute('charset') !== null;

            if ($tagName !== 'title' && !$isCharsetMeta) {
                $page->withHeader($page->header() . $child->outerHtml());
            }
        }
    }

    // Body
    if (($body = $dom->find('body', 0)) !== null) {
        $page->withBody($body->innerHtml());
    }

    return $page;
}
/**
 * Stores the src of the first <img> found in the article's intro and full
 * text as the "image_intro" entry of the article's images registry.
 *
 * When the text contains no image, "image_intro" is set to null. Nothing is
 * done if the article has no "images" property and the context is not
 * "com_content.article".
 *
 * @param string  $context
 * @param \JTable $article
 *
 * @return void
 */
public static function saveFirstImage($context, $article)
{
    $hasImagesProperty = property_exists($article, 'images');

    if (!$hasImagesProperty && $context != 'com_content.article') {
        return;
    }

    $registry = new \JRegistry($article->images);

    $parser = new Dom();
    $parser->load($article->introtext . $article->fulltext);

    $found = $parser->find('img');
    $firstSrc = $found->count() ? $found[0]->src : null;

    $registry->set('image_intro', $firstSrc);

    $article->images = $registry->toString();
}
/**
 * Looks up the album cover of a song on slothradio.com.
 *
 * The artist and album are read from the song file, sent to the "covers/"
 * page, and the src of the first "#content > div.album0 > img" is returned.
 *
 * @param string $songPath Path handed to \SongReader
 * @return string Cover URL, or an empty string when no cover image was found
 */
public function getAlbumCoverURL(string $songPath) : string
{
    $reader = new \SongReader($songPath);
    $http = new Client(['base_uri' => 'http://www.slothradio.com/', 'timeout' => 2.0]);

    $lookup = [
        'artist' => $reader->getAuthor(),
        'album' => $reader->getAlbum(),
    ];
    $response = $http->request('GET', 'covers/', ['query' => $lookup]);

    $dom = new Dom();
    $dom->load($response->getBody()->getContents());

    $covers = $dom->find('#content > div.album0 > img');

    if (count($covers) <= 0) {
        return '';
    }

    /** @var Dom\HtmlNode $cover */
    $cover = $covers[0];

    return $cover->getAttribute('src');
}
/**
 * Flags the item as containing a phone number.
 *
 * The description is checked first; only when it holds no match is the first
 * ".metaInfoDisplay" element of the shared document checked as well.
 * $this->phone is set to true on a match and left untouched otherwise.
 *
 * @return bool true when a phone-number-like pattern was found, false otherwise
 */
protected function setPhone()
{
    $patterns = [
        '/\\d+-\\d+-\\d+/',
        '/\\d{10}/',
        '/\\d{3}\\s+\\d{7}/',
        '/\\(\\d+\\)\\s?[\\d+-]+/',
        '/\\d+\\.+\\s?\\d+\\.+\\d+\\.+\\d+\\.+/',
        '/\\d{3}\\s\\d{3}\\s\\d{4}/',
        '/\\d+\\s+-\\d+-\\s+\\d+/',
        '/\\d+--\\d+--\\d+/',
    ];

    // Shared matcher so both text sources are checked the same way
    $containsPhone = static function ($text) use ($patterns) {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $text)) {
                return true;
            }
        }

        return false;
    };

    if ($containsPhone((string) $this->description)) {
        $this->phone = true;

        return true;
    }

    /* @var \PHPHtmlParser\Dom\AbstractNode|null $contacts */
    $contacts = self::$dom->find('.metaInfoDisplay', 0);

    // The node is converted to its string form explicitly before matching
    if ($contacts && $containsPhone((string) $contacts)) {
        $this->phone = true;

        return true;
    }

    // Previously the method fell off the end and returned null here
    return false;
}
/**
 * Collects the absolute links found on every configured URL.
 *
 * Each entry of $this->urls is loaded and the href of every <a> that starts
 * with "http" is kept. The de-duplicated, re-indexed list is stored under
 * $result->result["urls"].
 *
 * @return Result
 */
public function scan()
{
    $crawlerResult = new Result(array());

    // Collected locally so the final array_unique() always receives an array,
    // even when no page contains a matching link.
    $urls = [];

    foreach ($this->urls as $url) {
        $dom = new Dom();
        $dom->load($url);

        foreach ($dom->find("a") as $a) {
            // anchors without an href yield null
            $href = (string) $a->href;

            if (0 !== strpos($href, 'http')) {
                // Does not start with 'http' (relative link, anchor, mailto, ...)
                continue;
            }

            $urls[] = $href;
        }
    }

    $crawlerResult->result["urls"] = array_values(array_unique($urls));

    return $crawlerResult;
}
/**
 * Reformats every node of the page body that matches $this->selector.
 *
 * Each matching node is passed to format(); afterwards all elements found
 * below it are deleted and the formatted node is appended as a child.
 *
 * @param Page $page
 * @return mixed Whatever Page::withBody() returns for the rewritten HTML
 */
public function run(Page $page)
{
    // Parse the HTML without stripping scripts, styles or line breaks
    $parser = new Dom();
    $parser->setOptions([
        'removeScripts' => false,
        'removeStyles' => false,
        'preserveLineBreaks' => true,
    ]);
    $parser->load($page->body());

    $targets = $parser->find($this->selector);

    foreach ($targets as $target) {
        // Build the replacement before touching the node's children
        $replacement = $this->format($target);

        $descendants = $target->find('*');
        foreach ($descendants as $descendant) {
            $descendant->delete();
        }

        $target->addChild($replacement);
    }

    $html = $parser->root->outerHtml();

    return $page->withBody($html);
}
/**
 * Determines where the given URL redirects to.
 *
 * The redirect reported by getCurlInfo() is used first (query string removed,
 * and ignored when it equals the requested URL apart from slashes at either
 * end). If there is none, the document is loaded and the first
 * <meta http-equiv="refresh"> is inspected.
 *
 * @param string $url
 * @param string $userAgent
 *
 * @return string The redirect URL, or a falsy value when no redirect was found
 */
private function getRedirectUrl($url, $userAgent)
{
    $curlInfo = $this->getCurlInfo($url, $userAgent);
    $redirectUrl = $this->removeQueryString($curlInfo['redirect_url'] ?? null);

    if (trim($url, '/') === trim((string) $redirectUrl, '/')) {
        $redirectUrl = '';
    }

    // look for meta http-equiv="refresh"
    if (!$redirectUrl) {
        $dom = new Dom();
        $dom->load($url);

        foreach ($dom->find('meta') as $meta) {
            // attribute values are case-insensitive ("Refresh", "REFRESH", ...)
            if (strcasecmp((string) $meta->getAttribute('http-equiv'), 'refresh') !== 0) {
                continue;
            }

            // content looks like "5; url=http://example.com/", the URL being
            // optionally wrapped in quotes. The previous preg_replace required
            // the quotes and otherwise returned the whole attribute value
            // (e.g. "30" or "5; url=...") as the redirect URL.
            $content = (string) $meta->getAttribute('content');

            if (preg_match('/^\s*[\d.]+\s*;\s*url\s*=\s*([\'"]?)(.+?)\1\s*$/i', $content, $matches)) {
                $redirectUrl = $matches[2];
            }

            // only the first refresh tag is considered
            break;
        }
    }

    return $redirectUrl;
}
/**
 * Attempts to get the URL to a given profile's photo.
 *
 * The "/posts" page of the profile is fetched and cut down to the first
 * <img ...> tag that follows the text "dkb photo"; the src of that tag is
 * returned. Protocol-relative URLs ("//host/...") are prefixed with "https:".
 *
 * @param string $profileURL The URL to the profile
 *
 * @return string|boolean The photo URL, or false if it could not be scraped
 */
public function profilePhoto($profileURL)
{
    // Best effort: a failed download is reported as false, not as a warning
    $contents = @file_get_contents($profileURL . '/posts');

    if (!$contents) {
        return false;
    }

    // Every step below bails out when its marker is missing. Previously a
    // false position was passed to substr(), which kept the whole page and
    // made the method return the first unrelated image.
    $position = stripos($contents, 'dkb photo');

    if ($position === false) {
        return false;
    }

    /* Chop off all of the string before this position */
    $contents = substr($contents, $position);

    $position = stripos($contents, '<img');

    if ($position === false) {
        return false;
    }

    $contents = substr($contents, $position);

    $position = strpos($contents, '>');

    if ($position === false) {
        return false;
    }

    /* Chop off everything after the position */
    $contents = substr($contents, 0, $position + 1);

    $dom = new Dom();
    $dom->load($contents);

    $img = $dom->find('img', 0);

    if ($img === null) {
        return false;
    }

    $src = $img->getAttribute('src');

    if (!$src) {
        return false;
    }

    if (substr($src, 0, 2) == '//') {
        $src = 'https:' . $src;
    }

    return $src;
}
/**
 * Search for wallpapers.
 *
 * @param string   $query       What to search for. Searching for specific tags can be done with #tagname, e.g.
 *                              <samp>#cars</samp>
 * @param int      $categories  Categories to include. This is a bit field, e.g.: <samp>Category::GENERAL |
 *                              Category::PEOPLE</samp>
 * @param int      $purity      Purity of wallpapers. This is a bit field, e.g.: <samp>Purity::SFW |
 *                              Purity::NSFW</samp>
 * @param string   $sorting     Sorting, e.g. <samp>Sorting::RELEVANCE</samp>
 * @param string   $order       Order of results. Can be <samp>Order::ASC</samp> or <samp>Order::DESC</samp>
 * @param string[] $resolutions Array of resolutions in the format of WxH, e.g.: <samp>['1920x1080',
 *                              '1280x720']</samp>
 * @param string[] $ratios      Array of ratios in the format of WxH, e.g.: <samp>['16x9', '4x3']</samp>
 * @param int      $page        The id of the page to fetch. This is <em>not</em> a total number of pages to
 *                              fetch.
 *
 * @return WallpaperList Wallpapers. Thumbnails whose link or class attribute
 *                       does not have the expected shape are left out.
 */
public function search($query, $categories = Category::ALL, $purity = Purity::SFW, $sorting = Sorting::RELEVANCE, $order = Order::DESC, $resolutions = [], $ratios = [], $page = 1)
{
    $result = $this->client->get(self::URL_SEARCH, [
        'query' => [
            'q' => $query,
            'categories' => self::getBinary($categories),
            'purity' => self::getBinary($purity),
            'sorting' => $sorting,
            'order' => $order,
            'resolutions' => implode(',', $resolutions),
            'ratios' => implode(',', $ratios),
            'page' => $page,
        ],
        'headers' => ['X-Requested-With' => 'XMLHttpRequest'],
    ]);

    $body = $result->getBody()->getContents();

    $dom = new Dom();
    $dom->load($body);

    // The wallpaper URL prefix is literal text, so it is quoted before being
    // used as a pattern (its dots would otherwise match any character).
    $wallpaperUrlPattern = '#' . preg_quote(self::URL_HOME . self::URL_WALLPAPER . '/', '#') . '#';

    $wallpapers = new WallpaperList();

    foreach ($dom->find('figure.thumb') as $figure) {
        // The id is whatever follows the wallpaper URL prefix in the preview link
        $href = (string) $figure->find('a.preview')->getAttribute('href');
        $hrefParts = preg_split($wallpaperUrlPattern, $href);

        $classText = (string) $figure->getAttribute('class');
        $classMatched = preg_match(
            "/thumb thumb-(sfw|sketchy|nsfw) thumb-(general|anime|people)/",
            $classText,
            $classMatches
        );

        // Skip thumbnails that cannot be interpreted instead of failing on an
        // undefined offset / unknown constant.
        if (!isset($hrefParts[1]) || !$classMatched) {
            continue;
        }

        $id = $hrefParts[1];

        // Own variable names: the $purity parameter is no longer overwritten
        $figurePurity = constant('Wallhaven\\Purity::' . strtoupper($classMatches[1]));
        $figureCategory = constant('Wallhaven\\Category::' . strtoupper($classMatches[2]));
        $resolution = str_replace(' ', '', trim($figure->find('span.wall-res')->text));
        $favorites = (int) $figure->find('.wall-favs')->text;

        $w = new Wallpaper($id, $this->client);
        $w->setProperties([
            'purity' => $figurePurity,
            'category' => $figureCategory,
            'resolution' => $resolution,
            'favorites' => $favorites,
        ]);

        $wallpapers[] = $w;
    }

    return $wallpapers;
}
/**
 * Replacing the inner HTML of a link must be reflected in the outer HTML of
 * the paragraph that contains it.
 */
public function testChangeContent()
{
    $markup = '<div class="all"><p>Hey bro, <a href="google.com" id="78">click here</a></div><br />';
    $expected = '<p>Hey bro, <a href="google.com" id="78">gogogo</a></p>';

    $parser = new Dom();
    $parser->load($markup);

    $anchors = $parser->find('a');
    $anchors[0]->setInnerHtml('gogogo');

    $paragraph = $parser->getElementsByTag('p')[0];

    $this->assertEquals($expected, $paragraph->outerHtml);
}
/**
 * Returns the src of every image on the given page.
 *
 * Besides the <img> tags of the page itself, every <iframe> whose id contains
 * "photoset" is requested and its <img> tags are included as well. The list
 * is returned as collected; no filtering of addresses happens in this method.
 *
 * @param int $page Page number
 * @return array Image addresses
 */
protected function getRawImgsrcs($page)
{
    // Prepare the DOM of the page
    $pageHtml = $this->getHtml($page);
    $pageDom = new Dom();
    $pageDom->load($pageHtml);

    $sources = [];

    // Every img of the page itself
    foreach ($pageDom->find('img') as $image) {
        $sources[] = $image->getAttribute('src');
    }

    // Inspect every iframe; only photoset frames are followed
    foreach ($pageDom->find('iframe') as $frame) {
        if (strstr($frame->getAttribute('id'), 'photoset')) {
            $frameHtml = $this->requestHtml($frame->getAttribute('src'));

            $frameDom = new Dom();
            $frameDom->load($frameHtml);

            foreach ($frameDom->find('img') as $image) {
                $sources[] = $image->getAttribute('src');
            }
        }
    }

    return $sources;
}
/**
 * Walks through 12-digit numbers, looks each one up (with its check digit
 * appended) on barcode-list.ru and stores the product name that is found.
 *
 * The number being processed is persisted in the "lastParseID" SavedVariable
 * before every request, so an interrupted run resumes where it stopped. When
 * a download fails the run pauses for ten seconds and retries the same number.
 *
 * @param int $start Number to start from when no "lastParseID" is stored yet
 */
public function actionIndex($start = 111111111111)
{
    $lastID = SavedVariable::findOne('lastParseID');

    if (!isset($lastID)) {
        $lastID = new SavedVariable();
        $lastID->name = 'lastParseID';
        $lastID->value = (string) (int) $start;
        $lastID->save();
    }

    $ii = max(111111111111, (int) $lastID->value);

    while ($ii <= 999999999999) {
        for ($i = $ii; $i <= 999999999999; $i++) {
            $lastID->value = (string) $i;
            $lastID->save();

            // Weighted mod-10 check digit: digits at even indexes weigh 1,
            // digits at odd indexes weigh 3 (the scheme used by EAN-13).
            $weightedSum = 0;
            foreach (str_split((string) $i) as $position => $digit) {
                $weightedSum += (int) $digit * ($position % 2 * 2 + 1);
            }
            $barcode = (string) $i . ((10 - $weightedSum % 10) % 10);

            $dom = new Dom();

            try {
                $string = file_get_contents('http://www.barcode-list.ru/barcode/RU/Поиск.htm?barcode=' . $barcode);
            } catch (\Exception $e) {
                // reached when the error handler turns warnings into exceptions
                break;
            }

            if ($string === false) {
                // plain PHP reports a failed download as false, not as an exception
                break;
            }

            $dom->load($string);

            $tables = $dom->find('.randomBarcodes');

            if (count($tables) === 0) {
                echo 'not found ' . $i . ' of ' . $barcode . "\n";
                continue;
            }

            /** @var HtmlNode $table */
            $table = $tables[0];

            echo '$table->countChildren() = ' . $table->countChildren() . "\n";
            echo '$table->getTag()->name() = ' . $table->getTag()->name() . "\n";

            // Only the first row that has data cells is stored
            /** @var HtmlNode $tr */
            /** @noinspection LoopWhichDoesNotLoopInspection */
            foreach ($table->find('tr') as $tr) {
                echo ' $tr->getTag()->name() = ' . $tr->getTag()->name() . "\n";

                $tds = $tr->find('td');

                // the name is read from the third cell, so at least three are needed
                if (count($tds) < 3) {
                    continue;
                }

                /** @var HtmlNode $td */
                $td = $tds[2];

                echo ' $td->getTag()->name() = ' . $td->getTag()->name() . "\n";
                echo ' $td->text = ' . $td->text . "\n";

                // NOTE(review): the number is stored without its check digit
                // ($i, not $barcode) - confirm this is intended.
                (new ParsedProduct(['barcode' => $i, 'name' => $td->text]))->save(false);

                break;
            }
        }

        // Resume from the number that failed (or leave the loop once the
        // range is exhausted).
        $ii = $i;

        echo 'break ' . $i . "\n";
        echo date('Y.m.d h:i:s', time()) . "\n";

        // Pause before retrying; sleep() replaces a CPU-burning busy-wait
        sleep(10);

        echo date('Y.m.d h:i:s', time()) . "\n";
    }

    echo $ii . "\n";
}
/**
 * Records every Tumblr video embedded in the given document.
 *
 * Each <iframe> whose src contains "https://www.tumblr.com/video/" is
 * followed; the src of the first <source> of its first <video> is stored as a
 * new TFFile in state "sniffed". The file name is the fourth "_"-separated
 * part of the id of the iframe's parent plus ".mp4"; names that already exist
 * are skipped. Frames that cannot be parsed are logged and skipped.
 *
 * @param Dom $dom
 */
protected function sniffVideoOfDom(Dom $dom)
{
    foreach ($dom->find('iframe') as $videoFrame) {
        // Address of the iframe
        $videoFrameSrc = $videoFrame->getAttribute('src');

        // Discard frames without an address or without the marker that
        // identifies a video frame
        if (empty($videoFrameSrc)) {
            continue;
        }

        if (!strstr($videoFrameSrc, 'https://www.tumblr.com/video/')) {
            continue;
        }

        try {
            $parentId = (string) $videoFrame->getParent()->getAttribute('id');
            $idParts = explode('_', $parentId);

            if (!isset($idParts[3])) {
                throw new \RuntimeException("unexpected parent id '{$parentId}'");
            }

            $name = $idParts[3] . '.mp4';

            // Skip videos that have already been recorded
            if (TFFile::where('name', '=', $name)->count() != 0) {
                continue;
            }

            // Load the DOM of the video frame
            $videoDom = $this->requestDom($videoFrameSrc);

            // Locate the nodes; a missing node is reported explicitly because
            // calling a method on null raises an Error, which the catch below
            // does not handle.
            $videoElem = $videoDom->find('video')[0];

            if ($videoElem === null) {
                throw new \RuntimeException('no video element found');
            }

            $videoSource = $videoElem->find('source')[0];

            if ($videoSource === null) {
                throw new \RuntimeException('no source element found');
            }
        } catch (\Exception $e) {
            $error = 'TFSniffer: Failed to parse video element, url is %s, msg is %s.';
            $error = sprintf($error, $videoFrameSrc, $e->getMessage());
            Log::error($error);

            continue; // pass this
        }

        // Store the sniffed video
        $video = new TFFile();
        $video->type = 'video';
        $video->state = 'sniffed';
        $video->error = '';
        $video->name = $name;
        $video->source = $videoSource->getAttribute('src');
        $video->save();
    }
}
use PHPHtmlParser\Dom;
?>
<!doctype html>
<html lang="ru">
<head>
    <meta charset="UTF-8">
    <title>Таблица</title>
</head>
<body>
<table>
<?php
// Prints the text of the first (at most) 100 table cells of index.html.
// Purely numeric cells are additionally wrapped in a table row, and a cell
// reading "Адрес" skips itself together with the five cells that follow it.
$dom = new Dom();
$dom->loadFromFile('index.html');

// Query the cells once instead of on every iteration, and never read past the
// end of the list when the file holds fewer than 100 cells.
$cells = $dom->find("tr td");
$limit = min(100, count($cells));

for ($i = 0; $i < $limit; $i++) {
    $text = $cells[$i]->text;

    if ($text == "Адрес") {
        $i += 5;
        continue;
    }

    if (preg_match("|^[\\d]+\$|", $text)) {
        echo "<tr><td>" . $text . "</td></tr>";
    }

    echo "<br/>";
    echo $text;
}
?>
</table>
</body>
</html>
/**
 * Loading the fixture with the "enforceEncoding" option set to UTF-8 must not
 * yield the given Cyrillic markup for the second input inside a table.
 */
public function testEnforceEncoding()
{
    $unexpected = '<input type="submit" tabindex="0" name="submit" value="Информации" />';

    $parser = new Dom();
    $parser->load('tests/files/horrible.html', ['enforceEncoding' => 'UTF-8']);

    $secondInput = $parser->find('table input', 1);

    $this->assertNotEquals($unexpected, $secondInput->outerHtml);
}
/**
 * Imports a specific news entry.
 *
 * The uid is taken from the link's href; entries whose uid is already present
 * in the "news" table are skipped. Otherwise the article page is loaded and
 * its title and first paragraph are stored as a new News record.
 *
 * @param object $html   Link node of the entry; only getAttribute('href') is used
 * @param string $origin Publication date text, parsed with strtotime()
 * @return void
 */
private function importNewsEntry($html, $origin)
{
    $uri = $html->getAttribute('href');
    $uid = str_replace('/galnet/uid/', '', $uri);

    $count = (new \yii\db\Query())->from('news')->where(['uid' => $uid])->count();

    if ((int) $count != 0) {
        $this->stdOut(" - {$uid} :: Already Imported...\n");
        return;
    }

    // Only fetch the article once it is known to be new
    $dom = new Dom();
    $dom->loadFromUrl(Yii::$app->params['galnet']['url'] . $uri);

    $titleNode = $dom->find('h3.galnetNewsArticleTitle a')[0];
    $contentNode = $dom->find('div.article p')[0];

    // Without an article paragraph there is nothing to import
    if ($contentNode === null) {
        $this->stdOut(" - {$uid} :: No article content found, skipping...\n");
        return;
    }

    $title = $titleNode === null ? '' : trim(strip_tags($titleNode->innerHtml));
    $content = trim(strip_tags(str_replace('<br /><br /> ', "\n", $contentNode->innerHtml)));

    // Early Galnet posts are empty, so grab the first line from the article
    if (empty($title)) {
        $title = strtok($content, "\n");
    }

    $now = time();
    $published = strtotime($origin);

    $news = new News();
    $news->attributes = [
        'uid' => $uid,
        'title' => $title,
        'content' => $content,
        'created_at' => $now,
        'updated_at' => $now,
        'published_at_native' => $published,
        // the same date shifted back by 1286 years
        'published_at' => strtotime($origin . "-1286 years"),
    ];

    $this->stdOut(" - {$uid}\n");
    $news->save();
}
/* * YLE Areena video crawler * Find video links from a episode listing page and download them using yle-dl. * http://aajanki.github.io/yle-dl/ * Run as a cron job to download episodes. * 0 1 * * * php -f /home/john/.cronscripts/yle_episode_downloader.php >> /home/john/.cronscripts/download.log * */ require __DIR__ . '/vendor/autoload.php'; use PHPHtmlParser\Dom; $page_URL = "http://areena.yle.fi/1-2540138"; $saved_videos_folder = "/home/john/Videos/YLE/Yle_uutiset/"; $dom = new Dom(); $dom->loadFromUrl($page_URL); // Find the dom element with videos and loop them through $newslist = $dom->find('ul.program-list li'); if (count($newslist) >= 1) { foreach ($newslist as $news) { // Get video ID $data_item_id = $news->getAttribute('data-item-id'); /* <time itemprop="startDate" datetime="2015-07-10T20:30:00.000+03:00"> */ $timestamp_dom = $news->find('time[itemprop=startDate]'); $timestamp = $timestamp_dom->getAttribute('datetime'); $weekday = "_" . date('l', strtotime($timestamp)); $pubDate = strftime("%Y-%m-%d_klo_%H.%M", strtotime($timestamp)); $filename = "Yle_uutiset_" . $pubDate . $weekday . ".flv"; $url = "http://areena.yle.fi/" . $data_item_id; if (!file_exists($saved_videos_folder . $filename)) { echo "\nDownloading video: " . $filename . "\n"; $current_folder = getcwd(); chdir($saved_videos_folder);
/**
 * Lists the files found in every directory returned by getDirectories().
 *
 * For each directory the "/DCIM/<directory>" listing is requested from the
 * device and every link inside its <tbody> becomes a File.
 *
 * @param mixed $filter When truthy, only files whose getType() is identical
 *                      to this value are returned
 * @return File[]
 */
public function getFiles($filter = null)
{
    $collected = [];

    foreach ($this->getDirectories() as $directory) {
        $listingUrl = 'http://' . $this->ip . ':' . $this->web . '/DCIM/' . $directory;
        $listing = $this->getHttpClient()->get($listingUrl);

        $parser = new Dom();
        $parser->load($listing->getBody()->getContents());

        foreach ($parser->find('tbody a') as $anchor) {
            $candidate = new File($directory, $anchor->getAttribute('href'));

            // keep everything when no filter is given, otherwise only exact type matches
            if (!$filter || $filter === $candidate->getType()) {
                $collected[] = $candidate;
            }
        }
    }

    return $collected;
}
require_once 'vendor/autoload.php';

use PHPHtmlParser\Dom;

// Page to mirror; an optional ?date=... request parameter selects the day.
$url = "http://tini.maiige.hu";

// The date comes straight from the request, so only a string is accepted and
// it is URL-encoded before being appended to the remote address.
if (isset($_GET['date']) && is_string($_GET['date'])) {
    $date = $_GET['date'];
    $url .= "/?date=" . urlencode($date);
}

$dom = new Dom();
$dom->load($url);

// Pieces of the remote page used by the template below
$page_title = $dom->find('title')[0]->innerHtml;
$text_title = $dom->find('span.cimsor_cim')[0]->innerHtml;
$text_date = $dom->find('span.cimsor_datum')[0]->innerHtml;
$text = $dom->find('div.gondolatok_box')[0]->find('p');
$andnow = $dom->find('div.esmost_box')[0]->find('p');
?>
<!DOCTYPE html>
<html>
<head>
<title><?php echo $page_title; ?> </title>
<!-- TODO: MySQL Query -->
<script type="text/javascript" src="js/jquery.min.js"></script>
/**
 * Returns the latest tweets as JSON, served from the cache when available.
 *
 * The posts are fetched from t.kcwiki.moe. For every post the first image
 * (unless $option is 'html') and the Japanese / Chinese parts of the content
 * are extracted; detect() decides where the Japanese part ends. With $option
 * 'plain' both parts are passed through expandUrl() and stripped of tags.
 *
 * @param mixed  $count  Number of posts to request
 * @param string $option Output variant; 'html' and 'plain' are the values
 *                       handled specially
 * @return mixed JSON response with the posts, or an error object when the
 *               posts could not be fetched or decoded
 */
private function handle($count, $option)
{
    $key = "tweet.{$option}.{$count}";
    $tag = "tweet";

    // Cached and fresh results are sent with exactly the same headers
    $respond = static function ($payload) {
        return response($payload)
            ->header('Content-Type', 'application/json')
            ->header('Access-Control-Allow-Origin', '*');
    };

    if (Cache::tags($tag)->has($key)) {
        return $respond(Cache::tags($tag)->get($key));
    }

    $rep = file_get_contents("https://t.kcwiki.moe/?json=1&count={$count}");
    $result = $rep ? json_decode($rep, true) : null;

    // A failed download, invalid JSON or a payload without posts is an error.
    // Previously such a payload was iterated as null and an empty list was
    // cached.
    if (!is_array($result) || !isset($result['posts']) || !is_array($result['posts'])) {
        return response()->json(['result' => 'error', 'reason' => 'Getting tweets failed.']);
    }

    $output = [];

    foreach ($result['posts'] as $post) {
        $dom = new Dom();
        $dom->load($post['content']);

        $new_post = [];

        if (isset($post['custom_fields']['ozh_ta_id']) && is_array($post['custom_fields']['ozh_ta_id'])) {
            $new_post['id'] = $post['custom_fields']['ozh_ta_id'][0];
        } else {
            $new_post['id'] = '';
        }

        if ($option != 'html') {
            $new_post['img'] = '';
            $img = $dom->find('img');

            if (count($img) > 0) {
                $new_post['img'] = $img[0]->getAttribute('src');

                // Remove the images (together with a wrapping link) so they
                // do not end up in the text parts collected below
                foreach ($img as $x) {
                    $parent = $x->getParent();

                    if ($parent->getTag()->name() == 'a') {
                        $parent->delete();
                    } else {
                        $x->delete();
                    }
                }
            }
        }

        $p = $dom->find('p, div');
        $total = count($p);

        $new_post['jp'] = '';
        $new_post['zh'] = '';

        // Blocks 0..$n form the Japanese part, the remaining ones the Chinese
        // part; both loops stay within the blocks that actually exist.
        $n = $this->detect($p);

        for ($i = 0; $i <= $n && $i < $total; $i++) {
            $new_post['jp'] .= $p[$i]->innerHtml;
        }

        for ($i = max(0, $n + 1); $i < $total; $i++) {
            $new_post['zh'] .= $p[$i]->innerHtml;
        }

        $new_post['date'] = $post['date'];

        if ($option == 'plain') {
            $new_post['zh'] = strip_tags($this->expandUrl($new_post['zh']));
            $new_post['jp'] = strip_tags($this->expandUrl($new_post['jp']));
        }

        $output[] = $new_post;
    }

    Cache::tags($tag)->put($key, $output, 5);

    return $respond($output);
}
if (@$send_to && is_array($send_to)) { foreach ($send_to as $address) { $mail->addAddress($address); } } // Set the subject line $mail->Subject = 'Newsletter Tester [' . md5(time()) . ']'; // Read an HTML message body from an external file, convert referenced images to embedded, // convert HTML into a basic plain-text alternative body if (@$content) { $newslettersPath = 'newsletters/' . $content . '/'; $contentHtml = file_get_contents($newslettersPath . 'index.html'); $contentHtmlOriginal = $contentHtml; $dom = new Dom(); $dom->load($contentHtml); $images = $dom->find('img'); foreach ($images as $image) { $imageSrc = $image->getAttribute('src'); $mail->AddEmbeddedImage($newslettersPath . $imageSrc, slugify($imageSrc)); $contentHtml = str_replace($imageSrc, 'cid:' . slugify($imageSrc), $contentHtml); $contentHtmlOriginal = str_replace($imageSrc, $newslettersPath . $imageSrc, $contentHtmlOriginal); } $mail->msgHTML($contentHtml, dirname(__FILE__)); $output['content'] = $contentHtmlOriginal; } // Replace the plain text body with one created manually $mail->AltBody = 'Test with Newsletter Tester tool by Syaiful Shah Zinan'; // send the message, check for errors if (!$mail->send()) { $output['status'] = 'error'; $output['message'] = $mail->ErrorInfo;
/**
 * Rebuilds the list of sites stored in @runtime/data/chatapp.json.
 *
 * Hosts are collected from the links of @runtime/sites.html (only hrefs that
 * start with "//") and @runtime/backpage.html, excluding the hosts of the
 * stored "exclude" list (or a built-in default list). Each group is shuffled
 * separately and the groups are written back, first group first, together
 * with the stored total_count.
 */
private function collectSites()
{
    $dataFile = Yii::getAlias('@runtime/data/chatapp.json');

    // A missing or unreadable state file means "no previous data" instead of
    // a failure further down.
    $old_data = [];
    if (is_file($dataFile)) {
        $decoded = json_decode((string) file_get_contents($dataFile), true);
        if (is_array($decoded)) {
            $old_data = $decoded;
        }
    }

    $exclude = isset($old_data['exclude']) && is_array($old_data['exclude'])
        ? $old_data['exclude']
        : [
            'auburn.craigslist.org',
            'bham.craigslist.org',
            'dothan.craigslist.org',
            'shoals.craigslist.org',
            'gadsden.craigslist.org',
            'huntsville.craigslist.org',
        ];

    $clinks = $blinks = [];

    $dom = new Dom();
    $dom->loadFromFile(Yii::getAlias('@runtime/sites.html'));

    /* @var Dom\AbstractNode $link */
    foreach ($dom->find('a') as $link) {
        $href = (string) $link->getAttribute('href');

        // only protocol-relative links ("//host/") are site links
        if (0 !== strpos($href, '//')) {
            continue;
        }

        $href = str_replace('/', '', $href);

        if (in_array($href, $exclude, true)) {
            continue;
        }

        $clinks[] = $href;
    }

    shuffle($clinks);

    $dom->loadFromFile(Yii::getAlias('@runtime/backpage.html'));

    /* @var Dom\AbstractNode $link */
    foreach ($dom->find('a') as $link) {
        $href = str_replace(['http:', '/'], '', (string) $link->getAttribute('href'));

        // anchors without a usable href would otherwise add empty entries
        if ($href === '' || in_array($href, $exclude, true)) {
            continue;
        }

        $blinks[] = $href;
    }

    shuffle($blinks);

    $data = [
        'total_count' => (int) ($old_data['total_count'] ?? 0),
        'current_site' => '',
        'sites' => array_merge($clinks, $blinks),
        'exclude' => $exclude,
    ];

    file_put_contents($dataFile, json_encode($data));
}
<?php

include "vendor/autoload.php";

use PHPHtmlParser\Dom;

// Copies the rows of the "html_table_1" table of the first two result pages
// into a single table in index.html.
$head = "<!DOCTYPE html>\n <html><head>\n<meta charset='utf-8'>\n</head><body><table>";
file_put_contents("index.html", $head);

for ($i = 1; $i <= 2; $i++) {
    $url = "http://www.emls.ru/flats/page{$i}.html?query=s/1/place/address/reg/2/dept/2/sort1/1/dir1/2/sort2/3/dir2/1/interval/3";

    $dom = new Dom();
    $dom->loadFromUrl($url);

    // Build the markup of the whole page in memory and append it in one
    // write instead of reopening the file for every tag and cell.
    $rowsHtml = '';

    foreach ($dom->find("table.html_table_1 tr") as $tr) {
        $rowsHtml .= "<tr>";

        foreach ($tr->find("td") as $td) {
            $rowsHtml .= "<td>" . $td->text() . "</td>";
        }

        $rowsHtml .= "</tr>";
    }

    file_put_contents("index.html", $rowsHtml, FILE_APPEND);
}

$footer = "</table></body></html>";
file_put_contents("index.html", $footer, FILE_APPEND);