/**
 * Gets departures from the given station starting at the given time.
 *
 * The BVG mobile site is queried and the HTML result table is scraped.
 * Rows that do not look like a departure (fewer than three cells or a
 * time cell that is not "HH:MM") are skipped instead of aborting the call.
 *
 * @param int    $stationID   Station identifier, sent as the `input` query parameter
 * @param Carbon $time        Requested start time; sent as `H:i` and `d.m.y`
 * @param int    $maxJourneys Maximum number of journeys requested from the API
 * @return array List of arrays with the keys `time` (Carbon), `line` and `direction`
 * @throws ApiException If the API does not answer with HTTP status 200
 */
public static function getDepartures(int $stationID, Carbon $time, int $maxJourneys = 10)
{
    // prepare parameters for our request
    $query = [
        'input' => $stationID,
        'boardType' => 'dep',
        'time' => $time->format('H:i'),
        'date' => $time->format('d.m.y'),
        'maxJourneys' => $maxJourneys,
        'start' => 'yes',
    ];

    // send it to the bvg mobile site
    $response = \Requests::get(self::getApiEndpoint() . '?' . http_build_query($query));

    if ((int) $response->status_code !== 200) {
        throw new ApiException('Failed getting station data from BVG API');
    }

    // prepare document
    $dom = new Dom();
    $dom->load($response->body);

    // the overview element holds the date after the first colon of its text
    $overview = $dom->find('#ivu_overview_input');
    $dateText = trim(substr($overview->text, strpos($overview->text, ':') + 1));
    $date = Carbon::createFromFormat('d.m.y', $dateText, 'Europe/Berlin');

    $departures = [];

    // loop through each row of the result table body
    foreach ($dom->find('.ivu_result_box .ivu_table tbody tr') as $row) {
        $columns = $row->find('td');

        // a departure row needs a time, a line and a direction cell
        if (count($columns) < 3) {
            continue;
        }

        // a separate local is used so the $time parameter is not overwritten
        $clock = explode(':', trim(strip_tags((string) $columns[0])));
        if (count($clock) < 2 || !is_numeric($clock[0]) || !is_numeric($clock[1])) {
            continue;
        }

        // the line name normally sits inside a link; fall back to the cell itself
        $anchors = $columns[1]->find('a');
        $lineNode = count($anchors) > 0 ? $anchors[0] : $columns[1];

        $departures[] = [
            'time' => $date->copy()->hour((int) $clock[0])->minute((int) $clock[1])->second(0),
            'line' => trim(strip_tags((string) $lineNode)),
            'direction' => trim(strip_tags((string) $columns[2])),
        ];
    }

    return $departures;
}
/**
 * Options handed to load() must win over options set through setOptions():
 * with whitespaceTextNode re-enabled locally, the sibling of #hey is the
 * single-space text node.
 */
public function testConfigLocalOverride()
{
    $markup = '<div><p id="hey">Hey you</p> <p id="ya">Ya you!</p></div>';

    $dom = new Dom();
    $dom->setOptions(['whitespaceTextNode' => false]);
    $dom->load($markup, ['whitespaceTextNode' => true]);

    $sibling = $dom->getElementById('hey')->nextSibling();

    $this->assertEquals(' ', $sibling->text);
}
/**
 * Adds a photo to the HTML page of a session and pushes the page to the repository.
 *
 * If "<session>.html" already exists on the branch it is downloaded and
 * updated; otherwise the page template ($this->page) is used, the file is
 * created and addSession() is called. While the page holds at most four
 * images, a "twitter:imageN" meta tag is appended to the head as well.
 *
 * @param string $session Session name; "<session>.html" is the repository path
 * @param File   $file    Photo file being added
 * @param mixed  $number  Passed through to addSession() when the page is new
 */
public function addPhoto($session, File $file, $number)
{
    $path = $session . '.html';
    $update = $this->client->api('repo')->contents()->exists($this->user, $this->repo, $path, $this->branch);

    // Start from the stored page when there is one, from the template otherwise
    $pageDom = new \PHPHtmlParser\Dom();
    if ($update) {
        $pageDom->load($this->client->api('repo')->contents()->download($this->user, $this->repo, $path, $this->branch));
        $info = $this->client->api('repo')->contents()->show($this->user, $this->repo, $path, $this->branch);
    } else {
        $pageDom->loadFromFile($this->page);
    }

    $container = $pageDom->find('#photos')[0];

    // Build the photo fragment from its template and point it at the new file
    $photoDom = new \PHPHtmlParser\Dom();
    $photoDom->loadFromFile($this->photo);
    $photoDom->find('img')[0]->setAttribute('src', $this->getPhotoFilename($file));
    $container->addChild($photoDom->root);

    $imageCount = count($container->find('img'));
    if ($imageCount <= 4) {
        // The meta tags are numbered from zero
        $imageIndex = $imageCount - 1;
        $metaDom = new Dom();
        $metaDom->load('<meta name="twitter:image' . $imageIndex . '" content="' . $this->web . $this->getPhotoFilename($file, 'th') . '">');
        $pageDom->find('head', 0)->addChild($metaDom->root);
    }

    // Pretty-print and drop blank lines
    $content = preg_replace("#\n\\s*\n#", "\n", \Mihaeu\HtmlFormatter::format((string) $pageDom));

    if (!$update) {
        $this->client->api('repo')->contents()->create($this->user, $this->repo, $path, $content, 'Adding Page and Photo ' . PHP_EOL . $file, $this->branch);
        $this->addSession($session, $number, $file);

        return;
    }

    $this->client->api('repo')->contents()->update($this->user, $this->repo, $path, $content, 'Adding Photo ' . PHP_EOL . $file, $info['sha'], $this->branch);
}
/**
 * Collects Open Graph images for the current view and stores them on Ezset.
 *
 * Article view: the article's full-text/intro image comes first, followed by
 * the images found in the article text. When a main image exists, only the
 * FIRST inline image is skipped. If nothing was found, the category image
 * and then the configured default image are tried.
 *
 * Category view: runs once (guarded by static::$once) and stores a single
 * image taken from the category params or the configured default.
 *
 * @param string $context Not used by this method
 * @param object $article Object providing id, images, text and optionally catid
 *
 * @return void
 */
public static function setOpenGraph($context, $article)
{
    $es = \Ezset::getInstance();
    $input = \JFactory::getApplication()->input;
    $view = $input->get('view');

    if (empty($article->id)) {
        return;
    }

    if (!$es->params->get('ogGetInnerPageImage', 1)) {
        return;
    }

    if ('article' == $view) {
        $images = new \JRegistry($article->images);
        $imgs = array();

        $img = $images->get('image_fulltext', $images->get('image_intro'));

        if ($img) {
            $imgs[] = $img;
        }

        // With a main image present, the first inline image is skipped.
        $ignoreFirst = (bool) $imgs;

        $dom = new Dom();
        $dom->load($article->text);

        foreach ($dom->find('img') as $image) {
            if ($ignoreFirst) {
                // Clear the flag so that only the first image is dropped;
                // previously it stayed set and every inline image was skipped.
                $ignoreFirst = false;

                continue;
            }

            $imgs[] = $image->src;
        }

        if (!$imgs && isset($article->catid)) {
            $cat = \JTable::getInstance('category');
            $cat->load($article->catid);
            $cat->params = new \JRegistry($cat->params);

            // Only add a real value; an empty entry would make $imgs truthy
            // and block the default-image fallback below.
            $catImage = $cat->params->get('image');

            if ($catImage) {
                $imgs[] = $catImage;
            }
        }

        if (!$imgs && !$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
            $imgs[] = UriHelper::pathAddHost($es->params->get('ogDefaultImage'));
        }

        $es->data->ogImages = $imgs;
    } elseif ('category' == $view) {
        if (static::$once) {
            $cat = \JTable::getInstance('category');
            $cat->load($input->get('id'));
            $cat->params = new \JRegistry($cat->params);
            $img = $cat->params->get('image');

            if ($img) {
                $es->ogImage = $img;
            } elseif (!$es->params->get('ogDefaultImageOnlyFrontPage', 1)) {
                $es->ogImage = $es->params->get('ogDefaultImage');
            }

            // NOTE(review): the value is written to $es->ogImage but read back
            // from $es->data->ogImage here - confirm which property is intended.
            $es->ogImage = UriHelper::pathAddHost($es->data->ogImage);
        }

        static::$once = 0;
    }
}
/**
 * Loads the page behind the URL and extracts the recipe data from it.
 *
 * @param string $url
 * @throws \InvalidArgumentException When _loadUrl() returns null for the URL
 */
public function __construct($url)
{
    $html = $this->_loadUrl($url);

    if ($html === null) {
        throw new \InvalidArgumentException('This recipe does not exists.');
    }

    $document = new Dom();
    $document->load($html);

    $this->data = $this->_extractData($document);
}
/**
 * Enriches a result in place with data scraped from the page at $result['href'].
 *
 * Sets 'additionalText' from the "#textContent p" match and 'photos' to the
 * src attributes of all ".img-item img" elements.
 *
 * @param array $result Result entry; must contain 'href'
 */
protected function addAdditionalInfo(&$result)
{
    $page = new Dom();
    $page->load($result['href']);

    $result['additionalText'] = $page->find('#textContent p')->text;

    $sources = [];
    foreach ($page->find('.img-item img') as $imageNode) {
        $sources[] = $imageNode->getAttribute('src');
    }

    $result['photos'] = $sources;
}
/**
 * In strict mode, loading markup with a value-less attribute must raise a
 * StrictException carrying the expected message.
 */
public function testConfigStrictMissingAttribute()
{
    $markup = '<div><p id="hey" block>Hey you</p> <p id="ya">Ya you!</p></div>';
    $expected = "Tag 'p' has an attribute 'block' with out a value! (character #22)";

    $dom = new Dom();
    $dom->setOptions(['strict' => true]);

    try {
        // loading is expected to throw
        $dom->load($markup);

        // reaching this line means no exception was raised
        $this->assertTrue(false);
    } catch (StrictException $caught) {
        $this->assertEquals($expected, $caught->getMessage());
    }
}
/**
 * Stores the first image found in the article text as its intro image.
 *
 * The src of the first <img> in introtext + fulltext is written to the
 * 'image_intro' key of the article's images registry; when the text has no
 * image, the key is set to null.
 *
 * NOTE(review): the method only bails out when the article has no 'images'
 * property AND the context is not 'com_content.article' - confirm that the
 * two conditions are really meant to be combined this way.
 *
 * @param string  $context
 * @param \JTable $article
 *
 * @return void
 */
public static function saveFirstImage($context, $article)
{
    $hasImagesProperty = property_exists($article, 'images');
    $isArticleContext = $context == 'com_content.article';

    if (!($hasImagesProperty || $isArticleContext)) {
        return;
    }

    $registry = new \JRegistry($article->images);

    $dom = new Dom();
    $dom->load($article->introtext . $article->fulltext);

    $found = $dom->find('img');
    $firstSrc = $found->count() ? $found[0]->src : null;

    $registry->set('image_intro', $firstSrc);
    $article->images = $registry->toString();
}
/**
 * Looks up the album cover of a song on slothradio.com.
 *
 * @param string $songPath Path handed to SongReader to obtain artist and album
 * @return string The src of the first matching cover image, or '' when none is found
 */
public function getAlbumCoverURL(string $songPath) : string
{
    $songReader = new \SongReader($songPath);
    $client = new Client(['base_uri' => 'http://www.slothradio.com/', 'timeout' => 2.0]);

    $query = [
        'artist' => $songReader->getAuthor(),
        'album' => $songReader->getAlbum(),
    ];
    $response = $client->request('GET', 'covers/', ['query' => $query]);

    $dom = new Dom();
    $dom->load($response->getBody()->getContents());

    $covers = $dom->find('#content > div.album0 > img');
    if (count($covers) <= 0) {
        return '';
    }

    /** @var Dom\HtmlNode $cover */
    $cover = $covers[0];

    return $cover->getAttribute('src');
}
/**
 * Loads every configured URL and collects the absolute links found on it.
 *
 * Only href values that start with "http" are kept. The list stored under
 * result["urls"] is de-duplicated and re-indexed, and is an empty array when
 * no link qualifies.
 *
 * @return Result Result object whose result["urls"] holds the collected links
 */
public function scan()
{
    $crawlerResult = new Result(array());

    // Collect locally so the list exists even when no link matches;
    // previously array_unique() was called on an undefined index in that case.
    $urls = array();

    foreach ($this->urls as $url) {
        $dom = new Dom();
        $dom->load($url);

        foreach ($dom->find("a") as $a) {
            $href = $a->href;

            // Skip anchors without an href and links that do not start with 'http'
            if (!is_string($href) || 0 !== strpos($href, 'http')) {
                continue;
            }

            $urls[] = $href;
        }
    }

    $crawlerResult->result["urls"] = array_values(array_unique($urls));

    return $crawlerResult;
}
/**
 * Re-formats every node of the page body that matches $this->selector.
 *
 * For each match, the formatted replacement is computed first, all elements
 * found below the node are deleted, and the replacement is appended.
 *
 * @param Page $page Page whose body is parsed
 * @return mixed Result of $page->withBody() called with the rewritten HTML
 */
public function run(Page $page)
{
    // Scripts, styles and line breaks are kept while parsing
    $parserOptions = array(
        'removeScripts' => false,
        'removeStyles' => false,
        'preserveLineBreaks' => true,
    );

    $dom = new Dom();
    $dom->setOptions($parserOptions);
    $dom->load($page->body());

    foreach ($dom->find($this->selector) as $target) {
        // Format before the node's content is removed
        $replacement = $this->format($target);

        foreach ($target->find('*') as $descendant) {
            $descendant->delete();
        }

        $target->addChild($replacement);
    }

    $html = $dom->root->outerHtml();

    return $page->withBody($html);
}
/**
 * Determines where a URL redirects to.
 *
 * The 'redirect_url' entry of the cURL info is tried first (query string
 * removed). If it is missing or equal to the URL itself, the page is loaded
 * and the first <meta http-equiv="refresh"> tag is evaluated instead.
 *
 * @param string $url
 * @param string $userAgent
 *
 * @return string Redirect target, or an empty value when none was found
 */
private function getRedirectUrl($url, $userAgent)
{
    $curlInfo = $this->getCurlInfo($url, $userAgent);

    $reported = isset($curlInfo['redirect_url']) ? $curlInfo['redirect_url'] : null;
    $redirectUrl = $this->removeQueryString($reported);

    // A redirect to the same address (ignoring surrounding slashes) does not count
    if (trim($url, '/') === trim($redirectUrl, '/')) {
        $redirectUrl = '';
    }

    if ($redirectUrl) {
        return $redirectUrl;
    }

    // look for meta http-equiv="refresh"
    $dom = new Dom();
    $dom->load($url);

    foreach ($dom->find('meta') as $metaTag) {
        if ($metaTag->getAttribute('http-equiv') !== 'refresh') {
            continue;
        }

        $redirectUrl = preg_replace('/\\s*\\d+\\s*;\\s*url\\s*=\\s*(\'|\\")(.+)(\'|\\")/i', '$2', $metaTag->getAttribute('content'));
        break;
    }

    return $redirectUrl;
}
/**
 * Attempts to get the URL to a given profiles
 * photo. This method will return the URL or will return
 * boolean false if the profile photo could not be scraped.
 *
 * The "/posts" page of the profile is downloaded, the first <img> tag after
 * the text "dkb photo" is cut out and its src attribute is returned.
 * Protocol-relative sources ("//...") are prefixed with "https:".
 *
 * @param string $profileURL The URL to the profile
 *
 * @return string|boolean
 */
public function profilePhoto($profileURL)
{
    $contents = @file_get_contents($profileURL . '/posts');

    if (!$contents) {
        return false;
    }

    // Without the marker there is no photo to scrape. Previously a failed
    // search fell through and the first image of the whole page was used.
    $markerPosition = stripos($contents, 'dkb photo');

    if ($markerPosition === false) {
        return false;
    }

    /* Chop off all of the string before the marker */
    $contents = substr($contents, $markerPosition);

    $tagStart = stripos($contents, '<img');

    if ($tagStart === false) {
        return false;
    }

    $contents = substr($contents, $tagStart);

    $tagEnd = strpos($contents, '>');

    if ($tagEnd === false) {
        return false;
    }

    /* Chop off everything after the end of the tag */
    $contents = substr($contents, 0, $tagEnd + 1);

    $dom = new Dom();
    $dom->load($contents);

    $img = $dom->find('img', 0);

    // The parser may not yield an element for a malformed tag
    if (!is_object($img)) {
        return false;
    }

    $src = $img->getAttribute('src');

    if (!is_string($src) || $src === '') {
        return false;
    }

    if (substr($src, 0, 2) == '//') {
        $src = 'https:' . $src;
    }

    return $src;
}
/**
 * Renders the template file for a path and turns its HTML into a Page.
 *
 * The path is split on "/", empty segments are dropped and the rest is
 * joined with "." to fill the $this->path format string. The file is
 * included with the context extracted into local scope, its output is
 * parsed, and title, header elements and body are copied onto the Page.
 *
 * @param string $path    Slash-separated page path
 * @param array  $context Variables made available to the included file
 * @return Page
 * @throws PageNotFoundException When the resolved file does not exist
 */
public function page($path, array $context = array())
{
    // From here on $path is the array of non-empty path segments
    $path = array_filter(explode('/', $path), 'strlen');
    $pathname = implode('.', $path);
    $file = sprintf($this->path, $pathname);
    if (!file_exists($file)) {
        throw new PageNotFoundException($path);
    }
    // Parse the HTML
    $dom = new Dom();
    $dom->setOptions(array('removeScripts' => false, 'removeStyles' => false, 'preserveLineBreaks' => true));
    // NOTE(review): extract() runs with its default flags, so context keys
    // named like a local variable (file, path, dom, ...) overwrite that
    // variable before the include - confirm callers control the context.
    extract($context);
    // Capture everything the included file prints
    ob_start();
    include $file;
    $dom->load(ob_get_clean());
    // Create a new page
    $page = new Page($path);
    // Title
    if (($title = $dom->find('title', 0)) !== null) {
        $page->withTitle($title->text());
    }
    // Header: every child of <head> except <title> and <meta charset=...>
    if (($head = $dom->find('head', 0)) !== null) {
        foreach ($head->getChildren() as $child) {
            if ($child->getTag()->name() !== 'title' && !($child->getTag()->name() === 'meta' && $child->getAttribute('charset') !== null)) {
                // Append to whatever header markup the page already has
                $page->withHeader($page->header() . $child->outerHtml());
            }
        }
    }
    // Body
    if (($body = $dom->find('body', 0)) !== null) {
        $page->withBody($body->innerHtml());
    }
    // Return page
    return $page;
}
/**
 * Gets all image URLs found on a page.
 *
 * Besides the <img> tags of the page itself, every iframe whose id contains
 * "photoset" is fetched and its <img> tags are collected too.
 *
 * NOTE(review): the original comment says that image URLs which were already
 * stored are not stored again; no de-duplication happens in this method, so
 * presumably that is handled elsewhere - confirm.
 *
 * @param int $page Page number
 * @return array Image URLs
 */
protected function getRawImgsrcs($page)
{
    // Prepare the DOM
    $pageDom = new Dom();
    $pageDom->load($this->getHtml($page));

    $sources = [];

    // Add every img of the page to the list
    foreach ($pageDom->find('img') as $imageNode) {
        $sources[] = $imageNode->getAttribute('src');
    }

    // Inspect every iframe
    foreach ($pageDom->find('iframe') as $frame) {
        if (strstr($frame->getAttribute('id'), 'photoset')) {
            $frameDom = new Dom();
            $frameDom->load($this->requestHtml($frame->getAttribute('src')));

            foreach ($frameDom->find('img') as $imageNode) {
                $sources[] = $imageNode->getAttribute('src');
            }
        }
    }

    return $sources;
}
/**
 * Walks through 12-digit numbers, looks each one up on barcode-list.ru and
 * stores the product name that is found.
 *
 * The position is persisted in the SavedVariable named 'lastParseID', so a
 * later run continues from the last number that was tried. Progress and
 * debug information is echoed.
 *
 * @param int $start Initial number, used only when no 'lastParseID' record exists yet
 */
public function actionIndex($start = 111111111111)
{
    // Load the saved position, creating the record on first use
    $lastID = SavedVariable::findOne('lastParseID');
    if (!isset($lastID)) {
        $lastID = new SavedVariable();
        $lastID->name = 'lastParseID';
        $lastID->value = (string) (int) $start;
        $lastID->save();
    }
    // Never start below the smallest number of interest
    $ii = max(111111111111, (int) $lastID->value);
    while ($ii <= 999999999999) {
        for ($i = $ii; $i <= 999999999999; $i++) {
            // Persist the position before doing the lookup
            $lastID->value = (string) $i;
            $lastID->save();
            // $barcode is first set to the digits of $i inside the expression
            // and then gets one digit appended: (10 - weighted digit sum % 10) % 10,
            // where the callback multiplies digits at odd indices by 3 and
            // digits at even indices by 1.
            // NOTE(review): this looks like an EAN-13 check digit and assumes
            // array_walk_r() returns the modified array - confirm.
            $barcode .= (10 - array_sum(ArrayHelperAdvanced::array_walk_r(str_split($barcode = (string) $i), function (&$v, $k) {
                $v *= $k % 2 * 2 + 1;
            })) % 10) % 10;
            $dom = new Dom();
            try {
                $string = file_get_contents('http://www.barcode-list.ru/barcode/RU/Поиск.htm?barcode=' . $barcode);
            } catch (\Exception $e) {
                // Leaves the for loop; the code after it waits and the while
                // loop then resumes at the same $i.
                // NOTE(review): file_get_contents() only throws if an error
                // handler converts warnings to exceptions - confirm.
                break;
            }
            $dom->load($string);
            /** @var HtmlNode $table */
            $tables = $dom->find('.randomBarcodes');
            if (count($tables) === 0) {
                echo 'not found ' . $i . ' of ' . $barcode . "\n";
                continue;
            }
            $table = $tables[0];
            $table->countChildren();
            echo '$table->countChildren() = ' . $table->countChildren() . "\n";
            echo '$table->getTag()->name() = ' . $table->getTag()->name() . "\n";
            // Only the first row that has cells is used: its third cell is
            // stored as the product name, then the loop is left.
            /** @var HtmlNode $tr */
            /** @noinspection LoopWhichDoesNotLoopInspection */
            foreach ($table->find('tr') as $tr) {
                echo ' $tr->getTag()->name() = ' . $tr->getTag()->name() . "\n";
                $tds = $tr->find('td');
                if (count($tds) === 0) {
                    continue;
                }
                $td = $tds[2];
                /** @var HtmlNode $td */
                echo ' $td->getTag()->name() = ' . $td->getTag()->name() . "\n";
                echo ' $td->text = ' . $td->text . "\n";
                (new ParsedProduct(['barcode' => $i, 'name' => $td->text]))->save(false);
                break;
            }
        }
        // Remember where the for loop stopped so the while loop resumes there
        $ii = $i;
        echo 'break ' . $i . "\n";
        $startTimestamp = time();
        echo date('Y.m.d h:i:s', $startTimestamp) . "\n";
        // Busy-waits until ten seconds have passed.
        // NOTE(review): this spins the CPU; sleep(10) would wait without load.
        do {
            $currentTimestamp = time();
        } while ($currentTimestamp - $startTimestamp < 10);
        echo date('Y.m.d h:i:s', $currentTimestamp) . "\n";
    } // end of while ($ii <= 999999999999)
    echo $ii . "\n";
}
/**
 * Fetches the HTML of a URL, parses it and remembers the Dom in $this->doms.
 *
 * @param string $url
 * @return Dom The parsed document, also stored under $this->doms[$url]
 */
protected function requestDom($url)
{
    $markup = $this->requestHtml($url);

    $document = new Dom();
    $document->load($markup);

    return $this->doms[$url] = $document;
}
<?php

use PHPHtmlParser\Dom;

include_once "../vendor/autoload.php";

// Fetches the Baidu search result page for a community name and prints its HTML.
$basUrl = 'http://www.baidu.com/s?wd=';
$communityName = '毕加索小镇';

// The search term is not ASCII, so it has to be percent-encoded before it is
// placed in the query string; the raw bytes do not form a valid URL.
$queryUrl = $basUrl . urlencode($communityName);

$dom = new Dom();
$dom->load($queryUrl);

$html = $dom->outerHtml;
echo $html;
/**
 * Creates a new Dom object, keeps it in the static $dom property and
 * calls load() on it.
 *
 * @param string $str
 * @return mixed Whatever load() of the new Dom object returns
 */
public static function load($str)
{
    self::$dom = new Dom();

    return self::$dom->load($str);
}
/**
 * Returns the entity-encoded description taken from the #intro element.
 *
 * The intro element is parsed on its own; when it holds more than one <p>,
 * the second one is used, otherwise the whole result of the <p> lookup.
 *
 * @return string
 */
public function getDescription()
{
    $intro = $this->dom->getElementById('intro');

    $introDom = new Dom();
    $introDom->load($intro);

    $paragraphs = $introDom->getElementsByTag('p');
    $subject = count($paragraphs) > 1 ? $paragraphs[1] : $paragraphs;

    return htmlentities($subject);
}
/**
 * Lists the files of all directories below /DCIM/ on the device.
 *
 * Each directory listing is requested over HTTP and every "tbody a" link
 * becomes a File.
 *
 * @param mixed $filter When truthy, only files whose getType() is identical to it are returned
 * @return File[]
 */
public function getFiles($filter = null)
{
    $files = [];

    foreach ($this->getDirectories() as $path) {
        $listing = $this->getHttpClient()->get('http://' . $this->ip . ':' . $this->web . '/DCIM/' . $path);

        $dom = new Dom();
        $dom->load($listing->getBody()->getContents());

        foreach ($dom->find('tbody a') as $link) {
            $file = new File($path, $link->getAttribute('href'));

            if (!$filter || $filter === $file->getType()) {
                $files[] = $file;
            }
        }
    }

    return $files;
}
$mail->addAddress($email); if (@$send_to && is_array($send_to)) { foreach ($send_to as $address) { $mail->addAddress($address); } } // Set the subject line $mail->Subject = 'Newsletter Tester [' . md5(time()) . ']'; // Read an HTML message body from an external file, convert referenced images to embedded, // convert HTML into a basic plain-text alternative body if (@$content) { $newslettersPath = 'newsletters/' . $content . '/'; $contentHtml = file_get_contents($newslettersPath . 'index.html'); $contentHtmlOriginal = $contentHtml; $dom = new Dom(); $dom->load($contentHtml); $images = $dom->find('img'); foreach ($images as $image) { $imageSrc = $image->getAttribute('src'); $mail->AddEmbeddedImage($newslettersPath . $imageSrc, slugify($imageSrc)); $contentHtml = str_replace($imageSrc, 'cid:' . slugify($imageSrc), $contentHtml); $contentHtmlOriginal = str_replace($imageSrc, $newslettersPath . $imageSrc, $contentHtmlOriginal); } $mail->msgHTML($contentHtml, dirname(__FILE__)); $output['content'] = $contentHtmlOriginal; } // Replace the plain text body with one created manually $mail->AltBody = 'Test with Newsletter Tester tool by Syaiful Shah Zinan'; // send the message, check for errors if (!$mail->send()) { $output['status'] = 'error';
/**
 * Builds the JSON response with the latest tweets and their translation.
 *
 * The result is cached under "tweet.<option>.<count>" with the tag "tweet".
 * Unless the option is 'html', the first image of each post is exposed as
 * 'img' and all images are removed from the content. The post's <p>/<div>
 * blocks are split at the index returned by detect(): blocks up to and
 * including it go to 'jp', the rest to 'zh'. With the option 'plain', URLs
 * are expanded and tags stripped.
 *
 * @param mixed $count  Number of posts requested from the upstream API
 * @param mixed $option Output flavour; 'html' and 'plain' are treated specially
 * @return mixed Response carrying the posts, or a JSON error payload when the download fails
 */
private function handle($count, $option)
{
    $key = "tweet.{$option}.{$count}";
    $tag = "tweet";

    if (Cache::tags($tag)->has($key)) {
        return response(Cache::tags($tag)->get($key))
            ->header('Content-Type', 'application/json')
            ->header('Access-Control-Allow-Origin', '*');
    }

    $rep = file_get_contents("https://t.kcwiki.moe/?json=1&count={$count}");

    if (!$rep) {
        return response()->json(['result' => 'error', 'reason' => 'Getting tweets failed.']);
    }

    $decoded = json_decode($rep, true);
    $output = [];

    foreach ($decoded['posts'] as $post) {
        $dom = new Dom();
        $dom->load($post['content']);

        // Keys are added in a fixed order because it shows in the JSON output
        $entry = [];

        $hasTweetId = array_key_exists('ozh_ta_id', $post['custom_fields'])
            && is_array($post['custom_fields']['ozh_ta_id']);
        $entry['id'] = $hasTweetId ? $post['custom_fields']['ozh_ta_id'][0] : '';

        $images = $dom->find('img');

        if ($option != 'html') {
            if (count($images) > 0) {
                $entry['img'] = $images[0]->getAttribute('src');

                // Drop each image; when it is wrapped in a link, drop the link
                foreach ($images as $image) {
                    $wrapper = $image->getParent();

                    if ($wrapper->getTag()->name() == 'a') {
                        $wrapper->delete();
                    } else {
                        $image->delete();
                    }
                }
            } else {
                $entry['img'] = '';
            }
        }

        $blocks = $dom->find('p, div');
        $entry['jp'] = '';
        $entry['zh'] = '';

        $split = $this->detect($blocks);

        for ($j = 0; $j <= $split; $j++) {
            $entry['jp'] .= $blocks[$j]->innerHtml;
        }

        $blockCount = count($blocks);

        for ($k = $split + 1; $k < $blockCount; $k++) {
            $entry['zh'] .= $blocks[$k]->innerHtml;
        }

        $entry['date'] = $post['date'];

        if ($option == 'plain') {
            $entry['zh'] = strip_tags($this->expandUrl($entry['zh']));
            $entry['jp'] = strip_tags($this->expandUrl($entry['jp']));
        }

        $output[] = $entry;
    }

    Cache::tags($tag)->put($key, $output, 5);

    return response($output)
        ->header('Content-Type', 'application/json')
        ->header('Access-Control-Allow-Origin', '*');
}
/**
 * Replacing the inner HTML of a link must show up in the outer HTML of the
 * paragraph that contains it.
 */
public function testChangeContent()
{
    $dom = new Dom();
    $dom->load('<div class="all"><p>Hey bro, <a href="google.com" id="78">click here</a></div><br />');

    $firstLink = $dom->find('a')[0];
    $firstLink->setInnerHtml('gogogo');

    $paragraph = $dom->getElementsByTag('p')[0];

    $this->assertEquals('<p>Hey bro, <a href="google.com" id="78">gogogo</a></p>', $paragraph->outerHtml);
}
/**
 * With the encoding forced to UTF-8, the second input inside the table of
 * horrible.html must not equal the given submit button markup.
 */
public function testEnforceEncoding()
{
    $unexpected = '<input type="submit" tabindex="0" name="submit" value="Информации" />';

    $dom = new Dom();
    $dom->load('tests/files/horrible.html', ['enforceEncoding' => 'UTF-8']);

    $secondInput = $dom->find('table input', 1);

    $this->assertNotEquals($unexpected, $secondInput->outerHtml);
}
/**
 * Returns the parsed wallpaper page, fetching it when necessary.
 *
 * With caching enabled, a previously parsed Dom is reused and a newly
 * parsed one is remembered; with caching disabled, the stored Dom is reset
 * to null on every call.
 *
 * @return Dom
 * @throws LoginException Thrown if access to the wallpaper was denied.
 * @throws NotFoundException Thrown if the wallpaper was not found.
 */
private function getDom()
{
    if ($this->cacheEnabled && $this->dom !== null) {
        return $this->dom;
    }

    try {
        $response = $this->client->get(Wallhaven::URL_WALLPAPER . '/' . $this->id)->getBody()->getContents();
    } catch (RequestException $e) {
        // Translate the known status codes, pass everything else on unchanged
        switch ($e->getCode()) {
            case 403:
                throw new LoginException("Access to wallpaper is forbidden.");
            case 404:
                throw new NotFoundException("Wallpaper not found.");
            default:
                throw $e;
        }
    }

    $dom = new Dom();
    $dom->load($response);

    if ($this->cacheEnabled) {
        $this->dom = $dom;
    } else {
        $this->dom = null;
    }

    return $dom;
}
/**
 * A single-quoted attribute value that itself contains a single quote must
 * be read back complete.
 */
public function testMultipleSingleQuotes()
{
    $markup = "<a title='Ain't this the best' href=\"http://www.example.com\">Hello</a>";

    $dom = new Dom();
    $dom->load($markup);

    $anchor = $dom->getElementsByTag('a')[0];

    $this->assertEquals("Ain't this the best", $anchor->title);
}
/**
 * A script block that writes another script tag through document.write must
 * not disturb the paragraph that follows it.
 */
public function testScriptCleanerScriptTag()
{
    $markup = ' <p>.....</p> <script> Some code ... document.write("<script src=\'some script\'><\\/script>") Some code ... </script> <p>....</p>';

    $dom = new Dom();
    $dom->load($markup);

    $secondParagraph = $dom->getElementsByTag('p')[1];

    $this->assertEquals('....', $secondParagraph->innerHtml);
}
//require_once('simple_html_dom.php');
require_once 'vendor/autoload.php';

use PHPHtmlParser\Dom;

// Loads the tini.maiige.hu page (optionally for a given date) and extracts
// the pieces that the template below prints.
$url = "http://tini.maiige.hu";

// The date comes straight from the request: accept only a plain string and
// percent-encode it so it cannot inject further query parameters or break
// the URL (previously it was appended as-is).
if (isset($_GET['date']) && is_string($_GET['date'])) {
    $date = $_GET['date'];
    $url .= "/?date=" . urlencode($date);
}

/*$html=file_get_html($url); $page_title=$html->find('title',0)->plaintext; $text_title=$html->find('span.cimsor_cim',0)->plaintext; $textdate=$html->find('span.cimsor_datum',0)->plaintext; $text=$html->find('div.gondolatok_box',0)->find('p'); $andnow=$html->find('div.esmost_box',0)->find('p');*/

$dom = new Dom();
$dom->load($url);

$page_title = $dom->find('title')[0]->innerHtml;
$text_title = $dom->find('span.cimsor_cim')[0]->innerHtml;
$text_date = $dom->find('span.cimsor_datum')[0]->innerHtml;
$text = $dom->find('div.gondolatok_box')[0]->find('p');
$andnow = $dom->find('div.esmost_box')[0]->find('p');
?>
<!DOCTYPE html>
<html>
<head>
<title><?php echo $page_title; ?> </title>
<!-- TODO: MySQL Query -->
/**
 * Downloads the HTML of a URL through getHtml() and returns it parsed.
 *
 * @param string $url
 * @return Dom
 */
public static function getDom($url)
{
    $document = new Dom();
    $document->load(self::getHtml($url));

    return $document;
}