/**
 * Scrape book details from a "sister site" product page (the built-in
 * default points at cafescribe.com).
 *
 * The page layout is addressed positionally: the first table cell under
 * div#bodycenter is looked up once, its child #2 holds the prices and its
 * child #9 holds the detail rows. A layout change on the remote site will
 * break these lookups.
 *
 * @param string $sister_url Product page URL. When empty, the sample product
 *                           page that used to be hard-coded here is loaded.
 * @return string Comma-separated values in this order: author, edition,
 *                publisher, ISBN-10 (print), ISBN-13 (print), ISBN-10
 *                (digital), ISBN-13 (digital), list price, "you pay" price.
 */
function SisterSiteData($sister_url)
{
    include_once "library/simple_html_dom.php";

    // Previously the argument was ignored and this sample URL was always
    // fetched; it is now only the fallback for an empty argument.
    $url = $sister_url
        ? $sister_url
        : "http://www.cafescribe.com/index.php?option=com_virtuemart&page=shop.product_details&flypage=shop.flypage&isbn13=9780073527093&storeid=670&vmcchk=1";

    $html = file_get_dom($url);

    // Resolve the shared container once instead of re-running the selector
    // for every field.
    $cell = $html->find('div[id=bodycenter] table td', 0);
    $priceBlock = $cell->children[2];
    $detailBlock = $cell->children[9];

    $ListPrice = trim($priceBlock->children[1]->plaintext);
    $YouPayPrice = trim($priceBlock->children[6]->plaintext);

    // Each detail row carries its value in the second child of the <tr>.
    $Author = trim($detailBlock->find('tr', 0)->children[1]->plaintext);
    $Edition = trim($detailBlock->find('tr', 4)->children[1]->plaintext);
    $Publisher = trim($detailBlock->find('tr', 3)->children[1]->plaintext);
    $ISBN_10_Print = trim($detailBlock->find('tr', 6)->children[1]->plaintext);
    $ISBN_13_Print = trim($detailBlock->find('tr', 7)->children[1]->plaintext);
    $ISBN_10_Digital = trim($detailBlock->find('tr', 8)->children[1]->plaintext);
    $ISBN_13_Digital = trim($detailBlock->find('tr', 9)->children[1]->plaintext);

    return "{$Author},{$Edition},{$Publisher},{$ISBN_10_Print},{$ISBN_13_Print},{$ISBN_10_Digital},{$ISBN_13_Digital},{$ListPrice},{$YouPayPrice}";
}
/**
 * Load a URL into a DOM, making one additional attempt if the first fails.
 *
 * The options array is keyed by CURLOPT_* constants: 10 s connect timeout,
 * 20 s total timeout, and SSL peer/host verification switched off.
 *
 * @param string $url Address to load.
 * @return mixed Whatever file_get_dom() returns; false when both attempts
 *               returned false.
 */
function connect($url)
{
    $curlOptions = array(
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 20,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => 0,
    );

    $dom = file_get_dom($url, true, false, $curlOptions);
    if ($dom !== false) {
        return $dom;
    }

    // First attempt failed: try exactly once more and hand back the result
    // as-is, even if it is false again.
    return file_get_dom($url, true, false, $curlOptions);
}
/**
 * Fetch the current stock quote and append it to the data file when it is a
 * new closing price.
 *
 * The page at _URL is parsed; the first span.yjFL supplies the price
 * (thousands separators removed) and characters 20-24 of the first td.yjSt
 * supply the date. A date shaped like "NN/NN" is treated as a closing price;
 * anything else is ignored. The quote is only written when its date differs
 * from the last dated line already in _DATA_FILE.
 *
 * The data file is expected to hold "date, price" lines, each terminated by
 * a newline.
 *
 * @return array List of rows read from the data file, each row being the
 *               result of splitting a line on ", "; when a new quote was
 *               written, the trailing (empty) row is replaced by it.
 */
function update()
{
    // --- Fetch the stock price data ---
    $dom = file_get_dom(_URL);

    // Defaults keep the later code well-defined when the page has no
    // matching nodes.
    $kabuka = null;
    $date = '';

    foreach ($dom->find('span[class=yjFL]') as $node) {
        $kabuka = floatval(str_replace(",", "", $node->innertext));
        break; // only the first match is used
    }
    foreach ($dom->find('td[class=yjSt]') as $node) {
        $date = (string) substr($node->innertext, 20, 5);
        break; // only the first match is used
    }

    // --- Load the existing data file ---
    // explode() replaces the regex-based split(), which was removed in PHP 7.
    $contents = file_get_contents(_DATA_FILE);
    $d = explode("\n", $contents === false ? '' : $contents);

    $dd = array();
    for ($i = 0; isset($d[$i]); $i++) {
        $dd[] = explode(", ", $d[$i]);
    }

    // --- Only closing prices (date shaped NN/NN) are recorded ---
    // preg_match() replaces ereg(), which was removed in PHP 7.
    if (!preg_match('#[0-9][0-9]/[0-9][0-9]#', $date)) {
        return $dd;
    }

    // With a newline-terminated file the final element of $dd is an empty
    // row, so the last real entry sits at index $i - 2.
    $lastDate = isset($dd[$i - 2][0]) ? $dd[$i - 2][0] : null;
    if ($lastDate === $date) {
        return $dd; // already recorded
    }

    // New closing price: append it to the file and mirror it in the result,
    // overwriting the trailing empty row.
    $fp = fopen(_DATA_FILE, "a");
    if ($fp !== false) {
        fwrite($fp, $date . ", " . $kabuka . "\n");
        fclose($fp);
    }
    $dd[$i - 1][0] = $date;
    $dd[$i - 1][1] = $kabuka;

    return $dd;
}
/**
 * Build a "Random Post" body from a randomly chosen thread.
 *
 * A random thread id is drawn, the thread XML is loaded from shackchatty.com
 * and the comment with the highest reply_count is selected (on a tie the
 * later comment wins). The formatted body is passed to the parent
 * constructor.
 *
 * @throws \RuntimeException When the thread cannot be loaded or contains no
 *                           comment. Previously this case ended in a fatal
 *                           error on an undefined variable.
 */
public function __construct()
{
    $post = null;
    $good = 0;

    $randId = mt_rand(1, 18514293);
    $randUrl = "http://www.shackchatty.com/thread/{$randId}.xml";

    $dom = file_get_dom($randUrl);
    if (!$dom) {
        throw new \RuntimeException("Could not load thread {$randUrl}");
    }

    // Keep the comment with the most replies; ">=" means a later comment
    // with an equal count replaces an earlier one.
    foreach ($dom->find('comment') as $element) {
        if ($element->reply_count >= $good) {
            $good = $element->reply_count;
            $post = $element;
        }
    }

    if ($post === null) {
        throw new \RuntimeException("No comments found in thread {$randUrl}");
    }

    $shackUrl = "http://www.shacknews.com/laryn.x?id=" . $post->id;

    $body = "_[g{Random Post:}g]_ \n";
    $body .= "_[By: y{" . trim($post->author) . "}y s[{$shackUrl}]s]_ \n";
    $body .= self::cleanText($post->children(0)->innertext) . "\n";

    parent::__construct($body);
}
<?php /** * Should output all CNN Headlines * * Demonstrates advanced selectors * * @author Niels A.D. * @package Ganon * @link http://code.google.com/p/ganon/ * @license http://dev.perl.org/licenses/artistic.html Artistic License */ include_once '../ganon.php'; //PHP4 users, make sure this path is correct! $html = file_get_dom('http://www.cnn.com/'); if (version_compare(PHP_VERSION, '5.3.0') >= 0) { //PHP 5.3.0 and higher foreach ($html('div:has(h4) (li, h4)') as $element) { if ($element->tag === 'h4') { echo '<b>', $element->getPlainText(), '</b>'; } else { echo $element->getPlainText(); } echo "<br>\n"; } } else { //PHP 4 and 5.3.0 and lower foreach ($html->select('div:has(h4) (li, h4)') as $element) { if ($element->tag === 'h4') { echo '<b>', $element->getPlainText(), '</b>'; } else {
- Demonstrates formatter and how to delete nodes - - @author Niels A.D. - @package Ganon - @link http://code.google.com/p/ganon/ - @license http://dev.perl.org/licenses/artistic.html Artistic License --> <html> <h1>Minified HTML:</h1> <?php include_once '../ganon.php'; //PHP4 users, make sure this path is correct! //Only keep everything between body tags, delete the rest. $html = file_get_dom('http://m.nos.nl'); $html->select('"!DOCTYPE"', 0)->delete(); $html->select('head', 0)->delete(); $html->select('body', 0)->detach(true); $html->select('html', 0)->detach(true); //Minified version HTML_Formatter::minify_html($html); echo "{$html}\n"; ?> <h1>Formatted HTML:</h1> <?php //Formatted version $formatter = new HTML_Formatter(array('sort_attributes' => false, 'attributes_case' => CASE_UPPER)); $formatter->format($html);
/**
 * Scrape the material list of a bookstore page and print one line per book.
 *
 * Every <ul> under div#material_results is treated as one group of books and
 * every <li> in it as one book. For each book the title, image URL, used /
 * new / digital prices, author, edition, ISBN (taken from the image file
 * name) and the optional sister-site link are extracted and echoed.
 *
 * Fields that are missing on the page are printed as empty strings.
 *
 * @param string $url Page to scrape.
 * @return void Output is written with echo.
 */
function MainBookData($url)
{
    include_once "library/simple_html_dom.php";

    $html = file_get_dom($url);
    $ul = $html->find('div[id=material_results] ul');

    // No material lists on the page: nothing to print.
    if (empty($ul)) {
        return;
    }

    // Where each price lives inside a book's <li>: selector and match index.
    $priceFields = array(
        'used' => array('div[class=field]', 1),
        'new' => array('div[class=field]', 2),
        'digital' => array('div[id=field]', 0),
    );

    foreach ($ul as $list) {
        foreach ($list->find('li') as $i => $li) {
            $titleNode = $li->find('span[class=wrap]', 0);
            $BookTitle = $titleNode ? htmlspecialchars_decode($titleNode->plaintext) : '';

            $imageNode = $li->find('img', 0);
            $ImageUrl = $imageNode ? $imageNode->getAttribute("src") : '';

            // Prices default to '' so the output line is well-defined even
            // when a field is absent or empty.
            $prices = array();
            foreach ($priceFields as $kind => $lookup) {
                $prices[$kind] = '';
                $field = $li->find($lookup[0], $lookup[1]);
                if ($field && $field->plaintext != "") {
                    $emph = $field->find('span[class=emph]', 0);
                    if ($emph) {
                        $prices[$kind] = $emph->plaintext;
                    }
                }
            }
            $BK_UsedPrice = $prices['used'];
            $BK_NewPrice = $prices['new'];
            $BK_DigitalPrice = $prices['digital'];

            // The detail text holds the author part and the edition part,
            // separated by the word "Edition". explode() replaces split(),
            // which was removed in PHP 7.
            $detailNode = $li->find('div[class=detail]', 0);
            $AuthorEdition = explode("Edition", $detailNode ? $detailNode->plaintext : '');
            $Author = trim(str_replace(array("Author:", "\n"), "", $AuthorEdition[0]));
            $Edition = isset($AuthorEdition[1])
                ? trim(str_replace(array(":", "\n"), "", $AuthorEdition[1]))
                : '';

            // ISBN = last path segment of the image URL, up to the first dot.
            $urlSegments = explode("/", $ImageUrl);
            $imageFile = $urlSegments[count($urlSegments) - 1];
            $imageFileParts = explode('.', $imageFile);
            $Bk_ISBN = $imageFileParts[0];

            // Sister URL is only present when the link has visible text.
            $SisterUrl = '';
            $SisterUrl_Ancher = $li->find('div[id=field] a', 0);
            if ($SisterUrl_Ancher && $SisterUrl_Ancher->plaintext != "") {
                $SisterUrl = $SisterUrl_Ancher->getAttribute("href");
            }

            echo "{$i} - {$Bk_ISBN} - {$BookTitle} - {$Author} - {$Edition} - {$BK_UsedPrice} - {$BK_NewPrice} - {$BK_DigitalPrice} - {$ImageUrl} <br /> {$SisterUrl} <br /><br />";
        }
    }
}
/**
 * Apply all matching block files to a template and save the result in place.
 *
 * Block files are collected with glob() at three scopes and handed to
 * _compileTplBlock() in this order:
 *   1. global:    <view script path>/___*.php
 *   2. directory: <template dir>/___*.php
 *   3. template:  <template dir>/<template name without .phtml>___*.php
 *
 * @param string $tpl Path of the template file; it is overwritten.
 * @return bool Always true.
 */
protected function _compileTpl($tpl)
{
    require_once 'simple_html_dom.php';

    $dom = file_get_dom($tpl);

    $templateDir = dirname($tpl);
    $action = str_replace('.phtml', '', basename($tpl));

    $blockPatterns = array(
        $this->_viewScriptPath . '/___*.php',       // global blocks
        $templateDir . '/___*.php',                 // directory-level blocks
        $templateDir . '/' . $action . '___*.php',  // blocks for this template only
    );

    foreach ($blockPatterns as $pattern) {
        $this->_compileTplBlock($dom, glob($pattern));
    }

    $dom->save($tpl);
    $dom->clear();
    unset($dom);

    return true;
}
<?php
// Builds an HTML table from a public Google Sheets JSON feed and injects it
// into a fetched copy of a sample page, then prints the modified page.

$url = 'http://spreadsheets.google.com/feeds/list/1XRSgXkJsuLgXsRPNB4Xrz6-aoMp4CqU4ycuKb2-S9WI/od6/public/values?alt=json';

// file_get_contents() returns false on failure and json_decode() returns
// null for invalid JSON; in both cases fall back to an empty row list so the
// table is rendered with its header only instead of raising warnings.
$file = file_get_contents($url);
$json = ($file === false) ? null : json_decode($file);
$rows = isset($json->{'feed'}->{'entry'}) ? $json->{'feed'}->{'entry'} : array();

// NOTE(review): cell values are inserted as raw HTML. If the sheet can be
// edited by untrusted users, they should be escaped with htmlspecialchars()
// - confirm whether HTML in the cells is intentional.
$listingContent = "<table><th>Full Name</th><th>Image</th><th>Bio</th><th>Locations</th>";
foreach ($rows as $row) {
    $fullname = $row->{'gsx$fullname'}->{'$t'};
    $image = $row->{'gsx$image'}->{'$t'};
    $bio = $row->{'gsx$bio'}->{'$t'};
    $locations = $row->{'gsx$locations'}->{'$t'};

    $listingContent .= "<tr><td>" . $fullname . "</td><td>" . $image . "</td><td>" . $bio . "</td><td>" . $locations . "</td></tr>";
}
$listingContent .= "</table>";

include 'ganon.php';

$html = file_get_dom('http://valitesystems-2.hs-sites.com/trsom-sample-page/');

// Replace the placeholder spans with the generated table.
foreach ($html('.custom-json-data span') as $element) {
    $element->setInnerText($listingContent);
}

// Overwrite the page title.
foreach ($html('html head title') as $element) {
    $element->setInnerText("Changed Title");
}

echo $html;
/**
 * Run a browse search and return its parsed results.
 *
 * @param mixed $searcharg   Search argument, passed through to get_browse_url().
 * @param mixed $searchscope Scope; any falsy value selects $this->def_scope.
 * @param mixed $searchtype  Type; any falsy value selects $this->def_type.
 * @param mixed $url         Not used by this method.
 * @return mixed Whatever get_browse_results() returns for the loaded page.
 */
public function get_browse($searcharg, $searchscope=null, $searchtype=null,$url=null)
{
    if (!$searchscope) {
        $searchscope = $this->def_scope;
    }
    if (!$searchtype) {
        $searchtype = $this->def_type;
    }

    $browseUrl = $this->get_browse_url($searcharg, $searchscope, $searchtype);
    $dom = file_get_dom($browseUrl);

    return $this->get_browse_results($dom);
}
<?php
/**
 * Should output the most viewed video of the day on Youtube
 *
 * Demonstrates selectors
 *
 * @author Niels A.D.
 * @package Ganon
 * @link http://code.google.com/p/ganon/
 * @license http://dev.perl.org/licenses/artistic.html Artistic License
 */

include_once '../ganon.php';
//PHP4 users, make sure this path is correct!

$html = file_get_dom('http://www.youtube.com/videos');

// First link to a watch page that contains an image.
$selector = 'a[href ^= "/watch"]:has(img)';

// PHP 5.3.0 and higher can call the document object directly;
// older versions go through select().
$match = version_compare(PHP_VERSION, '5.3.0', '>=')
    ? $html($selector, 0)
    : $html->select($selector, 0);

echo $match->toString();
/**
 * Insert one scraped train row for a station into StationMovables.
 *
 * Private helper of get_station(). Every value that ends up inside the SQL
 * text is escaped, because it originates from a scraped web page.
 *
 * @param mixed  $statid  Station id the row belongs to.
 * @param string $vlaknr  Train number (link text of the row).
 * @param string $vlakdol Text of column 0; stored as NULL when empty.
 * @param string $vlakodl Text of column 1; stored as NULL when empty.
 * @param string $vlakiz  Station name from column 6; resolved with get_station_id(), 0 when empty.
 * @param string $vlakza  Station name from column 7; resolved with get_station_id(), 0 when empty.
 * @param string $vlaknap Note text from column 8.
 * @return void
 */
function get_station_store_movable($statid, $vlaknr, $vlakdol, $vlakodl, $vlakiz, $vlakza, $vlaknap)
{
    $vlakizId = $vlakiz ? get_station_id($vlakiz) : 0;
    $vlakzaId = $vlakza ? get_station_id($vlakza) : 0;

    $vlakdolstr = ($vlakdol == '') ? "NULL" : "'" . pg_escape_string((string) $vlakdol) . "'";
    $vlakodlstr = ($vlakodl == '') ? "NULL" : "'" . pg_escape_string((string) $vlakodl) . "'";

    // Recognised notes mapped to array(sun, sat). A note that is not listed
    // keeps sun = sat = 1 and is flagged as not handled.
    $knownNotes = array(
        "" => array(1, 1),
        "Ne vozi nedjeljom i blagdanom" => array(0, 1),
        "Ne vozi subotom" => array(1, 0),
        "Vozi nedjeljom" => array(1, 1),
        "Vozi nedjeljom i blagdanom" => array(1, 1),
        "Vozi subotom, nedjeljom i blagdanom" => array(1, 1),
        "Ne vozi subotom, nedjeljom i blagdanom" => array(0, 0),
    );

    $sun = 1;
    $sat = 1;
    $vlaknaphandled = 0;
    $noteKey = (string) $vlaknap;
    if (isset($knownNotes[$noteKey])) {
        $sun = $knownNotes[$noteKey][0];
        $sat = $knownNotes[$noteKey][1];
        $vlaknaphandled = 1;
    }

    $sql = sprintf(
        "INSERT INTO StationMovables values('%s','%s',%s, %s, '%s' ,'%s','%s','%d','%d','%d');",
        pg_escape_string((string) $statid),
        pg_escape_string((string) $vlaknr),
        $vlakdolstr,
        $vlakodlstr,
        pg_escape_string((string) $vlakizId),
        pg_escape_string((string) $vlakzaId),
        pg_escape_string($noteKey),
        $sun,
        $sat,
        $vlaknaphandled
    );

    echo $sql . "\n";
    pg_query($sql);
}

/**
 * Scrape the timetable page of one station and store every train row found.
 *
 * The page is walked cell by cell. A cell containing a link starts a new
 * train (the link text is the train number) and flushes the previous train
 * to the database; the following link-less cells are counted, and columns
 * 0, 1, 6, 7 and 8 are captured. The last train is flushed after the loop.
 *
 * Changes compared with the earlier revision:
 *  - scraped values are escaped before being placed in the SQL text;
 *  - the final row is written with the same ten values as all other rows
 *    (it used to be written with seven);
 *  - per-row fields are reset after each flush so a short row cannot
 *    inherit values from the previous train;
 *  - the DOM is actually released ("$html->clear;" was a property read).
 *
 * @param mixed $statid Station id, resolved to a name with get_station_name().
 * @return void
 */
function get_station($statid)
{
    $stationname = get_station_name($statid);
    $stationnameorig = urlencode(recode("UTF8..windows-1250", $stationname));
    $stationurl = "http://vred.hznet.hr/hzinfo/Default.asp?KO={$stationnameorig}&Category=hzinfo&Service=izvr3&LANG=HR&SCREEN=2";

    $html = file_get_dom($stationurl);
    if (!$html) {
        return; // page could not be loaded; nothing to store
    }

    // State of the train row currently being collected.
    $vlaknr = '';
    $vlakdol = '';
    $vlakodl = '';
    $vlakiz = '';
    $vlakza = '';
    $vlaknap = '';
    $col = 0;

    foreach ($html->find('td') as $element) {
        $foundin = 0;

        foreach ($element->find("a") as $innerel) {
            // A link marks the start of a new train: store the previous one.
            if ($vlaknr) {
                get_station_store_movable($statid, $vlaknr, $vlakdol, $vlakodl, $vlakiz, $vlakza, $vlaknap);
                $vlakdol = '';
                $vlakodl = '';
                $vlakiz = '';
                $vlakza = '';
                $vlaknap = '';
            }
            $vlaknr = trim($innerel->innertext);
            $foundin = 1;
            $col = 0;
        }

        if (!$foundin) {
            $txt = trim($element->innertext);
            $txt = str_ireplace("<BR>", "", $txt);

            switch ($col) {
                case 0:
                    $vlakdol = $txt;
                    break;
                case 1:
                    $vlakodl = $txt;
                    break;
                case 6:
                    $vlakiz = recode("windows1250..UTF-8", $txt);
                    break;
                case 7:
                    $vlakza = recode("windows1250..UTF-8", $txt);
                    break;
                case 8:
                    $vlaknap = recode("windows1250..UTF-8", $txt);
                    break;
            }
            $col++;
        }
    }

    // Flush the last train, which no following link has triggered.
    if ($vlaknr) {
        get_station_store_movable($statid, $vlaknr, $vlakdol, $vlakodl, $vlakiz, $vlakza, $vlaknap);
    }

    $html->clear();
    unset($html);
}
<?php
/**
 * Should output a string with parsed unicode characters
 *
 * Demonstrates UTF8
 *
 * @author Niels A.D.
 * @package Ganon
 * @link http://code.google.com/p/ganon/
 * @license http://dev.perl.org/licenses/artistic.html Artistic License
 */

include_once '../ganon.php';
//PHP4 users, make sure this path is correct!

//Make sure the header is set for UTF8 output
header('Content-Type: text/html; charset=UTF-8');

$html = file_get_dom('_html5_utf.html');

// The title and every h1 of the document.
$selector = '(title, h1)';

// PHP 5.3.0 and higher can call the document object directly;
// older versions go through select().
$elements = version_compare(PHP_VERSION, '5.3.0', '>=')
    ? $html($selector)
    : $html->select($selector);

foreach ($elements as $element) {
    echo $element->getPlainText(), "<br>\n";
}
<?php
/**
 * Should output all sections from the SRL forums (http://villavu.com/forum/)
 *
 * Demonstrates selectors
 *
 * @author Niels A.D.
 * @package Ganon
 * @link http://code.google.com/p/ganon/
 * @license http://dev.perl.org/licenses/artistic.html Artistic License
 */

include_once '../ganon.php';
//PHP4 users, make sure this path is correct!

$html = file_get_dom('http://villavu.com/forum/');

// <strong> elements that are direct children of links whose href starts
// with "forumdisplay".
$selector = 'a[href ^= forumdisplay] > strong';

// PHP 5.3.0 and higher can call the document object directly;
// older versions go through select().
$sections = version_compare(PHP_VERSION, '5.3.0', '>=')
    ? $html($selector)
    : $html->select($selector);

foreach ($sections as $element) {
    echo $element->getPlainText(), "<br>\n";
}
<?php /** * Parses the BBC news feed * * Demonstrates selectors * * @author Niels A.D. * @package Ganon * @link http://code.google.com/p/ganon/ * @license http://dev.perl.org/licenses/artistic.html Artistic License */ include_once '../ganon.php'; //PHP4 users, make sure this path is correct! $html = file_get_dom('http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'); if (version_compare(PHP_VERSION, '5.3.0') >= 0) { //PHP 5.3.0 and higher echo 'Last updated: ', $html('lastBuildDate', 0)->getPlainText(), "<br><br>\n"; foreach ($html('item') as $item) { echo 'Title: ', $item('title', 0)->getPlainText(), "<br>\n"; echo 'Date: ', $item('pubDate', 0)->getPlainText(), "<br>\n"; echo 'Link: ', $item('link', 0)->getPlainText(), "<br><br>\n"; } } else { //PHP 4 and 5.3.0 and lower echo 'Last updated: ', $html->select('lastBuildDate', 0)->getPlainText(), "<br><br>\n"; foreach ($html->select('item') as $item) { echo 'Title: ', $item->select('title', 0)->getPlainText(), "<br>\n"; echo 'Date: ', $item->select('pubDate', 0)->getPlainText(), "<br>\n"; echo 'Link: ', $item->select('link', 0)->getPlainText(), "<br><br>\n"; }
/**
 * Scrape book details from a "sister site" product page.
 *
 * The page layout is addressed positionally: the first table cell under
 * div#bodycenter is looked up once, its child #2 holds the prices and its
 * child #9 holds the detail rows. A layout change on the remote site will
 * break these lookups.
 *
 * @param string $sister_url URL of the product page to load.
 * @return string Comma-separated values in this order: "author" (quoted),
 *                edition, "publisher" (quoted), ISBN-10 (print), ISBN-13
 *                (print), ISBN-10 (digital), ISBN-13 (digital), list price,
 *                "you pay" price.
 */
function SisterSiteData($sister_url)
{
    include_once "library/simple_html_dom.php";

    $html = file_get_dom($sister_url);

    // Resolve the shared container once instead of re-running the selector
    // for every field.
    $cell = $html->find('div[id=bodycenter] table td', 0);
    $priceBlock = $cell->children[2];
    $detailBlock = $cell->children[9];

    $ListPrice = trim($priceBlock->children[1]->plaintext);
    $YouPayPrice = trim($priceBlock->children[6]->plaintext);

    // Each detail row carries its value in the second child of the <tr>.
    $Author = trim($detailBlock->find('tr', 0)->children[1]->plaintext);
    $Edition = trim($detailBlock->find('tr', 4)->children[1]->plaintext);
    $Publisher = trim($detailBlock->find('tr', 3)->children[1]->plaintext);
    $ISBN_10_Print = trim($detailBlock->find('tr', 6)->children[1]->plaintext);
    $ISBN_13_Print = trim($detailBlock->find('tr', 7)->children[1]->plaintext);
    $ISBN_10_Digital = trim($detailBlock->find('tr', 8)->children[1]->plaintext);
    $ISBN_13_Digital = trim($detailBlock->find('tr', 9)->children[1]->plaintext);

    // Release the DOM through the public clear() method rather than invoking
    // the magic __destruct() by hand (which unset() would then run again).
    // All values above are already plain strings.
    unset($cell, $priceBlock, $detailBlock);
    $html->clear();
    unset($html);

    return "\"{$Author}\",{$Edition},\"{$Publisher}\",{$ISBN_10_Print},{$ISBN_13_Print},{$ISBN_10_Digital},{$ISBN_13_Digital},{$ListPrice},{$YouPayPrice}";
}
<?php /** * Should output all Wiki articles and their information from the Ganon page * at Google Code (http://code.google.com/p/ganon/w/list) * * Demonstrates (advanced) selectors and nested queries * * @author Niels A.D. * @package Ganon * @link http://code.google.com/p/ganon/ * @license http://dev.perl.org/licenses/artistic.html Artistic License */ include_once '../ganon.php'; //PHP4 users, make sure this path is correct! $html = file_get_dom('http://code.google.com/p/ganon/w/list'); if (version_compare(PHP_VERSION, '5.3.0') >= 0) { //PHP 5.3.0 and higher foreach ($html('#resultstable tr[! id=headingrow]') as $row) { foreach ($row('td[class ^= "vt "]') as $col) { echo $col->getPlainText(), ' [', $col, "] <br>\n"; } echo "<br>\n"; } } else { //PHP 4 and 5.3.0 and lower foreach ($html->select('#resultstable tr[! id=headingrow]') as $row) { foreach ($row->select('td[class ^= "vt "]') as $col) { echo $col->getPlainText(), ' [', $col, "] <br>\n"; } echo "<br>\n";
for ($i = 0; $i < 3; ++$i) { $dom = file_get_dom($filename); //stat_dom($dom); $dom->clear(); unset($dom); dump_memory(); flush(); } echo 'final: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>'; flush(); echo '<br><br>[one object]<br>init memory: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>'; echo '------------------------------------------<br>'; flush(); $dom = new simple_html_dom(); for ($i = 0; $i < 3; ++$i) { $dom->load_file($filename); $dom->clear(); dump_memory(); } unset($dom); echo 'final: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>'; flush(); echo '<br><br>[multi objects without clear memory]<br>init memory: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>'; echo '------------------------------------------<br>'; flush(); for ($i = 0; $i < 3; ++$i) { $dom = file_get_dom($filename); dump_memory(); } echo 'final: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>'; flush();
<?php require 'simple_html_dom.php'; require 'post.php'; require 'birthdayPost.php'; require 'infPost.php'; require 'lolPost.php'; require 'randomPost.php'; // $a = new BirthdayPost(); //print_r($a); //echo "\n\n"; // echo $a->body; $url = "http://shackchatty.com/search.xml?author=Steve+Gibson"; $dom = file_get_dom($url); $postFirst = $dom->find("comment[author]", 0); $str = $postFirst->date; if (($timestamp = strtotime($str)) === false) { echo "The string ({$str}) is bogus"; } else { echo "{$str} == " . date('l dS \\o\\f F Y h:i:s A', $timestamp); } $end = date('Y-m-d'); if ($diff = @get_time_difference($str, $end)) { echo sprintf('days : %02d', $diff['days']); } else { echo "Hours: Error"; } // print $postFirst->date; // print "\n"; function get_time_difference($start, $end)