/**
 * Builds the absolute URL of the "next page" link found on a Goodreads page.
 *
 * The page at $url is loaded and every element matching `.next_page` is
 * inspected; the href of the last match is used. When nothing matches, the
 * bare site root is returned (unchanged from the previous behaviour).
 *
 * @param string $url Address of the page to inspect.
 * @return string 'https://www.goodreads.com' followed by the href that was found, if any.
 */
function get_next_page_url($url)
{
    $html = new simple_html_dom();
    $html->load_file($url);

    // Keep the result separate from the loop variable. The previous version
    // reused one name for both, which only worked by accident of ordering.
    $href = null;
    foreach ($html->find('.next_page') as $anchor) {
        $href = $anchor->href;
    }

    // Release the parsed tree explicitly; the href has already been copied out.
    $html->clear();
    unset($html);

    return 'https://www.goodreads.com' . $href;
}
/**
 * Loads one category page and parses it unless the crawl should stop.
 *
 * The page counter is incremented on every call. Parsing is skipped when
 * _endOfLink() reports the end of the listing or when the counter exceeds
 * _pages_to_get. The same summary array is returned either way.
 *
 * @param string $link Category path, appended to $this->_domain.
 * @return array Keys 'last_link', 'links' and 'parsing_data'.
 */
public function initCategoryParser($link)
{
    $this->_category_link = $link;

    $page = new simple_html_dom();
    $page->load_file($this->_domain . $link);
    $this->_counter++;

    $finished = $this->_endOfLink($page) || $this->_counter > $this->_pages_to_get;
    if (!$finished) {
        $this->_parseHtml($page);
    }

    return array(
        'last_link' => $this->_category_link,
        'links' => $this->_links,
        'parsing_data' => $this->_new_parsing_date,
    );
}
/**
 * Returns the last two characters of a specific table cell on the given page.
 *
 * @param string $url Page to load.
 * @return string|int The trailing two characters of the cell's plain text,
 *                    or 0 when the cell is not present.
 */
function getDetail($url)
{
    Yii::import('ext.simple_html_dom');

    $page = new simple_html_dom();
    // A variant passing a stream context built from $this->user_agent was
    // previously commented out here; the plain load is what actually runs.
    $page->load_file($url);

    $cell = $page->find('table td[class=bor f2 db]', 0);
    if (!isset($cell->plaintext)) {
        return 0;
    }

    $text = $cell->plaintext;

    return substr($text, strlen($text) - 2);
}
/**
 * Extracts exchange rates from the saved 'Nepal Rastra Bank.htm' page and
 * var_dump()s them.
 *
 * Every table row whose first cell looks like a date (digits-digits-digits)
 * yields one entry per currency heading. The "Date" heading only advances the
 * column cursor. "Swedish Kr.", "Danish Kr." and "HKG$" consume one column
 * (buying rate only, selling rate null); every other heading consumes two
 * columns (buying rate, then selling rate).
 *
 * @return void The collected list is printed, not returned.
 */
public function getdata()
{
    set_time_limit(0);
    // include_once: a plain include would try to redeclare the library's
    // classes (a fatal error) if this method ran twice in one request.
    include_once 'simple_html_dom.php';

    $target_url = 'Nepal Rastra Bank.htm';
    $html = new simple_html_dom();
    $html->load_file($target_url);

    // Initialised up front so the final var_dump() never sees an undefined
    // variable when no row matches.
    $rates = array();

    $table = $html->find('/html/body/table[2]/tbody/tr[2]/td[2]/table[2]/tbody');
    if (empty($table)) {
        var_dump($rates);
        return;
    }
    $table = $table[0];

    // Headings whose column carries a single (buying) rate.
    $single_rate = array("Swedish Kr.", "Danish Kr.", 'HKG$');

    // The heading row is the same for every data row, so read it once.
    $headings = array();
    foreach ($table->find('tr[1]/td') as $head) {
        $headings[] = trim($head->plaintext);
    }

    // Trimmed text of the cell selected by 'td[N]' in a row; '' when absent.
    $cell_text = function ($row, $index) {
        $cells = $row->find('td[' . $index . ']');
        return isset($cells[0]) ? trim($cells[0]->plaintext) : '';
    };

    foreach ($table->find('tr') as $row) {
        $date_cell = $row->find('td[1]');
        if (!isset($date_cell[0])) {
            continue;
        }
        $date = $date_cell[0]->plaintext;
        if (!preg_match("/[0-9]+[-][0-9]+[-][0-9]+/", $date)) {
            continue;
        }

        $j = 1; // column cursor within the current row
        foreach ($headings as $currency) {
            if ($currency == "Date") {
                $j++;
                continue;
            }

            $entry = array(
                'date' => $date,
                'curname' => $currency,
                'buyingrate' => $cell_text($row, $j++),
                'sellingrate' => null,
            );
            if (!in_array($currency, $single_rate, true)) {
                $entry['sellingrate'] = $cell_text($row, $j++);
            }
            $rates[] = $entry;
        }
    }

    var_dump($rates);
}
/**
 * Scans the page named in $_POST['t_url'] for images, runs face detection on
 * each one and stores the URL of every image in which a face is detected.
 *
 * Each image is downloaded, re-encoded as a JPEG under temp_images/, and
 * passed to svay\FaceDetector. The temp_images/ directory is emptied and a
 * client-side alert is emitted before returning.
 *
 * NOTE(review): the target URL and every image URL come from untrusted input
 * and are fetched server-side; consider restricting schemes/hosts. Relative
 * image src values are not resolved against the page URL.
 *
 * @param object $conn Database handle; presumably a PDO connection (prepare()
 *                     with a positional placeholder, execute() with an array)
 *                     — confirm against the caller.
 * @return void
 */
function insert_urls($conn)
{
    // t_url is taken from user input through the text box
    $target_url = isset($_POST['t_url']) ? $_POST['t_url'] : '';
    $html = new simple_html_dom();

    // The previous check was `!$html->load_file(...)`, which is true both for
    // a reported failure (false) and for a load that returns no value, so the
    // error branch could never run. Only an explicit false is a failure here.
    if ($target_url !== '' && $html->load_file($target_url) !== false) {
        // Parameterised insert: the scraped URL is never spliced into the SQL text.
        $stmt = $conn->prepare('INSERT INTO images (url) VALUES (?)');

        $i = 1;
        foreach ($html->find('img') as $image) {
            $image_url = $image->src;
            if (!$image_url) {
                continue; // <img> without a usable src
            }

            $raw = file_get_contents($image_url);
            $pp_image = ($raw !== false) ? imagecreatefromstring($raw) : false;
            if ($pp_image === false) {
                continue; // download failed or data is not a supported image
            }

            $temp_path = 'temp_images/img' . $i . '.jpeg';
            imagejpeg($pp_image, $temp_path); // Saves the image as a jpeg file

            $detector = new svay\FaceDetector('detection.dat');
            if ($detector->faceDetect($temp_path)) { // If the detector detects a face
                $stmt->execute(array($image_url)); // Insert that url into the database
            }
            $i++;
        }
    } else {
        echo '<br /><div id="strongtext"><p><strong>Palun sisesta mõni muu aadress.</strong></p></div>';
    }

    // Remove every temporary image once all of them have been checked.
    $temp_files = glob('temp_images/*');
    if (is_array($temp_files)) {
        foreach ($temp_files as $temp_file) {
            if (is_file($temp_file)) {
                unlink($temp_file);
            }
        }
    }

    echo '<script>alertFunction();</script>'; // Alert the user that the script has finished working
}
/**
 * Loads one article page and runs the meta, category and body extractors on it.
 *
 * The full URL is $this->_domain plus $url with surrounding slashes removed.
 * An entry keyed by the md5 of that URL is registered in $this->_articles
 * before the page is fetched.
 *
 * @param string $url Article path relative to $this->_domain.
 * @return array|null $this->_articles after extraction, or null when the
 *                    loader reports a failure.
 */
public function getArticleFromUrl($url)
{
    $url = $this->_domain . trim($url, '/');
    $this->_url = $url;
    $this->_url_hash = md5($url);
    $this->_articles[$this->_url_hash] = array('url' => $url);

    $article_html = new simple_html_dom();
    // The previous guard was is_null($article_html), which can never be true
    // for an object that was just constructed. Test the load result instead;
    // a loader that returns no value is still treated as success.
    if ($article_html->load_file($url) === false) {
        return;
    }

    $this->_article_html = $article_html;
    $this->_getMetaFromArticle();
    $this->_getCategoryFromArticle();
    $this->_getArticle();

    // Debug output that used to follow this return was unreachable and has been removed.
    return $this->_articles;
}
/**
 * Builds an HTML change log for a CI build.
 *
 * Every `.comment` inside every `.changelist` of the build's change page
 * becomes a list item, except comments that are empty or contain (case
 * insensitively) one of the blacklisted phrases. A marker line is appended
 * to status.log on every call.
 *
 * @param mixed $job     Build id placed in the URL.
 * @param mixed $project Build type id placed in the URL.
 * @return string "Change Log:<ul>…</ul>" markup.
 */
function getChanges($job, $project)
{
    $skipMarkers = array('Merge branch', 'Merge pull', 'Revert', 'Cleanup');
    $url = "http://ci.earth2me.net/viewLog.html?buildId={$job}&tab=buildChangesDiv&buildTypeId={$project}&guest=1";

    $page = new simple_html_dom();
    $page->load_file($url);

    $items = '';
    foreach ($page->find('.changelist') as $changeList) {
        foreach ($changeList->find('.comment') as $commentNode) {
            $message = $commentNode->innertext;

            $blacklisted = false;
            foreach ($skipMarkers as $marker) {
                if (stripos($message, $marker) !== false) {
                    $blacklisted = true;
                    break;
                }
            }

            if ($blacklisted || $message == "") {
                continue;
            }
            $items .= "<li>{$message}</li>\n";
        }
    }

    file_put_contents('status.log', "Collected changes! ", FILE_APPEND);

    return "Change Log:<ul>" . $items . "</ul>";
}
<?php
// Author: Zakaria Hmaidouch
// Website: zhma.info
//
// Scrapes the project titles from the portfolio page and writes them into a
// small standalone HTML file.

// Import the simplehtmldom library, from http://simplehtmldom.sourceforge.net
require 'libs/simple_html_dom.php';

$file = 'data.html';

// Target URL
$url = 'http://www.zhma.info';

$html = new simple_html_dom();
$html->load_file($url);

// Data to target
$titles = $html->find('div[class=portfolio-item] h5');

// The document is rebuilt from scratch on every run. The previous version read
// the existing file first but discarded the result immediately.
$current = '<html> <head> <title>Data Scraping</title> </head> <body>';

$counter = 1;
foreach ($titles as $title) {
    $current .= "<b>Project {$counter}:</b> {$title->innertext}<br>";
    $counter++;
}

$current .= '</body> </html>';

// Write once, after the closing tags are appended. Previously the file was
// rewritten on every iteration and the closing tags never reached disk.
file_put_contents($file, $current);
<?php include 'simple_html_dom.php'; $html = new simple_html_dom(); $content = $html->load_file('IAC.html'); $ret = $html->find('tr'); $fullInfo = []; $currentOrigin = ''; $currentDestination = ''; foreach ($ret as $line) { if ($line->class == 'destination') { $link = $line->find('a'); foreach ($link as $curLink) { $currentDestination = $curLink->innertext; } } if ($line->class == 'important origin') { $link = $line->find('a'); foreach ($link as $curLink) { $currentOrigin = $curLink->innertext; } } if ($line->class == 'line even') { // new flight $flightInfo = ['origin' => $currentOrigin, 'destination' => $currentDestination]; foreach ($line->children as $child) { $flightInfo[$child->attr['class']] = $child->plaintext; } $temp = []; foreach ($flightInfo as $key => $value) { if (in_array($key, ['remarks', 'valid'])) {
<?php
// Wraps one <description> element of the BBC three-day forecast feed in a
// minimal RSS envelope.
include 'simple_html_dom.php';

$html = new simple_html_dom();
$html->load_file('http://open.live.bbc.co.uk/weather/feeds/en/918702/3dayforecast.rss');

// Every <description> element in the feed; the one at index 1 is emitted.
$element = $html->find('description');

echo "<rss>\n<channel>\n<title>Bluefin Mobile - SA News</title>\n<item>\n<title>SA News</title>\n<description>",
    $element[1],
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
<?php
// Wraps the first div.fontdef1 of the career horoscope page in a minimal RSS
// envelope.
include 'simple_html_dom.php';

$html = new simple_html_dom();
$html->load_file('http://astrology.horoscope.com/horoscope/dailyhoroscope/tomorrow-career-horoscope.aspx?sign=2');

// All divs carrying class "fontdef1"; only the first one's inner HTML is used.
$element = $html->find('div[class=fontdef1]');

echo "<rss>\n<channel>\n<title>Bluefin Mobile - Career Horoscopes Taurus</title>\n<item>\n<title>Career Horoscopes Taurus</title>\n<description>",
    $element[0]->innertext,
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
header('Cache-Control: no-cache, must-revalidate'); header('Content-type: application/json'); ini_set("user_agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21"); ini_set("max_execution_time", 0); ini_set("memory_limit", "10000M"); ini_set('display_errors', '1'); $currenturl = ""; $isposted = false; if (!empty($_POST)) { $currenturl = $_POST['url']; $isposted = true; } if ($isposted) { include 'simple_html_dom.php'; $htmldom = new simple_html_dom(); $htmldom->load_file($currenturl); $title = $htmldom->find('title'); echo nl2br('{ "page" : '); echo nl2br('{ "title" : "' . utf8_encode(trim($title[0]->innertext)) . '",'); $images = $htmldom->find("img"); $bodyitems = $htmldom->find("body p text"); $paragraphs = $bodyitems; echo nl2br('"images" : ['); $numImages = count($images); $icount = 0; foreach ($images as $image) { if ($icount + 1 == $numImages) { $cditem = ""; } else { $cditem = ","; }
<?php
// Example of advanced selector use: print every div.g from the saved search
// page after blanking out the a.fl links inside it.
include './lib/simple_html_dom/simple_html_dom.php';

$html = new simple_html_dom();
$html->load_file('./search.html');

foreach ($html->find('div[class=g]') as $resultBlock) {
    $flLinks = $resultBlock->find('a[class=fl]');
    for ($k = 0, $total = count($flLinks); $k < $total; $k++) {
        // Assigning an empty outertext removes the link from the rendered output.
        $flLinks[$k]->outertext = "";
    }
    echo $resultBlock->outertext;
}

$html->clear();
<?php # create and load the HTML include 'simple_html_dom.php'; $html = new simple_html_dom(); $html->load_file('https://www.nationallottery.co.za/powerball_home/results.asp?type=1'); # get an element representing the second paragraph $element = $html->find('span[class=onGreenBackground]'); $element2 = $html->find('img'); $element3 = $html->find('td'); # output it! echo "<rss>\n<channel>\n<title>Bluefin Mobile - Powerball Results</title>\n<item>\n<title>Powerball Results</title>\n<description>"; echo "The winning Powerball numbers for "; echo $element[0]->innertext; echo " are "; $number1 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[20]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number1); echo ", "; $number2 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[21]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number2); echo ", "; $number3 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[22]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number3); echo ", "; $number4 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[23]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number4); echo ", "; $number5 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[24]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number5); echo ", powerball: "; $number6 = str_replace("<img src=\"../images/power_balls/power_", "", $element2[25]);
$idCidPoli = str_replace('onPesquisaClick(this, ', '', $cidade->onclick); $idCidPoli = str_replace(',', '', $idCidPoli); $idCidPoli = str_replace('"', '', $idCidPoli); $idCidPoli = str_replace('\'', '', $idCidPoli); $idCidPoli = str_replace(');', '', $idCidPoli); //guarda a posição do espaço que separa o id do prefeito ou vereador do id do municipio $espaco = strripos($idCidPoli, ' '); //guarda o id do prefeito ou do vereador que é 11 ou 13 $codigoCargo = substr($idCidPoli, 0, $espaco); //guarda o id da cidade $codigoMunicipio = substr($idCidPoli, $espaco + 1); //modifica a url do ajax que é exibida na tela $urlAjaxPrefeitoVereador = "http://divulgacand2012.tse.jus.br/divulgacand2012/pesquisarCandidato.action?siglaUFSelecionada=" . $siglaUF . "&codigoMunicipio=" . $codigoMunicipio . "&codigoCargo=" . $codigoCargo . "&codigoSituacao=0"; $htmlCidade = new simple_html_dom(); //carrega o html que possui todos prefeitos ou vereadores da cidade $htmlCidade->load_file($urlAjaxPrefeitoVereador); //pega os input com o id e a ultima atualização do politico $candidato = $htmlCidade->find('tr[class="odd gradeX"] input'); $htmlCidade->clear(); unset($htmlCidade); //array para guardar ids dos candidatos e ids da ultima atualização do candidato $array = array("sqCandidato", "dtUltimaAtualizacao"); $i = 0; $j = 0; foreach ($candidato as $elemento) { if (strcmp($elemento->name, "sqCandidato") == 0) { $array["sqCandidato"][$i] = $elemento->value; $i++; } else { $array["dtUltimaAtualizacao"][$j] = $elemento->value; $j++;
$target_url = $_GET['q']; $qi = $_GET['qi']; $present = 0; $question_id = 0; include 'autoloader.php'; use NlpTools\Classifiers\MultinomialNBClassifier; use NlpTools\Documents\TokensDocument; use NlpTools\Documents\TrainingSet; use NlpTools\FeatureFactories\DataAsFeatures; use NlpTools\Models\FeatureBasedNB; use NlpTools\Stemmers\PorterStemmer; use NlpTools\Tokenizers\WhitespaceTokenizer; use NlpTools\Utils\StopWords; include_once 'simple_html_dom.php'; $html = new simple_html_dom(); $html->load_file($target_url); include_once 'connect.inc.php'; //echo "<br/>"; foreach ($html->find('h1') as $link) { $ans = $link->getAttribute('itemprop'); if ($ans == "name") { foreach ($link->find('a') as $link1) { echo "<div class='col-lg-offset-1 bg-primary col-lg-10' style='border-bottom:1px solid;'>"; echo '<h1>' . $link1->text() . "</h1><br/>"; echo "</div>"; $query = "SELECT * from `question` where quest='" . $link1->text() . "'"; $query_run = mysql_query($query); if (mysql_num_rows($query_run) == 1) { $present = 1; while ($row = mysql_fetch_array($query_run)) { $question_id = $row['q_id'];
/**
 * Handles the lottery-result form post: scrapes the result page for the
 * posted date, exposes the two result tables to the view and stores the
 * per-station prize rows in the database.
 *
 * For today's date each region is only stored when checkTimeDai() allows it;
 * for any other date both regions are always stored.
 *
 * @return void
 */
public function indexAction()
{
    if (!$this->_request->isPost()) {
        return;
    }

    $params = $this->_request->getParams();
    $date = isset($params['date']) ? $params['date'] : '';
    if ($date == "") {
        return;
    }

    // Posted as d-m-Y, stored as Y-m-d.
    $date_arr_temp = explode("-", $date);
    if (count($date_arr_temp) < 3) {
        return; // malformed date: nothing sensible to fetch or store
    }
    $date_insert = $date_arr_temp[2] . "-" . $date_arr_temp[1] . "-" . $date_arr_temp[0];

    $html = new simple_html_dom();
    // The previous `!empty($html)` guard was always true for an object; the
    // load result is the only failure signal available here.
    if ($html->load_file("http://www.minhngoc.net.vn/ket-qua-xo-so/{$date}.html") === false) {
        return;
    }

    $bangdo_mn = $html->find(".bkqmiennam", 0);
    $bangdo_mb = $html->find(".bkqtinhmienbac", 0);
    $this->view->bangdo_mn = $bangdo_mn;
    $this->view->bangdo_mb = $bangdo_mb;

    $m_lichxoso = new Default_Model_Lichxoso();
    $arr_lichxoso = $m_lichxoso->getBangDo($date)->toArray();
    // 'madai' is a "|"-separated list of station codes. The last code is used
    // for the northern table, all earlier ones for the southern table.
    $arr_madai = explode("|", $arr_lichxoso['madai']);
    $soluong_dai = sizeof($arr_madai);

    $db = Zend_Db_Table::getDefaultAdapter();
    $is_today = ($date == date('d-m-Y'));

    if (!$is_today || $this->checkTimeDai("mn")) {
        $arr_bangdo_mn = $this->_readBangDoMienNam($bangdo_mn, $arr_madai);
        $this->_saveBangDo($db, $arr_bangdo_mn, $date_insert);
    }

    // mien bac
    if (!$is_today || $this->checkTimeDai("mb")) {
        $arr_bangdo_mb = $this->_readBangDoMienBac($bangdo_mb);
        // The same-day branch previously tested `$arr_bangdo_mb > 0`, which
        // compares an array with an integer; count the entries instead.
        if (count($arr_bangdo_mb) > 0) {
            $key_tenbang = $arr_madai[$soluong_dai - 1];
            $this->_saveBangDo($db, array($key_tenbang => $arr_bangdo_mb), $date_insert);
        }
    }
}

/**
 * Reads the southern result table: for every station except the last one,
 * rows 2..10 of its `.rightcl` column become "giai8" down to "giai0".
 *
 * @param object|null $bangdo_mn Node of the southern table, or null.
 * @param array       $arr_madai Station codes.
 * @return array station code => array("giaiN" => row markup).
 */
private function _readBangDoMienNam($bangdo_mn, array $arr_madai)
{
    $result = array();
    if (!$bangdo_mn) {
        return $result;
    }

    $results_table = $bangdo_mn->find("td table", 1);
    if (!$results_table) {
        return $result;
    }

    $last = count($arr_madai) - 1;
    for ($i = 0; $i < $last; $i++) {
        $column = $results_table->find(".rightcl", $i);
        if (!$column) {
            continue; // no column for this station on the page
        }
        for ($sl = 2, $giai = 8; $sl <= 10; $sl++, $giai--) {
            $result[$arr_madai[$i]]["giai" . $giai] = (string) $column->find("tr", $sl);
        }
    }

    return $result;
}

/**
 * Reads the northern result table: the second cell of rows 1..8 becomes
 * "giai0" up to "giai7".
 *
 * @param object|null $bangdo_mb Node of the northern table, or null.
 * @return array "giaiN" => cell markup; empty when the table or any of the
 *               eight rows is missing, so partial results are never stored.
 */
private function _readBangDoMienBac($bangdo_mb)
{
    $result = array();
    if (!$bangdo_mb) {
        return $result;
    }

    for ($sl = 1, $giai = 0; $sl <= 8; $sl++, $giai++) {
        $row = $bangdo_mb->find('tr', $sl);
        if (!$row) {
            return array();
        }
        $result["giai" . $giai] = (string) $row->find("td", 1);
    }

    return $result;
}

/**
 * Inserts or updates one record per station, depending on whether a record
 * for that station and date already exists.
 *
 * @param object $db          Database adapter.
 * @param array  $bang_do     station code => prize rows.
 * @param string $date_insert Date in Y-m-d form.
 * @return void
 */
private function _saveBangDo($db, array $bang_do, $date_insert)
{
    foreach ($bang_do as $key_tenbang => $arr_bang) {
        if ($this->checkExistDataVeDo($db, $key_tenbang, $date_insert)) {
            $sql = $this->updateDataVeDo($db, $key_tenbang, $arr_bang, $date_insert);
        } else {
            $sql = $this->addNewDataVeDo($db, $key_tenbang, $arr_bang, $date_insert);
        }
        $db->query($sql);
    }
}
/**
 * Looks up the link of a class on the public class list of a study programme.
 *
 * List items whose text starts with $kode are treated as the course; their
 * nested list items are searched for one whose first child has exactly
 * $kelas as its text. If several courses match, the last match wins, as before.
 *
 * @param string $ps    Study programme code placed in the query string.
 * @param string $kode  Course code prefix to look for.
 * @param string $kelas Class label to match exactly.
 * @return mixed The href of the matching class entry, or the string "not found".
 */
public function getKelas($ps, $kode, $kelas)
{
    // Create a DOM object and load HTML from a URL
    $html = new simple_html_dom();
    $html->load_file('https://six.akademik.itb.ac.id/publik/daftarkelas.php?ps=' . $ps . '&semester=1&tahun=2015&th_kur=2013');

    // Quote the code so characters such as "." or "/" in it are matched
    // literally instead of being interpreted as regex syntax.
    $pattern = '/^' . preg_quote($kode, '/') . '/';

    // Track success explicitly. The previous version tested the truthiness of
    // the matched label, so a label of "0" or "" was reported as "not found".
    $found = false;
    $link = null;

    foreach ($html->find('ol') as $ul) {
        foreach ($ul->find('li') as $li) {
            // Find the course by its course code
            if (!preg_match($pattern, $li->plaintext)) {
                continue;
            }
            foreach ($li->find('li') as $child) {
                $entry = $child->children(0);
                if ($entry === null) {
                    continue; // list item without child elements
                }
                if (strcmp($entry->plaintext, $kelas) === 0) {
                    $found = true;
                    $link = $entry->href;
                    break;
                }
            }
        }
    }

    // Report whether the class was found
    return $found ? $link : "not found";
}
<?php require 'simple_html_dom.php'; $cat_rem = fopen("cat_list.txt", "w"); fwrite($cat_rem, ""); fclose($cat_rem); $cat_handle = fopen("cat_list.txt", "a"); $cat_counter = 0; $cat_url = new simple_html_dom(); $cat_url->load_file('http://www.toy.ru/catalog/'); echo "<h4>Загрузили hmtl каталога товаров</h4>"; flush(); ob_flush(); if (count($cat_url->find('.category_list'))) { foreach ($cat_url->find('.category_list') as $catalog) { foreach ($catalog->find('div.item a') as $link) { $category = file_get_html("http://www.toy.ru" . $link->href); echo "<p>Загрузили hmtl категории http://www.toy.ru" . $link->href . " </p>"; flush(); ob_flush(); if (count($category->find('div.item a'))) { foreach ($category->find('div.item a') as $link) { $link_title = ""; if (count($link->find('img'))) { $link_title = $link->find('img', 0)->title; echo "<p>Название категории <strong>" . $link_title . "</strong></p>"; } fwrite($cat_handle, $link->href . ";" . $link_title . "\r\n"); $cat_counter++; echo "<p>Записали url категории http://www.toy.ru" . $link->href . " в файл </p>"; $subcat1 = file_get_html("http://www.toy.ru" . $link->href);
/**
 * Scrapes the PriceGrabber search result for a UPC and returns one entry per
 * seller row of the pricing table.
 *
 * @param string $upc Product code used as the search keyword.
 * @return array List of arrays with keys name, desc, manu, url, productImage,
 *               basePrice, shippingPrice, price and logo; empty when the
 *               product description or the pricing table is missing.
 */
public function fetchDataFromPriceGrabberApi($upc)
{
    ini_set('max_execution_time', 999999);
    ini_set('memory_limit', '400M');
    require_once dirname(BASEPATH) . '/system/application/libraries/simple_html_dom.php';

    $debug = false;
    // Encode the keyword so spaces or reserved characters cannot break the query string.
    $url = 'http://www.pricegrabber.com/search_request.php?form_keyword=' . urlencode($upc) . '&some_id=&id_type=&requestParams=Tjs%3D&vendorIds=YTowOnt9&st=query&sv=findit_top&kw_suggest=0&topcat_menu=6&zip_code=54001';
    if ($debug) {
        echo '<br />' . $url . '<br />';
    }

    $html = new simple_html_dom();
    $html->load_file($url);

    $description = $html->find('div[class=product_description]', 0);
    if (!$description) {
        $html->clear();
        return array();
    }
    $heading = $description->find('h1', 0);
    $name = $heading ? $heading->plaintext : null;

    $desc = '';
    $details = $html->find('p[id=product_details_description]', 0);
    if ($details) {
        $desc = $details->plaintext;
    }

    $product_image = '';
    $image_box = $html->find('div[class=product_img]', 0);
    $image_tag = $image_box ? $image_box->find('img', 0) : null;
    if ($image_tag) {
        $product_image = $image_tag->getAttribute('src');
    }

    $price_table = $html->find('table[class=pricing_tbl]', 0);
    if (!$price_table) {
        $html->clear();
        return array();
    }
    if ($debug) {
        echo $price_table;
    }

    $items = array();
    foreach ($price_table->find('tr') as $tr) {
        // Rows whose class contains "section" are skipped.
        if (strpos((string) $tr->getAttribute('class'), 'section') !== false) {
            continue;
        }

        $first_cell = $tr->find('td', 0);
        if (!$first_cell) {
            continue; // row without cells: nothing to read
        }
        // A "noseller" first cell ends the seller list.
        if ('noseller' == $first_cell->getAttribute('class')) {
            break;
        }

        // Resolve every node before dereferencing it; a row lacking any of
        // them used to abort the whole request with a fatal error.
        $bottom_cell = $tr->find('td', 1);
        $bottom_tip = $bottom_cell ? $bottom_cell->find('div[class=deftip]', 0) : null;
        $price_cell = $tr->find('td', 2);
        $seller_cell = $tr->find('td', 4);
        $seller_link = $seller_cell ? $seller_cell->find('a', 0) : null;
        if (!$bottom_tip || !$price_cell || !$seller_link) {
            continue;
        }

        $bottom_price = $this->getPriceForPriceGrabber($bottom_tip->plaintext);
        $price = $this->getPriceForPriceGrabber($price_cell->plaintext);
        $shopping_price = $bottom_price - $price;

        $href = $seller_link->getAttribute('href');
        $img = $seller_link->find('img', 0);
        if ($img) {
            $menu = $img->getAttribute('alt');
            $logo = $img->getAttribute('src');
        } else {
            $menu = $seller_link->plaintext;
            $logo = '';
        }

        $data = array(
            'name' => $name,
            'desc' => $desc,
            'manu' => $menu,
            'url' => $href,
            'productImage' => $product_image,
            'basePrice' => trim($price),
            'shippingPrice' => trim($shopping_price),
            'price' => trim($price),
            'logo' => $logo,
        );
        $items[] = $data;

        if ($debug) {
            echo '<br />--------------------------<br /><pre>';
            print_r($data);
            echo '</pre>';
            $debug_fields = array(
                'Name' => $name,
                'Desc' => $desc,
                'Bottom Price' => $bottom_price,
                'Price' => $price,
                'Shipping Price' => $shopping_price,
                'Seller Name' => $menu,
                'Seller Image' => $logo,
                'Store Link' => $href,
            );
            foreach ($debug_fields as $label => $value) {
                echo '<br />' . $label . ': ' . $value;
            }
            echo '<br />--------------------------<br />';
        }
    }

    $html->clear();

    return $items;
}
<?php
// Builds a one-item RSS envelope listing the SABC 2 schedule entries whose
// two characters at offset 3 compare as >= 17.
include 'simple_html_dom.php';

$html = new simple_html_dom();
$html->load_file('http://mydorpie.com/m/?page=tv_sabc2_today');

// Every div with id "blp" on the page.
$element = $html->find('div[id=blp]');

echo "<rss>\n<channel>\n<title>Bluefin Mobile - SABC 2</title>\n<item>\n<title>SABC 2</title>\n<description>";

$entries = array();
foreach ($element as $value) {
    $markup = $value->innertext;
    if (substr($markup, 3, 2) >= 17) {
        // Drop the opening bold tag, then turn the closing one into a separator.
        $entries[] = str_replace(array("<b>", "</b> "), array("", " - "), $markup);
    }
}
echo implode(", ", $entries);

echo "</description>\n</item>\n</channel>\n<head/>\n</rss>";
<?php
// Wraps the first div.fontdef1 of the love horoscope page in a minimal RSS
// envelope.
include 'simple_html_dom.php';

$html = new simple_html_dom();
$html->load_file('http://love.horoscope.com/astrology/tomorrow-love-horoscope-taurus.html');

// All divs carrying class "fontdef1"; only the first one's inner HTML is used.
$element = $html->find('div[class=fontdef1]');

echo "<rss>\n<channel>\n<title>Bluefin Mobile - Love Horoscopes Taurus</title>\n<item>\n<title>Love Horoscopes Taurus</title>\n<description>",
    $element[0]->innertext,
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
<?php
// Builds a one-item RSS envelope from fixed positions on the news section
// page: the anchor at index 39 and the paragraph at index 1.
include 'simple_html_dom.php';

$html = new simple_html_dom();
$html->load_file('http://mg.co.za/section/news-africa/');

$element = $html->find('a');  // every anchor on the page
$element2 = $html->find('p'); // every paragraph on the page

echo "<rss>\n<channel>\n<title>Bluefin Mobile - International News</title>\n<item>\n<title>International News</title>\n<description>",
    $element[39]->innertext,
    " - ",
    $element2[1]->innertext,
    " MTN Play and mg.co.za ",
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
/**
 * Collects the course list from $this->coursesite and appends it to the
 * degree's XML data file, unless the degree is already indexed.
 *
 * Every table row whose first cell contains a link produces one <course>
 * element with code, length and title attributes. The file is saved in place
 * and a confirmation with a link to it is echoed.
 *
 * @return void
 */
function save()
{
    if ($this->indexed == false) {
        $xml = simplexml_load_file("../data/uq/degrees/" . $this->code . ".xml");

        $html = new simple_html_dom();
        $html->load_file($this->coursesite);

        $courses = $xml->addChild("courses");
        foreach ($html->find("tr") as $row) {
            // Rows without any <td> (e.g. header rows) used to trigger a fatal
            // error because find() was chained on a null result.
            $firstCell = $row->find("td", 0);
            if ($firstCell === null) {
                continue;
            }
            $courselink = $firstCell->find("a", 0);
            if (is_null($courselink)) {
                continue;
            }

            $lengthCell = $row->find("td", 1);
            $titleCell = $row->find("td", 2);

            $coursecode = trim(preg_replace('/\\t+/', '', $courselink->innertext));
            $coursetitle = $titleCell ? $titleCell->innertext : '';

            $course = $courses->addChild("course");
            $course->addAttribute("code", $coursecode);
            $course->addAttribute("length", $lengthCell ? $lengthCell->innertext : '');
            // Remove tooltips from title
            $course->addAttribute(
                "title",
                stripos($coursetitle, "<sup>") !== false ? removeSup($coursetitle) : $coursetitle
            );
        }

        $xml->saveXML("../data/uq/degrees/" . $this->code . ".xml");
        echo "Information for {$this->title} has been collected and added to the data file.</br>";
        echo "<a href='../data/uq/degrees/" . $this->code . ".xml'>{$this->title} data file</a><br/><br/>";
    }
}
if (!$status) { die("Failed to select database!"); } include 'simple_html_dom.php'; $html = new simple_html_dom(); $page = $_GET['url']; $pageArr = explode("/", $page); $pageTot = count($pageArr); $pageName = $pageArr[$pageTot - 1]; $pageNameRec = str_replace("-", " ", $pageName); $pageNameRec1 = str_replace("s.html", "", $pageNameRec); $pageNameArr = explode(".", $pageName); $pageNameArr1 = explode("-", $pageNameArr[0]); $tot = count($pageNameArr1); $catName = $pageNameArr1[$tot - 1]; $html->load_file($page); $i = 0; foreach ($html->find('table tr td[class=stone-main]') as $element) { if ($i == 0) { $name = strip_tags(trim($element->innertext)); } $i++; } $name = explode("::", $name); //$name = $pageNameRec1; $imageArr = array(); $fullArr = array(); foreach ($html->find('table tr') as $row) { $cells1 = $row->find('td img[width=170]'); if (count($cells1) >= 3) { $imageArr[0] = $cells1[0]->src;
#$J++; #continue; #} ob_start(); // --------------------------------------------------------------------------------------- #echo $link . "<br>"; // ----------------- we #echo $test_link; $html = new simple_html_dom(); $this_to_parse = "https://connect.data.com" . $link; #echo $this_to_parse . "\n"; // DO NOT DDOS THESE PPL. Have been blacklisted from my home IP. As if there's no way around that, but still a P.I.T.A $rand_seconds = rand(2, 13); // 2 to 13 second delay between requests. 1-10 is just as arbitrary sleep($rand_seconds); $dom = $html->load_file($this_to_parse); if (!empty($dom)) { #echo "Loaded file!<br>"; } // Find the exact thing -- industries array -------------------------------------------- $i = 0; $industries = ''; $location = ''; $name = ''; $website = ''; $size = ''; foreach ($html->find('.seo-company-info > table > tbody tr:nth-child(\'5\') > .seo-company-label') as $ele) { global $industries; $str = $ele->innertext; if ($str == "Industries") { $industries = $html->find('.seo-company-info > table > tbody tr:nth-child(5) > .seo-company-data', $i)->innertext;
// Three loads through file_get_dom(), clearing and dropping the object each time.
$i = 0;
while ($i < 3) {
    $dom = file_get_dom($filename);
    //stat_dom($dom);
    $dom->clear();
    unset($dom);
    dump_memory();
    flush();
    ++$i;
}
printf('final: %s<br>', number_format(memory_get_usage(), 0, '.', ','));
flush();

// Three loads into one reused simple_html_dom instance, cleared after each load.
printf('<br><br>[one object]<br>init memory: %s<br>', number_format(memory_get_usage(), 0, '.', ','));
echo '------------------------------------------<br>';
flush();
$dom = new simple_html_dom();
$i = 0;
while ($i < 3) {
    $dom->load_file($filename);
    $dom->clear();
    dump_memory();
    ++$i;
}
unset($dom);
printf('final: %s<br>', number_format(memory_get_usage(), 0, '.', ','));
flush();

// Three loads through file_get_dom() without ever calling clear().
printf('<br><br>[multi objects without clear memory]<br>init memory: %s<br>', number_format(memory_get_usage(), 0, '.', ','));
echo '------------------------------------------<br>';
flush();
$i = 0;
while ($i < 3) {
    $dom = file_get_dom($filename);
    dump_memory();
    ++$i;
}
printf('final: %s<br>', number_format(memory_get_usage(), 0, '.', ','));
flush();
/**
 * Derives a transliterated file name from the sub-category title of the
 * current working file.
 *
 * The file at work_folder + file_data["current_file_name"] is parsed and the
 * inner HTML of the first element with id "sub-category-sub-title" is passed
 * to convertToTranslit().
 *
 * @return mixed Whatever convertToTranslit() returns; it receives null when
 *               the element is not present in the file.
 */
public function getFileName()
{
    $file_path = $this->work_folder . $this->file_data["current_file_name"];

    // Plain positional arguments (str, lowercase, forceTagsClosed,
    // target charset, stripRN). The previous call wrote them as
    // `$str = null, …`, which are assignment expressions that passed the same
    // values but also created five unused local variables.
    $tmp_file = new simple_html_dom(null, true, true, DEFAULT_TARGET_CHARSET, false);
    $tmp_file->load_file($file_path);

    $sub_category = $tmp_file->find("[id=sub-category-sub-title]");
    $title = isset($sub_category[0]) ? $sub_category[0]->innertext : null;

    // The title has been copied out, so the parsed tree can be released.
    $tmp_file->clear();

    return $this->convertToTranslit($title);
}
<?php # create and load the HTML include 'simple_html_dom.php'; $html = new simple_html_dom(); $html->load_file('https://www.nationallottery.co.za/lotto_home/results.asp?type=2'); # get an element representing the second paragraph $element = $html->find('span[class=onGreenBackground]'); $element2 = $html->find('img'); $element3 = $html->find('td'); # output it! echo "<rss>\n<channel>\n<title>Bluefin Mobile - Lotto Plus Results</title>\n<item>\n<title>Lotto Plus Results</title>\n<description>"; echo "The winning Lotto Plus numbers for "; echo $element[0]->innertext; echo " are "; $number1 = str_replace("<img src=\"../images/balls/ball_", "", $element2[21]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number1); echo ", "; $number2 = str_replace("<img src=\"../images/balls/ball_", "", $element2[22]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number2); echo ", "; $number3 = str_replace("<img src=\"../images/balls/ball_", "", $element2[23]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number3); echo ", "; $number4 = str_replace("<img src=\"../images/balls/ball_", "", $element2[24]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number4); echo ", "; $number5 = str_replace("<img src=\"../images/balls/ball_", "", $element2[25]); echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number5); echo ", "; $number6 = str_replace("<img src=\"../images/balls/ball_", "", $element2[26]);
<PUCE colorpuce="orange" level="1" type="square"/> <PUCE colorpuce="green" level="2" type="disc"/> <PUCE colorpuce="orange" level="3" type="circle"/> <PUCE colorpuce="green" level="4" type="square"/> <PAGE backcolor="#0033FF" font="Comic sans MS" linkcolor="yellow" textcolor="white"/> <TITLES textcolor="white"/> </PREFERENCES>'; $filename = "east.xml"; //supprimer fichier existant if (file_exists($filename)) { unlink($filename); } // 1 : on ouvre le fichier $fichier = fopen($filename, "a+"); $html1 = new simple_html_dom(); $html1->load_file('test.html'); //$titre = $html1->find('h2',0)->innertext; /* $title = $html1->find('.pagetitre'); $contenu .=' <PAGE_TITRE> <TITRE>'.$title->plaintext.'</TITRE><AUTEUR></AUTEUR> </PAGE_TITRE>'; */ /****************************************/ //H for ($i = 1; $i < 7; $i++) { $titres = 'h' . $i; $titre = $html1->find("" . $titres . ""); foreach ($titre as $t) { echo $t->plaintext;