load_file() public method

Loads HTML from a file (a local path or a URL).
public load_file ( )
/**
 * Builds the absolute Goodreads URL of the page linked by the ".next_page" element.
 *
 * When the page holds several ".next_page" elements, the href of the last one
 * is used. When there is none, or the page cannot be loaded, the bare site
 * root is returned.
 *
 * @param string $url URL (or path) of the page to inspect.
 * @return string Site root followed by the href that was found, if any.
 */
function get_next_page_url($url)
{
    $site_root = 'https://www.goodreads.com';
    $html = new simple_html_dom();
    // Some simple_html_dom versions return false from load_file() on failure
    // and leave the DOM unusable; searching it afterwards would be an error.
    if ($html->load_file($url) === false) {
        return $site_root;
    }
    // A dedicated loop variable keeps the element and the resulting href apart.
    $next_href = null;
    foreach ($html->find('.next_page') as $anchor) {
        $next_href = $anchor->href;
    }
    return $site_root . $next_href;
}
Esempio n. 2
0
 /**
  * Loads one category page and parses it unless the end of the listing or
  * the page limit has been reached.
  *
  * @param string $link Category link, relative to $this->_domain.
  * @return array Keys 'last_link', 'links' and 'parsing_data', read from the
  *               object state after the optional parse step.
  */
 public function initCategoryParser($link)
 {
     $this->_category_link = $link;
     $page = new simple_html_dom();
     $page->load_file($this->_domain . $link);
     $this->_counter++;
     // Same short-circuit order as before: the end-of-listing check runs first.
     $finished = $this->_endOfLink($page) || $this->_counter > $this->_pages_to_get;
     if (!$finished) {
         $this->_parseHtml($page);
     }
     return array('last_link' => $this->_category_link, 'links' => $this->_links, 'parsing_data' => $this->_new_parsing_date);
 }
Esempio n. 3
0
 /**
  * Returns the last two characters of the text in the first
  * "table td[class=bor f2 db]" cell of the page at $url.
  *
  * @param string $url Page to load.
  * @return string|int The two trailing characters, or 0 when the cell is absent.
  */
 function getDetail($url)
 {
     Yii::import('ext.simple_html_dom');
     $page = new simple_html_dom();
     $page->load_file($url);
     $cell = $page->find('table td[class=bor f2 db]', 0);
     // Guard clause: no matching cell means there is nothing to extract.
     if (!isset($cell->plaintext)) {
         return 0;
     }
     $text = $cell->plaintext;
     return substr($text, strlen($text) - 2);
 }
Esempio n. 4
0
 /**
  * Parses the exchange-rate table of the local file 'Nepal Rastra Bank.htm'
  * and dumps the collected rates with var_dump().
  *
  * Each dumped entry has the keys 'date', 'curname', 'buyingrate' and
  * 'sellingrate'. The currencies "Swedish Kr.", "Danish Kr." and "HKG$"
  * occupy a single column, so their 'sellingrate' is null; every other
  * currency occupies two consecutive columns (buying, then selling).
  *
  * @return void
  */
 public function getdata()
 {
     set_time_limit(0);
     // include_once: a plain include would redeclare the library (fatal error)
     // if this method ran twice or the library was already loaded.
     include_once 'simple_html_dom.php';
     $target_url = 'Nepal Rastra Bank.htm';
     $html = new simple_html_dom();
     $html->load_file($target_url);
     // Always defined, so the final dump works even when nothing matches.
     $rates = array();
     $tables = $html->find('/html/body/table[2]/tbody/tr[2]/td[2]/table[2]/tbody');
     if (empty($tables)) {
         var_dump($rates);
         return;
     }
     $table = $tables[0];
     $single_column = array("Swedish Kr.", "Danish Kr.", "HKG\$");
     // The header cells do not change per row, so look them up once.
     $headers = $table->find('tr[1]/td');
     $i = 0;
     //index of the next entry in $rates
     foreach ($table->find('tr') as $row) {
         $date_cells = $row->find('td[1]');
         if (empty($date_cells)) {
             continue;
         }
         $date = $date_cells[0]->plaintext;
         // Only rows whose first cell looks like a date carry rates.
         if (!preg_match("/[0-9]+[-][0-9]+[-][0-9]+/", $date)) {
             continue;
         }
         $j = 1;
         //column position within the current row
         foreach ($headers as $head) {
             $currency = trim($head->plaintext);
             if ($currency == "Date") {
                 $j++;
                 continue;
             }
             $cell = $row->find('td[' . $j . ']');
             $buying = isset($cell[0]) ? trim($cell[0]->plaintext) : null;
             $j++;
             $selling = null;
             if (!in_array($currency, $single_column, true)) {
                 $cell = $row->find('td[' . $j . ']');
                 $selling = isset($cell[0]) ? trim($cell[0]->plaintext) : null;
                 $j++;
             }
             $rates[$i] = array('date' => $date, 'curname' => $currency, 'buyingrate' => $buying, 'sellingrate' => $selling);
             $i++;
         }
     }
     var_dump($rates);
 }
Esempio n. 5
0
/**
 * Scans the page named by $_POST['t_url'] for images, runs face detection on
 * each one and stores the URL of every image with a detected face in the
 * `images` table. Temporary files are removed afterwards and the page's
 * alertFunction() script is triggered.
 *
 * Only http(s) page and image URLs are fetched: the address comes from user
 * input, so local paths and other stream wrappers are refused.
 *
 * @param object $conn Database connection exposing prepare(); the statement is
 *                     bound via mysqli when it is a mysqli_stmt, otherwise
 *                     PDO-style execute(array) is used.
 * @return void
 */
function insert_urls($conn)
{
    // t_url is taken from user input through the text box
    $target_url = isset($_POST['t_url']) ? $_POST['t_url'] : '';
    $html = new simple_html_dom();
    $http_pattern = '#^https?://#i';
    // load_file() returns false on failure and nothing on success, so the
    // result has to be compared against false explicitly.
    $loaded = is_string($target_url)
        && preg_match($http_pattern, $target_url) === 1
        && $html->load_file($target_url) !== false;
    if ($loaded) {
        $i = 1;
        foreach ($html->find('img') as $image) {
            $image_url = $image->src;
            $temp_path = 'temp_images/img' . $i . '.jpeg';
            $i++;
            if (!is_string($image_url) || preg_match($http_pattern, $image_url) !== 1) {
                continue;
            }
            $raw = file_get_contents($image_url);
            if ($raw === false) {
                continue;
            }
            $pp_image = imagecreatefromstring($raw);
            if ($pp_image === false) {
                // Not a decodable image; nothing to analyse.
                continue;
            }
            // Saves the image as a jpeg file
            imagejpeg($pp_image, $temp_path);
            $detector = new svay\FaceDetector('detection.dat');
            if ($detector->faceDetect($temp_path)) {
                // If the detector detects a face, insert that url into the
                // database using a bound parameter instead of interpolation.
                $stmt = $conn->prepare("INSERT INTO images (url) VALUES (?)");
                if ($stmt instanceof mysqli_stmt) {
                    $stmt->bind_param('s', $image_url);
                    $stmt->execute();
                } else {
                    $stmt->execute(array($image_url));
                }
            }
        }
    } else {
        echo '<br /><div id="strongtext"><p><strong>Palun sisesta mõni muu aadress.</strong></p></div>';
    }
    // After the foreach loop is done checking all of the images
    $temp_files = glob('temp_images/*');
    if ($temp_files !== false) {
        foreach ($temp_files as $temp_file) {
            if (is_file($temp_file)) {
                unlink($temp_file);
            }
        }
    }
    // Alert the user that the script has finished working
    echo '<script>alertFunction();</script>';
}
Esempio n. 6
0
 /**
  * Loads the article at $url (relative to $this->_domain) and collects its
  * meta data, category and body into $this->_articles.
  *
  * An entry keyed by the md5 hash of the full URL is registered before the
  * page is fetched.
  *
  * @param string $url Article path; leading and trailing slashes are trimmed.
  * @return array|null The articles collected so far, or null when the page
  *                    could not be loaded.
  */
 public function getArticleFromUrl($url)
 {
     $url = $this->_domain . trim($url, '/');
     $this->_url = $url;
     $this->_url_hash = md5($url);
     $this->_articles[$this->_url_hash] = array('url' => $url);
     $article_html = new simple_html_dom();
     // The object itself is never null; a failed load is reported by a false
     // return value from load_file().
     if ($article_html->load_file($url) === false) {
         return;
     }
     $this->_article_html = $article_html;
     $this->_getMetaFromArticle();
     $this->_getCategoryFromArticle();
     $this->_getArticle();
     return $this->_articles;
 }
Esempio n. 7
0
/**
 * Builds an HTML change log for a build from its "changes" page.
 *
 * Comments containing any blacklisted phrase (case-insensitive) and empty
 * comments are left out. A marker is appended to status.log once collection
 * is done.
 *
 * @param mixed $job     Build id placed in the buildId query parameter.
 * @param mixed $project Build type id placed in the buildTypeId query parameter.
 * @return string "Change Log:<ul>…</ul>" with one <li> per kept comment.
 */
function getChanges($job, $project)
{
    $commitblacklist = array('Merge branch', 'Merge pull', 'Revert', 'Cleanup');
    $url = "http://ci.earth2me.net/viewLog.html?buildId={$job}&tab=buildChangesDiv&buildTypeId={$project}&guest=1";
    $html = new simple_html_dom();
    $html->load_file($url);
    $items = "";
    foreach ($html->find('.changelist') as $list) {
        foreach ($list->find('.comment') as $comment) {
            $text = $comment->innertext;
            $blacklisted = false;
            foreach ($commitblacklist as $matchtext) {
                if (stripos($text, $matchtext) !== FALSE) {
                    $blacklisted = true;
                    break;
                }
            }
            if ($blacklisted || $text == "") {
                continue;
            }
            $items .= "<li>{$text}</li>\n";
        }
    }
    file_put_contents('status.log', "Collected changes! ", FILE_APPEND);
    return "Change Log:<ul>" . $items . "</ul>";
}
Esempio n. 8
0
<?php

//Author: Zakaria Hmaidouch
//Website: zhma.info
//Import simplehtmldom lib, from http://simplehtmldom.sourceforge.net
require 'libs/simple_html_dom.php';
// Output file that receives the generated report
$file = 'data.html';
$html = new simple_html_dom();
// Target URL
$url = 'http://www.zhma.info';
$counter = 1;
$html->load_file($url);
// Data to target
$titles = $html->find('div[class=portfolio-item] h5');
// The report is rebuilt from scratch on every run
$current = '<html>
				<head>
					<title>Data Scraping</title>
				</head>
				<body>';
foreach ($titles as $title) {
    // Append one line per project title
    $current .= "<b>Project {$counter}:</b> {$title->innertext}<br>";
    $counter++;
}
$current .= '</body>
			</html>';
// Write the finished document once, closing tags included
file_put_contents($file, $current);
Esempio n. 9
0
<?php

include 'simple_html_dom.php';
$html = new simple_html_dom();
$content = $html->load_file('IAC.html');
$ret = $html->find('tr');
$fullInfo = [];
$currentOrigin = '';
$currentDestination = '';
foreach ($ret as $line) {
    if ($line->class == 'destination') {
        $link = $line->find('a');
        foreach ($link as $curLink) {
            $currentDestination = $curLink->innertext;
        }
    }
    if ($line->class == 'important origin') {
        $link = $line->find('a');
        foreach ($link as $curLink) {
            $currentOrigin = $curLink->innertext;
        }
    }
    if ($line->class == 'line even') {
        // new flight
        $flightInfo = ['origin' => $currentOrigin, 'destination' => $currentDestination];
        foreach ($line->children as $child) {
            $flightInfo[$child->attr['class']] = $child->plaintext;
        }
        $temp = [];
        foreach ($flightInfo as $key => $value) {
            if (in_array($key, ['remarks', 'valid'])) {
Esempio n. 10
0
<?php

# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('http://open.live.bbc.co.uk/weather/feeds/en/918702/3dayforecast.rss');
# collect every <description> element of the feed
$element = $html->find('description');
# wrap the second <description> element (index 1) in a minimal RSS document
echo "<rss>\n<channel>\n<title>Bluefin Mobile - SA News</title>\n<item>\n<title>SA News</title>\n<description>",
    $element[1],
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
Esempio n. 11
0
<?php

# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('http://astrology.horoscope.com/horoscope/dailyhoroscope/tomorrow-career-horoscope.aspx?sign=2');
# collect the <div class="fontdef1"> elements of the page
$element = $html->find('div[class=fontdef1]');
# wrap the inner HTML of the first match in a minimal RSS document
echo "<rss>\n<channel>\n<title>Bluefin Mobile - Career Horoscopes Taurus</title>\n<item>\n<title>Career Horoscopes Taurus</title>\n<description>",
    $element[0]->innertext,
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
Esempio n. 12
0
header('Cache-Control: no-cache, must-revalidate');
header('Content-type: application/json');
ini_set("user_agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21");
ini_set("max_execution_time", 0);
ini_set("memory_limit", "10000M");
ini_set('display_errors', '1');
$currenturl = "";
$isposted = false;
if (!empty($_POST)) {
    $currenturl = $_POST['url'];
    $isposted = true;
}
if ($isposted) {
    include 'simple_html_dom.php';
    $htmldom = new simple_html_dom();
    $htmldom->load_file($currenturl);
    $title = $htmldom->find('title');
    echo nl2br('{ "page" : ');
    echo nl2br('{ "title" : "' . utf8_encode(trim($title[0]->innertext)) . '",');
    $images = $htmldom->find("img");
    $bodyitems = $htmldom->find("body p text");
    $paragraphs = $bodyitems;
    echo nl2br('"images" : [');
    $numImages = count($images);
    $icount = 0;
    foreach ($images as $image) {
        if ($icount + 1 == $numImages) {
            $cditem = "";
        } else {
            $cditem = ",";
        }
Esempio n. 13
0
<?php

// example of how to use advanced selector features
include './lib/simple_html_dom/simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('./search.html');
// Print every <div class="g"> after blanking the <a class="fl"> links inside it.
foreach ($html->find('div[class=g]') as $result) {
    foreach ($result->find('a[class=fl]') as $unwanted) {
        $unwanted->outertext = "";
    }
    echo $result->outertext;
}
$html->clear();
Esempio n. 14
0
<?php

# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('https://www.nationallottery.co.za/powerball_home/results.asp?type=1');
# get an element representing the second paragraph
$element = $html->find('span[class=onGreenBackground]');
$element2 = $html->find('img');
$element3 = $html->find('td');
# output it!
echo "<rss>\n<channel>\n<title>Bluefin Mobile - Powerball Results</title>\n<item>\n<title>Powerball Results</title>\n<description>";
echo "The winning Powerball numbers for ";
echo $element[0]->innertext;
echo " are ";
$number1 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[20]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number1);
echo ", ";
$number2 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[21]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number2);
echo ", ";
$number3 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[22]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number3);
echo ", ";
$number4 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[23]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number4);
echo ", ";
$number5 = str_replace("<img src=\"../images/power_balls/ball_", "", $element2[24]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number5);
echo ", powerball: ";
$number6 = str_replace("<img src=\"../images/power_balls/power_", "", $element2[25]);
Esempio n. 15
0
 $idCidPoli = str_replace('onPesquisaClick(this, ', '', $cidade->onclick);
 $idCidPoli = str_replace(',', '', $idCidPoli);
 $idCidPoli = str_replace('"', '', $idCidPoli);
 $idCidPoli = str_replace('\'', '', $idCidPoli);
 $idCidPoli = str_replace(');', '', $idCidPoli);
 //guarda a posição do espaço que separa o id do prefeito ou vereador do id do municipio
 $espaco = strripos($idCidPoli, ' ');
 //guarda o id do prefeito ou do vereador que é 11 ou 13
 $codigoCargo = substr($idCidPoli, 0, $espaco);
 //guarda o id da cidade
 $codigoMunicipio = substr($idCidPoli, $espaco + 1);
 //modifica a url do ajax que é exibida na tela
 $urlAjaxPrefeitoVereador = "http://divulgacand2012.tse.jus.br/divulgacand2012/pesquisarCandidato.action?siglaUFSelecionada=" . $siglaUF . "&codigoMunicipio=" . $codigoMunicipio . "&codigoCargo=" . $codigoCargo . "&codigoSituacao=0";
 $htmlCidade = new simple_html_dom();
 //carrega o html que possui todos prefeitos ou vereadores da cidade
 $htmlCidade->load_file($urlAjaxPrefeitoVereador);
 //pega os input com o id e a ultima atualização do politico
 $candidato = $htmlCidade->find('tr[class="odd gradeX"] input');
 $htmlCidade->clear();
 unset($htmlCidade);
 //array para guardar ids dos candidatos e ids da ultima atualização do candidato
 $array = array("sqCandidato", "dtUltimaAtualizacao");
 $i = 0;
 $j = 0;
 foreach ($candidato as $elemento) {
     if (strcmp($elemento->name, "sqCandidato") == 0) {
         $array["sqCandidato"][$i] = $elemento->value;
         $i++;
     } else {
         $array["dtUltimaAtualizacao"][$j] = $elemento->value;
         $j++;
$target_url = $_GET['q'];
$qi = $_GET['qi'];
$present = 0;
$question_id = 0;
include 'autoloader.php';
use NlpTools\Classifiers\MultinomialNBClassifier;
use NlpTools\Documents\TokensDocument;
use NlpTools\Documents\TrainingSet;
use NlpTools\FeatureFactories\DataAsFeatures;
use NlpTools\Models\FeatureBasedNB;
use NlpTools\Stemmers\PorterStemmer;
use NlpTools\Tokenizers\WhitespaceTokenizer;
use NlpTools\Utils\StopWords;
include_once 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file($target_url);
include_once 'connect.inc.php';
//echo "<br/>";
foreach ($html->find('h1') as $link) {
    $ans = $link->getAttribute('itemprop');
    if ($ans == "name") {
        foreach ($link->find('a') as $link1) {
            echo "<div class='col-lg-offset-1 bg-primary col-lg-10' style='border-bottom:1px solid;'>";
            echo '<h1>' . $link1->text() . "</h1><br/>";
            echo "</div>";
            $query = "SELECT * from `question` where quest='" . $link1->text() . "'";
            $query_run = mysql_query($query);
            if (mysql_num_rows($query_run) == 1) {
                $present = 1;
                while ($row = mysql_fetch_array($query_run)) {
                    $question_id = $row['q_id'];
Esempio n. 17
0
 /**
  * Handles the POSTed "date" (d-m-Y): loads that day's result page from
  * minhngoc.net.vn, exposes the southern (.bkqmiennam) and northern
  * (.bkqtinhmienbac) result tables to the view, and inserts or updates the
  * prize rows per station in the database.
  *
  * For today's date each region is only stored when checkTimeDai() allows
  * it; for any other date both regions are stored unconditionally.
  *
  * @return void
  */
 public function indexAction()
 {
     if (!$this->_request->isPost()) {
         return;
     }
     $params = $this->_request->getParams();
     $date = isset($params['date']) ? $params['date'] : "";
     if ($date == "") {
         return;
     }
     $date_arr_temp = explode("-", $date);
     if (count($date_arr_temp) < 3) {
         // Not in d-m-Y form; the reversed date below could not be built.
         return;
     }
     $date_insert = $date_arr_temp[2] . "-" . $date_arr_temp[1] . "-" . $date_arr_temp[0];
     $html = new simple_html_dom();
     // The date is user input: encode it before placing it in the URL path
     // (digits and hyphens are left untouched).
     $link_load = "http://www.minhngoc.net.vn/ket-qua-xo-so/" . rawurlencode($date) . ".html";
     // The DOM object is never "empty"; a failed load is signalled by false.
     if ($html->load_file($link_load) === false) {
         return;
     }
     $bangdo_mn = $html->find(".bkqmiennam", 0);
     $bangdo_mb = $html->find(".bkqtinhmienbac", 0);
     $this->view->bangdo_mn = $bangdo_mn;
     $this->view->bangdo_mb = $bangdo_mb;
     $m_lichxoso = new Default_Model_Lichxoso();
     $arr_lichxoso = $m_lichxoso->getBangDo($date)->toArray();
     // 'madai' is a "|"-separated list; the last entry is used for the
     // northern table, the preceding ones for the southern columns.
     $arr_madai = explode("|", $arr_lichxoso['madai']);
     $soluong_dai = sizeof($arr_madai);
     $db = Zend_Db_Table::getDefaultAdapter();
     $is_today = $date == date('d-m-Y');

     // mien nam
     if ($bangdo_mn !== null && (!$is_today || $this->checkTimeDai("mn"))) {
         $arr_bangdo_mn = array();
         for ($i = 0; $i < $soluong_dai - 1; $i++) {
             $column = $bangdo_mn->find("td table", 1)->find(".rightcl", $i);
             // Rows 2..10 of the column map to giai8 down to giai0.
             $giai = 8;
             for ($sl = 2; $sl <= 10; $sl++) {
                 $arr_bangdo_mn[$arr_madai[$i]]["giai" . $giai] = (string) $column->find("tr ", $sl);
                 $giai--;
             }
         }
         foreach ($arr_bangdo_mn as $key_tenbang => $arr_bang) {
             if (!$this->checkExistDataVeDo($db, $key_tenbang, $date_insert)) {
                 $sql = $this->addNewDataVeDo($db, $key_tenbang, $arr_bang, $date_insert);
             } else {
                 $sql = $this->updateDataVeDo($db, $key_tenbang, $arr_bang, $date_insert);
             }
             $db->query($sql);
         }
     }

     // mien bac
     if ($bangdo_mb !== null && (!$is_today || $this->checkTimeDai("mb"))) {
         $key_tenbang = $arr_madai[$soluong_dai - 1];
         $arr_bangdo_mb = array();
         // Rows 1..8 of the table map to giai0 up to giai7.
         $giai = 0;
         for ($sl = 1; $sl <= 8; $sl++) {
             $arr_bangdo_mb["giai" . $giai] = (string) $bangdo_mb->find('tr', $sl)->find("td", 1);
             $giai++;
         }
         // The loop above always yields eight entries, so no emptiness check
         // is needed before saving.
         if (!$this->checkExistDataVeDo($db, $key_tenbang, $date_insert)) {
             $sql = $this->addNewDataVeDo($db, $key_tenbang, $arr_bangdo_mb, $date_insert);
         } else {
             $sql = $this->updateDataVeDo($db, $key_tenbang, $arr_bangdo_mb, $date_insert);
         }
         $db->query($sql);
     }
 }
Esempio n. 18
0
 /**
  * Looks up the link of a class on the ITB public class-list page.
  *
  * The page for study programme $ps is searched for a list item whose text
  * starts with the course code $kode; among the list items nested in it, the
  * first one whose first child has text equal to $kelas supplies the link.
  *
  * @param mixed  $ps    Study programme id placed in the "ps" query parameter.
  * @param string $kode  Course code, matched literally at the start of the item text.
  * @param string $kelas Class label to look for.
  * @return string The href of the matching class, or "not found".
  */
 public function getKelas($ps, $kode, $kelas)
 {
     // Create a DOM object
     $html = new simple_html_dom();
     // Load HTML from a URL
     $html->load_file('https://six.akademik.itb.ac.id/publik/daftarkelas.php?ps=' . $ps . '&semester=1&tahun=2015&th_kur=2013');
     // Quote the code so characters such as "." or "/" cannot alter the pattern.
     $pattern = '/^' . preg_quote($kode, '/') . '/';
     foreach ($html->find('ol') as $ul) {
         foreach ($ul->find('li') as $li) {
             // Find the course by its course code
             if (!preg_match($pattern, $li->plaintext)) {
                 continue;
             }
             foreach ($li->find('li') as $child) {
                 $first = $child->children(0);
                 if ($first !== null && strcmp($first->plaintext, $kelas) === 0) {
                     // Stop at the first match instead of scanning the rest of the page.
                     return $first->href;
                 }
             }
         }
     }
     // No class matched
     return "not found";
 }
Esempio n. 19
0
<?php

require 'simple_html_dom.php';
$cat_rem = fopen("cat_list.txt", "w");
fwrite($cat_rem, "");
fclose($cat_rem);
$cat_handle = fopen("cat_list.txt", "a");
$cat_counter = 0;
$cat_url = new simple_html_dom();
$cat_url->load_file('http://www.toy.ru/catalog/');
echo "<h4>Загрузили hmtl каталога товаров</h4>";
flush();
ob_flush();
if (count($cat_url->find('.category_list'))) {
    foreach ($cat_url->find('.category_list') as $catalog) {
        foreach ($catalog->find('div.item a') as $link) {
            $category = file_get_html("http://www.toy.ru" . $link->href);
            echo "<p>Загрузили hmtl категории http://www.toy.ru" . $link->href . " </p>";
            flush();
            ob_flush();
            if (count($category->find('div.item a'))) {
                foreach ($category->find('div.item a') as $link) {
                    $link_title = "";
                    if (count($link->find('img'))) {
                        $link_title = $link->find('img', 0)->title;
                        echo "<p>Название категории <strong>" . $link_title . "</strong></p>";
                    }
                    fwrite($cat_handle, $link->href . ";" . $link_title . "\r\n");
                    $cat_counter++;
                    echo "<p>Записали url категории http://www.toy.ru" . $link->href . " в файл </p>";
                    $subcat1 = file_get_html("http://www.toy.ru" . $link->href);
Esempio n. 20
0
 /**
  * Scrapes the PriceGrabber search result page for $upc and returns one
  * entry per seller row of the pricing table.
  *
  * Rows whose class contains "section" and rows lacking the expected cells
  * are skipped; a first cell with class "noseller" ends the scan.
  *
  * @param string $upc Product code used as the search keyword.
  * @return array List of arrays with the keys name, desc, manu, url,
  *               productImage, basePrice, shippingPrice, price and logo;
  *               empty when the product description or the pricing table
  *               is missing.
  */
 public function fetchDataFromPriceGrabberApi($upc)
 {
     ini_set('max_execution_time', 999999);
     ini_set('memory_limit', '400M');
     require_once dirname(BASEPATH) . '/system/application/libraries/simple_html_dom.php';
     $debug = false;
     // Encode the keyword so spaces or "&" in it cannot break the query string.
     $url = 'http://www.pricegrabber.com/search_request.php?form_keyword=' . urlencode($upc) . '&some_id=&id_type=&requestParams=Tjs%3D&vendorIds=YTowOnt9&st=query&sv=findit_top&kw_suggest=0&topcat_menu=6&zip_code=54001';
     if ($debug) {
         echo '<br />' . $url . '<br />';
     }
     $html = new simple_html_dom();
     $html->load_file($url);
     $description = $html->find('div[class=product_description]', 0);
     if (!$description) {
         $html->clear();
         return array();
     }
     $heading = $description->find('h1', 0);
     $name = $heading ? $heading->plaintext : '';
     $desc = '';
     $details = $html->find('p[id=product_details_description]', 0);
     if ($details) {
         $desc = $details->plaintext;
     }
     $product_image = '';
     $image_box = $html->find('div[class=product_img]', 0);
     if ($image_box) {
         $image = $image_box->find('img', 0);
         if ($image) {
             $product_image = $image->getAttribute('src');
         }
     }
     $items = array();
     $price_table = $html->find('table[class=pricing_tbl]', 0);
     if (!$price_table) {
         $html->clear();
         return array();
     }
     if ($debug) {
         echo $price_table;
     }
     foreach ($price_table->find('tr') as $tr) {
         // Section header rows carry no offer.
         if (strpos((string) $tr->getAttribute('class'), 'section') !== false) {
             continue;
         }
         $first_cell = $tr->find('td', 0);
         if ($first_cell === null) {
             // Row without data cells (e.g. a header row).
             continue;
         }
         if ('noseller' == $first_cell->getAttribute('class')) {
             break;
         }
         $bottom_cell = $tr->find('td', 1);
         $price_cell = $tr->find('td', 2);
         $seller_cell = $tr->find('td', 4);
         if ($bottom_cell === null || $price_cell === null || $seller_cell === null) {
             continue;
         }
         $bottom_tip = $bottom_cell->find('div[class=deftip]', 0);
         $seller_link = $seller_cell->find('a', 0);
         if ($bottom_tip === null || $seller_link === null) {
             continue;
         }
         $bottom_price = $this->getPriceForPriceGrabber($bottom_tip->plaintext);
         $price = $this->getPriceForPriceGrabber($price_cell->plaintext);
         // Shipping is the difference between the bottom-line and base price.
         $shopping_price = $bottom_price - $price;
         $href = $seller_link->getAttribute('href');
         $img = $seller_link->find('img', 0);
         if ($img) {
             $menu = $img->getAttribute('alt');
             $logo = $img->getAttribute('src');
         } else {
             $menu = $seller_link->plaintext;
             $logo = '';
         }
         $data = array('name' => $name, 'desc' => $desc, 'manu' => $menu, 'url' => $href, 'productImage' => $product_image, 'basePrice' => trim($price), 'shippingPrice' => trim($shopping_price), 'price' => trim($price), 'logo' => $logo);
         $items[] = $data;
         if ($debug) {
             echo '<br />';
             echo '--------------------------';
             echo '<br />';
             echo '<pre>';
             print_r($data);
             echo '</pre>';
             echo '<br />';
             echo 'Name: ' . $name;
             echo '<br />';
             echo 'Desc: ' . $desc;
             echo '<br />';
             echo 'Bottom Price: ' . $bottom_price;
             echo '<br />';
             echo 'Price: ' . $price;
             echo '<br />';
             echo 'Shipping Price: ' . $shopping_price;
             echo '<br />';
             echo 'Seller Name: ' . $menu;
             echo '<br />';
             echo 'Seller Image: ' . $logo;
             echo '<br />';
             echo 'Store Link: ' . $href;
             echo '<br />';
             echo '--------------------------';
             echo '<br />';
         }
     }
     $html->clear();
     return $items;
 }
Esempio n. 21
0
<?php

# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('http://mydorpie.com/m/?page=tv_sabc2_today');
# collect the <div id="blp"> elements of the page
$element = $html->find('div[id=blp]');
# open the RSS wrapper
echo "<rss>\n<channel>\n<title>Bluefin Mobile - SABC 2</title>\n<item>\n<title>SABC 2</title>\n<description>";
# keep entries whose two characters at offset 3 compare >= 17, dropping the
# <b> markup and turning "</b> " into " - "
$entries = array();
foreach ($element as $value) {
    $listing = $value->innertext;
    if (substr($listing, 3, 2) >= 17) {
        $entries[] = str_replace("</b> ", " - ", str_replace("<b>", "", $listing));
    }
}
# comma-separated list without a trailing separator
echo implode(", ", $entries);
echo "</description>\n</item>\n</channel>\n<head/>\n</rss>";
Esempio n. 22
0
<?php

# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('http://love.horoscope.com/astrology/tomorrow-love-horoscope-taurus.html');
# collect the <div class="fontdef1"> elements of the page
$element = $html->find('div[class=fontdef1]');
# wrap the inner HTML of the first match in a minimal RSS document
echo "<rss>\n<channel>\n<title>Bluefin Mobile - Love Horoscopes Taurus</title>\n<item>\n<title>Love Horoscopes Taurus</title>\n<description>",
    $element[0]->innertext,
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
Esempio n. 23
0
<?php

# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('http://mg.co.za/section/news-africa/');
# collect all links and all paragraphs of the page
$element = $html->find('a');
$element2 = $html->find('p');
# wrap the 40th link (index 39) and the second paragraph (index 1) in a
# minimal RSS document
echo "<rss>\n<channel>\n<title>Bluefin Mobile - International News</title>\n<item>\n<title>International News</title>\n<description>",
    $element[39]->innertext,
    " - ",
    $element2[1]->innertext,
    " MTN Play and mg.co.za ",
    "</description>\n</item>\n</channel>\n<head/>\n</rss>";
 /**
  * Scrapes the course table of $this->coursesite and appends a <courses>
  * element (one <course code="" length="" title=""> per table row that
  * links a course) to this degree's XML data file.
  *
  * Nothing happens when the degree is already indexed, or when the data
  * file or the course site cannot be loaded.
  *
  * @return void
  */
 function save()
 {
     if ($this->indexed) {
         return;
     }
     $data_file = "../data/uq/degrees/" . $this->code . ".xml";
     $xml = simplexml_load_file($data_file);
     if ($xml === false) {
         // Missing or malformed data file: there is nothing to append to.
         return;
     }
     $html = new simple_html_dom();
     if ($html->load_file($this->coursesite) === false) {
         return;
     }
     $courses = $xml->addChild("courses");
     foreach ($html->find("tr") as $row) {
         // Header rows have no <td>, other rows may lack the course link.
         $code_cell = $row->find("td", 0);
         $courselink = $code_cell === null ? null : $code_cell->find("a", 0);
         $length_cell = $row->find("td", 1);
         $title_cell = $row->find("td", 2);
         if ($courselink === null || $length_cell === null || $title_cell === null) {
             continue;
         }
         $coursecode = trim(preg_replace('/\\t+/', '', $courselink->innertext));
         $course = $courses->addChild("course");
         $course->addAttribute("code", $coursecode);
         $course->addAttribute("length", $length_cell->innertext);
         $coursetitle = $title_cell->innertext;
         //Remove tooltips from title
         if (stripos($coursetitle, "<sup>") !== false) {
             $coursetitle = removeSup($coursetitle);
         }
         $course->addAttribute("title", $coursetitle);
     }
     $xml->saveXML($data_file);
     echo "Information for {$this->title} has been collected and added to the data file.</br>";
     echo "<a href='../data/uq/degrees/" . $this->code . ".xml'>{$this->title} data file</a><br/><br/>";
 }
Esempio n. 25
0
// Abort early when the database selection performed earlier did not succeed.
if (!$status) {
    die("Failed to select database!");
}
include 'simple_html_dom.php';
$html = new simple_html_dom();
// The page to scrape is supplied by the client and is therefore untrusted.
// Only absolute http(s) URLs are accepted; otherwise load_file() could be
// pointed at local files or other stream wrappers (file://, php://, ...).
$page = isset($_GET['url']) && is_string($_GET['url']) ? $_GET['url'] : '';
$pageScheme = strtolower((string) parse_url($page, PHP_URL_SCHEME));
if (filter_var($page, FILTER_VALIDATE_URL) === false || !in_array($pageScheme, array('http', 'https'), true)) {
    die("Invalid page URL!");
}
// Last path segment of the URL, i.e. the file name of the page.
$pageArr = explode("/", $page);
$pageTot = count($pageArr);
$pageName = $pageArr[$pageTot - 1];
// File name with dashes turned into spaces and the "s.html" suffix removed.
$pageNameRec = str_replace("-", " ", $pageName);
$pageNameRec1 = str_replace("s.html", "", $pageNameRec);
// Last dash-separated token of the file name (the part before the first dot).
$pageNameArr = explode(".", $pageName);
$pageNameArr1 = explode("-", $pageNameArr[0]);
$tot = count($pageNameArr1);
$catName = $pageNameArr1[$tot - 1];
// Some simple_html_dom versions return false (and clear the DOM) when the
// document cannot be fetched; stop here instead of querying an empty DOM.
if ($html->load_file($page) === false) {
    die("Failed to load page!");
}
// Take the text of the first "stone-main" cell as the name; $i ends up holding
// the number of matching cells. $name defaults to an empty string so the
// explode() below never receives an undefined variable.
$name = '';
$i = 0;
foreach ($html->find('table tr td[class=stone-main]') as $element) {
    if ($i == 0) {
        $name = strip_tags(trim($element->innertext));
    }
    $i++;
}
// Split the cell text on "::" into its parts.
$name = explode("::", $name);
//$name = $pageNameRec1;
$imageArr = array();
$fullArr = array();
foreach ($html->find('table tr') as $row) {
    $cells1 = $row->find('td img[width=170]');
    if (count($cells1) >= 3) {
        $imageArr[0] = $cells1[0]->src;
 #$J++;
 #continue;
 #}
 ob_start();
 // ---------------------------------------------------------------------------------------
 #echo $link . "<br>";
 // ----------------- we
 #echo $test_link;
 $html = new simple_html_dom();
 $this_to_parse = "https://connect.data.com" . $link;
 #echo $this_to_parse . "\n";
 // DO NOT DDOS THESE PPL. Have been blacklisted from my home IP. As if there's no way around that, but still a P.I.T.A
 $rand_seconds = rand(2, 13);
 // 2 to 13 second delay between requests. 1-10 is just as arbitrary
 sleep($rand_seconds);
 $dom = $html->load_file($this_to_parse);
 if (!empty($dom)) {
     #echo "Loaded file!<br>";
 }
 // Find the exact thing -- industries array --------------------------------------------
 $i = 0;
 $industries = '';
 $location = '';
 $name = '';
 $website = '';
 $size = '';
 foreach ($html->find('.seo-company-info > table > tbody tr:nth-child(\'5\') > .seo-company-label') as $ele) {
     global $industries;
     $str = $ele->innertext;
     if ($str == "Industries") {
         $industries = $html->find('.seo-company-info > table > tbody tr:nth-child(5) > .seo-company-data', $i)->innertext;
Esempio n. 27
0
// Scenario: a fresh DOM object per iteration. Each DOM returned by
// file_get_dom() is explicitly clear()ed and unset before dump_memory() is
// called (dump_memory() is defined elsewhere; presumably it prints the current
// memory usage - confirm).
for ($i = 0; $i < 3; ++$i) {
    $dom = file_get_dom($filename);
    //stat_dom($dom);
    $dom->clear();
    unset($dom);
    dump_memory();
    flush();
}
// Report the memory in use once the scenario above has finished.
echo 'final: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>';
flush();
// Scenario "[one object]": a single simple_html_dom instance is reused for
// every load and clear()ed after each one; it is only unset after the loop.
echo '<br><br>[one object]<br>init memory: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>';
echo '------------------------------------------<br>';
flush();
$dom = new simple_html_dom();
for ($i = 0; $i < 3; ++$i) {
    $dom->load_file($filename);
    $dom->clear();
    dump_memory();
}
unset($dom);
echo 'final: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>';
flush();
// Scenario "[multi objects without clear memory]": a new DOM is created on
// every iteration and simply overwritten, with no clear()/unset in between.
echo '<br><br>[multi objects without clear memory]<br>init memory: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>';
echo '------------------------------------------<br>';
flush();
for ($i = 0; $i < 3; ++$i) {
    $dom = file_get_dom($filename);
    dump_memory();
}
echo 'final: ' . number_format(memory_get_usage(), 0, '.', ',') . '<br>';
flush();
Esempio n. 28
0
 /**
  * Builds a file name from the sub-category title of the current work file.
  *
  * Loads the file named by $this->file_data["current_file_name"] from
  * $this->work_folder, reads the inner HTML of the first element whose id is
  * "sub-category-sub-title" and passes it through convertToTranslit().
  *
  * When no such element exists an empty string is passed to
  * convertToTranslit() instead of dereferencing a missing array entry.
  *
  * @return mixed Whatever convertToTranslit() returns for the title.
  */
 public function getFileName()
 {
     $file_path = $this->work_folder . $this->file_data["current_file_name"];
     // Arguments, in order: $str, $lowercase, $forceTagsClosed, $target_charset, $stripRN.
     $tmp_file = new simple_html_dom(null, true, true, DEFAULT_TARGET_CHARSET, false);
     $tmp_file->load_file($file_path);
     $sub_category = $tmp_file->find("[id=sub-category-sub-title]");
     // Copy the text out before the DOM is released below.
     $title = isset($sub_category[0]) ? $sub_category[0]->innertext : '';
     // Release the parsed DOM explicitly; without clear() its node tree
     // may stay in memory after this method returns.
     $tmp_file->clear();
     unset($tmp_file);
     return $this->convertToTranslit($title);
 }
Esempio n. 29
0
<?php

# Scrapes the national lottery results page and prints the start of an RSS
# document listing the winning Lotto Plus numbers.
# create and load the HTML
include 'simple_html_dom.php';
$html = new simple_html_dom();
$html->load_file('https://www.nationallottery.co.za/lotto_home/results.asp?type=2');
# collect every span with class "onGreenBackground"; the first one is printed
# below as what the numbers are "for" (presumably the draw date - confirm)
$element = $html->find('span[class=onGreenBackground]');
# collect every <img>; the ball images are picked by fixed position (21-26),
# so this depends on the exact layout of the page
$element2 = $html->find('img');
# collect every <td> (not used in the visible part of this script)
$element3 = $html->find('td');
# output it!
echo "<rss>\n<channel>\n<title>Bluefin Mobile - Lotto Plus Results</title>\n<item>\n<title>Lotto Plus Results</title>\n<description>";
echo "The winning Lotto Plus numbers for ";
echo $element[0]->innertext;
echo " are ";
# For each ball: the image element is used as a string (presumably its outer
# HTML - confirm), then the fixed markup before and after the ball number in
# the image file name is stripped, leaving only the number.
$number1 = str_replace("<img src=\"../images/balls/ball_", "", $element2[21]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number1);
echo ", ";
$number2 = str_replace("<img src=\"../images/balls/ball_", "", $element2[22]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number2);
echo ", ";
$number3 = str_replace("<img src=\"../images/balls/ball_", "", $element2[23]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number3);
echo ", ";
$number4 = str_replace("<img src=\"../images/balls/ball_", "", $element2[24]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number4);
echo ", ";
$number5 = str_replace("<img src=\"../images/balls/ball_", "", $element2[25]);
echo str_replace(".gif\" width=\"34\" height=\"40\" />", "", $number5);
echo ", ";
$number6 = str_replace("<img src=\"../images/balls/ball_", "", $element2[26]);
Esempio n. 30
0
			<PUCE colorpuce="orange" level="1" type="square"/>
			<PUCE colorpuce="green" level="2" type="disc"/>
			<PUCE colorpuce="orange" level="3" type="circle"/>
			<PUCE colorpuce="green" level="4" type="square"/>
			<PAGE backcolor="#0033FF" font="Comic sans MS" linkcolor="yellow" textcolor="white"/>
			<TITLES textcolor="white"/>
	</PREFERENCES>';
$filename = "east.xml";
//supprimer fichier existant
if (file_exists($filename)) {
    unlink($filename);
}
// 1 : on ouvre le fichier
$fichier = fopen($filename, "a+");
$html1 = new simple_html_dom();
$html1->load_file('test.html');
//$titre = $html1->find('h2',0)->innertext;
/*
$title = $html1->find('.pagetitre');
$contenu .='
<PAGE_TITRE>
	<TITRE>'.$title->plaintext.'</TITRE><AUTEUR></AUTEUR>
</PAGE_TITRE>';
*/
/****************************************/
//H
for ($i = 1; $i < 7; $i++) {
    $titres = 'h' . $i;
    $titre = $html1->find("" . $titres . "");
    foreach ($titre as $t) {
        echo $t->plaintext;