/**
 * Builds this object from the API documentation page found at $strUrl.
 *
 * The page is parsed with file_get_html(); every "div.api-item" inside the
 * "options", "events" and "methods" sections is registered through
 * add_option(), add_event() and add_method() respectively.
 *
 * @param string      $strUrl         Address of the documentation page to parse.
 * @param string|null $strJqClass     Class name; when null it is taken from the first
 *                                    word of the page's "h1.entry-title" heading.
 * @param string|null $strJqSetupFunc Passed through to the parent constructor.
 * @param string|null $strQcClass     Passed through to the parent constructor.
 * @param string      $strQcBaseClass Passed through to the parent constructor.
 */
public function __construct($strUrl, $strJqClass = null, $strJqSetupFunc = null, $strQcClass = null, $strQcBaseClass = 'QPanel')
 {
     $this->hasDisabledProperty = false;
     $objPage = file_get_html($strUrl);

     if ($strJqClass === null) {
         // Keep only the text before the first space of the heading.
         $arrHeadings = $objPage->find('h1.entry-title');
         $strJqClass = preg_replace('/ .*/', '', $arrHeadings[0]->plaintext);
     }

     parent::__construct($strJqClass, $strJqSetupFunc, $strQcClass, $strQcBaseClass);

     // Options; an option whose type is an event type is registered as an event too.
     foreach ($objPage->find('section[id=options] div.api-item') as $objOption) {
         $strType = $this->add_option($objOption);
         if ($this->is_event_option($strType)) {
             $this->add_event($objOption, $strType);
         }
     }

     // Events.
     foreach ($objPage->find('section[id=events] div.api-item') as $objEvent) {
         $this->add_event($objEvent);
     }

     // Methods; names are reset before the methods are added.
     $arrMethods = $objPage->find('section[id=methods] div.api-item');
     $this->reset_names();
     foreach ($arrMethods as $objMethod) {
         $this->add_method($objMethod);
     }
 }
示例#2
0
 /**
  * Scrapes the DPD quick-track page for a consignment and returns its details.
  *
  * The consignment number is read from the "consignment" request parameter
  * (a hard-coded default is used when it is absent).
  *
  * @return array Keys: Status, Parcel_No, Reference, consignment, Post_Code,
  *               Collected_Date, Service, Delivery_Status, trackingtable.
  *               A value is null when the matching element is not on the page.
  * @throws \RuntimeException When the tracking page cannot be retrieved.
  */
 public function getDpdOrderTracking()
 {
     $consignment = $this->getRequest()->getParam('consignment', 15502405041348.0);
     $link = 'http://www.dpd.co.uk/tracking/quicktrack.do?search.consignmentNumber=' . $consignment . '&search.searchType=16&search.javascriptValidated=0&appmode=guest';
     try {
         $html = file_get_html($link);
     } catch (Exception $e) {
         // Previously the exception was dumped and execution continued with an undefined $html.
         throw new \RuntimeException('Could not load the DPD tracking page.', 0, $e);
     }
     if (!$html) {
         // file_get_html() signals failure with false rather than an exception.
         throw new \RuntimeException('Could not load the DPD tracking page.');
     }

     // The status element id is derived from a "trackCode" JavaScript variable; the last match wins.
     $trackCode = null;
     foreach ($html->find('script') as $script) {
         if (preg_match("/var trackCode = \\'(.*?)\\';*/", $script->outertext, $matches)) {
             $trackCode = $matches[1];
         }
     }

     // Reads the text of a node that may be missing.
     $textOf = function ($node) {
         return $node ? $node->plaintext : null;
     };

     $data = array();
     $data['Status'] = $trackCode === null
         ? null
         : $textOf($html->find('td[class=app-light-row-one app-table-indent] div[id=' . $trackCode . '_text]', 0));

     // All remaining fields live in consecutive cells matched by one selector; query it once.
     $cells = $html->find('td[class=app-light-row-one app-border-top app-data-row]');
     $fields = array('Parcel_No', 'Reference', 'consignment', 'Post_Code', 'Collected_Date', 'Service', 'Delivery_Status');
     foreach ($fields as $index => $field) {
         $data[$field] = $textOf(isset($cells[$index]) ? $cells[$index] : null);
     }

     $table = $html->find('table[id=parceldetail]', 0);
     $data['trackingtable'] = $table ? str_replace("SPICERS LTD", "WAREHOUSE", $table->outertext) : null;
     return $data;
 }
示例#3
0
 /**
  * Collects posts from a Mspabooru listing page into $this->items.
  *
  * @param array $param Optional keys: 'p' (1-based page number) and 't' (tag query).
  */
 public function collectData(array $param)
 {
     $page = 0;
     $tags = '';
     if (isset($param['p'])) {
         $pageNumber = (int) preg_replace("/[^0-9]/", '', $param['p']);
         // The site expects a post offset (50 per page). Clamp so that p=0 or a
         // non-numeric p maps to the first page instead of an offset of -50.
         $page = max(0, $pageNumber - 1) * 50;
     }
     if (isset($param['t'])) {
         $tags = urlencode($param['t']);
     }
     $html = file_get_html("http://mspabooru.com/index.php?page=post&s=list&tags={$tags}&pid={$page}") or $this->returnError('Could not request Mspabooru.', 404);
     foreach ($html->find('div[class=content] span') as $element) {
         $link = $element->find('a', 0);
         $image = $element->find('img', 0);
         if (!$link || !$image) {
             // Not a post thumbnail; skip it rather than dereferencing a missing node.
             continue;
         }
         $item = new \Item();
         $item->uri = 'http://mspabooru.com/' . $link->href;
         $item->postid = (int) preg_replace("/[^0-9]/", '', $element->getAttribute('id'));
         $item->timestamp = time();
         $item->thumbnailUri = $image->src;
         $item->tags = $image->getAttribute('alt');
         $item->title = 'Mspabooru | ' . $item->postid;
         $item->content = '<a href="' . $item->uri . '"><img src="' . $item->thumbnailUri . '" /></a><br>Tags: ' . $item->tags;
         $this->items[] = $item;
     }
 }
 /**
  * Returns the inner HTML of the first "div.column" element of the page at
  * $url, with all <script> elements removed.
  *
  * @param string $url Page to load.
  * @return string
  */
 function ExtractContent($url)
 {
     $document = file_get_html($url);
     $column = $document->find('div.column', 0);
     return preg_replace('@<script[^>]*?>.*?</script>@si', '', $column->innertext);
 }
示例#5
0
 /**
  * Collects classified ads from a LeBonCoin search page into $this->items.
  *
  * @param array $param Expected keys: 'r' (path segment placed after /offres/)
  *                     and 'k' (search keywords, used as the q= value).
  */
 public function collectData(array $param)
 {
     // Array keys must be quoted: bare r / k are undefined constants.
     $link = 'http://www.leboncoin.fr/annonces/offres/' . $param['r'] . '/?f=a&th=1&q=' . $param['k'];
     $html = file_get_html($link) or $this->returnError('Could not request LeBonCoin.', 404);
     $list = $html->find('.list-lbc', 0);
     if ($list === NULL) {
         return;
     }
     foreach ($list->find('a') as $element) {
         $item = new \Item();
         $item->uri = $element->href;
         $title = $element->getAttribute('title');

         // Reset for every ad; otherwise an ad without an image inherits the
         // previous ad's content (or uses an undefined variable on the first ad).
         $content = '';
         $imageBox = $element->find('div.image', 0);
         $image = $imageBox ? $imageBox->find('img', 0) : null;
         if ($image !== null) {
             $content = '<img src="' . $image->getAttribute('src') . '" alt="thumbnail">';
         }

         $dateBox = $element->find('div.date', 0);
         $date = ($dateBox ? $dateBox->find('div', 0) . $dateBox->find('div', 1) : '') . '<br/>';

         $detailsList = $element->find('div.detail', 0);
         $lastDetail = '';
         if ($detailsList) {
             // Nested divs 1..3 of the detail box are appended to the content.
             for ($i = 1; $i < 4; $i++) {
                 $content .= $detailsList->find('div', $i);
             }
             $lastDetail = $detailsList->find('div', 3);
         }

         $item->title = $title . ' - ' . $lastDetail;
         $item->content = $content . $date;
         $this->items[] = $item;
     }
 }
 /**
  * Imports pins from the RSS feed given as the first argument.
  *
  * For every feed item whose guid is not in the list returned by
  * PinterestPin->find('list'), the pin page is loaded to find its
  * "img.pinImage", and the collected rows are saved with saveAll().
  *
  * @return bool|null true when rows were saved, false when saving failed,
  *                   null when there was nothing to save.
  */
 public function parseRss()
 {
     $channel = Xml::toArray(Xml::build($this->args[0])->channel);
     $items = $channel['channel']['item'];
     $known = $this->PinterestPin->find('list', array('fields' => array('id', 'guid')));
     $data = array();
     foreach ($items as $item) {
         if (in_array($item['guid'], $known)) {
             continue;
         }
         $html = file_get_html($item['guid']);
         if (!$html) {
             // One unreachable pin page must not abort the whole import.
             continue;
         }
         $image = $html->find('img.pinImage', 0);
         if (is_object($image)) {
             $data[] = array(
                 'guid' => $item['guid'],
                 'title' => $item['title'],
                 'image' => $image->attr['src'],
                 'description' => strip_tags($item['description']),
                 'created' => date('Y-m-d H:i:s', strtotime($item['pubDate'])),
             );
         }
     }
     if (!empty($data)) {
         if ($this->PinterestPin->saveAll($data)) {
             $this->out(__d('pinterest', '<success>All records saved sucesfully.</success>'));
             return true;
         }
         $this->err(__d('pinterest', 'Cannot save records.'));
         return false;
     }
     $this->out(__d('pinterest', '<warning>No records saved.</warning>'));
 }
示例#7
0
 /**
  * Loads the page at $url and returns every comic image URL found in it
  * (cdn2.cad-comic.com/comics/cad-*png), joined together without a separator.
  *
  * @param string $url Page to load.
  * @return string
  */
 function CADExtractContent($url)
 {
     $page = file_get_html($url);
     $found = array();
     // The DOM object is used as the regex subject, i.e. in its string form.
     preg_match_all("/http:\\/\\/cdn2\\.cad-comic\\.com\\/comics\\/cad-\\S*png/", $page, $found);
     return implode('', $found[0]);
 }
示例#8
0
 /**
  * Returns the inner HTML of the first "div.single-content" element of the
  * page at $url, keeping only <p>, <a> and <img> tags.
  *
  * @param string $url Page to load.
  * @return string
  */
 function CoinDeskExtractContent($url)
 {
     $page = file_get_html($url);
     $article = $page->find('div.single-content', 0);
     return strip_tags($article->innertext, '<p><a><img>');
 }
示例#9
0
 /**
  * Runs a Google search for $param['q'] and stores each result in $this->items.
  *
  * @param array $param Must contain 'q', the search keywords.
  */
 public function collectData(array $param)
 {
     $html = '';
     if (isset($param['q'])) {
         // Keyword search mode.
         $this->request = $param['q'];
         $searchUrl = 'https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1';
         $html = file_get_html($searchUrl) or $this->returnError('No results for this query.', 404);
     } else {
         $this->returnError('You must specify a keyword (?q=...).', 400);
     }

     $results = $html->find('div[id=ires]', 0);
     if ($results === null) {
         return;
     }

     foreach ($results->find('li[class=g]') as $hit) {
         $entry = new Item();

         // Result links may be redirect URLs (e.g. /url?q=...); prefer the
         // target carried in their "q" query parameter when there is one.
         $href = $hit->find('a[href]', 0)->href;
         $query = array();
         parse_str(parse_url($href, PHP_URL_QUERY), $query);
         $entry->uri = isset($query['q']) ? $query['q'] : '' . $href;

         $entry->title = $hit->find('h3', 0)->plaintext;
         $entry->content = $hit->find('span[class=st]', 0)->plaintext;
         $this->items[] = $entry;
     }
 }
示例#10
0
 /**
  * Collects the posts on the LesJoiesDuCode front page into $this->items.
  *
  * @param array $param Not used.
  */
 public function collectData(array $param)
 {
     $html = file_get_html('http://lesjoiesducode.fr/') or $this->returnError('Could not request LesJoiesDuCode.', 404);
     foreach ($html->find('div.blog-post') as $post) {
         $entry = new Item();

         $heading = $post->find('h1 a', 0);
         $title = html_entity_decode($heading->innertext);
         $link = $heading->href;

         $body = $post->find('div.blog-post-content', 0);
         // Point every image at the .gif instead of the static .jpg.
         foreach ($body->find('p img') as $picture) {
             $picture->src = str_replace(".jpg", ".gif", $picture->src);
         }
         $markup = $body->innertext;

         // The author, when present, follows the word "by" in the first <i> element.
         $credit = $body->find('i', 0);
         $byOffset = strpos($credit->innertext, "by");
         if ($byOffset > 0) {
             $entry->name = trim(str_replace("*/", "", substr($credit->innertext, $byOffset + 2)));
         }

         $entry->content .= trim($markup);
         $entry->uri = $link;
         $entry->title = trim($title);
         $this->items[] = $entry;
     }
 }
示例#11
0
 /**
  * Rebuilds the first table of the page at $link as a plain two-column table.
  *
  * Only rows that have a <th> are kept. Each kept row contributes its first
  * <th> text and its first <td> text; when that <td> contains a link the text
  * is wrapped in an anchor that opens in a new window.
  *
  * @param string $link Page to load.
  * @return string HTML of the rebuilt table.
  */
 public function getContentSsc($link)
 {
     $document = file_get_html($link);
     $sourceTable = $document->find('table', 0);

     $rows = '';
     foreach ($sourceTable->find('tr') as $row) {
         $header = $row->find('th', 0);
         if (!$header) {
             continue;
         }
         $cell = $row->find('td', 0);
         $anchor = $cell->find('a', 0);
         if ($anchor) {
             $value = '<a target="_blank" href="' . $anchor->href . '">' . $cell->plaintext . '</a>';
         } else {
             $value = $cell->plaintext;
         }
         $rows .= '<tr><th>' . $header->plaintext . '</th><td>' . $value . '</td></tr>';
     }

     return '<table><tbody>' . $rows . '</tbody></table>';
 }
 /**
  * Collects wallpapers from superbwallpapers.com into $this->items.
  *
  * @param array $param Optional keys: 'c' (category, default all), 'r'
  *                     (resolution, default 1920x1200) and 'm' (maximum
  *                     number of items, default 36).
  */
 public function collectData(array $param)
 {
     $baseUri = 'http://www.superbwallpapers.com';
     // empty() keeps the "falsy means default" behaviour of ?: without
     // raising an undefined-index warning when a key is absent.
     $this->category = !empty($param['c']) ? $param['c'] : '';
     // All default
     $this->resolution = !empty($param['r']) ? $param['r'] : '1920x1200';
     // Wide wallpaper default
     $max = !empty($param['m']) ? (int) $param['m'] : 36;
     if ($max <= 0) {
         $max = 36;
     }
     $num = 0;

     // Requesting a very high page number yields the last page, whose
     // ".cpage" element holds the number of available pages.
     $link = $baseUri . '/' . $this->category . '/9999.html';
     $html = file_get_html($link) or $this->returnError('No results for this query.', 404);
     $currentPage = $html->find('.paging .cpage', 0);
     $available = $currentPage ? (int) $currentPage->innertext() : 1;
     // 36 wallpapers per page: never load more pages than $max requires.
     $lastpage = min($available, (int) ceil($max / 36));

     for ($page = 1; $page <= $lastpage; $page++) {
         $link = $baseUri . '/' . $this->category . '/' . $page . '.html';
         $html = file_get_html($link) or $this->returnError('No results for this query.', 404);
         foreach ($html->find('.wpl .i a') as $element) {
             $thumbnail = $element->find('img', 0);
             if (!$thumbnail) {
                 continue;
             }
             $item = new \Item();
             // The full-size image URL is the thumbnail URL with the size swapped.
             $item->uri = str_replace('200x125', $this->resolution, $thumbnail->src);
             $item->timestamp = time();
             $item->title = $element->title;
             $item->thumbnailUri = $thumbnail->src;
             $item->content = $item->title . '<br><a href="' . $item->uri . '">' . $thumbnail . '</a>';
             $this->items[] = $item;
             $num++;
             if ($num >= $max) {
                 break 2;
             }
         }
     }
 }
 /**
  * Reads an app's version from its Google Play details page.
  *
  * @param string $packageName Package identifier used as the "id" query value.
  * @return string Text of the first div[itemprop=softwareVersion] element.
  */
 public static function getPlayStoreVersion($packageName)
 {
     $page = file_get_html("https://play.google.com/store/apps/details?id=" . $packageName);
     $versionNodes = $page->find('div[itemprop=softwareVersion]');
     return $versionNodes[0]->plaintext;
 }
示例#14
0
 /**
  * Collects articles from the Monde diplomatique front page into $this->items.
  *
  * Three areas are read in turn: the "div.laune" blocks, the <li> entries of
  * the first "div.listes", and the "div.liste ul li" entries whose class is
  * not "intrapub".
  *
  * @param array $param Not used.
  */
 public function collectData(array $param)
 {
     $link = 'http://www.monde-diplomatique.fr';
     $html = file_get_html($link) or $this->returnError('Could not request MondeDiplo. for : ' . $link, 404);

     foreach ($html->find('div.laune') as $headline) {
         $entry = new Item();
         $entry->uri = $link . $headline->find('a', 0)->href;
         $entry->title = $headline->find('h3', 0)->plaintext;
         $authors = $headline->find('div.dates_auteurs', 0)->plaintext;
         // The summary is whatever precedes the date/author line in the first div.
         $entry->content = $authors . '<br>' . strstr($headline->find('div', 0)->plaintext, $authors, true);
         $this->items[] = $entry;
     }

     // First list
     $firstList = $html->find('div.listes', 0);
     foreach ($firstList->find('li') as $row) {
         $anchor = $row->find('a', 0);
         $entry = new Item();
         $entry->uri = $link . $anchor->href;
         $entry->title = $anchor->plaintext;
         $entry->content = $row->find('div.dates_auteurs', 0)->plaintext;
         $this->items[] = $entry;
     }

     foreach ($html->find('div.liste ul li') as $row) {
         if ($row->getAttribute('class') == 'intrapub') {
             continue;
         }
         $entry = new Item();
         $entry->uri = $link . $row->find('a', 0)->href;
         $entry->title = $row->find('h3', 0)->plaintext;
         $entry->content = $row->find('div.dates_auteurs', 0)->plaintext . ' <br> ' . $row->find('div.intro', 0)->plaintext;
         $this->items[] = $entry;
     }
 }
示例#15
0
/**
 * Lists the Bible versions offered in the biblegateway.com version selector.
 *
 * Only options that follow the language marker whose value is "NIV" are
 * returned.
 *
 * @return array|false Map of option value => version name, or false when the
 *                     page or the selector could not be obtained.
 */
function bible_gateway_available_versions()
{
    include_once 'simple_html_dom.php';

    $page = file_get_html("http://www.biblegateway.com");
    if (empty($page)) {
        return false;
    }

    // Index -1 selects the last matching <select> on the page.
    $selector = $page->find('select[name=qs_version]', -1);
    if (empty($selector)) {
        return false;
    }

    $names = array();
    $language = '';
    foreach ($selector->children as $entry) {
        if ($entry->class == 'lang') {
            // Language marker row: remember it and move on.
            $language = $entry->value;
            continue;
        }
        // For some reason the English language section is delineated by NIV, not EN.
        if ($language == 'NIV') {
            $names[$entry->value] = str_replace('&nbsp;', '', $entry->innertext);
        }
    }
    return $names;
}
示例#16
0
/**
 * Rebuilds data.json, a JSON map of link text => href scraped from the
 * a-z and "numeric" album list pages of songspk.link.
 *
 * Prints a confirmation when the file has been written, or a failure notice
 * when it could not be written.
 */
function updateJSON()
{
    $list = range('a', 'z');
    $list[] = "numeric";

    $movieMapping = array();
    foreach ($list as $alphabet) {
        $html = file_get_html('http://www.songspk.link/' . $alphabet . '_list.html');
        if (!$html) {
            // A page that cannot be loaded is skipped instead of aborting the run.
            continue;
        }
        foreach ($html->find('div[class="catalog-album-holder"]') as $divElement) {
            foreach ($divElement->find('a') as $hrefElement) {
                $innerValue = trim($hrefElement->innertext);
                if (strlen($innerValue) > 1) {
                    $innerValue = preg_replace("/\\t/", "", $innerValue);
                    $movieMapping[trim($innerValue)] = trim($hrefElement->href);
                }
            }
        }
        // Release the parsed page before loading the next one.
        $html->clear();
        unset($html);
    }

    // The original passed `true` as the flags argument, which equals
    // JSON_HEX_TAG (1); the flag is spelled out so the output is unchanged.
    $json = json_encode($movieMapping, JSON_HEX_TAG);

    // file_put_contents() opens, truncates, writes and closes on its own.
    if (file_put_contents("data.json", $json) === false) {
        echo "data.json could not be written!!";
        return;
    }
    echo "data.json Created!!";
}
示例#17
0
/**
 * Submits a "terms" form and returns the cookies set by the response.
 *
 * The page at $terms_url is parsed for hidden inputs and for the input whose
 * value equals $button; these are merged into $postfields and POSTed back to
 * $terms_url.
 *
 * @param string $terms_url  Address of the terms page (also the POST target).
 * @param string $button     Value of the submit input to include.
 * @param array  $postfields Extra POST fields to send.
 * @return array Map of cookie name => cookie value taken from the Set-Cookie
 *               headers; empty when the page or the POST failed.
 */
function accept_terms_get_cookies($terms_url, $button = 'Next', $postfields = array())
{
    $dom = file_get_html($terms_url);
    if (!$dom) {
        return array();
    }
    foreach ($dom->find('input[type=hidden]') as $data) {
        $postfields = array_merge($postfields, array($data->name => $data->value));
    }
    foreach ($dom->find("input[value={$button}]") as $data) {
        $postfields = array_merge($postfields, array($data->name => $data->value));
    }

    $curl = curl_init($terms_url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    // NOTE(review): certificate verification is disabled here; confirm this is intended.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
    curl_setopt($curl, CURLOPT_HEADER, TRUE);
    $terms_response = curl_exec($curl);
    curl_close($curl);
    if ($terms_response === false) {
        return array();
    }

    // Capture the "name=value" part of every Set-Cookie header. Line breaks
    // are excluded so a cookie without attributes cannot swallow later lines.
    // Example header: Set-Cookie: ASP.NET_SessionId=bz3jprrptbflxgzwes3mtse4; path=/; HttpOnly
    preg_match_all('/^Set-Cookie:\\s*([^;\\r\\n]*)/mi', $terms_response, $matches);

    $cookies = array();
    foreach ($matches[1] as $pair) {
        // Split by hand: parse_str() rewrites "." and " " in names to "_",
        // which turned ASP.NET_SessionId into ASP_NET_SessionId.
        $parts = explode('=', $pair, 2);
        $name = trim($parts[0]);
        if ($name === '') {
            continue;
        }
        // Values stay URL-decoded, as parse_str() delivered them before.
        $cookies[$name] = isset($parts[1]) ? urldecode($parts[1]) : '';
    }
    return $cookies;
}
示例#18
0
 /**
  * Collects up to ten posts of a Whyd user into $this->items.
  *
  * @param array $param Must contain 'u': either a 24-character hexadecimal
  *                     user id or a user name, which is resolved through the
  *                     site's search page (first five results only).
  */
 public function collectData(array $param)
 {
     $html = '';
     if (isset($param['u'])) {
         $this->request = $param['u'];
         $userId = preg_replace("/[^0-9a-f]/", '', $this->request);
         if (strlen($userId) == 24) {
             // is input the userid ?
             $html = file_get_html('http://www.whyd.com/u/' . $userId) or $this->returnError('No results for this query.', 404);
         } else {
             // input may be the username
             $html = file_get_html('http://www.whyd.com/search?q=' . urlencode($this->request)) or $this->returnError('No results for this query.', 404);
             for ($j = 0; $j < 5; $j++) {
                 $user = $html->find('div.user', $j);
                 if ($user === null) {
                     // Fewer than five search results.
                     break;
                 }
                 $profileLink = $user->find('a', 0);
                 if ($profileLink === null) {
                     continue;
                 }
                 if (strtolower($profileLink->plaintext) == strtolower($this->request)) {
                     $html = file_get_html('http://www.whyd.com' . $profileLink->getAttribute('href')) or $this->returnError('No results for this query', 404);
                     break;
                 }
             }
         }
         // When no user matched, $html is still the search page and has no profile header.
         $profile = is_object($html) ? $html->find('div#profileTop', 0) : null;
         $heading = $profile ? $profile->find('h1', 0) : null;
         if ($heading === null) {
             $this->returnError('No results for this query.', 404);
         } else {
             $this->name = $heading->plaintext;
         }
     } else {
         $this->returnError('You must specify username', 400);
     }
     if (!is_object($html)) {
         return;
     }
     for ($i = 0; $i < 10; $i++) {
         $track = $html->find('div.post', $i);
         if ($track === null) {
             // The page lists fewer than ten posts.
             break;
         }
         $title = $track->find('h2', 0);
         $link = $track->find('a.no-ajaxy', 0);
         if ($title === null || $link === null) {
             continue;
         }
         $uri = 'http://www.whyd.com' . $link->getAttribute('href');
         $item = new \Item();
         $item->name = $title->plaintext;
         $item->title = $title->plaintext;
         $item->content = $track->find('a.thumb', 0) . '<br/>' . $title->plaintext;
         $item->id = $uri;
         $item->uri = $uri;
         $this->items[] = $item;
     }
 }
示例#19
0
 /**
  * Returns the article markup of the page at $url: the inner HTML of the
  * first figure[itemprop=associatedMedia] followed by the inner HTML of the
  * first div[id=content-article].
  *
  * @param string $url Page to load.
  * @return string
  */
 function NiceMatinExtractContent($url)
 {
     $page = file_get_html($url);
     $media = $page->find('figure[itemprop=associatedMedia]', 0)->innertext;
     $article = $page->find('div[id=content-article]', 0)->innertext;
     return $media . $article;
 }
示例#20
0
/**
 * Extracts box-score links from a saved schedule page.
 *
 * Anchors of the first table are used when the first <h2> reads
 * "Regular Season"; the anchors for the "Postseason" pass (second <h2>) come
 * from the second table when the regular-season pass ran, otherwise from the
 * first table.
 *
 * @param string $inputFile         File name (appended to the location).
 * @param string $inputFileLocation Directory or URL prefix of the file.
 * @return array Links containing "games", each terminated by " \n".
 */
function strip_link($inputFile, $inputFileLocation)
{
    // get DOM from URL or file
    $html = file_get_html($inputFileLocation . $inputFile);
    $regularHeading = $html->find('h2', 0)->plaintext;
    $postHeading = $html->find('h2', 1)->plaintext;
    $anchors = $html->find('table', 0)->find('a');

    // Reduces each anchor that mentions "games" to its bare href by cutting
    // the markup around it out of the anchor's string form.
    $collect = function ($candidates) {
        $found = array();
        foreach ($candidates as $anchor) {
            if (!preg_match("/games/", $anchor)) {
                continue;
            }
            $href = preg_replace('/<a href="/', '', $anchor) . " ";
            $found[] = preg_replace('/">(W|L)(.*)/', '', $href) . "\n";
        }
        return $found;
    };

    $links = array();
    if ($regularHeading == "Regular Season") {
        $links = array_merge($links, $collect($anchors));
        $anchors = $html->find('table', 1)->find('a');
    }
    if ($postHeading == "Postseason") {
        $links = array_merge($links, $collect($anchors));
    }
    return $links;
}
 /**
  * Returns the inner HTML of the first "div.content" element of the page at
  * $url after passing it through convert_smart_quotes() and utf8_encode(),
  * with surrounding whitespace trimmed.
  *
  * @param string $url Page to load.
  * @return string
  */
 function DeveloppezDotComExtractContent($url)
 {
     $page = file_get_html($url);
     $body = $page->find('div.content', 0)->innertext;
     return trim(utf8_encode(convert_smart_quotes($body)));
 }
示例#22
0
/**
 * Prints an RSS 2.0 feed built from an hd-area.org search.
 *
 * Every anchor with a non-empty title inside div[id=content] of the result
 * page becomes one <item>. When the page cannot be loaded the feed is still
 * printed, without items.
 *
 * @param string $u Search term, appended to the search URL as given.
 */
function crawl_site($u)
{
    $searchUrl = "http://www.hd-area.org/?s=search&q=" . $u;
    $html = file_get_html($searchUrl);

    // Text placed inside XML elements must have &, <, > and quotes escaped;
    // the search URL alone contains a raw "&". Existing entities are kept.
    $escape = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
    };

    $rss_item = "";
    if ($html) {
        foreach ($html->find('div[id=content]') as $element) {
            foreach ($element->find('a') as $url) {
                $title = $url->title;
                if (!empty($title)) {
                    $rss_item .= "<item>"
                        . "<title>" . $escape($title) . "</title>"
                        . "<link>" . $escape($url->href) . "</link>"
                        . "</item>";
                }
            }
        }
    }

    echo "<rss version=\"2.0\"><channel>";
    echo "<title>HD-Area Suche</title>";
    echo "<description>HD-Area-RSS-Feed-Generator</description>";
    echo "<link>" . $escape($searchUrl . "/") . "</link>";
    echo $rss_item;
    echo "</channel></rss>";
    echo "\n";
}
示例#23
0
 /**
  * Collects posts from a Sakugabooru listing page into $this->items.
  *
  * Post data is read from the JSON argument of each "Post.register(...)"
  * call embedded in the page.
  *
  * @param array $param Optional keys: 'p' (page number) and 't' (tag query).
  */
 public function collectData(array $param)
 {
     $page = 1;
     $tags = '';
     if (isset($param['p'])) {
         $page = (int) preg_replace("/[^0-9]/", '', $param['p']);
     }
     if (isset($param['t'])) {
         $tags = urlencode($param['t']);
     }
     $html = file_get_html("http://sakuga.yshi.org/post?page={$page}&tags={$tags}") or $this->returnError('Could not request Sakugabooru.', 404);

     $segments = explode('Post.register(', (string) $html);
     // The first segment is the markup before the first call, not a record.
     array_shift($segments);

     foreach ($segments as $segment) {
         // Cut everything from the closing "})" onwards. The "s" modifier lets
         // the cut span line breaks, so markup following the call cannot be
         // left behind and break json_decode().
         $json = json_decode(preg_replace('/}\\)(.*)/s', '}', $segment), TRUE);
         if (!is_array($json) || !isset($json['id'])) {
             // Not a decodable post record; skip instead of emitting an empty item.
             continue;
         }
         $item = new \Item();
         $item->uri = 'http://sakuga.yshi.org/post/show/' . $json['id'];
         $item->postid = $json['id'];
         $item->timestamp = $json['created_at'];
         $item->imageUri = $json['file_url'];
         $item->thumbnailUri = $json['preview_url'];
         $item->title = 'Sakugabooru | ' . $json['id'];
         $item->content = '<a href="' . $item->imageUri . '"><img src="' . $item->thumbnailUri . '" /></a><br>Tags: ' . $json['tags'];
         $this->items[] = $item;
     }
 }
示例#24
0
/**
 * Extracts the plain text of a verse from the passage page at $url.
 *
 * The text of the element containing the first chapter/verse number marker
 * is taken, footnotes are cut off, and the text is trimmed to start at the
 * first occurrence of $book, then $chapter, then $verse. Terminates the
 * script with an error message when the page or the marker cannot be found.
 *
 * @param string     $book    Book name to seek to.
 * @param int|string $chapter Chapter number to seek to.
 * @param int|string $verse   Verse number to seek to; 1 selects the chapter marker.
 * @param string     $url     Passage page to load.
 * @return string
 */
function getBGVerseText($book, $chapter, $verse, $url)
{
    $errorMessage = "<div class='error'>This verse doesn't exist.</div><p>Click <a href='" . $url . "' target='_blank'>here</a> to check. If you're still sure it's a verse you can <a href='javascript:showBGVerse(0);'>type it yourself.</a></p>";
    $html = file_get_html($url) or die($errorMessage);
    if ($verse == 1) {
        $data = $html->find('span[class=chapternum]');
    } else {
        $data = $html->find('sup[class=versenum]');
    }
    if (count($data) <= 0) {
        die($errorMessage);
    }
    $verse_text = (string) $data[0]->parent();
    $verse_text = str_replace("<br />", "\n", $verse_text);
    $verse_text = str_replace("&nbsp;", " ", $verse_text);
    $verse_text = ltrim(html_entity_decode(strip_tags($verse_text)));

    // Get rid of Footnotes
    $length = strpos($verse_text, "Footnotes");
    if ($length !== false && $length > 0) {
        $verse_text = substr($verse_text, 0, $length);
    }

    // Seek to the book, then the chapter, then the verse. Needles are cast to
    // strings because older PHP versions treat an integer needle as a
    // character code; a needle that is not found leaves the text unchanged.
    foreach (array($book, $chapter, $verse) as $needle) {
        $offset = strpos($verse_text, (string) $needle);
        if ($offset !== false) {
            $verse_text = substr($verse_text, $offset);
        }
    }

    // Get rid of the brackets that link to footnotes. Each bracket pair is
    // matched separately; the former [^)]* ran from the first "[" to the last
    // "]" and removed the verse text in between.
    $verse_text = trim(preg_replace('/\\[[^\\]]*\\]/', '', $verse_text));
    return $verse_text;
}
/**
 * Loads the listing page at $url and fills $arr with the fields extracted
 * from it by the get_* helper functions.
 *
 * @param string $url Listing page to load.
 * @param array  $arr Output: replaced by an array of listing fields keyed by
 *                    name ('url', 'no_bed', ..., 'justlisted_string').
 */
function getHTML($url, &$arr)
{
    $page = file_get_html($url);

    // Built locally and assigned at the end so $arr is replaced in one step.
    $listing = array();
    $listing['url'] = $url;
    $listing['no_bed'] = get_no_bed($page);
    $listing['no_bath'] = get_no_bath($page);
    $listing['no_car'] = get_no_car($page);
    $listing['address'] = get_address($page);
    $listing['agency'] = get_agency($page);
    $listing['agency_localDir'] = get_agency_localDir($page);
    $listing['first_agent_name'] = get_first_agent_name($page);
    $listing['first_agent_contact'] = get_first_agent_contact($page);
    $listing['listing_type'] = "";
    $listing['price'] = get_price($page);
    $listing['inspect_time'] = get_inspect_time($page);
    $listing['auction_time'] = get_auction_time($page);
    $listing['auction_date'] = get_auction_date($page);
    $listing['auction_day'] = get_day($listing['auction_date']);
    $listing['auction_hour'] = get_auction_hour($page);
    $listing['auction_inspect_hour'] = get_auction_inspect_hour($page);
    // The two summary strings are derived from fields collected above.
    $listing['auction_string'] = get_auction_string($listing['auction_date'], $listing['auction_hour'], $listing['auction_inspect_hour']);
    $listing['justlisted_string'] = get_justlisted_string($listing['inspect_time']);

    $arr = $listing;
}
示例#26
0
 /**
  * Searches cpasbien for $param['q'] and stores each result in $this->items.
  *
  * Every result row (class "ligne0" or "ligne1") has its detail page loaded
  * through the cache helpers to obtain the description, download link and
  * cover image.
  *
  * @param array $param Must contain 'q', the search keywords.
  */
 public function collectData(array $param)
 {
     $html = '';
     if (isset($param['q'])) {
         // Keyword search mode: spaces become dashes in the search path.
         $this->request = str_replace(" ", "-", trim($param['q']));
         $searchUrl = 'http://www.cpasbien.pw/recherche/' . urlencode($this->request) . '.html';
         $html = file_get_html($searchUrl) or $this->returnError('No results for this query.', 404);
     } else {
         $this->returnError('You must specify a keyword (?q=...).', 400);
     }

     foreach ($html->find('#gauche', 0)->find('div') as $row) {
         $rowClass = $row->getAttribute('class');
         if ($rowClass != 'ligne0' && $rowClass != 'ligne1') {
             continue;
         }

         $anchor = $row->find('a', 0);
         $href = $anchor->getAttribute('href');
         $detailPage = content_get_html($this->get_cached($href));

         $entry = new \Item();
         $entry->name = $anchor->text();
         $entry->title = $anchor->text();
         $entry->timestamp = $this->get_cached_time($href);

         // Prefer the second paragraph of the description; fall back to the first.
         $sheet = $detailPage->find('#textefiche', 0);
         $summary = $sheet->find('p', 1);
         if ($summary === null) {
             $summary = $sheet->find('p', 0);
         }
         $entry->content = $summary->text();

         $entry->id = $href;
         $entry->uri = $this->getURI() . $detailPage->find('#telecharger', 0)->getAttribute('href');
         $entry->thumbnailUri = $detailPage->find('#bigcover', 0)->find('img', 0)->getAttribute('src');
         $this->items[] = $entry;
     }
 }
示例#27
0
 /**
  * Returns the URL of the (first) large product image from an aStore images page.
  *
  * The URL is taken from the quoted strings of the script inside the
  * "content" element when there are several images, otherwise from the src
  * attribute found in the "bigImage" element.
  *
  * @param string $semi_asin Product identifier appended to the aStore URL.
  * @return string|null Image URL, or null when none could be found.
  */
 public function dom_request($semi_asin)
 {
     require_once "simple_html_dom.php";
     // Only the images page is needed; the detail page was previously
     // downloaded as well but never read.
     $htmlImage = file_get_html("http://astore.amazon.com/k-astore-20/images/" . $semi_asin);
     if (!$htmlImage) {
         return null;
     }

     // Several big images: every second quoted token of the script, starting
     // at index 2, is an image URL.
     $images = array();
     $scriptParts = explode("text/javascript", (string) $htmlImage->getElementById("content"));
     if (isset($scriptParts[1])) {
         $tokens = explode("\"", $scriptParts[1]);
         for ($i = 2; $i < count($tokens); $i += 2) {
             $images[] = $tokens[$i];
         }
     }
     if (isset($images[0]) && $images[0] !== "") {
         return $images[0];
     }

     // One big image: take the first quoted value after "src" in the markup
     // that precedes the first <br>.
     $beforeBreak = explode("<br>", (string) $htmlImage->getElementById("bigImage"));
     $srcParts = explode("src", $beforeBreak[0]);
     $quoted = isset($srcParts[1]) ? explode("\"", $srcParts[1]) : array();
     return isset($quoted[1]) ? $quoted[1] : null;
 }
示例#28
0
/**
 * Looks up the name analysis for $keyword on m.1518.com and returns it as text.
 *
 * @param string $keyword Name to analyse; must be 6 to 12 bytes long, and is
 *                        converted from UTF-8 to GB2312 for the request.
 * @return string The analysis text, or a user-facing message when the input
 *                is invalid or the lookup fails.
 */
function getFortuneInfo($keyword)
{
    if ($keyword == "") {
        return "请发送算命+人名,例如“算命张三”";
    }
    if (strlen($keyword) < 6 || strlen($keyword) > 12) {
        return "人名长度为2到4个汉字";
    }
    $lookupFailed = "程序检索出错!\n如果经常这样,请联系方倍工作室。";
    try {
        // include_once: a plain include redeclares the library on a second call.
        include_once 'simple_html_dom.php';
        $url = "http://m.1518.com/xingming_view.php?word=" . urlencode(mb_convert_encoding($keyword, 'gb2312', 'utf-8')) . "&submit1=%C8%B7%B6%A8&FrontType=1";
        $html_fotune = file_get_html($url);
        if (!$html_fotune) {
            // file_get_html() returns false on failure; there is nothing to clear().
            return $lookupFailed;
        }
        $infomation = "";
        foreach ($html_fotune->find('div[id="detail"] dl') as $item) {
            $curText = $item->plaintext;
            $curText = preg_replace('/\\s{2,}/i', ' ', $curText);
            $curText = preg_replace('/\\t{2,}/i', ' ', $curText);
            $curText = preg_replace('/:\\s/i', ':', $curText);
            $infomation .= trim($curText) . "\n";
            // Stop collecting once the reply exceeds 2000 bytes.
            if (strlen($infomation) > 2000) {
                break;
            }
        }
        $html_fotune->clear();
        $infomation = str_replace("\r\n", "\n", $infomation);
        $infomation = str_replace("天格", "\n天格", $infomation);
        $infomation = str_replace(" ", "", $infomation);
        return trim($infomation);
    } catch (Exception $e) {
        // Report the failure to the user instead of silently returning null.
        return $lookupFailed;
    }
}
/**
 * Scrapes title, rating and the "info" blocks of a movie page.
 *
 * Each div[class="info"] contributes one entry keyed by its <h5> text; the
 * value is the concatenated text of its links (except "more") and text
 * nodes. Scanning stops at the "User Comments:" block.
 *
 * @param string $url Movie page to load.
 * @return array Scraped fields; empty when the page cannot be loaded.
 */
function scraping_IMDB($url)
{
    $ret = array();

    // create HTML DOM
    $html = file_get_html($url);
    if (!$html) {
        return $ret;
    }

    // get title
    $titleNode = $html->find('title', 0);
    $ret['Title'] = $titleNode ? $titleNode->innertext : null;
    // get rating
    $ratingNode = $html->find('div[class="titlePageSprite star-box-giga-star"]', 0);
    $ret['Rating'] = $ratingNode ? $ratingNode->innertext : null;

    // get overview
    foreach ($html->find('div[class="info"]') as $div) {
        // Stop at user comments. break (not return) so the cleanup below
        // still runs and the parsed DOM is released.
        $heading = $div->find('h5', 0);
        if ($heading && $heading->innertext == 'User Comments:') {
            break;
        }
        $key = '';
        $val = '';
        foreach ($div->find('*') as $node) {
            if ($node->tag == 'h5') {
                $key = $node->plaintext;
            }
            if ($node->tag == 'a' && $node->plaintext != 'more') {
                $val .= trim(str_replace("\n", '', $node->plaintext));
            }
            if ($node->tag == 'text') {
                $val .= trim(str_replace("\n", '', $node->plaintext));
            }
        }
        $ret[$key] = $val;
    }

    // clean up memory
    $html->clear();
    unset($html);
    return $ret;
}
示例#30
0
/**
 * Fetches the analysis page for a six-digit stock code and returns it as a
 * list of message entries.
 *
 * @param string $stockcode Six-digit stock code.
 * @return array|string A usage message when $stockcode is not six digits;
 *                      otherwise a list of arrays with the keys Title,
 *                      Description, PicUrl and Url (empty when the page
 *                      cannot be loaded).
 */
function getStockAnalysis($stockcode)
{
    if (!preg_match("/^\\d{6}\$/", $stockcode)) {
        return "发送分析加上6位数字代码,例如“分析000063”";
    }
    $resultArray = array();
    include_once 'simple_html_dom.php';
    try {
        $url = "http://m.ghzq.cn/weixin/index.aspx?code=" . $stockcode;
        $html_analysis = file_get_html($url);
        if (!$html_analysis) {
            // file_get_html() returns false on failure; there is nothing to clear().
            return $resultArray;
        }

        // Text of a node that may be missing from the page.
        $textOf = function ($node) {
            return $node ? $node->plaintext : '';
        };

        $stock = $textOf($html_analysis->find('div[class="row first"] div', 0));
        $resultArray[] = array("Title" => trim($stock), "Description" => "", "PicUrl" => "", "Url" => "");

        // The three div[class="font"] blocks, in page order: fundamentals,
        // technical/trend view and institutional ratings. "%" is doubled in
        // each title.
        $sections = array("【基本面】\n", "【技术面】\n", "【机构认同】\n");
        foreach ($sections as $index => $label) {
            $body = $textOf($html_analysis->find('div[class="font"]', $index));
            $resultArray[] = array("Title" => str_replace("%", "%%", $label . $body), "Description" => "", "PicUrl" => "", "Url" => "");
        }
        $html_analysis->clear();
    } catch (Exception $e) {
        // Best effort: whatever was collected before the failure is returned.
    }
    return $resultArray;
}