Example #1
0
/**
 * Scrape the Taisugar price-history page for one date and store the row
 * belonging to product 01021050.
 *
 * The date is posted to the page, every <td> of the response is collected,
 * and the cells are walked in fixed-size steps until the product-id cell
 * matches. The matching row is handed to insertDB() and dumped with print_r().
 * Nothing is inserted when the page has no matching row.
 *
 * @param mixed $db   Passed through unchanged to insertDB().
 * @param mixed $y    Sent as the 'strYear' form field.
 * @param mixed $m    Sent as the 'strMonth' form field.
 * @param mixed $d    Sent as the 'strDay' form field.
 * @param mixed $time Stored with the prices as the 'time' value.
 *
 * @return void
 */
function scrap($db, $y, $m, $d, $time)
{
    // Product id whose row is stored.
    $targetPid = '01021050';
    // Index of the first <td> treated as a product id; the per-kg and
    // per-bag prices are read from the two cells that follow it.
    $firstPidCell = 13;
    // Distance, in <td> cells, from one product-id cell to the next.
    $rowStride = 7;

    $postFields = array('strYear' => $y, 'strMonth' => $m, 'strDay' => $d);
    try {
        $url = 'http://g1.taisugar.com.tw/Sugar/Sugar_show_His.asp';
        $sugar = curl($url, $postFields);
        $packtSugarXpath = returnXPathObject($sugar);
        $td = $packtSugarXpath->query('//td');
        $cellCount = $td->length;

        // Only visit rows whose three cells all exist. DOMNodeList::item()
        // returns null past the end of the list, which previously let this
        // search run forever when the product was missing from the page.
        for ($pidCell = $firstPidCell; $pidCell + 2 < $cellCount; $pidCell += $rowStride) {
            $pid = clean(filter_var(
                $td->item($pidCell)->nodeValue,
                FILTER_SANITIZE_NUMBER_FLOAT,
                FILTER_FLAG_ALLOW_FRACTION
            ));

            // Loose comparison is kept on purpose so numeric strings still
            // compare by value, exactly as the original check did.
            if ($pid != $targetPid) {
                continue;
            }

            $result = array(
                'pid' => $pid,
                'pricePerKg' => filter_var(
                    $td->item($pidCell + 1)->nodeValue,
                    FILTER_SANITIZE_NUMBER_FLOAT,
                    FILTER_FLAG_ALLOW_FRACTION
                ),
                'pricePerBag' => filter_var(
                    $td->item($pidCell + 2)->nodeValue,
                    FILTER_SANITIZE_NUMBER_FLOAT,
                    FILTER_FLAG_ALLOW_FRACTION
                ),
                'time' => $time,
            );

            insertDB($db, $result['pid'], $result['pricePerKg'], $result['pricePerBag'], $result['time']);
            print_r($result);

            return;
        }
        // Reaching this point means the page had no row for the product.
    } catch (Exception $ex) {
        echo "failed </br>";
    }
}
Example #2
0
    return $results;
}
/**
 * Build a DOMXPath for an HTML string.
 *
 * Markup problems reported by libxml are collected and discarded instead of
 * being silenced with the @ operator, and the caller's libxml error mode is
 * restored afterwards. Empty input (for example a failed download that
 * produced '' or false) yields an XPath object over an empty document rather
 * than an error from loadHTML().
 *
 * @param string|false|null $item HTML source to parse.
 *
 * @return DOMXPath XPath evaluator bound to the parsed document.
 */
function returnXPathObject($item)
{
    $xmlPageDom = new DOMDocument();

    // loadHTML() rejects an empty source, so only parse when there is content.
    if ($item !== '' && $item !== false && $item !== null) {
        $previousMode = libxml_use_internal_errors(true);
        $xmlPageDom->loadHTML($item);
        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);
    }

    return new DOMXPath($xmlPageDom);
}
// Prepare the INSERT once and bind its ten placeholders to variables.
// NOTE(review): $conn is created outside this view — presumably a mysqli
// connection, in which case "ssssssssss" declares ten string parameters; confirm.
// The bound *db variables are not assigned in the visible part of the loop.
$stmt = $conn->prepare("INSERT INTO Products (title, price, payment, shippingOpt, shippingTime, bluetooth, brand, prdCondition, model, weight) VALUES (?,?,?,?,?,?,?,?,?,?)");
$stmt->bind_param("ssssssssss", $titledb, $pricedb, $paymentdb, $shippingOptdb, $shippingTimedb, $bluetoothdb, $branddb, $prdConditiondb, $modeldb, $weightdb);
// Process the locally saved product pages page1 .. page99.
for ($x = 1; $x < 100; $x++) {
    // NOTE(review): file_get_contents() returns false for a missing file and
    // the result is passed on unchecked.
    $productPage = file_get_contents("/home/spontaneous/Desktop/kaymu/mob/page{$x}");
    // Fields scraped from the current page, reset on every iteration.
    $products = array();
    $productPageXPath = returnXPathObject($productPage);
    // Title: text of the first <span class="prd-title">, if any.
    $title = $productPageXPath->query('//span[@class="prd-title"]');
    if ($title->length > 0) {
        $products['title'] = trim($title->item(0)->nodeValue);
    }
    //$price = $packtPageXPath->query('//span[@id="price_box"]');
    // Price: text of the first element with id="price_box", whatever its tag.
    $price = $productPageXPath->query('//*[@id="price_box"]');
    if ($price->length > 0) {
        //$packtBook['price'] = trim($overview->item(0)->nodeValue);
        $products['price'] = trim($price->item(0)->nodeValue);
    }
    // Attribute boxes; the class attribute must match this exact string.
    $shiping = $productPageXPath->query('//div[@class="boxAttribute rtl-right"]');
    if ($shiping->length > 0) {
        // NOTE(review): the bound is length - 1, so the last matching div is
        // never visited — confirm this is intended.
        for ($i = 0; $i < $shiping->length - 1; $i++) {
            $children = $shiping->item($i)->childNodes;
            // Text of the second child node (index 1) of the box.
            // NOTE(review): item(1) is null when the box has fewer than two children.
            $ship = trim($children->item(1)->nodeValue);
Example #3
0
// Crawl the product-list pages and scrape the page link for each product
<?php 
/**
 * Parse an HTML string and return an XPath evaluator for it.
 *
 * libxml parse errors are captured and cleared rather than hidden with the
 * @ operator; the previous libxml error mode is put back before returning.
 * When the input is empty ('' / false / null) no parsing is attempted and
 * the returned object queries an empty document, so every query simply
 * yields an empty node list.
 *
 * @param string|false|null $item HTML source to parse.
 *
 * @return DOMXPath XPath evaluator bound to the parsed document.
 */
function returnXPathObject($item)
{
    $xmlPageDom = new DOMDocument();

    $hasContent = $item !== '' && $item !== false && $item !== null;
    if ($hasContent) {
        // Collect malformed-markup warnings internally for the duration of the parse.
        $previousMode = libxml_use_internal_errors(true);
        $xmlPageDom->loadHTML($item);
        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);
    }

    $xmlPageXPath = new DOMXPath($xmlPageDom);

    return $xmlPageXPath;
}
// Output file: receives one absolute product URL per line.
$file = 'links.txt';

// Walk the locally saved listing pages page1 .. page97.
for ($i = 1; $i < 98; $i++) {
    $filename = '/home/spontaneous/Desktop/kaymu/mobile/page' . $i;

    // Skip pages that were never saved instead of failing on a bad handle.
    if (!is_readable($filename)) {
        continue;
    }

    // Read the whole page in one call; an empty file has nothing to parse.
    $webPage = file_get_contents($filename);
    if ($webPage === false || $webPage === '') {
        continue;
    }

    $packtPageXPath = returnXPathObject($webPage);
    $anchor = $packtPageXPath->query('//*[@id="productsCatalog"]/div/div/a');

    // Gather this page's links first so the output file is opened once per
    // page rather than once per link.
    $links = '';
    foreach ($anchor as $productAnchor) {
        $links .= "http://www.kaymu.com.np" . $productAnchor->getAttribute('href') . "\n";
    }

    if ($links !== '') {
        file_put_contents($file, $links, FILE_APPEND);
    }
}
Example #4
0
// Rankings page 1: collect team name => Elo into $scrapedData.
$gosupage1 = getPage('http://www.gosugamers.net/dota2/rankings?page=1');
$gosuXPath1 = returnXPathObject($gosupage1);
//GET IMPORTANT DATA FROM XPATH OBJECT
$teamName1 = $gosuXPath1->query('//span[@class="main no-game"]');
//query for team name
$teamElo1 = $gosuXPath1->query('//td[@class="numbers"]');
//query for team Elo
// Names and Elo cells are paired by position, so stop at the shorter list.
// Bounding by length avoids reading ->nodeValue on the null that item()
// returns past the end, and a team whose name is "0" no longer ends the scan.
$pairCount1 = min($teamName1->length, $teamElo1->length);
for ($i = 0; $i < $pairCount1; $i++) {
    $name = (string) $teamName1->item($i)->nodeValue;
    if ($name === '') {
        continue;
    }
    //replace commas from elo value eg) 1,000
    $scrapedData[$name] = str_replace(',', '', $teamElo1->item($i)->nodeValue);
}
//SAME THING BUT SECOND PAGE (TEAMS 51-100)
$gosupage2 = getPage('http://www.gosugamers.net/dota2/rankings?page=2');
$gosuXPath2 = returnXPathObject($gosupage2);
$teamName2 = $gosuXPath2->query('//span[@class="main no-game"]');
//query for team name
$teamElo2 = $gosuXPath2->query('//td[@class="numbers"]');
//query for team Elo
$pairCount2 = min($teamName2->length, $teamElo2->length);
for ($i = 0; $i < $pairCount2; $i++) {
    $name = (string) $teamName2->item($i)->nodeValue;
    if ($name === '') {
        continue;
    }
    $scrapedData[$name] = str_replace(',', '', $teamElo2->item($i)->nodeValue);
}
print_r($scrapedData);
    return $result;
}
//Starts empty; the rankings scraper stores team name => Elo entries in it.
$scrapedData = array();
//CONVERT TO XPATH OBJECT
//Parses an HTML string and returns a DOMXPath for querying it. Empty input
//('' / false / null) is not parsed: the returned object then queries an empty
//document, so a failed page fetch gives empty results instead of an error.
function returnXPathObject($item)
{
    $xmlPageDom = new DOMDocument();
    //instantiate
    if ($item !== '' && $item !== false && $item !== null) {
        //collect libxml markup warnings internally instead of hiding them with @
        $previousMode = libxml_use_internal_errors(true);
        $xmlPageDom->loadHTML($item);
        //load
        libxml_clear_errors();
        //restore whatever error mode the caller had
        libxml_use_internal_errors($previousMode);
    }
    $xmlPageXPath = new DOMXPath($xmlPageDom);
    //instantiate xpath object
    return $xmlPageXPath;
}
// Fetch the dota2lounge front page and print each upcoming match with the
// odds shown on the site, followed by the odds from calculateOdds().
$loungepage = getPage('http://www.dota2lounge.com/');
$loungeXPath = returnXPathObject($loungepage);
echo 'UPCOMING GAMES:<br>';
$match = $loungeXPath->query('//div[@class="match"]');
// $i indexes the match divs, $j the "teamtext" divs; both are advanced in the
// part of the loop that lies outside this view.
$i = 0;
$j = 0;
// NOTE(review): the loop ends by reading ->nodeValue on the null that item()
// returns past the last match, rather than by comparing $i with ->length.
while ($match->item($i)->nodeValue) {
    // NOTE(review): this query does not depend on $i or $j, yet it is re-run
    // on every iteration.
    $teams = $loungeXPath->query('//div[@class="teamtext"]');
    // Each team text is split into everything except its last three
    // characters (the name) and the two characters before the final one
    // (printed as the odds) — presumably the text ends in something like
    // "NN%"; confirm against the page.
    $team1 = substr($teams->item($j)->nodeValue, 0, -3);
    $odds1 = substr($teams->item($j)->nodeValue, -3, -1);
    // The opposing team is read from the next "teamtext" div.
    $team2 = substr($teams->item($j + 1)->nodeValue, 0, -3);
    $odds2 = substr($teams->item($j + 1)->nodeValue, -3, -1);
    echo $team1 . ' (' . $odds1 . '%) ' . $team2 . ' (' . $odds2 . '%)<br>';
    echo 'ACTUAL ODDS: ';
    // calculateOdds() and $conn are defined outside this view.
    $actualOdds = calculateOdds($team1, $team2, $conn);
    // Results below 0.01 are replaced with the placeholder 'N/A'.
    if ($actualOdds < 0.01) {
        $actualOdds = 'N/A';