Example #1
0
    if (isset($json->error)) {
        print_r($json->error);
        exit;
    } else {
        if (isset($json->items) && gettype($json->items) == "array" && count($json->items) > 0) {
            $i = 0;
            foreach ($json->items as $data) {
                $data->link = preg_replace("/_sml/i", "_big", $data->link);
                echo $i + 1 . "- " . $data->link . "\n";
                echo "-- " . $data->image->contextLink . "\n";
                $data->date = $argv[1];
                $data->category = $category ? $category : null;
                $data->tag = $argv[2];
                $chk_data = array("link" => $data->link, "contextLink" => $data->image->contextLink, "tag" => $data->tag);
                if (!is_exists($MongoColl, $chk_data)) {
                    echo "-- " . insert_mongodb($MongoColl, $data) . "\n";
                } else {
                    echo "-- 已經資料了\n";
                }
                $i++;
            }
            if ($json->queries->request[0]->count < 10) {
                exit;
            }
        } else {
            print_r($json->error);
            exit;
        }
    }
    sleep(1);
}
Example #2
0
/**
 * Fetch one page of Google Custom Search image results (XML output), store
 * every result that is not already present, then recurse into the next page.
 *
 * Each requested URL is logged to $MongoColl3. Every new result is inserted
 * into $MongoColl1, and a copy carrying the JSON-encoded raw item is inserted
 * into $MongoColl2. Progress is echoed to standard output.
 *
 * NOTE: this function terminates the whole script via exit when the request
 * fails, the response cannot be parsed, or the response reports zero results.
 *
 * @param mixed  $category Category label stored with every row (may be null).
 * @param string $q        Search term; used as the "q" query parameter and
 *                         stored as the row's "tag".
 * @param int    $start    Value of the "start" query parameter; also used to
 *                         decide whether the next page is further ahead.
 * @param string $date     Value of the "as_qdr" query parameter.
 * @param string $url      Explicit URL to request instead of building one
 *                         (used when following the "next page" link).
 *
 * @return void
 */
function get_xml_gss($category = null, $q, $start = 0, $date = "y1", $url = "")
{
    global $MongoColl1, $MongoColl2, $MongoColl3;

    // The pagination link comes out of SimpleXML; normalise to a plain string
    // so it is never logged to MongoDB or handed to cURL as an object.
    $url = (string) $url;

    if ($url == "") {
        // An earlier variant wrapped $q in double quotes (exact phrase) and
        // requested imgsz=large instead of imgsz=medium.
        // NOTE(review): $q is concatenated without URL-encoding; confirm
        // whether callers pass an already-encoded term before changing this.
        $url = "http://www.google.com/cse?cx=" . CX_PAID . "&client=google-csbe&output=xml_no_dtd&q=" . $q . "&start=" . $start . "&searchtype=image&as_filetype=jpg&imgsz=medium&as_qdr=" . $date;
    } else {
        // Relative "next page" links are made absolute.
        if (preg_match("/^\\/images\\?q=/", $url)) {
            $url = "http://www.google.com" . $url;
        }
    }

    echo "開始取得 {$category} - {$q} {$date} 的第 {$start} 筆索引開始的資料\nURL:" . $url . "\n";
    insert_mongodb($MongoColl3, array("url" => $url, "datetime" => date("YmdHis")));

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    // allow redirects
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // return the response body instead of printing it
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // times out after 3s
    curl_setopt($ch, CURLOPT_TIMEOUT, 3);

    // submit the xml request and get the response
    $result = curl_exec($ch);
    // Read the error text before the handle is closed.
    $curl_error = ($result === false) ? curl_error($ch) : "";
    curl_close($ch);

    // A transport/HTTP failure must not be reported as "zero results".
    if ($result === false) {
        echo "- 取得資料失敗: " . $curl_error . "\n";
        exit(1);
    }

    // now parse the xml
    $xml = simplexml_load_string($result);
    if ($xml === false) {
        echo "- XML 解析失敗\n";
        exit(1);
    }

    if ((int) $xml->RES->M === 0) {
        exit("- 總筆數: 0\n");
    }
    echo "- 總共:" . $xml->RES->M . " 筆\n";

    if (isset($xml->RES->R)) {
        foreach ($xml->RES->R as $item) {
            $row = array();
            $row["link"] = (string) $item->U;
            $row["contextLink"] = (string) $item->RU;
            $row["kind"] = "sitesearch#result";

            // Attributes are copied onto the node as child elements so that
            // they can be read below and are included in json_encode($item).
            if ($item->attributes()->count() > 0) {
                foreach ($item->attributes() as $name => $attrib) {
                    $item->{$name} = $attrib;
                }
                $row["RK"] = (int) $item->N;
                $row["mime"] = (string) $item->MIME;
            }

            // Only touch <IMG> when the result actually carries one.
            if (isset($item->IMG) && $item->IMG->attributes()->count() > 0) {
                foreach ($item->IMG->attributes() as $name => $attrib) {
                    $item->IMG->{$name} = $attrib;
                }
                $row["height"] = (int) $item->IMG->HT;
                $row["width"] = (int) $item->IMG->WH;
                $row["byteSize"] = (int) $item->IMG->SZ;
            }

            // Same guard for the thumbnail element.
            if (isset($item->TBN) && $item->TBN->attributes()->count() > 0) {
                foreach ($item->TBN->attributes() as $name => $attrib) {
                    $item->TBN->{$name} = $attrib;
                }
                $row["thumbnailLink"] = (string) $item->TBN->URL;
                $row["thumbnailHeight"] = (int) $item->TBN->HT;
                $row["thumbnailWidth"] = (int) $item->TBN->WH;
            }

            $row["title"] = html2text($item->T);
            $row["htmlTitle"] = (string) $item->T;
            $row["snippet"] = html2text($item->S);
            $row["htmlSnippet"] = (string) $item->S;
            $row["timeStamp"] = get_timestamp_date($item->TIMESTAMP);

            // Prefer the byline date from the response; otherwise derive a
            // date from the link via fetch_date().
            if (isset($item->BYLINEDATE) && $item->BYLINEDATE) {
                $row["bylineDate"] = (int) $item->BYLINEDATE;
                $row["date"] = (string) date("Y/m/d", $row["bylineDate"]);
            } else {
                $row["bylineDate"] = null;
                $row["date"] = (string) fetch_date($row["link"]);
            }

            $row["category"] = $category;
            $row["tag"] = $q;

            // "RK" is only set when the result had attributes.
            echo "-- Rank: " . (isset($row["RK"]) ? $row["RK"] : "") . "\n";
            echo "--- " . $row["link"] . "\n";
            echo "--- " . $row["contextLink"] . "\n";

            // A row counts as a duplicate when link, contextLink and tag all match.
            $chk_data = array("link" => $row["link"], "contextLink" => $row["contextLink"], "tag" => $row["tag"]);
            if (!is_exists($MongoColl1, $chk_data)) {
                echo "--- insert1: " . (insert_mongodb($MongoColl1, $row) ? true : false) . "\n";
                // The second collection additionally keeps the raw item as JSON.
                $row["json"] = json_encode($item);
                echo "--- insert2: " . (insert_mongodb($MongoColl2, $row) ? true : false) . "\n";
            } else {
                echo "--- 已經有資料了\n";
            }
        }
    }

    if (isset($xml->RES->NB->NU)) {
        $next_url = (string) $xml->RES->NB->NU;
        $next = get_xml_next($xml->RES->NB->NU);
        // Only follow the link when it moves forward, to avoid looping.
        if ($next > $start) {
            echo "- 下一頁:" . $next . "\n";
            echo "-- url: " . $next_url . "\n";
            sleep(1);
            get_xml_gss($category, $q, $next, $date, $next_url);
        }
    }
}