if (isset($json->error)) { print_r($json->error); exit; } else { if (isset($json->items) && gettype($json->items) == "array" && count($json->items) > 0) { $i = 0; foreach ($json->items as $data) { $data->link = preg_replace("/_sml/i", "_big", $data->link); echo $i + 1 . "- " . $data->link . "\n"; echo "-- " . $data->image->contextLink . "\n"; $data->date = $argv[1]; $data->category = $category ? $category : null; $data->tag = $argv[2]; $chk_data = array("link" => $data->link, "contextLink" => $data->image->contextLink, "tag" => $data->tag); if (!is_exists($MongoColl, $chk_data)) { echo "-- " . insert_mongodb($MongoColl, $data) . "\n"; } else { echo "-- 已經資料了\n"; } $i++; } if ($json->queries->request[0]->count < 10) { exit; } } else { print_r($json->error); exit; } } sleep(1); }
/**
 * Fetch one page of Google Site Search XML image results, store unseen rows
 * in MongoDB, then follow the "next page" link recursively.
 *
 * Side effects visible in this function:
 *  - echoes progress lines to standard output;
 *  - logs every requested URL (with a YmdHis timestamp) into $MongoColl3;
 *  - for each result not already present in $MongoColl1 (matched on link,
 *    contextLink and tag) inserts the row into $MongoColl1 and, with an extra
 *    "json" field holding the encoded XML item, into $MongoColl2;
 *  - terminates the script via exit() when the request fails, the response is
 *    not parsable XML, or the reported total is 0.
 *
 * @param mixed  $category Category label echoed and stored in each row's "category" field.
 * @param string $q        Search query; placed in the URL's q= parameter and stored as "tag".
 * @param int    $start    Index of the first result to request (URL start= parameter).
 * @param string $date     Value for the URL's as_qdr= parameter.
 * @param string $url      URL to request; when empty it is built from the other arguments.
 *                         A value starting with "/images?q=" is prefixed with the Google host.
 *
 * @return void
 */
function get_xml_gss($category = null, $q, $start = 0, $date = "y1", $url = "")
{
    global $MongoColl1, $MongoColl2, $MongoColl3;

    if ($url == "") {
        // An earlier variant of this URL wrapped the query in double quotes and
        // requested imgsz=large instead of imgsz=medium.
        // NOTE(review): $q is concatenated without URL-encoding; confirm whether
        // callers encode it before changing this.
        $url = "http://www.google.com/cse?cx=" . CX_PAID . "&client=google-csbe&output=xml_no_dtd&q=" . $q . "&start=" . $start . "&searchtype=image&as_filetype=jpg&imgsz=medium&as_qdr=" . $date;
    } else {
        // Callers (including the recursion below) may hand over a SimpleXMLElement;
        // normalise to a plain string so the logged URL is never an object.
        $url = (string) $url;
        if (preg_match("/^\\/images\\?q=/", $url)) {
            $url = "http://www.google.com" . $url;
        }
    }

    echo "開始取得 {$category} - {$q} {$date} 的第 {$start} 筆索引開始的資料\nURL:" . $url . "\n";
    insert_mongodb($MongoColl3, array("url" => $url, "datetime" => date("YmdHis")));

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);    // HTTP status >= 400 makes curl_exec() return false
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // allow redirects
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_TIMEOUT, 3);        // whole transfer times out after 3 seconds

    // Submit the XML request; the error text must be read before the handle is closed.
    $result = curl_exec($ch);
    $curlError = ($result === false) ? curl_error($ch) : "";
    curl_close($ch);

    if ($result === false) {
        // Previously a failed transfer fell through and was reported as "0 results".
        echo "- request failed: " . $curlError . "\n";
        exit(1);
    }

    $xml = simplexml_load_string($result);
    if ($xml === false) {
        echo "- response is not valid XML\n";
        exit(1);
    }

    if ($xml->RES->M == 0) {
        exit("- 總筆數: 0\n");
    }
    echo "- 總共:" . $xml->RES->M . " 筆\n";

    // Copies every attribute of $node into a same-named child element so the
    // values can be read as $node->NAME and show up in json_encode($item).
    // Returns true when at least one attribute was copied.
    $promoteAttributes = function ($node) {
        $attributes = $node->attributes();
        if ($attributes === null || $attributes->count() == 0) {
            return false;
        }
        foreach ($attributes as $name => $attrib) {
            $node->{$name} = $attrib;
        }
        return true;
    };

    if (isset($xml->RES->R)) {
        foreach ($xml->RES->R as $item) {
            // Key order is kept stable because it becomes the stored document's field order.
            $row = array();
            $row["link"] = (string) $item->U;
            $row["contextLink"] = (string) $item->RU;
            $row["kind"] = "sitesearch#result";

            if ($promoteAttributes($item)) {
                $row["RK"] = (int) $item->N;
                $row["mime"] = (string) $item->MIME;
            }

            // Guard on the child's existence before touching its attributes.
            if (isset($item->IMG) && $promoteAttributes($item->IMG)) {
                $row["height"] = (int) $item->IMG->HT;
                $row["width"] = (int) $item->IMG->WH;
                $row["byteSize"] = (int) $item->IMG->SZ;
            }

            if (isset($item->TBN) && $promoteAttributes($item->TBN)) {
                $row["thumbnailLink"] = (string) $item->TBN->URL;
                $row["thumbnailHeight"] = (int) $item->TBN->HT;
                $row["thumbnailWidth"] = (int) $item->TBN->WH;
            }

            $row["title"] = html2text($item->T);
            $row["htmlTitle"] = (string) $item->T;
            $row["snippet"] = html2text($item->S);
            $row["htmlSnippet"] = (string) $item->S;
            $row["timeStamp"] = get_timestamp_date($item->TIMESTAMP);

            if (isset($item->BYLINEDATE) && $item->BYLINEDATE) {
                $row["bylineDate"] = (int) $item->BYLINEDATE;
                $row["date"] = (string) date("Y/m/d", $row["bylineDate"]);
            } else {
                // No byline date in the result: derive the date from the link instead.
                $row["bylineDate"] = null;
                $row["date"] = (string) fetch_date($row["link"]);
            }

            $row["category"] = $category;
            $row["tag"] = $q;

            // "RK" is absent when the result element carried no attributes.
            $rank = isset($row["RK"]) ? $row["RK"] : "";
            echo "-- Rank: " . $rank . "\n";
            echo "--- " . $row["link"] . "\n";
            echo "--- " . $row["contextLink"] . "\n";

            $chk_data = array(
                "link" => $row["link"],
                "contextLink" => $row["contextLink"],
                "tag" => $row["tag"],
            );

            if (!is_exists($MongoColl1, $chk_data)) {
                // The boolean prints as "1" on success and as an empty string on failure.
                echo "--- insert1: " . (insert_mongodb($MongoColl1, $row) ? true : false) . "\n";
                // The second collection additionally keeps the full item as JSON.
                $row["json"] = json_encode($item);
                echo "--- insert2: " . (insert_mongodb($MongoColl2, $row) ? true : false) . "\n";
            } else {
                echo "--- 已經有資料了\n";
            }
        }
    }

    if (isset($xml->RES->NB->NU)) {
        $next = get_xml_next($xml->RES->NB->NU);
        // Only continue when the next page really advances past the current start.
        if ($next > $start) {
            echo "- 下一頁:" . $next . "\n";
            echo "-- url: " . $xml->RES->NB->NU . "\n";
            sleep(1);
            get_xml_gss($category, $q, $next, $date, (string) $xml->RES->NB->NU);
        }
    }
}