/**
 * Process one batch of pages through getDocData() and checkpoint the progress.
 *
 * Selects up to $batchSize rows from the "page" table with page_id greater
 * than $lastPage, page_namespace = 0 and page_is_redirect = 0, ordered by
 * page_id. Each row is handed to getDocData() and the returned text is
 * accumulated. The id of the last row seen is then written to $lastPageFile.
 *
 * @param object $dbr          Database handle exposing select().
 * @param string $lastPageFile Path of the checkpoint file that receives the last page id.
 * @param mixed  $lastPage     Only pages with a page_id above this value are selected.
 * @param mixed  $since        When truthy, adds a "page_touched > $since" condition.
 * @param int    $batchSize    Maximum number of rows to fetch (SQL LIMIT).
 * @param bool   $printIntro   Forwarded unchanged to getDocData().
 * @param bool   $verbose      When true, dumps the accumulated data through decho().
 *
 * @return int Number of rows processed, or -1 when the query matched no rows.
 */
function getIntros($dbr, $lastPageFile, $lastPage, $since, $batchSize, $printIntro = false, $verbose = false)
{
    // The id is interpolated into SQL, so force it to an integer first.
    $afterId = (int) $lastPage;

    $conds = array(
        "page_id > {$afterId}",
        "page_namespace = 0",
        "page_is_redirect = 0",
    );
    if ($since) {
        // NOTE(review): $since is interpolated unquoted; presumably a numeric
        // timestamp supplied by the script itself -- confirm against callers.
        $conds[] = "page_touched > {$since}";
    }

    $res = $dbr->select(
        "page",
        "page_id, page_title, page_counter",
        $conds,
        __FILE__,
        array("ORDER BY" => "page_id", "LIMIT" => $batchSize)
    );

    // Must start as an empty string: the original appended to an undefined variable.
    $postData = '';
    $id = $afterId;
    $count = 0;
    foreach ($res as $row) {
        $id = $row->page_id;
        $postData .= getDocData($row, $dbr, $printIntro);
        $count++;
    }

    if ($count == 0) {
        // Nothing matched: leave the checkpoint file untouched.
        return -1;
    }

    if ($verbose) {
        // The original passed an undefined $contents here; the collected data
        // is the only result this function produces.
        decho("result", $postData, false);
    }

    // Best-effort checkpoint; a failed write is deliberately not fatal.
    @file_put_contents($lastPageFile, $id);

    return $count;
}
/**
 * Build an <add>...</add> payload for one batch of pages and POST it via curl.
 *
 * Selects up to $batchSize rows from the "page" table with page_id greater
 * than $lastPage, page_namespace = 0 and page_is_redirect = 0, ordered by
 * page_id. The getDocData() output for each row is concatenated, wrapped in
 * <add> tags and sent through the supplied curl handle. The id of the last
 * row seen is then written to $lastPageFile.
 *
 * @param object $dbr          Database handle exposing select().
 * @param mixed  $ch           Curl handle; the caller is expected to have set the target URL and method.
 * @param string $lastPageFile Path of the checkpoint file that receives the last page id.
 * @param mixed  $lastPage     Only pages with a page_id above this value are selected.
 * @param mixed  $since        When truthy, adds a "page_touched > $since" condition.
 * @param int    $batchSize    Maximum number of rows to fetch (SQL LIMIT).
 * @param mixed  $noImages     Forwarded unchanged to getDocData().
 * @param bool   $verbose      When true, dumps the request body and the response through decho().
 *
 * @return int Number of rows posted, or -1 when the query matched no rows.
 */
function postDocuments($dbr, $ch, $lastPageFile, $lastPage, $since, $batchSize, $noImages, $verbose = false)
{
    // The id is interpolated into SQL, so force it to an integer first.
    $afterId = (int) $lastPage;

    $conds = array(
        "page_id > {$afterId}",
        "page_namespace = 0",
        "page_is_redirect = 0",
    );
    if ($since) {
        // NOTE(review): $since is interpolated unquoted; presumably a numeric
        // timestamp supplied by the script itself -- confirm against callers.
        $conds[] = "page_touched > {$since}";
    }

    $res = $dbr->select(
        "page",
        "page_id, page_title, page_counter",
        $conds,
        __FILE__,
        array("ORDER BY" => "page_id", "LIMIT" => $batchSize)
    );

    $postData = '<add>';
    $id = $afterId;
    $count = 0;
    foreach ($res as $row) {
        $id = $row->page_id;
        $postData .= getDocData($row, $noImages);
        $count++;
    }

    if ($count == 0) {
        // Nothing matched: skip the request and leave the checkpoint untouched.
        return -1;
    }
    $postData .= '</add>';

    if ($verbose) {
        decho("will post", $postData, false);
    }

    curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    $contents = curl_exec($ch);

    if ($verbose) {
        decho("result", $contents, false);
    }

    if (curl_errno($ch)) {
        // The original interpolated an undefined $url; ask the handle for it.
        $url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        echo "curl error {$url}: " . curl_error($ch) . "\n";
    }

    // NOTE(review): the checkpoint advances even when the POST failed, so a
    // failed batch is not retried on the next run -- confirm this is intended.
    // Best-effort checkpoint; a failed write is deliberately not fatal.
    @file_put_contents($lastPageFile, $id);

    return $count;
}