コード例 #1
0
ファイル: crawl.php プロジェクト: janladaking/CodeIgniter
 /**
  * Submit a keyword ranking request to the keyword server REST API.
  *
  * Marks $cron_job as in-process (when it is not already), POSTs the search
  * parameters to the server, stores the raw cURL exchange in `curl_logs`,
  * and writes a `keyword_cron_jobs_log` row describing the outcome. When the
  * server answers with a 2xx/3xx status, a waiting "child" row is also
  * created in `keyword_cron_jobs` for the returned request.
  *
  * @param string      $site       Site identifier, matched case-insensitively. 'walmart' and 'pgestore' use the best-sellers endpoint.
  * @param string      $keyword    Search term(s), sent as `searchterms_str`.
  * @param object      $cron_job   Parent cron job record; `id`, `status` and `group_id` are read and `status` may be updated.
  * @param string      $location   Used only for 'amazonfresh': 'nocal' or 'seattle' (case-insensitive); any other value is sent as 'southern_cali'.
  * @param string      $server     Host of the keyword server; port 6543 is appended.
  * @param int         $quantity   Sent to the server as `quantity`.
  * @param string|null $group_name Keyword group name; when empty it is resolved from $cron_job->group_id.
  * @param string|null $zipCode    Sent as `zip_code` when not empty.
  * @param string|null $user_agent Sent only when it is one of 'desktop', 'iphone_ipad', 'android'.
  * @return bool True when the server answered with a 2xx/3xx status, false otherwise.
  * @author Ruslan Ushakov
  */
 private function get_new_keywords($site, $keyword, $cron_job, $location = '', $server = '', $quantity = 1000, $group_name = null, $zipCode = null, $user_agent = null)
 {
     // Load the model before its first use: the status constants read below live on it.
     $this->load->model('ranking_model');

     $result = false;
     if ($cron_job->status != $this->ranking_model->CRON_STATUS_INPROCESS) {
         if ($this->ranking_model->changeCronJobStatus($cron_job->id, $this->ranking_model->CRON_STATUS_INPROCESS)) {
             $cron_job->status = $this->ranking_model->CRON_STATUS_INPROCESS;
         }
     }

     // Normalise once so every site comparison in this method is case-insensitive.
     $site_key = strtolower($site);
     if ($site_key != 'walmart' && $site_key != 'pgestore') {
         $url = 'http://' . $server . ':6543/ranking_data/';
     } else {
         $url = 'http://' . $server . ':6543/ranking_data_with_best_sellers/';
     }

     if (!$group_name) {
         $group_id = $cron_job->group_id ?: 0;
         if ($group_id == -1) {
             $group_name = 'Default';
         } else {
             // The lookup may not return a record; fall back to an empty name instead of
             // dereferencing a non-object.
             $group = $this->ranking_model->getKeywordGroupById($group_id);
             $group_name = (is_object($group) && !empty($group->name)) ? $group->name : '';
         }
     }

     $post_array = array('site' => $site, 'searchterms_str' => $keyword, 'quantity' => $quantity, 'group_name' => $group_name);
     if (!empty($user_agent) && in_array($user_agent, array('desktop', 'iphone_ipad', 'android'), true)) {
         $post_array['user_agent'] = $user_agent;
     }
     if ($site_key == 'amazonfresh') {
         // Accept the location in any letter case ('NoCal', 'nocal', ...); unknown or
         // empty values default to southern_cali.
         $location_key = strtolower((string) $location);
         if ($location_key === 'nocal') {
             $location = 'northern_cali';
         } elseif ($location_key === 'seattle') {
             $location = 'seattle';
         } else {
             $location = 'southern_cali';
         }
         $post_array['location'] = $location;
     }
     if (!empty($zipCode)) {
         $post_array['zip_code'] = $zipCode;
     }

     $ch = curl_init();
     curl_setopt($ch, CURLOPT_URL, $url);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     curl_setopt($ch, CURLOPT_POST, true);
     // NOTE(review): an array body is sent as multipart/form-data, and on old PHP versions
     // without CURLOPT_SAFE_UPLOAD a value starting with '@' is treated as a file upload.
     // Confirm keywords cannot start with '@' on the deployed PHP version.
     curl_setopt($ch, CURLOPT_POSTFIELDS, $post_array);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
     curl_setopt($ch, CURLOPT_MAXREDIRS, 1);
     $curl_result = curl_exec($ch);

     // Read the status once; it is 0 when the transfer itself failed.
     $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

     $curl_log_data = array('type' => 'keyword_cron_job', 'method' => 'POST', 'url' => $url, 'params' => json_encode($post_array), 'http_code' => $http_code, 'curl_info' => json_encode(curl_getinfo($ch)), 'result' => $curl_result);
     $curl_log_id = $this->ranking_model->create_('curl_logs', $curl_log_data);
     $keyword_cron_jobs_log_data = array('job_id' => $cron_job->id, 'message_time' => gmdate('Y-m-d H:i:s'), 'job_phase' => $this->ranking_model->JOB_PHASE_REQUEST, 'curl_log_id' => $curl_log_id);

     if ($http_code >= 200 && $http_code < 400) {
         // 2xx 3xx codes (usually 202 or 302)
         // The request id is taken from a path segment of the effective URL.
         $eff_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
         $eff_url_parts = explode('/', $eff_url);
         if ($site_key == 'walmart') {
             $id_index = 6;
         } else {
             $id_index = count($eff_url_parts) - 2;
         }
         // A URL shorter than expected yields a null id rather than an undefined-index notice.
         $request_id = isset($eff_url_parts[$id_index]) ? $eff_url_parts[$id_index] : null;
         $job_url = curl_getinfo($ch, CURLINFO_REDIRECT_URL) ?: $eff_url;

         // create 'child' cron job
         $keyword_cron_job_data = array('url' => $job_url, 'parent_id' => $cron_job->id, 'request_id' => $request_id, 'keyword' => $keyword, 'status' => $this->ranking_model->CRON_STATUS_WAITING);
         $this->ranking_model->create_('keyword_cron_jobs', $keyword_cron_job_data);
         $keyword_cron_jobs_log_data = array_merge($keyword_cron_jobs_log_data, array('message' => json_encode(array('id' => $request_id, 'site' => $site, 'keyword' => $keyword, 'group' => $group_name, 'status' => "STATUS REQUESTED ({$http_code})")), 'child_job_id' => $request_id, 'job_phase_status' => 'true'));
         $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
         $result = true;
     } else {
         $keyword_cron_jobs_log_data = array_merge($keyword_cron_jobs_log_data, array('message' => json_encode(array('site' => $site, 'keyword' => $keyword, 'group' => $group_name, 'status' => "STATUS ERROR - Wrong response from keyword server ({$http_code})")), 'job_phase_status' => 'false'));
         $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
     }
     curl_close($ch);
     return $result;
 }