Exemplo n.º 1
0
 /**
  * Cron entry point: re-requests and imports results for stuck child keyword cron jobs.
  *
  * For every job returned by ranking_model::getStuckChildCronJobsForLastDays():
  *  1. the job is marked IN PROCESS;
  *  2. $cron_job->url is requested and the raw response is stored in the curl log;
  *  3. on HTTP 302 the redirect URL is requested manually:
  *       - HTTP 200 with a non-empty body -> import via import_ranking_data(), status READY;
  *       - HTTP 200 with an empty body    -> status WARNING;
  *       - any other code                 -> status ERROR;
  *  4. on HTTP 404, 5xx or an empty response the status becomes ERROR;
  *  5. in every other case the job is put back to WAITING.
  * For jobs per search term the parent job receives the same READY/WARNING/ERROR status;
  * jobs per product have no parent here.
  *
  * Echoes 'mistime' (and does nothing) when the current GMT hour is before 8,
  * otherwise echoes 'ok' when finished.
  *
  * @return void
  */
 public function keyword_url_cron_job_rerun()
 {
     // Only run from 08:00 GMT onwards.
     if (intval(gmdate('G')) < 8) {
         echo 'mistime';
         return;
     }
     $this->load->model('sites_model');
     $this->load->model('ranking_model');
     $cron_jobs = $this->ranking_model->getStuckChildCronJobsForLastDays(self::CRON_JOBS_TO_RERUN);
     if (!empty($cron_jobs)) {
         // Pre-load everything the loop needs, indexed by id, to avoid per-job lookups.

         // get parent jobs
         $cron_jobs_parents_ids = array_values(array_filter(array_unique(array_map(function ($cron_job) {
             return $cron_job->parent_id;
         }, $cron_jobs))));
         $cron_jobs_parents = array();
         if (!empty($cron_jobs_parents_ids)) {
             foreach ($this->ranking_model->get_('keyword_cron_jobs', array('id' => $cron_jobs_parents_ids)) as $cron_jobs_parent) {
                 $cron_jobs_parents[$cron_jobs_parent->id] = $cron_jobs_parent;
             }
         }
         // get corresponding sites
         $sites_ids = array_values(array_unique(array_filter(array_map(function ($cron_job) {
             return $cron_job->site_id;
         }, $cron_jobs_parents)), SORT_NUMERIC));
         $sites = array();
         if (!empty($sites_ids)) {
             foreach ($this->ranking_model->get_('sites', array('id' => $sites_ids)) as $site) {
                 $sites[$site->id] = $site;
             }
         }
         // get search_terms_groups
         $cron_jobs_group_ids = array_values(array_unique(array_filter(array_map(function ($cron_job) {
             return $cron_job->group_id;
         }, $cron_jobs_parents)), SORT_NUMERIC));
         $search_terms_groups = array();
         if (!empty($cron_jobs_group_ids)) {
             foreach ($this->ranking_model->get_('search_terms_groups', array('id' => $cron_jobs_group_ids)) as $search_terms_group) {
                 $search_terms_groups[$search_terms_group->id] = $search_terms_group;
             }
         }
         // get product list items
         $product_list_item_ids = array_filter(array_map(function ($cron_job) {
             return $cron_job->product_list_item_id;
         }, $cron_jobs));
         $product_list_items = array();
         if (!empty($product_list_item_ids)) {
             foreach ($this->ranking_model->get_('product_list_items', array('id' => $product_list_item_ids)) as $product_list_item) {
                 $product_list_items[$product_list_item->id] = $product_list_item;
             }
         }

         foreach ($cron_jobs as $cron_job) {
             // change status of current job to IN PROCESS
             if (!empty($cron_job->id) && $this->ranking_model->changeCronJobStatus($cron_job->id, $this->ranking_model->CRON_STATUS_INPROCESS)) {
                 $cron_job->status = $this->ranking_model->CRON_STATUS_INPROCESS;
             }
             // NOTE(review): jobs skipped by the `continue` statements below stay IN PROCESS;
             // confirm this is intended (presumably they are picked up again as "stuck").
             if (!$cron_job->url) {
                 continue;
             }
             $keyword_cron_jobs_log_data = array(
                 'job_id' => null,
                 'message' => null,
                 'child_job_id' => $cron_job->request_id,
                 'job_phase' => $this->ranking_model->JOB_PHASE_RESPONSE,
                 'job_phase_status' => 'true',
             );
             if (empty($cron_job->product_list_item_id)) {
                 // cron job per search term: needs its parent job, site and search terms group
                 $cron_job_parent = isset($cron_jobs_parents[$cron_job->parent_id]) ? $cron_jobs_parents[$cron_job->parent_id] : null;
                 if (!$cron_job_parent) {
                     continue;
                 }
                 $site = isset($sites[$cron_job_parent->site_id]) ? $sites[$cron_job_parent->site_id] : null;
                 $search_terms_group = isset($search_terms_groups[$cron_job_parent->group_id]) ? $search_terms_groups[$cron_job_parent->group_id] : null;
                 if (!($site && $search_terms_group)) {
                     continue;
                 }
                 $site_name = strtolower($site->name);
                 $location = '';
                 // get crawler name
                 $crawler_name = Ranking_model::getCrawlerName($site_name, $location);
                 $group_name = $search_terms_group->name;
                 $keyword_cron_jobs_log_message = array(
                     'id' => $cron_job->request_id,
                     'site' => $crawler_name,
                     'keyword' => $cron_job->keyword,
                     'group' => $group_name,
                     'status' => 'RESPONSE GET',
                 );
                 $keyword_cron_jobs_log_data['job_id'] = $cron_job_parent->id;
                 $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
             } else {
                 // cron job per product: no parent job, logged under its own id
                 $cron_job_parent = null;
                 $product_list_item = isset($product_list_items[$cron_job->product_list_item_id]) ? $product_list_items[$cron_job->product_list_item_id] : null;
                 $keyword_cron_jobs_log_message = array(
                     'id' => $cron_job->request_id,
                     'product_url' => empty($product_list_item->url) ? null : $product_list_item->url,
                     'status' => 'RESPONSE GET',
                 );
                 $keyword_cron_jobs_log_data['job_id'] = $cron_job->id;
                 $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
             }

             // First request: ask the job URL for its state. Everything needed from the
             // handle is read immediately so the connection can be closed before the
             // (potentially long) import step instead of staying open.
             $ch = curl_init($cron_job->url);
             curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
             $ch_result = curl_exec($ch);
             $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
             $curl_log_data = array(
                 'type' => 'keyword_cron_job',
                 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
                 'http_code' => $http_code,
                 'curl_info' => json_encode(curl_getinfo($ch)),
                 'result' => $ch_result,
             );
             $response_request_time = $this->_rerun_format_request_time(curl_getinfo($ch, CURLINFO_TOTAL_TIME));
             $redirect_url = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
             curl_close($ch);

             $curl_log_id = $this->ranking_model->create_($this->ranking_model->tables['curl_logs'], $curl_log_data);
             $keyword_cron_jobs_log_message['request time'] = $response_request_time;
             $keyword_cron_jobs_log_data['curl_log_id'] = $curl_log_id;

             if ($http_code === 302) {
                 // Redirect received: log the response phase, then request the redirect target.
                 $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                 $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);

                 $ch2 = curl_init($redirect_url);
                 curl_setopt($ch2, CURLOPT_RETURNTRANSFER, true);
                 $results = curl_exec($ch2);
                 $results_code = curl_getinfo($ch2, CURLINFO_HTTP_CODE);
                 $curl_log_data = array(
                     'type' => 'keyword_cron_job',
                     'url' => curl_getinfo($ch2, CURLINFO_EFFECTIVE_URL),
                     'http_code' => $results_code,
                     'curl_info' => json_encode(curl_getinfo($ch2)),
                     'result' => $results,
                 );
                 $result_request_time = $this->_rerun_format_request_time(curl_getinfo($ch2, CURLINFO_TOTAL_TIME));
                 curl_close($ch2);

                 $curl2_log_id = $this->ranking_model->create_('curl_logs', $curl_log_data);
                 $keyword_cron_jobs_log_message['request time'] = $result_request_time;
                 $keyword_cron_jobs_log_data['job_phase'] = $this->ranking_model->JOB_PHASE_IMPORT_BEGIN;
                 $keyword_cron_jobs_log_data['curl_log_id'] = $curl2_log_id;

                 if ($results_code === 200) {
                     if ($results) {
                         // got results, set CRON_STATUS_READY, make import
                         $keyword_cron_jobs_log_message['status'] = 'IMPORT BEGIN';
                         $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                         $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                         $import_time_begin = microtime(true);
                         if (empty($cron_job->product_list_item_id)) {
                             // import data per search term
                             $imported_count = $this->import_ranking_data($results, $cron_job, $search_terms_group->id, $site->id);
                         } else {
                             // import data per product
                             $imported_count = $this->import_ranking_data($results, $cron_job, null, null, $product_list_item);
                         }
                         $import_time = number_format(microtime(true) - $import_time_begin, 2);
                         $this->_rerun_set_job_status($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_READY);
                         $keyword_cron_jobs_log_message['status'] = 'STATUS READY';
                         // the final log entry reports import time instead of request time
                         unset($keyword_cron_jobs_log_message['request time']);
                         $keyword_cron_jobs_log_message['import time'] = "{$import_time} sec";
                         $keyword_cron_jobs_log_message['imported products'] = $imported_count;
                         $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                         $keyword_cron_jobs_log_data['job_phase'] = $this->ranking_model->JOB_PHASE_IMPORT_END;
                         $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                     } else {
                         // result is empty, set CRON_STATUS_WARNING
                         $this->_rerun_set_job_status($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_WARNING);
                         $keyword_cron_jobs_log_message['status'] = 'STATUS WARNING - Response is empty';
                         $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                         $keyword_cron_jobs_log_data['job_phase_status'] = 'false';
                         $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                     }
                 } else {
                     // result wasn't received, set CRON_STATUS_ERROR
                     $this->_rerun_set_job_status($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_ERROR);
                     $keyword_cron_jobs_log_message['status'] = "STATUS ERROR - Result wasn't received ({$results_code})";
                     $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                     $keyword_cron_jobs_log_data['job_phase_status'] = 'false';
                     $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                 }
             } elseif ($http_code === 404 || ($http_code >= 500 && $http_code <= 599) || empty($ch_result)) {
                 // not found / server error / empty response, set CRON_STATUS_ERROR
                 $this->_rerun_set_job_status($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_ERROR);
                 if ($http_code === 404) {
                     $keyword_cron_jobs_log_message['status'] = 'STATUS ERROR - The resource could not be found.';
                 } elseif (!empty($ch_result)) {
                     $keyword_cron_jobs_log_message['status'] = "STATUS ERROR - REST API Server Error ({$http_code})";
                 } else {
                     $keyword_cron_jobs_log_message['status'] = "STATUS ERROR - Empty response from keyword server";
                 }
                 $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                 $keyword_cron_jobs_log_data['job_phase_status'] = 'false';
                 $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
             } else {
                 // any other answer: put the job back to WAITING (parent is left untouched, nothing is logged)
                 $this->ranking_model->changeCronJobStatus($cron_job->id, $this->ranking_model->CRON_STATUS_WAITING);
             }
         }
     }
     echo 'ok';
 }

 /**
  * Sets $status on a child cron job and, when a parent job is given and its
  * status differs, on the parent as well.
  *
  * The parent's in-memory status is updated only if the model reports success,
  * so later children of the same parent do not repeat the update.
  *
  * @param object      $cron_job        child job (its ->id is used)
  * @param object|null $cron_job_parent parent job or null for jobs without a parent
  * @param mixed       $status          one of the ranking_model CRON_STATUS_* values
  * @return void
  */
 private function _rerun_set_job_status($cron_job, $cron_job_parent, $status)
 {
     $this->ranking_model->changeCronJobStatus($cron_job->id, $status);
     if (!empty($cron_job_parent) && $cron_job_parent->status != $status) {
         if ($this->ranking_model->changeCronJobStatus($cron_job_parent->id, $status)) {
             $cron_job_parent->status = $status;
         }
     }
 }

 /**
  * Formats a cURL total time (seconds, possibly fractional) as "<m> min <s> sec",
  * rounded to the nearest whole second.
  *
  * @param float $total_time value of CURLINFO_TOTAL_TIME
  * @return string
  */
 private function _rerun_format_request_time($total_time)
 {
     $request_time_seconds = round($total_time);
     return floor($request_time_seconds / 60) . " min " . $request_time_seconds % 60 . " sec";
 }