Esempio n. 1
0
 /**
  * Process one batch of waiting child cron jobs (per search term or per product).
  *
  * Reads the parent job id from the POST field `job`. Unless more than
  * self::CRON_JOBS_TO_RUN jobs are already in process, it asks the model for
  * waiting child jobs (passing self::CRON_JOBS_TO_RUN, presumably as the batch
  * limit), requests every job's URL and then:
  *  - on HTTP 302, fetches the redirect target; a non-empty 200 response is
  *    handed to import_ranking_data() and the job becomes READY, an empty 200
  *    response yields WARNING, any other code yields ERROR;
  *  - on HTTP 404, 5xx or an empty body, marks the job as ERROR;
  *  - otherwise puts the job back to WAITING.
  * Each HTTP request is stored in the curl log, each phase in
  * `keyword_cron_jobs_log`, and the batch timing in ranking_timing_model.
  * Finally, jobs returned by getStuckChildCronJobs() are reset to WAITING,
  * whether or not a batch was run.
  *
  * @return void The JSON document {"status": "..."} is written to the output.
  * @author Ruslan Ushakov
  */
 function keyword_url_cron_job()
 {
     // NOTE(review): sites_model and settings_model are not used directly in
     // this method; presumably import_ranking_data() relies on them - confirm
     // before removing these loads.
     $this->load->model('sites_model');
     $this->load->model('settings_model');
     $this->load->model('ranking_model');
     $parent_id = (int) $this->input->post('job');
     $jobs_in_process = $this->ranking_model->getInProcessUrlCronJobsCount();
     if ($jobs_in_process <= self::CRON_JOBS_TO_RUN) {
         $cron_jobs = $this->ranking_model->getWaitingChildCronJobs($parent_id, self::CRON_JOBS_TO_RUN);
         // Claim the whole batch at once by switching it to IN PROCESS.
         $cron_jobs_ids = array_map(function ($cron_job) {
             return $cron_job->id;
         }, $cron_jobs);
         if ($cron_jobs_ids && $this->ranking_model->changeCronJobStatus($cron_jobs_ids, $this->ranking_model->CRON_STATUS_INPROCESS)) {
             foreach ($cron_jobs as $key => $cron_job) {
                 $cron_jobs[$key]->status = $this->ranking_model->CRON_STATUS_INPROCESS;
             }
         }
         // Parent jobs, indexed by id.
         $cron_jobs_parents_ids = array_values(array_filter(array_unique(array_map(function ($cron_job) {
             return $cron_job->parent_id;
         }, $cron_jobs))));
         $cron_jobs_parents = array();
         if (!empty($cron_jobs_parents_ids)) {
             foreach ($this->ranking_model->get_('keyword_cron_jobs', array('id' => $cron_jobs_parents_ids)) as $cron_jobs_parent) {
                 $cron_jobs_parents[$cron_jobs_parent->id] = $cron_jobs_parent;
             }
         }
         // Sites referenced by the parent jobs, indexed by id.
         $sites_ids = array_values(array_unique(array_filter(array_map(function ($cron_job) {
             return $cron_job->site_id;
         }, $cron_jobs_parents)), SORT_NUMERIC));
         $sites = array();
         if (!empty($sites_ids)) {
             foreach ($this->ranking_model->get_('sites', array('id' => $sites_ids)) as $site) {
                 $sites[$site->id] = $site;
             }
         }
         // Search term groups referenced by the parent jobs, indexed by id.
         $cron_jobs_group_ids = array_values(array_unique(array_filter(array_map(function ($cron_job) {
             return $cron_job->group_id;
         }, $cron_jobs_parents)), SORT_NUMERIC));
         $search_terms_groups = array();
         if (!empty($cron_jobs_group_ids)) {
             foreach ($this->ranking_model->get_('search_terms_groups', array('id' => $cron_jobs_group_ids)) as $search_terms_group) {
                 $search_terms_groups[$search_terms_group->id] = $search_terms_group;
             }
         }
         // Product list items referenced directly by the child jobs, indexed by id.
         $product_list_item_ids = array_filter(array_map(function ($cron_job) {
             return $cron_job->product_list_item_id;
         }, $cron_jobs));
         $product_list_items = array();
         if (!empty($product_list_item_ids)) {
             foreach ($this->ranking_model->get_('product_list_items', array('id' => $product_list_item_ids)) as $product_list_item) {
                 $product_list_items[$product_list_item->id] = $product_list_item;
             }
         }
         // NOTE(review): the email library is configured here but not used in
         // this method; presumably import_ranking_data() sends mail - confirm.
         $this->load->library('email');
         $config['protocol'] = 'sendmail';
         $config['mailpath'] = '/usr/sbin/sendmail';
         $config['charset'] = 'UTF-8';
         $config['wordwrap'] = TRUE;
         $config['mailtype'] = 'html';
         $this->email->initialize($config);
         $begin_requests_time = gmdate('Y-m-d H:i:s');
         $imported_cron_jobs = 0;
         if (!empty($cron_jobs)) {
             foreach ($cron_jobs as $cron_job) {
                 // Mark the current job IN PROCESS again right before handling it.
                 // Kept even though the batch was already claimed above: the bulk
                 // update may have failed, and the call may refresh a timestamp
                 // used for stuck-job detection (TODO confirm in ranking_model).
                 if (!empty($cron_job->id) && $this->ranking_model->changeCronJobStatus($cron_job->id, $this->ranking_model->CRON_STATUS_INPROCESS)) {
                     $cron_job->status = $this->ranking_model->CRON_STATUS_INPROCESS;
                 }
                 // NOTE(review): jobs skipped with `continue` below keep their
                 // IN PROCESS status; presumably the stuck-job reset at the end
                 // of this method eventually recovers them - confirm.
                 if (!$cron_job->url) {
                     continue;
                 }
                 // 'message' is JSON-encoded from $keyword_cron_jobs_log_message
                 // immediately before every log write further down.
                 $keyword_cron_jobs_log_data = array('job_id' => null, 'message' => null, 'child_job_id' => $cron_job->request_id, 'job_phase' => $this->ranking_model->JOB_PHASE_RESPONSE, 'job_phase_status' => 'true');
                 if (empty($cron_job->product_list_item_id)) {
                     // cron job per search term: needs its parent, site and group
                     $cron_job_parent = isset($cron_jobs_parents[$cron_job->parent_id]) ? $cron_jobs_parents[$cron_job->parent_id] : null;
                     if (!$cron_job_parent) {
                         continue;
                     }
                     $site = isset($sites[$cron_job_parent->site_id]) ? $sites[$cron_job_parent->site_id] : null;
                     $search_terms_group = isset($search_terms_groups[$cron_job_parent->group_id]) ? $search_terms_groups[$cron_job_parent->group_id] : null;
                     if (!($site && $search_terms_group)) {
                         continue;
                     }
                     $site_name = strtolower($site->name);
                     $location = '';
                     // get crawler name
                     $crawler_name = Ranking_model::getCrawlerName($site_name, $location);
                     $group_name = $search_terms_group->name;
                     $keyword_cron_jobs_log_message = array('id' => $cron_job->request_id, 'site' => $crawler_name, 'keyword' => $cron_job->keyword, 'group' => $group_name, 'status' => 'RESPONSE GET');
                     $keyword_cron_jobs_log_data['job_id'] = $cron_job_parent->id;
                 } else {
                     // cron job per product: no parent job is involved
                     $cron_job_parent = null;
                     $product_list_item = isset($product_list_items[$cron_job->product_list_item_id]) ? $product_list_items[$cron_job->product_list_item_id] : null;
                     $keyword_cron_jobs_log_message = array('id' => $cron_job->request_id, 'product_url' => empty($product_list_item->url) ? null : $product_list_item->url, 'status' => 'RESPONSE GET');
                     $keyword_cron_jobs_log_data['job_id'] = $cron_job->id;
                 }
                 // First request: ask the job URL for the state of the crawl.
                 $ch = curl_init($cron_job->url);
                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                 $ch_result = curl_exec($ch);
                 $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
                 $curl_log_data = array('type' => 'keyword_cron_job', 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), 'http_code' => $http_code, 'curl_info' => json_encode(curl_getinfo($ch)), 'result' => $ch_result);
                 $curl_log_id = $this->ranking_model->create_($this->ranking_model->tables['curl_logs'], $curl_log_data);
                 $request_time_seconds = round(curl_getinfo($ch, CURLINFO_TOTAL_TIME));
                 $redirect_url = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
                 // Everything needed from the handle has been read; release it so
                 // handles do not pile up across the batch.
                 curl_close($ch);
                 $keyword_cron_jobs_log_message['request time'] = floor($request_time_seconds / 60) . " min " . $request_time_seconds % 60 . " sec";
                 $keyword_cron_jobs_log_data['curl_log_id'] = $curl_log_id;
                 if ($http_code === 302) {
                     // The redirect points at the result document: log the
                     // response phase, then fetch the redirect target.
                     $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                     $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                     $ch2 = curl_init($redirect_url);
                     curl_setopt($ch2, CURLOPT_RETURNTRANSFER, true);
                     $results = curl_exec($ch2);
                     $results_code = curl_getinfo($ch2, CURLINFO_HTTP_CODE);
                     $curl_log_data = array('type' => 'keyword_cron_job', 'url' => curl_getinfo($ch2, CURLINFO_EFFECTIVE_URL), 'http_code' => $results_code, 'curl_info' => json_encode(curl_getinfo($ch2)), 'result' => $results);
                     $curl2_log_id = $this->ranking_model->create_('curl_logs', $curl_log_data);
                     $request_time_seconds = round(curl_getinfo($ch2, CURLINFO_TOTAL_TIME));
                     curl_close($ch2);
                     $keyword_cron_jobs_log_message['request time'] = floor($request_time_seconds / 60) . " min " . $request_time_seconds % 60 . " sec";
                     $keyword_cron_jobs_log_data['job_phase'] = $this->ranking_model->JOB_PHASE_IMPORT_BEGIN;
                     $keyword_cron_jobs_log_data['curl_log_id'] = $curl2_log_id;
                     if ($results_code === 200) {
                         if ($results) {
                             // got results, make import, set CRON_STATUS_READY
                             $keyword_cron_jobs_log_message['status'] = 'IMPORT BEGIN';
                             $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                             $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                             $import_time_begin = microtime(true);
                             if (empty($cron_job->product_list_item_id)) {
                                 // import data per search term
                                 $imported_count = $this->import_ranking_data($results, $cron_job, $search_terms_group->id, $site->id);
                             } else {
                                 // import data per product
                                 $imported_count = $this->import_ranking_data($results, $cron_job, null, null, $product_list_item);
                             }
                             $import_time = number_format(microtime(true) - $import_time_begin, 2);
                             $imported_cron_jobs++;
                             $this->_set_cron_job_status_with_parent($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_READY);
                             $keyword_cron_jobs_log_message['status'] = 'STATUS READY';
                             unset($keyword_cron_jobs_log_message['request time']);
                             $keyword_cron_jobs_log_message['import time'] = "{$import_time} sec";
                             $keyword_cron_jobs_log_message['imported products'] = $imported_count;
                             $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                             $keyword_cron_jobs_log_data['job_phase'] = $this->ranking_model->JOB_PHASE_IMPORT_END;
                             $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                         } else {
                             // result is empty, set CRON_STATUS_WARNING
                             $this->_set_cron_job_status_with_parent($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_WARNING);
                             $keyword_cron_jobs_log_message['status'] = 'STATUS WARNING - Response is empty';
                             $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                             $keyword_cron_jobs_log_data['job_phase_status'] = 'false';
                             $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                         }
                     } else {
                         // result wasn't received, set CRON_STATUS_ERROR
                         $this->_set_cron_job_status_with_parent($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_ERROR);
                         $keyword_cron_jobs_log_message['status'] = "STATUS ERROR - Result wasn't received ({$results_code})";
                         $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                         $keyword_cron_jobs_log_data['job_phase_status'] = 'false';
                         $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                     }
                 } elseif ($http_code === 404 || ($http_code >= 500 && $http_code < 600) || empty($ch_result)) {
                     // not found, server error or empty body: set CRON_STATUS_ERROR
                     $this->_set_cron_job_status_with_parent($cron_job, $cron_job_parent, $this->ranking_model->CRON_STATUS_ERROR);
                     if ($http_code === 404) {
                         $keyword_cron_jobs_log_message['status'] = 'STATUS ERROR - The resource could not be found.';
                     } elseif (!empty($ch_result)) {
                         $keyword_cron_jobs_log_message['status'] = "STATUS ERROR - REST API Server Error ({$http_code})";
                     } else {
                         $keyword_cron_jobs_log_message['status'] = "STATUS ERROR - Empty response from keyword server";
                     }
                     $keyword_cron_jobs_log_data['message'] = json_encode($keyword_cron_jobs_log_message);
                     $keyword_cron_jobs_log_data['job_phase_status'] = 'false';
                     $this->ranking_model->create_('keyword_cron_jobs_log', $keyword_cron_jobs_log_data);
                 } else {
                     // Any other answer: hand the job back to the WAITING queue
                     // so that a later run requests it again.
                     $this->ranking_model->changeCronJobStatus($cron_job->id, $this->ranking_model->CRON_STATUS_WAITING);
                 }
             }
         }
         $end_requests_time = gmdate('Y-m-d H:i:s');
         $requests_count = empty($cron_jobs) ? 0 : count($cron_jobs);
         $this->load->model('ranking_timing_model');
         $this->ranking_timing_model->add_request_timing($begin_requests_time, $end_requests_time, $requests_count, $imported_cron_jobs);
         $result = array('status' => 'ok');
     } else {
         $result = array('status' => "too many jobs are in process ({$jobs_in_process})");
     }
     // fix 'stuck' jobs, reset their status to CRON_STATUS_WAITING
     $stuck_jobs = $this->ranking_model->getStuckChildCronJobs();
     if ($stuck_jobs) {
         $stuck_jobs_ids = array_map(function ($stuck_job) {
             return $stuck_job->id;
         }, $stuck_jobs);
         $this->ranking_model->changeCronJobStatus($stuck_jobs_ids, $this->ranking_model->CRON_STATUS_WAITING);
     }
     $this->output->set_content_type('application/json')->set_output(json_encode($result));
 }

 /**
  * Set a child cron job's status and mirror it onto its parent job.
  *
  * The child status is always written. The parent is only written when one is
  * given and its in-memory status differs; the in-memory parent object is
  * updated only if the model call reports success.
  *
  * The leading underscore follows the surrounding code's framework convention
  * for non-routable controller methods - NOTE(review): this looks like a
  * CodeIgniter controller; confirm, or declare the helper `private`.
  *
  * @param object      $cron_job        Child job; only `id` is read.
  * @param object|null $cron_job_parent Parent job (`id` and `status` are used), or null.
  * @param mixed       $status          One of the ranking_model CRON_STATUS_* values.
  * @return void
  */
 function _set_cron_job_status_with_parent($cron_job, $cron_job_parent, $status)
 {
     $this->ranking_model->changeCronJobStatus($cron_job->id, $status);
     if (!empty($cron_job_parent) && $cron_job_parent->status != $status) {
         if ($this->ranking_model->changeCronJobStatus($cron_job_parent->id, $status)) {
             $cron_job_parent->status = $status;
         }
     }
 }