/**
 * Cron entry point that schedules keyword groups, creates 'parent' keyword
 * cron jobs and dispatches keyword / product crawl requests.
 *
 * Steps, in the order they run:
 *  1. Every keyword group returned by getKeywordGroupsUnscheduled() gets a
 *     group cron job with the CRON_PERIOD_DAY period.
 *  2. For the group cron jobs returned by getKeywordGroupCronJobsActual(),
 *     'last_update' is refreshed and one 'parent' keyword_cron_jobs row in
 *     CRON_STATUS_WAITING is created per groups_sites relation whose group
 *     reports a non-zero search_terms_count.
 *  3. The parent jobs to process are selected. When POST 'job_ids' is set
 *     (manual request) those jobs are loaded by id. Otherwise product crawl
 *     requests are sent first, and waiting parent jobs are fetched through
 *     getWaitingParentCronJobs(20) only when fewer than
 *     self::PRODUCTS_TO_CRAWL products were returned.
 *  4. For every selected parent job one keyword request is sent per search
 *     term of its group (manual request: all terms; otherwise only terms
 *     that have no child job yet). The job ends up as CRON_STATUS_REQUESTED
 *     when nothing is left unsent, otherwise as CRON_STATUS_ERROR.
 *
 * @return void The JSON body {"status":"ok"} is written through the output class.
 * @author Ruslan Ushakov
 */
function keyword_cron_job()
{
    $this->load->model('sites_model');
    $this->load->model('ranking_model');
    $this->load->model('product_model');
    $this->load->model('settings_model');

    // Local handle to the ranking model; same object, shorter to read.
    $ranking = $this->ranking_model;

    // Step 1: schedule keyword groups that have no group cron job yet.
    $unscheduled_groups = $ranking->getKeywordGroupsUnscheduled();
    if ($unscheduled_groups) {
        foreach ($unscheduled_groups as $unscheduled_group) {
            $ranking->addKeywordGroupCronJob($unscheduled_group->id, $ranking->CRON_PERIOD_DAY);
        }
    }

    // Step 2: collect the scheduled (per search term group) tasks, indexed by id.
    $due_group_jobs = array();
    foreach ($ranking->getKeywordGroupCronJobsActual() as $due_group_job) {
        $due_group_jobs[$due_group_job->id] = $due_group_job;
    }

    if (!empty($due_group_jobs)) {
        $ranking->update_(
            'keyword_group_cron_jobs',
            array('last_update' => 'now()'),
            array('id' => array_keys($due_group_jobs))
        );

        // A falsy keyword_group_id is replaced with -1.
        $due_group_ids = array();
        foreach ($due_group_jobs as $due_group_job) {
            $due_group_id = $due_group_job->keyword_group_id;
            $due_group_ids[] = $due_group_id ? $due_group_id : -1;
        }

        // groups_sites relations give the sites associated with each group.
        $group_site_links = $ranking->get_('groups_sites', array('group_id' => $due_group_ids));

        // Groups together with their search term count, indexed by id, so
        // that groups without search terms can be skipped below.
        $counted_groups = array();
        foreach ($ranking->getSearchTermsGroupsWithCount(array('id' => $due_group_ids)) as $counted_group) {
            $counted_groups[$counted_group->id] = $counted_group;
        }

        // Build the 'parent' cron job rows.
        $parent_rows = array();
        foreach ($group_site_links as $link) {
            if (!isset($counted_groups[$link->group_id])) {
                continue;
            }
            if (!$counted_groups[$link->group_id]->search_terms_count) {
                continue;
            }
            $parent_rows[] = array(
                'group_id' => $link->group_id,
                'site_id' => $link->site_id,
                'status' => $ranking->CRON_STATUS_WAITING,
            );
        }

        if (!empty($parent_rows)) {
            $ranking->create_batch_('keyword_cron_jobs', $parent_rows);
        }
    }

    //Issue #2956
    // Step 3: choose the parent jobs to process.
    $job_ids = $this->input->post('job_ids');
    $manual_request = !empty($job_ids);

    if ($manual_request) {
        $cron_jobs = $ranking->getCronJobsById($job_ids);
    } else {
        $keyword_servers = $this->settings_model->get_value($this->settings_model->system_user, 'restapi_servers');
        $has_server_list = is_array($keyword_servers) && !empty($keyword_servers);
        if (!$has_server_list) {
            // Fall back to the single server named in the config.
            $keyword_servers = array(array('name' => $this->config->item('keyword_rest_api')));
        }
        $servers_pending_statuses = $this->get_keyword_servers_pending_status($keyword_servers);

        // Products that need to be crawled are requested first.
        $products_to_crawl = $this->product_model->getProductsToBeCrawled(self::PRODUCTS_TO_CRAWL);
        foreach ($products_to_crawl as $product_to_crawl) {
            $server_num = $this->check_keyword_server($servers_pending_statuses);
            $server = $keyword_servers[$server_num]['name'];
            if ($this->create_product_crawl_request($product_to_crawl, $server)) {
                $servers_pending_statuses[$server_num]++;
            }
        }

        // Waiting parent jobs are fetched only when fewer than
        // PRODUCTS_TO_CRAWL products were returned.
        if (count($products_to_crawl) < self::PRODUCTS_TO_CRAWL) {
            $cron_jobs = $ranking->getWaitingParentCronJobs(20);
        } else {
            $cron_jobs = array();
        }
    }

    // Step 4: send keyword requests for the selected parent jobs.
    if (!empty($cron_jobs)) {
        // In the manual branch the server list has not been resolved yet.
        if (empty($keyword_servers)) {
            $keyword_servers = $this->settings_model->get_value($this->settings_model->system_user, 'restapi_servers');
            $has_server_list = !empty($keyword_servers) && is_array($keyword_servers);
            if (!$has_server_list) {
                $keyword_servers = array(array('name' => $this->config->item('keyword_rest_api')));
            }
        }
        if (empty($servers_pending_statuses)) {
            $servers_pending_statuses = $this->get_keyword_servers_pending_status($keyword_servers);
        }

        // Distinct, truthy search term group ids referenced by the jobs.
        $pluck_group_id = function ($job) {
            return $job->group_id;
        };
        $job_group_ids = array_map($pluck_group_id, $cron_jobs);
        $job_group_ids = array_filter($job_group_ids);
        $job_group_ids = array_unique($job_group_ids, SORT_NUMERIC);
        $job_group_ids = array_values($job_group_ids);

        // Groups indexed by id, and search terms (aka keywords) bucketed by group_id.
        $groups_by_id = array();
        $terms_by_group = array();
        if (!empty($job_group_ids)) {
            foreach ($ranking->get_('search_terms_groups', array('id' => $job_group_ids)) as $group) {
                $groups_by_id[$group->id] = $group;
            }
            foreach ($ranking->get_('search_terms', array('group_id' => $job_group_ids)) as $term) {
                $terms_by_group[$term->group_id][] = $term;
            }
        }

        // Existing child jobs, indexed by parent id and then by keyword.
        $pluck_id = function ($job) {
            return $job->id;
        };
        $parent_ids = array_map($pluck_id, $cron_jobs);
        $children_by_parent = array();
        if (!empty($parent_ids)) {
            foreach ($ranking->get_('keyword_cron_jobs', array('parent_id' => $parent_ids)) as $child) {
                $children_by_parent[$child->parent_id][$child->keyword] = $child;
            }
        }

        // Sites referenced by the jobs, indexed by id.
        $pluck_site_id = function ($job) {
            return $job->site_id;
        };
        $site_ids = array_map($pluck_site_id, $cron_jobs);
        $site_ids = array_filter($site_ids);
        $site_ids = array_unique($site_ids, SORT_NUMERIC);
        $site_ids = array_values($site_ids);

        $sites_by_id = array();
        if (!empty($site_ids)) {
            foreach ($ranking->get_('sites', array('id' => $site_ids)) as $site_row) {
                $sites_by_id[$site_row->id] = $site_row;
            }
        }

        // Mark all jobs as 'in progress'; mirror it on the loaded objects
        // only when the status change reports success.
        if ($ranking->changeCronJobStatus($parent_ids, $ranking->CRON_STATUS_INPROCESS)) {
            foreach ($cron_jobs as $cron_job) {
                $cron_job->status = $ranking->CRON_STATUS_INPROCESS;
            }
        }

        foreach ($cron_jobs as $cron_job) {
            if (isset($children_by_parent[$cron_job->id])) {
                $existing_children = $children_by_parent[$cron_job->id];
            } else {
                $existing_children = array();
            }

            if (isset($terms_by_group[$cron_job->group_id])) {
                $job_terms = $terms_by_group[$cron_job->group_id];
            } else {
                $job_terms = array();
            }

            if ($manual_request) {
                // Manual requests re-send every search term of the group.
                $terms_to_request = $job_terms;
            } else {
                // Otherwise keep only terms that have no child job yet.
                $terms_to_request = array();
                foreach ($job_terms as $term) {
                    if (isset($existing_children[$term->title])) {
                        continue;
                    }
                    $terms_to_request[] = $term;
                }
            }

            // Number of requests still unsent; decremented on each success.
            $remaining = count($terms_to_request);

            $site = null;
            if ($remaining > 0 && isset($sites_by_id[$cron_job->site_id])) {
                $site = $sites_by_id[$cron_job->site_id];
            }

            if ($site) {
                $site_name = strtolower($site->name);
                // NOTE(review): $location and $user_agent start empty and are
                // forwarded after getCrawlerName() — presumably filled by
                // reference there; confirm against Ranking_model.
                $location = '';
                $user_agent = '';
                if (!empty($site->zip_code)) {
                    $zip_code = $site->zip_code;
                } else {
                    $zip_code = '';
                }

                $crawler_name = Ranking_model::getCrawlerName($site_name, $location, $user_agent);

                if (!empty($crawler_name)) {
                    foreach ($terms_to_request as $term) {
                        $server_num = $this->check_keyword_server($servers_pending_statuses);
                        $server = $keyword_servers[$server_num]['name'];
                        if (!$term->title || !$server) {
                            continue;
                        }

                        if (isset($groups_by_id[$term->group_id])) {
                            $group_name = $groups_by_id[$term->group_id]->name;
                        } else {
                            $group_name = null;
                        }

                        $request_sent = $this->get_new_keywords(
                            $crawler_name,
                            $term->title,
                            $cron_job,
                            $location,
                            $server,
                            1000,
                            $group_name,
                            $zip_code,
                            $user_agent
                        );
                        if ($request_sent) {
                            $remaining--;
                            $servers_pending_statuses[$server_num]++;
                        }
                    }
                }
            }

            if ($remaining < 1) {
                $ranking->changeCronJobStatus($cron_job->id, $ranking->CRON_STATUS_REQUESTED);
                continue;
            }

            // Something was left unsent: flag the job as failed unless it
            // already carries the error status.
            if ($cron_job->status != $ranking->CRON_STATUS_ERROR) {
                if ($ranking->changeCronJobStatus($cron_job->id, $ranking->CRON_STATUS_ERROR)) {
                    $cron_job->status = $ranking->CRON_STATUS_ERROR;
                }
            }
        }
    }

    $response_body = json_encode(array('status' => 'ok'));
    $this->output->set_content_type('application/json')->set_output($response_body);
}