コード例 #1
0
ファイル: crawl.php プロジェクト: janladaking/CodeIgniter
 /**
  * Normalises Amazon product URLs stored in ranking_search_results_items and
  * links every affected row to a product_url record.
  *
  * Steps, in order:
  *  1. Read the batch size from the settings row keyed
  *     'amazon_urls_unification_limit'; when no such row is found a default
  *     of 1000 is used and written to the settings table.
  *  2. Select up to that many rows whose url matches the Amazon pattern below
  *     (the `~` operator is presumably a PostgreSQL regex match - confirm the
  *     database driver).
  *  3. Pass each url through MY_Model::unifyAmazonUrl() and group the row ids
  *     by the value it returns; rows for which it returns an empty value are
  *     skipped.
  *  4. Look up product_url rows for the grouped urls, creating the missing
  *     ones through ranking_model->create_() (its return value is used as the
  *     product_url id - verify against the model).
  *  5. Update the grouped rows with the unified url and the product_url id.
  *  6. Store the collected counters and timings as JSON in setting_values
  *     under the settings row keyed 'amazon_urls_unification_stats'.
  *
  * When the configured limit is zero or negative the method stops after
  * step 1 and no statistics are stored.
  *
  * @return void
  */
 protected function amazon_urls_unification()
 {
     $this->load->model('ranking_model');

     $started_at = gmdate('Y-m-d H:i:s');
     $clock_total = microtime(true);

     // Every key is declared up front so the JSON layout is stable even when
     // a step is skipped.
     $report = array(
         'rsri_urls_limit' => 0,
         'rsri_urls_selected' => 0,
         'rsri_urls_selected_unique' => 0,
         'product_urls_exists' => 0,
         'rsri_urls_updated' => 0,
         'time_started' => $started_at,
         'time_select_rsri' => 0,
         'time_select_product_url' => 0,
         'time_update_rsri' => 0,
         'time_total' => 0,
     );

     // Batch size: taken from settings when present, otherwise seeded with the default.
     $batch_limit = 1000;
     $limit_row = $this->db
         ->where('key', 'amazon_urls_unification_limit')
         ->get('settings')
         ->row();
     if (empty($limit_row->id)) {
         $this->db->insert('settings', array(
             'key' => 'amazon_urls_unification_limit',
             'description' => $batch_limit,
         ));
     } else {
         $batch_limit = intval($limit_row->description);
     }
     $report['rsri_urls_limit'] = $batch_limit;

     // A non-positive limit switches the job off entirely.
     if ($batch_limit <= 0) {
         return;
     }

     // Fetch candidate rows whose url still matches the Amazon product pattern.
     $clock = microtime(true);
     $rows = $this->db
         ->select('id, url')
         ->from('ranking_search_results_items')
         ->where("url ~ 'https?://(?:www\\.)?amazon\\.c(?:om|o\\.uk|a)/(?:.*?/)?dp/.+?/.*'")
         ->limit($batch_limit)
         ->get()
         ->result();
     $report['time_select_rsri'] = number_format(microtime(true) - $clock, 2);
     $report['rsri_urls_selected'] = count($rows);

     // Group row ids by unified url; product_url_id is filled in afterwards.
     $groups = array();
     foreach ($rows as $row) {
         $canonical = MY_Model::unifyAmazonUrl($row->url);
         if (empty($canonical)) {
             continue;
         }
         if (!array_key_exists($canonical, $groups)) {
             $groups[$canonical] = array('rsri_ids' => array(), 'product_url_id' => null);
         }
         $groups[$canonical]['rsri_ids'][] = $row->id;
     }
     $report['rsri_urls_selected_unique'] = count($groups);

     // Attach ids of product_url rows that already exist for the unified urls.
     if (!empty($groups)) {
         $clock = microtime(true);
         $existing = $this->db
             ->select('id, url')
             ->from('product_url')
             ->where_in('url', array_keys($groups))
             ->get()
             ->result();
         $report['time_select_product_url'] = number_format(microtime(true) - $clock, 2);

         foreach ($existing as $known) {
             $key = $known->url;
             // Only the first product_url row found for a given url is used.
             if (!array_key_exists($key, $groups) || !empty($groups[$key]['product_url_id'])) {
                 continue;
             }
             $groups[$key]['product_url_id'] = $known->id;
             $report['product_urls_exists']++;
         }
     }

     // Rewrite the ranking_search_results_items rows, one update per unified url.
     $updated = 0;
     $clock = microtime(true);
     foreach ($groups as $canonical => $group) {
         if (empty($canonical)) {
             continue;
         }

         $product_url_id = $group['product_url_id'];
         if (empty($product_url_id)) {
             // No product_url row yet for this url: create one.
             $product_url_id = $this->ranking_model->create_('product_url', array('url' => $canonical));
         }
         if (empty($product_url_id) || empty($group['rsri_ids'])) {
             continue;
         }

         $changes = array('url' => $canonical, 'url_id' => $product_url_id);
         $target = array('id' => $group['rsri_ids']);
         // The counter advances once per successful update call, i.e. per unified url.
         if ($this->ranking_model->update_('ranking_search_results_items', $changes, $target)) {
             $updated++;
         }
     }
     $report['rsri_urls_updated'] = $updated;
     $report['time_update_rsri'] = number_format(microtime(true) - $clock, 2);
     $report['time_total'] = number_format(microtime(true) - $clock_total, 2);

     // Persist the run statistics, creating the owning settings row on first use.
     $stats_row = $this->db
         ->where('key', 'amazon_urls_unification_stats')
         ->get('settings')
         ->row();
     if (empty($stats_row->id)) {
         $this->db->insert('settings', array(
             'key' => 'amazon_urls_unification_stats',
             'description' => '',
         ));
         $stats_setting_id = $this->db->insert_id();
     } else {
         $stats_setting_id = $stats_row->id;
     }
     if (empty($stats_setting_id)) {
         return;
     }

     $this->db->insert('setting_values', array(
         'setting_id' => $stats_setting_id,
         'user_id' => -1,
         'value' => json_encode($report),
     ));
 }