/**
 * Normalises Amazon product URLs stored in ranking_search_results_items.
 *
 * Selects up to N rows whose url matches the Amazon "/dp/" pattern below (N is
 * the `amazon_urls_unification_limit` setting, which is created with a default
 * of 1000 when it does not exist yet), passes every URL through
 * MY_Model::unifyAmazonUrl(), makes sure a product_url row exists for each
 * resulting URL and rewrites url / url_id on the selected rows. Counters and
 * timings of the run are stored as a JSON `setting_values` row attached to the
 * `amazon_urls_unification_stats` setting.
 *
 * When the configured limit is not positive nothing is selected, updated or
 * logged.
 *
 * @return void
 */
protected function amazon_urls_unification()
{
    $this->load->model('ranking_model');

    $started_at = gmdate('Y-m-d H:i:s');
    $run_clock = microtime(true);

    // Batch size comes from the settings table; seed the setting on first run.
    $limit_row = $this->db->where('key', 'amazon_urls_unification_limit')->get('settings')->row();
    if (empty($limit_row->id)) {
        $batch_limit = 1000;
        $this->db->insert('settings', array('key' => 'amazon_urls_unification_limit', 'description' => $batch_limit));
    } else {
        $batch_limit = intval($limit_row->description);
    }

    if ($batch_limit <= 0) {
        return;
    }

    // Key order matters: this array is serialised as-is into setting_values.
    $stats = array(
        'rsri_urls_limit' => $batch_limit,
        'rsri_urls_selected' => 0,
        'rsri_urls_selected_unique' => 0,
        'product_urls_exists' => 0,
        'rsri_urls_updated' => 0,
        'time_started' => $started_at,
        'time_select_rsri' => 0,
        'time_select_product_url' => 0,
        'time_update_rsri' => 0,
        'time_total' => 0,
    );

    // Step 1: pick candidate rows.
    $step_clock = microtime(true);
    $rows = $this->db
        ->select('id, url')
        ->from('ranking_search_results_items')
        ->where("url ~ 'https?://(?:www\\.)?amazon\\.c(?:om|o\\.uk|a)/(?:.*?/)?dp/.+?/.*'")
        ->limit($batch_limit)
        ->get()
        ->result();
    $stats['time_select_rsri'] = number_format(microtime(true) - $step_clock, 2);
    $stats['rsri_urls_selected'] = count($rows);

    // Step 2: group row ids by their unified URL.
    $groups = array();
    foreach ($rows as $row) {
        $unified = MY_Model::unifyAmazonUrl($row->url);
        if (empty($unified)) {
            continue;
        }
        if (!array_key_exists($unified, $groups)) {
            $groups[$unified] = array('rsri_ids' => array(), 'product_url_id' => null);
        }
        $groups[$unified]['rsri_ids'][] = $row->id;
    }
    $stats['rsri_urls_selected_unique'] = count($groups);

    // Step 3: look up product_url rows that already exist for the unified URLs.
    if (!empty($groups)) {
        $step_clock = microtime(true);
        $existing = $this->db
            ->select('id, url')
            ->from('product_url')
            ->where_in('url', array_keys($groups))
            ->get()
            ->result();
        $stats['time_select_product_url'] = number_format(microtime(true) - $step_clock, 2);

        foreach ($existing as $product_url) {
            $is_known = array_key_exists($product_url->url, $groups);
            // Only the first matching product_url per URL is used.
            if ($is_known && empty($groups[$product_url->url]['product_url_id'])) {
                $groups[$product_url->url]['product_url_id'] = $product_url->id;
                $stats['product_urls_exists']++;
            }
        }
    }

    // Step 4: create missing product_url rows and rewrite the selected items.
    $step_clock = microtime(true);
    foreach ($groups as $unified => $group) {
        if (empty($unified)) {
            continue;
        }

        $product_url_id = $group['product_url_id'];
        if (empty($product_url_id)) {
            $product_url_id = $this->ranking_model->create_('product_url', array('url' => $unified));
        }
        if (empty($product_url_id) || empty($group['rsri_ids'])) {
            continue;
        }

        $changes = array('url' => $unified, 'url_id' => $product_url_id);
        $target = array('id' => $group['rsri_ids']);
        // The counter advances once per unified URL whose update call succeeded.
        if ($this->ranking_model->update_('ranking_search_results_items', $changes, $target)) {
            $stats['rsri_urls_updated']++;
        }
    }
    $stats['time_update_rsri'] = number_format(microtime(true) - $step_clock, 2);
    $stats['time_total'] = number_format(microtime(true) - $run_clock, 2);

    // Step 5: persist the run statistics.
    $stats_row = $this->db->where('key', 'amazon_urls_unification_stats')->get('settings')->row();
    if (empty($stats_row->id)) {
        $this->db->insert('settings', array('key' => 'amazon_urls_unification_stats', 'description' => ''));
        $stats_setting_id = $this->db->insert_id();
    } else {
        $stats_setting_id = $stats_row->id;
    }

    if (!empty($stats_setting_id)) {
        $this->db->insert('setting_values', array(
            'setting_id' => $stats_setting_id,
            'user_id' => -1,
            'value' => json_encode($stats),
        ));
    }
}
/**
 * Import ranking data received from scraper.
 *
 * For every usable item the method resolves (or creates) the brand, the
 * search-term/brand relation and the product_url, then inserts or updates the
 * matching ranking_search_results_items row together with its marketplace,
 * variants, Google Shopping sellers, buyer reviews and sponsored links, and
 * finally creates or updates the per-relation summary rows.
 *
 * @param array  $ranking_data     Ranking data items. Items are read as objects
 *                                 (->title, ->brand, ->url, ...), so they are
 *                                 presumably decoded by the caller already.
 * @param object $ranking_data_job ranking_data_jobs record (site_id, search_term,
 *                                 search_term_group_id, created_at and url are read)
 * @return int number of items that were inserted or updated
 */
public function import_ranking_data($ranking_data, $ranking_data_job)
{
    $this->load->model('ranking_model');
    $this->load->model('sites_model');
    $this->load->library('price');
    $this->load->library('variant');
    $this->load->library('SalesEstimator/AmazonFba');
    $this->load->library('reseller');

    $imported_count = 0;

    // Nothing to import (also protects array_map() below from non-array input).
    if (empty($ranking_data) || !is_array($ranking_data)) {
        return $imported_count;
    }

    // Returns the first entity whose properties match every key/value pair of
    // $by (null values are compared strictly, everything else loosely), or null.
    $find_entity = function ($entities, $by) {
        if (is_array($entities)) {
            foreach ($entities as $entity) {
                $it_is = null;
                foreach ($by as $key => $value) {
                    $hit = false;
                    if (property_exists($entity, $key)) {
                        $hit = is_null($value) ? $entity->{$key} === $value : $entity->{$key} == $value;
                    }
                    $it_is = (is_null($it_is) ? true : $it_is) && $hit;
                }
                if ($it_is) {
                    return $entity;
                }
            }
        }
        return null;
    };

    // Normalise items: default the brand, drop unusable items, trim strings
    // and unify the url.
    $is_single_result = false;
    $ranking_data = array_values(array_filter(array_map(function ($ranking_data_item) use (&$is_single_result, $ranking_data_job) {
        if (empty($ranking_data_item)) {
            return null;
        }
        $result = $ranking_data_item;
        if (empty($result->brand)) {
            // see http://bugzilla.contentanalyticsinc.com/show_bug.cgi?id=901
            $result->brand = "No brand";
        }
        if (empty($result->title)) {
            return null;
        }
        if (empty($result->is_single_result) && (empty($result->url) || empty($ranking_data_job->search_term))) {
            // not is_single_result: url and search term are mandatory
            return null;
        }
        foreach ($result as $key => &$result_val) {
            if (is_string($result_val)) {
                $result_val = trim($result_val);
            }
            if ($key === 'url') {
                $result_val = Ranking_model::unifyAmazonUrl($result_val);
            }
        }
        unset($result_val); // do not leave a dangling reference to the last property
        $is_single_result = !empty($result->is_single_result);
        return $result;
    }, $ranking_data)));

    // The single-result mode only applies when exactly one item survived.
    $is_single_result = $is_single_result && count($ranking_data) === 1;
    if (count($ranking_data) === 0) {
        return $imported_count;
    }

    $site_id = $ranking_data_job->site_id;
    $date_of_upload = empty($ranking_data_job->created_at) ? gmdate('Y-m-d') : date('Y-m-d', strtotime($ranking_data_job->created_at));

    // Collect brands, urls and prices; resolve the site id if the job has none.
    $brand_names = array();
    $urls = array();
    $prices = array();
    foreach ($ranking_data as $product_key => $results_item) {
        // Strict comparison so that numeric-looking strings are not conflated.
        if (!empty($results_item->brand) && !in_array($results_item->brand, $brand_names, true)) {
            $brand_names[] = $results_item->brand;
        }
        if ($is_single_result) {
            $results_item->url = $ranking_data_job->url;
        }
        if (!empty($results_item->url) && !in_array($results_item->url, $urls, true)) {
            $urls[] = $results_item->url;
        }
        if (empty($site_id)) {
            if (!empty($results_item->site) || !empty($results_item->url)) {
                // get id of site by site name from current item.
                // example: "site": "walmart.com" or "url": "http://www.walmart.com/ip/16609038"
                $url_to_use = empty($results_item->site) ? $results_item->url : $results_item->site;
                $name_fragment = empty($results_item->is_mobile_agent) ? null : 'iphone'; // @to-do: remove 'iphone' hardcode
                $site_id = $this->sites_model->get_id_by_url_new($url_to_use, $name_fragment);
            }
        }
        if (!$is_single_result && !empty($results_item->price)) {
            $price_arr = $this->price->parsePrice($results_item->price);
            if (!empty($price_arr['price'])) {
                // Cast the ranking itself (the original cast applied to the
                // empty() result); a missing or non-numeric ranking sorts last.
                $ranking = empty($results_item->ranking) ? 0 : (int) $results_item->ranking;
                $prices[$product_key] = array(
                    'price' => (double) $price_arr['price'],
                    'ranking' => $ranking > 0 ? $ranking : 999,
                );
            }
        }
    }

    // sort by price & ranking
    uasort($prices, function ($a, $b) {
        if (!empty($a['price']) && !empty($a['ranking']) && !empty($b['price']) && !empty($b['ranking'])) {
            if ($a['price'] != $b['price']) {
                return $a['price'] > $b['price'] ? 1 : -1;
            } elseif ($a['ranking'] != $b['ranking']) {
                return $a['ranking'] > $b['ranking'] ? 1 : -1;
            }
        }
        return 0;
    });

    // get ranking_by_price per each product (product_key => ranking_by_price, 1-based)
    $ranking_by_price = array();
    $ranking_by_price_position = 1;
    foreach (array_keys($prices) as $product_key) {
        $ranking_by_price[$product_key] = $ranking_by_price_position++;
    }
    ksort($ranking_by_price);

    // Prefetch the entities the items may refer to.
    $search_terms = array();
    if (!empty($ranking_data_job->search_term) && !empty($ranking_data_job->search_term_group_id)) {
        $search_terms = $this->ranking_model->get_('search_terms', array('title' => $ranking_data_job->search_term, 'group_id' => $ranking_data_job->search_term_group_id));
    }
    $search_term_ids = array_map(function ($search_term) {
        return $search_term->id;
    }, $search_terms);

    $ranking_brands = array();
    if (!empty($brand_names)) {
        $ranking_brands = $this->ranking_model->get_('ranking_brands', array('name' => $brand_names));
    }
    $ranking_brand_ids = array_map(function ($ranking_brand) {
        return $ranking_brand->id;
    }, $ranking_brands);

    $search_terms_brands_relations = array();
    if (($is_single_result || !empty($search_term_ids)) && !empty($ranking_brand_ids)) {
        $search_terms_brands_relations_where = array();
        if (!$is_single_result) {
            $search_terms_brands_relations_where['search_term_id'] = $search_term_ids;
        } else {
            $search_terms_brands_relations_where['search_term_id IS NULL'] = null;
        }
        $search_terms_brands_relations_where['brand_id'] = $ranking_brand_ids;
        $search_terms_brands_relations = $this->ranking_model->get_('search_terms_brands_relation', $search_terms_brands_relations_where);
    }
    $search_terms_brands_relation_ids = array_map(function ($search_terms_brands_relation) {
        return $search_terms_brands_relation->id;
    }, $search_terms_brands_relations);

    $product_urls = array();
    if (!empty($urls)) {
        $product_urls_result = $this->ranking_model->get_('product_url', array('url' => $urls));
        foreach ($product_urls_result as $product_url_result) {
            $product_urls[$product_url_result->url] = $product_url_result->id; // url => id
        }
    }

    // array_filter() drops empty conditions from the where clauses.
    $ranking_search_results_items_summary_where = array_filter(array(
        'site_id' => $site_id,
        'search_items_brands_relation_id' => $search_terms_brands_relation_ids,
        'date_of_upload' => $date_of_upload,
    ));
    $ranking_search_results_items_summary = $this->ranking_model->get_('ranking_search_results_items_summary', $ranking_search_results_items_summary_where);

    $ranking_search_results_items_where = array_filter(array(
        'site_id' => $site_id,
        'search_items_brands_relation_id' => $search_terms_brands_relation_ids,
        'date_of_upload' => $date_of_upload,
        'url_id' => array_values($product_urls),
    ));
    $ranking_search_results_items = $this->ranking_model->get_('ranking_search_results_items', $ranking_search_results_items_where);
    $ranking_search_results_items_ids = array_map(function ($ranking_search_results_item) {
        return $ranking_search_results_item->id;
    }, $ranking_search_results_items);

    $ranking_buyers_reviews = array();
    if (!empty($ranking_search_results_items_ids)) {
        $ranking_buyers_reviews = $this->ranking_model->get_('ranking_buyers_review_info', array('rsri_id' => $ranking_search_results_items_ids));
    }

    // Accumulators for ranking_search_results_items_summary, keyed by relation id.
    $total_results = 0;
    $brand_results = array();
    $on_first_page = array();
    $results_per_page = array();

    foreach ($ranking_data as $product_key => $ranking_data_item) {
        if (empty($ranking_data_item->title) || empty($ranking_data_item->brand) || empty($site_id) || empty($ranking_data_item->url)) {
            continue;
        }
        if (!$is_single_result && empty($ranking_data_job->search_term)) {
            // not is_single_result
            continue;
        }

        // get search_term_id
        $search_term_id = null;
        if (!$is_single_result) {
            if ($search_term = $find_entity($search_terms, array('title' => $ranking_data_job->search_term))) {
                $search_term_id = $search_term->id;
            }
            // we don't need to create new search term
            if (empty($search_term_id)) {
                continue;
            }
        }

        // get brand_id
        $brand_id = null;
        if ($brand = $find_entity($ranking_brands, array('name' => $ranking_data_item->brand))) {
            $brand_id = $brand->id;
        } else {
            // create new ranking_brand
            $new_ranking_brand_data = array('name' => $ranking_data_item->brand);
            $brand_id = $this->ranking_model->create_('ranking_brands', $new_ranking_brand_data);
            if ($brand_id) {
                $ranking_brands[] = (object) array_merge(array('id' => $brand_id), $new_ranking_brand_data);
            }
        }
        if (empty($brand_id)) {
            continue;
        }

        // search_items_brands_relation_id
        $search_terms_brands_relation_id = null;
        $search_terms_brands_relation_data = array('search_term_id' => $search_term_id, 'brand_id' => $brand_id);
        $search_terms_brands_relation = $find_entity($search_terms_brands_relations, $search_terms_brands_relation_data);
        if (!empty($search_terms_brands_relation)) {
            $search_terms_brands_relation_id = $search_terms_brands_relation->id;
        } else {
            // create new search_terms_brands_relation
            $search_terms_brands_relation_id = $this->ranking_model->create_('search_terms_brands_relation', $search_terms_brands_relation_data);
            if ($search_terms_brands_relation_id) {
                $search_terms_brands_relations[] = (object) array_merge(array('id' => $search_terms_brands_relation_id), $search_terms_brands_relation_data);
            }
        }
        if (empty($search_terms_brands_relation_id)) {
            continue;
        }

        // url_id: reuse the prefetched product_url or create one
        if (empty($product_urls[$ranking_data_item->url])) {
            $productId = $this->reseller->getProductIdOrCreateByResellerIdAndSiteId($this->reseller->getIdFromUrl($ranking_data_item->url), $site_id);
            $product_url_id = $this->ranking_model->create_('product_url', array('url' => $ranking_data_item->url, 'product_id' => $productId));
            if ($product_url_id) {
                $product_urls[$ranking_data_item->url] = $product_url_id;
            }
        }
        $url_id = empty($product_urls[$ranking_data_item->url]) ? null : $product_urls[$ranking_data_item->url];
        if (empty($url_id)) {
            continue;
        }

        // compose $new_rsri_data
        $description = null;
        if (!empty($ranking_data_item->description)) {
            // a list of descriptions is reduced to its first element
            $description = is_array($ranking_data_item->description) ? reset($ranking_data_item->description) : $ranking_data_item->description;
            $description = html_entity_decode($description);
        }

        $search_term_in_title = null;
        if (!empty($ranking_data_item->search_term_in_title_exactly)) {
            $search_term_in_title = 'exact';
        } elseif (!empty($ranking_data_item->search_term_in_title_interleaved)) {
            $search_term_in_title = 'interleaved';
        } elseif (!empty($ranking_data_item->search_term_in_title_partial)) {
            $search_term_in_title = 'partial';
        }

        $price = $this->price->parsePrice(empty($ranking_data_item->price) ? '' : $ranking_data_item->price);

        // Legacy note (Jun 10 2015): the scraper was expected to switch to boolean
        // values, after which this block could be replaced with one line. Until
        // then both the 'Available' / 'Not Available' strings and plain
        // truthy / falsy values are accepted.
        $shipping = null;
        if (isset($ranking_data_item->shipping)) {
            if (!empty($ranking_data_item->shipping)) {
                if (in_array($ranking_data_item->shipping, array('Available', 'Not Available'))) {
                    $shipping = $ranking_data_item->shipping == 'Available' ? 't' : 'f';
                } else {
                    $shipping = 't';
                }
            } else {
                $shipping = 'f';
            }
        }

        // the scraper may deliver the value under either property name
        $best_seller_ranking = null;
        if (!empty($ranking_data_item->best_seller_ranking)) {
            $best_seller_ranking = (int) $ranking_data_item->best_seller_ranking;
        } elseif (!empty($ranking_data_item->bestseller_rank)) {
            $best_seller_ranking = (int) $ranking_data_item->bestseller_rank;
        }

        $prime = null;
        if (!empty($ranking_data_item->prime) && in_array($ranking_data_item->prime, array('Prime', 'PrimePantry'))) {
            $prime = $ranking_data_item->prime;
        }

        $new_rsri_data = array(
            'site_id' => $site_id,
            'search_items_brands_relation_id' => $search_terms_brands_relation_id,
            'ranking' => empty($ranking_data_item->ranking) ? null : $ranking_data_item->ranking,
            'url' => $ranking_data_item->url,
            'image_url' => empty($ranking_data_item->image_url) ? null : $ranking_data_item->image_url,
            'title' => html_entity_decode($ranking_data_item->title),
            'description' => $description,
            'model' => empty($ranking_data_item->model) ? null : $ranking_data_item->model,
            'upc' => empty($ranking_data_item->upc) ? null : $ranking_data_item->upc,
            'locale' => empty($ranking_data_item->locale) ? null : $ranking_data_item->locale,
            'date_of_upload' => $date_of_upload,
            'best_seller_ranking' => $best_seller_ranking,
            'search_term_in_title' => $search_term_in_title,
            'is_out_of_stock' => empty($ranking_data_item->is_out_of_stock) ? 'f' : 't',
            'is_in_store_only' => empty($ranking_data_item->is_in_store_only) ? 'f' : 't',
            'price' => empty($price['price']) ? null : number_format($price['price'], 2, '.', ''),
            'currency' => $price['priceCurrency'],
            'search_term' => empty($ranking_data_job->search_term) ? null : $ranking_data_job->search_term,
            'results_per_page' => empty($ranking_data_item->results_per_page) ? null : (int) $ranking_data_item->results_per_page,
            'url_id' => $url_id,
            'ranking_by_price' => empty($ranking_by_price[$product_key]) ? null : $ranking_by_price[$product_key],
            'prime' => $prime,
            'seller_category_id' => empty($ranking_data_item->department) ? null : $this->amazonfba->getCategoryIdByName($ranking_data_item->department),
            'pickup_only' => empty($ranking_data_item->is_pickup_only) ? 'f' : 't',
            'shipping' => $shipping,
        );

        $this->setQuestionAnswer($ranking_data_item, $url_id);

        // check whether product exists already, if so - update it, otherwise - insert
        $rsri_search_keys = array('site_id', 'search_items_brands_relation_id', 'date_of_upload', 'url_id');
        $rsri_where = array_intersect_key($new_rsri_data, array_flip($rsri_search_keys));
        if ($ranking_search_results_item = $find_entity($ranking_search_results_items, $rsri_where)) {
            // update only the columns whose values differ
            $rsri_id = $ranking_search_results_item->id;
            $update_rsri_data = array_diff_assoc($new_rsri_data, (array) $ranking_search_results_item);
            if (!empty($update_rsri_data)) {
                $this->ranking_model->update_('ranking_search_results_items', $update_rsri_data, array('id' => $rsri_id));
            }
        } else {
            // insert into ranking_search_results_items
            $rsri_id = $this->ranking_model->create_('ranking_search_results_items', $new_rsri_data);
            if (empty($rsri_id)) {
                // Insert failed: like the other create_() calls above, skip the
                // item instead of counting it and attaching children to no row.
                continue;
            }
            $ranking_search_results_items[] = (object) array_merge(array('id' => $rsri_id), $new_rsri_data);
        }
        $imported_count++; // no matter insert or update - we count it as imported

        // Market Place
        if (!empty($ranking_data_item->marketplace)) {
            $insertMarketplaceBatch = array();
            foreach ($ranking_data_item->marketplace as $marketplace) {
                $insertMarketplaceBatch[] = array(
                    'rsri_id' => $rsri_id,
                    'name' => !empty($marketplace->name) ? $marketplace->name : null,
                    'price' => !empty($marketplace->price) ? $marketplace->price : null,
                    'currency' => !empty($marketplace->currency) ? $marketplace->currency : null,
                );
            }
            if (!empty($insertMarketplaceBatch)) {
                $this->ranking_model->create_batch_('product_marketplace', $insertMarketplaceBatch);
            }
        }

        // Save Variants
        if (!empty($ranking_data_item->variants)) {
            $this->variant->save($ranking_data_item->variants, $rsri_id, $url_id);
        }

        // Save Google Shopping Sellers (a JSON document keyed by seller name)
        if (!empty($ranking_data_item->google_source_site)) {
            $sellersArray = json_decode($ranking_data_item->google_source_site);
            $decoded_ok = json_last_error() === JSON_ERROR_NONE;
            // Valid JSON can still decode to a scalar or null, which cannot be iterated.
            if ($decoded_ok && (is_array($sellersArray) || is_object($sellersArray))) {
                $insertSellersBatch = array();
                foreach ($sellersArray as $sellerName => $seller) {
                    if (empty($sellerName)) {
                        continue;
                    }
                    $insertSellersBatch[] = array(
                        'rsri_id' => $rsri_id,
                        'seller_name' => $sellerName,
                        'price' => !empty($seller->price) ? $seller->price : null,
                        'currency' => !empty($seller->currency) ? $seller->currency : null,
                    );
                }
                // Same guard as for the marketplace batch: never insert an empty batch.
                if (!empty($insertSellersBatch)) {
                    $this->ranking_model->create_batch_('google_shopping_sellers', $insertSellersBatch);
                }
            }
        }

        // save buyer reviews (either a positional array or an object with named fields)
        if (!empty($ranking_data_item->buyer_reviews) && (is_array($ranking_data_item->buyer_reviews) || is_object($ranking_data_item->buyer_reviews))) {
            $buyer_reviews = $ranking_data_item->buyer_reviews;
            $buyer_reviews_data = array();
            if (is_array($buyer_reviews)) {
                $buyer_reviews_data['total_count'] = empty($buyer_reviews[0]) ? 0 : (int) $buyer_reviews[0];
                $buyer_reviews_data['average_num'] = empty($buyer_reviews[1]) ? 0 : (double) $buyer_reviews[1];
                $raitings_by_stars = empty($buyer_reviews[2]) ? new stdClass() : $buyer_reviews[2];
            } else {
                $buyer_reviews_data['total_count'] = empty($buyer_reviews->num_of_reviews) ? 0 : (int) $buyer_reviews->num_of_reviews;
                $buyer_reviews_data['average_num'] = empty($buyer_reviews->average_rating) ? 0 : (double) $buyer_reviews->average_rating;
                $raitings_by_stars = empty($buyer_reviews->rating_by_star) ? new stdClass() : $buyer_reviews->rating_by_star;
            }
            // column name => star count property ("1" .. "5") of the ratings object
            $star_columns = array('one_star' => 1, 'two_star' => 2, 'three_star' => 3, 'four_star' => 4, 'five_star' => 5);
            foreach ($star_columns as $star_column => $star) {
                $buyer_reviews_data[$star_column] = empty($raitings_by_stars->{$star}) ? 0 : $raitings_by_stars->{$star};
            }

            if (($ranking_buyers_review = $find_entity($ranking_buyers_reviews, array('rsri_id' => $rsri_id))) && !empty($ranking_buyers_review->id)) {
                $update_buyer_reviews = array_diff_assoc($buyer_reviews_data, (array) $ranking_buyers_review);
                if (!empty($update_buyer_reviews)) {
                    $this->ranking_model->update_('ranking_buyers_review_info', $update_buyer_reviews, array('id' => $ranking_buyers_review->id));
                }
            } else {
                $buyer_reviews_data['rsri_id'] = $rsri_id;
                $ranking_buyers_review_info_id = $this->ranking_model->create_('ranking_buyers_review_info', $buyer_reviews_data);
                $ranking_buyers_reviews[] = (object) array_merge(array('id' => $ranking_buyers_review_info_id), $buyer_reviews_data);
            }
        }

        // Save Sponsored Links (the product_id column receives the rsri id)
        if (!empty($ranking_data_item->sponsored_links)) {
            $insertSponsoredLinksBatch = array();
            foreach ($ranking_data_item->sponsored_links as $link) {
                $insertSponsoredLinksBatch[] = array(
                    'product_id' => $rsri_id,
                    'ad_title' => $link->ad_title,
                    'ad_text' => $link->ad_text,
                    'visible_url' => $link->visible_url,
                    'actual_url' => $link->actual_url,
                );
            }
            if (!empty($insertSponsoredLinksBatch)) {
                $this->ranking_model->create_batch_('product_sponsored_links', $insertSponsoredLinksBatch);
            }
        }

        // collect data for ranking_search_results_items_summary
        if ($total_results === 0 && !empty($ranking_data_item->total_matches)) {
            $total_results = $ranking_data_item->total_matches;
        }
        if (!array_key_exists($search_terms_brands_relation_id, $brand_results)) {
            $brand_results[$search_terms_brands_relation_id] = 0;
        }
        if (!array_key_exists($search_terms_brands_relation_id, $on_first_page)) {
            $on_first_page[$search_terms_brands_relation_id] = 0;
        }
        if (!array_key_exists($search_terms_brands_relation_id, $results_per_page)) {
            $results_per_page[$search_terms_brands_relation_id] = 0;
        }
        if (!empty($ranking_data_item->ranking)) {
            $brand_results[$search_terms_brands_relation_id]++;
            if (!empty($ranking_data_item->results_per_page)) {
                if ((int) $ranking_data_item->ranking <= (int) $ranking_data_item->results_per_page) {
                    $on_first_page[$search_terms_brands_relation_id]++;
                }
                $results_per_page[$search_terms_brands_relation_id] = $ranking_data_item->results_per_page;
            }
        }
    } // foreach ($ranking_data as $ranking_data_item)

    if (!empty($urls)) {
        $this->ranking_model->updateAndGetUniqueProductUrls($urls, $product_urls);
    }

    // create or update summary data in ranking_search_results_items_summary table
    foreach ($brand_results as $stbr_id => $brand_result) {
        // compose on_first_page string as number_of_res_on_1st_page/results_per_page
        $on_first_page_str = empty($on_first_page[$stbr_id]) ? 0 : $on_first_page[$stbr_id];
        $on_first_page_str .= empty($results_per_page[$stbr_id]) ? '' : "/{$results_per_page[$stbr_id]}";

        $new_rsris_data = array(
            'site_id' => $site_id,
            'total_results' => $total_results,
            'brand_results' => $brand_result,
            'search_items_brands_relation_id' => $stbr_id,
            'date_of_upload' => $date_of_upload,
            'on_first_page' => $on_first_page_str,
        );
        $rsris_search_keys = array('site_id', 'search_items_brands_relation_id', 'date_of_upload');
        $rsris_where = array_intersect_key($new_rsris_data, array_flip($rsris_search_keys));
        if ($rsris = $find_entity($ranking_search_results_items_summary, $rsris_where)) {
            $update_rsris_data = array_diff_assoc($new_rsris_data, (array) $rsris);
            if (!empty($update_rsris_data)) {
                $this->ranking_model->update_('ranking_search_results_items_summary', $update_rsris_data, array('id' => $rsris->id));
            }
        } else {
            // create new ranking_search_results_items_summary
            $rsris_id = $this->ranking_model->create_('ranking_search_results_items_summary', $new_rsris_data);
            if ($rsris_id) {
                $ranking_search_results_items_summary[] = (object) array_merge(array('id' => $rsris_id), $new_rsris_data);
            }
        }
    }

    return $imported_count;
}