$arr['millage'] = $avg_km; } } $sql = "SELECT prop_val,sum(specs) as num\n\t\t\t\t\tFROM ru_eyezeek.np_market_trends_vehicles_specs s join \n\t\t\t\t\t\tru_eyezeek.np_market_trends_vehicles_items i on s.item_id=i.id\n\t\t\t\t\t\twhere make='" . $arr['brand'] . "' and model='" . $arr['series'] . "' and \n\t\t\t\t\t\t\t `year`=" . $arr['year'] . " and prop='cnt_in_city' group by make,model,year,prop_val"; $regions = array(); $total_count = 0; print_text("Fetching regions for: " . $arr['brand'] . " " . $arr['series'] . " " . $arr['year']); if ($result1 = $mysqli->query($sql)) { while ($arr1 = $result1->fetch_assoc()) { $sph->resetSphinx(); $sph->setAttribute('what', 'vehicles'); $sph->setAttribute('lmake', $arr['brand']); $sph->setAttribute('lmodel', $arr['series']); $sph->setAttribute('lyear', $arr['year']); $sph->setAttribute('lgeocity', $arr1['prop_val']); $sph->setOptions($options); $res = $sph->search(); $active_in_city = $res['total_found']; $sph->resetSphinx(); $sph->setAttribute('what', 'vehicles'); $sph->setAttribute('lmake', $arr['brand']); $sph->setAttribute('lmodel', $arr['series']); $sph->setAttribute('lyear', $arr['year']); $sph->setAttribute('lgeocity', $arr1['prop_val']); $sph->setFilter($sph->VALUE_RANGE_INT, array('l_postdate', (int) strtotime(date('Y-m-00', strtotime('-30 day'))), (int) strtotime(date('Y-m-00')), FALSE)); $sph->setOptions($options); $res = $sph->search(); $posted_and_deleted = (int) ($arr1['num'] - $res['total_found']); $total_count += $active_in_city; $regions[] = array('r_name' => $arr1['prop_val'], 'count' => $active_in_city, 'count_post' => $arr1['num'], 'count_deleted' => $posted_and_deleted); }
// echo "\n".$ROW['category_id'].": ".$ROW['job_title']; $categories_titles[$ROW['category_id']][] = $ROW['job_title']; } // var_dump($categories_titles); // die(); $sph->resetSphinx(); foreach ($categories_titles as $cat_id => $category) { PrintText("\n\nCategory ID: {$cat_id}\n====================================="); $sph->multiInit(getOptions()); $stop_after = 31; // $count = 1; // foreach($category as $one_title){ for ($cat = 0; $cat < sizeof($category); $cat++) { $one_title = $category[$cat]; PrintText("{$cat}: {$one_title}"); $sph->setOptions(getOptions()); $sph->setAttribute('what', 'jobs'); $sph->setAttribute('lposition', $one_title); $sph->addQuery(); if ($cat % $stop_after == 0 && $cat != 0) { getSPHresutls($sph, &$ids, $cat_id); // PrintText("\n-------\nafter function ids:\n"); // var_dump($ids); updateDB($ids, $cat_id); unset($ids); $ids = NULL; } // $count++; } getSPHresutls($sph, &$ids, $cat_id); updateDB($ids, $cat_id);
/**
 * Runs a grouped search over the vehicle indexes for one city and one make.
 *
 * @param mixed  $city           Value set for the 'lgeocity' attribute.
 * @param mixed  $make           Value set for the 'lmake' attribute.
 * @param string $group_by_field Attribute the matches are grouped by; groups
 *                               are ordered by '@count DESC'.
 *
 * @return mixed Whatever CSphinx::search() returns for this query.
 */
private function make_search($city, $make, $group_by_field)
{
    // First page only, at most 50 groups, across both vehicle indexes.
    $searchOptions = array(
        'connectTimeout' => 5,
        'arrayResult' => TRUE,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'index' => 'i_ref_vehicles i_delta_vehicles',
        'page' => 1,
        'limit' => 50,
        'groupBy' => array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC'),
    );

    // NOTE(review): '3313' is presumably the searchd port - confirm against sph_conf.
    $client = new CSphinx(new sph_conf('3313'));
    $client->resetSphinx();
    $client->setAttribute('lgeocity', $city);
    $client->setAttribute('lmake', $make);
    $client->setAttribute('what', 'vehicles');
    $client->setOptions($searchOptions);

    return $client->search(NULL, FALSE);
}
/**
 * Deactivates duplicated job listings for every target in $l_countries.
 *
 * For each target the function:
 *   1. groups the job indexes by the 'duplicates' attribute and remembers, for
 *      every group with more than one member, the id of the listing returned
 *      for that group (sorted by 'alps_promoted desc, l_postdate desc') and
 *      the group's 'duplicates' value;
 *   2. queries the listings filtered on those 'duplicates' values and on the
 *      remembered ids, and collects the resulting ids;
 *   3. sets l_isActive=0 for the collected ids in the target's live schema,
 *      in batches of 200 ids per UPDATE.
 *
 * Only the first result page of steps 1 and 2 is processed: a manual cut-off
 * stops the paging loops after one iteration to avoid offset overflows.
 *
 * A target without duplicates is skipped and the remaining targets are still
 * processed (previously the function returned at the first such target and
 * silently ignored all targets after it).
 *
 * Any Sphinx or MySQL error is logged with dumpError() and terminates the
 * script via die.
 *
 * @param array $l_countries List of targets; each entry is read for the keys
 *                           'reg', 'port', 'server' and 'schemali'.
 * @param int   $max         Value passed as the 'limits' option of the
 *                           grouping query, and the threshold below which a
 *                           page is considered the last one.
 *
 * @return bool Always TRUE when the function returns normally.
 */
function newAntiDuplicate($l_countries, $max = 100)
{
    $dbConf = new Config("/home/eyezeek/db.conf", "ini");

    $optionsTemplate = array(
        'connectTimeout' => 5,
        'arrayResult' => true,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'limits' => $max,
        'page' => 1,
        'index' => "i_ref_jobs i_delta_jobs",
        'groupBy' => array('duplicates', SPH_GROUPBY_ATTR, '@count desc'),
    );

    // Number of ids deactivated per UPDATE statement.
    $maxDump = 200;

    foreach ($l_countries as $target) {
        dumpMsg('Starting anti-dup ' . $target['reg']);

        $conf = new Conftmp($target['port']);
        $sph = new CSphinx($conf, $target['server']);

        // ids to keep
        $keepIDs = array();
        // crc to delete (seeded with 1, so the filter value list is never empty)
        $crcIDs = array(1);
        // ids to set inactive (seeded with 0, so the SQL IN (...) list is never empty)
        $duplicateIDs = array(0);

        // ---- Step 1: find the groups that contain duplicates ----------------
        dumpMsg('Grouping ... ');
        $goOnWithNextPage = TRUE;
        $page = 1;

        while ($goOnWithNextPage) {
            sleep(1);

            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $options['sortMode'] = array(SPH_SORT_EXTENDED, 'alps_promoted desc, l_postdate desc');

            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sph->setOptions($options);
            $res = $sph->search();

            if (!$res) {
                dumpError('Grouping: ' . $sph->getError());
                die;
            }

            $relevantGroups = 0;

            if (isset($res['matches'])) {
                foreach ($res['matches'] as $oneMatch) {
                    // a group count above 1 means this listing has duplicates
                    if ($oneMatch['attrs']['@count'] > 1) {
                        $keepIDs[] = $oneMatch['id'];
                        $crcIDs[] = $oneMatch['attrs']['duplicates'];
                        $relevantGroups++;
                    } else {
                        // groups are ordered by '@count desc', so nothing relevant follows
                        $goOnWithNextPage = FALSE;
                    }
                }

                // a page that is not full of relevant groups is the last one
                if ($relevantGroups < $max) {
                    $goOnWithNextPage = FALSE;
                }
            } else {
                // no more matches i.e. nothing to do
                $goOnWithNextPage = FALSE;
            }

            // for now simply put a manual cut-off to avoid offset overflows
            $goOnWithNextPage = FALSE;
        }

        dumpMsg('Grouped ' . ($page - 1) . ' pages and ' . count($keepIDs) . ' listings have at least 1 duplicate... ');
        dumpMsg("Processing IDs ... \n");

        if (count($keepIDs) == 0) {
            dumpMsg('Nothing to do. No duplicated were detected ');
            dumpMsg('Finished anti-dup ' . $target['reg']);
            unset($sph);
            // Move on to the next target instead of returning: the other
            // targets still have to be checked.
            continue;
        }

        // ---- Step 2: resolve the ids of the actual duplicates ---------------
        $goOnWithNextPage = TRUE;
        $page = 1;

        while ($goOnWithNextPage) {
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $options['limits'] = 2000;
            unset($options['groupBy']);

            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sph->setOptions($options);
            // NOTE(review): the third array element is presumably the "exclude"
            // flag (skip the kept ids, match the duplicated crc values) -
            // confirm against CSphinx::setFilter().
            $sph->setFilter($sph->VALUE_FILTER, array('theid', $keepIDs, TRUE));
            $sph->setFilter($sph->VALUE_FILTER, array('duplicates', $crcIDs, FALSE));
            $res = $sph->search();

            if (!$res) {
                dumpError('Error in fetching duplicates: ' . $sph->getError());
                die;
            }

            $resultArray = $sph->getIDs();

            if (sizeof($resultArray) < $options['limits']) {
                $goOnWithNextPage = FALSE;
            }

            $duplicateIDs = array_merge($duplicateIDs, $resultArray);

            // for now simply put a manual cut-off to avoid offset overflows
            $goOnWithNextPage = FALSE;
        }

        // ---- Step 3: deactivate the duplicates in the live database ---------
        dumpMsg('Doing SQL updates and deactivating ' . sizeof($duplicateIDs) . ' duplicates ... ');

        $liveDB = new DBAdapter2(array(
            'host' => $dbConf->live->host,
            'username' => $dbConf->live->username,
            'password' => $dbConf->live->password,
            'schema' => $target['schemali'],
        ));

        foreach (array_chunk($duplicateIDs, $maxDump) as $batchIDs) {
            // throttle between batches
            sleep(1);

            $sql = "UPDATE jobs SET l_isActive=0 WHERE l_id IN (" . implode(",", $batchIDs) . ")";

            try {
                $affectedRows = 0;
                $lid = 0;
                $liveDB->executeNoresSQL($sql, $affectedRows, $lid);
            } catch (DBAdapter2Exception $e) {
                dumpError(" MYSQL Error: " . $e->getMessage() . " *** QUERY: {$sql}");
                die;
            }
        }

        unset($sph);
        dumpMsg('Finished anti-dup ' . $target['reg']);
    }

    return TRUE;
}
/**
 * Runs a search over the real-estate indexes for one city, deal type and
 * category, optionally grouped by an attribute.
 *
 * @param mixed  $city                     Value set for the 'lgeocity' attribute.
 * @param mixed  $one_deal_type            Single value used for the 'l_deal_type' filter.
 * @param string $one_translitted_category Key into $this->m_aCategoriesDetails; every
 *                                         entry stored under it is set as 'lassettype'.
 * @param string $group_by_field           Attribute to group by (ordered by '@count DESC'),
 *                                         or 'empty' to run the search without grouping.
 *
 * @return mixed Whatever CSphinx::search() returns for this query.
 */
private function make_search($city, $one_deal_type, $one_translitted_category, $group_by_field)
{
    // First page only, at most 50 results, across both real-estate indexes.
    $searchOptions = array(
        'connectTimeout' => 5,
        'arrayResult' => TRUE,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'index' => 'i_ref_realestate i_delta_realestate',
        'page' => 1,
        'limit' => 50,
    );

    // The literal 'empty' is the caller's way of asking for an ungrouped search.
    $wantsGrouping = ($group_by_field != 'empty');
    if ($wantsGrouping) {
        $searchOptions['groupBy'] = array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC');
    }

    // NOTE(review): '3313' is presumably the searchd port - confirm against sph_conf.
    $client = new CSphinx(new sph_conf('3313'), '192.168.240.43');
    $client->resetSphinx();
    $client->setFilter(
        $client->VALUE_FILTER,
        array('l_deal_type', array($one_deal_type), FALSE)
    );
    $client->setAttribute('lgeocity', $city);

    $assetTypes = $this->m_aCategoriesDetails[$one_translitted_category];
    foreach ($assetTypes as $assetType) {
        $client->setAttribute('lassettype', $assetType);
    }

    $client->setAttribute('what', 'realestate');
    $client->setOptions($searchOptions);

    return $client->search(NULL, FALSE);
}