} }

// Collect per-city statistics for the make/model/year held in $arr.
// The SQL query yields, per city (prop_val), the summed `specs` value of the
// 'cnt_in_city' property; Sphinx is then queried twice per city to relate
// that number to what is currently indexed.
// NOTE(review): $arr['brand'], $arr['series'] and $arr['year'] are concatenated
// straight into the SQL text. Their origin is not visible in this fragment —
// confirm they are trusted, or switch to a prepared statement.
$sql = "SELECT prop_val,sum(specs) as num\n\t\t\t\t\tFROM ru_eyezeek.np_market_trends_vehicles_specs s join \n\t\t\t\t\t\tru_eyezeek.np_market_trends_vehicles_items i on s.item_id=i.id\n\t\t\t\t\t\twhere make='" . $arr['brand'] . "' and model='" . $arr['series'] . "' and \n\t\t\t\t\t\t\t `year`=" . $arr['year'] . " and prop='cnt_in_city' group by make,model,year,prop_val";
// One entry per city is appended to $regions below.
$regions = array();
// Running sum of the unfiltered Sphinx hit counts over all cities.
$total_count = 0;
print_text("Fetching regions for: " . $arr['brand'] . " " . $arr['series'] . " " . $arr['year']);
// A failed query is skipped silently: $regions stays empty and $total_count stays 0.
if ($result1 = $mysqli->query($sql)) {
    while ($arr1 = $result1->fetch_assoc()) {
        // First search: every indexed vehicle of this make/model/year in the
        // city, with no date restriction.
        $sph->resetSphinx();
        $sph->setAttribute('what', 'vehicles');
        $sph->setAttribute('lmake', $arr['brand']);
        $sph->setAttribute('lmodel', $arr['series']);
        $sph->setAttribute('lyear', $arr['year']);
        $sph->setAttribute('lgeocity', $arr1['prop_val']);
        $sph->setOptions($options);
        $res = $sph->search();
        $active_in_city = $res['total_found'];
        // Second search: same attributes plus an l_postdate range filter.
        $sph->resetSphinx();
        $sph->setAttribute('what', 'vehicles');
        $sph->setAttribute('lmake', $arr['brand']);
        $sph->setAttribute('lmodel', $arr['series']);
        $sph->setAttribute('lyear', $arr['year']);
        $sph->setAttribute('lgeocity', $arr1['prop_val']);
        // NOTE(review): both bounds use the date format 'Y-m-00'. PHP's date
        // parser appears to normalise day "00" to the last day of the previous
        // month — confirm this is the intended window. The trailing FALSE is
        // presumably the "exclude" flag of the filter; verify against CSphinx.
        $sph->setFilter($sph->VALUE_RANGE_INT, array('l_postdate', (int) strtotime(date('Y-m-00', strtotime('-30 day'))), (int) strtotime(date('Y-m-00')), FALSE));
        $sph->setOptions($options);
        $res = $sph->search();
        // SQL total for the city minus the hits still found inside the date window.
        $posted_and_deleted = (int) ($arr1['num'] - $res['total_found']);
        $total_count += $active_in_city;
        $regions[] = array('r_name' => $arr1['prop_val'], 'count' => $active_in_city, 'count_post' => $arr1['num'], 'count_deleted' => $posted_and_deleted);
    }
}
/**
 * Deactivates duplicated job listings, one target country at a time.
 *
 * For every entry of $l_countries the function:
 *   1. groups the "jobs" index by the `duplicates` attribute and remembers,
 *      for every group with more than one member, the id of the top-ranked
 *      listing (kept) and the group's `duplicates` value;
 *   2. fetches the ids of all other listings sharing those `duplicates`
 *      values;
 *   3. sets l_isActive=0 for these ids in the live `jobs` table, in batches.
 *
 * Any Sphinx or MySQL error is logged through dumpError() and terminates the
 * script (die), exactly as before.
 *
 * @param array $l_countries List of targets; each entry is read for the keys
 *                           'reg', 'port', 'server' and 'schemali'.
 * @param int   $max         Page size of the grouping query ('limits' option).
 *
 * @return bool TRUE once every target has been processed.
 */
function newAntiDuplicate($l_countries, $max = 100)
{
    $dbConf = new Config("/home/eyezeek/db.conf", "ini");
    $optionsTemplate = array(
        'connectTimeout' => 5,
        'arrayResult' => true,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'limits' => $max,
        'page' => 1,
        'index' => "i_ref_jobs i_delta_jobs",
        'groupBy' => array('duplicates', SPH_GROUPBY_ATTR, '@count desc'),
    );

    foreach ($l_countries as $target) {
        dumpMsg('Starting anti-dup ' . $target['reg']);

        $conf = new Conftmp($target['port']);
        $sph = new CSphinx($conf, $target['server']);

        // ids to keep
        $keepIDs = array();
        // crc to delete; seeded with 1 so the filter value list is never empty
        $crcIDs = array(1);
        // ids to set inactive = 0; seeded with 0 so the IN (...) list is never empty
        $duplicateIDs = array(0);

        dumpMsg('Grouping ... ');
        $goOnWithNextPage = TRUE;
        $page = 1;
        while ($goOnWithNextPage) {
            sleep(1);
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $options['sortMode'] = array(SPH_SORT_EXTENDED, 'alps_promoted desc, l_postdate desc');

            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sph->setOptions($options);
            $res = $sph->search();
            if (!$res) {
                dumpError('Grouping: ' . $sph->getError());
                die;
            }

            if (isset($res['matches'])) {
                $relevantGroups = 0;
                foreach ($res['matches'] as $oneMatch) {
                    // if this is > 1 then there are duplicates
                    if ($oneMatch['attrs']['@count'] > 1) {
                        $keepIDs[] = $oneMatch['id'];
                        $crcIDs[] = $oneMatch['attrs']['duplicates'];
                        $relevantGroups++;
                    } else {
                        $goOnWithNextPage = FALSE;
                    }
                }
                // extra condition to check if there is no more need to traverse next page
                if ($relevantGroups < $max) {
                    $goOnWithNextPage = FALSE;
                }
            } else {
                // no more matches i.e. nothing to do
                $goOnWithNextPage = FALSE;
            }

            // for now simple put manual cut-off to avoid offset overflows
            $goOnWithNextPage = FALSE;
        }
        dumpMsg('Grouped ' . ($page - 1) . ' pages and ' . count($keepIDs) . ' listings have at least 1 duplicate... ');
        // The message below deliberately ends with a line break inside the literal.
        dumpMsg('Processing IDs ... 
');

        if (count($keepIDs) == 0) {
            dumpMsg('Nothing to do. No duplicated were detected ');
            dumpMsg('Finished anti-dup ' . $target['reg']);
            // Fix: this used to `return TRUE`, which silently skipped every
            // remaining country. Move on to the next target instead.
            unset($sph);
            continue;
        }

        $goOnWithNextPage = TRUE;
        $page = 1;
        while ($goOnWithNextPage) {
            // find actual duplicate ids
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $options['limits'] = 2000;
            unset($options['groupBy']);

            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sph->setOptions($options);
            // exclude the listings we keep, include everything sharing their crc
            $sph->setFilter($sph->VALUE_FILTER, array('theid', $keepIDs, TRUE));
            $sph->setFilter($sph->VALUE_FILTER, array('duplicates', $crcIDs, FALSE));
            $res = $sph->search();
            if (!$res) {
                dumpError('Error in fetching duplicates: ' . $sph->getError());
                die;
            }

            $resultArray = $sph->getIDs();
            if (count($resultArray) < $options['limits']) {
                $goOnWithNextPage = FALSE;
            }
            $duplicateIDs = array_merge($duplicateIDs, $resultArray);

            // for now simple put manual cut-off to avoid offset overflows
            $goOnWithNextPage = FALSE;
        }

        // The reported number includes the seed id 0, as it always did.
        dumpMsg('Doing SQL updates and deactivating ' . count($duplicateIDs) . ' duplicates ... ');

        $maxDump = 200;
        $liveDB = new DBAdapter2(array(
            'host' => $dbConf->live->host,
            'username' => $dbConf->live->username,
            'password' => $dbConf->live->password,
            'schema' => $target['schemali'],
        ));

        // Deactivate in batches of $maxDump ids, pausing one second before each batch.
        foreach (array_chunk($duplicateIDs, $maxDump) as $tmpIDs) {
            sleep(1);
            $sql = "UPDATE jobs SET l_isActive=0 WHERE l_id IN (" . implode(",", $tmpIDs) . ")";
            try {
                $affectedRows = 0;
                $lid = 0;
                $liveDB->executeNoresSQL($sql, $affectedRows, $lid);
            } catch (DBAdapter2Exception $e) {
                dumpError(" MYSQL Error: " . $e->getMessage() . " *** QUERY: {$sql}");
                die;
            }
        }

        unset($sph);
        dumpMsg('Finished anti-dup ' . $target['reg']);
    }

    return TRUE;
}
// For every section, run a Sphinx search and collect the ids and @count
// values of the matches that carry a non-zero l_geo_city attribute.
foreach ($l_aSections as $l_sSection) {
    $l_aResult = array();
    $l_aMatches = array();
    // Pick the Sphinx connection and table name for the section.
    // NOTE(review): for any section other than 'auto' / 'realestate' neither
    // $sph nor $table is (re)assigned, so values from a previous iteration
    // (or none at all) are used below — confirm $l_aSections can only hold
    // these two values.
    if ($l_sSection == 'auto') {
        $sph = new CSphinx(new sph_conf('3313'));
        $table = 'vehicles';
    } elseif ($l_sSection == 'realestate') {
        $sph = new CSphinx(new sph_conf('3313'), '192.168.240.43');
        // NOTE(review): this overwrites the shared $options array; the
        // realestate index stays selected for any later iteration.
        $options['index'] = "i_ref_realestate i_delta_realestate";
        $table = 'realestate';
    }
    $sph->resetSphinx();
    $sph->setOptions($options);
    echo "\nPrforming sphinx search...\n";
    $l_aIds = array();
    $l_aResult = $sph->search();
    // NOTE(review): 'matches' is read without checking that the search
    // succeeded or returned any match.
    $l_aMatches = $l_aResult['matches'];
    // Keyed by listing id; 'name' and 'translit' start empty here and are
    // presumably filled in further down — not visible in this fragment.
    $l_aXmlData = array();
    /**
     * Excluding zero == no city
     */
    foreach ($l_aMatches as $match) {
        if ($match["attrs"]['l_geo_city'] != '0') {
            $l_aIds[] = $match['id'];
            $l_aXmlData[$match['id']] = array('count' => $match['attrs']['@count'], 'name' => '', 'translit' => '');
        }
    }
    // Drop the raw search result; only $l_aIds and $l_aXmlData are needed from here on.
    unset($l_aResult, $l_aMatches);
    /**
     * Fetching the l_geo_city from the DB by listing ID
     */
public $port; public function __construct($port) { $this->port = $port; } }

/*
 * Default top dealers export.
 *
 * Groups promoted (alps_promoted = 1) vehicle listings of site type 2 by
 * l_www_id and writes the resulting dealer ids to
 * /home/eyezeek/netup/ru/seo/<section>_default_top_dealers.xml as
 * <Default_top_dealers><dealer><id>…</id></dealer>…</Default_top_dealers>.
 * Nothing is written when the search returns an empty result.
 */
$sph = new CSphinx(new sph_conf('3313'));
$options = array(
    'connectTimeout' => 5,
    'arrayResult' => TRUE,
    'matchMode' => SPH_MATCH_EXTENDED2,
    'index' => 'i_ref_vehicles i_delta_vehicles',
    'page' => 1,
    'limit' => 1,
);
$l_sSection = 'vehicles';

$sph->resetSphinx();
$options['groupBy'] = array('l_www_id', SPH_GROUPBY_ATTR);
$sph->setAttribute('www_site_type', '2');
$sph->setAttribute('what', $l_sSection);
$sph->setFilter($sph->VALUE_FILTER, array('alps_promoted', array(1), FALSE));
$sph->setOptions($options);
$result = $sph->search(NULL, FALSE);

// Make sure the id list exists even when the search yields no match;
// previously the export loop below iterated over an undefined variable.
if (!isset($l_aReturnIDs)) {
    $l_aReturnIDs = array();
}

// A failed search or a result without 'matches' no longer triggers a warning here.
if (!empty($result['matches']) && is_array($result['matches'])) {
    foreach ($result['matches'] as $one_promoted_dealer) {
        $l_aReturnIDs[] = $one_promoted_dealer['attrs']['l_www_id'];
    }
}

if (!empty($result)) {
    $l_sXMLbase = '<?xml version="1.0" encoding="UTF-8"?><Default_top_dealers></Default_top_dealers>';
    $l_sFile_name = "/home/eyezeek/netup/ru/seo/" . $l_sSection . "_default_top_dealers.xml";
    $l_oXML = new SimpleXMLElement($l_sXMLbase);
    foreach ($l_aReturnIDs as $dealer) {
        $l_node = $l_oXML->addChild('dealer');
        $l_node->addChild('id', $dealer);
    }
    file_put_contents($l_sFile_name, $l_oXML->asXML());
    // Release the document; the variable is left as NULL, as before.
    $l_oXML = NULL;
}
/**
 * Runs a grouped Sphinx query over the vehicles indexes.
 *
 * The query is restricted to the given city and make, grouped by
 * $group_by_field (attribute grouping, ordered by "@count DESC") and limited
 * to the first page of 50 groups.
 *
 * @param mixed  $city           Value for the 'lgeocity' attribute.
 * @param mixed  $make           Value for the 'lmake' attribute.
 * @param string $group_by_field Attribute to group the matches by.
 *
 * @return mixed Whatever CSphinx::search(NULL, FALSE) returns.
 */
private function make_search($city, $make, $group_by_field)
{
    $searchOptions = array(
        'connectTimeout' => 5,
        'arrayResult' => TRUE,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'index' => 'i_ref_vehicles i_delta_vehicles',
        'page' => 1,
        'limit' => 50,
        'groupBy' => array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC'),
    );

    $client = new CSphinx(new sph_conf('3313'));
    $client->resetSphinx();

    // Attribute order is kept exactly as in the previous implementation.
    $client->setAttribute('lgeocity', $city);
    $client->setAttribute('lmake', $make);
    $client->setAttribute('what', 'vehicles');
    $client->setOptions($searchOptions);

    return $client->search(NULL, FALSE);
}
/**
 * Runs a Sphinx query over the realestate indexes.
 *
 * The query is filtered by deal type, restricted to the given city and fed
 * every asset type listed for the category in $this->m_aCategoriesDetails.
 * Results are grouped by $group_by_field (ordered by "@count DESC") unless
 * the caller passes the literal 'empty'. Only the first page of 50 entries
 * is requested.
 *
 * @param mixed  $city                     Value for the 'lgeocity' attribute.
 * @param mixed  $one_deal_type            Value for the 'l_deal_type' filter.
 * @param string $one_translitted_category Key into $this->m_aCategoriesDetails.
 * @param string $group_by_field           Grouping attribute, or 'empty' for none.
 *
 * @return mixed Whatever CSphinx::search(NULL, FALSE) returns.
 */
private function make_search($city, $one_deal_type, $one_translitted_category, $group_by_field)
{
    $searchOptions = array(
        'connectTimeout' => 5,
        'arrayResult' => TRUE,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'index' => 'i_ref_realestate i_delta_realestate',
        'page' => 1,
        'limit' => 50,
    );
    // Grouping is optional: the sentinel 'empty' switches it off.
    if ($group_by_field != 'empty') {
        $searchOptions['groupBy'] = array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC');
    }

    $client = new CSphinx(new sph_conf('3313'), '192.168.240.43');
    $client->resetSphinx();

    $dealTypeFilter = array('l_deal_type', array($one_deal_type), FALSE);
    $client->setFilter($client->VALUE_FILTER, $dealTypeFilter);
    $client->setAttribute('lgeocity', $city);

    // One setAttribute call per asset type configured for the category.
    $categoryAssetTypes = $this->m_aCategoriesDetails[$one_translitted_category];
    foreach ($categoryAssetTypes as $assetType) {
        $client->setAttribute('lassettype', $assetType);
    }

    $client->setAttribute('what', 'realestate');
    $client->setOptions($searchOptions);

    return $client->search(NULL, FALSE);
}