Пример #1
0
/**
 * Deactivates duplicate job listings for every target region.
 *
 * For each entry of $l_countries the function:
 *  1. runs a Sphinx search grouped by the `duplicates` attribute and, for every group whose
 *     @count is greater than 1, remembers the returned document id and the group's
 *     `duplicates` value;
 *  2. runs a second, ungrouped search filtered on those `duplicates` values and on the
 *     remembered ids, and collects the ids it returns;
 *  3. issues `UPDATE jobs SET l_isActive=0` for the collected ids in batches of 200.
 *
 * Only the first page of each search is processed: both paging loops end with a manual
 * cut-off. A failed search or SQL error terminates the script via die.
 *
 * A target without duplicates is skipped and processing moves on to the next target
 * (previously the function returned at that point and silently ignored the remaining targets).
 *
 * @param array $l_countries List of targets; the keys 'reg', 'port', 'server' and 'schemali' are read.
 * @param int   $max         Value passed as the 'limits' option of the grouping search.
 *
 * @return void
 */
function newAntiDuplicate($l_countries, $max = 100)
{
    $dbConf = new Config("/home/eyezeek/db.conf", "ini");
    $optionsTemplate = array('connectTimeout' => 5, 'arrayResult' => true, 'matchMode' => SPH_MATCH_EXTENDED2, 'limits' => $max, 'page' => 1, 'index' => "i_ref_jobs i_delta_jobs", 'groupBy' => array('duplicates', SPH_GROUPBY_ATTR, '@count desc'));
    foreach ($l_countries as $target) {
        dumpMsg('Starting anti-dup ' . $target['reg']);
        $goOnWithNextPage = TRUE;
        $page = 1;
        $conf = new Conftmp($target['port']);
        $sph = new CSphinx($conf, $target['server']);
        // ids to keep (one per duplicate group)
        $keepIDs = array();
        // `duplicates` values whose groups contain more than one listing;
        // seeded with 1 (presumably so the filter list is never empty -- confirm)
        $crcIDs = array(1);
        // ids to set l_isActive = 0; seeded with 0, which keeps the IN (...) list below non-empty
        $duplicateIDs = array(0);
        dumpMsg('Grouping ... ');
        while ($goOnWithNextPage) {
            sleep(1);
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sortParam = 'alps_promoted desc, l_postdate desc';
            $options['sortMode'] = array(SPH_SORT_EXTENDED, $sortParam);
            $sph->setOptions($options);
            $res = $sph->search();
            if (!$res) {
                dumpError('Grouping: ' . $sph->getError());
                die;
            }
            $relevantGroups = 0;
            if (isset($res['matches'])) {
                foreach ($res['matches'] as $oneMatch) {
                    // if this is > 1 then there are duplicates
                    $matchCounter = $oneMatch['attrs']['@count'];
                    if ($matchCounter > 1) {
                        $keepIDs[] = $oneMatch['id'];
                        $crcIDs[] = $oneMatch['attrs']['duplicates'];
                        $relevantGroups++;
                    } else {
                        // groups are ordered by @count desc, so nothing relevant follows
                        $goOnWithNextPage = FALSE;
                    }
                }
                // a page that is not full of relevant groups is the last useful one
                if ($relevantGroups < $max) {
                    $goOnWithNextPage = FALSE;
                }
            } else {
                // no more matches i.e. nothing to do
                $goOnWithNextPage = FALSE;
            }
            // for now simple put manual cut-off to avoid offset overflows
            $goOnWithNextPage = FALSE;
        }
        dumpMsg('Grouped ' . ($page - 1) . ' pages and ' . count($keepIDs) . ' listings have at least 1 duplicate... ');
        dumpMsg('Processing IDs ... ');
        $goOnWithNextPage = TRUE;
        $page = 1;
        if (count($keepIDs) == 0) {
            dumpMsg('Nothing to do. No duplicated were detected ');
            dumpMsg('Finished anti-dup ' . $target['reg']);
            unset($sph);
            // move on to the next target instead of leaving the function
            continue;
        }
        while ($goOnWithNextPage) {
            // find actual duplicate ids
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $options['limits'] = 2000;
            unset($options['groupBy']);
            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sph->setOptions($options);
            // NOTE(review): the trailing TRUE/FALSE is presumably an "exclude" flag, i.e. skip the
            // ids we keep and match only the collected `duplicates` values -- confirm in CSphinx
            $sph->setFilter($sph->VALUE_FILTER, array('theid', $keepIDs, TRUE));
            $sph->setFilter($sph->VALUE_FILTER, array('duplicates', $crcIDs, FALSE));
            $res = $sph->search();
            if (!$res) {
                dumpError('Error in fetching duplicates: ' . $sph->getError());
                die;
            }
            $resultArray = $sph->getIDs();
            if (sizeof($resultArray) < $options['limits']) {
                $goOnWithNextPage = FALSE;
            }
            $duplicateIDs = array_merge($duplicateIDs, $resultArray);
            // for now simple put manual cut-off to avoid offset overflows
            $goOnWithNextPage = FALSE;
        }
        dumpMsg('Doing SQL updates and deactivating ' . sizeof($duplicateIDs) . ' duplicates ... ');
        $lastDump = 0;
        $maxDump = 200;
        $tmpIDs = array();
        $liveDB = new DBAdapter2(array('host' => $dbConf->live->host, 'username' => $dbConf->live->username, 'password' => $dbConf->live->password, 'schema' => $target['schemali']));
        $duplicateCount = sizeof($duplicateIDs);
        for ($i = 0; $i < $duplicateCount; $i++) {
            $tmpIDs[] = $duplicateIDs[$i];
            $lastDump++;
            // flush when the batch is full or the last id has been queued
            if ($lastDump == $maxDump || $i + 1 == $duplicateCount) {
                sleep(1);
                $lastDump = 0;
                $sql = "UPDATE jobs SET l_isActive=0 WHERE l_id IN (" . implode(",", $tmpIDs) . ")";
                $tmpIDs = array();
                try {
                    $affectedRows = 0;
                    $lid = 0;
                    $liveDB->executeNoresSQL($sql, $affectedRows, $lid);
                } catch (DBAdapter2Exception $e) {
                    dumpError(" MYSQL Error: " . $e->getMessage() . " *** QUERY: {$sql}");
                    die;
                }
            }
        }
        unset($sph);
        dumpMsg('Finished anti-dup ' . $target['reg']);
    }
}
Пример #2
0
         // First query: listings matching this make / model / year in the current city.
         // The opening of the enclosing loops is outside this fragment.
         $sph->resetSphinx();
         $sph->setAttribute('what', 'vehicles');
         $sph->setAttribute('lmake', $arr['brand']);
         $sph->setAttribute('lmodel', $arr['series']);
         $sph->setAttribute('lyear', $arr['year']);
         $sph->setAttribute('lgeocity', $arr1['prop_val']);
         $sph->setOptions($options);
         $res = $sph->search();
         $active_in_city = $res['total_found'];
         // Second query: same attributes, additionally restricted by an l_postdate range.
         $sph->resetSphinx();
         $sph->setAttribute('what', 'vehicles');
         $sph->setAttribute('lmake', $arr['brand']);
         $sph->setAttribute('lmodel', $arr['series']);
         $sph->setAttribute('lyear', $arr['year']);
         $sph->setAttribute('lgeocity', $arr1['prop_val']);
         // NOTE(review): both bounds use the literal day "00"; strtotime presumably resolves that
         // to the last day of the preceding month -- confirm this is the intended range.
         // The trailing FALSE is presumably an "exclude" flag -- confirm in CSphinx.
         $sph->setFilter($sph->VALUE_RANGE_INT, array('l_postdate', (int) strtotime(date('Y-m-00', strtotime('-30 day'))), (int) strtotime(date('Y-m-00')), FALSE));
         $sph->setOptions($options);
         $res = $sph->search();
         // $arr1['num'] minus the number of listings found inside the date range
         $posted_and_deleted = (int) ($arr1['num'] - $res['total_found']);
         $total_count += $active_in_city;
         $regions[] = array('r_name' => $arr1['prop_val'], 'count' => $active_in_city, 'count_post' => $arr1['num'], 'count_deleted' => $posted_and_deleted);
     }
 }
 // Sum of $active_in_city accumulated above
 $arr['count'] = $total_count;
 // Walk every field of $arr; the end of this loop is outside this fragment.
 foreach ($arr as $name => $value) {
     if ($name == 'url') {
         // Pass path segments 6 and 7 of the URL through str2url and rebuild the URL
         $exploded = explode('/', $value);
         $exploded[6] = str2url($exploded[6]);
         $exploded[7] = str2url($exploded[7]);
         $value = implode('/', $exploded);
     }
 * Real estate
 */
// For each deal type (rent / sale) queue one Sphinx query per entry of $cities, run them as a
// single multi-query and store the per-city totals in $result[<deal type>].
$l_aDealTypes = array('rent' => 0, 'sale' => 1);
echo "\nPerforming sphinx search for {$l_sSection}...\n";
foreach ($l_aDealTypes as $deal_type_str => $deal_type_int) {
    $options = array(
        'connectTimeout' => 1,
        'arrayResult' => TRUE,
        'matchMode' => SPH_MATCH_EXTENDED2,
        'index' => "i_ref_realestate i_delta_realestate",
        'page' => 1,
        'limits' => 1,
    );
    $sph->resetSphinx();
    $sph->multiInit($options);
    $tmp_arr = array();

    // Queue phase: 'count' temporarily holds whatever addQuery() returns.
    $i = 0;
    while ($i < count($cities)) {
        $sph->setAttribute('what', $l_sSection);
        // entry 0 stands for all of Russia, so it gets no city restriction
        if ($i > 0) {
            $sph->setAttribute('lgeocity', $cities[$i]['city_name']);
        }
        $sph->setFilter($sph->VALUE_FILTER, array('l_deal_type', array($deal_type_int), FALSE));
        $sph->setOptions($options);
        $cities[$i]['count'] = $sph->addQuery();
        $i++;
    }

    // Result phase: overwrite 'count' with the total found for each executed query.
    $allResults = $sph->multiSearch();
    $i = 0;
    while ($i < sizeof($allResults)) {
        $l_iTotalFound = $sph->getTotalFound($i);
        $cities[$i]['count'] = $l_iTotalFound;
        $i++;
    }

    $result[$deal_type_str] = $cities;
}
// Export the collected counts to XML; the rest of this block is outside this fragment.
if (!empty($result)) {
    // Empty document with a fixed <Top_cities_realestate> root element
    $l_sXMLbase = '<?xml version="1.0" encoding="UTF-8"?><Top_cities_realestate></Top_cities_realestate>';
    // Target path; the file name depends on $l_sSection
    $l_sFilecity_name = "/home/eyezeek/netup/ru/seo/top_cities_" . $l_sSection . ".xml";
    $l_oXML = new SimpleXMLElement($l_sXMLbase);
    echo "Saving to XML file {$l_sFilecity_name}...\n";
/**
 * Minimal configuration holder exposing a single public `port` property.
 */
class sph_conf
{
    /**
     * Port value, stored exactly as it was given to the constructor.
     *
     * @var mixed
     */
    public $port;

    /**
     * @param mixed $port Value to expose as $this->port (not validated or converted).
     */
    public function __construct($port)
    {
        $this->port = $port;
    }
}
// Query Sphinx for promoted vehicle listings grouped by l_www_id and collect the l_www_id
// of every returned group into $l_aReturnIDs.
$sph = new CSphinx(new sph_conf('3313'));
// NOTE(review): this uses the key 'limit' while other option arrays in this code base use
// 'limits' -- confirm which key CSphinx::setOptions() actually reads.
$options = array('connectTimeout' => 5, 'arrayResult' => TRUE, 'matchMode' => SPH_MATCH_EXTENDED2, 'index' => 'i_ref_vehicles i_delta_vehicles', 'page' => 1, 'limit' => 1);
$l_sSection = 'vehicles';
$sph->resetSphinx();
$options['groupBy'] = array('l_www_id', SPH_GROUPBY_ATTR);
$sph->setAttribute('www_site_type', '2');
$sph->setAttribute('what', $l_sSection);
$sph->setFilter($sph->VALUE_FILTER, array('alps_promoted', array(1), FALSE));
$sph->setOptions($options);
$result = $sph->search(NULL, FALSE);
// Always define the list so later code can iterate it even when nothing matched.
$l_aReturnIDs = array();
// A failed search or a result without matches must not be iterated.
if (isset($result['matches'])) {
    foreach ($result['matches'] as $one_promoted_dealer) {
        $l_aReturnIDs[] = $one_promoted_dealer['attrs']['l_www_id'];
    }
}
// Write the collected dealer ids to an XML file; the closing brace of this block is outside
// this fragment.
if (!empty($result)) {
    // Empty document with a fixed <Default_top_dealers> root element
    $l_sXMLbase = '<?xml version="1.0" encoding="UTF-8"?><Default_top_dealers></Default_top_dealers>';
    $l_sFile_name = "/home/eyezeek/netup/ru/seo/" . $l_sSection . "_default_top_dealers.xml";
    $l_oXML = new SimpleXMLElement($l_sXMLbase);
    // One <dealer><id>...</id></dealer> element per collected id
    foreach ($l_aReturnIDs as $dealer) {
        $l_node = $l_oXML->AddChild('dealer');
        $l_node->AddChild('id', $dealer);
    }
    file_put_contents($l_sFile_name, $l_oXML->asXML());
    unset($l_oXML);
Пример #5
0
 /**
  * Reports whether Sphinx finds at least one vehicle listing with l_isDealer = 1 for the
  * given city, make and model.
  *
  * @param mixed $city  Value for the 'lgeocity' attribute.
  * @param mixed $make  Value for the 'lmake' attribute.
  * @param mixed $model Value for the 'lmodel' attribute.
  *
  * @return bool TRUE when the search result is non-empty and its 'total_found' is above zero.
  */
 private function search_for_new($city, $make, $model)
 {
     $searchOptions = array(
         'connectTimeout' => 5,
         'arrayResult' => TRUE,
         'matchMode' => SPH_MATCH_EXTENDED2,
         'index' => 'i_ref_vehicles i_delta_vehicles',
         'page' => 1,
         'limit' => 5,
     );

     $sphinx = new CSphinx(new sph_conf('3313'));
     $sphinx->resetSphinx();
     $sphinx->setAttribute('lgeocity', $city);
     $sphinx->setAttribute('lmake', $make);
     $sphinx->setAttribute('lmodel', $model);
     $sphinx->setAttribute('what', 'vehicles');
     // restrict to listings whose l_isDealer attribute equals 1
     $sphinx->setFilter($sphinx->VALUE_FILTER, array('l_isDealer', array(1), FALSE));
     $sphinx->setOptions($searchOptions);

     $found = $sphinx->search(NULL, FALSE);

     return !empty($found) && $found['total_found'] > 0;
 }
// Generate home/all_makers.php: a PHP file defining $all_makers, one entry per maker in $l_aData.
// Every value is emitted through var_export() so that quotes or backslashes in the data
// cannot break (or inject code into) the generated file; for values without such characters
// the output is identical to plain single-quoting.
$makerCounter = 0;
$l_sAllMakersBuffer = '<?php $all_makers = array(';
foreach ($l_aData as $maker) {
    $l_sAllMakersBuffer .= "'{$makerCounter}' => array("
        . "'origin'=>" . var_export((string) $maker['name'], true) . ", "
        . "'translit'=>" . var_export((string) $maker['translit'], true) . ", "
        . "'count'=>" . var_export((string) $maker['count'], true) . ", "
        . "'link'=>" . var_export('http://auto.' . BASE_DOMAIN . '/search/?mk=' . $maker['translit'], true)
        . "),";
    $makerCounter++;
}
$l_sAllMakersBuffer .= ");";
file_put_contents($l_sPath . "home/all_makers.php", $l_sAllMakersBuffer);
unset($l_sAllMakersBuffer);
// Generate home/count_new.php and home/count_used.php. Each file assigns the @count of the
// first group returned by a search grouped on l_geo_city (ordered by @count DESC) with the
// 'lgeocity' attribute set to Moscow. When the search yields no such match the count falls
// back to 0, so the generated file is always valid PHP.
echo "Count new cars..\n";
$options = array('connectTimeout' => 5, 'arrayResult' => true, 'matchMode' => SPH_MATCH_EXTENDED2, 'limits' => 41, 'page' => 1, 'index' => "i_ref_vehicles i_delta_vehicles", 'groupBy' => array('l_geo_city', SPH_GROUPBY_ATTR, '@count DESC'));
// New: listings with l_isDealer = 1
$sph->resetSphinx();
$sph->setOptions($options);
$sph->setAttribute('lgeocity', 'Москва');
$sph->setFilter($sph->VALUE_FILTER, array('l_isDealer', array(1), FALSE));
$l_aResult = $sph->search();
$l_sBuffer = '<?php $count_new = '
    . (isset($l_aResult['matches'][0]['attrs']['@count']) ? (int) $l_aResult['matches'][0]['attrs']['@count'] : 0)
    . ';';
file_put_contents($l_sPath . "home/count_new.php", $l_sBuffer);
unset($l_sBuffer);
echo "Count used cars..\n";
// Used: same filter with the last argument TRUE
// (presumably "exclude", i.e. listings whose l_isDealer is not 1 -- confirm in CSphinx)
$sph->resetSphinx();
$sph->setOptions($options);
$sph->setAttribute('lgeocity', 'Москва');
$sph->setFilter($sph->VALUE_FILTER, array('l_isDealer', array(1), TRUE));
$l_aResult = $sph->search();
$l_sBuffer = '<?php $count_used = '
    . (isset($l_aResult['matches'][0]['attrs']['@count']) ? (int) $l_aResult['matches'][0]['attrs']['@count'] : 0)
    . ';';
file_put_contents($l_sPath . "home/count_used.php", $l_sBuffer);
unset($l_sBuffer);
/**
Пример #7
0
 /**
  * Runs a real-estate Sphinx search for one city, deal type and category.
  *
  * @param mixed  $city                     Value for the 'lgeocity' attribute.
  * @param mixed  $one_deal_type            Value matched against the 'l_deal_type' filter.
  * @param string $one_translitted_category Key into $this->m_aCategoriesDetails; every asset
  *                                         type listed there is set as 'lassettype'.
  * @param string $group_by_field           Attribute to group by ('@count DESC' ordering), or
  *                                         the literal 'empty' for an ungrouped search.
  *
  * @return mixed Whatever CSphinx::search(NULL, FALSE) returns.
  */
 private function make_search($city, $one_deal_type, $one_translitted_category, $group_by_field)
 {
     $searchOptions = array(
         'connectTimeout' => 5,
         'arrayResult' => TRUE,
         'matchMode' => SPH_MATCH_EXTENDED2,
         'index' => 'i_ref_realestate i_delta_realestate',
         'page' => 1,
         'limit' => 50,
     );
     // 'empty' is the sentinel for "no grouping"
     if ('empty' != $group_by_field) {
         $searchOptions['groupBy'] = array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC');
     }

     $sphinx = new CSphinx(new sph_conf('3313'), '192.168.240.43');
     $sphinx->resetSphinx();
     $sphinx->setFilter($sphinx->VALUE_FILTER, array('l_deal_type', array($one_deal_type), FALSE));
     $sphinx->setAttribute('lgeocity', $city);

     $assetTypes = $this->m_aCategoriesDetails[$one_translitted_category];
     foreach ($assetTypes as $assetType) {
         $sphinx->setAttribute('lassettype', $assetType);
     }

     $sphinx->setAttribute('what', 'realestate');
     $sphinx->setOptions($searchOptions);

     return $sphinx->search(NULL, FALSE);
 }