コード例 #1
0
ファイル: avtology_feed.php プロジェクト: sergrin/crawlers-il
             // NOTE(review): truncated excerpt — the enclosing function/loop begins
             // before this fragment. Adds the midpoint of a km range to a running sum.
             $avg_km += $l_avg_ranges[0] + floor(($l_avg_ranges[1] - $l_avg_ranges[0]) / 2);
         $avg_km_counter++;
         }
     }
     // Store the average mileage over all collected ranges on the listing.
     // (The key 'millage' [sic] is an existing runtime key — do not "fix" it.)
     if ($avg_km_counter > 0) {
         $avg_km = (int) ($avg_km / $avg_km_counter);
         $arr['millage'] = $avg_km;
     }
 }
 // Count per-city occurrences ('cnt_in_city') for this make/model/year.
 // NOTE(review): SQL built by string interpolation from $arr values — SQL-injection
 // risk if brand/series/year can contain untrusted input; should be parameterised.
 $sql = "SELECT prop_val,sum(specs) as num\n\t\t\t\t\tFROM ru_eyezeek.np_market_trends_vehicles_specs s join \n\t\t\t\t\t\tru_eyezeek.np_market_trends_vehicles_items i on s.item_id=i.id\n\t\t\t\t\t\twhere make='" . $arr['brand'] . "' and model='" . $arr['series'] . "' and \n\t\t\t\t\t\t\t  `year`=" . $arr['year'] . " and prop='cnt_in_city' group by make,model,year,prop_val";
 $regions = array();
 $total_count = 0;
 print_text("Fetching regions for: " . $arr['brand'] . " " . $arr['series'] . " " . $arr['year']);
 if ($result1 = $mysqli->query($sql)) {
     while ($arr1 = $result1->fetch_assoc()) {
         // First Sphinx query: total active listings of this make/model/year
         // in the city named by prop_val.
         $sph->resetSphinx();
         $sph->setAttribute('what', 'vehicles');
         $sph->setAttribute('lmake', $arr['brand']);
         $sph->setAttribute('lmodel', $arr['series']);
         $sph->setAttribute('lyear', $arr['year']);
         $sph->setAttribute('lgeocity', $arr1['prop_val']);
         $sph->setOptions($options);
         $res = $sph->search();
         $active_in_city = $res['total_found'];
         // Second Sphinx query: same attributes plus a post-date range filter
         // (roughly the last month). Fragment is cut off below this point.
         $sph->resetSphinx();
         $sph->setAttribute('what', 'vehicles');
         $sph->setAttribute('lmake', $arr['brand']);
         $sph->setAttribute('lmodel', $arr['series']);
         $sph->setAttribute('lyear', $arr['year']);
         $sph->setAttribute('lgeocity', $arr1['prop_val']);
         // NOTE(review): date('Y-m-00') produces a day "00" date string, which
         // strtotime() resolves to the last day of the PREVIOUS month — this
         // looks unintentional (probably 'Y-m-01' was meant); confirm.
         $sph->setFilter($sph->VALUE_RANGE_INT, array('l_postdate', (int) strtotime(date('Y-m-00', strtotime('-30 day'))), (int) strtotime(date('Y-m-00')), FALSE));
コード例 #2
0
ファイル: alert.class.php プロジェクト: sergrin/crawlers-il
 public function getSavedSearchListings($queryArray, $limit, $maxMultiSearch, $sphinxConfig)
 {
     // Runs every saved search (keyed by its md5 hash) against Sphinx in
     // batches of at most $maxMultiSearch queued queries, collecting the
     // matching listing IDs per hash into $this->searchResults.
     //
     // NOTE(review): a previous comment claimed $queryArray is passed by
     // reference, but the signature has no '&' — it is passed by value.
     $sphinx = new CSphinx($sphinxConfig, $sphinxConfig->host);
     $sphinx->resetSphinx();
     // Base options shared by every queued query; 'index' and 'sortMode'
     // are filled in per section inside the loop.
     $initOptions = array('connectTimeout' => 1, 'arrayResult' => true, 'matchMode' => SPH_MATCH_EXTENDED2, 'limits' => $limit, 'page' => 1);
     // Only listings posted within the last 24 hours are of interest.
     $onedayago = strtotime('24 hours ago');
     $oneResultArray = array();   // query number => md5 hash of the search
     $searchQueueCounter = 0;     // number of queued-but-unsent queries
     foreach ($queryArray as $md5Hash => $usersArray) {
         if (sizeof($usersArray) > 0) {
             // Every user under one hash asked for the same search, so the
             // first element is representative.
             $oneUser = $usersArray[0];
             // 'auto' is an alias for the 'vehicles' index family.
             $userSection = $oneUser['data']['section'] === 'auto' ? 'vehicles' : $oneUser['data']['section'];
             $initOptions['index'] = 'i_ref_' . $userSection . ' i_delta_' . $userSection;
             // Promoted listings sort first.
             $initOptions['sortMode'] = array(SPH_SORT_EXTENDED, 'alps_promoted desc');
             $sphinx->multiInit($initOptions);
             $sphinx->setAttribute('what', $userSection);
             if (!isset($this->searchResults[$md5Hash])) {
                 if (!($oneUser['data']['type'] == 'search')) {
                     // Category browse: map section/category onto filters.
                     $this->setCategory($sphinx, $oneUser['data']['section'], $oneUser['data']['what']);
                 } else {
                     // Free-text search.
                     $sphinx->setQuery($oneUser['data']['what']);
                 }
                 if (isset($oneUser['data']['refinments'])) {
                     // Apply each 'key=value' refinement that maps onto a
                     // known Sphinx option for this section.
                     foreach ($oneUser['data']['refinments'] as $oneRefinment) {
                         $oneRefinment = explode('=', $oneRefinment);
                         //TODO: check if refinment is a geo component in which case make sure u have all
                         $refinmentSettings = $this->getSpxOptionType($oneUser['data']['section'], $oneRefinment[0]);
                         if ($refinmentSettings != FALSE && sizeof($refinmentSettings) == 2) {
                             // We got a proper refinement.
                             $this->refinmentToSphinx($sphinx, $refinmentSettings, $oneRefinment[1]);
                         }
                     }
                 }
                 // Restrict to listings posted within the last day ...
                 $sphinx->setFilter($sphinx->VALUE_RANGE_INT, array('l_postdate', $onedayago, strtotime('now'), FALSE));
                 // ... and to promoted listings only.
                 $sphinx->setFilter($sphinx->VALUE_FILTER, array('alps_promoted', array(1), FALSE));
                 // Queue the query and remember which hash it belongs to.
                 $resultQueryNumber = $sphinx->addQuery();
                 $oneResultArray[$resultQueryNumber] = $md5Hash;
                 $searchQueueCounter = $searchQueueCounter + 1;
             }
         }
         if ($searchQueueCounter >= $maxMultiSearch) {
             // Batch is full — execute it and reset the queue state.
             // (Previously this flush logic was duplicated inline here and
             // after the loop; it is now shared via runQueuedSearchBatch().)
             $this->runQueuedSearchBatch($sphinx, $oneResultArray);
             $oneResultArray = array();
             $searchQueueCounter = 0;
         }
     }
     // Flush whatever is still queued after the loop.
     if (sizeof($oneResultArray) > 0) {
         $this->runQueuedSearchBatch($sphinx, $oneResultArray);
     }
     unset($sphinx);
     return $this->searchResults;
 }

 /**
  * Executes the currently queued Sphinx multi-search, stores each query's
  * matching IDs into $this->searchResults under its originating md5 hash,
  * and resets the Sphinx client ready for the next batch.
  *
  * @param CSphinx $sphinx       client with queries queued via addQuery()
  * @param array   $queryHashes  map of query number => md5 hash
  */
 private function runQueuedSearchBatch($sphinx, $queryHashes)
 {
     // multiSearch() runs all queued queries; per-query ID lists are then
     // read back by query number via getIDs().
     $sphinx->multiSearch();
     foreach ($queryHashes as $qNumber => $arrayElementMD5) {
         $this->searchResults[$arrayElementMD5] = $sphinx->getIDs($qNumber);
     }
     $sphinx->resetSphinx();
 }
コード例 #3
0
ファイル: sitemapindex.php プロジェクト: sergrin/crawlers-il
 private function make_search($city, $make, $group_by_field)
 {
     // Query the vehicle indexes (reference + delta) for listings of $make in
     // $city, grouped by $group_by_field with the most populous groups first.
     $search_options = array(
         'connectTimeout' => 5,
         'arrayResult' => TRUE,
         'matchMode' => SPH_MATCH_EXTENDED2,
         'index' => 'i_ref_vehicles i_delta_vehicles',
         'page' => 1,
         'limit' => 50,
         'groupBy' => array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC'),
     );
     $searcher = new CSphinx(new sph_conf('3313'));
     $searcher->resetSphinx();
     $searcher->setAttribute('lgeocity', $city);
     $searcher->setAttribute('lmake', $make);
     $searcher->setAttribute('what', 'vehicles');
     $searcher->setOptions($search_options);
     // Second argument FALSE: raw result set, not post-processed.
     return $searcher->search(NULL, FALSE);
 }
コード例 #4
0
ファイル: anti_dup_jobs.php プロジェクト: sergrin/crawlers-il
/**
 * For each target country/portal: groups job listings by their duplicate
 * checksum via Sphinx, keeps the best listing of every duplicate group
 * (promoted first, then newest) and deactivates the rest in the live DB.
 *
 * Fixes vs. previous revision:
 *  - an early `return TRUE` when one country had no duplicates aborted the
 *    whole run, silently skipping all remaining $l_countries entries; it is
 *    now a `continue` so every country is processed;
 *  - removed the unused $spx_ok flag and a redundant second getIDs() call.
 *
 * @param array $l_countries list of targets with 'reg', 'port', 'server', 'schemali'
 * @param int   $max         group-query page size (default 100)
 */
function newAntiDuplicate($l_countries, $max = 100)
{
    $dbConf = new Config("/home/eyezeek/db.conf", "ini");
    $optionsTemplate = array('connectTimeout' => 5, 'arrayResult' => true, 'matchMode' => SPH_MATCH_EXTENDED2, 'limits' => $max, 'page' => 1, 'index' => "i_ref_jobs i_delta_jobs", 'groupBy' => array('duplicates', SPH_GROUPBY_ATTR, '@count desc'));
    foreach ($l_countries as $target) {
        dumpMsg('Starting anti-dup ' . $target['reg']);
        $goOnWithNextPage = TRUE;
        $page = 1;
        $conf = new Conftmp($target['port']);
        $sph = new CSphinx($conf, $target['server']);
        $keepIDs = array();       // winners: one listing ID kept per duplicate group
        $crcIDs = array(1);       // duplicate checksums to re-query (1 = harmless seed value)
        $duplicateIDs = array(0); // listing IDs to deactivate (0 = harmless seed value)
        dumpMsg('Grouping ... ');
        while ($goOnWithNextPage) {
            sleep(1); // throttle searchd
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            // Promoted listings win their group; ties broken by newest post date.
            $sortParam = 'alps_promoted desc, l_postdate desc';
            $options['sortMode'] = array(SPH_SORT_EXTENDED, $sortParam);
            $sph->setOptions($options);
            $res = $sph->search();
            if (!$res) {
                dumpError('Grouping: ' . $sph->getError());
                die;
            }
            $relevantGroups = 0;
            if (isset($res['matches'])) {
                foreach ($res['matches'] as $oneMatch) {
                    // @count > 1 means this checksum occurs on more than one
                    // listing, i.e. real duplicates exist in the group.
                    $matchCounter = $oneMatch['attrs']['@count'];
                    if ($matchCounter > 1) {
                        $keepIDs[] = $oneMatch['id'];
                        $crcIDs[] = $oneMatch['attrs']['duplicates'];
                        $relevantGroups++;
                    } else {
                        $goOnWithNextPage = FALSE;
                    }
                }
                // No more need to traverse the next page if this one was not full.
                if ($relevantGroups < $max) {
                    $goOnWithNextPage = FALSE;
                }
            } else {
                // No more matches, i.e. nothing to do.
                $goOnWithNextPage = FALSE;
            }
            // Deliberate cut-off: only the first page is processed for now,
            // to avoid searchd offset overflows.
            $goOnWithNextPage = FALSE;
        }
        dumpMsg('Grouped ' . ($page - 1) . ' pages and ' . count($keepIDs) . ' listings have at least 1 duplicate... ');
        dumpMsg('Processing IDs ... ');
        $goOnWithNextPage = TRUE;
        $page = 1;
        if (count($keepIDs) == 0) {
            dumpMsg('Nothing to do. No duplicated were detected ');
            dumpMsg('Finished anti-dup ' . $target['reg']);
            unset($sph);
            // BUGFIX: was `return TRUE;`, which skipped all remaining countries.
            continue;
        }
        while ($goOnWithNextPage) {
            // Find the actual duplicate IDs: everything sharing one of the
            // collected checksums EXCEPT the per-group winners.
            $options = $optionsTemplate;
            $options['page'] = $page;
            $page++;
            $options['limits'] = 2000;
            unset($options['groupBy']);
            $sph->resetSphinx();
            $sph->setAttribute('what', 'jobs');
            $sph->setOptions($options);
            $sph->setFilter($sph->VALUE_FILTER, array('theid', $keepIDs, TRUE));
            $sph->setFilter($sph->VALUE_FILTER, array('duplicates', $crcIDs, FALSE));
            $res = $sph->search();
            if (!$res) {
                dumpError('Error in fetching duplicates: ' . $sph->getError());
                die;
            }
            $resultArray = $sph->getIDs();
            if (sizeof($resultArray) < $options['limits']) {
                $goOnWithNextPage = FALSE;
            }
            $duplicateIDs = array_merge($duplicateIDs, $resultArray);
            // Deliberate cut-off (see grouping loop above).
            $goOnWithNextPage = FALSE;
        }
        dumpMsg('Doing SQL updates and deactivating ' . sizeof($duplicateIDs) . ' duplicates ... ');
        $lastDump = 0;
        $maxDump = 200; // flush one UPDATE per 200 collected IDs
        $tmpIDs = array();
        $liveDB = new DBAdapter2(array('host' => $dbConf->live->host, 'username' => $dbConf->live->username, 'password' => $dbConf->live->password, 'schema' => $target['schemali']));
        for ($i = 0; $i < sizeof($duplicateIDs); $i++) {
            $tmpIDs[] = $duplicateIDs[$i];
            $lastDump++;
            // Flush when the chunk is full or on the very last ID.
            if ($lastDump == $maxDump || $i + 1 == sizeof($duplicateIDs)) {
                sleep(1); // throttle the live DB
                $lastDump = 0;
                // IDs originate from Sphinx document IDs (integers), so the
                // IN() list is not attacker-controlled text.
                $sql = "UPDATE jobs SET l_isActive=0 WHERE l_id IN (" . implode(",", $tmpIDs) . ")";
                $tmpIDs = array();
                try {
                    $affectedRows = 0;
                    $lid = 0;
                    $liveDB->executeNoresSQL($sql, $affectedRows, $lid);
                } catch (DBAdapter2Exception $e) {
                    dumpError(" MYSQL Error: " . $e->getMessage() . " *** QUERY: {$sql}");
                    die;
                }
            }
        }
        unset($sph);
        dumpMsg('Finished anti-dup ' . $target['reg']);
    }
}
コード例 #5
0
ファイル: sitemapindex.php プロジェクト: sergrin/crawlers-il
 private function make_search($city, $one_deal_type, $one_translitted_category, $group_by_field)
 {
     // Query the real-estate indexes (reference + delta) for listings in
     // $city matching the deal type and every asset type mapped to the given
     // transliterated category. Passing 'empty' as $group_by_field disables
     // grouping; any other value groups by that attribute, most populous
     // groups first.
     $search_options = array(
         'connectTimeout' => 5,
         'arrayResult' => TRUE,
         'matchMode' => SPH_MATCH_EXTENDED2,
         'index' => 'i_ref_realestate i_delta_realestate',
         'page' => 1,
         'limit' => 50,
     );
     if ($group_by_field != 'empty') {
         $search_options['groupBy'] = array($group_by_field, SPH_GROUPBY_ATTR, '@count DESC');
     }
     $sphinx = new CSphinx(new sph_conf('3313'), '192.168.240.43');
     $sphinx->resetSphinx();
     $sphinx->setFilter($sphinx->VALUE_FILTER, array('l_deal_type', array($one_deal_type), FALSE));
     $sphinx->setAttribute('lgeocity', $city);
     foreach ($this->m_aCategoriesDetails[$one_translitted_category] as $one_asset_type) {
         $sphinx->setAttribute('lassettype', $one_asset_type);
     }
     $sphinx->setAttribute('what', 'realestate');
     $sphinx->setOptions($search_options);
     // Second argument FALSE: raw result set, not post-processed.
     return $sphinx->search(NULL, FALSE);
 }