Exemplo n.º 1
0
 /**
  * Generates the site maps for every sub-domain of the current section.
  *
  * Fetches item batches from _getAllItems() until it returns false, groups
  * the combined items with groupItemsBySubDomain(), and then, per sub-domain,
  * calls createFolder() / createRobotsTXT(), builds the URL list, generates
  * the site map files and hands the main file to writeMainSiteMap(). The main
  * file is the result of createSiteMapIndexXML() when more than one site map
  * was produced, otherwise the single generated site map.
  *
  * @return void
  */
 public function generate()
 {
     SiteMapBase::printLogMsg('Starting generation of site maps for section ' . $this->section);
     // NOTE(review): the Sphinx host is hard-coded here; consider moving it to configuration.
     $this->initializeSphinx('192.168.240.43');
     $this->minId = null;

     // Collect the batches and merge them once at the end: calling
     // array_merge() inside the loop re-copies everything gathered so far
     // on every iteration.
     $batches = array();
     while (true) {
         $batch = $this->_getAllItems();
         if ($batch === false) {
             break;
         }
         $batches[] = $batch;
     }
     // array_merge() must not be called without arguments on older PHP versions.
     $allItems = $batches ? call_user_func_array('array_merge', $batches) : array();
     unset($batches);

     $itemsBySubDomain = $this->groupItemsBySubDomain($allItems);
     unset($allItems);

     // By-reference iteration is kept so that the unset() inside the loop
     // operates on the array being traversed; the reference itself is
     // released right after the loop.
     foreach ($itemsBySubDomain as $subDomain => &$items) {
         $this->subDomain = $subDomain;
         $this->createFolder();
         $this->createRobotsTXT();
         $this->urls = $this->_getItemsUrls($items);
         // The group is no longer needed once its URLs have been extracted.
         unset($itemsBySubDomain[$subDomain]);

         $siteMaps = $this->generateSiteMaps();
         if (empty($siteMaps)) {
             // Nothing was generated: reading $siteMaps[0] would be an
             // undefined offset, so skip writing a main site map.
             SiteMapBase::printLogMsg('No site maps generated for sub-domain ' . $subDomain);
             continue;
         }

         if (count($siteMaps) > 1) {
             $file = $this->createSiteMapIndexXML($siteMaps);
         } else {
             $file = $siteMaps[0];
         }
         $this->writeMainSiteMap($file);
     }
     // Break the dangling reference left behind by the by-reference foreach.
     unset($items);

     SiteMapBase::printLogMsg('Finished generation of site maps for section ' . $this->section . "\n\n\n");
 }
Exemplo n.º 2
0
 /**
  * Looks up the `translit` value for a job position id, with a static cache.
  *
  * Results are stored in self::$positions keyed by id, so each id that has a
  * matching row is queried at most once. Ids for which the query returns no
  * rows are not cached.
  *
  * @param mixed $id Value bound to :position_id (compared against jp.id).
  * @return mixed The `translit` column of the first result row (may be NULL
  *               because of the LEFT JOIN), or false when no row is returned.
  */
 private function _getPositionById($id)
 {
     // array_key_exists() rather than isset(): the LEFT JOIN can yield a NULL
     // translit, and isset() would treat that cached NULL as a miss and
     // re-run the query on every call. The is_array() guard keeps the check
     // safe if the static cache has not been initialised as an array.
     if (is_array(self::$positions) && array_key_exists($id, self::$positions)) {
         return self::$positions[$id];
     }

     $sql = 'SELECT translit '
         . 'FROM ru_eyezeek.jobs_positions jp '
         . 'LEFT JOIN ru_eyezeek.translitted_values tv ON jp.position_name=tv.origin '
         . 'WHERE jp.id= :position_id';
     $position = parent::runSelect($sql, array('position_id' => $id));
     if (!$position) {
         return false;
     }

     self::$positions[$id] = $position[0]['translit'];

     return self::$positions[$id];
 }
Exemplo n.º 3
0
// Site map generation driver: defines the environment-specific constants,
// loads the Sphinx / config classes and runs each section generator in turn.
// LOCAL, GPATH and RUNNING_PATH are expected to be defined before this point.
define('MAX_URLS_IN_FILE', 50000);
define('BASE_DOMAIN', 'pingola.ru');

if (LOCAL === true) {
    // NOTE(review): '</br>' is not a valid HTML tag ('<br>' would be); left
    // unchanged because other code may depend on the exact value.
    define('LINE_BREAK', '</br>');
    define('NETUPPATH', '/var/www/projects/crawlers/netup_imitation');
    define('DB_CONF_PATH', NETUPPATH . '/db.conf');
} else {
    define('LINE_BREAK', "\n");
    define('NETUPPATH', '/home/eyezeek/netup/ru');
    define('DB_CONF_PATH', '/home/eyezeek/db.conf');
}

// These are derived from NETUPPATH in the same way for both environments.
define('SITE_MAPS_PATH', NETUPPATH . '/sitemaps');
define('NETUP_ROBOTS_TXT', SITE_MAPS_PATH . '/robots.txt');

require_once GPATH . '/crawler/engine/classes/sphinxapi.php';
require_once GPATH . '/crawler/engine/classes/CSphinx.class.php';
require_once GPATH . '/crawler/engine/classes/Config.php';
require_once RUNNING_PATH . '/SiteMapBase.php';

$dbConf = new Config(DB_CONF_PATH, 'ini');

// Section name => generator class; each class lives in RUNNING_PATH/<Class>.php.
$sections = array('jobs' => 'SiteMapJobs', 'realty' => 'SiteMapRealty');

// Iterate by value: the loop never modifies $sections, and a by-reference
// loop variable would stay bound to the last element after the loop.
foreach ($sections as $generatorClass) {
    require_once RUNNING_PATH . '/' . $generatorClass . '.php';
    $siteMapGenerator = new $generatorClass($dbConf);
    $siteMapGenerator->generate();
    unset($siteMapGenerator);
}

SiteMapBase::printLogMsg('Finished script');
die;