/**
 * Generates the site maps for every sub-domain of this section.
 *
 * Items are fetched in batches through `_getAllItems()` until it returns
 * `false`, merged into one list and grouped by sub-domain. For each
 * sub-domain the folder and robots.txt are created, the site map files are
 * generated, and the main site map is written (an index file when more than
 * one site map was produced, otherwise the single site map itself).
 *
 * @return void
 */
public function generate()
{
    SiteMapBase::printLogMsg('Starting generation of site maps for section ' . $this->section);

    // NOTE(review): the Sphinx host is hard-coded; consider moving it to configuration.
    $this->initializeSphinx('192.168.240.43');
    // Presumably the paging cursor consumed by _getAllItems(); reset before the first batch — confirm.
    $this->minId = null;

    // Collect the batches first and merge them once, instead of re-copying
    // the ever-growing accumulator on every iteration.
    $batches = array();
    while (true) {
        $batch = $this->_getAllItems();
        if ($batch === false) {
            break;
        }
        $batches[] = $batch;
    }
    $allItems = empty($batches) ? array() : call_user_func_array('array_merge', $batches);
    unset($batches);

    $itemsBySubDomain = $this->groupItemsBySubDomain($allItems);
    unset($allItems);

    // Iterated by reference so the loop works on the original array and each
    // group can be dropped from it as soon as its URLs have been built.
    foreach ($itemsBySubDomain as $subDomain => &$items) {
        $this->subDomain = $subDomain;
        $this->createFolder();
        $this->createRobotsTXT();
        $this->urls = $this->_getItemsUrls($items);
        unset($itemsBySubDomain[$subDomain]);

        $siteMaps = $this->generateSiteMaps();
        if (empty($siteMaps)) {
            // Nothing was generated: do not read $siteMaps[0] or hand an
            // undefined file name to writeMainSiteMap().
            SiteMapBase::printLogMsg('No site maps generated for sub-domain ' . $subDomain . ', main site map not written');
            continue;
        }

        if (count($siteMaps) > 1) {
            $file = $this->createSiteMapIndexXML($siteMaps);
        } else {
            $file = $siteMaps[0];
        }
        $this->writeMainSiteMap($file);
    }
    // Break the reference left behind by the by-reference foreach.
    unset($items);

    SiteMapBase::printLogMsg('Finished generation of site maps for section ' . $this->section . "\n\n\n");
}
/**
 * Generates the site maps for every sub-domain of this section.
 *
 * All positions are loaded through `_getAllPositions()` and grouped by
 * sub-domain. For each sub-domain the folder and robots.txt are created,
 * the site map files are generated, and the main site map is written (an
 * index file when more than one site map was produced, otherwise the single
 * site map itself).
 *
 * @return void
 */
public function generate()
{
    SiteMapBase::printLogMsg('Starting generation of site maps for section ' . $this->section);

    $positions = $this->_getAllPositions();
    $itemsBySubDomain = $this->groupItemsBySubDomain($positions);

    foreach ($itemsBySubDomain as $subDomain => &$items) {
        $this->subDomain = $subDomain;
        $this->createFolder();
        $this->createRobotsTXT();
        $this->urls = $this->_getItemsUrls($items);

        $siteMaps = $this->generateSiteMaps();
        if (empty($siteMaps)) {
            // Nothing was generated: do not read $siteMaps[0] or hand an
            // undefined file name to writeMainSiteMap().
            SiteMapBase::printLogMsg('No site maps generated for sub-domain ' . $subDomain . ', main site map not written');
            continue;
        }

        if (count($siteMaps) > 1) {
            $file = $this->createSiteMapIndexXML($siteMaps);
        } else {
            $file = $siteMaps[0];
        }
        $this->writeMainSiteMap($file);
    }
    // Break the reference left behind by the by-reference foreach.
    unset($items);

    SiteMapBase::printLogMsg('Finished generation of site maps for section ' . $this->section . "\n\n\n");
}
// Site map generation entry script: defines the environment constants,
// loads the crawler dependencies and runs one generator per section.
// LOCAL, GPATH and RUNNING_PATH are expected to be defined before this point.

define('MAX_URLS_IN_FILE', 50000);
define('BASE_DOMAIN', 'pingola.ru');

// Environment-specific settings; only the values that actually differ
// between the local and the production setup live inside the branches.
if (LOCAL === true) {
    // Was '</br>', which is not a valid HTML tag.
    define('LINE_BREAK', '<br />');
    define('NETUPPATH', '/var/www/projects/crawlers/netup_imitation');
    define('DB_CONF_PATH', NETUPPATH . '/db.conf');
} else {
    define('LINE_BREAK', "\n");
    define('NETUPPATH', '/home/eyezeek/netup/ru');
    define('DB_CONF_PATH', '/home/eyezeek/db.conf');
}

// Derived paths are identical in both environments.
define('SITE_MAPS_PATH', NETUPPATH . '/sitemaps');
define('NETUP_ROBOTS_TXT', SITE_MAPS_PATH . '/robots.txt');

require_once GPATH . '/crawler/engine/classes/sphinxapi.php';
require_once GPATH . '/crawler/engine/classes/CSphinx.class.php';
require_once GPATH . '/crawler/engine/classes/Config.php';
require_once RUNNING_PATH . '/SiteMapBase.php';

$dbConf = new Config(DB_CONF_PATH, 'ini');

// Section name => generator class; each class lives in RUNNING_PATH/<ClassName>.php.
$sections = array(
    'jobs'   => 'SiteMapJobs',
    'realty' => 'SiteMapRealty',
);

// Iterate by value: the class name is only read, so no reference is needed
// (and none is left dangling in the global scope afterwards).
foreach ($sections as $className) {
    require_once RUNNING_PATH . '/' . $className . '.php';

    $siteMapGenerator = new $className($dbConf);
    $siteMapGenerator->generate();
    unset($siteMapGenerator);
}

SiteMapBase::printLogMsg('Finished script');