/**
 * Runs one time-boxed pass of the sitemap crawler.
 *
 * Flags `sitemap_init` on first use. When the `update_info` config flag is
 * set, it is reset, the three sitemap tables are cleared and the crawl is
 * re-seeded with the site home URL. Queued URLs are then processed one by one
 * until either the queue is empty (in which case the `update_maps` flag is
 * raised) or the configured crawl time limit has elapsed.
 *
 * @return void
 */
public function startSitemapGenerator()
{
    if (!OW::getConfig()->getValue('oaseo', 'sitemap_init')) {
        OW::getConfig()->saveConfig('oaseo', 'sitemap_init', 1);
    }

    $prof = UTIL_Profiler::getInstance('oaseo_sitemap');

    if (OW::getConfig()->getValue('oaseo', 'update_info')) {
        OW::getConfig()->saveConfig('oaseo', 'update_info', 0);
        $this->sitemapItemDao->clearTable();
        $this->sitemapPageDao->clearTable();
        $this->sitemapPageItemDao->clearTable();
        $this->addUrlToList(UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME));
    }

    // Nothing queued: leave without touching the `update_maps` flag.
    if ($this->getNextUrlToProcess() == null) {
        return;
    }

    while ($prof->getTotalTime() < $this->configs[self::CNF_CRAWL_TIME_LIMIT]) {
        $url = $this->getNextUrlToProcess();

        if ($url != null) {
            $this->crawlSitemapPage($url);
        }

        // This check must run after EVERY page, including pages that were
        // skipped as broken or unfetchable. Otherwise a failing last URL
        // would leave the crawl permanently unfinished: the loop would spin
        // until the time limit and later runs would exit at the guard above.
        if ($this->getNextUrlToProcess() == null) {
            // Crawl complete: request sitemap generation.
            OW::getConfig()->saveConfig('oaseo', 'update_maps', 1);
            break;
        }
    }
}

/**
 * Fetches a single queued URL and records its meta data, links and images.
 *
 * Returns early (recording nothing further) when the URL is reported broken,
 * when its content cannot be fetched or is empty, or when no page entry
 * exists for it.
 *
 * @param string $url URL taken from the crawl queue
 * @return void
 */
private function crawlSitemapPage($url)
{
    // NOTE(review): the second argument is presumably the "not yet processed"
    // status filter - confirm against the DAO.
    $pageDto = $this->sitemapPageDao->findByUrl($url, 0);

    if ($pageDto != null) {
        // Persist the status change first so the page is recorded as visited
        // even if one of the steps below bails out.
        $pageDto->setStatus(1);
        $pageDto->setProcessTs(time());
        $this->sitemapPageDao->save($pageDto);

        if ($this->isBroken($url)) {
            $pageDto->setBroken(true);
            $this->sitemapPageDao->save($pageDto);

            return;
        }
    }

    $content = file_get_contents($url);

    if ($content === false || $content === '') {
        // The page entry is intentionally kept. The original code had a
        // delete call here, but it sat after `continue` and never executed.
        return;
    }

    $data = $this->processContent($content);
    $urlHome = UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME);

    // Add meta info to the page entry.
    $pageDto = $this->sitemapPageDao->findByUrl($url);

    if ($pageDto == null) {
        // No entry to attach the results to.
        return;
    }

    $pageDto->setMeta(json_encode($data['meta']));
    $pageDto->setTitle($data['title']);
    $this->sitemapPageDao->save($pageDto);

    $pageId = $pageDto->getId();

    foreach ($data['foundLinks'] as $link) {
        if (mb_strstr($link, $urlHome)) {
            // Internal link: queue it for crawling.
            $addedItem = $this->addUrlToList($link);
            $type = OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_PAGE;
        } elseif (mb_strstr($link, 'http://') || mb_strstr($link, 'www')) {
            // External link.
            // NOTE(review): "https://" links without "www" are not matched
            // here - confirm whether that is intended.
            $addedItem = $this->addExtUrl($link);
            $type = OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM;
        } else {
            continue;
        }

        if ($addedItem === null) {
            continue;
        }

        $this->saveSitemapPageItem($pageId, $addedItem->getId(), $type);
    }

    foreach ($data['foundImages'] as $image) {
        $imageItem = $this->addImage($image);

        if ($imageItem === null) {
            continue;
        }

        $this->saveSitemapPageItem(
            $pageId,
            $imageItem->getId(),
            OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM
        );
    }
}

/**
 * Stores one page-to-item relation.
 *
 * @param mixed $pageId id of the page entry the item was found on
 * @param mixed $itemId id of the linked page / external URL / image entry
 * @param mixed $type   one of the OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_* constants
 * @return void
 */
private function saveSitemapPageItem($pageId, $itemId, $type)
{
    $pageItem = new OASEO_BOL_SitemapPageItem();
    $pageItem->setPageId($pageId);
    $pageItem->setItemId($itemId);
    $pageItem->setType($type);
    $this->sitemapPageItemDao->save($pageItem);
}