Exemplo n.º 1
0
 /**
  * Crawls queued site URLs and records the pages, links and images found.
  *
  * On every call the 'sitemap_init' config flag is set if it was not already.
  * When the 'update_info' flag is set, the flag is reset, the three sitemap
  * tables are cleared and the crawl is re-seeded with the site home URL.
  *
  * URLs are then processed one at a time until either the profiler reports
  * that the configured crawl time limit has been reached, or no URL is left
  * to process. In the latter case the 'update_maps' config flag is set to 1.
  *
  * @return void
  */
 public function startSitemapGenerator()
 {
     if (!OW::getConfig()->getValue('oaseo', 'sitemap_init')) {
         OW::getConfig()->saveConfig('oaseo', 'sitemap_init', 1);
     }

     $prof = UTIL_Profiler::getInstance('oaseo_sitemap');
     $urlHome = UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME);

     if (OW::getConfig()->getValue('oaseo', 'update_info')) {
         // A full rebuild was requested: wipe the collected data and start again from the home URL.
         OW::getConfig()->saveConfig('oaseo', 'update_info', 0);
         $this->sitemapItemDao->clearTable();
         $this->sitemapPageDao->clearTable();
         $this->sitemapPageItemDao->clearTable();
         $this->addUrlToList($urlHome);
     }

     if ($this->getNextUrlToProcess() == null) {
         return;
     }

     while ($prof->getTotalTime() < $this->configs[self::CNF_CRAWL_TIME_LIMIT]) {
         $url = $this->getNextUrlToProcess();

         // The skip paths below jump straight back here, so the queue may have run dry
         // since the last check; never hand a null URL to the DAO or to file_get_contents().
         if ($url == null) {
             // need to complete site generation + generate sitemaps in configs
             OW::getConfig()->saveConfig('oaseo', 'update_maps', 1);
             break;
         }

         $pendingPage = $this->sitemapPageDao->findByUrl($url, 0);

         if ($pendingPage != null) {
             // Mark the page as handled before fetching it.
             $pendingPage->setStatus(1);
             $pendingPage->setProcessTs(time());
             $this->sitemapPageDao->save($pendingPage);

             if ($this->isBroken($url)) {
                 $pendingPage->setBroken(true);
                 $this->sitemapPageDao->save($pendingPage);
                 continue;
             }
         }

         // TODO need to check if response is ok
         $content = file_get_contents($url);

         if ($content === false || $content === '') {
             // NOTE(review): a deleteById() call used to follow the `continue` here and was
             // therefore never executed; the page record is kept, as it always was in practice.
             // Confirm whether unfetchable pages should be removed instead.
             continue;
         }

         $data = $this->processContent($content);

         // add meta info to the page entry
         $pageDto = $this->sitemapPageDao->findByUrl($url);

         if ($pageDto == null) {
             // No stored page for this URL, so there is nothing to attach the results to.
             continue;
         }

         $pageDto->setMeta(json_encode($data['meta']));
         $pageDto->setTitle($data['title']);
         $this->sitemapPageDao->save($pageDto);

         foreach ($data['foundLinks'] as $link) {
             if (mb_strstr($link, $urlHome)) {
                 // Link containing the home URL: queue it and relate it as a page.
                 $addedItem = $this->addUrlToList($link);
                 $itemType = OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_PAGE;
             } elseif (mb_strstr($link, 'http://') || mb_strstr($link, 'www')) {
                 // NOTE(review): 'https://' links without 'www' are not matched here — confirm intent.
                 $addedItem = $this->addExtUrl($link);
                 $itemType = OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM;
             } else {
                 continue;
             }

             if ($addedItem === null) {
                 continue;
             }

             $pageItem = new OASEO_BOL_SitemapPageItem();
             $pageItem->setType($itemType);
             $pageItem->setPageId($pageDto->getId());
             $pageItem->setItemId($addedItem->getId());
             $this->sitemapPageItemDao->save($pageItem);
         }

         foreach ($data['foundImages'] as $image) {
             $imageItem = $this->addImage($image);

             // Guarded like the link helpers above; presumably addImage() can also decline an entry.
             if ($imageItem === null) {
                 continue;
             }

             $pageItem = new OASEO_BOL_SitemapPageItem();
             $pageItem->setPageId($pageDto->getId());
             $pageItem->setItemId($imageItem->getId());
             $pageItem->setType(OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM);
             $this->sitemapPageItemDao->save($pageItem);
         }

         if ($this->getNextUrlToProcess() == null) {
             // need to complete site generation + generate sitemaps in configs
             OW::getConfig()->saveConfig('oaseo', 'update_maps', 1);
             break;
         }
     }
 }