Example #1
0
 /**
  * Provides the shared instance of this class, creating it on first use
  * (singleton accessor).
  *
  * @return OASEO_BOL_SitemapPageItemDao
  */
 public static function getInstance()
 {
     // Fast path: the instance has already been created by an earlier call.
     if (self::$classInstance !== null) {
         return self::$classInstance;
     }

     self::$classInstance = new self();

     return self::$classInstance;
 }
Example #2
0
 /**
  * Finds one page of items of the given type, joined with the URL of every
  * page they are linked to through the page-item table.
  *
  * The lookup runs in two steps: first the ids of the requested slice are
  * selected (ordered by `id`), then those ids are joined against the
  * page-item and page tables. An item linked from several pages therefore
  * yields several rows.
  *
  * @param mixed $type  value matched against the `type` column of the item table
  * @param int   $first offset of the first item (bound to LIMIT)
  * @param int   $count maximum number of items (bound to LIMIT)
  * @return array rows as returned by queryForList (item columns plus `url`),
  *               or an empty array when no item of that type is in the slice
  */
 public function findItemsByType($type, $first, $count)
 {
     $itemTable = $this->getTableName();

     // $type is bound as a placeholder like $first/$count instead of being
     // concatenated into the statement, so it can never alter the SQL text.
     $idQuery = "SELECT `id` FROM `" . $itemTable . "` WHERE `type` = ? ORDER BY `id` LIMIT ?,?";
     $idList = $this->dbo->queryForColumnList($idQuery, array($type, $first, $count));
     if (!$idList) {
         return array();
     }

     // Broken links are recorded against pages, every other item type against items.
     $linkType = $type == OASEO_BOL_SitemapItemDao::VALUE_BROKEN_LINK
         ? OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_PAGE
         : OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM;

     $query = "SELECT `i`.*, `p`.`url` FROM `" . $itemTable . "` AS `i`\n"
         . "            LEFT JOIN `" . OASEO_BOL_SitemapPageItemDao::getInstance()->getTableName() . "` AS `pi` ON (`i`.`id` = `pi`.`itemId`)\n"
         . "            LEFT JOIN `" . OASEO_BOL_SitemapPageDao::getInstance()->getTableName() . "` AS `p` ON (`pi`.`pageId` = `p`.`id`)\n"
         . "            WHERE `pi`.`type` = " . $linkType . " AND `i`.`id` IN ( " . $this->dbo->mergeInClause($idList) . " ) ";

     return $this->dbo->queryForList($query);
 }
Example #3
0
 /**
  * Lists broken pages together with the pages that link to them.
  *
  * A slice of ids of pages flagged as broken is selected first; those ids
  * are then joined through the page-item table (link type 1) back onto the
  * page table to obtain the referring page URL.
  *
  * @param int $first offset of the first broken page (bound to LIMIT)
  * @param int $count maximum number of broken pages (bound to LIMIT)
  * @return array rows as returned by queryForList with `burl` (broken page URL)
  *               and `url` (referring page URL), or an empty array when the
  *               slice contains no broken page
  */
 public function findBrokenPages($first, $count)
 {
     $pageTable = $this->getTableName();

     $brokenIdSql = "SELECT `id` from `" . $pageTable . "` WHERE `" . self::BROKEN . "` = 1 LIMIT ?, ?";
     $brokenIds = $this->dbo->queryForColumnList($brokenIdSql, array($first, $count));

     if (empty($brokenIds)) {
         return array();
     }

     $linkTable = OASEO_BOL_SitemapPageItemDao::getInstance()->getTableName();

     // `p` is the broken page, `p2` the page that references it.
     $referrerSql = "SELECT p.url as burl, p2.url FROM `" . $pageTable . "` AS `p` \n"
         . "            LEFT JOIN `" . $linkTable . "` AS `pi` ON (p.id = pi.itemId AND pi.type = 1)\n"
         . "            LEFT JOIN `" . $pageTable . "` AS `p2` ON (`pi`.`pageId` = p2.id)\n"
         . "            WHERE  p.id IN (" . $this->dbo->mergeInClause($brokenIds) . ")";

     return $this->dbo->queryForList($referrerSql);
 }
Example #4
0
 /**
  * Runs one time-boxed pass of the sitemap crawler.
  *
  * Steps:
  *  - marks the sitemap as initialised (`sitemap_init` config);
  *  - when `update_info` is set, clears the item, page and page-item tables
  *    and seeds the queue with the site home URL;
  *  - returns immediately when there is no URL to process;
  *  - otherwise processes queued URLs until the profiler reports that the
  *    configured crawl time limit has been reached or the queue is empty.
  *    For each URL the page entry is marked as processed, broken pages are
  *    flagged, and for reachable pages the meta info, title, found links and
  *    found images are stored;
  *  - sets the `update_maps` config to 1 as soon as the queue is found empty
  *    inside the loop.
  *
  * @return void
  */
 public function startSitemapGenerator()
 {
     if (!OW::getConfig()->getValue('oaseo', 'sitemap_init')) {
         OW::getConfig()->saveConfig('oaseo', 'sitemap_init', 1);
     }
     $prof = UTIL_Profiler::getInstance('oaseo_sitemap');
     if (OW::getConfig()->getValue('oaseo', 'update_info')) {
         // A rebuild was requested: drop all collected data and restart from the home page.
         OW::getConfig()->saveConfig('oaseo', 'update_info', 0);
         $this->sitemapItemDao->clearTable();
         $this->sitemapPageDao->clearTable();
         $this->sitemapPageItemDao->clearTable();
         $this->addUrlToList(UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME));
     }
     if ($this->getNextUrlToProcess() == null) {
         return;
     }
     // Loop-invariant: used to tell internal links from external ones.
     $urlHome = UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME);
     while ($prof->getTotalTime() < $this->configs[self::CNF_CRAWL_TIME_LIMIT]) {
         $url = $this->getNextUrlToProcess();
         // The `continue` paths below skip the end-of-loop check, so the queue
         // may already be empty here. Without this guard a null URL would be
         // fetched over and over until the time limit, and `update_maps`
         // would never be set.
         if ($url == null) {
             OW::getConfig()->saveConfig('oaseo', 'update_maps', 1);
             break;
         }
         $pageDto = $this->sitemapPageDao->findByUrl($url, 0);
         if ($pageDto != null) {
             // Mark the page as processed before fetching it.
             $pageDto->setStatus(1);
             $pageDto->setProcessTs(time());
             $this->sitemapPageDao->save($pageDto);
             if ($this->isBroken($url)) {
                 $pageDto->setBroken(true);
                 $this->sitemapPageDao->save($pageDto);
                 continue;
             }
         }
         // TODO need to check if response is ok
         $content = file_get_contents($url);
         if (!$content) {
             // Nothing to parse; the page entry is kept as is.
             continue;
         }
         $data = $this->processContent($content);
         //add meta info to the page entry
         $pageDto = $this->sitemapPageDao->findByUrl($url);
         if ($pageDto == null) {
             // No page entry to attach the collected data to.
             continue;
         }
         $pageDto->setMeta(json_encode($data['meta']));
         $pageDto->setTitle($data['title']);
         $this->sitemapPageDao->save($pageDto);
         foreach ($data['foundLinks'] as $link) {
             $pageItem = new OASEO_BOL_SitemapPageItem();
             if (mb_strstr($link, $urlHome)) {
                 // Internal link: queue it as a page.
                 $addedItem = $this->addUrlToList($link);
                 if ($addedItem === null) {
                     continue;
                 }
                 $pageItem->setType(OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_PAGE);
             } elseif (mb_strstr($link, 'http://') || mb_strstr($link, 'www')) {
                 // External link: stored as an item.
                 // NOTE(review): `https://` links without "www" are not matched here - confirm intent.
                 $addedItem = $this->addExtUrl($link);
                 if ($addedItem === null) {
                     continue;
                 }
                 $pageItem->setType(OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM);
             } else {
                 continue;
             }
             $pageItem->setPageId($pageDto->getId());
             $pageItem->setItemId($addedItem->getId());
             $this->sitemapPageItemDao->save($pageItem);
         }
         foreach ($data['foundImages'] as $image) {
             $imageItem = $this->addImage($image);
             if ($imageItem == null) {
                 // Same guard as for links, in case no item was produced for the image.
                 continue;
             }
             $pageItem = new OASEO_BOL_SitemapPageItem();
             $pageItem->setPageId($pageDto->getId());
             $pageItem->setItemId($imageItem->getId());
             $pageItem->setType(OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM);
             $this->sitemapPageItemDao->save($pageItem);
         }
         if ($this->getNextUrlToProcess() == null) {
             // need to complete site generation + generate sitemaps in configs
             OW::getConfig()->saveConfig('oaseo', 'update_maps', 1);
             break;
         }
     }
 }