/**
 * Singleton accessor: hands back the shared instance of this class,
 * creating it the first time it is requested.
 *
 * @return OASEO_BOL_SitemapPageItemDao
 */
public static function getInstance()
{
    // Fast path: the instance has already been built by an earlier call.
    if (self::$classInstance !== null) {
        return self::$classInstance;
    }

    self::$classInstance = new self();

    return self::$classInstance;
}
/**
 * Finds one page of items of the given type, each joined with the URL of
 * the page it is linked to.
 *
 * Two queries are issued: the first selects the ids of the requested slice
 * of items (ordered by `id`), the second loads those items joined with the
 * page-item link table and the page table.
 *
 * @param mixed $type  value matched against the item `type` column
 * @param int   $first offset of the first item id to select
 * @param int   $count maximum number of item ids to select
 * @return array rows returned by queryForList() (all item columns plus `url`);
 *               an empty array when no item id matches
 */
public function findItemsByType($type, $first, $count)
{
    // $type is bound as a placeholder instead of being concatenated into the
    // statement, so a caller-supplied value can never alter the SQL.
    // The LIMIT operands are cast to int so they are always sent as integers
    // (presumably the DB layer picks the bind type from the PHP type - confirm).
    $idQuery = "SELECT `id` FROM `" . $this->getTableName() . "` WHERE `type` = ? ORDER BY `id` LIMIT ?,?";
    $itemIds = $this->dbo->queryForColumnList($idQuery, array($type, (int) $first, (int) $count));

    if (!$itemIds) {
        return array();
    }

    // Broken-link items are matched through link rows of type "page",
    // every other item type through link rows of type "item".
    $linkType = $type == OASEO_BOL_SitemapItemDao::VALUE_BROKEN_LINK
        ? OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_PAGE
        : OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM;

    $query = "SELECT `i`.*, `p`.`url` FROM `" . $this->getTableName() . "` AS `i`\n LEFT JOIN `" . OASEO_BOL_SitemapPageItemDao::getInstance()->getTableName() . "` AS `pi` ON (`i`.`id` = `pi`.`itemId`)\n LEFT JOIN `" . OASEO_BOL_SitemapPageDao::getInstance()->getTableName() . "` AS `p` ON (`pi`.`pageId` = `p`.`id`)\n WHERE `pi`.`type` = " . $linkType . " AND `i`.`id` IN ( " . $this->dbo->mergeInClause($itemIds) . " ) ";

    return $this->dbo->queryForList($query);
}
/**
 * Finds one page of broken pages together with the URLs of the pages
 * linked to them through the page-item table.
 *
 * Two queries are issued: the first selects the ids of the requested slice
 * of rows flagged as broken, the second loads, for those ids, the broken
 * page URL (`burl`) and the URL (`url`) of the page referenced by each
 * link row whose `itemId` is the broken page and whose `type` is 1.
 *
 * @param int $first offset of the first broken page id to select
 * @param int $count maximum number of broken page ids to select
 * @return array rows returned by queryForList() with `burl` and `url` columns;
 *               an empty array when no broken page is found
 */
public function findBrokenPages($first, $count)
{
    // ORDER BY makes the paging deterministic: without it the database is
    // free to return rows in any order, so consecutive LIMIT windows could
    // overlap or skip rows.
    // The LIMIT operands are cast to int so they are always sent as integers
    // (presumably the DB layer picks the bind type from the PHP type - confirm).
    $idQuery = "SELECT `id` from `" . $this->getTableName() . "` WHERE `" . self::BROKEN . "` = 1 ORDER BY `id` LIMIT ?, ?";
    $brokenIds = $this->dbo->queryForColumnList($idQuery, array((int) $first, (int) $count));

    if (!$brokenIds) {
        return array();
    }

    $query = "SELECT p.url as burl, p2.url FROM `" . $this->getTableName() . "` AS `p` \n LEFT JOIN `" . OASEO_BOL_SitemapPageItemDao::getInstance()->getTableName() . "` AS `pi` ON (p.id = pi.itemId AND pi.type = 1)\n LEFT JOIN `" . $this->getTableName() . "` AS `p2` ON (`pi`.`pageId` = p2.id)\n WHERE p.id IN (" . $this->dbo->mergeInClause($brokenIds) . ")";

    return $this->dbo->queryForList($query);
}
/**
 * Runs one time-boxed pass of the sitemap crawler.
 *
 * Sets the `sitemap_init` config flag if it is not set yet. When the
 * `update_info` flag is set, it is reset, the three sitemap tables are
 * cleared and the home URL is queued again. URLs are then taken from
 * getNextUrlToProcess() and crawled one by one until either the profiler's
 * total time reaches the configured crawl time limit or no URL is left; in
 * the latter case the `update_maps` config flag is set to 1.
 *
 * @return void
 */
public function startSitemapGenerator()
{
    if (!OW::getConfig()->getValue('oaseo', 'sitemap_init')) {
        OW::getConfig()->saveConfig('oaseo', 'sitemap_init', 1);
    }

    $prof = UTIL_Profiler::getInstance('oaseo_sitemap');

    if (OW::getConfig()->getValue('oaseo', 'update_info')) {
        // A rebuild was requested: start over from the home page.
        OW::getConfig()->saveConfig('oaseo', 'update_info', 0);
        $this->sitemapItemDao->clearTable();
        $this->sitemapPageDao->clearTable();
        $this->sitemapPageItemDao->clearTable();
        $this->addUrlToList(UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME));
    }

    if ($this->getNextUrlToProcess() == null) {
        return;
    }

    while ($prof->getTotalTime() < $this->configs[self::CNF_CRAWL_TIME_LIMIT]) {
        $url = $this->getNextUrlToProcess();

        if ($url != null) {
            $this->crawlSitemapUrl($url);
        }

        // This check must run after EVERY crawled URL, including the ones
        // that were skipped as broken or unreadable. Previously those paths
        // jumped straight to the next iteration, so when the last queued URL
        // was skipped the loop kept spinning on an empty queue and
        // `update_maps` could be left unset.
        if ($this->getNextUrlToProcess() == null) {
            // need to complete site generation + generate sitemaps in configs
            OW::getConfig()->saveConfig('oaseo', 'update_maps', 1);
            break;
        }
    }
}

/**
 * Crawls a single URL: marks its page entry as processed, downloads the
 * page, stores its title and meta data and records every link and image
 * found on it.
 *
 * Returns early, leaving the page entry in place, when the URL is reported
 * as broken, when no content could be downloaded, or when no page entry
 * exists for the URL.
 *
 * @param string $url URL returned by getNextUrlToProcess()
 * @return void
 */
private function crawlSitemapUrl($url)
{
    $pageDto = $this->sitemapPageDao->findByUrl($url, 0);

    if ($pageDto != null) {
        // Mark the page as processed before any network access is attempted.
        $pageDto->setStatus(1);
        $pageDto->setProcessTs(time());
        $this->sitemapPageDao->save($pageDto);

        if ($this->isBroken($url)) {
            $pageDto->setBroken(true);
            $this->sitemapPageDao->save($pageDto);

            return;
        }
    }

    // TODO need to check if response is ok
    $content = file_get_contents($url);

    if (!$content) {
        // Nothing to parse. The page entry is intentionally kept: the former
        // deleteById() call sat after the `continue` and never executed, so
        // skipping without deleting is the behavior callers have always seen.
        return;
    }

    $data = $this->processContent($content);
    $urlHome = UTIL_String::removeFirstAndLastSlashes(OW_URL_HOME);

    // add meta info to the page entry
    $pageDto = $this->sitemapPageDao->findByUrl($url);

    if ($pageDto == null) {
        // No page entry to attach the data to; avoid calling methods on null.
        return;
    }

    $pageDto->setMeta(json_encode($data['meta']));
    $pageDto->setTitle($data['title']);
    $this->sitemapPageDao->save($pageDto);

    foreach ($data['foundLinks'] as $link) {
        if (mb_strstr($link, $urlHome)) {
            // Link containing the home URL: queue it for crawling.
            $addedItem = $this->addUrlToList($link);
            $linkType = OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_PAGE;
        } elseif (mb_strstr($link, 'http://') || mb_strstr($link, 'www')) {
            // NOTE(review): an external "https://" link without "www" is not
            // matched here and is skipped - confirm whether that is intended.
            $addedItem = $this->addExtUrl($link);
            $linkType = OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM;
        } else {
            continue;
        }

        if ($addedItem === null) {
            continue;
        }

        $pageItem = new OASEO_BOL_SitemapPageItem();
        $pageItem->setType($linkType);
        $pageItem->setPageId($pageDto->getId());
        $pageItem->setItemId($addedItem->getId());
        $this->sitemapPageItemDao->save($pageItem);
    }

    foreach ($data['foundImages'] as $image) {
        $addedImage = $this->addImage($image);

        if ($addedImage === null) {
            // Defensive: the link helpers above may return null, so the same
            // is assumed possible for addImage() - TODO confirm.
            continue;
        }

        $pageItem = new OASEO_BOL_SitemapPageItem();
        $pageItem->setPageId($pageDto->getId());
        $pageItem->setItemId($addedImage->getId());
        $pageItem->setType(OASEO_BOL_SitemapPageItemDao::TYPE_VALUE_ITEM);
        $this->sitemapPageItemDao->save($pageItem);
    }
}