/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the cache directory named sitemap.xml.gz - This
 * file needs to be writable! (The actual path is whatever
 * Sitemapper::getFilePath() returns.)
 *
 * Nothing is generated when $conf['sitemap'] is not a number >= 1, when the
 * target (or, if it does not exist yet, its directory) is not writable, or
 * when a non-empty sitemap younger than $conf['sitemap'] days already exists.
 *
 * Triggers the SITEMAP_GENERATE event with the item list and the target path
 * passed by reference; the file is only written when the event's default
 * action is not prevented.
 *
 * @author Michael Hamann
 * @author Andreas Gohr
 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
 * @link http://www.sitemaps.org/
 *
 * @return mixed the return value of io_saveFile(), or false when nothing was written
 */
public function generate() {
    global $conf;

    // validate the type first, then the range
    if (!is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) {
        return false;
    }

    $sitemap = Sitemapper::getFilePath();

    // an existing file must be writable itself, otherwise its directory must be
    $mustBeWritable = @file_exists($sitemap) ? $sitemap : dirname($sitemap);
    if (!is_writable($mustBeWritable)) {
        return false;
    }

    if (@filesize($sitemap) && @filemtime($sitemap) > time() - $conf['sitemap'] * 86400) { // 60*60*24=86400
        dbglog('Sitemapper::generate(): Sitemap up to date'); // FIXME: only in debug mode
        return false;
    }

    dbglog("Sitemapper::generate(): using {$sitemap}"); // FIXME: Only in debug mode

    $pages = idx_getIndex('page', '');
    dbglog('Sitemapper::generate(): creating sitemap using ' . count($pages) . ' pages');

    // build the sitemap items
    $items = array();
    foreach ($pages as $id) {
        // skip hidden and restricted pages
        if (isHiddenPage($id)) {
            continue;
        }
        if (auth_aclcheck($id, '', '') < AUTH_READ) {
            continue;
        }
        // createFromID() may return NULL (presumably for non existing pages); skip those
        $item = SitemapItem::createFromID($id);
        if ($item !== null) {
            $items[] = $item;
        }
    }

    // stays false when an event handler prevents the default action;
    // previously this variable was undefined in that case
    $result = false;

    $eventData = array('items' => &$items, 'sitemap' => &$sitemap);
    $event = new Doku_Event('SITEMAP_GENERATE', $eventData);
    if ($event->advise_before(true)) {
        // save the new sitemap
        $result = io_saveFile($sitemap, Sitemapper::getXML($items));
    }
    $event->advise_after();

    return $result;
}
/**
 * Get count of occurrences for a list of tags
 *
 * Hidden pages and pages the user can't access (as decided by _isVisible())
 * are never counted. Tags without any counted page are left out of the result.
 *
 * @param array   $tags       array of tags
 * @param array   $namespaces array of namespaces where to count the tags; '.' allows the
 *                            root namespace. Anything that is not an array starting with a
 *                            non-empty entry disables namespace filtering.
 * @param boolean $allTags    boolean if all available tags should be counted
 * @param boolean $recursive  boolean if pages in subnamespaces are allowed; NULL uses the
 *                            'list_tags_of_subns' configuration value
 * @return array  tag => number of pages
 */
function tagOccurrences($tags, $namespaces = NULL, $allTags = false, $recursive = NULL) {
    // map with trim here in order to remove newlines from tags
    if ($allTags) {
        $tags = array_map('trim', idx_getIndex('subject', '_w'));
    }
    $tags = $this->_cleanTagList($tags);
    $otags = array(); //occurrences

    // $namespaces not specified: the type is checked before the first element is
    // read, so non-array or sparse input no longer raises offset notices
    if (!is_array($namespaces) || !isset($namespaces[0]) || $namespaces[0] == '') {
        $namespaces = null;
    }

    $indexer = idx_get_indexer();
    $indexer_pages = $indexer->lookupKey('subject', $tags, array($this, '_tagCompare'));

    $root_allowed = ($namespaces === null) ? false : in_array('.', $namespaces);
    if ($recursive === null) {
        $recursive = $this->getConf('list_tags_of_subns');
    }

    // without a namespace filter, or with "root and everything below", all pages count
    $count_all = ($namespaces === null) || ($root_allowed && $recursive);

    foreach ($tags as $tag) {
        if (!isset($indexer_pages[$tag])) {
            continue;
        }

        // just to be sure remove duplicate pages from the list of pages
        $pages = array_unique($indexer_pages[$tag]);

        // don't count hidden pages or pages the user can't access
        // for performance reasons this doesn't take drafts into account
        $pages = array_filter($pages, array($this, '_isVisible'));
        if (empty($pages)) {
            continue;
        }

        if ($count_all) {
            $count = count($pages);
        } else {
            $count = 0;
            foreach ($pages as $page) {
                if ($recursive) {
                    // recursive, no root: the page id must start with one of the namespaces
                    foreach ($namespaces as $ns) {
                        if (strpos($page, $ns . ':') === 0) {
                            $count++;
                            break;
                        }
                    }
                } else {
                    // filter by exact namespace
                    $ns = getNS($page);
                    if (($ns == false && $root_allowed) || in_array($ns, $namespaces)) {
                        $count++;
                    }
                }
            }
        }

        // don't return tags without pages
        if ($count > 0) {
            $otags[$tag] = $count;
        }
    }

    return $otags;
}
/**
 * Lookup words in index
 *
 * Takes an array of word and will return a list of matching
 * documents for each one.
 *
 * Important: No ACL checking is done here! All results are
 * returned, regardless of permissions
 *
 * @author Andreas Gohr <*****@*****.**>
 *
 * @param array $words words to look up
 * @return array word => array(hit key => summed hit count); the hit keys are whatever
 *               idx_parseIndexLine() returns as keys (presumably page ids)
 */
function idx_lookup($words) {
    $result = array();
    $wids = idx_getIndexWordsSorted($words, $result);
    if (empty($wids)) {
        return array();
    }

    // load known words and documents
    $page_idx = idx_getIndex('page', '');
    $docs = array(); // hold docs found, keyed by "<word length>*<index line>"
    foreach ($wids as $wlen => $ixids) {
        $index = idx_getIndex('i', $wlen);
        $index_size = count($index);
        foreach (array_unique($ixids) as $ixid) {
            // ignore word ids pointing beyond the end of the index
            if ($ixid < $index_size) {
                $docs["{$wlen}*{$ixid}"] = idx_parseIndexLine($page_idx, $index[$ixid]);
            }
        }
    }

    // merge found pages into final result array
    $final = array();
    foreach ($result as $word => $res) {
        $final[$word] = array();
        foreach ($res as $wid) {
            // a word id may have been skipped above; iterating a missing entry
            // used to create a null element by reference and trigger a warning
            if (!isset($docs[$wid]) || !is_array($docs[$wid])) {
                continue;
            }
            foreach ($docs[$wid] as $hitkey => $hitcnt) {
                if (isset($final[$word][$hitkey])) {
                    $final[$word][$hitkey] += $hitcnt;
                } else {
                    $final[$word][$hitkey] = $hitcnt;
                }
            }
        }
    }

    return $final;
}
/**
 * Returns the sorted word cloud array
 *
 * Collects stopwords from the language directory and from the local
 * configuration, feeds the search index (new word-length based layout if
 * page.idx exists in the index directory, otherwise the old single-file
 * layout in the cache directory) into _addWordsToCloud() and hands the
 * result to _sortCloud().
 *
 * @param mixed $num  passed through to _sortCloud()
 * @param mixed $min  passed through to _sortCloud() by reference
 * @param mixed $max  passed through to _sortCloud() by reference
 * @return mixed whatever _sortCloud() returns
 */
function _getWordCloud($num, &$min, &$max) {
    global $conf;

    // stopwords shipped for the configured language
    $langStopFile = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/stopwords.txt';
    $stopwords = @file_exists($langStopFile)
        ? file($langStopFile, FILE_IGNORE_NEW_LINES)
        : array();

    // extra local stopwords
    $localStopFile = DOKU_CONF . 'stopwords.txt';
    if (@file_exists($localStopFile)) {
        $stopwords = array_merge($stopwords, file($localStopFile, FILE_IGNORE_NEW_LINES));
    }

    $cloud = array();
    $hasLengthIndex = @file_exists($conf['indexdir'] . '/page.idx');

    if (!$hasLengthIndex) {
        // old index
        $idx = file($conf['cachedir'] . '/index.idx');
        $word_idx = file($conf['cachedir'] . '/word.idx');
        $this->_addWordsToCloud($cloud, $idx, $word_idx, $stopwords);
    } else {
        // new word-length based index
        require_once DOKU_INC . 'inc/indexer.php';

        $minLength = $this->getConf('minimum_word_length'); // minimum word length
        foreach (idx_indexLengths($minLength) as $len) {
            $idx = idx_getIndex('i', $len);
            $word_idx = idx_getIndex('w', $len);
            $this->_addWordsToCloud($cloud, $idx, $word_idx, $stopwords);
        }
    }

    return $this->_sortCloud($cloud, $num, $min, $max);
}
/**
 * Returns a list of matching documents for the given query
 *
 * The parsed query is evaluated with a stack of page sets (page id => hit
 * count): word, phrase and namespace tokens push a set, AND/OR combine the
 * two topmost sets and NOT replaces the topmost set by its complement.
 * Hidden, unreadable and non-existing pages are removed from the final set.
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Kazutaka Miyasaka <*****@*****.**>
 *
 * @param array $data  $data['query'] is read, $data['highlight'] is written
 * @return array page id => hit count, sorted by hit count in descending order
 */
function _ft_pageSearch(&$data) {
    // parse the given query
    $q = ft_queryParser($data['query']);
    $data['highlight'] = $q['highlight'];

    if (empty($q['parsed_ary'])) {
        return array();
    }

    // lookup all words found in the query
    $lookup = idx_lookup($q['words']);

    // get all pages in this dokuwiki site (!: includes nonexistent pages)
    $pages_all = array();
    foreach (idx_getIndex('page', '') as $id) {
        $pages_all[trim($id)] = 0; // base: 0 hit
    }

    // process the query
    $stack = array();
    foreach ($q['parsed_ary'] as $token) {
        switch (substr($token, 0, 3)) {
            case 'W+:':
            case 'W-:':
            case 'W_:': // word
                $word = substr($token, 3);
                // a word without any index entry yields an empty page set
                // (previously an undefined index was read here)
                $stack[] = isset($lookup[$word]) ? (array) $lookup[$word] : array();
                break;

            case 'P+:':
            case 'P-:': // phrase
                $phrase = substr($token, 3);
                // since phrases are always parsed as ((W1)(W2)...(P)),
                // the end($stack) always points the pages that contain
                // all words in this phrase
                $pages = end($stack);
                $pages_matched = array();
                foreach (array_keys($pages) as $id) {
                    $text = utf8_strtolower(rawWiki($id));
                    if (strpos($text, $phrase) !== false) {
                        $pages_matched[$id] = 0; // phrase: always 0 hit
                    }
                }
                $stack[] = $pages_matched;
                break;

            case 'N+:':
            case 'N-:': // namespace
                $ns = substr($token, 3);
                $pages_matched = array();
                foreach (array_keys($pages_all) as $id) {
                    if (strpos($id, $ns) === 0) {
                        $pages_matched[$id] = 0; // namespace: always 0 hit
                    }
                }
                $stack[] = $pages_matched;
                break;

            case 'AND': // and operation
                list($pages1, $pages2) = array_splice($stack, -2);
                $stack[] = ft_resultCombine(array($pages1, $pages2));
                break;

            case 'OR': // or operation
                list($pages1, $pages2) = array_splice($stack, -2);
                $stack[] = ft_resultUnite(array($pages1, $pages2));
                break;

            case 'NOT': // not operation (unary)
                $pages = array_pop($stack);
                $stack[] = ft_resultComplement(array($pages_all, $pages));
                break;
        }
    }
    $docs = array_pop($stack);

    if (empty($docs)) {
        return array();
    }

    // check: settings, acls, existence
    foreach (array_keys($docs) as $id) {
        if (isHiddenPage($id) || auth_quickaclcheck($id) < AUTH_READ || !page_exists($id, '', false)) {
            unset($docs[$id]);
        }
    }

    // sort docs by count
    arsort($docs);

    return $docs;
}
/**
 * List all pages - we use the indexer list here
 *
 * Only pages that pass isVisiblePage() and page_exists() and for which the
 * current user has at least AUTH_READ permission are listed.
 *
 * @return array list of arrays with the keys 'id', 'perms', 'size' and 'lastModified'
 */
function listPages() {
    $visible = array_filter(idx_getIndex('page', ''), 'isVisiblePage');
    $existing = array_filter($visible, 'page_exists');

    $list = array();
    foreach ($existing as $pageId) {
        $perm = auth_quickaclcheck($pageId);
        if ($perm < AUTH_READ) {
            continue;
        }

        $file = wikiFN($pageId);
        $list[] = array(
            'id'           => trim($pageId),
            'perms'        => $perm,
            'size'         => @filesize($file),
            'lastModified' => new IXR_Date(@filemtime($file)),
        );
    }

    return $list;
}
/**
 * Gets the first heading from a file
 *
 * The first P_GET_FIRST_HEADING_METADATA_LIMIT distinct titles are read via
 * p_get_metadata() and cached per request. After that the complete title
 * index is loaded once and every further title is answered from it.
 *
 * @param   string   $id       dokuwiki page id
 * @param   bool     $render   rerender if first heading not known
 *                             default: true  -- must be set to false for calls from the metadata renderer to
 *                                               protects against loops and excessive resource usage when pages
 *                                               for which only a first heading is required will attempt to
 *                                               render metadata for all the pages for which they require first
 *                                               headings ... and so on.
 * @return  mixed    the title; null when the id is not part of the title index
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Michael Hamann <*****@*****.**>
 */
function p_get_first_heading($id, $render = true) {
    // needed for the index directory below; without this declaration the
    // corrupt-index cleanup built its path from an undefined local variable
    global $conf;

    // counter how many titles have been requested using p_get_metadata
    static $count = 1;
    // the index of all titles, only loaded when many titles are requested
    static $title_index = null;
    // cache for titles requested using p_get_metadata
    static $title_cache = array();

    $id = cleanID($id);

    // check if this title has already been requested
    if (isset($title_cache[$id])) {
        return $title_cache[$id];
    }

    // check if already too many titles have been requested and probably
    // using the title index is better
    if ($count > P_GET_FIRST_HEADING_METADATA_LIMIT) {
        if (is_null($title_index)) {
            $pages = array_map('rtrim', idx_getIndex('page', ''));
            $titles = array_map('rtrim', idx_getIndex('title', ''));
            // check for corrupt title index #FS2076
            if (count($pages) != count($titles)) {
                $titles = array_fill(0, count($pages), '');
                @unlink($conf['indexdir'] . '/title.idx'); // will be rebuilt in inc/init.php
            }
            $title_index = array_combine($pages, $titles);
        }
        // ids unknown to the index yield null without an undefined index notice
        return isset($title_index[$id]) ? $title_index[$id] : null;
    }

    ++$count;
    $title_cache[$id] = p_get_metadata($id, 'title', $render);
    return $title_cache[$id];
}
/**
 * Create a pagewords index from the existing index.
 *
 * For every page in the page index one line is written to pageword.idx in
 * the index directory. The line holds the colon separated keys
 * ("<word length>*<word id>") of all words whose index line mentions that page.
 * The data is written to pageword.tmp first and renamed afterwards.
 *
 * @author Tom N Harris <*****@*****.**>
 *
 * @return bool|null true on success, false if the temporary file could not be
 *                   opened, nothing (null) if the page index is empty
 */
function idx_upgradePageWords() {
    global $conf;

    $page_idx = idx_getIndex('page', '');
    if (empty($page_idx)) {
        return;
    }

    // one (initially empty) list of word keys per known page
    $pagewords = array_fill(0, count($page_idx), array());
    unset($page_idx);

    $n = 0;
    foreach (idx_indexLengths($n) as $wlen) {
        $lines = idx_getIndex('i', $wlen);
        $total = count($lines);
        for ($wid = 0; $wid < $total; $wid++) {
            $wkey = "{$wlen}*{$wid}";
            $entries = explode(':', trim($lines[$wid]));
            foreach ($entries as $entry) {
                if ($entry != '') {
                    // each entry looks like "<doc>*<count>"; only the doc number is needed
                    list($doc, $cnt) = explode('*', $entry);
                    $pagewords[(int) $doc][] = $wkey;
                }
            }
        }
    }

    $base = $conf['indexdir'] . '/pageword';
    $tmpFile = $base . '.tmp';

    $fh = @fopen($tmpFile, 'w');
    if (!$fh) {
        trigger_error("Failed to write word index", E_USER_ERROR);
        return false;
    }
    foreach ($pagewords as $wkeys) {
        fwrite($fh, implode(':', $wkeys) . "\n");
    }
    fclose($fh);

    if ($conf['fperm']) {
        chmod($tmpFile, $conf['fperm']);
    }
    io_rename($tmpFile, $base . '.idx');

    return true;
}
/**
 * Returns the pages whose id (and optionally title) matches the given search term
 *
 * An "@word" token in the search term restricts the result to page ids that
 * start with the cleaned word followed by a colon. Hidden, non-existing and
 * unreadable pages are dropped from the result.
 *
 * @param array $data  reads $data['id'] (search term), $data['in_ns'] (match against the
 *                     full id instead of noNSorNS() of it) and $data['in_title']
 *                     (also match the term against the title)
 * @return array page id => title, sorted with the ft_pagesorter callback
 */
function _ft_pageLookup(&$data) {
    // split out original parameters
    $id = $data['id'];
    if (preg_match('/(?:^| )@(\\w+)/', $id, $matches)) {
        $ns = cleanID($matches[1]) . ':';
        $id = str_replace($matches[0], '', $id);
    }

    $in_ns = $data['in_ns'];
    $in_title = $data['in_title'];

    $pages = array_map('rtrim', idx_getIndex('page', ''));
    $titles = array_map('rtrim', idx_getIndex('title', ''));

    // A corrupt title index has a different length than the page index and
    // would make array_combine() fail; fall back to empty titles in that case.
    if (count($pages) != count($titles)) {
        $titles = array_fill(0, count($pages), '');
    }
    $pages = array_combine($pages, $titles);

    if ($id !== '' && cleanID($id) !== '') {
        $cleaned = cleanID($id);
        $matched_pages = array();
        foreach ($pages as $p_id => $p_title) {
            $id_matches = strpos($in_ns ? $p_id : noNSorNS($p_id), $cleaned) !== false;
            $title_matches = $in_title && stripos($p_title, $id) !== false;
            $ns_matches = !isset($ns) || strpos($p_id, $ns) === 0;

            if (($id_matches || $title_matches) && $ns_matches) {
                $matched_pages[$p_id] = $p_title;
            }
        }
        $pages = $matched_pages;
    }

    // discard hidden pages
    // discard nonexistent pages
    // check ACL permissions
    foreach (array_keys($pages) as $idx) {
        if (!isVisiblePage($idx) || !page_exists($idx) || auth_quickaclcheck($idx) < AUTH_READ) {
            unset($pages[$idx]);
        }
    }

    uasort($pages, 'ft_pagesorter');
    return $pages;
}
/**
 * Run a few sanity checks
 *
 * Reports every finding through msg(): the PHP version and memory limit,
 * writability of the changelog, the conf directory and the plain auth user
 * file, the state of the old changelog import, mbstring/PCRE/locale support,
 * the debugging setting, information about the current user and page, and
 * the consistency of the search index.
 *
 * @author Andreas Gohr <*****@*****.**>
 */
function check() {
    global $conf;
    global $INFO;

    if ($INFO['isadmin'] || $INFO['ismanager']) {
        msg('DokuWiki version: ' . getVersion(), 1);
    }

    if (version_compare(phpversion(), '5.2.0', '<')) {
        msg('Your PHP version is too old (' . phpversion() . ' vs. 5.2.0+ needed)', -1);
    } else {
        msg('PHP version ' . phpversion(), 1);
    }

    $mem = (int) php_to_byte(ini_get('memory_limit'));
    if ($mem) {
        if ($mem < 16777216) {
            msg('PHP is limited to less than 16MB RAM (' . $mem . ' bytes). Increase memory_limit in php.ini', -1);
        } elseif ($mem < 20971520) {
            msg('PHP is limited to less than 20MB RAM (' . $mem . ' bytes), you might encounter problems with bigger pages. Increase memory_limit in php.ini', -1);
        } elseif ($mem < 33554432) {
            msg('PHP is limited to less than 32MB RAM (' . $mem . ' bytes), but that should be enough in most cases. If not, increase memory_limit in php.ini', 0);
        } else {
            msg('More than 32MB RAM (' . $mem . ' bytes) available.', 1);
        }
    }

    // a missing changelog is not reported at all
    if (is_writable($conf['changelog'])) {
        msg('Changelog is writable', 1);
    } elseif (@file_exists($conf['changelog'])) {
        msg('Changelog is not writable', -1);
    }

    if (isset($conf['changelog_old']) && @file_exists($conf['changelog_old'])) {
        msg('Old changelog exists', 0);
    }

    // state of the old changelog import, signalled by marker files next to the changelog
    if (@file_exists($conf['changelog'] . '_failed')) {
        msg('Importing old changelog failed', -1);
    } elseif (@file_exists($conf['changelog'] . '_importing')) {
        msg('Importing old changelog now.', 0);
    } elseif (@file_exists($conf['changelog'] . '_import_ok')) {
        msg('Old changelog imported', 1);
        if (!plugin_isdisabled('importoldchangelog')) {
            msg('Importoldchangelog plugin not disabled after import', -1);
        }
    }

    if (is_writable(DOKU_CONF)) {
        msg('conf directory is writable', 1);
    } else {
        msg('conf directory is not writable', -1);
    }

    if ($conf['authtype'] == 'plain') {
        global $config_cascade;
        if (is_writable($config_cascade['plainauth.users']['default'])) {
            msg('conf/users.auth.php is writable', 1);
        } else {
            msg('conf/users.auth.php is not writable', 0);
        }
    }

    if (function_exists('mb_strpos')) {
        if (defined('UTF8_NOMBSTRING')) {
            msg('mb_string extension is available but will not be used', 0);
        } else {
            msg('mb_string extension is available and will be used', 1);
            if (ini_get('mbstring.func_overload') != 0) {
                msg('mb_string function overloading is enabled, this will cause problems and should be disabled', -1);
            }
        }
    } else {
        msg('mb_string extension not available - PHP only replacements will be used', 0);
    }

    if (!UTF8_PREGSUPPORT) {
        msg('PHP is missing UTF-8 support in Perl-Compatible Regular Expressions (PCRE)', -1);
    }
    if (!UTF8_PROPERTYSUPPORT) {
        msg('PHP is missing Unicode properties support in Perl-Compatible Regular Expressions (PCRE)', -1);
    }

    // passing 0 queries the current locale without changing it
    $loc = setlocale(LC_ALL, 0);
    if (!$loc) {
        msg('No valid locale is set for your PHP setup. You should fix this', -1);
    } elseif (stripos($loc, 'utf') === false) {
        msg('Your locale <code>' . hsc($loc) . '</code> seems not to be a UTF-8 locale, you should fix this if you encounter problems.', 0);
    } else {
        msg('Valid locale ' . hsc($loc) . ' found.', 1);
    }

    if ($conf['allowdebug']) {
        msg('Debugging support is enabled. If you don\'t need it you should set $conf[\'allowdebug\'] = 0', -1);
    } else {
        msg('Debugging support is disabled', 1);
    }

    if ($INFO['userinfo']['name']) {
        msg('You are currently logged in as ' . $_SERVER['REMOTE_USER'] . ' (' . $INFO['userinfo']['name'] . ')', 0);
        // separator first: the reversed (array, separator) order is no longer accepted by PHP 8
        msg('You are part of the groups ' . implode(', ', (array) $INFO['userinfo']['grps']), 0);
    } else {
        msg('You are currently not logged in', 0);
    }

    msg('Your current permission for this page is ' . $INFO['perm'], 0);

    if (is_writable($INFO['filepath'])) {
        msg('The current page is writable by the webserver', 0);
    } else {
        msg('The current page is not writable by the webserver', 0);
    }

    if ($INFO['writable']) {
        msg('The current page is writable by you', 0);
    } else {
        msg('The current page is not writable by you', 0);
    }

    // Check for corrupted search index: every word list must be as long as
    // the matching index list
    $lengths = idx_listIndexLengths();
    $index_corrupted = false;
    foreach ($lengths as $length) {
        if (count(idx_getIndex('w', $length)) != count(idx_getIndex('i', $length))) {
            $index_corrupted = true;
            break;
        }
    }

    foreach (idx_getIndex('metadata', '') as $index) {
        if (count(idx_getIndex($index . '_w', '')) != count(idx_getIndex($index . '_i', ''))) {
            $index_corrupted = true;
            break;
        }
    }

    if ($index_corrupted) {
        msg('The search index is corrupted. It might produce wrong results and most probably needs to be rebuilt. See <a href="http://www.dokuwiki.org/faq:searchindex">faq:searchindex</a> for ways to rebuild the search index.', -1);
    } elseif (!empty($lengths)) {
        msg('The search index seems to be working', 1);
    } else {
        msg('The search index is empty. See <a href="http://www.dokuwiki.org/faq:searchindex">faq:searchindex</a> for help on how to fix the search index. If the default indexer isn\'t used or the wiki is actually empty this is normal.');
    }
}
/**
 * Run a few sanity checks
 *
 * Reports every finding through msg(): the PHP version and memory limit,
 * writability of the changelog, the data/attic/media/cache/lock/conf
 * directories and the plain auth user file, the state of the old changelog
 * import, mbstring support, the debugging setting, information about the
 * current user and page, whether the data directory can be fetched over
 * HTTP, and the consistency of the search index.
 *
 * @author Andreas Gohr <*****@*****.**>
 */
function check() {
    global $conf;
    global $INFO;

    if ($INFO['isadmin'] || $INFO['ismanager']) {
        msg('DokuWiki version: ' . getVersion(), 1);
    }

    if (version_compare(phpversion(), '5.1.2', '<')) {
        msg('Your PHP version is too old (' . phpversion() . ' vs. 5.1.2+ needed)', -1);
    } else {
        msg('PHP version ' . phpversion(), 1);
    }

    $mem = (int) php_to_byte(ini_get('memory_limit'));
    if ($mem) {
        if ($mem < 16777216) {
            msg('PHP is limited to less than 16MB RAM (' . $mem . ' bytes). Increase memory_limit in php.ini', -1);
        } elseif ($mem < 20971520) {
            msg('PHP is limited to less than 20MB RAM (' . $mem . ' bytes), you might encounter problems with bigger pages. Increase memory_limit in php.ini', -1);
        } elseif ($mem < 33554432) {
            msg('PHP is limited to less than 32MB RAM (' . $mem . ' bytes), but that should be enough in most cases. If not, increase memory_limit in php.ini', 0);
        } else {
            msg('More than 32MB RAM (' . $mem . ' bytes) available.', 1);
        }
    }

    // a missing changelog is not reported at all
    if (is_writable($conf['changelog'])) {
        msg('Changelog is writable', 1);
    } elseif (@file_exists($conf['changelog'])) {
        msg('Changelog is not writable', -1);
    }

    if (isset($conf['changelog_old']) && @file_exists($conf['changelog_old'])) {
        msg('Old changelog exists', 0);
    }

    // state of the old changelog import, signalled by marker files next to the changelog
    if (@file_exists($conf['changelog'] . '_failed')) {
        msg('Importing old changelog failed', -1);
    } elseif (@file_exists($conf['changelog'] . '_importing')) {
        msg('Importing old changelog now.', 0);
    } elseif (@file_exists($conf['changelog'] . '_import_ok')) {
        msg('Old changelog imported', 1);
        if (!plugin_isdisabled('importoldchangelog')) {
            msg('Importoldchangelog plugin not disabled after import', -1);
        }
    }

    if (is_writable($conf['datadir'])) {
        msg('Datadir is writable', 1);
    } else {
        msg('Datadir is not writable', -1);
    }

    if (is_writable($conf['olddir'])) {
        msg('Attic is writable', 1);
    } else {
        msg('Attic is not writable', -1);
    }

    if (is_writable($conf['mediadir'])) {
        msg('Mediadir is writable', 1);
    } else {
        msg('Mediadir is not writable', -1);
    }

    if (is_writable($conf['cachedir'])) {
        msg('Cachedir is writable', 1);
    } else {
        msg('Cachedir is not writable', -1);
    }

    if (is_writable($conf['lockdir'])) {
        msg('Lockdir is writable', 1);
    } else {
        msg('Lockdir is not writable', -1);
    }

    if (is_writable(DOKU_CONF)) {
        msg('conf directory is writable', 1);
    } else {
        msg('conf directory is not writable', -1);
    }

    if ($conf['authtype'] == 'plain') {
        global $config_cascade;
        if (is_writable($config_cascade['plainauth.users']['default'])) {
            msg('conf/users.auth.php is writable', 1);
        } else {
            msg('conf/users.auth.php is not writable', 0);
        }
    }

    if (function_exists('mb_strpos')) {
        if (defined('UTF8_NOMBSTRING')) {
            msg('mb_string extension is available but will not be used', 0);
        } else {
            msg('mb_string extension is available and will be used', 1);
            if (ini_get('mbstring.func_overload') != 0) {
                msg('mb_string function overloading is enabled, this will cause problems and should be disabled', -1);
            }
        }
    } else {
        msg('mb_string extension not available - PHP only replacements will be used', 0);
    }

    if ($conf['allowdebug']) {
        msg('Debugging support is enabled. If you don\'t need it you should set $conf[\'allowdebug\'] = 0', -1);
    } else {
        msg('Debugging support is disabled', 1);
    }

    if ($INFO['userinfo']['name']) {
        msg('You are currently logged in as ' . $_SERVER['REMOTE_USER'] . ' (' . $INFO['userinfo']['name'] . ')', 0);
        // separator first: the reversed (array, separator) order is no longer accepted by PHP 8
        msg('You are part of the groups ' . implode(', ', (array) $INFO['userinfo']['grps']), 0);
    } else {
        msg('You are currently not logged in', 0);
    }

    msg('Your current permission for this page is ' . $INFO['perm'], 0);

    if (is_writable($INFO['filepath'])) {
        msg('The current page is writable by the webserver', 0);
    } else {
        msg('The current page is not writable by the webserver', 0);
    }

    if ($INFO['writable']) {
        msg('The current page is writable by you', 0);
    } else {
        msg('The current page is not writable by you', 0);
    }

    // try to fetch a file below data/ over HTTP; a response containing the
    // text 'data directory' means the directory is reachable from the web
    $check = wl('', '', true) . 'data/_dummy';
    $http = new DokuHTTPClient();
    $http->timeout = 6;
    $res = $http->get($check);
    if (strpos($res, 'data directory') !== false) {
        msg('It seems like the data directory is accessible from the web. Make sure this directory is properly protected (See <a href="http://www.dokuwiki.org/security">security</a>)', -1);
    } elseif ($http->status == 404 || $http->status == 403) {
        msg('The data directory seems to be properly protected', 1);
    } else {
        msg('Failed to check if the data directory is accessible from the web. Make sure this directory is properly protected (See <a href="http://www.dokuwiki.org/security">security</a>)', -1);
    }

    // Check for corrupted search index: every word list must be as long as
    // the matching index list
    $lengths = idx_listIndexLengths();
    $index_corrupted = false;
    foreach ($lengths as $length) {
        if (count(idx_getIndex('w', $length)) != count(idx_getIndex('i', $length))) {
            $index_corrupted = true;
            break;
        }
    }

    foreach (idx_getIndex('metadata', '') as $index) {
        if (count(idx_getIndex($index . '_w', '')) != count(idx_getIndex($index . '_i', ''))) {
            $index_corrupted = true;
            break;
        }
    }

    if ($index_corrupted) {
        msg('The search index is corrupted. It might produce wrong results and most probably needs to be rebuilt. See <a href="http://www.dokuwiki.org/faq:searchindex">faq:searchindex</a> for ways to rebuild the search index.', -1);
    } elseif (!empty($lengths)) {
        msg('The search index seems to be working', 1);
    } else {
        msg('The search index is empty. See <a href="http://www.dokuwiki.org/faq:searchindex">faq:searchindex</a> for help on how to fix the search index. If the default indexer isn\'t used or the wiki is actually empty this is normal.');
    }
}
/**
 * Loads the page index into $this->pageIndex
 *
 * @throws Exception 'err_idxaccess' if page.idx does not exist in the index directory,
 *                   'err_emptyidx' if the loaded index contains no entries
 */
function _loadPageIndex() {
    global $conf;

    $indexFile = $conf['indexdir'] . '/page.idx';
    if (!@file_exists($indexFile)) {
        throw new Exception('err_idxaccess');
    }

    require_once DOKU_INC . 'inc/indexer.php';

    $this->pageIndex = idx_getIndex('page', '');
    if (count($this->pageIndex) == 0) {
        throw new Exception('err_emptyidx');
    }
}
/**
 * Creates a refnotes_reference_database_page for every existing wiki page
 * that lives in the configured 'reference-db-namespace' and stores it in
 * $this->page, keyed by page id. Does nothing if page.idx is missing from
 * the index directory.
 *
 * A single refnotes_reference_database_cache is shared by all page objects
 * and saved once after all pages were processed.
 */
private function loadPages() {
    global $conf;

    if (!file_exists($conf['indexdir'] . '/page.idx')) {
        return;
    }

    require_once DOKU_INC . 'inc/indexer.php';

    $pageIndex = idx_getIndex('page', '');
    $namespace = refnotes_configuration::getSetting('reference-db-namespace');

    // The namespace is matched literally: without quoting, characters such as
    // '.' in a namespace name would act as regex metacharacters and match
    // pages outside of the configured namespace.
    $namespacePattern = '/^' . preg_quote(trim($namespace, ':'), '/') . ':/';

    $cache = new refnotes_reference_database_cache();

    foreach ($pageIndex as $pageId) {
        $pageId = trim($pageId);

        if ((preg_match($namespacePattern, $pageId) == 1) && file_exists(wikiFN($pageId))) {
            // the database is flagged as disabled while the page object is being
            // constructed (NOTE(review): presumably to avoid recursive lookups - confirm)
            $this->enabled = false;
            $this->page[$pageId] = new refnotes_reference_database_page($this, $cache, $pageId);
            $this->enabled = true;
        }
    }

    $cache->save();
}
/**
 * Returns the pages whose id (and optionally title) matches the given search term
 *
 * An "@word" token in the search term restricts the result to page ids that
 * start with the cleaned word followed by a colon. Hidden, non-existing and
 * unreadable pages are dropped from the result.
 *
 * @param array $data  reads $data['id'] (search term), $data['in_ns'] (match against the
 *                     full id instead of noNSorNS() of it) and $data['in_title']
 *                     (also match the term against the title)
 * @return array page id => title, keys sorted with the ft_pagesorter callback
 */
function _ft_pageLookup(&$data) {
    global $conf;

    // split out original parameters
    $id = $data['id'];
    $in_ns = $data['in_ns'];
    $in_title = $data['in_title'];

    // optional namespace restriction given as "@namespace"
    $ns = null;
    if (preg_match('/(?:^| )@(\\w+)/', $id, $matches)) {
        $ns = cleanID($matches[1]) . ':';
        $id = str_replace($matches[0], '', $id);
    }

    $ids = array_map('rtrim', idx_getIndex('page', ''));
    $titles = array_map('rtrim', idx_getIndex('title', ''));

    // check for corrupt title index #FS2076
    if (count($ids) != count($titles)) {
        $titles = array_fill(0, count($ids), '');
        @unlink($conf['indexdir'] . '/title.idx'); // will be rebuilt in inc/init.php
    }
    $pages = array_combine($ids, $titles);

    // keep only pages whose id or (if requested) title contains the search term
    $cleaned = cleanID($id);
    if ($id !== '' && $cleaned !== '') {
        foreach ($pages as $p_id => $p_title) {
            $haystack = $in_ns ? $p_id : noNSorNS($p_id);
            if (strpos($haystack, $cleaned) !== false) {
                continue;
            }
            if ($in_title && stripos($p_title, $id) !== false) {
                continue;
            }
            unset($pages[$p_id]);
        }
    }

    // keep only pages inside the requested namespace
    if ($ns !== null) {
        foreach (array_keys($pages) as $p_id) {
            if (strpos($p_id, $ns) !== 0) {
                unset($pages[$p_id]);
            }
        }
    }

    // discard hidden pages
    // discard nonexistent pages
    // check ACL permissions
    foreach (array_keys($pages) as $p_id) {
        $keep = isVisiblePage($p_id) && page_exists($p_id) && auth_quickaclcheck($p_id) >= AUTH_READ;
        if (!$keep) {
            unset($pages[$p_id]);
        }
    }

    uksort($pages, 'ft_pagesorter');
    return $pages;
}
/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory named sitemap.xml.gz - This
 * file needs to be writable! (The name is sitemap.xml when neither bz2
 * nor gz compression is configured.)
 *
 * Progress is printed while running. After the file was saved, Google,
 * Yahoo and Microsoft are pinged with the sitemap URL.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true if a sitemap was built and the pings were sent, false if the
 *              feature is disabled, the target is not writable or the existing
 *              sitemap is younger than $conf['sitemap'] days
 */
function runSitemapper() {
    global $conf;

    print "runSitemapper(): started" . NL;
    if (!$conf['sitemap']) {
        return false;
    }

    $compressed = ($conf['compression'] == 'bz2' || $conf['compression'] == 'gz');
    $sitemap = $compressed ? 'sitemap.xml.gz' : 'sitemap.xml';
    print "runSitemapper(): using {$sitemap}" . NL;

    $target = DOKU_INC . $sitemap;

    // an existing file must be writable itself, otherwise the root directory must be
    $mustBeWritable = @file_exists($target) ? $target : DOKU_INC;
    if (!is_writable($mustBeWritable)) {
        return false;
    }

    if (@filesize($target) && @filemtime($target) > time() - $conf['sitemap'] * 60 * 60 * 24) {
        print 'runSitemapper(): Sitemap up to date' . NL;
        return false;
    }

    $pages = idx_getIndex('page', '');
    print 'runSitemapper(): creating sitemap using ' . count($pages) . ' pages' . NL;

    // build the sitemap in an output buffer
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>' . NL;
    print '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . NL;
    foreach ($pages as $rawId) {
        $id = trim($rawId);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if (isHiddenPage($id)) {
            continue;
        }
        $date = @filemtime($file);
        if (!$date) {
            continue;
        }
        if (auth_aclcheck($id, '', '') < AUTH_READ) {
            continue;
        }

        print ' <url>' . NL;
        print ' <loc>' . wl($id, '', true) . '</loc>' . NL;
        print ' <lastmod>' . date_iso8601($date) . '</lastmod>' . NL;
        print ' </url>' . NL;
    }
    print '</urlset>' . NL;
    $data = ob_get_clean();

    //save the new sitemap
    io_saveFile($target, $data);

    //ping search engines...
    $http = new DokuHTTPClient();
    $http->timeout = 8;

    // announcement => ping endpoint; the encoded sitemap URL is appended to each endpoint
    $pings = array(
        'runSitemapper(): pinging google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap=',
        'runSitemapper(): pinging yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url=',
        'runSitemapper(): pinging microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap=',
    );
    foreach ($pings as $announcement => $endpoint) {
        print $announcement . NL;
        $url = $endpoint . urlencode(DOKU_URL . $sitemap);
        $resp = $http->get($url);
        if ($http->error) {
            print 'runSitemapper(): ' . $http->error . NL;
        }
        print 'runSitemapper(): ' . preg_replace('/[\\n\\r]/', ' ', strip_tags($resp)) . NL;
    }

    print 'runSitemapper(): finished' . NL;
    return true;
}