/**
 * Rebuilds the website search index from scratch.
 *
 * Two sources are indexed:
 *  - every HTML file returned by get_all_html_files() for this script's
 *    directory that can_index_html_file() accepts;
 *  - every URL listed in the 'search_dynamic_pages' setting that answers
 *    with an HTTP 200 status line and whose body can be downloaded.
 *
 * The outcome is reported through the global message variables: $success_msg
 * when everything was indexed, $warning_msg (an HTML fragment) when some
 * dynamic URLs had to be skipped, and $error_msg when the index folder does
 * not exist afterwards or an exception was thrown.
 *
 * Side effects: sets the process-wide LC_CTYPE locale to 'en_US' and calls
 * flush() after every indexed document.
 *
 * @return void
 */
function rebuild_search_indexes() {
    global $success_msg;
    global $error_msg;
    global $warning_msg;
    global $all_settings;

    $index_folder = get_setting('search_indexes_folder', $all_settings);

    try {
        // Second argument "true" asks Zend_Search_Lucene to create a new index.
        $index = new Zend_Search_Lucene($index_folder, true);
        setlocale(LC_CTYPE, 'en_US');

        // Static pages: the iterable maps a file path to its public URL.
        foreach (get_all_html_files(dirname(__FILE__)) as $html_file => $html_url) {
            if (!can_index_html_file($html_file)) {
                continue;
            }

            $file_content = file_get_contents($html_file);
            if ($file_content === false) {
                // Unreadable file: skip it instead of indexing an empty document.
                continue;
            }

            // Drop everything before the <head> tag and re-open the document.
            $file_content = '<html>' . strstr($file_content, '<head');
            $doc = Zend_Search_Lucene_Document_Html::loadHTML($file_content);
            $doc->addField(Zend_Search_Lucene_Field::Text('url', $html_url, 'UTF-8'));
            $index->addDocument($doc);
            flush();
        }

        // Dynamic pages: fetched over HTTP, anything not answering 200 is reported.
        $broken_urls = array();
        foreach (get_dynamic_urls(get_setting('search_dynamic_pages', $all_settings)) as $url) {
            // get_headers() returns false on failure, so guard before reading index 0.
            $headers = get_headers($url);
            $status_line = (is_array($headers) && isset($headers[0])) ? $headers[0] : '';

            // Match the status code itself rather than any "200" substring.
            $is_ok = preg_match('#^HTTP/\S+\s+200(\s|$)#', $status_line) === 1;

            $content = $is_ok ? file_get_contents($url) : false;
            if ($content === false) {
                $broken_urls[] = $url;
                continue;
            }

            $content = '<html>' . strstr($content, '<head');
            $doc = Zend_Search_Lucene_Document_Html::loadHTML($content);
            $doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'UTF-8'));
            $index->addDocument($doc);
            flush();
        }

        if (!file_exists($index_folder)) {
            $error_msg = 'An error occurred during the website indexing. The error message is: the folder that stores the website indexes couldn\'t be created';
        } elseif (count($broken_urls) > 0) {
            $warning_msg = '<p>The website was successfully indexed, but the following URL\'s were skipped because they are broken:</p>';
            $warning_msg .= '<ul class="disc">';
            foreach ($broken_urls as $broken_url) {
                // The URLs come from a settings field; escape them before embedding in HTML.
                $safe_url = htmlspecialchars($broken_url, ENT_QUOTES, 'UTF-8');
                $warning_msg .= '<li><a href="' . $safe_url . '">' . $safe_url . '</a></li>';
            }
            $warning_msg .= '</ul>';
            $warning_msg .= '<p>Please remove them from the "List of dynamic pages" field.</p>';
        } else {
            $success_msg = 'The website was successfully indexed.';
        }
    } catch (Exception $e) {
        $error_msg = 'An error occurred during the website indexing. The error message is: ' . $e->getMessage();
    }
}
/**
 * Tells whether an HTML file may be added to the search index.
 *
 * A file is rejected when its path contains one of the entries of the
 * 'search_exclude_from_indexing' setting, or when its content contains the
 * '@skip-indexing' marker.
 *
 * @param string $html_file Path of the file to check.
 * @return bool False when the file must be skipped, true otherwise.
 */
function can_index_website_file($html_file) {
    global $all_settings;

    foreach (get_dynamic_urls(get_setting('search_exclude_from_indexing', $all_settings)) as $excluded_file) {
        // Normalise to forward slashes and drop trailing ones before matching.
        $excluded_file = rtrim(str_replace("\\", "/", $excluded_file), '/');

        // An empty entry would match every path (strpos returns 0 for an
        // empty needle on PHP 8), so it is ignored.
        if ($excluded_file === '') {
            continue;
        }

        // Compare against false explicitly: a match at offset 0 is still a match.
        if (strpos($html_file, $excluded_file) !== false) {
            return false;
        }
    }

    $file_content = file_get_contents($html_file);
    if ($file_content === false) {
        // Unreadable file: no marker can be detected, so keep reporting it as indexable.
        return true;
    }

    return strpos($file_content, '@skip-indexing') === false;
}