示例#1
0
/**
 * Rebuilds the website search index from static HTML files and dynamic URLs.
 *
 * The index is created in the folder named by the 'search_indexes_folder'
 * setting. Every file returned by get_all_html_files() that passes
 * can_index_html_file() is added, followed by every URL listed in the
 * 'search_dynamic_pages' setting whose first response status is 200.
 *
 * The outcome is reported through globals rather than a return value:
 *  - $success_msg: everything was indexed;
 *  - $warning_msg: indexing finished but some dynamic URLs were skipped
 *                  (HTML fragment listing them);
 *  - $error_msg:   the index folder is missing afterwards or an exception
 *                  was thrown while indexing.
 *
 * @return void
 */
function rebuild_search_indexes()
{
    global $success_msg;
    global $error_msg;
    global $warning_msg;
    global $all_settings;

    $index_folder = get_setting('search_indexes_folder', $all_settings);
    try {
        // Second argument "true" asks Zend_Search_Lucene to create the index.
        $index = new Zend_Search_Lucene($index_folder, true);
        setlocale(LC_CTYPE, 'en_US');

        // Static pages: map of local file path => public URL.
        foreach (get_all_html_files(dirname(__FILE__)) as $html_file => $html_url) {
            if (!can_index_html_file($html_file)) {
                continue;
            }
            $file_content = file_get_contents($html_file);
            if ($file_content === false) {
                // Unreadable file: skip it instead of indexing an empty document.
                continue;
            }
            rebuild_search_indexes_add_document($index, $file_content, $html_url);
        }

        // Dynamic pages: fetched over HTTP, unreachable ones are collected.
        $broken_urls = array();
        foreach (get_dynamic_urls(get_setting('search_dynamic_pages', $all_settings)) as $url) {
            $headers = get_headers($url);
            // get_headers() returns false on failure; only the first status
            // line is inspected, so a redirecting URL is still reported as
            // broken, as before.
            $is_ok = is_array($headers)
                && isset($headers[0])
                && preg_match('#^HTTP/\S+\s+200\b#', $headers[0]) === 1;

            $content = $is_ok ? file_get_contents($url) : false;
            if ($content === false) {
                $broken_urls[] = $url;
                continue;
            }
            rebuild_search_indexes_add_document($index, $content, $url);
        }

        if (file_exists($index_folder)) {
            if (count($broken_urls) > 0) {
                $warning_msg = '<p>The website was successfully indexed, but the following URL\'s were skipped because they are broken:</p>';
                $warning_msg .= '<ul class="disc">';
                foreach ($broken_urls as $broken_url) {
                    // Escape for HTML: URLs routinely contain "&" and may contain quotes.
                    $safe_url = htmlspecialchars($broken_url, ENT_QUOTES, 'UTF-8');
                    $warning_msg .= '<li><a href="' . $safe_url . '">' . $safe_url . '</a></li>';
                }
                $warning_msg .= '</ul>';
                $warning_msg .= '<p>Please remove them from the "List of dynamic pages" field.</p>';
            } else {
                $success_msg = 'The website was successfully indexed.';
            }
        } else {
            $error_msg = 'An error occurred during the website indexing. The error message is: the folder that stores the website indexes couldn\'t be created';
        }
    } catch (Exception $e) {
        $error_msg = 'An error occurred during the website indexing. The error message is: ' . $e->getMessage();
    }
}

/**
 * Adds one HTML page to the search index together with its URL.
 *
 * Everything before the first "<head" is dropped and replaced by a bare
 * "<html>" tag before parsing. If the page has no "<head", only "<html>" is
 * parsed (strstr() yields false), which matches the previous behaviour.
 *
 * @param Zend_Search_Lucene $index Index being built.
 * @param string             $html  Raw HTML of the page.
 * @param string             $url   URL stored in the document's 'url' field.
 *
 * @return void
 */
function rebuild_search_indexes_add_document($index, $html, $url)
{
    $html = '<html>' . strstr($html, '<head');
    $doc = Zend_Search_Lucene_Document_Html::loadHTML($html);
    $doc->addField(Zend_Search_Lucene_Field::Text('url', $url, 'UTF-8'));
    $index->addDocument($doc);
    // Push any buffered output to the client between documents.
    flush();
}
示例#2
0
/**
 * Tells whether a file may be added to the search index.
 *
 * A file is rejected when its path contains one of the entries from the
 * 'search_exclude_from_indexing' setting, when it cannot be read, or when
 * its content contains the '@skip-indexing' marker.
 *
 * @param string $html_file Path of the file to check.
 *
 * @return bool True when the file may be indexed, false otherwise.
 */
function can_index_website_file($html_file)
{
    global $all_settings;

    foreach (get_dynamic_urls(get_setting('search_exclude_from_indexing', $all_settings)) as $excluded_file) {
        // Normalise separators and drop trailing slashes on the exclusion entry.
        // NOTE(review): $html_file itself is compared as-is; confirm callers
        // pass forward-slash paths on Windows.
        $excluded_file = rtrim(str_replace("\\", "/", $excluded_file), '/');
        if ($excluded_file === '') {
            // A blank entry must not match every path.
            continue;
        }
        // strpos() returns 0 for a match at the very start of the path, so the
        // result has to be compared against false, not tested for truthiness.
        if (strpos($html_file, $excluded_file) !== false) {
            return false;
        }
    }

    $file_content = file_get_contents($html_file);
    if ($file_content === false) {
        // A file that cannot be read cannot be indexed either.
        return false;
    }

    // Same offset-0 concern: the marker may be the first thing in the file.
    return strpos($file_content, '@skip-indexing') === false;
}