Exemple #1
0
/**
 * Returns the pages that use a given media file
 *
 * Does a quick lookup with the fulltext index (using the media name as
 * returned by noNS()), then scans the raw wiki text of every candidate
 * page for media syntax ({{...}}) mentioning that name. Each reference
 * found is resolved against the namespace of the candidate page and
 * compared with $id.
 *
 * Aborts after $max found results
 *
 * @param string $id  ID of the media file to look for
 * @param int    $max maximum number of pages to collect; empty or
 *                    negative values are treated as 1
 * @return array sorted list of the page IDs that reference the media file
 */
function ft_mediause($id, $max)
{
    // need to find at least one; a negative limit would otherwise end the
    // scan right after the first candidate page, matched or not
    if (!$max || $max < 1) {
        $max = 1;
    }
    $result = array();

    // quick lookup of the mediafile
    // FIXME use metadata key lookup
    $media = noNS($id);
    $matches = idx_lookup(idx_tokenizer($media));
    $docs = array_keys(ft_resultCombine(array_values($matches)));
    if (!count($docs)) {
        return $result;
    }

    // the pattern only depends on the media name, so build it once
    $pattern = '/\\{\\{([^|}]*' . preg_quote($media, '/') . '[^|}]*)(|[^}]+)?\\}\\}/i';

    // go through all found pages
    foreach ($docs as $doc) {
        $ns = getNS($doc);

        // nothing to inspect when the page has no matching media syntax
        // (or the regex engine reported an error)
        if (!preg_match_all($pattern, rawWiki($doc), $matches)) {
            continue;
        }

        foreach ($matches[1] as $img) {
            $img = trim($img);

            // skip external images
            if (preg_match('/^https?:\\/\\//i', $img)) {
                continue;
            }

            // remove any parameters
            list($img) = explode('?', $img);

            // resolve the possibly relative img; $exists is only an
            // out-parameter required by the call and is not used here
            resolve_mediaid($ns, $img, $exists);

            // compare as strings: a loose == would treat numeric looking
            // IDs such as "01e1" and "1e1" as the same media file
            if ((string) $img === (string) $id) {
                // we have a match, one hit per page is enough
                $result[] = $doc;
                break;
            }
        }

        if (count($result) >= $max) {
            break;
        }
    }

    sort($result);
    return $result;
}
Exemple #2
0
/**
 * Evaluates a fulltext query and returns the matching documents
 *
 * The query in $data['query'] is handed to ft_queryParser(); the resulting
 * token list is processed with a stack of page sets (page id => hit count).
 * Word, phrase and namespace tokens push a page set, the AND/OR/NOT tokens
 * replace the topmost set(s) with the combined one. The highlight terms
 * reported by the parser are written back to $data['highlight'].
 *
 * @author Andreas Gohr <*****@*****.**>
 * @author Kazutaka Miyasaka <*****@*****.**>
 *
 * @param array $data reads 'query', receives 'highlight'
 * @return array page id => hit count, sorted by descending count; hidden,
 *               unreadable and nonexistent pages are removed
 */
function _ft_pageSearch(&$data)
{
    $Indexer = idx_get_indexer();

    // let the parser break the query into tokens, words and highlight terms
    $parsed = ft_queryParser($Indexer, $data['query']);
    $data['highlight'] = $parsed['highlight'];
    if (empty($parsed['parsed_ary'])) {
        return array();
    }

    // index hits for every word that occurs in the query
    $wordHits = $Indexer->lookup($parsed['words']);

    // every page known to the index, each with 0 hits
    // (!: this includes nonexistent pages)
    $allPages = array();
    foreach ($Indexer->getPages() as $pageId) {
        $allPages[$pageId] = 0;
    }

    // walk the token list, keeping intermediate page sets on a stack
    $operands = array();
    foreach ($parsed['parsed_ary'] as $token) {
        $type = substr($token, 0, 3);

        if (in_array($type, array('W+:', 'W-:', 'W_:'), true)) {
            // word: push the pages the index reported for it
            $term = substr($token, 3);
            $operands[] = (array) $wordHits[$term];
        } elseif ($type === 'P+:' || $type === 'P-:') {
            // phrase: since phrases are always parsed as ((W1)(W2)...(P)),
            // the top of the stack holds the pages containing all words of
            // the phrase; only those need to be checked
            $phrase = substr($token, 3);
            $candidates = end($operands);
            $confirmed = array();
            foreach (array_keys($candidates) as $pageId) {
                $eventData = array('id' => $pageId, 'phrase' => $phrase, 'text' => rawWiki($pageId));
                $event = new Doku_Event('FULLTEXT_PHRASE_MATCH', $eventData);
                // default action, skipped when the event was prevented or
                // a result of true has already been set
                if ($event->advise_before() && $event->result !== true) {
                    $haystack = utf8_strtolower($eventData['text']);
                    if (strpos($haystack, $phrase) !== false) {
                        $event->result = true;
                    }
                }
                $event->advise_after();
                if ($event->result === true) {
                    // phrase: always 0 hit
                    $confirmed[$pageId] = 0;
                }
            }
            $operands[] = $confirmed;
        } elseif ($type === 'N+:' || $type === 'N-:') {
            // namespace: push every known page whose id starts with it
            $prefix = substr($token, 3);
            $inNamespace = array();
            foreach (array_keys($allPages) as $pageId) {
                if (strpos($pageId, $prefix) === 0) {
                    // namespace: always 0 hit
                    $inNamespace[$pageId] = 0;
                }
            }
            $operands[] = $inNamespace;
        } elseif ($type === 'AND') {
            // and operation: replace the two topmost sets
            $pair = array_splice($operands, -2);
            $operands[] = ft_resultCombine(array($pair[0], $pair[1]));
        } elseif ($type === 'OR') {
            // or operation: replace the two topmost sets
            $pair = array_splice($operands, -2);
            $operands[] = ft_resultUnite(array($pair[0], $pair[1]));
        } elseif ($type === 'NOT') {
            // not operation (unary): complement against all known pages
            $excluded = array_pop($operands);
            $operands[] = ft_resultComplement(array($allPages, $excluded));
        }
    }

    $docs = array_pop($operands);
    if (empty($docs)) {
        return array();
    }

    // check: settings, acls, existence (same order, later checks are
    // skipped once a page is known to be dropped)
    foreach (array_keys($docs) as $pageId) {
        $drop = isHiddenPage($pageId);
        $drop = $drop || auth_quickaclcheck($pageId) < AUTH_READ;
        $drop = $drop || !page_exists($pageId, '', false);
        if ($drop) {
            unset($docs[$pageId]);
        }
    }

    // sort docs by count
    arsort($docs);
    return $docs;
}