Exemplo n.º 1
0
/**
 * Runs a free-text query against a Xapian database, prints one line per
 * match followed by the value counts gathered from slot 1, and logs the
 * query together with the docids that were displayed.
 *
 * @param string $dbpath      Path handed to the XapianDatabase constructor.
 * @param string $querystring Raw query text given to the query parser.
 * @param int    $offset      Index of the first match to fetch.
 * @param int    $pagesize    Maximum number of matches to fetch.
 */
function search($dbpath, $querystring, $offset = 0, $pagesize = 10)
{
    $database = new XapianDatabase($dbpath);

    ### Start of example code.
    // Parser: English stemming plus two free-text prefixes and one
    // boolean (filter) prefix.
    $parser = new XapianQueryParser();
    $parser->set_stemmer(new XapianStem("en"));
    $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    foreach (array("title" => "S", "description" => "XD") as $field => $prefix) {
        $parser->add_prefix($field, $prefix);
    }
    $parser->add_boolean_prefix("material", "XM");

    $parsed = $parser->parse_query($querystring);
    ### End of example code.

    // Run the query with a match spy attached that tallies the values
    // stored in slot 1 of the documents it sees.
    $session = new XapianEnquire($database);
    $session->set_query($parsed);
    $facets = new XapianValueCountMatchSpy(1);
    $session->add_matchspy($facets);

    $mset = $session->get_mset($offset, $pagesize);

    // Print each match and remember its docid for the log line below.
    $seen = array();
    for ($hit = $mset->begin(), $stop = $mset->end(); !$hit->equals($stop); $hit->next()) {
        // Document data is stored as JSON; TITLE is the field displayed.
        $record = json_decode($hit->get_document()->get_data());
        $id = $hit->get_docid();
        $seen[] = $id;
        // get_rank() is zero-based, the displayed position is one-based.
        echo sprintf("%d: #%03d %s\n", $hit->get_rank() + 1, $id, $record->TITLE);
    }

    // Print every distinct slot value the spy collected, with its count.
    for ($facet = $facets->values_begin(), $last = $facets->values_end(); !$facet->equals($last); $facet->next()) {
        echo sprintf("Facet: %s; count: %d\n", $facet->get_term(), $facet->get_termfreq());
    }

    // Log the query, the requested window and the docids shown.
    $shown = implode(" ", $seen);
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, $shown));
}
Exemplo n.º 2
0
 // Excerpt from a larger script: $argv, $database and $enquire are used
 // below but are not assigned anywhere in this excerpt.
 //
 // Combine command line arguments up to "--" with spaces between
 // them, so that simple queries don't have to be quoted at the shell
 // level.
 // The first two $argv entries are skipped (presumably the script name and
 // the database path -- TODO confirm against the start of the script).
 $args = array_slice($argv, 2);
 $separator = array_search("--", $args);
 if ($separator === FALSE) {
     // No "--" present: every remaining argument is part of the query.
     $separator = count($args);
 }
 $query_string = join(" ", array_slice($args, 0, $separator));
 // Everything after "--" is read as a document id and added to the RSet
 // that is passed to get_mset() further down.
 $rset = new XapianRSet();
 foreach (array_slice($args, $separator + 1) as $docid) {
     // intval() maps a non-numeric argument to 0.
     $rset->add_document(intval($docid));
 }
 // Parse the query text with an English stemmer; the database is handed to
 // the parser as well.
 $qp = new XapianQueryParser();
 $stemmer = new XapianStem("english");
 $qp->set_stemmer($stemmer);
 $qp->set_database($database);
 $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
 $query = $qp->parse_query($query_string);
 print "Parsed query is: {$query->get_description()}\n";
 // Find the top 10 results for the query.
 $enquire->set_query($query);
 $matches = $enquire->get_mset(0, 10, $rset);
 // Display the results.
 print "{$matches->get_matches_estimated()} results found:\n";
 // Iterating the MSet iterator with foreach: the key ($i) is used as the
 // iterator object (rank, percent, document) and the value as the docid.
 // NOTE(review): this depends on the bindings supporting foreach over
 // iterators -- confirm for the bindings version in use.
 foreach ($matches->begin() as $i => $docid) {
     // get_rank() is zero-based; show a one-based position.
     $n = $i->get_rank() + 1;
     $data = $i->get_document()->get_data();
     print "{$n}: {$i->get_percent()}% docid={$docid} [{$data}]\n\n";
 }
 // If no relevant docids were given, invent an RSet containing the top 5
Exemplo n.º 3
0
$db2 = Xapian::inmemory_open();
# Check PHP5 handling of Xapian::DocNotFoundError
# NOTE(review): $db is not assigned in this excerpt -- presumably it is
# opened earlier in the script.
# Fetching docid 2 is expected to throw. Reaching the print statement means
# no exception was raised, so the script stops with exit status 1.
try {
    $doc2 = $db->get_document(2);
    print "Retrieved non-existent document\n";
    exit(1);
} catch (Exception $e) {
    # The exception message must match exactly, including the class prefix.
    if ($e->getMessage() !== "DocNotFoundError: Docid 2 not found") {
        print "DocNotFoundError Exception string not as expected, got: '{$e->getMessage()}'\n";
        exit(1);
    }
}
# Check QueryParser parsing error.
# "test AND" has no right-hand operand, so parse_query() is expected to throw.
try {
    $qp = new XapianQueryParser();
    $qp->set_stemmer(new XapianStem("en"));
    $qp->parse_query("test AND");
    print "Successfully parsed bad query\n";
    exit(1);
} catch (Exception $e) {
    if ($e->getMessage() !== "QueryParserError: Syntax: <expression> AND <expression>") {
        # The call parentheses have to be inside the braces; written as
        # {$e->getMessage}() PHP reads a (non-existent) property and then
        # prints a literal "()" instead of the actual message.
        print "QueryParserError Exception string not as expected, got: '{$e->getMessage()}'\n";
        exit(1);
    }
}
# Check that open_stub() is wrapped as expected.
try {
    $db = Xapian::auto_open_stub("nosuchdir/nosuchdb");
    print "Opened non-existent stub database\n";
    exit(1);
} catch (Exception $e) {
Exemplo n.º 4
0
/**
 * Queries the database.
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian
 *
 * When $query_string is non-empty it is parsed (English stemmer, boolean
 * prefixes 'courseid' and 'toolid') and AND-ed with the subqueries built
 * from $extra. When it is empty, the subqueries alone are OR-ed together.
 *
 * @param   string          $query_string   The search string. This string will
 *                                          be parsed and stemmed automatically.
 * @param   XapianDatabase  $db             Xapian database to search. When this
 *                                          is not an object, a new
 *                                          XapianDatabase(XAPIAN_DB) is opened.
 * @param   int             $start          An integer defining the first
 *                                          document to return
 * @param   int             $length         The number of results to return.
 * @param   array           $extra          Extra search terms. Every non-empty
 *                                          element is passed to the XapianQuery
 *                                          constructor to build one subquery.
 * @param   int             $count_type     Which match count to report:
 *                                          1 = lower bound, 2 = upper bound,
 *                                          anything else (default 0) = best
 *                                          estimate.
 * @return  array|null                      array($count, $results), or NULL
 *                                          after an exception was caught and
 *                                          passed to display_xapian_error().
 *                                          $results is keyed by the 1-based
 *                                          position of the match in the fetched
 *                                          page; each entry holds 'courseid',
 *                                          'toolid', one 'sf-<code>' entry per
 *                                          specific field, 'xapian_data' and
 *                                          'score'.
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }
        // Build subqueries from $extra array. Now only used by tags search filter on search widget
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }
        $enquire = new XapianEnquire($db);
        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
            $parsed_query = $query_parser->parse_query($query_string);
            // A match has to satisfy the parsed text query and every subquery.
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed_query)));
        } else {
            // No text query: a match on any one of the subqueries is enough.
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }
        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);
        $specific_fields = get_specific_field_list();
        $results = array();
        // $position is the 1-based key of the current match in $results. It is
        // advanced even when a match yields no document object, so keys keep
        // mirroring the order of the match set.
        $position = 0;
        // The end sentinel does not change while iterating; fetch it once.
        $end = $matches->end();
        for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
            $position++;
            $document = $i->get_document();
            if (!is_object($document)) {
                continue;
            }
            // process one item terms; substr(..., 1) drops the first
            // character of the term name (presumably the prefix -- the
            // shape of the helper's return value is not visible here)
            $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
            $results[$position]['courseid'] = substr($courseid_terms[0]['name'], 1);
            $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
            $results[$position]['toolid'] = substr($toolid_terms[0]['name'], 1);
            // process each specific field prefix
            foreach ($specific_fields as $specific_field) {
                $results[$position]['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
            }
            // rest of data
            // NOTE(review): unserialize() is only safe as long as nothing
            // untrusted can write document data into the index.
            $results[$position]['xapian_data'] = unserialize($document->get_data());
            $results[$position]['score'] = $i->get_percent();
        }
        // The reported count describes the whole result set, not just the
        // page that was fetched above.
        switch ($count_type) {
            case 1:
                // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2:
                // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            default:
                // Best estimate (also used for $count_type 0)
                $count = $matches->get_matches_estimated();
                break;
        }
        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
Exemplo n.º 5
0
/**
 * Manual smoke test: opens a fixed database path twice, joins the two
 * handles, searches for the fixed word "david" and prints the parsed query,
 * the estimated match count and up to ten matches. Any exception is printed
 * and ends the script with exit status 1.
 */
function tests()
{
    include_once("ressources/class.xapian.inc");

    try {
        // NOTE(review): both handles point at the same path -- confirm that
        // adding a database to itself is what this test intends.
        $path = "/home/dtouzeau/Documents/doc1.db";
        $combined = new XapianDatabase($path);
        $second = new XapianDatabase($path);
        $combined->add_database($second);

        // Start an enquire session on the combined database.
        $enquire = new XapianEnquire($combined);

        // The search text is hard-coded; nothing is read from the command line.
        $parser = new XapianQueryParser();
        $parser->set_stemmer(new XapianStem("english"));
        $parser->set_database($combined);
        $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $query = $parser->parse_query("david");
        print "Parsed query is: {$query->get_description()}\n";

        // Fetch the first ten matches.
        $enquire->set_query($query);
        $matches = $enquire->get_mset(0, 10);
        print "{$matches->get_matches_estimated()} results found:\n";

        // One line per match: position, percent score, docid, document data.
        for ($i = $matches->begin(); !$i->equals($matches->end()); $i->next()) {
            $n = $i->get_rank() + 1;
            $data = $i->get_document()->get_data();
            print "$n: {$i->get_percent()}% docid={$i->get_docid()} [$data]\n\n";
        }
    } catch (Exception $e) {
        print $e->getMessage() . "\n";
        exit(1);
    }
}
/**
 * Runs a free-text query, optionally restricted to documents carrying at
 * least one of the given materials, prints one line per match followed by
 * the value counts gathered from slot 1, and logs the query together with
 * the docids that were displayed.
 *
 * @param string $dbpath      Path handed to the XapianDatabase constructor.
 * @param string $querystring Raw query text given to the query parser.
 * @param array  $materials   Material names, each optionally written as
 *                            "material:<name>". When empty, no filter is
 *                            applied.
 * @param int    $offset      Index of the first match to fetch.
 * @param int    $pagesize    Maximum number of matches to fetch.
 */
function search($dbpath, $querystring, $materials, $offset = 0, $pagesize = 10)
{
    // offset - defines starting point within result set
    // pagesize - defines number of records to retrieve
    // Open the database we're going to search.
    $db = new XapianDatabase($dbpath);
    ### Start of example code.
    // Set up a QueryParser with a stemmer and suitable prefixes
    $queryparser = new XapianQueryParser();
    $queryparser->set_stemmer(new XapianStem("english"));
    // The method is set_stemming_strategy(); set_stem_strategy() does not
    // exist on XapianQueryParser.
    $queryparser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
    $queryparser->add_prefix("title", "S");
    $queryparser->add_prefix("description", "XD");
    // And parse the query
    $query = $queryparser->parse_query($querystring);
    if (empty($materials) === false) {
        // Filter the results to ones which contain at least one of the
        // materials.
        $material_queries = array();
        // Build a query for each material value: strip the optional
        // "material:" marker and lower-case the name before adding the
        // 'XM' term prefix.
        foreach ($materials as $material) {
            $material = str_replace("material:", "", $material);
            $material_queries[] = new XapianQuery('XM' . strtolower($material));
        }
        // Combine these queries with an OR operator, so that one matching
        // material is enough (OP_AND would require all of them at once).
        $material_query = new XapianQuery(XapianQuery::OP_OR, $material_queries);
        // Use the material query to filter the main query; OP_FILTER
        // restricts the matches without the filter side affecting weights.
        $query = new XapianQuery(XapianQuery::OP_FILTER, $query, $material_query);
    }
    ### End of example code.
    // Use an Enquire object on the database to run the query
    $enquire = new XapianEnquire($db);
    $enquire->set_query($query);
    // Set up a spy to inspect the MAKER value at slot 1
    $spy = new XapianValueCountMatchSpy(1);
    $enquire->add_matchspy($spy);
    // Retrieve the matches and compute start and end points
    $matches = $enquire->get_mset($offset, $pagesize);
    $start = $matches->begin();
    $end = $matches->end();
    $index = 0;
    // Use an array to record the DocIds of each match
    $docids = array();
    while (!$start->equals($end)) {
        // retrieve the document and its data (stored as JSON)
        $doc = $start->get_document();
        $fields = json_decode($doc->get_data());
        // one-based position within the overall result list
        $position = $offset + $index + 1;
        // record the docid
        $docid = $start->get_docid();
        $docids[] = $docid;
        // display the results
        print sprintf("%d: #%03d %s\n", $position, $docid, $fields->TITLE);
        // increment MSet iterator and our counter
        $start->next();
        $index++;
    }
    // Parse and display the spy values
    $spy_start = $spy->values_begin();
    $spy_end = $spy->values_end();
    while (!$spy_start->equals($spy_end)) {
        print sprintf("Facet: %s; count: %d\n", $spy_start->get_term(), $spy_start->get_termfreq());
        $spy_start->next();
    }
    // Finally, make sure we log the query and displayed results
    log_info(sprintf("xapian.search:'%s'[%d:%d] = %s", $querystring, $offset, $offset + $pagesize, implode(" ", $docids)));
}
Exemplo n.º 7
0
 /**
  * Searches the file index and returns one entry per matching file.
  *
  * Matches are collapsed on value slot 0 so that each file is returned at
  * most once. A match whose file cannot be loaded is skipped silently.
  *
  * @param string $query_string Raw query text given to the query parser.
  * @return array List of arrays with the keys 'id', 'path', 'login',
  *               'date', 'name', 'extension', 'score', 'content', 'icon'.
  */
 public function search($query_string)
 {
     $database = new XapianDatabase(self::$_database_path);
     // Start an enquire session.
     $enquire = new XapianEnquire($database);
     $qp = new XapianQueryParser();
     $stemmer = new XapianStem("english");
     $qp->set_stemmer($stemmer);
     $qp->set_database($database);
     $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
     $query = $qp->parse_query($query_string);
     $enquire->set_query($query);
     // Slot 0 holds the file path; collapsing on it keeps a single match
     // per file.
     $enquire->set_collapse_key(0, 1);
     // No paging: ask for as many matches as there are documents.
     $matches = $enquire->get_mset(0, $database->get_doccount());
     $results = array();
     // The end sentinel does not change while iterating; fetch it once.
     $end = $matches->end();
     for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
         try {
             // get_value() takes a numeric slot. The previous argument
             // 'file' is not numeric and could only work by being coerced
             // to 0, so slot 0 (the file path slot) is named explicitly.
             $fileobj = new EfrontFile($i->get_document()->get_value(0));
             $results[] = array(
                 'id' => $fileobj['id'],
                 // path is reported relative to G_ROOTPATH
                 'path' => str_replace(G_ROOTPATH, '', $fileobj['path']),
                 'login' => $fileobj['users_LOGIN'] ? $fileobj['users_LOGIN'] : '',
                 'date' => formatTimestamp(filemtime($fileobj['path']), 'time_nosec'),
                 'name' => $fileobj['name'],
                 'extension' => $fileobj['extension'],
                 'score' => $i->get_percent(),
                 'content' => $i->get_document()->get_data(),
                 'icon' => $fileobj->getTypeImage(),
             );
         } catch (Exception $e) {
             // Deliberate best effort: don't halt for missing files, just
             // leave the match out of the results.
         }
     }
     return $results;
 }
Exemplo n.º 8
0
 /**
  * Return a list of IDs for the given search criteria.
  *
  * As a side effect, the spelling-corrected query string reported by the
  * parser is stored in $this->_spelling.
  *
  * @param string $criteria Raw query text given to the query parser.
  * @param int $limit       Maximum number of matches to fetch.
  * @param int $offset      Index of the first match to fetch.
  * @return array Values of slot self::XAPIAN_FIELD_ID, one per match, in
  *               match order.
  */
 public function get_by_criteria($criteria, $limit, $offset)
 {
     $qp = new XapianQueryParser();
     $enquire = new XapianEnquire($this->_database);
     // Stemming is only configured when a stem locale is set.
     if ($this->get_stem_locale()) {
         // Note, there may be a problem if this is different than at indexing time!
         $stemmer = new XapianStem($this->get_stem_locale());
         $qp->set_stemmer($stemmer);
         $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
     }
     $qp->set_database($this->_database);
     // Only the spelling-correction flag is passed to the parser.
     $query = $qp->parse_query($criteria, XapianQueryParser::FLAG_SPELLING_CORRECTION);
     $enquire->set_query($query);
     // Keep the parser's suggestion for later use by the caller.
     $this->_spelling = $qp->get_corrected_query_string();
     $matches = $enquire->get_mset($offset, $limit);
     // TODO: get count from $matches->get_matches_estimated() instead of current method
     $ids = array();
     // The end sentinel does not change while iterating; fetch it once.
     $end = $matches->end();
     for ($i = $matches->begin(); !$i->equals($end); $i->next()) {
         $ids[] = $i->get_document()->get_value(self::XAPIAN_FIELD_ID);
     }
     return $ids;
 }
Exemplo n.º 9
0
/**
 * Searches the database at XAPIAN_DIR and returns the matches together with
 * the terms indexed for each matching document.
 *
 * Query terms are AND-ed by default; the boolean prefixes align, colour, ep,
 * noise and series are recognised, as is a numeric value range on slot 0.
 * Matches are ordered by value slot 1 (set_sort_by_value(1, true)).
 *
 * @param string $query Raw query text given to the query parser.
 * @param int    $num   Maximum number of matches to fetch.
 * @return array 'query' (description of the parsed query), 'estimate'
 *               (estimated number of matches) and 'data' (one array per
 *               match: 'text' plus the entries derived from its terms).
 */
function search($query, $num = 20) {
	$db = new XapianDatabase(XAPIAN_DIR);
	$enquire = new XapianEnquire($db);

	$stemmer = new XapianStem("english");
	$qp = new XapianQueryParser();
	$valuerange = new XapianNumberValueRangeProcessor(0);

	$qp->set_stemmer($stemmer);
	$qp->set_database($db);
	$qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
	// The operator is a class constant of XapianQuery; the bare name
	// Query_OP_AND is an undefined constant.
	$qp->set_default_op(XapianQuery::OP_AND);
	$qp->add_boolean_prefix('align', 'A');
	$qp->add_boolean_prefix('colour', 'C');
	$qp->add_boolean_prefix('ep', 'E');
	$qp->add_boolean_prefix('noise', 'N');
	$qp->add_boolean_prefix('series', 'S');
	$qp->add_valuerangeprocessor($valuerange);

	$query = $qp->parse_query($query, XapianQueryParser::FLAG_BOOLEAN | XapianQueryParser::FLAG_PHRASE |
		XapianQueryParser::FLAG_LOVEHATE | XapianQueryParser::FLAG_WILDCARD |
		XapianQueryParser::FLAG_SPELLING_CORRECTION);

	$enquire->set_query($query);
	$enquire->set_sort_by_value(1, true);
	$matches = $enquire->get_mset(0, $num);

	$desc = $query->get_description();
	$estimate = $matches->get_matches_estimated();

	// One-character term prefixes whose remainder is stored under a named
	// key. 'B' (begin) is read back here although it is not registered as
	// a query prefix above.
	$prefix_keys = array(
		'A' => 'align',
		'B' => 'begin',
		'C' => 'colour',
		'E' => 'ep',
		'N' => 'noise',
		'S' => 'series',
	);

	$out = array();
	// The end sentinels do not change while iterating; fetch each once.
	$matches_end = $matches->end();
	for ($iter = $matches->begin(); !$iter->equals($matches_end); $iter->next()) {
		$doc = $iter->get_document();
		$data = array('text' => $doc->get_data());
		$terms_end = $doc->termlist_end();
		for ($termiter = $doc->termlist_begin(); !$termiter->equals($terms_end); $termiter->next()) {
			$term = $termiter->get_term();
			$prefix = substr($term, 0, 1);
			if ($prefix === 'I') {
				// Unlike the other prefixes, 'I' terms are kept whole,
				// including the prefix character.
				$data['pos'] = $term;
			} elseif (isset($prefix_keys[$prefix])) {
				$data[$prefix_keys[$prefix]] = substr($term, 1);
			} else {
				// Everything else is collected as a plain term.
				$data['terms'][] = $term;
			}
		}
		$out[] = $data;
	}

	// Drop the database handle before returning.
	$db = null;

	return array(
		'query' => $desc,
		'estimate' => $estimate,
		'data' => $out,
	);
}