Esempio n. 1
0
 /**
  * Searches the Xapian index at /var/www/html/diary/xapian/hospital and
  * returns one page of results.
  *
  * Every non-empty filter becomes a prefixed term (DISEASE…, PROVINCE…, CITY…,
  * DISTRICT…, STREET…, LEVEL…, ID…); together with the terms produced from
  * $keyword they are combined with OP_AND. When no keyword and no filter is
  * given, the single term "HOSPITALdefault" is queried instead.
  *
  * @param string     $disease   Value appended to the DISEASE prefix.
  * @param string     $province  Value appended to the PROVINCE prefix.
  * @param string     $city      Value appended to the CITY prefix.
  * @param string     $district  Value appended to the DISTRICT prefix.
  * @param string     $street    Value appended to the STREET prefix.
  * @param int|string $level     Value appended to the LEVEL prefix.
  * @param string     $keyword   Free text, split into terms by DBScws::cutWord().
  * @param int|string $page      1-based page number; empty or below 1 means page 1.
  * @param int        $pagesize  Number of rows per page.
  * @param int|string $id        Value appended to the ID prefix.
  * @return array|false array($rows, $count): $rows holds the decoded document
  *                     data of the requested page, $count is the size of the
  *                     fetched match set (at most 4000 matches are requested).
  *                     false when self::_connect() fails.
  */
 public static function searchHospital($disease = '', $province = '', $city = '', $district = '', $street = '', $level = 0, $keyword = '', $page = '', $pagesize = 20, $id = '')
 {
     $page = empty($page) ? 1 : max(1, (int) $page);
     $pagesize = (int) $pagesize;
     if (!self::_connect('/var/www/html/diary/xapian/hospital')) {
         return false;
     }

     // Always start from a real array so the appends below never depend on
     // what the keyword branch produced.
     $realQuery = array();
     if ($keyword) {
         // NOTE(review): cutWord() is assumed to return a list of terms — confirm.
         $terms = DBScws::cutWord($keyword, true);
         if (is_array($terms)) {
             $realQuery = $terms;
         }
     }

     // Prefix => value table; iteration order matches the original term order.
     $filters = array(
         'DISEASE' => $disease,
         'PROVINCE' => $province,
         'CITY' => $city,
         'DISTRICT' => $district,
         'STREET' => $street,
         'LEVEL' => $level,
         'ID' => $id,
     );
     foreach ($filters as $prefix => $value) {
         if ($value) {
             $realQuery[] = $prefix . $value;
         }
     }
     if (empty($realQuery)) {
         $realQuery[] = 'HOSPITAL' . 'default';
     }

     $enquire = new XapianEnquire(self::$_INSTANCE);
     $enquire->set_sort_by_value(0);
     $query = new XapianQuery(XapianQuery::OP_AND, $realQuery);
     $enquire->set_query($query);
     $matches = $enquire->get_mset(0, 4000);
     $count = $matches->size();

     // Half-open window [$first, $last) of match positions that form the page.
     $first = ($page - 1) * $pagesize;
     $last = $page * $pagesize;

     $re = array();
     $cursor = $matches->begin();
     $end = $matches->end();
     for ($index = 0; $index < $last && !$cursor->equals($end); $index++, $cursor->next()) {
         if ($index < $first) {
             continue;
         }
         $result = json_decode($cursor->get_document()->get_data(), true);
         if (!is_array($result)) {
             // Undecodable payload: fall through with an empty row.
             $result = array();
         }

         // 'phone' keeps the full contact string; 'contact' only its first
         // comma-separated entry.
         $contact = isset($result['contact']) ? $result['contact'] : null;
         $result['phone'] = $contact;
         $contactParts = explode(",", (string) $contact);
         $result['contact'] = $contactParts[0];

         // Only the part of the title before the first "||" is returned.
         $title = isset($result['title']) ? (string) $result['title'] : '';
         $titleParts = explode("||", $title);
         $result['title'] = $titleParts[0];

         unset($result['puid'], $result['thumb_img'], $result['score'], $result['website'], $result['post_at'], $result['refresh_at'], $result['grab_url'], $result['ad_status'], $result['ad_types'], $result['user_id'], $result['username'], $result['listing_status'], $result['base_tag'], $result['image_count']);
         $re[] = $result;
     }
     return array($re, $count);
 }
Esempio n. 2
0
 // them, so that simple queries don't have to be quoted at the shell
 // level.
 $args = array_slice($argv, 2);
 // An optional "--" argument splits the command line: the words before it
 // form the query, the values after it are passed to the RSet as docids.
 $separator = array_search("--", $args);
 if ($separator === FALSE) {
     // No "--" present: every argument is a query word and no docids follow.
     $separator = count($args);
 }
 $query_string = join(" ", array_slice($args, 0, $separator));
 // Collect the docids listed after "--" into a relevance set.
 $rset = new XapianRSet();
 foreach (array_slice($args, $separator + 1) as $docid) {
     $rset->add_document(intval($docid));
 }
 // Parse the query string with an English stemmer (STEM_SOME strategy).
 // NOTE(review): $database and $enquire are not defined in this excerpt —
 // presumably created earlier in the script; confirm.
 $qp = new XapianQueryParser();
 $stemmer = new XapianStem("english");
 $qp->set_stemmer($stemmer);
 $qp->set_database($database);
 $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
 $query = $qp->parse_query($query_string);
 print "Parsed query is: {$query->get_description()}\n";
 // Find the top 10 results for the query.
 $enquire->set_query($query);
 $matches = $enquire->get_mset(0, 10, $rset);
 // Display the results.
 print "{$matches->get_matches_estimated()} results found:\n";
 // foreach over the match-set iterator: the key ($i) is used as the iterator
 // object itself (rank, percent, document) and the value is printed as docid.
 foreach ($matches->begin() as $i => $docid) {
     // get_rank() is incremented by one so the printed numbering starts at 1.
     $n = $i->get_rank() + 1;
     $data = $i->get_document()->get_data();
     print "{$n}: {$i->get_percent()}% docid={$docid} [{$data}]\n\n";
 }
 // If no relevant docids were given, invent an RSet containing the top 5
 // matches (or all the matches if there are less than 5).
Esempio n. 3
0
/**
 * Queries the database.
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian
 *
 * With a non-empty $query_string the parsed query and all extra terms are
 * combined with OP_AND; with an empty one the extra terms alone are combined
 * with OP_OR.
 *
 * @param   string          $query_string   The search string. This string will
 *                                          be parsed and stemmed automatically.
 * @param   XapianDatabase  $db             Xapian database to query; when it is
 *                                          not an object, XAPIAN_DB is opened.
 * @param   int             $start          An integer defining the first
 *                                          document to return
 * @param   int             $length         The number of results to return.
 * @param   array           $extra          Extra terms to search for; empty
 *                                          entries are ignored.
 * @param   int             $count_type     Which total to report: 1 = lower
 *                                          bound, 2 = upper bound, anything
 *                                          else = best estimate.
 * @return  array|null                      array($count, $results). $results is
 *                                          keyed by 1-based match position and
 *                                          each row holds 'courseid', 'toolid',
 *                                          one 'sf-<code>' entry per specific
 *                                          field, 'xapian_data' and 'score'.
 *                                          NULL after an exception, which is
 *                                          reported via display_xapian_error().
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }
        // Build subqueries from $extra array. Now only used by tags search filter on search widget
        $subqueries = array();
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }
        $enquire = new XapianEnquire($db);
        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
            $parsed = $query_parser->parse_query($query_string);
            $query = new XapianQuery(XapianQuery::OP_AND, array_merge($subqueries, array($parsed)));
        } else {
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }
        $enquire->set_query($query);
        $matches = $enquire->get_mset((int) $start, (int) $length);
        $specific_fields = get_specific_field_list();

        $results = array();
        $position = 0;
        $i = $matches->begin();
        // Build the end iterator once instead of on every comparison.
        $end = $matches->end();
        while (!$i->equals($end)) {
            // Positions are 1-based and advance even when a match is skipped,
            // so the keys of $results can have gaps.
            $position++;
            $document = $i->get_document();
            if (is_object($document)) {
                $row = array();
                // The first term of each kind is used, minus its first
                // character (presumably the one-character prefix — confirm).
                // A document without such a term yields an empty string.
                $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
                $row['courseid'] = isset($courseid_terms[0]['name']) ? substr($courseid_terms[0]['name'], 1) : '';
                $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
                $row['toolid'] = isset($toolid_terms[0]['name']) ? substr($toolid_terms[0]['name'], 1) : '';
                // process each specific field prefix
                foreach ($specific_fields as $specific_field) {
                    $row['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
                }
                // NOTE(review): unserialize() can instantiate arbitrary classes;
                // this is only safe while the index is written exclusively by
                // trusted code. Consider a JSON payload if that ever changes.
                $row['xapian_data'] = unserialize($document->get_data());
                $row['score'] = $i->get_percent();
                $results[$position] = $row;
            }
            $i->next();
        }

        switch ($count_type) {
            case 1:
                // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2:
                // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            default:
                // Best estimate (also used for $count_type 0)
                $count = $matches->get_matches_estimated();
                break;
        }
        return array($count, $results);
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());
        return NULL;
    }
}
Esempio n. 4
0
/**
 * Manual smoke test for the Xapian bindings.
 *
 * Opens the database at /home/dtouzeau/Documents/doc1.db twice, adds the
 * second handle to the first, searches for the fixed word "david" and prints
 * the parsed query, the estimated hit count and the ten best matches.
 * On any exception the message is printed and the script exits with status 1.
 */
function tests()
{
    include_once("ressources/class.xapian.inc");

    try {
        // Open the database for searching (same path added a second time).
        $primary = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $secondary = new XapianDatabase("/home/dtouzeau/Documents/doc1.db");
        $primary->add_database($secondary);

        // Start an enquire session.
        $session = new XapianEnquire($primary);

        $needle = "david";

        // English stemming, applied with the STEM_SOME strategy.
        $parser = new XapianQueryParser();
        $english = new XapianStem("english");
        $parser->set_stemmer($english);
        $parser->set_database($primary);
        $parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $parsed = $parser->parse_query($needle);
        $description = $parsed->get_description();
        print "Parsed query is: {$description}\n";

        // Find the top 10 results for the query.
        $session->set_query($parsed);
        $hits = $session->get_mset(0, 10);

        // Display the results.
        $estimated = $hits->get_matches_estimated();
        print "{$estimated} results found:\n";

        for ($cursor = $hits->begin(); !$cursor->equals($hits->end()); $cursor->next()) {
            // Ranks are reported zero-based; print them starting at 1.
            $position = $cursor->get_rank() + 1;
            $payload = $cursor->get_document()->get_data();
            $percent = $cursor->get_percent();
            $docId = $cursor->get_docid();
            print "{$position}: {$percent}% docid={$docId} [{$payload}]\n\n";
        }
    } catch (Exception $failure) {
        print $failure->getMessage() . "\n";
        exit(1);
    }
}
Esempio n. 5
0
 /**
  * Searches the Xapian database at self::$_database_path and returns the
  * matching files.
  *
  * The query string is parsed with English stemming (STEM_SOME). Matches are
  * collapsed on value slot 0 so that each file path is returned once, and up
  * to get_doccount() matches are requested. Matches for which building the
  * file entry throws are skipped.
  *
  * @param string $query_string The search string.
  * @return array List of arrays with the keys 'id', 'path', 'login', 'date',
  *               'name', 'extension', 'score', 'content' and 'icon'.
  */
 public function search($query_string)
 {
     $database = new XapianDatabase(self::$_database_path);
     // Start an enquire session.
     $enquire = new XapianEnquire($database);
     $qp = new XapianQueryParser();
     $stemmer = new XapianStem("english");
     $qp->set_stemmer($stemmer);
     $qp->set_database($database);
     $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
     $query = $qp->parse_query($query_string);
     $enquire->set_query($query);
     //index '0' holds the file path, so we're collapsing on that value in order for a single value to be returned by the system
     $enquire->set_collapse_key(0, 1);
     $matches = $enquire->get_mset(0, $database->get_doccount());

     $results = array();
     $i = $matches->begin();
     // Build the end iterator once instead of on every comparison.
     $end = $matches->end();
     while (!$i->equals($end)) {
         try {
             $document = $i->get_document();
             // Value slots are numeric. The original passed the string 'file',
             // which only reaches slot 0 through integer coercion; use the slot
             // number that the collapse key above already relies on.
             $fileobj = new EfrontFile($document->get_value(0));
             $results[] = array(
                 'id' => $fileobj['id'],
                 'path' => str_replace(G_ROOTPATH, '', $fileobj['path']),
                 'login' => $fileobj['users_LOGIN'] ? $fileobj['users_LOGIN'] : '',
                 'date' => formatTimestamp(filemtime($fileobj['path']), 'time_nosec'),
                 'name' => $fileobj['name'],
                 'extension' => $fileobj['extension'],
                 'score' => $i->get_percent(),
                 'content' => $document->get_data(),
                 'icon' => $fileobj->getTypeImage(),
             );
         } catch (Exception $e) {
             //don't halt for missing files
         }
         $i->next();
     }
     return $results;
 }
Esempio n. 6
0
 /**
  * Return a list of IDs for the given search criteria.
  *
  * The criteria string is parsed with spelling correction enabled; the
  * corrected query string reported by the parser is stored in
  * $this->_spelling as a side effect. Stemming is only configured when
  * get_stem_locale() returns a non-empty value.
  *
  * @param string $criteria Query string to parse.
  * @param int $limit Maximum number of matches to fetch.
  * @param int $offset Index of the first match to fetch.
  * @return array Values of the XAPIAN_FIELD_ID slot of each match, in match order.
  */
 public function get_by_criteria($criteria, $limit, $offset)
 {
     $qp = new XapianQueryParser();
     $enquire = new XapianEnquire($this->_database);
     $locale = $this->get_stem_locale();
     if ($locale) {
         // Note, there may be a problem if this is different than at indexing time!
         $stemmer = new XapianStem($locale);
         $qp->set_stemmer($stemmer);
         $qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
     }
     $qp->set_database($this->_database);
     $query = $qp->parse_query($criteria, XapianQueryParser::FLAG_SPELLING_CORRECTION);
     $enquire->set_query($query);
     $this->_spelling = $qp->get_corrected_query_string();
     // Cast so numeric strings (e.g. request parameters) are accepted.
     $matches = $enquire->get_mset((int) $offset, (int) $limit);
     // TODO: get count from $matches->get_matches_estimated() instead of current method
     $ids = array();
     $i = $matches->begin();
     // Build the end iterator once instead of on every comparison.
     $end = $matches->end();
     while (!$i->equals($end)) {
         $ids[] = $i->get_document()->get_value(self::XAPIAN_FIELD_ID);
         $i->next();
     }
     return $ids;
 }
Esempio n. 7
0
/**
 * Searches the Xapian database at XAPIAN_DIR and returns the matches together
 * with the terms indexed for each of them.
 *
 * Query words are combined with AND by default. The boolean prefixes align,
 * colour, ep, noise and series map to the term prefixes A, C, E, N and S, and
 * a numeric value range processor is registered for value slot 0. Results are
 * sorted by value slot 1 (reverse flag set).
 *
 * @param string $query Query string to parse.
 * @param int    $num   Maximum number of matches to return.
 * @return array Array with the keys 'query' (description of the parsed
 *               query), 'estimate' (estimated number of matches) and 'data'
 *               (one entry per match: 'text' holds the document data; terms
 *               starting with A/B/C/E/N/S fill 'align'/'begin'/'colour'/'ep'/
 *               'noise'/'series' without the prefix character; a term starting
 *               with I is stored whole in 'pos'; every other term is appended
 *               to 'terms').
 */
function search($query, $num = 20) {
	$db = new XapianDatabase(XAPIAN_DIR);
	$enquire = new XapianEnquire($db);

	$stemmer = new XapianStem("english");
	$qp = new XapianQueryParser();
	$valuerange = new XapianNumberValueRangeProcessor(0);

	$qp->set_stemmer($stemmer);
	$qp->set_database($db);
	$qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
	// Class constant instead of the flat legacy name Query_OP_AND, matching
	// the other XapianQueryParser::/XapianQuery:: constants in use.
	$qp->set_default_op(XapianQuery::OP_AND);
	$qp->add_boolean_prefix('align', 'A');
	$qp->add_boolean_prefix('colour', 'C');
	$qp->add_boolean_prefix('ep', 'E');
	$qp->add_boolean_prefix('noise', 'N');
	$qp->add_boolean_prefix('series', 'S');
	$qp->add_valuerangeprocessor($valuerange);

	$parsed = $qp->parse_query($query, XapianQueryParser::FLAG_BOOLEAN | XapianQueryParser::FLAG_PHRASE |
		XapianQueryParser::FLAG_LOVEHATE | XapianQueryParser::FLAG_WILDCARD |
		XapianQueryParser::FLAG_SPELLING_CORRECTION);

	$enquire->set_query($parsed);
	$enquire->set_sort_by_value(1, true);
	$matches = $enquire->get_mset(0, $num);

	$desc = $parsed->get_description();
	$estimate = $matches->get_matches_estimated();

	// Term prefix => result key for terms stored without their prefix.
	$prefixFields = array(
		'A' => 'align',
		'B' => 'begin',
		'C' => 'colour',
		'E' => 'ep',
		'N' => 'noise',
		'S' => 'series',
	);

	$out = array();
	$iter = $matches->begin();
	// Build the end iterators once instead of on every comparison.
	$matchesEnd = $matches->end();
	while (!$iter->equals($matchesEnd)) {
		$doc = $iter->get_document();
		$data = array('text' => $doc->get_data());
		$termiter = $doc->termlist_begin();
		$termsEnd = $doc->termlist_end();
		while (!$termiter->equals($termsEnd)) {
			$term = $termiter->get_term();
			$prefix = (string) substr($term, 0, 1);
			if (isset($prefixFields[$prefix])) {
				// A later term with the same prefix overwrites an earlier one.
				$data[$prefixFields[$prefix]] = substr($term, 1);
			} elseif ($prefix === 'I') {
				// Position terms keep their prefix.
				$data['pos'] = $term;
			} else {
				$data['terms'][] = $term;
			}
			$termiter->next();
		}
		$out[] = $data;
		$iter->next();
	}

	$db = null;

	return array(
		'query' => $desc,
		'estimate' => $estimate,
		'data' => $out,
	);
}