try {
    // Open the database for update, creating a new database if necessary.
    $database = new XapianWritableDatabase($argv[1], Xapian::DB_CREATE_OR_OPEN);

    // The term generator turns paragraph text into index terms, stemmed
    // with the English stemmer.
    $indexer = new XapianTermGenerator();
    $stemmer = new XapianStem("english");
    $indexer->set_stemmer($stemmer);

    $para = '';
    $lines = file("php://stdin");
    if ($lines === false) {
        // file() has already raised a warning; fall through with nothing
        // to index instead of handing a non-array to foreach.
        $lines = array();
    }

    // Append a blank sentinel line so that the final paragraph is indexed
    // even when the input does not end with an empty line. When the input
    // already ends with a blank line the sentinel is a no-op.
    $lines[] = "";

    foreach ($lines as $line) {
        $line = rtrim($line);
        if ($line === "" && $para !== "") {
            // We've reached the end of a paragraph, so index it.
            $doc = new XapianDocument();
            $doc->set_data($para);

            $indexer->set_document($doc);
            $indexer->index_text($para);

            // Add the document to the database.
            $database->add_document($doc);

            $para = "";
        } else {
            // Still inside a paragraph: join its lines with single spaces.
            if ($para !== "") {
                $para .= " ";
            }
            $para .= $line;
        }
    }
} // closes a block opened before this chunk

// Query for documents matching "there" OR "is" and request the first
// page of results (offset 0, at most 10 items).
$enq = new XapianEnquire($db);
$enq->set_query(new XapianQuery(XapianQuery::OP_OR, "there", "is"));
$mset = $enq->get_mset(0, 10);
// Exactly one match is expected; $db is populated outside this chunk.
if ($mset->size() != 1) {
    print "Unexpected \$mset->size()\n";
    exit(1);
}
// The terms of the query that matched the first hit, joined with spaces,
// must read "is there".
$terms = join(" ", $enq->get_matching_terms($mset->get_hit(0)));
if ($terms != "is there") {
    print "Unexpected matching terms: {$terms}\n";
    exit(1);
}

# Feature test for MatchDecider
// Add a second document whose value slot 0 is "yes", which is what the
// decider class below tests for. $stem is created outside this chunk.
$doc = new XapianDocument();
$doc->set_data("Two");
$doc->add_posting($stem->apply("out"), 1);
$doc->add_posting($stem->apply("outside"), 1);
$doc->add_posting($stem->apply("source"), 2);
$doc->add_value(0, "yes");
$db->add_document($doc);

/**
 * Match decider that accepts a document only when its value in slot 0
 * equals "yes".
 */
class testmatchdecider extends XapianMatchDecider {
    function apply($doc) {
        return $doc->get_value(0) == "yes";
    }
}

// PHP_VERSION_ID 50400 corresponds to PHP 5.4.0: from that version on the
// subclassing test is skipped with a message instead of being run.
if (defined('PHP_VERSION_ID') && PHP_VERSION_ID >= 50400) {
    print "Skipping known failure subclassing Xapian classes in PHP under PHP 5.4+\n";
} else {
/**
 * Index file contents.
 *
 * The lines are grouped into paragraphs (runs of lines separated by blank
 * lines). Each paragraph is joined with single spaces, stored as the data
 * of a new Xapian document, tagged with the file path and added to the
 * database at self::$_database_path.
 *
 * @param array  $lines     The array of the file contents, each entry corresponds to a new line (included)
 * @param string $file_path Path of the indexed file, stored with every paragraph document
 *
 * @return false|null false when $lines is empty; otherwise no value is returned
 */
protected function _index($lines, $file_path)
{
    if (empty($lines)) {
        return false;
    }

    // Open the database for update, creating a new database if necessary.
    $database = new XapianWritableDatabase(self::$_database_path, Xapian::DB_CREATE_OR_OPEN);

    $indexer = new XapianTermGenerator();
    $stemmer = new XapianStem("english");
    $indexer->set_stemmer($stemmer);

    // Append a blank sentinel line so that the last paragraph is indexed
    // even when the file does not end with an empty line. When the file
    // already ends with a blank line the sentinel is a no-op.
    $lines[] = "";

    $para = '';
    foreach ($lines as $line) {
        $line = rtrim($line);
        if ($line === "" && $para !== "") {
            // We've reached the end of a paragraph, so index it.
            $doc = new XapianDocument();
            $doc->set_data($para);
            // Add meta-information to the entry.
            // NOTE(review): Xapian documents the first argument of
            // add_value() as a numeric value slot; confirm how the PHP
            // binding treats the string 'file' here.
            $doc->add_value('file', $file_path);

            $indexer->set_document($doc);
            $indexer->index_text($para);

            // Add the document to the database.
            $database->add_document($doc);

            $para = "";
        } else {
            // Still inside a paragraph: join its lines with single spaces.
            if ($para !== "") {
                $para .= " ";
            }
            $para .= $line;
        }
    }

    // Set the database handle to null to ensure that it gets closed
    // down cleanly or uncommitted changes may be lost.
    $database = null;
}
/**
 * Add a post to the index, replacing any document already stored under
 * the post's unique id term. Stores more metadata than may be strictly
 * required.
 *
 * @param Post $post the post being inserted
 *
 * @return mixed whatever replace_document() on the database returns
 */
public function index_post($post)
{
    $document = new XapianDocument();

    // The raw content is kept as the document data.
    $document->set_data($post->content);

    // Values stored alongside the post, as (slot, value) pairs in the
    // order they are written.
    $stored_values = array(
        array(self::XAPIAN_FIELD_URL, $post->permalink),
        array(self::XAPIAN_FIELD_TITLE, $post->title),
        array(self::XAPIAN_FIELD_USERID, $post->user_id),
        array(self::XAPIAN_FIELD_PUBDATE, $post->pubdate),
        array(self::XAPIAN_FIELD_CONTENTTYPE, $post->content_type),
        array(self::XAPIAN_FIELD_ID, $post->id),
    );
    foreach ($stored_values as $pair) {
        $document->add_value($pair[0], $pair[1]);
    }

    // Index title and body; the title is given a much larger weight.
    $this->_indexer->set_document($document);
    $this->_indexer->index_text($post->title, 50);
    $this->_indexer->index_text($post->content, 1);

    // Each tag is indexed twice: once under the XTAG prefix so it can be
    // used as a filter, and once unprefixed so it takes part in plain
    // text search.
    foreach ($post->tags as $tag) {
        $label = (string) $tag;
        $this->_indexer->index_text($label, 1, 'XTAG');
        $this->_indexer->index_text($label, 2);
    }

    // The unique id is both a term on the document and the key under
    // which the document is replaced.
    $uid = $this->get_uid($post);
    $document->add_term($uid);

    return $this->_database->replace_document($uid, $document);
}
// $vrp, $a and $b are set up before this chunk; after apply() the two
// bounds must unserialise to 10 and 20.
$vrp->apply($a, $b);
if (Xapian::sortable_unserialise($a) != 10) {
    print Xapian::sortable_unserialise($a) . " != 10\n";
    exit(1);
}
if (Xapian::sortable_unserialise($b) != 20) {
    print Xapian::sortable_unserialise($b) . " != 20\n";
    exit(1);
}

$stem = new XapianStem("english");
if ($stem->get_description() != "Xapian::Stem(english)") {
    print "Unexpected \$stem->get_description()\n";
    exit(1);
}

// Document data must round-trip unchanged even when it contains a zero
// byte. The payload has to actually contain one ("a\0b"), otherwise
// neither check below can detect truncation.
$doc = new XapianDocument();
$doc->set_data("a\0b");
if ($doc->get_data() === "a") {
    print "get_data+set_data truncates at a zero byte\n";
    exit(1);
}
if ($doc->get_data() !== "a\0b") {
    print "get_data+set_data doesn't transparently handle a zero byte\n";
    exit(1);
}

// Build the test document: one plain term plus the stemmed words of the
// sentence at positions 1 to 5.
$doc->set_data("is there anybody out there?");
$doc->add_term("XYzzy");
$doc->add_posting($stem->apply("is"), 1);
$doc->add_posting($stem->apply("there"), 2);
$doc->add_posting($stem->apply("anybody"), 3);
$doc->add_posting($stem->apply("out"), 4);
$doc->add_posting($stem->apply("there"), 5);
/**
 * Index a single comment row as a new document.
 *
 * Only the row's "id" is stored as the document data; the "comment" text
 * is run through the indexer but is not stored on the document.
 *
 * @param object $database database the document is added to (must provide add_document())
 * @param object $indexer  term generator used to index the comment text
 * @param array  $row      row with at least the keys "id" and "comment"
 */
function index_comment($database, $indexer, $row)
{
    $document = new XapianDocument();

    $comment_id = $row["id"];
    $document->set_data($comment_id);

    $indexer->set_document($document);
    $comment_text = $row["comment"];
    $indexer->index_text($comment_text);

    // Add the document to the database.
    $database->add_document($document);
}