/**
 * Index the rows of a CSV file into a Xapian database, including
 * boolean filter terms for the MATERIALS field.
 *
 * For every row one document is built: TITLE and DESCRIPTION are indexed
 * as text both with a prefix ('S' / 'XD') and without one, each non-empty
 * entry of the semicolon-separated MATERIALS field is added as an
 * 'XM'-prefixed boolean term, and the whole row is stored JSON-encoded as
 * the document data.  Documents are keyed by the term 'Q' . id_NUMBER, so
 * re-running the indexer replaces existing documents.
 *
 * @param mixed  $datapath Location of the CSV input, handed to open_file()
 *                         (presumably a filesystem path -- confirm against
 *                         that helper).
 * @param string $dbpath   Path of the Xapian database to create or open.
 */
function index($datapath, $dbpath)
{
    // Create or open the database we're going to be writing to.
    $db = new XapianWritableDatabase($dbpath, Xapian::DB_CREATE_OR_OPEN);
    // Set up a TermGenerator that we'll use in indexing.
    $termgenerator = new XapianTermGenerator();
    $termgenerator->set_stemmer(new XapianStem('en'));
    // Open the file.
    $fH = open_file($datapath);
    // Read the header row in.
    $headers = get_csv_headers($fH);
    // parse_csv_row() is expected to return false once there are no more
    // rows; that is what terminates the loop.
    while (($row = parse_csv_row($fH, $headers)) !== false) {
        // '$row' maps field name to value, using the header row's names.
        // We use id_NUMBER, TITLE, DESCRIPTION and (below) MATERIALS.
        $description = $row['DESCRIPTION'];
        $title = $row['TITLE'];
        $identifier = $row['id_NUMBER'];
        // We make a document and tell the term generator to use this.
        $doc = new XapianDocument();
        $termgenerator->set_document($doc);
        // Index each field with a suitable prefix.
        $termgenerator->index_text($title, 1, 'S');
        $termgenerator->index_text($description, 1, 'XD');
        // Index fields without prefixes for general search.  The term
        // position is bumped between the two fields so that a phrase
        // cannot match across the title/description boundary.
        $termgenerator->index_text($title);
        $termgenerator->increase_termpos();
        $termgenerator->index_text($description);
        ### Start of new indexing code.
        // Index the MATERIALS field, splitting on semicolons.  Entries are
        // trimmed and lower-cased; empty entries are skipped.
        $materials = explode(";", $row['MATERIALS']);
        foreach ($materials as $material) {
            $material = strtolower(trim($material));
            if ($material !== '') {
                $doc->add_boolean_term('XM' . $material);
            }
        }
        ### End of new indexing code.
        // Store all the fields for display purposes.
        $doc->set_data(json_encode($row));
        // We use the identifier to ensure each object ends up in the
        // database only once no matter how many times we run the indexer.
        // The ID is a pure lookup key, so it is added as a boolean term:
        // that way it does not contribute to the document's length or to
        // relevance weighting.
        $idterm = "Q" . $identifier;
        $doc->add_boolean_term($idterm);
        $db->replace_document($idterm, $doc);
    }
}
示例#2
0
/**
 * Build or refresh a Xapian index from a CSV file.
 *
 * Every CSV row becomes one document whose TITLE and DESCRIPTION are
 * indexed as text (prefixed with 'S' / 'XD' and also unprefixed), whose
 * data is the JSON-encoded row, and whose unique boolean term is
 * "Q" followed by the row's id_NUMBER.
 *
 * @param mixed  $datapath Location of the CSV input, passed to open_file().
 * @param string $dbpath   Path of the Xapian database to create or open.
 */
function index($datapath, $dbpath)
{
    // Writable database handle; the database is created when missing.
    $database = new XapianWritableDatabase($dbpath, Xapian::DB_CREATE_OR_OPEN);

    // A single term generator with an English stemmer serves every row.
    $indexer = new XapianTermGenerator();
    $indexer->set_stemmer(new XapianStem('english'));

    // Open the input and consume its header row, which names the fields.
    $csv = open_file($datapath);
    $columns = get_csv_headers($csv);

    for (;;) {
        // '$record' maps field name to value; false means no more rows.
        $record = parse_csv_row($csv, $columns);
        if ($record === false) {
            break;
        }

        // Only DESCRIPTION, TITLE and id_NUMBER are used for indexing.
        $bodyText = $record['DESCRIPTION'];
        $heading = $record['TITLE'];
        $objectId = $record['id_NUMBER'];

        // Fresh document for this row, attached to the term generator.
        $document = new XapianDocument();
        $indexer->set_document($document);

        // Prefixed terms, so searches can be restricted to one field.
        $indexer->index_text($heading, 1, 'S');
        $indexer->index_text($bodyText, 1, 'XD');

        // Unprefixed terms for general search, with a position bump
        // between the two fields.
        $indexer->index_text($heading);
        $indexer->increase_termpos();
        $indexer->index_text($bodyText);

        // The unique ID term guarantees one document per object however
        // often the indexer is run: replace_document() keys on it.
        $uniqueTerm = "Q" . $objectId;
        $document->add_boolean_term($uniqueTerm);

        // Keep the complete row around for display purposes.
        $document->set_data(json_encode($record));

        $database->replace_document($uniqueTerm, $document);
    }
}