/**
  * Creates a search indexer bound to a single document type
  *
  * The document type shortname is resolved through
  * {@link NateGoSearch::getDocumentType()}; construction fails if the
  * shortname is unknown.
  *
  * @param string $document_type the shortname of the document type to
  *                               index by.
  * @param MDB2_Driver_Common $db the database connection used by this
  *                                indexer.
  * @param boolean $new if true, this is a new search index and all indexed
  *                      words for the given document type are removed. If
  *                      false, we are appending to an existing index.
  *                      Defaults to false.
  * @param boolean $append if true, keywords for documents that are indexed
  *                         are appended to the keywords that may already
  *                         exist for the document in the index. Defaults
  *                         to false.
  *
  * @see NateGoSearch::createDocumentType()
  *
  * @throws NateGoSearchDocumentTypeException if the document type shortname
  *                                           does not exist.
  */
 public function __construct($document_type, MDB2_Driver_Common $db, $new = false, $append = false)
 {
     // The mbstring overloading status is detected once and cached in a
     // static property so later instances skip the lookup.
     if (self::$use_mb_string === null) {
         if (extension_loaded('mbstring')) {
             // Flag value 2 of mbstring.func_overload is tested here.
             self::$use_mb_string = (ini_get('mbstring.func_overload') & 2) === 2;
         } else {
             self::$use_mb_string = false;
         }
     }

     // Resolve the shortname first; nothing is stored on the instance when
     // the document type is unknown.
     $resolved_type = NateGoSearch::getDocumentType($db, $document_type);
     if ($resolved_type === null) {
         $message = "Document type {$document_type} does not exist and cannot be " . "indexed. Document types must be created before being used.";
         throw new NateGoSearchDocumentTypeException($message, 0, $document_type);
     }

     $this->document_type = $resolved_type;
     $this->db = $db;
     $this->new = $new;
     $this->append = $append;
 }
    /**
     * Indexes every photo that is queued in NateGoSearchQueue
     *
     * Builds a 'photo' indexer over the title, tags and description fields,
     * loads the queued photos ordered by id, indexes each one and deletes
     * its queue row. The indexer is committed every 10 photos and once more
     * after the loop. Progress is written through debug(). When at least
     * one photo was processed and a memcache object is set, the 'photos'
     * cache namespace is flushed.
     */
    protected function indexPhotos()
    {
        $spell_checker = new NateGoSearchPSpellSpellChecker('en_US', '', '', $this->getCustomWordList());
        $photo_indexer = new NateGoSearchIndexer('photo', $this->db);
        $photo_indexer->setSpellChecker($spell_checker);
        // The second NateGoSearchTerm argument is presumably the term
        // weight -- TODO confirm against NateGoSearchTerm.
        $photo_indexer->addTerm(new NateGoSearchTerm('title', 5));
        $photo_indexer->addTerm(new NateGoSearchTerm('tags', 2));
        $photo_indexer->addTerm(new NateGoSearchTerm('description'));
        $photo_indexer->setMaximumWordLength(32);
        $photo_indexer->addUnindexedWords(NateGoSearchIndexer::getDefaultUnindexedWords());
        $type = NateGoSearch::getDocumentType($this->db, 'photo');
        $sql = sprintf('select PinholePhoto.title, PinholePhoto.id,
				PinholePhoto.description, PinholePhoto.image_set
			from PinholePhoto
				inner join NateGoSearchQueue
					on PinholePhoto.id = NateGoSearchQueue.document_id
					and NateGoSearchQueue.document_type = %s
			order by PinholePhoto.id', $this->db->quote($type, 'integer'));
        $this->debug(Pinhole::_('Indexing photos ... ') . '   ');
        $photos = SwatDB::query($this->db, $sql, SwatDBClassMap::get('PinholePhotoWrapper'));
        $total = count($photos);
        $count = 0;
        foreach ($photos as $photo) {
            $ds = new SwatDetailsStore($photo);
            $ds->title = $photo->getTitle();

            // Flatten the photo's tags into one space-separated string so
            // they can be indexed as the 'tags' term.
            $tags = '';
            foreach ($photo->tags as $tag) {
                $tags .= ' ' . $tag->title . ' ' . $tag->name;
            }
            $ds->tags = $tags;

            // Every 10th photo: commit the batch and refresh the progress
            // display by erasing the previous 3-character percentage.
            // $total is non-zero here because the loop body only runs when
            // there is at least one photo.
            if ($count % 10 === 0) {
                $photo_indexer->commit();
                $this->debug(str_repeat(chr(8), 3));
                $this->debug(sprintf('%2d%%', $count / $total * 100));
            }

            $document = new NateGoSearchDocument($ds, 'id');
            $photo_indexer->index($document);
            $count++;

            // NOTE(review): the queue row is removed before the next
            // commit(); if the run aborts in between, photos may leave the
            // queue without their index entries being committed -- confirm
            // whether this ordering is acceptable.
            $sql = sprintf('delete from NateGoSearchQueue where
				document_type = %s and document_id = %s', $this->db->quote($type, 'integer'), $this->db->quote($photo->id, 'integer'));
            SwatDB::exec($this->db, $sql);
        }

        if ($total > 0 && isset($this->memcache)) {
            $this->memcache->flushNs('photos');
        }
        $this->debug(str_repeat(chr(8), 3) . Pinhole::_('done') . "\n");
        $photo_indexer->commit();
        unset($photo_indexer);
    }
    // Example #3
    // 0
    /**
     * Indexes every comment that is queued in NateGoSearchQueue
     *
     * Builds a 'comment' indexer over the fullname, email and bodytext
     * fields, loads the queued comments ordered by id and indexes each one.
     * The indexer is committed every 10 comments and once more after the
     * loop; progress is written through debug(). Finally all queue rows of
     * the comment document type are deleted in a single statement.
     */
    protected function indexComments()
    {
        $shortname = 'comment';

        $checker = new NateGoSearchPSpellSpellChecker('en_US', '', '', $this->getCustomWordList());
        $indexer = new NateGoSearchIndexer($shortname, $this->db);
        $indexer->setSpellChecker($checker);

        // Field name => second NateGoSearchTerm argument (presumably the
        // term weight -- TODO confirm against NateGoSearchTerm).
        $term_settings = array('fullname' => 30, 'email' => 20, 'bodytext' => 1);
        foreach ($term_settings as $field => $value) {
            $indexer->addTerm(new NateGoSearchTerm($field, $value));
        }

        $indexer->setMaximumWordLength(32);
        $indexer->addUnindexedWords(NateGoSearchIndexer::getDefaultUnindexedWords());

        $type = NateGoSearch::getDocumentType($this->db, $shortname);
        $select = sprintf('select BlorgComment.*
			from BlorgComment
				inner join NateGoSearchQueue
					on BlorgComment.id = NateGoSearchQueue.document_id
					and NateGoSearchQueue.document_type = %s
			order by BlorgComment.id', $this->db->quote($type, 'integer'));

        $this->debug(Blorg::_('Indexing comments... ') . '   ');

        $wrapper_class = SwatDBClassMap::get('BlorgCommentWrapper');
        $comments = SwatDB::query($this->db, $select, $wrapper_class);
        $total = count($comments);

        // Three backspaces erase the previously printed percentage.
        $erase = str_repeat(chr(8), 3);

        $processed = 0;
        foreach ($comments as $comment) {
            // Commit and refresh the progress display on every 10th comment.
            if ($processed % 10 == 0) {
                $indexer->commit();
                $this->debug($erase);
                $this->debug(sprintf('%2d%%', $processed / $total * 100));
            }

            $indexer->index(new NateGoSearchDocument(new SwatDetailsStore($comment), 'id'));
            $processed += 1;
        }

        $this->debug($erase . Blorg::_('done') . "\n");
        $indexer->commit();
        unset($indexer);

        // Clear the whole queue for this document type in one statement.
        $delete = sprintf('delete from NateGoSearchQueue where document_type = %s', $this->db->quote($type, 'integer'));
        SwatDB::exec($this->db, $delete);
    }
    /**
     * Queries the NateGoSearch index with a set of keywords
     *
     * Querying does not directly return a set of results. This is due to the
     * way NateGoSearch is designed. The document ids from this search are
     * stored in a results table and accessed through a unique identifier.
     *
     * The keywords are normalized, checked for misspellings when a spell
     * checker is set, split on spaces, filtered against the blocked words
     * and stemmed. The stored procedure is only run when at least one
     * document type is set on this object; otherwise the returned result
     * carries the keyword information but no unique id or document count.
     *
     * @param string $keywords the search string to query.
     *
     * @return NateGoSearchResult an object containing result information.
     *
     * @throws NateGoSearchDBException if the stored procedure call or one of
     *                                 the follow-up queries returns an MDB2
     *                                 error.
     *
     * @see NateGoSearchResult::getUniqueId()
     */
    public function query($keywords)
    {
        // Counter that keeps ids distinct across calls within one request.
        static $unique_counter = 0;
        $id = sha1(uniqid($unique_counter, true));

        $keywords = $this->normalizeKeywordsForSpelling($keywords);
        if ($this->spell_checker === null) {
            $misspellings = array();
        } else {
            $misspellings = $this->spell_checker->getMisspellingsInPhrase($keywords);
        }
        $misspellings = $this->getPopularReplacements($keywords, $misspellings);

        $keywords = $this->normalizeKeywordsForSearching($keywords);
        // The hash is taken of the normalized keywords before blocked words
        // are removed and stemming is applied.
        $keywords_hash = sha1($keywords);

        $results = new NateGoSearchResult($this->db, $id, $keywords, $this->document_types);
        $results->addMisspellings($misspellings);

        $searched_keywords = array();
        $keyword = strtok($keywords, ' ');
        // strtok() returns false when no tokens remain. Compare against
        // false explicitly: a token such as "0" is falsy and would otherwise
        // end the loop early, dropping it and every keyword after it.
        while ($keyword !== false) {
            // Strict comparison so numeric-looking keywords are only blocked
            // on an exact string match.
            if (in_array($keyword, $this->blocked_words, true)) {
                $results->addBlockedWords($keyword);
            } else {
                $searched_keywords[] = NateGoSearchIndexer::stemKeyword($keyword);
                $results->addSearchedWords($keyword);
            }
            $keyword = strtok(' ');
        }
        $keywords = implode(' ', $searched_keywords);

        if (count($this->document_types) > 0) {
            $this->db->loadModule('Function');
            $params = array($this->db->quote($keywords, 'text'), $this->db->quote($keywords_hash, 'text'), $this->quoteArray($this->document_types), $this->db->quote($id, 'text'));
            $types = array('text');
            $rs = $this->db->function->executeStoredProc('nateGoSearch', $params, $types);
            if (MDB2::isError($rs)) {
                throw new NateGoSearchDBException($rs);
            }

            // The stored procedure returns the unique id under which the
            // matching document ids were stored.
            $unique_id = $rs->fetchOne();
            if (MDB2::isError($unique_id)) {
                throw new NateGoSearchDBException($unique_id);
            }
            $unique_counter++;
            $results->setUniqueId($unique_id);

            $sql = sprintf('select count(document_id) from %s
				where unique_id = %s', $results->getResultTable(), $this->db->quote($unique_id, 'text'));
            $document_count = $this->db->queryOne($sql);
            if (MDB2::isError($document_count)) {
                throw new NateGoSearchDBException($document_count);
            }
            $results->setDocumentCount($document_count);
        }

        return $results;
    }