/**
 * Builds an indexer bound to a single document type
 *
 * The document type is looked up by its shortname using the supplied
 * database connection. Construction fails when the lookup yields nothing.
 *
 * @param string $document_type the shortname of the document type to
 *                               index by.
 * @param MDB2_Driver_Common $db the database connection used by this
 *                                indexer.
 * @param boolean $new if true, this is a new search index and all indexed
 *                      words for the given document type are removed. If
 *                      false, we are appending to an existing index.
 *                      Defaults to false.
 * @param boolean $append if true, keywords for documents that are indexed
 *                         are appended to the keywords that may already
 *                         exist for the document in the index. Defaults
 *                         to false.
 *
 * @see NateGoSearch::createDocumentType()
 *
 * @throws NateGoSearchDocumentTypeException if the document type shortname
 *                                           does not exist.
 */
public function __construct($document_type, MDB2_Driver_Common $db,
    $new = false, $append = false)
{
    // The mbstring overloading status is computed once and shared
    // statically across all instances.
    if (self::$use_mb_string === null) {
        self::$use_mb_string = extension_loaded('mbstring') &&
            (ini_get('mbstring.func_overload') & 2) === 2;
    }

    $resolved_type = NateGoSearch::getDocumentType($db, $document_type);

    if ($resolved_type === null) {
        $message =
            "Document type {$document_type} does not exist and cannot be " .
            "indexed. Document types must be created before being used.";

        throw new NateGoSearchDocumentTypeException(
            $message,
            0,
            $document_type
        );
    }

    $this->db = $db;
    $this->document_type = $resolved_type;
    $this->append = $append;
    $this->new = $new;
}
/**
 * Indexes all photos that are currently queued for search indexing
 *
 * Photos are selected by joining PinholePhoto against NateGoSearchQueue
 * for the 'photo' document type. Each photo is indexed on its title, tags
 * and description, and its queue row is deleted once it has been handed
 * to the indexer. The indexer is committed every ten photos and once more
 * at the end. When at least one photo was processed and a memcache
 * instance is set, the 'photos' cache namespace is flushed.
 */
protected function indexPhotos()
{
    $spell_checker = new NateGoSearchPSpellSpellChecker(
        'en_US', '', '', $this->getCustomWordList());

    $photo_indexer = new NateGoSearchIndexer('photo', $this->db);
    $photo_indexer->setSpellChecker($spell_checker);

    $photo_indexer->addTerm(new NateGoSearchTerm('title', 5));
    $photo_indexer->addTerm(new NateGoSearchTerm('tags', 2));
    $photo_indexer->addTerm(new NateGoSearchTerm('description'));
    $photo_indexer->setMaximumWordLength(32);
    $photo_indexer->addUnindexedWords(
        NateGoSearchIndexer::getDefaultUnindexedWords());

    $type = NateGoSearch::getDocumentType($this->db, 'photo');

    $sql = sprintf(
        'select PinholePhoto.title, PinholePhoto.id, ' .
        'PinholePhoto.description, PinholePhoto.image_set ' .
        'from PinholePhoto ' .
        'inner join NateGoSearchQueue ' .
        'on PinholePhoto.id = NateGoSearchQueue.document_id ' .
        'and NateGoSearchQueue.document_type = %s ' .
        'order by PinholePhoto.id',
        $this->db->quote($type, 'integer'));

    $this->debug(Pinhole::_('Indexing photos ... ') . ' ');

    $photos = SwatDB::query($this->db, $sql,
        SwatDBClassMap::get('PinholePhotoWrapper'));

    $total = count($photos);
    $count = 0;

    foreach ($photos as $photo) {
        $ds = new SwatDetailsStore($photo);
        $ds->title = $photo->getTitle();

        // Both the tag title and the tag name are made searchable.
        $tags = '';
        foreach ($photo->tags as $tag) {
            $tags .= ' ' . $tag->title . ' ' . $tag->name;
        }
        $ds->tags = $tags;

        // Commit in batches of ten and refresh the progress display.
        // The backspaces (chr(8)) erase the previous percentage.
        if ($count % 10 == 0) {
            $photo_indexer->commit();
            $this->debug(str_repeat(chr(8), 3));
            $this->debug(sprintf('%2d%%', $count / $total * 100));
        }

        $document = new NateGoSearchDocument($ds, 'id');
        $photo_indexer->index($document);

        $count++;

        // Remove only this photo's row from the queue.
        $sql = sprintf(
            'delete from NateGoSearchQueue ' .
            'where document_type = %s and document_id = %s',
            $this->db->quote($type, 'integer'),
            $this->db->quote($photo->id, 'integer'));

        SwatDB::exec($this->db, $sql);
    }

    if ($total > 0 && isset($this->memcache)) {
        $this->memcache->flushNs('photos');
    }

    $this->debug(str_repeat(chr(8), 3) . Pinhole::_('done') . "\n");

    $photo_indexer->commit();
    unset($photo_indexer);
}
/**
 * Indexes all comments that are currently queued for search indexing
 *
 * Comments are selected by joining BlorgComment against NateGoSearchQueue
 * for the 'comment' document type and are indexed on fullname, email and
 * bodytext. The indexer is committed every ten comments and once more
 * after the loop. Afterwards every queue row of this document type is
 * deleted in a single statement.
 */
protected function indexComments()
{
    $shortname = 'comment';

    $word_list = $this->getCustomWordList();
    $checker = new NateGoSearchPSpellSpellChecker(
        'en_US', '', '', $word_list);

    $indexer = new NateGoSearchIndexer($shortname, $this->db);
    $indexer->setSpellChecker($checker);

    // field name => term weight
    $term_weights = array(
        'fullname' => 30,
        'email'    => 20,
        'bodytext' => 1,
    );

    foreach ($term_weights as $field => $weight) {
        $indexer->addTerm(new NateGoSearchTerm($field, $weight));
    }

    $indexer->setMaximumWordLength(32);

    $unindexed_words = NateGoSearchIndexer::getDefaultUnindexedWords();
    $indexer->addUnindexedWords($unindexed_words);

    $document_type = NateGoSearch::getDocumentType($this->db, $shortname);

    $select_sql = sprintf(
        'select BlorgComment.* from BlorgComment ' .
        'inner join NateGoSearchQueue ' .
        'on BlorgComment.id = NateGoSearchQueue.document_id ' .
        'and NateGoSearchQueue.document_type = %s ' .
        'order by BlorgComment.id',
        $this->db->quote($document_type, 'integer'));

    $this->debug(Blorg::_('Indexing comments... ') . ' ');

    $wrapper_class = SwatDBClassMap::get('BlorgCommentWrapper');
    $comments = SwatDB::query($this->db, $select_sql, $wrapper_class);

    $total = count($comments);
    $processed = 0;
    $backspaces = str_repeat(chr(8), 3);

    foreach ($comments as $comment) {
        $store = new SwatDetailsStore($comment);

        // Commit in batches of ten and redraw the progress percentage.
        $at_batch_start = ($processed % 10 === 0);
        if ($at_batch_start) {
            $indexer->commit();
            $this->debug($backspaces);
            $this->debug(sprintf('%2d%%', $processed / $total * 100));
        }

        $indexer->index(new NateGoSearchDocument($store, 'id'));

        $processed++;
    }

    $this->debug($backspaces . Blorg::_('done') . "\n");

    $indexer->commit();
    unset($indexer);

    // Clear the whole queue for this document type in one statement.
    $delete_sql = sprintf(
        'delete from NateGoSearchQueue where document_type = %s',
        $this->db->quote($document_type, 'integer'));

    SwatDB::exec($this->db, $delete_sql);
}
/**
 * Queries the NateGoSearch index with a set of keywords
 *
 * Querying does not directly return a set of results. This is due to the
 * way NateGoSearch is designed. The document ids from this search are
 * stored in a results table and accessed through a unique identifier.
 *
 * The keywords are normalized, checked for misspellings when a spell
 * checker is set, and split on spaces. Blocked words are recorded on the
 * result but not searched. All other words are stemmed and passed to the
 * 'nateGoSearch' stored procedure. The stored procedure is only called
 * when at least one document type is set on this object.
 *
 * @param string $keywords the search string to query.
 *
 * @return NateGoSearchResult an object containing result information.
 *
 * @throws NateGoSearchDBException if the stored procedure call, fetching
 *                                 the unique id, or counting the result
 *                                 documents produces an MDB2 error.
 *
 * @see NateGoSearchResult::getUniqueId()
 */
public function query($keywords)
{
    static $unique_counter = 0;

    $id = sha1(uniqid($unique_counter, true));

    $keywords = $this->normalizeKeywordsForSpelling($keywords);

    if ($this->spell_checker === null) {
        $misspellings = array();
    } else {
        $misspellings =
            $this->spell_checker->getMisspellingsInPhrase($keywords);
    }

    $misspellings =
        $this->getPopularReplacements($keywords, $misspellings);

    $keywords = $this->normalizeKeywordsForSearching($keywords);

    // The hash is taken before blocked words are removed and the
    // remaining words are stemmed.
    $keywords_hash = sha1($keywords);

    $results = new NateGoSearchResult($this->db, $id, $keywords,
        $this->document_types);

    $results->addMisspellings($misspellings);

    $searched_keywords = array();

    // strtok() returns false once the input is exhausted. Compare
    // strictly so that a falsy token such as "0" is treated as a keyword
    // and does not end the loop early, dropping the remaining keywords.
    $keyword = strtok($keywords, ' ');
    while ($keyword !== false) {
        if (in_array($keyword, $this->blocked_words)) {
            $results->addBlockedWords($keyword);
        } else {
            $searched_keywords[] =
                NateGoSearchIndexer::stemKeyword($keyword);

            $results->addSearchedWords($keyword);
        }

        $keyword = strtok(' ');
    }

    $keywords = implode(' ', $searched_keywords);

    if (count($this->document_types) > 0) {
        $this->db->loadModule('Function');

        $params = array(
            $this->db->quote($keywords, 'text'),
            $this->db->quote($keywords_hash, 'text'),
            $this->quoteArray($this->document_types),
            $this->db->quote($id, 'text'),
        );

        $types = array('text');

        $rs = $this->db->function->executeStoredProc('nateGoSearch',
            $params, $types);

        if (MDB2::isError($rs)) {
            throw new NateGoSearchDBException($rs);
        }

        $unique_id = $rs->fetchOne();
        if (MDB2::isError($unique_id)) {
            throw new NateGoSearchDBException($unique_id);
        }

        // The counter only advances after a successful stored procedure
        // call. It feeds the uniqid() prefix of the next query.
        $unique_counter++;

        $results->setUniqueId($unique_id);

        $sql = sprintf(
            'select count(document_id) from %s where unique_id = %s',
            $results->getResultTable(),
            $this->db->quote($unique_id, 'text'));

        $document_count = $this->db->queryOne($sql);
        if (MDB2::isError($document_count)) {
            throw new NateGoSearchDBException($document_count);
        }

        $results->setDocumentCount($document_count);
    }

    return $results;
}