/**
 * Entry point of the shell command.
 *
 * When called without arguments, or when `$this->stats` is set, control is
 * handed to `$this->stats()`. Otherwise the arguments form a search query
 * (multiple arguments are joined with single spaces), the query is stemmed,
 * and the ranked terms returned by `Term::findRankedByTerms()` are printed
 * after a blank line and a line echoing the query.
 *
 * @return void
 * @author John Anderson
 */
public function run() {
    $arguments = func_get_args();
    $argumentCount = count($arguments);

    // Guard: nothing to search for, or stats were explicitly requested.
    if ($argumentCount === 0 || isset($this->stats)) {
        $this->stats();
        return;
    }

    // A lone argument is used as-is; several are glued into one query string.
    $query = ($argumentCount > 1) ? implode(" ", $arguments) : $arguments[0];

    $stemmer = new Stemmer();
    $queryStems = $stemmer->string($query);

    // The total page count is passed to the ranking call alongside the stems.
    $results = Term::findRankedByTerms($queryStems, Page::find('count'));

    $this->out();
    $this->out("Query: {:purple}{$query}{:end}");
    $this->displayResults($results);
}
/**
 * Provides the stemming and term indexing for a given page.
 *
 * The content is stemmed with `Stemmer::html()`, occurrences of each distinct
 * stem are counted, and one `Term` record is saved per distinct stem with:
 *
 * - `base`: the stem's dampened frequency, `log(count) + 1`, divided by the
 *   sum of that value over all distinct stems of the page;
 * - `normalization`: `U / (1 + 0.0115 * U)`, where `U` is the number of
 *   distinct stems (presumably the unique-term normalization described at
 *   the linked MySQL page — confirm the 0.0115 constant against it).
 *
 * If stemming yields no stems, no records are written.
 *
 * @param string $string Content handed to `Stemmer::html()`.
 * @param object $page Page being indexed; only its `id` property is read.
 * @return void
 * @author John Anderson
 * @see http://forge.mysql.com/wiki/MySQL_Internals_Algorithms#Full-text_Search
 */
public function generateStems($string, $page) {
    $stemmer = new Stemmer();
    $stems = $stemmer->html($string);

    // Tally occurrences per distinct stem with plain integer counters.
    $frequencies = array();
    foreach ($stems as $stem) {
        if (!isset($frequencies[$stem])) {
            $frequencies[$stem] = 0;
        }
        $frequencies[$stem]++;
    }

    // Dampened term frequency per stem, plus the page-wide sum used to scale it.
    $dtfs = array();
    $sumDtf = 0;
    foreach ($frequencies as $stem => $frequency) {
        $dtf = log($frequency) + 1;
        $sumDtf += $dtf;
        $dtfs[$stem] = $dtf;
    }

    // Depends only on the number of distinct stems, so compute it once
    // instead of once per saved term.
    $uniqueCount = count($frequencies);
    $normalization = $uniqueCount / (1 + 0.0115 * $uniqueCount);

    foreach ($dtfs as $stem => $dtf) {
        $term = Term::create();

        // One timestamp for both fields so a new record can never end up
        // with `created` and `modified` straddling a second boundary.
        $timestamp = date('Y-m-d H:i:s');
        $term->created = $timestamp;
        $term->modified = $timestamp;

        $term->term = $stem;
        $term->page_id = $page->id;
        $term->base = $dtf / $sumDtf;
        $term->normalization = $normalization;
        $term->save();
    }
}