/**
 * Asserts that the stemmer maps every word to the expected stem at the same position.
 *
 * $words is walked with foreach while $stems is advanced by hand with
 * current()/next(). $stems is not rewound here, so it is consumed from
 * wherever the caller left it.
 *
 * @param Stemmer   $stemmer Stemmer under test.
 * @param \Iterator $words   Input words.
 * @param \Iterator $stems   Expected stems, in the same order as $words.
 *
 * @return void
 */
protected function checkStemmer(Stemmer $stemmer, \Iterator $words, \Iterator $stems)
{
    foreach ($words as $word) {
        // Running out of expected stems is a fixture problem; report it as
        // such instead of comparing against whatever current() yields.
        $this->assertTrue($stems->valid(), "No expected stem left for '{$word}'");

        $expected = $stems->current();
        $actual = $stemmer->stem($word);

        // assertEquals() takes ($expected, $actual, $message); keeping that
        // order makes the failure diff label the two values correctly.
        $this->assertEquals(
            $expected,
            $actual,
            "The stem for '{$word}' should be '{$expected}' not '{$actual}'"
        );

        $stems->next();
    }
}
/**
 * Autosuggest handler: echoes an HTML list of up to ten open jobs matching the posted text.
 *
 * Reads $_POST['queryString'], splits it on spaces and selects published
 * posts of type 'job' whose 'closed' meta value is '0' and whose title or
 * content contains any of the terms. Rows are grouped by title and ordered by
 * how many joined rows matched. The query text is searched as typed; no
 * stemming is applied.
 *
 * Nothing is echoed when the query is missing or empty. When no row matches
 * (or the query holds only spaces) a single "No results found" item is echoed.
 *
 * @return void Output is echoed directly.
 */
function PricerrTheme_autosuggest_it()
{
    global $wpdb;

    // WordPress adds slashes to $_POST; remove them so the term reaches the
    // database exactly as typed (prepare() below does the SQL escaping).
    $query = isset($_POST['queryString']) ? wp_unslash($_POST['queryString']) : '';
    if (!is_string($query) || $query === '') {
        return;
    }

    $terms = array();
    foreach (explode(' ', $query) as $term) {
        if ($term !== '') {
            $terms[] = $term;
        }
    }

    $rows = array();
    if (!empty($terms)) {
        $posts = $wpdb->prefix . 'posts';
        $postmeta = $wpdb->prefix . 'postmeta';

        // One "(title LIKE ? OR content LIKE ?)" group per term. The values
        // are bound through prepare(), and esc_like() neutralises % and _ so
        // they match literally instead of acting as wildcards.
        $conditions = array();
        $args = array();
        foreach ($terms as $term) {
            $like = '%' . $wpdb->esc_like($term) . '%';
            $conditions[] = "({$posts}.post_title LIKE %s OR {$posts}.post_content LIKE %s)";
            $args[] = $like;
            $args[] = $like;
        }

        $sql = "SELECT DISTINCT COUNT(*) as occurences, {$posts}.post_title, {$posts}.ID"
            . " FROM {$posts}, {$postmeta}"
            . " WHERE {$posts}.post_status = 'publish'"
            . " AND {$posts}.post_type = 'job'"
            . " AND {$posts}.ID = {$postmeta}.post_id"
            . " AND {$postmeta}.meta_key = 'closed'"
            . " AND {$postmeta}.meta_value = '0'"
            . ' AND (' . implode(' OR ', $conditions) . ')'
            . " GROUP BY {$posts}.post_title ORDER BY occurences DESC LIMIT 10";

        $found = $wpdb->get_results($wpdb->prepare($sql, $args), ARRAY_A);
        if (is_array($found)) {
            $rows = $found;
        }
    }

    if (empty($rows)) {
        // The query is echoed into inline JavaScript inside an HTML
        // attribute, so it is escaped for that context.
        echo '<ul>';
        echo '<li onClick="fill(\'' . esc_js($query) . '\');">' . __('No results found', 'PricerrTheme') . '</li>';
        echo '</ul>';

        return;
    }

    // NOTE(review): each row gets its own <ul id="sk_auto_suggest">, so the id
    // repeats in the output. Left unchanged because front-end code may depend
    // on this markup — confirm before merging into a single list.
    foreach ($rows as $row) {
        $permalink = get_permalink($row['ID']);

        echo '<ul id="sk_auto_suggest">';
        echo '<li onClick="window.location=\'' . esc_js($permalink) . '\';">'
            . PricerrTheme_wrap_the_title($row['post_title'], $row['ID'])
            . '</li>';
        echo '</ul>';
    }
}
/**
 * Reduces free text to a space-separated string of lowercase word stems.
 *
 * The text is split on every run of characters other than ASCII letters,
 * apostrophes, double quotes and hyphens. Words of fewer than three
 * characters and words listed in self::$stopwords are dropped; the rest are
 * stemmed.
 *
 * @param string $text Text to reduce.
 *
 * @return string Stems joined by single spaces; '' when no word survives.
 */
public static function stems($text)
{
    // stemmer plugin
    require_once BASEPATH . '/application/plugins/class.stemmer.inc';
    $stemmer = new Stemmer();

    // split sentence into words
    $words = preg_split('/[^a-zA-Z\'"-]+/', $text, -1, PREG_SPLIT_NO_EMPTY);

    $stems = array();
    foreach ($words as $word) {
        // Lowercase before the stopword lookup so "The" is filtered like "the".
        // NOTE(review): assumes self::$stopwords holds lowercase entries — confirm.
        $word = strtolower($word);

        // keep only words of at least three characters that are not stopwords
        if (strlen($word) > 2 && !in_array($word, self::$stopwords, true)) {
            $stems[] = $stemmer->stem($word);
        }
    }

    // implode() yields '' for an empty list; trimming a trailing space with
    // substr('', 0, -1) returns false on PHP versions before 8.0.
    return implode(' ', $stems);
}
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>porter stemmer</title>
</head>
<body>
<form action="porter_stemmer_test.php" method="get">
<input name="word" size="100">
<button type="submit">GO</button>
</form>
</body>
</html>
<?php
// Side-by-side check of two stemmer implementations: stems the "word" query
// parameter with each one and prints both results below the form.
include 'class.stemmer.inc.php';
include 'porter_stemmer.php';

$word = isset($_GET['word']) ? $_GET['word'] : '';
// ?word[]=x arrives as an array; treat anything that is not a string as empty.
if (!is_string($word)) {
    $word = '';
}

$stemmer = new Stemmer();

// The stems derive from request input, so they are HTML-escaped before being
// echoed into the page.
echo "class.stemmer.inc.php: " . htmlspecialchars((string) $stemmer->stem($word), ENT_QUOTES, 'UTF-8');
echo "<br>";
echo "porter_stemmer.php: " . htmlspecialchars((string) PorterStemmer::Stem($word), ENT_QUOTES, 'UTF-8');
<?php
/*
 * Autosuggest query builder. This is a fragment: the `if` block opened below
 * is not closed within the visible code.
 *
 * - Reads $_POST['queryString'], passes it through Stemmer::stem() and
 *   jSearchString::parseString(), and rebuilds a de-duplicated term string.
 *   That string is then overwritten with htmlspecialchars($_POST['queryString']),
 *   so the stemmed form is never used.
 * - For a non-empty query, splits it on spaces and builds a SELECT over
 *   {prefix}posts and {prefix}postmeta: published posts of type 'project'
 *   with meta 'closed' = '0' whose title or content is LIKE any term, grouped
 *   by title, ten rows at most, ordered by match count. The statement is run
 *   with mysql_query(), which dies with mysql_error() on failure.
 *
 * NOTE(review): the terms are concatenated straight into the LIKE clauses.
 * htmlspecialchars() is HTML escaping, not SQL escaping, so this looks open
 * to SQL injection unless the input is escaped elsewhere — confirm.
 * NOTE(review): split() and mysql_query() were removed in PHP 7.0 and each()
 * in PHP 8.0.
 * NOTE(review): when every term is empty (e.g. the query is only spaces) no
 * clause is appended, so the substr() meant to drop the trailing " OR" cuts
 * into "AND (" instead.
 */
global $wpdb; include 'classes/stem.php'; include 'classes/cleaner.php'; $string = $_POST['queryString']; $stemmer = new Stemmer(); $stemmed_string = $stemmer->stem($string); $clean_string = new jSearchString(); $stemmed_string = $clean_string->parseString($stemmed_string); $new_string = ''; foreach (array_unique(split(" ", $stemmed_string)) as $array => $value) { if (strlen($value) >= 1) { $new_string .= '' . $value . ' '; } } $new_string = substr($new_string, 0, strLen($new_string) - 1); $new_string = htmlspecialchars($_POST['queryString']); if (strlen($new_string) > 0) { $split_stemmed = split(" ", $new_string); $sql = "SELECT DISTINCT COUNT(*) as occurences, " . $wpdb->prefix . "posts.post_title FROM " . $wpdb->prefix . "posts,\n\t\t\t" . $wpdb->prefix . "postmeta WHERE " . $wpdb->prefix . "posts.post_status='publish' and \n\t\t\t" . $wpdb->prefix . "posts.post_type='project' \n\t\t\t\n\t\t\t\t\tAND " . $wpdb->prefix . "posts.ID = " . $wpdb->prefix . "postmeta.post_id \n\t\t\t\t\tAND " . $wpdb->prefix . "postmeta.meta_key = 'closed' \n\t\t\t\t\tAND " . $wpdb->prefix . "postmeta.meta_value = '0' \n\t\t\t\n\t\t\tAND ("; while (list($key, $val) = each($split_stemmed)) { if ($val != '' && strlen($val) > 0) { $sql .= "(" . $wpdb->prefix . "posts.post_title LIKE '%" . $val . "%' OR " . $wpdb->prefix . "posts.post_content LIKE '%" . $val . "%') OR"; } } $sql = substr($sql, 0, strLen($sql) - 3); //this will eat the last OR $sql .= ") GROUP BY " . $wpdb->prefix . "posts.post_title ORDER BY occurences DESC LIMIT 10"; $query = mysql_query($sql) or die(mysql_error()); //$row_sql = mysql_fetch_assoc ( $query );
/**
 * Compares Stemmer::stem() against the word/stem pairs in tests/data.txt.
 *
 * Each line of the data file is split on whitespace: the first field is the
 * input word and the second is the stem it is expected to produce.
 *
 * The comparison is currently disabled. Remove the markTestSkipped() call to
 * run it.
 *
 * @return void
 */
public function testAgainstDictionary()
{
    // Skip explicitly so the runner reports this test as skipped rather than
    // as a pass that made no assertions.
    $this->markTestSkipped('Dictionary comparison against tests/data.txt is disabled.');

    $data = file("tests/data.txt", FILE_IGNORE_NEW_LINES);
    $this->assertNotFalse($data, 'tests/data.txt could not be read');

    foreach ($data as $entry) {
        $line = preg_split('#\\s+#', $entry);
        $this->assertGreaterThanOrEqual(2, count($line), "Malformed dictionary line: '{$entry}'");
        $this->assertEquals($line[1], Stemmer::stem($line[0]));
    }
}
/**
 * Looks up the stored correlation between a term and a document.
 *
 * The term is stemmed with Stemmer::stem() and the document is cast to a
 * string; both are then resolved through l_termIndex / l_documentIndex into
 * the two subscripts used on l_termDocumentCorrelation.
 *
 * @param mixed $_term     Term to look up (stemmed before the index lookup).
 * @param mixed $_document Document key (cast to string before the index lookup).
 *
 * @return mixed|null The stored value, or null when l_termCorrelation or
 *                    l_documentCorrelation is null, or when the stemmed term
 *                    or the document key is missing from its index.
 */
public function termDocumentCorrelation($_term, $_document)
{
    // Both correlation tables must be present before anything is looked up.
    if ($this->l_termCorrelation === null || $this->l_documentCorrelation === null) {
        return null;
    }

    $stem = Stemmer::stem($_term);
    if (!isset($this->l_termIndex[$stem])) {
        return null;
    }

    $documentKey = (string) $_document;
    if (!isset($this->l_documentIndex[$documentKey])) {
        return null;
    }

    $termPosition = $this->l_termIndex[$stem];
    $documentPosition = $this->l_documentIndex[$documentKey];

    return $this->l_termDocumentCorrelation[$termPosition][$documentPosition];
}
/**
 * Replaces $this->input_user_query with a stemmed version of itself.
 *
 * The query is broken into word tokens, each token is passed through
 * Stemmer::stem() and the results are joined with single spaces. The Boolean
 * operators NOT, OR and AND (uppercase only) are kept as typed so that
 * Boolean search keeps working.
 *
 * @return void
 */
public function stem_string()
{
    // Stemmer is an open source stemming class.
    $stemmer = new Stemmer();

    preg_match_all('@\\w+\\b@', $this->input_user_query, $matches);

    // The pattern has no capture groups, so index 0 (the full matches) is the
    // only entry; fall back to no tokens if matching failed.
    $tokens = isset($matches[0]) ? $matches[0] : array();

    $operators = array('NOT', 'OR', 'AND');

    $stemmed = array();
    foreach ($tokens as $token) {
        $stemmed[] = in_array($token, $operators, true) ? $token : $stemmer->stem($token);
    }

    // Separator first: the (array, separator) argument order was removed in PHP 8.0.
    $this->input_user_query = implode(' ', $stemmed);
}