Esempio n. 1
0
 /**
  * Asserts that the stemmer maps every word to the expected stem at the same position.
  *
  * $words is iterated with foreach; $stems is advanced in lock-step starting
  * from its current position (it is not rewound here).
  *
  * @param Stemmer   $stemmer Stemmer under test.
  * @param \Iterator $words   Words to stem.
  * @param \Iterator $stems   Expected stems, one per word, in the same order.
  */
 protected function checkStemmer(Stemmer $stemmer, \Iterator $words, \Iterator $stems)
 {
     foreach ($words as $word) {
         // Fail with a clear message when the expected list is shorter than the
         // word list, instead of silently comparing against null.
         $this->assertTrue($stems->valid(), "No expected stem left for '{$word}'");

         $expected = $stems->current();
         // Stem once and reuse the value for both the assertion and the message.
         $actual = $stemmer->stem($word);

         // PHPUnit's signature is assertEquals($expected, $actual, $message).
         $this->assertEquals($expected, $actual, "The stem for '{$word}' should be '{$expected}' not '{$actual}'");
         $stems->next();
     }
 }
Esempio n. 2
0
/**
 * AJAX autosuggest handler: echoes up to ten published, open "job" posts whose
 * title or content contains any whitespace-separated word of the posted
 * `queryString`.
 *
 * The result is written straight to the output as an HTML fragment; nothing is
 * returned. An absent or empty query produces no output at all.
 *
 * Changes compared with the previous implementation:
 *  - the stemmer/cleaner pass was removed: its result was overwritten before use
 *    and never reached the query, and the plain `include` calls redeclared the
 *    classes when the handler ran twice in one request;
 *  - `split()` / `each()` (removed in PHP 7 / PHP 8) are no longer used;
 *  - search terms are bound through $wpdb->prepare() instead of concatenated;
 *  - a query made only of whitespace no longer yields malformed SQL;
 *  - values placed inside inline `onClick` handlers go through esc_js().
 */
function PricerrTheme_autosuggest_it()
{
    global $wpdb;

    // WordPress adds slashes to $_POST; strip them so the raw text can be bound safely.
    $query_text = (isset($_POST['queryString']) && is_string($_POST['queryString']))
        ? wp_unslash($_POST['queryString'])
        : '';
    if ($query_text === '') {
        return;
    }

    $terms = preg_split('/\s+/', $query_text, -1, PREG_SPLIT_NO_EMPTY);
    $rows = array();

    if (!empty($terms)) {
        $posts = $wpdb->prefix . 'posts';
        $postmeta = $wpdb->prefix . 'postmeta';

        // One "(title LIKE ? OR content LIKE ?)" group per term, OR-ed together.
        $conditions = array();
        $bindings = array();
        foreach ($terms as $term) {
            // esc_like() neutralises % and _ inside the term; the wildcards are added around it.
            $pattern = '%' . $wpdb->esc_like($term) . '%';
            $conditions[] = "({$posts}.post_title LIKE %s OR {$posts}.post_content LIKE %s)";
            $bindings[] = $pattern;
            $bindings[] = $pattern;
        }

        $sql = "SELECT DISTINCT COUNT(*) AS occurences, {$posts}.post_title, {$posts}.ID"
            . " FROM {$posts}, {$postmeta}"
            . " WHERE {$posts}.post_status = 'publish'"
            . " AND {$posts}.post_type = 'job'"
            . " AND {$posts}.ID = {$postmeta}.post_id"
            . " AND {$postmeta}.meta_key = 'closed'"
            . " AND {$postmeta}.meta_value = '0'"
            . " AND (" . implode(' OR ', $conditions) . ")"
            . " GROUP BY {$posts}.post_title ORDER BY occurences DESC LIMIT 10";

        $found = $wpdb->get_results($wpdb->prepare($sql, $bindings), ARRAY_A);
        if (is_array($found)) {
            $rows = $found;
        }
    }

    if (count($rows) > 0) {
        // NOTE(review): one <ul> with the same id is emitted per row, as before;
        // kept unchanged because the front-end styling/JS may depend on it.
        foreach ($rows as $row) {
            $permalink = get_permalink($row['ID']);
            echo '<ul id="sk_auto_suggest">';
            echo '<li onClick="window.location=\'' . esc_js($permalink) . '\';">' . PricerrTheme_wrap_the_title($row['post_title'], $row['ID']) . '</li>';
            echo '</ul>';
        }
    } else {
        echo '<ul>';
        echo '<li onClick="fill(\'' . esc_js($query_text) . '\');">' . __('No results found', 'PricerrTheme') . '</li>';
        echo '</ul>';
    }
}
Esempio n. 3
0
 /**
  * Reduces free text to a space-separated string of stemmed keywords.
  *
  * The text is split on every run of characters other than ASCII letters,
  * apostrophes, double quotes and hyphens. Words shorter than three characters
  * and words listed in self::$stopwords are dropped; the rest are lowercased
  * and stemmed.
  *
  * @param string $text Text to process.
  * @return string Stems joined by single spaces; an empty string when no word
  *                qualifies (previously `false` on PHP < 8, because
  *                substr('', 0, -1) returns false there).
  */
 public static function stems($text)
 {
     // split sentence into words
     $words = preg_split('/[^a-zA-Z\'"-]+/', $text, -1, PREG_SPLIT_NO_EMPTY);

     // stemmer plugin
     require_once BASEPATH . '/application/plugins/class.stemmer.inc';
     $stemmer = new Stemmer();

     $stems = array();
     foreach ($words as $word) {
         // NOTE(review): the stopword lookup uses the word's original case while the
         // stemmer receives the lowercased form — confirm that is intended.
         if (strlen($word) > 2 && !in_array($word, self::$stopwords, true)) {
             $stems[] = $stemmer->stem(strtolower($word));
         }
     }

     // implode() needs no trailing-space trimming and returns '' for an empty list.
     return implode(' ', $stems);
 }
<html>
<head>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
	<title>porter stemmer</title> 
</head>
<body>
	<form action="porter_stemmer_test.php" method="get">
		<input name="word" size="100">
		<button type="submit">GO</button>
	</form>
</body>
</html>
 
<?php
// Manual comparison page: stems the submitted word with both stemmer
// implementations and prints the two results after the form.
include 'class.stemmer.inc.php';
include 'porter_stemmer.php';

// Accept only a plain string; "?word[]=x" would otherwise hand an array to the stemmers.
$word = (isset($_GET['word']) && is_string($_GET['word'])) ? $_GET['word'] : '';
$stemmer = new Stemmer();

// The stems derive from request input, so escape them before writing HTML.
echo "class.stemmer.inc.php:  " . htmlspecialchars((string) $stemmer->stem($word), ENT_QUOTES, 'UTF-8');
echo "<br>";
echo "porter_stemmer.php:   " . htmlspecialchars((string) PorterStemmer::Stem($word), ENT_QUOTES, 'UTF-8');
Esempio n. 5
0
<?php

// Autosuggest query script: builds a LIKE-based search over published 'project'
// posts whose 'closed' meta value is '0', using the posted 'queryString', and
// executes it with mysql_query(). $wpdb is used here only for the table prefix.
global $wpdb;
include 'classes/stem.php';
include 'classes/cleaner.php';
// Raw request input; read without checking that the key exists.
$string = $_POST['queryString'];
$stemmer = new Stemmer();
$stemmed_string = $stemmer->stem($string);
$clean_string = new jSearchString();
$stemmed_string = $clean_string->parseString($stemmed_string);
// Rebuild the stemmed text as unique, non-empty, space-separated words.
// NOTE(review): split() was removed in PHP 7.0; explode(" ", ...) is the replacement.
$new_string = '';
foreach (array_unique(split(" ", $stemmed_string)) as $array => $value) {
    if (strlen($value) >= 1) {
        $new_string .= '' . $value . ' ';
    }
}
// Drop the trailing space appended by the loop above.
$new_string = substr($new_string, 0, strLen($new_string) - 1);
// NOTE(review): this overwrites everything assembled above with the HTML-escaped
// raw query, so the stemmed/cleaned text never reaches the SQL below.
$new_string = htmlspecialchars($_POST['queryString']);
if (strlen($new_string) > 0) {
    $split_stemmed = split(" ", $new_string);
    // Fixed part of the query; one LIKE group per search word is appended after "AND (".
    $sql = "SELECT DISTINCT COUNT(*) as occurences, " . $wpdb->prefix . "posts.post_title FROM " . $wpdb->prefix . "posts,\n\t\t\t" . $wpdb->prefix . "postmeta WHERE " . $wpdb->prefix . "posts.post_status='publish' and \n\t\t\t" . $wpdb->prefix . "posts.post_type='project' \n\t\t\t\n\t\t\t\t\tAND " . $wpdb->prefix . "posts.ID = " . $wpdb->prefix . "postmeta.post_id \n\t\t\t\t\tAND " . $wpdb->prefix . "postmeta.meta_key = 'closed' \n\t\t\t\t\tAND " . $wpdb->prefix . "postmeta.meta_value = '0' \n\t\t\t\n\t\t\tAND (";
    // NOTE(review): each() was removed in PHP 8.0; a foreach loop is the replacement.
    while (list($key, $val) = each($split_stemmed)) {
        if ($val != '' && strlen($val) > 0) {
            // NOTE(review): $val is concatenated into the SQL text. htmlspecialchars()
            // is HTML escaping, not SQL escaping — this should use bound parameters.
            $sql .= "(" . $wpdb->prefix . "posts.post_title LIKE '%" . $val . "%' OR " . $wpdb->prefix . "posts.post_content LIKE '%" . $val . "%') OR";
        }
    }
    $sql = substr($sql, 0, strLen($sql) - 3);
    // Removes the trailing " OR" left by the last appended group.
    // NOTE(review): if no group was appended (e.g. the query is only spaces), this
    // instead cuts three characters out of "AND (" and leaves malformed SQL.
    $sql .= ") GROUP BY " . $wpdb->prefix . "posts.post_title ORDER BY occurences DESC LIMIT 10";
    // NOTE(review): the mysql_* extension was removed in PHP 7.0, and die(mysql_error())
    // prints the database error text to the client.
    $query = mysql_query($sql) or die(mysql_error());
    //$row_sql = mysql_fetch_assoc ( $query );
Esempio n. 6
0
 /**
  * Dictionary check: each line of tests/data.txt holds a word and its expected
  * stem separated by whitespace.
  *
  * Currently disabled: the method returns before reading the file, so none of
  * the assertions below run.
  */
 public function testAgainstDictionary()
 {
     return;

     $rows = file("tests/data.txt", FILE_IGNORE_NEW_LINES);
     foreach ($rows as $row) {
         $columns = preg_split('#\\s+#', $row);
         $word = $columns[0];
         $expectedStem = $columns[1];
         $this->assertEquals($expectedStem, Stemmer::stem($word));
     }
 }
 /**
  * Looks up the stored correlation between a term and a document.
  *
  * The term is stemmed and the document is cast to string before being used
  * as index keys.
  *
  * @param mixed $_term     Term to look up; passed to Stemmer::stem().
  * @param mixed $_document Document identifier; cast to string.
  * @return mixed|null The stored correlation value, or null when the
  *                    correlation data is not set or either key is unknown.
  */
 public function termDocumentCorrelation($_term, $_document)
 {
     // NOTE(review): the guard tests l_termCorrelation / l_documentCorrelation while
     // the value is read from l_termDocumentCorrelation — confirm they are populated together.
     if ($this->l_termCorrelation === null || $this->l_documentCorrelation === null) {
         return null;
     }

     $stemmedTerm = Stemmer::stem($_term);
     if (!isset($this->l_termIndex[$stemmedTerm])) {
         return null;
     }

     $documentKey = (string) $_document;
     if (!isset($this->l_documentIndex[$documentKey])) {
         return null;
     }

     $row = $this->l_termIndex[$stemmedTerm];
     $column = $this->l_documentIndex[$documentKey];

     return $this->l_termDocumentCorrelation[$row][$column];
 }
 /**
  * Stems every word of $this->input_user_query in place.
  *
  * Words are extracted with the \w+ pattern and re-joined with single spaces,
  * so any other characters in the query are dropped. The boolean operators
  * NOT, OR and AND (exact upper-case spelling) are passed through unchanged.
  *
  * @return void The result is written back to $this->input_user_query.
  */
 public function stem_string()
 {
     // Stemmer is an open-source stemming class.
     $stemming = new Stemmer();

     preg_match_all('@\\w+\\b@', $this->input_user_query, $output);
     // The pattern has no capture groups, so index 0 holds every matched word.
     $words = isset($output[0]) ? $output[0] : array();

     $simple_array_stemmed = array();
     foreach ($words as $word) {
         // Boolean operators must keep their exact spelling; the original author
         // notes that stemming them would break the boolean search.
         if (in_array($word, array('NOT', 'OR', 'AND'), true)) {
             $simple_array_stemmed[] = $word;
         } else {
             $simple_array_stemmed[] = $stemming->stem($word);
         }
     }

     // Separator first: the legacy implode($array, $separator) order raises a
     // TypeError on PHP 8.
     $this->input_user_query = implode(' ', $simple_array_stemmed);
 }