/**
  * Apply English stemming to a single string when a stemmer is available.
  *
  * @param string $string Text handed to stem_english().
  * @return string Whatever stem_english() returns for $string (presumably the
  *                stemmed word), or $string unchanged when stem_english() is
  *                not available.
  */
 public static function stemWord($string)
 {
     // Guard on the function that is actually called rather than on the
     // extension name, so a build that does not expose stem_english()
     // degrades to a no-op instead of raising an undefined-function error.
     if (function_exists('stem_english')) {
         return stem_english($string);
     }
     return $string;
 }
Example #2
0
 /**
  * Stem a word with the Russian stemmer and then the English stemmer while
  * the process locale is temporarily switched to "C".
  *
  * NOTE(review): presumably the "C" locale is forced so the stemmers' string
  * handling does not depend on the ambient locale - confirm.
  *
  * @param string $word Word to stem.
  * @return string Result of stem_english() applied to the stem_russian() output.
  */
 public static function _stemWord($word)
 {
     // The string '0' asks setlocale() for the current setting without
     // changing it; it is remembered so it can be put back afterwards.
     $oldlocale = \setlocale(LC_ALL, '0');
     \setlocale(LC_ALL, 'C');
     try {
         $word = \stem_russian($word);
         $word = \stem_english($word);
     } finally {
         // Always restore the caller's locale, even if a stemmer call throws.
         \setlocale(LC_ALL, $oldlocale);
     }
     return $word;
 }
Example #3
0
 /**
  * Stem every word in the list and return the distinct stems in first-seen order.
  *
  * Words whose array key starts with "ru" go through stem_russian_unicode();
  * all others go through stem_english(). Computed stems are memoised in
  * self::$cache, keyed by the word itself.
  *
  * @param array $words Words to stem; the first two characters of each key
  *                     select the stemmer.
  * @return array List of unique stems.
  */
 public static function Stem($words)
 {
     $stems = array();
     $seen = array();
     foreach ($words as $lang => $term) {
         if (!isset(self::$cache[$term])) {
             // not memoised yet: pick the stemmer from the key prefix
             $stem = (substr($lang, 0, 2) == 'ru')
                 ? stem_russian_unicode($term)
                 : stem_english($term);
             self::$cache[$term] = $stem;
         } else {
             $stem = self::$cache[$term];
         }
         // only the first occurrence of each stem is reported
         if (isset($seen[$stem])) {
             continue;
         }
         $seen[$stem] = 1;
         $stems[] = $stem;
     }
     return $stems;
 }
 /**
  * Work out the page image location, the location of the page's Lucene
  * index, and the highlight coordinates matching the "hl" route parameter.
  *
  * The coordinate list is empty when reel, page or hl is missing, when the
  * index directory has no segments.gen file, or when the query leaves no
  * searchable terms once Lucene operators are removed.
  *
  * NOTE(review): the path segments come from sortOutParams(); if any of them
  * can carry raw request input they should be validated against directory
  * traversal before being used in file paths - confirm against that method.
  *
  * @return array Map with the keys "imgloc", "indloc" and "coords"; "coords"
  *               is a list of [x1, y1, x2, y2] entries read from the hits.
  */
 public function sortOutHlCoords()
 {
     //Lucene operators - dropped from the query instead of being stemmed
     $operators = ['and', 'or', 'not'];
     //this Lucene version won't wildcard without at least 3 characters
     //in front of the wildcard
     $minWildcardPrefix = 3;
     $config = $this->getServiceLocator()->get('config');
     $paramInfo = $this->sortOutParams($config);
     //collect building blocks
     $resLoc = $paramInfo['resLoc'];
     $site = $paramInfo['site'];
     $collection = $paramInfo['collection'];
     $container = $paramInfo['container'];
     $reel = $paramInfo['reel'];
     $page = $paramInfo['page'];
     //the all important query
     $hl = $this->params()->fromRoute('hl', '');
     //coordinates to pass back
     $coords = [];
     //pass back empty coordinate set if any of these parameters
     //are missing
     if ($this->isNullOrEmpty($reel) || $this->isNullOrEmpty($page) || $this->isNullOrEmpty($hl)) {
         return ["imgloc" => '', "indloc" => '', "coords" => $coords];
     }
     //location of files - ODW file layout
     $resLoc .= '/' . $site . '/' . $collection . '/' . $container . '/' . $reel . '/odw/' . $page . '/';
     $imgLoc = $resLoc . '../../' . $page . '.jpg';
     $iaLoc = $resLoc . 'ia/' . $page . '.jpg';
     //not all images will have IA derivative
     if (file_exists($iaLoc)) {
         $imgLoc = $iaLoc;
     }
     $indLoc = $resLoc . 'index/imgworks';
     //need index directory and segments file to be valid lucene layout
     if (!file_exists($indLoc . '/segments.gen')) {
         return ["imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords];
     }
     //use Lucene tokens for searching
     $searchTerms = [];
     foreach (Analyzer\Analyzer::getDefault()->tokenize($hl) as $token) {
         $searchTerm = $token->getTermText();
         if (in_array($searchTerm, $operators, true)) {
             continue;
         }
         //no snowball analyzer or other stemming option
         //in Lucene 2.x, so create stem separately
         $stem = stem_english($searchTerm);
         //the stem is what precedes the wildcard, so its length (not the
         //length of the original term) decides whether a wildcard is allowed
         $searchTerms[] = strlen($stem) >= $minWildcardPrefix ? $stem . '*' : $stem;
     }
     //nothing searchable left (e.g. the query held only operators)
     if ($searchTerms === []) {
         return ["imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords];
     }
     //every term is space separated, so a short term without a wildcard
     //can no longer run into the term that follows it
     $searchText = implode(' ', $searchTerms);
     //now do search
     $index = Lucene\Lucene::open($indLoc);
     //assemble results
     foreach ($index->find($searchText) as $searchResult) {
         $coords[] = [$searchResult->x1, $searchResult->y1, $searchResult->x2, $searchResult->y2];
     }
     //pass back image and index location in addition to results
     return ["imgloc" => $imgLoc, "indloc" => $indLoc, "coords" => $coords];
 }