예제 #1
0
파일: PlainType.php 프로젝트: enhavo/enhavo
 /**
  * Builds the search IndexItem for a plain-text value.
  *
  * The text is split into words with searchIndexSplit(), every word is scored
  * through scoreWord() using the configured weight, and the results are stored
  * on a fresh IndexItem.
  *
  * @param mixed      $value      Text to index; stored unchanged as raw data.
  * @param array      $options    Reads 'weight' (falls back to 1 when unset) and 'type'
  *                               (copied into every scored-word entry, null when unset).
  * @param array|null $properties Names of the IndexItem parts to fill: 'rawData', 'weight',
  *                               'data', 'scoredWords'. An empty value fills all of them.
  *                               Asking for either 'data' or 'scoredWords' fills both.
  *
  * @return array Array containing the single IndexItem that was built.
  */
 function index($value, $options, $properties = null)
 {
     $indexItem = new IndexItem();

     // An empty property list means "fill every part of the item".
     $fillAll = empty($properties);

     if ($fillAll || in_array('rawData', $properties, true)) {
         $indexItem->setRawData($value);
     }

     // The weight is also used for scoring below, so resolve it even when it is not stored.
     $weight = isset($options['weight']) ? $options['weight'] : 1;
     if ($fillAll || in_array('weight', $properties, true)) {
         $indexItem->setWeight($weight);
     }

     if ($fillAll || in_array('data', $properties, true) || in_array('scoredWords', $properties, true)) {
         $minimumWordSize = $this->getMinimumWordSize();
         $words = $this->searchIndexSplit($value);

         // Accumulates the indexed words, space separated (leading space is kept, trailing one trimmed).
         $accum = ' ';
         $scoredWords = array();
         // scoreWord() receives the current focus and hands back the updated one.
         $focus = 1;
         foreach ($words as $word) {
             $accum .= $word . ' ';
             list($scoredWords, $focus) = $this->scoreWord($word, $weight, $minimumWordSize, $scoredWords, $focus);
         }

         // Both values are the same for every entry, so look them up once.
         $locale = $this->container->getParameter('locale');
         $type = isset($options['type']) ? $options['type'] : null;

         // Flatten the word => score map into the list of entries the IndexItem stores.
         $indexItemArray = array();
         foreach ($scoredWords as $word => $wordScore) {
             $indexItemArray[] = array(
                 'word' => $word,
                 'locale' => $locale,
                 'type' => $type,
                 'score' => $wordScore,
             );
         }

         $indexItem->setData(rtrim($accum));
         $indexItem->setScoredWords($indexItemArray);
     }

     return array($indexItem);
 }
예제 #2
0
파일: HtmlType.php 프로젝트: enhavo/enhavo
 /**
  * Builds the search IndexItem for an HTML value, scoring words by the tags around them.
  *
  * Tag weights are read from the bundle's tag_weights.yml and can be overridden or
  * extended through $options['weights']. Tags without a weight entry are stripped
  * before the text is split into tag and text segments. While walking the segments,
  * opening a weighted tag raises the score applied to the following words and closing
  * it lowers the score again (never below 1).
  *
  * @param mixed      $val        HTML to index; stored unchanged as raw data.
  * @param array      $options    Reads 'weights' (tag name => weight) and 'type' (copied into
  *                               every scored-word entry, null when unset); the whole array
  *                               is passed to getMinimumWordSize().
  * @param array|null $properties Names of the IndexItem parts to fill: 'rawData', 'data',
  *                               'scoredWords'. An empty value fills all of them.
  *                               Asking for either 'data' or 'scoredWords' fills both.
  *
  * @return array Array containing the single IndexItem that was built.
  */
 function index($val, $options, $properties = null)
 {
     $indexItem = new IndexItem();

     // An empty property list means "fill every part of the item".
     $fillAll = empty($properties);

     if ($fillAll || in_array('rawData', $properties, true)) {
         $indexItem->setRawData($val);
     }

     if ($fillAll || in_array('data', $properties, true) || in_array('scoredWords', $properties, true)) {
         // Accumulates the indexed words, space separated (leading space is kept, trailing one trimmed).
         $accum = ' ';

         // Load the default tag weights shipped with the bundle.
         $tagYaml = $this->util->getMainPath() . '/Enhavo/Bundle/SearchBundle/Resources/config/tag_weights.yml';
         $yaml = new Parser();
         $tags = $yaml->parse(file_get_contents($tagYaml));
         if (!is_array($tags)) {
             // e.g. an empty weights file: continue with no default weights instead of
             // failing on the array accesses below.
             $tags = array();
         }

         // Caller-supplied weights override existing tags and add new ones alike.
         if (isset($options['weights'])) {
             foreach ($options['weights'] as $key => $value) {
                 $tags[$key] = $value;
             }
         }

         // Strip off all ignored tags, insert space before and after them to keep word boundaries.
         $val = str_replace(array('<', '>'), array(' <', '> '), $val);
         $val = strip_tags($val, '<' . implode('><', array_keys($tags)) . '>');

         // Split HTML tags from plain text; segments alternate text, tag, text, ...
         $split = preg_split('/\\s*<([^>]+?)>\\s*/', $val, -1, PREG_SPLIT_DELIM_CAPTURE);

         // Odd/even flag: true while the current segment is a tag.
         $tag = false;
         // Score currently applied to each word.
         $score = 1;
         // Stack of open tags, innermost first.
         $tagstack = array();
         // Number of consecutive words seen since the last tag change.
         $tagwords = 0;
         // scoreWord() receives the current focus and hands back the updated one.
         $focus = 1;
         $scoredWords = array();
         $minimumWordSize = $this->getMinimumWordSize($options);

         foreach ($split as $value) {
             if ($tag) {
                 // The tag name is everything up to the first space (attributes are dropped).
                 list($tagname) = explode(' ', $value, 2);
                 $tagname = strtolower($tagname);

                 // substr() instead of $tagname[0]: the name is empty when the captured
                 // tag text starts with a space.
                 if (substr($tagname, 0, 1) === '/') {
                     // Closing tag.
                     $tagname = substr($tagname, 1);
                     if (!count($tagstack) || $tagstack[0] != $tagname) {
                         // If we encounter unexpected tags, reset score to avoid incorrect boosting.
                         $tagstack = array();
                         $score = 1;
                     } else {
                         // Remove from tag stack and decrement score; a tag without a
                         // configured weight counts as 0.
                         $closed = array_shift($tagstack);
                         $score = max(1, $score - (isset($tags[$closed]) ? $tags[$closed] : 0));
                     }
                 } elseif (isset($tagstack[0]) && $tagstack[0] == $tagname) {
                     // None of the tags we look for make sense when nested identically.
                     // If they are, it's probably broken HTML.
                     $tagstack = array();
                     $score = 1;
                 } else {
                     // Add to open tag stack and increment score. Names such as "br/"
                     // (from "<br/>") have no weight entry and count as 0.
                     array_unshift($tagstack, $tagname);
                     $score += (isset($tags[$tagname]) ? $tags[$tagname] : 0);
                 }

                 // A tag change occurred, reset counter.
                 $tagwords = 0;
             } elseif ($value != '') {
                 // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values.
                 $words = $this->searchIndexSplit($value);
                 foreach ($words as $word) {
                     $accum .= $word . ' ';
                     list($scoredWords, $focus) = $this->scoreWord($word, $score, $minimumWordSize, $scoredWords, $focus);
                     $tagwords++;
                     // Too many words inside a single tag probably mean a tag was accidentally left open.
                     if (count($tagstack) && $tagwords >= 15) {
                         $tagstack = array();
                         $score = 1;
                     }
                 }
             }

             $tag = !$tag;
         }

         // Both values are the same for every entry, so look them up once.
         $locale = $this->container->getParameter('locale');
         $type = isset($options['type']) ? $options['type'] : null;

         // Flatten the word => score map into the list of entries the IndexItem stores.
         $indexItemArray = array();
         foreach ($scoredWords as $word => $wordScore) {
             $indexItemArray[] = array(
                 'word' => $word,
                 'locale' => $locale,
                 'type' => $type,
                 'score' => $wordScore,
             );
         }

         $indexItem->setData(rtrim($accum));
         $indexItem->setScoredWords($indexItemArray);
     }

     return array($indexItem);
 }