Example #1
0
 /**
  * Parses the given text with the configured text parser.
  *
  * @param string $sText  Source text
  * @param array  $aError Passed by reference to the parser's parse() call,
  *                       which is expected to fill it with the errors that occurred
  *
  * @return string The value returned by the parser
  */
 public function TextParser($sText, &$aError = null)
 {
     // Pure delegation: the parser output is handed back unchanged
     return $this->oTextParser->parse($sText, $aError);
 }
Example #2
0
 /**
  * Builds a snippet from the given text.
  *
  * The text is first reduced either with strip_tags() (when $this->bSkipAllTags is set)
  * or by running it through the text parser and removing every '<br/>'.
  * Matches of the pattern returned by _preparePattern() are then grouped into sets of
  * nearby words; each set is rendered by _makeSnippetFragment() and wrapped with
  * $this->sSnippetBeforeFragment / $this->sSnippetAfterFragment.
  * When the pattern does not match, the text itself is returned, cut to
  * $this->nSnippetMaxLength characters and followed by '&hellip;' if it was longer.
  *
  * @param string $sText Source text
  *
  * @return string
  */
 protected function _makeSnippet($sText)
 {
     $aParseErrors = array();
     $sPattern = $this->_preparePattern();

     if ($this->bSkipAllTags) {
         // Configured to cut out every tag
         $sText = strip_tags($sText);
     } else {
         $sText = str_replace('<br/>', '', $this->oTextParser->parse($sText, $aParseErrors));
     }

     // Nothing matched: fall back to the beginning of the text
     if (!mb_preg_match_all($sPattern, $sText, $aMatches, PREG_OFFSET_CAPTURE)) {
         if (mb_strlen($sText) > $this->nSnippetMaxLength) {
             return mb_substr($sText, 0, $this->nSnippetMaxLength) . '&hellip;';
         }
         return $sText;
     }

     // Group the matches so that words lying close to each other share one set
     $aGroups = array();
     $nGroupIdx = -1;
     $aAnchor = array();
     $nSpan = 0;
     foreach ($aMatches[0] as $aHit) {
         $aWord = array('txt' => $aHit[0], 'pos' => $aHit[1], 'len' => mb_strlen($aHit[0]));
         $nWordEnd = $aWord['pos'] + $aWord['len'];
         // Short-circuit order matters: $aAnchor is only read once a group already exists
         $bOpenNewGroup = $nGroupIdx == -1
             || $nSpan == 0
             || !($nWordEnd - $aAnchor['pos'] < $this->nSnippetLength);
         if ($bOpenNewGroup) {
             // This word becomes the anchor that later words are measured against
             $aAnchor = $aWord;
             $nSpan = $nWordEnd;
             $aGroups[++$nGroupIdx][] = $aWord;
         } else {
             $nSpan = $nWordEnd - $aAnchor['pos'];
             $aGroups[$nGroupIdx][] = $aWord;
         }
     }

     // Render one fragment per group, honouring the fragment limit when it is positive
     $aPieces = array();
     $nStart = 0;
     $nRendered = 0;
     foreach ($aGroups as $aGroup) {
         $nExtent = 0;
         foreach ($aGroup as $aWord) {
             if ($nExtent != 0) {
                 // Stretch the extent up to the end of the current word
                 $nExtent = $aWord['pos'] + $aWord['len'] - $nStart;
             } else {
                 $nExtent = $aWord['len'];
                 $nStart = $aWord['pos'];
             }
         }
         $aPieces[] = $this->_makeSnippetFragment($sText, $aGroup, $nStart, $nExtent);
         if ($this->nSnippetMaxFragments > 0 && ++$nRendered >= $this->nSnippetMaxFragments) {
             break;
         }
     }

     // Wrap every fragment with the configured prefix and suffix
     $sSnippet = '';
     foreach ($aPieces as $sPiece) {
         $sSnippet .= $this->sSnippetBeforeFragment . $sPiece . $this->sSnippetAfterFragment;
     }

     return $sSnippet;
 }