/**
  * Builds a URL slug from $text using a configured transformation group.
  *
  * Steps, in order:
  *  - when $transformation is null, the group named by the 'transformation'
  *    configuration key is used;
  *  - a zero-length $text is replaced by $defaultText;
  *  - if the group has a non-empty 'commands' list, the text is run through
  *    the transformation processor with those commands;
  *  - the result is passed to cleanupText() together with the group's
  *    'cleanupMethod'.
  *
  * Examples carried over from the original documentation (non-Unicode setup;
  * the actual output depends on the configured group and cleanup method):
  *
  * 'My car' => 'My-car'
  * 'What is this?' => 'What-is-this'
  * 'This & that' => 'This-that'
  * 'myfile.tpl' => 'Myfile-tpl'
  * 'øæå' => 'oeaeaa'
  *
  * @param string $text Text to convert
  * @param string $defaultText Replacement used when $text is empty
  * @param string|null $transformation Transformation group name, null for the configured default
  *
  * @return string
  */
 public function convert($text, $defaultText = '_1', $transformation = null)
 {
     if ($transformation === null) {
         $transformation = $this->configuration['transformation'];
     }

     $slug = strlen($text) === 0 ? $defaultText : $text;

     // empty() is only false for a command list that is both set and non-empty.
     if (!empty($this->configuration['transformationGroups'][$transformation]['commands'])) {
         $slug = $this->transformationProcessor->transform(
             $slug,
             $this->configuration['transformationGroups'][$transformation]['commands']
         );
     }

     return $this->cleanupText(
         $slug,
         $this->configuration['transformationGroups'][$transformation]['cleanupMethod']
     );
 }
 /**
  * Turns $text into a URL slug by applying one of the configured transformation groups.
  *
  * The group is $transformation, or the one named by the "transformation"
  * configuration key when $transformation is null. A zero-length $text is
  * swapped for $defaultText first. The group's "commands" are applied through
  * the transformation processor when that list is set and non-empty, and the
  * outcome is finally passed to cleanupText() with the group's "cleanupMethod".
  *
  * Examples carried over from the original documentation (non-Unicode setup;
  * the actual output depends on the configured group and cleanup method):
  *
  * 'My car' => 'My-car'
  * 'What is this?' => 'What-is-this'
  * 'This & that' => 'This-that'
  * 'myfile.tpl' => 'Myfile-tpl'
  * 'øæå' => 'oeaeaa'
  *
  * @param string $text Text to convert
  * @param string $defaultText Replacement used when $text is empty
  * @param string|null $transformation Transformation group name, null for the configured default
  *
  * @return string
  */
 public function convert($text, $defaultText = "_1", $transformation = null)
 {
     $transformation = isset($transformation)
         ? $transformation
         : $this->configuration["transformation"];

     if (0 === strlen($text)) {
         $text = $defaultText;
     }

     // A missing or empty command list means the text goes straight to cleanup.
     $hasCommands = !empty($this->configuration["transformationGroups"][$transformation]["commands"]);
     if ($hasCommands) {
         $commands = $this->configuration["transformationGroups"][$transformation]["commands"];
         $text = $this->transformationProcessor->transform($text, $commands);
     }

     $cleanupMethod = $this->configuration["transformationGroups"][$transformation]["cleanupMethod"];

     return $this->cleanupText($text, $cleanupMethod);
 }
    /**
     * Builds a subquery that selects the IDs of ezsearch_word rows matching $string.
     *
     * $string is transformed with the configured 'commands', tokenized, and each
     * token becomes one word expression; the expressions are OR-ed together.
     * When the 'stopWordThresholdFactor' configuration value is below 1, the
     * condition is additionally AND-ed with "object_count < threshold".
     *
     * @uses getStopWordThresholdValue() To get threshold for words we would like to ignore in query.
     *
     * @param \eZ\Publish\Core\Persistence\Database\SelectQuery $query Query the subquery is created from
     * @param string $string Search string
     *
     * @return \eZ\Publish\Core\Persistence\Database\SelectQuery
     */
    protected function getWordIdSubquery( SelectQuery $query, $string )
    {
        $subQuery = $query->subSelect();
        $normalizedString = $this->processor->transform( $string, $this->configuration['commands'] );

        $wordExpressions = array();
        foreach ( $this->tokenizeString( $normalizedString ) as $token )
        {
            $wordExpressions[] = $this->getWordExpression( $subQuery, $token );
        }

        $matchesAnyWord = $subQuery->expr->lOr( $wordExpressions );

        if ( $this->configuration['stopWordThresholdFactor'] < 1 )
        {
            // A factor below 100% means words used by too many objects are excluded.
            $belowThreshold = $subQuery->expr->lt(
                $this->dbHandler->quoteColumn( 'object_count' ),
                $subQuery->bindValue( $this->getStopWordThresholdValue() )
            );
            $whereCondition = $subQuery->expr->lAnd( $matchesAnyWord, $belowThreshold );
        }
        else
        {
            $whereCondition = $matchesAnyWord;
        }

        $idColumn = $this->dbHandler->quoteColumn( 'id' );
        $subQuery
            ->select( $idColumn )
            ->from( $this->dbHandler->quoteTable( 'ezsearch_word' ) )
            ->where( $whereCondition );

        return $subQuery;
    }
Example #4
0
 /**
  * Builds a subquery that selects the IDs of ezsearch_word rows matching $string.
  *
  * $string is transformed with the configured 'commands' and tokenized; each
  * token becomes one word expression. A row is selected when it matches any of
  * those expressions AND its object_count is lower than the configured
  * 'searchThresholdValue'.
  *
  * @param \eZ\Publish\Core\Persistence\Database\SelectQuery $query Query the subquery is created from
  * @param string $string Search string
  *
  * @return \eZ\Publish\Core\Persistence\Database\SelectQuery
  */
 protected function getWordIdSubquery(SelectQuery $query, $string)
 {
     $subQuery = $query->subSelect();
     $normalizedString = $this->processor->transform($string, $this->configuration['commands']);

     $wordExpressions = array();
     foreach ($this->tokenizeString($normalizedString) as $token) {
         $wordExpressions[] = $this->getWordExpression($subQuery, $token);
     }

     // SELECT id FROM ezsearch_word ...
     $selectFrom = $subQuery
         ->select($this->dbHandler->quoteColumn('id'))
         ->from($this->dbHandler->quoteTable('ezsearch_word'));

     // ... WHERE (word matches any token) AND object_count < threshold
     $matchesAnyWord = $subQuery->expr->lOr($wordExpressions);
     $belowThreshold = $subQuery->expr->lt(
         $this->dbHandler->quoteColumn('object_count'),
         $subQuery->bindValue($this->configuration['searchThresholdValue'])
     );
     $selectFrom->where($subQuery->expr->lAnd($matchesAnyWord, $belowThreshold));

     return $subQuery;
 }
 /**
  * Registers the given words in the search word index and returns their IDs.
  *
  * Words are processed in chunks of 500 inside a single database transaction.
  * For each chunk:
  *  - words already present in the index get their object count incremented by one;
  *  - words not yet present are added and then fetched again to learn their IDs.
  *
  * Ported from the legacy code
  *
  * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L155
  *
  * @param array $indexArrayOnlyWords words for object to add
  *
  * @return array Map of word => word ID. Existing words are keyed exactly as
  *               returned by the search index; newly added words are keyed by
  *               their 'lowercase'-transformed form.
  */
 private function buildWordIDArray(array $indexArrayOnlyWords)
 {
     $chunkSize = 500;
     $wordCount = count($indexArrayOnlyWords);
     $wordArray = [];

     // store the words in the index and remember the ID
     // NOTE(review): the transaction is not rolled back if one of the calls below throws — confirm whether callers handle that.
     $this->dbHandler->beginTransaction();
     for ($offset = 0; $offset < $wordCount; $offset += $chunkSize) {
         // Fetch already indexed words from database
         $wordChunk = array_slice($indexArrayOnlyWords, $offset, $chunkSize);
         $existingRows = $this->searchIndex->getWords($wordChunk);

         // Build a hash of the existing words. The ID list is deliberately
         // scoped to this chunk: accumulating it across chunks would increment
         // the object count of earlier chunks' words once per later chunk.
         $chunkWordIds = [];
         $existingWords = [];
         foreach ($existingRows as $row) {
             $chunkWordIds[] = $row['id'];
             $existingWords[] = $row['word'];
             $wordArray[$row['word']] = $row['id'];
         }

         // Update the object count of existing words by one
         if (count($chunkWordIds) > 0) {
             $this->searchIndex->incrementWordObjectCount($chunkWordIds);
         }

         // Insert any new words
         $newWords = array_diff($wordChunk, $existingWords);
         if (count($newWords) > 0) {
             $this->searchIndex->addWords($newWords);
             foreach ($this->searchIndex->getWords($newWords) as $row) {
                 $wordLowercase = $this->transformationProcessor->transformByGroup($row['word'], 'lowercase');
                 $wordArray[$wordLowercase] = $row['id'];
             }
         }
     }
     $this->dbHandler->commit();

     return $wordArray;
 }
 /**
  * Returns $string after applying the "lowercase" transformation group
  * of the transformation processor.
  *
  * @param string $string Text to downcase
  *
  * @return string
  */
 protected function lowerCase($string)
 {
     $lowercased = $this->transformationProcessor->transformByGroup($string, "lowercase");

     return $lowercased;
 }
 /**
  * Construct instance of TransformationProcessor\DefinitionBased.
  *
  * Through the $ruleFiles array, a list of files with full text
  * transformation rules is given. These files are parsed by
  * {@link \eZ\Publish\Core\Persistence\TransformationProcessor\DefinitionBased\Parser}
  * and then used for normalization in the full text search.
  *
  * The compiler and rule files are handed to the parent constructor; the
  * parser is stored on this instance afterwards.
  *
  * @param \eZ\Publish\Core\Persistence\TransformationProcessor\DefinitionBased\Parser $parser
  * @param \eZ\Publish\Core\Persistence\TransformationProcessor\PcreCompiler $compiler
  * @param array $ruleFiles
  */
 public function __construct(Parser $parser, PcreCompiler $compiler, array $ruleFiles = array())
 {
     parent::__construct($compiler, $ruleFiles);
     $this->parser = $parser;
 }
 /**
  * Constructor.
  *
  * Forwards both arguments unchanged to the parent constructor.
  *
  * @param \eZ\Publish\Core\Persistence\TransformationProcessor\PcreCompiler $compiler
  * @param array $ruleFiles
  */
 public function __construct(PcreCompiler $compiler, array $ruleFiles = array())
 {
     parent::__construct($compiler, $ruleFiles);
 }