/**
 * Converts given $text into a URL slug consisting of URL valid characters.
 *
 * The text is first run through the commands of the selected transformation group
 * (only when that group defines a non-empty 'commands' entry) and is then handed to
 * cleanupText() together with the group's configured 'cleanupMethod'.
 *
 * For non-Unicode setups the slug consists of characters in the range a-z, numbers and _,
 * for Unicode setups it means all characters except space, &, ;, /, :, =, ?, [, ], (, ), -.
 *
 * Invalid characters are converted to -.
 *
 * Example with a non-Unicode setup
 *
 * 'My car' => 'My-car'
 * 'What is this?' => 'What-is-this'
 * 'This & that' => 'This-that'
 * 'myfile.tpl' => 'Myfile-tpl',
 * 'øæå' => 'oeaeaa'
 *
 * @param string $text Text to convert; when empty (or null) $defaultText is used instead.
 * @param string $defaultText Replacement for an empty $text.
 * @param string|null $transformation Name of the transformation group to apply; when null
 *        the configured 'transformation' setting is used.
 *
 * @return string
 */
public function convert($text, $defaultText = '_1', $transformation = null)
{
    if ($transformation === null) {
        $transformation = $this->configuration['transformation'];
    }

    // The cast lets a null $text count as empty without passing null to strlen(),
    // which newer PHP versions report as deprecated.
    if (strlen((string)$text) === 0) {
        $text = $defaultText;
    }

    // !empty() is already false for a missing key, so a separate isset() check is not needed.
    if (!empty($this->configuration['transformationGroups'][$transformation]['commands'])) {
        $text = $this->transformationProcessor->transform(
            $text,
            $this->configuration['transformationGroups'][$transformation]['commands']
        );
    }

    return $this->cleanupText(
        $text,
        $this->configuration['transformationGroups'][$transformation]['cleanupMethod']
    );
}
/**
 * Converts given $text into a URL slug consisting of URL valid characters.
 *
 * When the selected transformation group has a non-empty "commands" entry, the text is
 * transformed with those commands first; the result is then passed to cleanupText() along
 * with the group's configured "cleanupMethod".
 *
 * For non-Unicode setups the slug consists of characters in the range a-z, numbers and _,
 * for Unicode setups it means all characters except space, &, ;, /, :, =, ?, [, ], (, ), -
 *
 * Invalid characters are converted to -.
 *
 * Example with a non-Unicode setup
 *
 * 'My car' => 'My-car'
 * 'What is this?' => 'What-is-this'
 * 'This & that' => 'This-that'
 * 'myfile.tpl' => 'Myfile-tpl',
 * 'øæå' => 'oeaeaa'
 *
 * @param string $text Text to convert; when empty (or null) $defaultText is used instead.
 * @param string $defaultText Replacement for an empty $text.
 * @param string|null $transformation Name of the transformation group to apply; when null
 *        the configured "transformation" setting is used.
 *
 * @return string
 */
public function convert($text, $defaultText = "_1", $transformation = null)
{
    if ($transformation === null) {
        $transformation = $this->configuration["transformation"];
    }

    // Casting first means a null $text is handled as an empty string instead of being
    // passed to strlen() directly (deprecated in newer PHP versions).
    if (strlen((string)$text) === 0) {
        $text = $defaultText;
    }

    // A single !empty() covers both "key missing" and "key present but empty".
    if (!empty($this->configuration["transformationGroups"][$transformation]["commands"])) {
        $text = $this->transformationProcessor->transform(
            $text,
            $this->configuration["transformationGroups"][$transformation]["commands"]
        );
    }

    return $this->cleanupText(
        $text,
        $this->configuration["transformationGroups"][$transformation]["cleanupMethod"]
    );
}
/**
 * Builds the subquery that selects the IDs of the words contained in $string.
 *
 * $string is transformed with the configured 'commands' and tokenized; one word
 * expression is created per token and all of them are combined with a logical OR.
 * If the configured 'stopWordThresholdFactor' is lower than 1, the condition is
 * additionally restricted to rows whose 'object_count' is lower than the stop word
 * threshold value.
 *
 * @uses getStopWordThresholdValue() To get threshold for words we would like to ignore in query.
 *
 * @param \eZ\Publish\Core\Persistence\Database\SelectQuery $query
 * @param string $string
 *
 * @return \eZ\Publish\Core\Persistence\Database\SelectQuery
 */
protected function getWordIdSubquery( SelectQuery $query, $string )
{
    $subQuery = $query->subSelect();
    $expr = $subQuery->expr;

    $transformed = $this->processor->transform( $string, $this->configuration['commands'] );

    // One expression per token, OR-ed together below
    $wordExpressions = array();
    foreach ( $this->tokenizeString( $transformed ) as $token ) {
        $wordExpressions[] = $this->getWordExpression( $subQuery, $token );
    }
    $condition = $expr->lOr( $wordExpressions );

    // A factor of 1 (100%) or more means no stop word filtering is applied
    $filterStopWords = $this->configuration['stopWordThresholdFactor'] < 1;
    if ( $filterStopWords ) {
        $objectCountColumn = $this->dbHandler->quoteColumn( 'object_count' );
        $threshold = $subQuery->bindValue( $this->getStopWordThresholdValue() );
        $condition = $expr->lAnd( $condition, $expr->lt( $objectCountColumn, $threshold ) );
    }

    $idColumn = $this->dbHandler->quoteColumn( 'id' );
    $wordTable = $this->dbHandler->quoteTable( 'ezsearch_word' );
    $subQuery->select( $idColumn )->from( $wordTable )->where( $condition );

    return $subQuery;
}
/**
 * Builds the subquery that selects the IDs of the words contained in $string.
 *
 * $string is transformed with the configured 'commands' and tokenized; one word
 * expression is created per token and all of them are combined with a logical OR.
 * The result is always AND-ed with the requirement that 'object_count' is lower than
 * the configured 'searchThresholdValue'.
 *
 * @param \eZ\Publish\Core\Persistence\Database\SelectQuery $query
 * @param string $string
 *
 * @return \eZ\Publish\Core\Persistence\Database\SelectQuery
 */
protected function getWordIdSubquery(SelectQuery $query, $string)
{
    $subQuery = $query->subSelect();
    $expr = $subQuery->expr;

    $transformed = $this->processor->transform($string, $this->configuration['commands']);

    $wordExpressions = array();
    foreach ($this->tokenizeString($transformed) as $token) {
        $wordExpressions[] = $this->getWordExpression($subQuery, $token);
    }

    // Match any of the words ...
    $matchesAnyWord = $expr->lOr($wordExpressions);
    // ... but only those whose object count stays below the configured threshold
    $belowThreshold = $expr->lt(
        $this->dbHandler->quoteColumn('object_count'),
        $subQuery->bindValue($this->configuration['searchThresholdValue'])
    );

    $subQuery
        ->select($this->dbHandler->quoteColumn('id'))
        ->from($this->dbHandler->quoteTable('ezsearch_word'))
        ->where($expr->lAnd($matchesAnyWord, $belowThreshold));

    return $subQuery;
}
/**
 * Build WordIDArray and update ezsearch_word table.
 *
 * Ported from the legacy code
 *
 * The given words are processed in chunks of 500 inside a single transaction. For each
 * chunk, words already present in the search index get their object count incremented,
 * and words not yet present are added and then read back to obtain their IDs.
 *
 * NOTE(review): the transaction is not rolled back here if one of the index calls
 * throws - confirm that callers take care of that.
 *
 * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L155
 *
 * @param array $indexArrayOnlyWords words for object to add
 *
 * @return array wordIDArray, a map of word => word ID
 */
private function buildWordIDArray(array $indexArrayOnlyWords)
{
    $wordCount = count($indexArrayOnlyWords);
    $wordArray = [];

    // store the words in the index and remember the ID
    $this->dbHandler->beginTransaction();
    for ($arrayCount = 0; $arrayCount < $wordCount; $arrayCount += 500) {
        // Fetch already indexed words from database
        $wordArrayChunk = array_slice($indexArrayOnlyWords, $arrayCount, 500);
        $wordRes = $this->searchIndex->getWords($wordArrayChunk);

        // Build a hash of the existing words.
        // The ID list is collected per chunk: a list shared across chunks would make
        // every later chunk increment the object count of earlier chunks' words again.
        $chunkWordIDArray = [];
        $existingWordArray = [];
        foreach ($wordRes as $wordRow) {
            $chunkWordIDArray[] = $wordRow['id'];
            $existingWordArray[] = $wordRow['word'];
            $wordArray[$wordRow['word']] = $wordRow['id'];
        }

        // Update the object count of existing words by one
        if (count($chunkWordIDArray) > 0) {
            $this->searchIndex->incrementWordObjectCount($chunkWordIDArray);
        }

        // Insert if there are any new words
        $newWordArray = array_diff($wordArrayChunk, $existingWordArray);
        if (count($newWordArray) > 0) {
            $this->searchIndex->addWords($newWordArray);
            // Read the freshly added words back to learn their IDs
            foreach ($this->searchIndex->getWords($newWordArray) as $newWordRow) {
                $wordLowercase = $this->transformationProcessor->transformByGroup($newWordRow['word'], 'lowercase');
                $wordArray[$wordLowercase] = $newWordRow['id'];
            }
        }
    }
    $this->dbHandler->commit();

    return $wordArray;
}
/**
 * Runs the given string through the "lowercase" transformation group
 * of the string transformation processor and returns the result.
 *
 * @param string $string
 *
 * @return string
 */
protected function lowerCase($string)
{
    $lowercased = $this->transformationProcessor->transformByGroup($string, "lowercase");

    return $lowercased;
}
/**
 * Construct instance of TransformationProcessor\DefinitionBased.
 *
 * Through the $ruleFiles array, a list of files with full text
 * transformation rules is given. These files are parsed by
 * {@link \eZ\Publish\Core\Persistence\TransformationProcessor\DefinitionBased\Parser}
 * and then used for normalization in the full text search.
 *
 * The compiler and the rule files are passed on to the parent constructor;
 * the parser is stored on this instance.
 *
 * @param \eZ\Publish\Core\Persistence\TransformationProcessor\DefinitionBased\Parser $parser
 * @param \eZ\Publish\Core\Persistence\TransformationProcessor\PcreCompiler $compiler
 * @param array $ruleFiles
 */
public function __construct(Parser $parser, PcreCompiler $compiler, array $ruleFiles = array())
{
    parent::__construct($compiler, $ruleFiles);
    $this->parser = $parser;
}
/**
 * Constructor.
 *
 * Passes both arguments on to the parent constructor unchanged.
 *
 * @param \eZ\Publish\Core\Persistence\TransformationProcessor\PcreCompiler $compiler
 * @param array $ruleFiles
 */
public function __construct(PcreCompiler $compiler, array $ruleFiles = array())
{
    parent::__construct($compiler, $ruleFiles);
}