/**
 * Turns the given $text into a URL slug.
 *
 * The text is first run through the commands of the selected transformation
 * group (when that group declares any) and then handed to cleanupText()
 * together with the group's configured cleanup method.
 *
 * For non-Unicode setups the result consists of characters in the range a-z,
 * numbers and _; for Unicode setups it may contain all characters except
 * space, &, ;, /, :, =, ?, [, ], (, ), -.
 *
 * Invalid characters are converted to -.
 *
 * Example with a non-Unicode setup
 *
 * 'My car' => 'My-car'
 * 'What is this?' => 'What-is-this'
 * 'This & that' => 'This-that'
 * 'myfile.tpl' => 'Myfile-tpl',
 * 'øæå' => 'oeaeaa'
 *
 * @param string $text Text to convert.
 * @param string $defaultText Used in place of $text when $text is empty.
 * @param string|null $transformation Transformation group name; when null the
 *                                    configured 'transformation' is used.
 *
 * @return string
 */
public function convert($text, $defaultText = '_1', $transformation = null)
{
    // Fall back to the configured transformation group when none was requested.
    if (null === $transformation) {
        $transformation = $this->configuration['transformation'];
    }

    // An empty input is replaced by the default text before any processing.
    if (0 === strlen($text)) {
        $text = $defaultText;
    }

    // empty() is false for missing keys as well as for empty command lists.
    $hasCommands = !empty($this->configuration['transformationGroups'][$transformation]['commands']);
    if ($hasCommands) {
        $text = $this->transformationProcessor->transform(
            $text,
            $this->configuration['transformationGroups'][$transformation]['commands']
        );
    }

    $cleanupMethod = $this->configuration['transformationGroups'][$transformation]['cleanupMethod'];

    return $this->cleanupText($text, $cleanupMethod);
}
/**
 * Builds a URL slug out of $text.
 *
 * Steps, in order: resolve the transformation group (the configured one when
 * none is passed), substitute $defaultText for an empty $text, apply the
 * group's transformation commands if it has any, and finally pass the result
 * through cleanupText() using the group's cleanup method.
 *
 * For non-Unicode setups the slug consists of characters in the range a-z,
 * numbers and _; for Unicode setups it may contain all characters except
 * space, &, ;, /, :, =, ?, [, ], (, ), -
 *
 * Invalid characters are converted to -.
 *
 * Example with a non-Unicode setup
 *
 * 'My car' => 'My-car'
 * 'What is this?' => 'What-is-this'
 * 'This & that' => 'This-that'
 * 'myfile.tpl' => 'Myfile-tpl',
 * 'øæå' => 'oeaeaa'
 *
 * @param string $text
 * @param string $defaultText
 * @param string|null $transformation
 *
 * @return string
 */
public function convert($text, $defaultText = "_1", $transformation = null)
{
    $transformation = isset($transformation)
        ? $transformation
        : $this->configuration["transformation"];

    $text = strlen($text) === 0 ? $defaultText : $text;

    // Only transform when the group defines a non-empty list of commands;
    // empty() also covers the case where the keys are not set at all.
    if (!empty($this->configuration["transformationGroups"][$transformation]["commands"])) {
        $commands = $this->configuration["transformationGroups"][$transformation]["commands"];
        $text = $this->transformationProcessor->transform($text, $commands);
    }

    return $this->cleanupText(
        $text,
        $this->configuration["transformationGroups"][$transformation]["cleanupMethod"]
    );
}
/**
 * Builds the subquery that selects the IDs of words relevant to $string.
 *
 * The string is run through the configured transformation commands and
 * tokenized; every token contributes one word expression and the expressions
 * are OR-ed together. When the configured stop word threshold factor is below
 * 1, matches are additionally limited to words whose object_count is lower
 * than the stop word threshold value.
 *
 * @uses getStopWordThresholdValue() To get threshold for words we would like to ignore in query.
 *
 * @param \eZ\Publish\Core\Persistence\Database\SelectQuery $query
 * @param string $string
 *
 * @return \eZ\Publish\Core\Persistence\Database\SelectQuery
 */
protected function getWordIdSubquery(SelectQuery $query, $string)
{
    $subQuery = $query->subSelect();

    $normalizedString = $this->processor->transform($string, $this->configuration['commands']);

    $wordExpressions = array();
    foreach ($this->tokenizeString($normalizedString) as $token) {
        $wordExpressions[] = $this->getWordExpression($subQuery, $token);
    }

    $whereCondition = $subQuery->expr->lOr($wordExpressions);

    // If stop word threshold is below 100%, make it part of $whereCondition
    $applyStopWordThreshold = $this->configuration['stopWordThresholdFactor'] < 1;
    if ($applyStopWordThreshold) {
        $objectCountColumn = $this->dbHandler->quoteColumn('object_count');
        $thresholdPlaceholder = $subQuery->bindValue($this->getStopWordThresholdValue());

        $whereCondition = $subQuery->expr->lAnd(
            $whereCondition,
            $subQuery->expr->lt($objectCountColumn, $thresholdPlaceholder)
        );
    }

    $subQuery
        ->select($this->dbHandler->quoteColumn('id'))
        ->from($this->dbHandler->quoteTable('ezsearch_word'))
        ->where($whereCondition);

    return $subQuery;
}
/**
 * Builds the subquery that selects the IDs of words relevant to $string.
 *
 * The string is run through the configured transformation commands and
 * tokenized; every token contributes one word expression and the expressions
 * are OR-ed together. The result is always AND-ed with a condition requiring
 * object_count to be lower than the configured 'searchThresholdValue'.
 *
 * @param \eZ\Publish\Core\Persistence\Database\SelectQuery $query
 * @param string $string
 *
 * @return \eZ\Publish\Core\Persistence\Database\SelectQuery
 */
protected function getWordIdSubquery(SelectQuery $query, $string)
{
    $subQuery = $query->subSelect();

    $normalizedString = $this->processor->transform($string, $this->configuration['commands']);

    $wordExpressions = array();
    foreach ($this->tokenizeString($normalizedString) as $token) {
        $wordExpressions[] = $this->getWordExpression($subQuery, $token);
    }

    // SELECT and FROM are set up before the condition is assembled, keeping
    // the same call sequence on the subquery as the chained form.
    $selectFrom = $subQuery
        ->select($this->dbHandler->quoteColumn('id'))
        ->from($this->dbHandler->quoteTable('ezsearch_word'));

    $matchesAnyWord = $subQuery->expr->lOr($wordExpressions);

    $objectCountColumn = $this->dbHandler->quoteColumn('object_count');
    $thresholdPlaceholder = $subQuery->bindValue($this->configuration['searchThresholdValue']);
    $belowThreshold = $subQuery->expr->lt($objectCountColumn, $thresholdPlaceholder);

    $selectFrom->where($subQuery->expr->lAnd($matchesAnyWord, $belowThreshold));

    return $subQuery;
}
/**
 * Index search engine full text data corresponding to content object field values.
 *
 * Ported from the legacy code
 * @see https://github.com/ezsystems/ezpublish-legacy/blob/master/kernel/search/plugins/ezsearchengine/ezsearchengine.php#L45
 *
 * Previously indexed data for the content is removed first. Every field value
 * is then transformed with the configured full text commands and split into
 * words; the resulting entries are written in batches of 1000 inside a single
 * transaction, which is rolled back if writing fails.
 *
 * @param \eZ\Publish\Core\Search\Legacy\Content\FullTextData $fullTextData
 *
 * @throws \Exception Re-thrown, after rolling back, when writing the index fails.
 */
public function index(FullTextData $fullTextData)
{
    $indexArray = [];
    $indexArrayOnlyWords = [];
    $placement = 0;

    // Remove previously indexed content if exists to avoid keeping in index removed field values
    $this->remove($fullTextData->id);

    foreach ($fullTextData->values as $fullTextValue) {
        /** @var \eZ\Publish\Core\Search\Legacy\Content\FullTextValue $fullTextValue */
        // Numeric field values are additionally stored as an integer; values
        // above DB_INT_MAX and non-numeric values are stored as 0.
        if (is_numeric(trim($fullTextValue->value))) {
            $integerValue = (int) $fullTextValue->value;
            if ($integerValue > self::DB_INT_MAX) {
                $integerValue = 0;
            }
        } else {
            $integerValue = 0;
        }

        $text = $this->transformationProcessor->transform(
            $fullTextValue->value,
            $this->fullTextSearchConfiguration['commands']
        );

        // split by non-words
        $wordArray = preg_split('/\\W/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($wordArray as $word) {
            if (trim($word) === '') {
                continue;
            }

            // words stored in search index are limited to 150 characters
            if (mb_strlen($word) > 150) {
                $word = mb_substr($word, 0, 150);
            }

            $indexArray[] = [
                'Word' => $word,
                'ContentClassAttributeID' => $fullTextValue->fieldDefinitionId,
                'identifier' => $fullTextValue->fieldDefinitionIdentifier,
                'integer_value' => $integerValue,
            ];
            $indexArrayOnlyWords[$word] = 1;

            // if we have "www." before word then
            // treat it as url and add additional entry to the index
            // NOTE(review): words come from a split on \W, so they should never
            // contain "." and this branch looks unreachable — confirm intent.
            if (mb_substr(mb_strtolower($word), 0, 4) === 'www.') {
                $additionalUrlWord = substr($word, 4);
                $indexArray[] = [
                    'Word' => $additionalUrlWord,
                    'ContentClassAttributeID' => $fullTextValue->fieldDefinitionId,
                    'identifier' => $fullTextValue->fieldDefinitionIdentifier,
                    'integer_value' => $integerValue,
                ];
                $indexArrayOnlyWords[$additionalUrlWord] = 1;
            }
        }
    }

    $wordIDArray = $this->buildWordIDArray(array_keys($indexArrayOnlyWords));

    $this->dbHandler->beginTransaction();
    try {
        // Write in batches of 1000 entries; $placement is threaded through so
        // each batch continues where the previous one stopped.
        foreach (array_chunk($indexArray, 1000) as $indexArrayChunk) {
            $placement = $this->indexWords($fullTextData, $indexArrayChunk, $wordIDArray, $placement);
        }
        $this->dbHandler->commit();
    } catch (\Exception $e) {
        // Do not leave the transaction open when a batch (or the commit) fails.
        $this->dbHandler->rollback();
        throw $e;
    }
}