/**
 * Pick a random run of consecutive words from the input text that
 * inRepository() does not report as already present.
 *
 * The text is passed through standardizeText() and split on single spaces;
 * candidate substrings are $substringLength words long, re-joined with a
 * single space.
 *
 * Candidates are redrawn until inRepository() rejects one, so this function
 * does not return if every possible candidate is found in the repository.
 *
 * @param string $inputText       Text to draw the substring from.
 * @param int    $substringLength Number of words in the substring.
 *
 * @return string The selected substring. If the text has fewer words than
 *                $substringLength, the only candidate is the whole
 *                standardized text.
 */
function selectSubstringRandom($inputText, $substringLength)
{
    $split = explode(" ", standardizeText($inputText)); //split standardized string into words

    // Start positions index words in $split, so the upper bound must come
    // from the word count, not from the character length of the raw input.
    // Clamp at 0 so short inputs do not produce a negative (from-the-end) offset.
    $maxPos = max(0, count($split) - $substringLength);

    // NOTE(review): selectSubstring() calls inRepository($repositoryLocations, $substring)
    // with two arguments; confirm which arity inRepository() actually expects.
    do {
        $pos = rand(0, $maxPos); //get a random word position
        $substring = implode(" ", array_slice($split, $pos, $substringLength)); //pull out the random substring
    } while (inRepository($substring)); //redraw while the chosen substring is found in the repository

    return $substring;
}
/**
 * Return the lowest scored substring from $inputText.
 *
 * Every run of $substringLength consecutive words (after standardizeText()
 * and splitting on single spaces) is scored as
 *     $alpha * localScore($histogram, $substring) + $beta * repositoryScore($substring)
 * where the repository term is 0 when $useRepository is false. Candidates are
 * then tried from lowest to highest score. When $useRepository is true, a
 * candidate is skipped if inRepository($repositoryLocations, $candidate)
 * reports it as present.
 *
 * @param bool   $useRepository       Whether repository scoring and the
 *                                    repository membership check are applied.
 * @param mixed  $repositoryLocations Passed through unchanged to inRepository().
 * @param mixed  $histogram           Passed through unchanged to localScore().
 * @param string $inputText           Text to select the substring from.
 * @param int    $substringLength     Number of words in the substring.
 *
 * @return string The chosen substring, or "" if every candidate is found in
 *                the repository.
 */
function selectSubstring($useRepository, $repositoryLocations, $histogram, $inputText, $substringLength)
{
    $alpha = 1;  //local repository weight
    $beta = 0.5; //global repository weight

    $split = explode(" ", standardizeText($inputText)); //split standardized string into words
    $candidateCount = count($split) - $substringLength + 1;

    //score all possible substrings of the specified length, keyed by start offset
    $substringScores = array();
    for ($i = 0; $i < $candidateCount; $i++) {
        $candidate = implode(" ", array_slice($split, $i, $substringLength));

        //the repository term only contributes when the caller enabled the repository
        $repositoryScore = $useRepository ? repositoryScore($candidate) : 0;

        $substringScores[$i] = $alpha * localScore($histogram, $candidate) + $beta * $repositoryScore;
    }

    asort($substringScores); //sort by score, lowest first, preserving the start offsets as keys

    // Walk the offsets rather than the internal array pointer: next() returns
    // the element *value*, so a legitimate score of 0 looks like end-of-array
    // and would abort the search early.
    $rankedOffsets = array_keys($substringScores);
    if (count($rankedOffsets) === 0) {
        // Fewer words than $substringLength: there is no full-length candidate,
        // so fall back to the slice starting at offset 0 (the whole text).
        $rankedOffsets = array(0);
    }

    foreach ($rankedOffsets as $offset) {
        $candidate = implode(" ", array_slice($split, $offset, $substringLength));
        if (!$useRepository || !inRepository($repositoryLocations, $candidate)) {
            return $candidate;
        }
    }

    return ""; //if a unique substring is not found, return ""
}