Exemple #1
0
/**
 * Pick a random run of consecutive words from $inputText that inRepository()
 * does not report as already present.
 *
 * The text is passed through standardizeText() and split on single spaces;
 * the result is $substringLength of those words joined back with spaces.
 * If the text has fewer words than $substringLength, the only candidate is
 * the whole (standardized) text.
 *
 * @param string $inputText       text to draw the substring from
 * @param int    $substringLength number of words in the substring
 * @return string the chosen substring, or "" when every candidate is
 *                reported by inRepository()
 */
function selectSubstringRandom($inputText, $substringLength)
{
    $split = explode(" ", standardizeText($inputText));
    //split standardized string into words
    $lastStart = max(0, count($split) - $substringLength);
    //highest usable start index; measured in words (not characters) because it indexes $split
    $positions = range(0, $lastStart);
    shuffle($positions);
    //try every start position once, in random order, so the search always terminates
    foreach ($positions as $pos) {
        $substring = implode(" ", array_slice($split, $pos, $substringLength));
        //pull out the candidate substring
        //NOTE(review): inRepository() is called with a single argument here - confirm against its definition
        if (!inRepository($substring)) {
            return $substring;
        }
    }
    return "";
    //every candidate is already in the repository
}
Exemple #2
0
/**
 * Return the lowest scored substring of $inputText.
 *
 * The text is passed through standardizeText() and split on single spaces.
 * Every run of $substringLength consecutive words is scored as
 * $alpha * localScore() + $beta * repositoryScore(); the repository term is
 * only included when $useRepository is truthy. Candidates are then tried from
 * lowest to highest score and, when $useRepository is truthy, any candidate
 * reported by inRepository() is skipped.
 *
 * If the text has fewer words than $substringLength, the only candidate is the
 * whole (standardized) text, which is not scored.
 *
 * @param bool   $useRepository       whether the repository is consulted
 * @param mixed  $repositoryLocations passed through to inRepository()
 * @param mixed  $histogram           passed through to localScore()
 * @param string $inputText           text to draw the substring from
 * @param int    $substringLength     number of words in the substring
 * @return string the selected substring, or "" if the repository is in use
 *                and every candidate is already in it
 */
function selectSubstring($useRepository, $repositoryLocations, $histogram, $inputText, $substringLength)
{
    $alpha = 1;
    //local repository weight
    $beta = 0.5;
    //global repository weight
    $substringScores = array();
    $split = explode(" ", standardizeText($inputText));
    //split standardized string into words
    $lastStart = count($split) - $substringLength;
    //last start index that still leaves room for a full-length substring
    //iterate through all possible substrings of the specified length
    for ($i = 0; $i <= $lastStart; $i++) {
        $substring = implode(" ", array_slice($split, $i, $substringLength));
        //grab a substring of the correct length
        $repositoryScore = 0;
        if ($useRepository) {
            //only consult the repository when the caller asked for it
            $repositoryScore = repositoryScore($substring);
        }
        $substringScores[$i] = $alpha * localScore($histogram, $substring) + $beta * $repositoryScore;
    }
    asort($substringScores);
    //sort by score but preserve keys, which are the start indexes into $split
    $candidates = array_keys($substringScores);
    if (count($candidates) === 0) {
        //text is shorter than the requested length: fall back to start index 0
        $candidates = array(0);
    }
    //walk the keys instead of testing the value returned by next(), so a
    //legitimate score of 0 is not mistaken for the end of the array
    foreach ($candidates as $start) {
        $substring = implode(" ", array_slice($split, $start, $substringLength));
        if (!$useRepository || !inRepository($repositoryLocations, $substring)) {
            return $substring;
        }
    }
    return "";
    //if a unique substring is not found, return ""
}