예제 #1
0
/**
 * Print the count recorded for $key in $sequence, followed by a tab and the key.
 *
 * The counts come from generate_frequencies(), which is called with the
 * sequence, the byte length of $key and false. A key that is missing from
 * the returned map (or mapped to null) is printed with a count of 0.
 *
 * @param mixed  $sequence value handed unchanged to generate_frequencies()
 * @param string $key      entry to look up in the returned map
 */
function write_count($sequence, $key)
{
    $frequencies = generate_frequencies($sequence, strlen($key), false);
    $occurrences = isset($frequencies[$key]) ? $frequencies[$key] : 0;
    printf("%d\t%s\n", $occurrences, $key);
}
/**
 * Collect the good-word suggestions of a project, overall and per round.
 *
 * Suggestions are loaded with load_project_good_word_suggestions() and are
 * expected in the shape $suggestions[$round][$pagenum] = $wordsArray. Words
 * that are already on the project's good or bad word list are dropped. The
 * remaining words are paired with their frequency in the project text and
 * sorted by descending frequency, then by lower-cased word.
 *
 * @param mixed $projectid  project identifier passed to the project loaders
 * @param mixed $timeCutoff cutoff passed to load_project_good_word_suggestions()
 *
 * @return array seven-element list:
 *               [0] word => project-text frequency, for all suggestions,
 *               [1] result of generate_frequencies() over all suggestions,
 *               [2] round => (word => project-text frequency),
 *               [3] round => result of generate_frequencies() for that round,
 *               [4] list of rounds that had suggestions,
 *               [5] round => number of pages with suggestions,
 *               [6] list of messages (only filled when loading failed).
 *               When loading fails or there are no suggestions, elements
 *               0-5 are empty arrays.
 */
function _get_word_list($projectid, $timeCutoff)
{
    $messages = array();

    // load the suggestions; a non-array result is reported as the failure reason
    $suggestions = load_project_good_word_suggestions($projectid, $timeCutoff);
    if (!is_array($suggestions)) {
        $messages[] = sprintf(_("Unable to load suggestions: %s"), $suggestions);
        return array(array(), array(), array(), array(), array(), array(), $messages);
    }
    if (count($suggestions) == 0) {
        return array(array(), array(), array(), array(), array(), array(), $messages);
    }

    // load project good words
    $project_good_words = load_project_good_words($projectid);
    // load project bad words
    $project_bad_words = load_project_bad_words($projectid);
    // words already on either list are never reported; build the exclusion
    // list once instead of once per round
    $known_words = array_merge($project_good_words, $project_bad_words);

    // get the latest project text of all pages up to last possible round
    $last_possible_round = get_Round_for_round_number(MAX_NUM_PAGE_EDITING_ROUNDS);
    $pages_res = page_info_query($projectid, $last_possible_round->id, 'LE');
    $all_words_w_freq = get_distinct_words_in_text(get_page_texts($pages_res));

    // array to hold all words
    $all_suggestions = array();
    $round_page_count = array();
    // per-round results, initialised up front so they are always defined
    $round_suggestions_w_occurrences = array();
    $round_suggestions_w_freq = array();

    // parse the suggestions complex array
    // it is in the format: $suggestions[$round][$pagenum]=$wordsArray
    foreach ($suggestions as $round => $pageArray) {
        $round_suggestions = array();
        foreach ($pageArray as $words) {
            // add the words to the per-round array
            $round_suggestions = array_merge($round_suggestions, $words);
            // add the words to the combined array too
            $all_suggestions = array_merge($all_suggestions, $words);
            // count the page; the entry is created on the first page of a
            // round so a round without pages gets no entry at all
            if (!isset($round_page_count[$round])) {
                $round_page_count[$round] = 0;
            }
            $round_page_count[$round]++;
        }

        // remove any words already on the project's good or bad words lists
        $round_suggestions = array_diff($round_suggestions, $known_words);

        // get the suggestion occurrences
        $round_suggestions_w_occurrences[$round] = generate_frequencies($round_suggestions);

        // get suggestion with project word frequency
        $round_suggestions_w_freq[$round] = array_intersect_key($all_words_w_freq, array_flip($round_suggestions));

        // array_multisort mishandles all-numeric words, so preprocess the keys first
        prep_numeric_keys_for_multisort($round_suggestions_w_freq[$round]);

        // sort the list by frequency, then by word
        array_multisort(
            array_values($round_suggestions_w_freq[$round]),
            SORT_DESC,
            array_map('strtolower', array_keys($round_suggestions_w_freq[$round])),
            SORT_ASC,
            $round_suggestions_w_freq[$round]
        );
    }

    // now, remove any words that are already on the project's good or bad words lists
    $all_suggestions = array_diff($all_suggestions, $known_words);

    // get the number of suggestion occurrences
    $all_suggestions_w_occurrences = generate_frequencies($all_suggestions);

    // $all_suggestions doesn't have frequency info,
    // so start with the info in $all_words_w_freq,
    // and extract the items where the key matches a key in $all_suggestions.
    $all_suggestions_w_freq = array_intersect_key($all_words_w_freq, array_flip($all_suggestions));

    // array_multisort mishandles all-numeric words, so preprocess the keys first
    prep_numeric_keys_for_multisort($all_suggestions_w_freq);

    // sort the list by frequency, then by word
    array_multisort(
        array_values($all_suggestions_w_freq),
        SORT_DESC,
        array_map('strtolower', array_keys($all_suggestions_w_freq)),
        SORT_ASC,
        $all_suggestions_w_freq
    );

    // get a list of all rounds
    $rounds = array_keys($round_suggestions_w_freq);

    return array(
        $all_suggestions_w_freq,
        $all_suggestions_w_occurrences,
        $round_suggestions_w_freq,
        $round_suggestions_w_occurrences,
        $rounds,
        $round_page_count,
        $messages
    );
}
예제 #3
0
파일: bench.php 프로젝트: jemmy655/hippyvm
/**
 * Print the count recorded for $key in $sequence, followed by a tab and the key.
 *
 * The counts come from generate_frequencies(), which is called with the
 * sequence, the byte length of $key and false. A key that does not exist in
 * the returned map is printed with a count of 0.
 *
 * @param mixed  $sequence taken by reference and handed to generate_frequencies();
 *                         this function itself never assigns to it
 * @param string $key      entry to look up in the returned map
 */
function write_count(&$sequence, $key)
{
    $frequencies = generate_frequencies($sequence, strlen($key), false);
    if (array_key_exists($key, $frequencies)) {
        $occurrences = $frequencies[$key];
    } else {
        $occurrences = 0;
    }
    printf("%d\t%s\n", $occurrences, $key);
}