/**
 * Print the count recorded for $key, then a tab, the key and a newline.
 *
 * The counts are obtained from generate_frequencies(), which is called with
 * the sequence, strlen($key) and false. When the returned map has no
 * (non-null) entry for $key, a count of 0 is printed.
 *
 * @param mixed  $sequence Passed through unchanged to generate_frequencies().
 * @param string $key      Entry to look up in the frequency map.
 */
function write_count($sequence, $key)
{
    $frequencies = generate_frequencies($sequence, strlen($key), false);
    $occurrences = isset($frequencies[$key]) ? $frequencies[$key] : 0;

    printf("%d\t%s\n", $occurrences, $key);
}
/**
 * Collect the suggested good words for a project, combined and per round,
 * each sorted by how often the word occurs in the project text.
 *
 * Suggestions are expected in the shape $suggestions[$round][$pagenum] = $wordsArray.
 * Words that are already on the project's good or bad word lists are dropped.
 *
 * @param mixed $projectid  Project identifier; forwarded to the load_project_*()
 *                          helpers and to page_info_query().
 * @param mixed $timeCutoff Forwarded to load_project_good_word_suggestions().
 *                          NOTE(review): presumably restricts which suggestions
 *                          are loaded by time -- confirm against that helper.
 *
 * @return array Seven-element list, in this order:
 *   0. combined suggestions: word => frequency taken from the project text,
 *      ordered by frequency (descending) then lower-cased word (ascending)
 *   1. combined suggestions: result of generate_frequencies() on the word list
 *   2. same as 0, but one array per round, keyed by round
 *   3. same as 1, but one array per round, keyed by round
 *   4. the round keys of element 2
 *   5. round => number of pages that carried suggestions in that round
 *   6. messages; holds one error message when the suggestions could not be
 *      loaded (elements 0-5 are then empty arrays)
 */
function _get_word_list($projectid, $timeCutoff)
{
    $messages = array();

    // load the suggestions; a non-array result is treated as an error description
    $suggestions = load_project_good_word_suggestions($projectid, $timeCutoff);
    if (!is_array($suggestions)) {
        $messages[] = sprintf(_("Unable to load suggestions: %s"), $suggestions);
        return array(array(), array(), array(), array(), array(), array(), $messages);
    }

    if (count($suggestions) == 0) {
        return array(array(), array(), array(), array(), array(), array(), $messages);
    }

    // load project good words
    $project_good_words = load_project_good_words($projectid);

    // load project bad words
    $project_bad_words = load_project_bad_words($projectid);

    // get the latest project text of all pages up to last possible round
    $last_possible_round = get_Round_for_round_number(MAX_NUM_PAGE_EDITING_ROUNDS);
    $pages_res = page_info_query($projectid, $last_possible_round->id, 'LE');
    $all_words_w_freq = get_distinct_words_in_text(get_page_texts($pages_res));

    // words already on the project's good or bad words lists; the lists do
    // not change below, so merge them once instead of once per round
    $known_words = array_merge($project_good_words, $project_bad_words);

    // array to hold all words
    $all_suggestions = array();
    $round_page_count = array();
    $round_suggestions_w_occurrences = array();
    $round_suggestions_w_freq = array();

    // parse the suggestions complex array
    // it is in the format: $suggestions[$round][$pagenum]=$wordsArray
    foreach ($suggestions as $round => $pageArray) {
        $round_suggestions = array();
        foreach ($pageArray as $words) {
            // add the words to the per-round array
            $round_suggestions = array_merge($round_suggestions, $words);

            // add the words to the combined array too
            $all_suggestions = array_merge($all_suggestions, $words);

            // count the page; the entry is created on the first page of the
            // round, so a round without pages gets no entry at all
            if (!isset($round_page_count[$round])) {
                $round_page_count[$round] = 0;
            }
            $round_page_count[$round]++;
        }

        // remove any words already on the project's good or bad words lists
        $round_suggestions = array_diff($round_suggestions, $known_words);

        // get the suggestion occurrences
        $round_suggestions_w_occurrences[$round] = generate_frequencies($round_suggestions);

        // get suggestion with project word frequency
        $round_suggestions_w_freq[$round] = array_intersect_key(
            $all_words_w_freq,
            array_flip($round_suggestions)
        );

        // multisort screws up all-numeric words so we need to preprocess first
        prep_numeric_keys_for_multisort($round_suggestions_w_freq[$round]);

        // sort the list by frequency, then by word
        array_multisort(
            array_values($round_suggestions_w_freq[$round]),
            SORT_DESC,
            array_map('strtolower', array_keys($round_suggestions_w_freq[$round])),
            SORT_ASC,
            $round_suggestions_w_freq[$round]
        );
    }

    // now, remove any words that are already on the project's good or bad words lists
    $all_suggestions = array_diff($all_suggestions, $known_words);

    // get the number of suggestion occurrences
    $all_suggestions_w_occurrences = generate_frequencies($all_suggestions);

    // $all_suggestions doesn't have frequency info,
    // so start with the info in $all_words_w_freq,
    // and extract the items where the key matches a key in $all_suggestions.
    $all_suggestions_w_freq = array_intersect_key($all_words_w_freq, array_flip($all_suggestions));

    // multisort screws up all-numeric words so we need to preprocess first
    prep_numeric_keys_for_multisort($all_suggestions_w_freq);

    // sort the list by frequency, then by word
    array_multisort(
        array_values($all_suggestions_w_freq),
        SORT_DESC,
        array_map('strtolower', array_keys($all_suggestions_w_freq)),
        SORT_ASC,
        $all_suggestions_w_freq
    );

    // get a list of all rounds
    $rounds = array_keys($round_suggestions_w_freq);

    return array(
        $all_suggestions_w_freq,
        $all_suggestions_w_occurrences,
        $round_suggestions_w_freq,
        $round_suggestions_w_occurrences,
        $rounds,
        $round_page_count,
        $messages
    );
}
/**
 * Print the count stored for $key, followed by a tab, the key and a newline.
 *
 * The frequency map comes from generate_frequencies(), called with the
 * sequence, strlen($key) and false. A key that is absent from the map is
 * printed with a count of 0; a key that is present is printed with whatever
 * value the map holds for it.
 *
 * @param mixed  $sequence Taken by reference and handed on to
 *                         generate_frequencies(); this function itself does
 *                         not assign to it.
 * @param string $key      Entry to look up in the frequency map.
 */
function write_count(&$sequence, $key)
{
    $frequencies = generate_frequencies($sequence, strlen($key), false);

    if (array_key_exists($key, $frequencies)) {
        $occurrences = $frequencies[$key];
    } else {
        $occurrences = 0;
    }

    printf("%d\t%s\n", $occurrences, $key);
}