/**
 * Comparison callback that ranks two strings by how many words they contain,
 * placing the string with MORE words first (descending order).
 *
 * Words are counted with str_word_count(); the character list returned by
 * Analyzer::getCyrChars(["*"]) is supplied as the extra characters that
 * count as part of a word.
 *
 * @param string $set_1 First string to compare.
 * @param string $set_2 Second string to compare.
 *
 * @return int -1 when $set_1 has more words, 1 when it has fewer, 0 when equal.
 */
public static function sortByWordsCount($set_1, $set_2)
{
    $words_in_first = str_word_count($set_1, 0, Analyzer::getCyrChars(["*"]));
    $words_in_second = str_word_count($set_2, 0, Analyzer::getCyrChars(["*"]));

    // More words sorts earlier.
    if ($words_in_first > $words_in_second) {
        return -1;
    }

    if ($words_in_first < $words_in_second) {
        return 1;
    }

    return 0;
}
/**
 * Counts, per query and per zone, the occurrences of every n-gramm and of the
 * variants derived from it, then hands the raw counts to combineCommonUnits().
 *
 * For each n-gramm (processed from the largest size down to 1) the following
 * units are counted, each one followed by its "<normal form> - FORMS" entry:
 *   1. the n-gramm as is;
 *   2. every variant returned by Analyzer::permutate();
 *   3. every variant returned by Analyzer::getWithStars();
 *   4. every variant returned by Analyzer::getWithStarsInstead().
 *
 * A unit already counted for a given query/zone is not counted again.
 *
 * The list of distinct units is de-duplicated by array-key identity (the same
 * identity $results uses), not by a loose in_array(). A loose comparison would
 * treat numeric-looking units such as "7" and "007" as equal and drop one of
 * them from the list even though both have their own entry in $results.
 *
 * @param array $zones        Zones to process; each value is used as a key into
 *                            $files_arrays and $paths.
 * @param array $n_gramms     Map of query => n-gramm lists, where the lists are
 *                            indexed by n-gramm size from 1 to count().
 * @param array $files_arrays File sets indexed as [$zone][$query]; passed to
 *                            Counter::countUnitInFileSet().
 * @param array $paths        Indexed by zone; passed to Counter::countUnitInFileSet().
 * @param mixed $urls_count   Passed through unchanged to combineCommonUnits().
 *
 * @return mixed The value returned by combineCommonUnits().
 */
public function count_entries($zones, $n_gramms, $files_arrays, $paths, $urls_count)
{
    $results = [];
    $units_set = [];   // ordered list of distinct result keys
    $seen_units = [];  // membership map for $units_set (O(1) lookups)

    $counter = new Counter();
    $lemmatizator = new Lemmatizator();
    $analyzer = new Analyzer();

    // Stores the count of $unit under $key (unless already stored for this
    // query/zone) and remembers $key in the ordered list of distinct units.
    $count_once = function ($key, $unit, $query, $zone) use (
        &$results,
        &$units_set,
        &$seen_units,
        $counter,
        $files_arrays,
        $paths
    ) {
        if (!isset($results[$query][$zone][$key])) {
            $results[$query][$zone][$key] = $counter->countUnitInFileSet(
                $unit,
                $files_arrays[$zone][$query],
                $paths[$zone]
            );
        }

        if (!isset($seen_units[$key])) {
            $seen_units[$key] = true;
            $units_set[] = $key;
        }
    };

    // Counts a unit as is, then the same unit expanded with its word forms;
    // the latter is stored under "<normal form> - FORMS".
    $count_unit_and_forms = function ($unit, $query, $zone) use ($count_once, $lemmatizator) {
        $count_once($unit, $unit, $query, $zone);

        $unit_with_forms = $lemmatizator->getUnitWithForms($unit);
        $forms_key = $lemmatizator->makeUnitNormal($unit) . " - FORMS";

        $count_once($forms_key, $unit_with_forms, $query, $zone);
    };

    foreach ($zones as $zone) {
        foreach ($n_gramms as $query => $n_gramms_array) {
            // Walk the n-gramm sizes from the longest down to single words.
            for ($n = count($n_gramms_array); $n > 0; $n--) {
                foreach ($n_gramms_array[$n] as $n_gramm) {
                    // n-gramm as is, direct order
                    $count_unit_and_forms($n_gramm, $query, $zone);

                    // all permutations of the n-gramm's words
                    foreach ($analyzer->permutate(explode(" ", $n_gramm)) as $permutated) {
                        $count_unit_and_forms($permutated, $query, $zone);
                    }

                    // direct order with a star ADDED (length = n + 1 star)
                    foreach ($analyzer->getWithStars(explode(" ", $n_gramm)) as $stared) {
                        $count_unit_and_forms($stared, $query, $zone);
                    }

                    // direct order with a star INSTEAD of a word (length = n)
                    foreach ($analyzer->getWithStarsInstead(explode(" ", $n_gramm)) as $stared_instead) {
                        $count_unit_and_forms($stared_instead, $query, $zone);
                    }
                }
            }
        }
    }

    return $this->combineCommonUnits($results, $units_set, $urls_count);
}