示例#1
0
 /**
  * Removes from $rest the words whose normalized forms also occur in $unit.
  *
  * Both strings are normalized with Lemmatizator::makeUnitNormal() and the
  * " - FORMS" marker is stripped from the result. The normalized words of
  * $rest are then mapped back to the original words of $rest by position.
  *
  * NOTE(review): the positional mapping assumes makeUnitNormal() returns as
  * many space-separated words as $rest contains - confirm against the
  * Lemmatizator implementation. Repeated normalized words share one map
  * entry (the last original word wins).
  *
  * @param string $rest phrase to subtract from
  * @param string $unit phrase whose words are removed from $rest
  * @return string remaining original words, each preceded by a single space
  *                (empty string when nothing remains)
  */
 public function substract($rest, $unit)
 {
     $lemmatizer = new Lemmatizator();
     $rest_normal_words = explode(" ", str_replace(" - FORMS", "", $lemmatizer->makeUnitNormal($rest)));
     $unit_normal_words = explode(" ", str_replace(" - FORMS", "", $lemmatizer->makeUnitNormal($unit)));
     // normalized word => original word, paired by position
     $original_by_normal = array_combine($rest_normal_words, explode(" ", $rest));
     $remaining = "";
     foreach (array_diff($rest_normal_words, $unit_normal_words) as $normal_word) {
         $remaining = $remaining . " " . $original_by_normal[$normal_word];
     }
     return $remaining;
 }
示例#2
0
 /**
  * Combines per-query entry counts into per-zone / per-unit entries and averages,
  * corrects the averages of FORMS units and computes the "bodies" value per unit.
  *
  * Procedure:
  *  1. For every zone/unit collect the entries of each query (zero-filled when a
  *     query has no data for that zone/unit) and the per-query averages.
  *  2. Find "asis" units that are not immediately followed by a FORMS unit in
  *     $units_set, group them under their normalized "<normal> - FORMS" key and
  *     prepend the unit that directly precedes that FORMS unit in $units_set.
  *  3. Collapse per-query averages into a single average and flatten the entries.
  *  4. Compute an amendment per zone/FORMS unit and add it to the FORMS average
  *     (the result is clamped at 0).
  *  5. Compute bodies: average of the per-url sums over non-title zones, minus the
  *     averages of every non-title zone, plus the unit's non-title amendments
  *     (clamped at 0 when an amendment exists).
  *
  * NOTE(review): steps 2 and 4 assume every "<normal> - FORMS" key is present in
  * $units_set and has at least one counted average; otherwise the lookups and
  * the division by 'divider' below operate on missing data - confirm with callers.
  *
  * @param array $results    $results[$query][$zone][$unit] = [url number => entry count]
  * @param array $units_set  list of unit names; accessed with consecutive integer keys
  * @param array $urls_count $urls_count[$query] = number of zero entries to generate
  *                          when the query has no data for a zone/unit
  * @return array two-element list: [$combined_result, $bodies]
  */
 public function combineCommonUnits($results, $units_set, $urls_count)
 {
     $lemmatizator = new Lemmatizator();
     $combined_result = [];
     // per-unit / per-query / per-url sums over non-title zones, used for bodies counting
     $entries_sums = [];
     $zones = $this->allZones();
     $queries = array_keys($results);
     $counter = new Counter();
     foreach ($zones as $zone) {
         foreach ($units_set as $unit) {
             foreach ($queries as $query) {
                 if (isset($results[$query][$zone][$unit])) {
                     $combined_result[$zone][$unit]['entries'][] = $results[$query][$zone][$unit];
                     // titles are not included in bodies counting
                     if ($zone != 'title') {
                         foreach ($results[$query][$zone][$unit] as $url_num => $entry_count) {
                             $entries_sums[$unit][$query][$url_num] = isset($entries_sums[$unit][$query][$url_num]) ? $entries_sums[$unit][$query][$url_num] + $entry_count : $entry_count;
                         }
                     }
                     // averages are collected only for sets that have really been counted for entries
                     $combined_result[$zone][$unit]['average'][] = $counter->getAverage($results[$query][$zone][$unit]);
                 } else {
                     // zero-filled placeholder for sets that have not been counted for entries
                     $combined_result[$zone][$unit]['entries'][] = array_fill(0, $urls_count[$query], 0);
                 }
             }
         }
     }
     // correct the extra FORMS
     //========================
     // collect the 'asis' units that are NOT immediately followed by a FORMS unit
     $asis_to_correct_set = [];
     foreach ($units_set as $key => $unit) {
         if (strpos($unit, 'FORMS') === false && (!isset($units_set[$key + 1]) || strpos($units_set[$key + 1], 'FORMS') === false)) {
             $asis_to_correct_set[] = $unit;
         }
     }
     // group those asis units under the FORMS unit name built from their normal form
     $forms_to_correct_set = [];
     foreach ($asis_to_correct_set as $asis) {
         $asis_normal = $lemmatizator->makeUnitNormal($asis);
         $asis_normal .= " - FORMS";
         $forms_to_correct_set[$asis_normal][] = $asis;
     }
     // prepend the unit that stands right before the FORMS unit in the units list,
     // giving the map: FORMS unit => all of its asis units
     foreach (array_keys($forms_to_correct_set) as $asis_normal) {
         $more_key = array_search($asis_normal, $units_set);
         array_unshift($forms_to_correct_set[$asis_normal], $units_set[$more_key - 1]);
     }
     // prepare factors and divider for each FORMS unit (per zone);
     // this must run before the per-query averages are collapsed below
     $correct_factors = [];
     foreach (array_keys($forms_to_correct_set) as $form) {
         foreach ($zones as $zone) {
             $correct_factors[$zone][$form]['divider'] = count($combined_result[$zone][$form]['average']);
             foreach ($forms_to_correct_set[$form] as $asis) {
                 $correct_factors[$zone][$form]['factors'][$asis] = $correct_factors[$zone][$form]['divider'] - count($combined_result[$zone][$asis]['average']);
             }
         }
     }
     // the correction procedure continues after the averages are combined
     //=======================================================
     // combine averages: one average per zone/unit, entries flattened into one list
     foreach ($zones as $zone) {
         foreach ($units_set as $unit) {
             $entries_flattened = [];
             foreach ($combined_result[$zone][$unit]['entries'] as $arr) {
                 $entries_flattened = array_merge($entries_flattened, $arr);
             }
             $combined_result[$zone][$unit]['average'] = $counter->getAverage($combined_result[$zone][$unit]['average']);
             $combined_result[$zone][$unit]['entries'] = $entries_flattened;
         }
     }
     // end of combine averages procedure
     //==================================
     // continue the correction: compute the amendment for each zone / FORMS unit
     foreach (array_keys($forms_to_correct_set) as $form) {
         foreach ($zones as $zone) {
             $dividend = 0;
             foreach ($forms_to_correct_set[$form] as $asis) {
                 $summand = $correct_factors[$zone][$form]['factors'][$asis] * $combined_result[$zone][$asis]['average'];
                 $dividend += $summand;
             }
             $correct_factors[$zone][$form]['amendment'] = -1.0 * round($dividend / $correct_factors[$zone][$form]['divider'], 0, PHP_ROUND_HALF_UP);
         }
     }
     // apply the amendments to the combined averages, never going below zero
     foreach ($correct_factors as $zone => $form_array) {
         foreach ($form_array as $form => $factors_array) {
             $combined_result[$zone][$form]['average'] += $factors_array['amendment'];
             if ($combined_result[$zone][$form]['average'] < 0) {
                 $combined_result[$zone][$form]['average'] = 0;
             }
         }
     }
     // bodies count procedure
     //================================
     $bodies = [];
     foreach ($entries_sums as $unit => $query_sums) {
         $bodies[$unit] = [];
         foreach ($query_sums as $query => $sums) {
             $bodies[$unit] = array_merge($bodies[$unit], $sums);
         }
         $bodies[$unit] = $counter->getAverage($bodies[$unit]);
         foreach ($zones as $zone) {
             if ($zone != 'title') {
                 $bodies[$unit] = $bodies[$unit] - $combined_result[$zone][$unit]['average'];
             }
         }
         // The amendment is per unit: reset it on every iteration so that a value
         // accumulated for a previous unit is never applied to (or summed into) this one.
         $body_amendment = null;
         // sum the amendments of this unit over all non-title zones
         foreach ($correct_factors as $zone => $form_array) {
             if (isset($correct_factors[$zone][$unit]) && $zone != 'title') {
                 if (isset($body_amendment)) {
                     $body_amendment += $correct_factors[$zone][$unit]['amendment'];
                 } else {
                     $body_amendment = $correct_factors[$zone][$unit]['amendment'];
                 }
             }
         }
         // correct the body amount with the summed amendment (only when the unit has one)
         if (isset($body_amendment)) {
             $body_amendment = round($body_amendment, 0, PHP_ROUND_HALF_UP);
             $bodies[$unit] += $body_amendment;
             if ($bodies[$unit] < 0) {
                 $bodies[$unit] = 0;
             }
         }
     }
     return [$combined_result, $bodies];
 }
 /**
  * Handles the submitted references form and queues the data for processing.
  *
  * Flow:
  *  - authorizes the user for 'client-usage';
  *  - cleans the submitted 'ref' data and validates the input; on failure redirects
  *    back to WebsiteController@referencies_handler with the errors and the
  *    user's already edited references ('ref_dirty');
  *  - takes the project id from the input or initializes a new project;
  *  - optionally sets the project's 'exact' flag;
  *  - stores the additional queries (those not duplicating the main ones) in
  *    tmp/<project dir>/reports/dop_queries.txt under the storage path;
  *  - converts each reference textarea into an array of urls keyed by the cleaned query;
  *  - fetches and cleans the sample html (remote url or uploaded file);
  *  - dispatches HandleDataJob and redirects to UserController@show_user.
  *
  * @param Request      $request      current request
  * @param Reference    $reference    used to fetch and clean the sample html
  * @param Lemmatizator $lemmatizator used to clean the query texts
  * @param Storekeeper  $storekeeper  used to initialize a new project
  * @return mixed redirect response
  */
 public function handle_referencies(Request $request, Reference $reference, Lemmatizator $lemmatizator, Storekeeper $storekeeper)
 {
     $this->authorize('client-usage', $request->user());
     $raw_data = $request->all();
     if (isset($raw_data['ref'])) {
         $raw_data['ref'] = $this->removeSecondScreenDuplicates($raw_data['ref']);
     }
     if (!empty($raw_data['ref'])) {
         $raw_data['ref'] = $this->removeQueriesWithNoRefs($raw_data['ref']);
     }
     if (isset($raw_data['sample_url'])) {
         $raw_data['sample_url'] = trim($raw_data['sample_url']);
     }
     // validates that there is at least one non-empty query in the set,
     // that the sample url is valid,
     // and that either project_id or a new project name is present
     $validator = Validator::make($raw_data, [
         'sample_url' => 'required_without:sample_html|url',
         'sample_html' => 'required_without:sample_url|mimes:html,htm,htmls,odt,oth,ott,doc,dot,docx,dotx',
         'ref' => 'required',
         'query' => 'required_without:project_id',
         'project_name' => 'required_without:project_id|max:20|alpha_num_dash_lat',
         'project_id' => 'required_without:project_name',
     ]);
     // validates that only one of remote url / local file is given for the sample
     $validator->after(function ($validator) {
         if (Input::get('sample_url') != '' && Input::hasFile('sample_html')) {
             $validator->errors()->add('sample_url', 'Choose smth one between remote url or local file');
         }
     });
     // on failure refill the form with the data the user has already edited (ref_dirty)
     if ($validator->fails()) {
         $id = isset($raw_data['project_id']) ? $raw_data['project_id'] : null;
         $inputed_responce = [];
         $dirty_input = Input::get('ref_dirty');
         // ref_dirty may be absent from the request; iterate only over a real array
         if (is_array($dirty_input)) {
             foreach ($dirty_input as $query => $dirty_refs) {
                 if ($dirty_refs) {
                     $query = $query == '0' ? "" : $query;
                     $inputed_responce[$query] = $dirty_refs;
                 } else {
                     // one empty set discards everything collected so far
                     $inputed_responce = [];
                     break;
                 }
             }
         }
         return redirect()->action('WebsiteController@referencies_handler', [$id])->withErrors($validator)->withInput(Input::except('ref', 'query', 'ref_dirty'))->with('inputed_responce', $inputed_responce);
     }
     $last = isset($raw_data['last']) ? $raw_data['last'] : null;
     // get the project_id from the input or create a new project
     if (isset($raw_data['project_id'])) {
         $project_id = strip_tags($raw_data['project_id']);
     } else {
         // initialize the project (creates the dirs necessary for query handling)
         $new_project = $storekeeper->initializeStoring($raw_data['project_name'], $raw_data['query']);
         $project_id = $new_project->id;
     }
     // set the exact flag on the project if it is checked
     if (isset($raw_data['exact'])) {
         $this_project = Project::find($project_id);
         $this_project->exact = true;
         $this_project->save();
     }
     // split the additional queries text into an array
     $dop_queries_array = [];
     if (isset($raw_data['dop_queries'])) {
         // strip tags first and split the STRIPPED text, so markup never reaches the stored file
         $dop_queries_text = strip_tags($raw_data['dop_queries']);
         $dop_queries_array = preg_split("/[,;\\.]*[\t\n\f\r]+\\s*/", trim($dop_queries_text), -1, PREG_SPLIT_NO_EMPTY);
     }
     // drop additional queries that duplicate the main queries
     foreach ($dop_queries_array as $num => $dop_query) {
         $dop_query = $this->clean_query($dop_query);
         if (in_array($dop_query, array_keys($raw_data['ref']))) {
             unset($dop_queries_array[$num]);
         }
     }
     // store the remaining additional queries to a file in the project dir
     if (!empty($dop_queries_array)) {
         foreach ($dop_queries_array as $key => $raw_query) {
             $dop_query = trim(array_keys($lemmatizator->splitToWords($raw_query))[0]);
             $dop_queries_array[$key] = $dop_query;
         }
         $dop_queries = implode("\n", $dop_queries_array);
         $project_dir = Project::where('id', $project_id)->first()->dir;
         file_put_contents(storage_path("tmp/" . $project_dir . "/reports/dop_queries.txt"), $dop_queries);
     }
     // obtain an array of references instead of the textarea text;
     // the text is split on white-space and commas
     $data = [];
     foreach ($raw_data['ref'] as $ref => $urls_text) {
         // splitToWords is used HERE only to "clean" the query
         // (е/ё normalization, stop-words removal, etc.).
         // It returns an array whose keys are the query texts and whose values are
         // arrays of the words they consist of; using it here keeps the "cleaning"
         // identical at every processing stage.
         $query = array_keys($lemmatizator->splitToWords($ref))[0];
         // replace the query with the cleaned one in $last
         if ($query != $ref && !is_null($last)) {
             $last[$query] = $last[$ref];
             unset($last[$ref]);
         }
         // make an array of urls instead of text
         $data[$query] = preg_split("/[\\s,]+/", $urls_text);
     }
     // remove empty strings if any (original keys are kept)
     foreach ($data as $query => $urls_array) {
         foreach ($urls_array as $key => $url) {
             if ($url == "") {
                 unset($data[$query][$key]);
             }
         }
     }
     // sample_url may be missing from the request when a file is uploaded instead
     $has_sample_url = !empty($raw_data['sample_url']);
     if ($has_sample_url || Input::hasFile('sample_html')) {
         if ($has_sample_url) {
             $src_type = 'http';
             // use the trimmed value, i.e. the same one that has been validated
             $sample_url = $raw_data['sample_url'];
         } else {
             $src_type = 'local';
             $sample_url = Input::file('sample_html')->getRealPath();
         }
         // get the html of the sample from the file or from the web
         try {
             $sample_content = $reference->getHtml($sample_url, 'sample', 0, $project_id, $src_type);
             $sample_cleaned_content = $reference->removeGarbage($sample_content, $project_id, "sample_0.html");
         } catch (\Exception $e) {
             $mes = $e->getMessage();
             $inputed_responce = [];
             foreach (Input::get('ref') as $query => $refs) {
                 $query = $query == '0' ? "" : $query;
                 $inputed_responce[$query] = $refs;
             }
             return redirect()->action('WebsiteController@referencies_handler', [$project_id])->with('alert', "Cannot fetch html from the sample url: " . $sample_url . "\n" . $mes)->withInput(Input::except('ref', 'query'))->with('inputed_responce', $inputed_responce);
         }
     } else {
         $sample_cleaned_content = '';
     }
     $this->dispatch(new HandleDataJob($sample_cleaned_content, $data, $project_id, $last));
     return redirect()->action('UserController@show_user', [$request->user()->id])->with('info', "Your Project is in process. Gather report when it'll appear in reports column.");
 }