示例#1
0
 /**
  * Builds, for every zone, the list of text files derived from the parsed
  * nodes, fills those files via separatePlainFromFragment(), then rewrites
  * each file with its shortened (lemmatized) content and records its word count.
  *
  * @param array $nodes_arrays Map of query text => (map of source file name => nodes).
  *                            Entries whose value is not an array are reported through
  *                            Storekeeper::storeErrors() and skipped.
  * @param mixed $project      Project value forwarded to getZonesPaths() and storeErrors().
  *
  * @return array Two-element list:
  *               [0] zone => query text => list of ".txt" file names,
  *               [1] zone => list of word counts (one per processed file, in processing order).
  */
 public function makeZonesFilesArrays($nodes_arrays, $project)
 {
     $zones = $this->allZones();
     $storekeeper = new Storekeeper();
     // File names grouped by zone: (string) zone name => query text => list of file names.
     $files_arrays = [];
     // Word counts grouped by zone; initialised so the return value is always an array.
     $words = [];
     $paths = $this->getZonesPaths($zones, $project);

     // First pass: report and drop entries that are not arrays of files.
     foreach ($nodes_arrays as $query_text => $url) {
         if (!is_array($url)) {
             // The original passed an undefined $project_id here; $project is the only
             // project value in scope.
             // NOTE(review): confirm storeErrors() expects the same value as getZonesPaths().
             $storekeeper->storeErrors([$url], $project);
             unset($nodes_arrays[$query_text]);
         }
     }

     // Second pass: register every file under every zone and fill the zone files.
     foreach ($nodes_arrays as $query_text => $files) {
         foreach ($files as $file => $nodes) {
             $filename = str_replace(".html", ".txt", $file);
             foreach ($zones as $zone) {
                 $files_arrays[$zone][$query_text][] = $filename;
                 // Storage files for every zone except 'a', 'h1' and 'title' are truncated
                 // here before the node text is added.
                 if (!in_array($zone, ['a', 'h1', 'title'], true)) {
                     file_put_contents($paths[$zone] . $filename, "");
                 }
             }
             // Add the text content of each node to the corresponding zone file.
             $this->separatePlainFromFragment($nodes, $paths, $filename);
         }
     }

     // Third pass: count words in every zone file, then overwrite the file with the
     // shortened text produced by Lemmatizator::makeTextShort().
     $lemmatizator = new Lemmatizator();
     // Extra characters str_word_count() should treat as part of a word.
     $word_chars = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя*";
     foreach ($zones as $zone) {
         // A zone has no entry when no valid files were supplied at all.
         foreach ($files_arrays[$zone] ?? [] as $query => $files_array) {
             foreach ($files_array as $filename) {
                 $content = file_get_contents($paths[$zone] . $filename);
                 if ($content === false) {
                     // Unreadable/missing file: treat it as empty so the word-count list
                     // stays aligned with the file list.
                     $content = '';
                 }
                 $words[$zone][] = str_word_count($content, 0, $word_chars);
                 $short_content = $lemmatizator->makeTextShort($content);
                 file_put_contents($paths[$zone] . $filename, $short_content);
             }
         }
     }

     return [$files_arrays, $words];
 }