/**
 * Builds the per-zone text files for every fetched document and returns
 * the resulting file lists together with per-file word counts.
 *
 * Processing steps:
 *  1. Entries of $nodes_arrays whose value is not an array are reported
 *     through Storekeeper::storeErrors() and removed from further processing.
 *  2. For each remaining file, ".html" in its name is replaced with ".txt",
 *     the name is registered under every zone and query, the storage file is
 *     emptied for every zone except 'a', 'h1' and 'title', and the nodes are
 *     handed to separatePlainFromFragment().
 *  3. Every registered file is read back, its words are counted, the text is
 *     passed through Lemmatizator::makeTextShort() and written back in place.
 *
 * @param array $nodes_arrays query text => (file name => nodes); a non-array
 *                            value marks a failed query and is stored as an error
 * @param mixed $project      project reference forwarded to getZonesPaths()
 *                            and storeErrors()
 *
 * @return array two-element list:
 *               [0] zone name => query text => list of ".txt" file names,
 *               [1] zone name => list of word counts, one per processed file
 *                   (an empty array when no file was processed)
 */
public function makeZonesFilesArrays($nodes_arrays, $project) {
    $zones = $this->allZones();
    $storekeeper = new Storekeeper();
    $paths = $this->getZonesPaths($zones, $project);

    // zone name => query text => list of file names
    $files_arrays = [];
    // zone name => list of word counts; initialised so the return value is
    // always defined, even when nothing is processed
    $words = [];

    // Report and drop the queries that did not produce an array of files.
    foreach ($nodes_arrays as $query_text => $url) {
        if (!is_array($url)) {
            // NOTE(review): the original passed an undefined $project_id here;
            // $project is the only project reference in scope. Confirm that
            // storeErrors() expects this value and not an id derived from it.
            $storekeeper->storeErrors([$url], $project);
            unset($nodes_arrays[$query_text]);
        }
    }

    // Zones whose storage files are NOT emptied before the nodes are split.
    // NOTE(review): presumably separatePlainFromFragment() overwrites these
    // files instead of appending to them - confirm.
    $zones_kept_as_is = ['a', 'h1', 'title'];

    foreach ($nodes_arrays as $query_text => $files) {
        foreach ($files as $file => $nodes) {
            $filename = str_replace(".html", ".txt", $file);

            foreach ($zones as $zone) {
                // register the file name under the zone and the query
                $files_arrays[$zone][$query_text][] = $filename;

                if (!in_array($zone, $zones_kept_as_is, true)) {
                    // reset the content of the storage file
                    file_put_contents($paths[$zone] . $filename, "");
                }
            }

            // add the text content of each node to the corresponding zone file
            $this->separatePlainFromFragment($nodes, $paths, $filename);
        }
    }

    // Replace every zone file with its cleaned version. Per the original
    // author's note, the cleaning removes punctuation marks, stop-words and
    // extra whitespace.
    $lemmatizator = new Lemmatizator();
    foreach ($zones as $zone) {
        if (empty($files_arrays[$zone])) {
            // nothing was registered for this zone (e.g. every query failed)
            continue;
        }

        foreach ($files_arrays[$zone] as $files_array) {
            foreach ($files_array as $filename) {
                $path = $paths[$zone] . $filename;

                $content = file_get_contents($path);
                if ($content === false) {
                    // unreadable or missing file: treat it as empty text
                    // instead of passing false further down
                    $content = "";
                }

                // the extra character list makes Cyrillic letters and '*'
                // count as word characters
                $words[$zone][] = str_word_count($content, 0, "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя*");

                $short_content = $lemmatizator->makeTextShort($content);
                file_put_contents($path, $short_content);
            }
        }
    }

    return [$files_arrays, $words];
}