/**
 * Download a specific law's card page and persist the parsed data.
 *
 * The card is fetched through the downloadCard() helper function, then, inside
 * a single database transaction, the law record, its revisions and the
 * follow-up download jobs are updated from the parsed card.
 *
 * @param string $id          Law ID.
 * @param bool   $re_download Whether or not to re-download card page.
 *
 * @return Law
 * @throws JobChangePriorityException When the download fails with any exception other than ProxyBanned.
 * @throws Exceptions\ProxyBanned     Re-thrown untouched so the caller can react to a banned proxy.
 */
function downloadCard($id, $re_download = false)
{
    /**
     * @var $law Law
     */
    // NOTE(review): Law::find() result is used without a null check; presumably
    // callers only pass IDs of existing laws - confirm.
    $law = Law::find($id);

    try {
        $card = downloadCard($id, [
            're_download'   => $re_download || $this->re_download,
            'check_related' => $law->status == Law::NOT_DOWNLOADED && !max_date(),
        ]);
    } catch (Exceptions\ProxyBanned $e) {
        throw $e;
    } catch (\Exception $e) {
        // Strip the exceptions namespace so the job message stays short.
        $message = str_replace('ShvetsGroup\\Service\\Exceptions\\', '', get_class($e))
            . ($e->getMessage() ? ': ' . $e->getMessage() : '');
        throw new JobChangePriorityException($message, -15);
    }

    DB::transaction(function () use ($law, $card) {
        $law->card = $card['card'];
        $law->title = $card['title'];
        $law->date = $card['date'];
        $law->setIssuers($card['meta'][Issuer::FIELD_NAME]);
        $law->setTypes($card['meta'][Type::FIELD_NAME]);
        $law->state = isset($card['meta'][State::FIELD_NAME])
            ? reset($card['meta'][State::FIELD_NAME])
            : State::STATE_UNKNOWN;
        $law->has_text = $card['has_text'] ? Law::HAS_TEXT : Law::NO_TEXT;

        $has_unknown_revision = false;
        // Iterate by value: the entries are only read, and a by-reference loop
        // variable would stay bound to the last entry after the loop.
        foreach ($card['revisions'] as $revision) {
            if ($revision['date'] == '??.??.????') {
                // Revisions with an unknown date cannot be stored; remember that we saw one.
                $has_unknown_revision = true;
                continue;
            }

            $data = [
                'law_id'  => $revision['law_id'],
                'date'    => $revision['date'],
                'comment' => $revision['comment'],
            ];

            $revision_without_text = isset($revision['no_text'])
                && $revision['no_text']
                && $revision['date'] != $card['active_revision'];
            if ($law->notHasText() || $revision_without_text) {
                $data['status'] = Revision::NO_TEXT;
                $data['text'] = '';
            }

            $r = Revision::findROrNew($data['law_id'], $data['date']);
            $r->save();
            $r->update($data);
        }

        // We should update revision which has just come into power.
        if ($law->active_revision && $law->active_revision != $card['active_revision']) {
            // Look the revision up by the law's own id instead of a variable left
            // over from the loop above (which is undefined when no revision was stored).
            // NOTE(review): assumes revision law_id always equals the law's id - confirm.
            $new_active_revision = Revision::find($law->id, $card['active_revision']);
            if ($new_active_revision) {
                $new_active_revision->update(['status' => Revision::NEEDS_UPDATE]);
            }
        }
        $law->active_revision = $card['active_revision'];

        // Queue a download for every revision that still needs updating; the
        // active revision gets the higher priority (0 instead of -1).
        foreach ($law->revisions()->where('status', Revision::NEEDS_UPDATE)->get() as $pending) {
            $this->jobsManager->add(
                'command.lawgrabber.download',
                'downloadRevision',
                ['law_id' => $pending->law_id, 'date' => $pending->date],
                'download',
                $pending->date == $law->active_revision ? 0 : -1
            );
        }

        if (!empty($card['changes_laws'])) {
            // whereIn() is required to match a list of ids; where() compares
            // against a single value and would not flag every changed law.
            Law::whereIn('id', array_column($card['changes_laws'], 'id'))
                ->update(['status' => Law::DOWNLOADED_BUT_HAS_UNKNOWN_REVISION]);

            foreach ($card['changes_laws'] as $l) {
                $this->jobsManager->add(
                    'command.lawgrabber.download',
                    'downloadCard',
                    ['id' => $l['id'], 're_download' => true],
                    'download',
                    2
                );
            }
        }

        $law->card_updated = $card['timestamp'];
        $law->status = $has_unknown_revision
            ? Law::DOWNLOADED_BUT_HAS_UNKNOWN_REVISION
            : Law::UP_TO_DATE;
        $law->save();
    });

    return $law;
}
/**
 * Execute console command.
 *
 * Compares the download status stored for every law whose status is below
 * Law::SAVED with the files present in the downloads directory, counts the
 * inconsistencies found and prints a summary. When the "fix" option is set,
 * inconsistent laws are reset (and their directories removed where the
 * original checks did so). When the "old_files" option is set, the command
 * only delegates to move_files().
 *
 * @return mixed Result of move_files() when "old_files" is set, true otherwise.
 */
public function handle()
{
    $fix = $this->option('fix');
    $old = $this->option('old_files');
    if ($old) {
        return $this->move_files();
    }

    $downloaded_card = Law::where('status', Law::UP_TO_DATE)->count();
    $downloaded_text = Law::where('status', Law::DOWNLOADED_REVISIONS)->count();
    $downloaded_relations = Law::where('status', Law::DOWNLOADED_RELATIONS)->count();
    $without_text = Law::where('status', '>', Law::NOT_DOWNLOADED)->where('has_text', Law::NO_TEXT)->count();
    $not_downloaded = Law::where('status', Law::NOT_DOWNLOADED)->count();

    $law_dir = $this->getDownloadsDir() . '/zakon.rada.gov.ua/laws/show/';

    // Helpers are closures rather than named functions declared inside the
    // method: a nested named function is (re)declared on every call, so a
    // second handle() call in the same process would fail with "Cannot redeclare".
    // $is_text is accepted for call-site compatibility but is not used.
    $is_fake = function ($html, $is_text = true) {
        return downloader()->detectFakeContent($html);
    };
    $is_403 = function ($html) {
        return downloader()->detectFakeContent($html, '403');
    };
    $remove_dir = function ($dir) {
        // Quote the path for the shell. Manual single-quoting is used instead of
        // escapeshellarg(), which may strip non-ASCII bytes depending on the
        // locale and would then point rm at a different path.
        exec('rm -rf ' . "'" . str_replace("'", "'\\''", $dir) . "'");
    };
    // True when an existing text/page file contains fake content.
    $has_fake_content = function ($text_path, $page_path) use ($is_fake) {
        return file_exists($text_path) && $is_fake(file_get_contents($text_path), 1)
            || file_exists($page_path) && $is_fake(file_get_contents($page_path), 0);
    };

    $result_count = Law::where('status', '<', Law::SAVED)->count();
    $nd_orphaned_dirs = 0;
    $d_broken_card = 0;
    $d_no_files = 0;
    $d_fake_content = 0;
    // NOTE(review): this counter is printed in the summary but no check increments it.
    $d_unknown_text_true_content = 0;
    $d_unknown_text_no_text = 0;
    $i = 1;

    // Prints the progress line and advances the counter; called once per law.
    $report_progress = function () use (&$i, $result_count) {
        $percent = $result_count > 0 ? floor($i / $result_count * 100) : 100;
        print "\rChecked " . $i . ' of ' . $result_count . ' (' . $percent . '%)';
        $i++;
    };

    Law::where('status', '<', Law::SAVED)->orderBy('id')->chunk(200, function ($laws) use (
        $fix,
        $law_dir,
        $is_403,
        $remove_dir,
        $has_fake_content,
        $report_progress,
        &$nd_orphaned_dirs,
        &$d_broken_card,
        &$d_no_files,
        &$d_fake_content,
        &$d_unknown_text_no_text
    ) {
        foreach ($laws as $law) {
            $law_path = $law_dir . $law->id;
            $card_path = $law_dir . $law->id . '/card.html';
            $text_path = $law_dir . $law->id . '/text.html';
            $page_path = $law_dir . $law->id . '/page.html';

            // Not downloaded, yet a directory exists: junk directory.
            if ($law->status == Law::NOT_DOWNLOADED && is_dir($law_path)) {
                $nd_orphaned_dirs++;
                if ($fix) {
                    $remove_dir($law_path);
                }
                // Count this law in the progress output before skipping the other checks.
                $report_progress();
                continue;
            }

            // Marked as downloaded, but the card is missing or is a 403 page.
            if ($law->status > Law::NOT_DOWNLOADED
                && (file_exists($card_path) && $is_403(file_get_contents($card_path)) || !file_exists($card_path))
            ) {
                $d_broken_card++;
                if ($fix) {
                    $remove_dir($law_path);
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            // Revisions marked as downloaded with text, but no text/page file on disk.
            if ($law->status >= Law::DOWNLOADED_REVISIONS
                && $law->has_text == Law::HAS_TEXT
                && !file_exists($text_path)
                && !file_exists($page_path)
            ) {
                $d_no_files++;
                if ($fix) {
                    $remove_dir($law_path);
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            // Revisions marked as downloaded with text, but the stored file is fake content.
            if ($law->status >= Law::DOWNLOADED_REVISIONS
                && $law->has_text == Law::HAS_TEXT
                && $has_fake_content($text_path, $page_path)
            ) {
                $d_fake_content++;
                if ($fix) {
                    $remove_dir($law_path);
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            // Text availability unknown, but a stored file is fake content.
            if ($law->status > Law::NOT_DOWNLOADED
                && $law->has_text == Law::UNKNOWN
                && $has_fake_content($text_path, $page_path)
            ) {
                $d_fake_content++;
                if ($fix) {
                    $remove_dir($law_path);
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            // Text availability unknown, no text/page file, but the card exists:
            // decide from the card whether the law simply has no text.
            if ($law->status >= Law::DOWNLOADED_REVISIONS
                && $law->has_text == Law::UNKNOWN
                && !(file_exists($text_path) || file_exists($page_path))
                && file_exists($card_path)
            ) {
                $html = file_get_contents($card_path);
                if (strpos($html, 'Текст відсутній') !== false) {
                    $d_unknown_text_no_text++;
                    if ($fix) {
                        $law->update(['status' => Law::UP_TO_DATE, 'has_text' => Law::NO_TEXT]);
                    }
                } else {
                    $d_no_files++;
                    if ($fix) {
                        $law->update(['status' => Law::NOT_DOWNLOADED]);
                    }
                }
            }

            // Text availability unknown and no files at all: reset without counting.
            if ($law->status > Law::NOT_DOWNLOADED
                && $law->has_text == Law::UNKNOWN
                && !(file_exists($text_path) || file_exists($page_path))
                && !file_exists($card_path)
            ) {
                if ($fix) {
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            $report_progress();
        }
    });

    print "\n" . 'Downloaded card : ' . $downloaded_card;
    print "\n" . 'Downloaded text : ' . $downloaded_text . ' (without text: ' . $without_text . ')';
    print "\n" . 'Downloaded relations : ' . $downloaded_relations;
    print "\n" . 'Not downloaded : ' . $not_downloaded;
    print "\n" . '-------------------------------------------------';
    print "\n" . 'Junk directories : ' . $nd_orphaned_dirs;
    print "\n" . 'Broken card page : ' . $d_broken_card;
    print "\n" . 'Missing files for downloads: ' . $d_no_files;
    print "\n" . 'Fake content for downloads : ' . $d_fake_content;
    print "\n" . 'Has text, but not marked : ' . $d_unknown_text_true_content;
    print "\n" . 'No text, but not marked : ' . $d_unknown_text_no_text;
    if ($fix) {
        print "\n" . 'ALL PROBLEMS FIXED';
    }
    print "\n";

    return true;
}