Beispiel #1
1
 /**
  * Download a specific law's card page and synchronise the law record with it.
  *
  * After the card is fetched, everything else happens inside one DB
  * transaction: card fields are copied onto the law, every revision with a
  * known date is created/updated, download jobs are queued for revisions
  * marked Revision::NEEDS_UPDATE and for laws listed in 'changes_laws', and
  * the law is saved.
  *
  * @param string $id          Law ID.
  * @param bool   $re_download Whether or not to re-download card page.
  *
  * @return Law
  * @throws JobChangePriorityException When the card download fails with anything but a proxy ban.
  * @throws Exceptions\ProxyBanned     Re-thrown untouched.
  */
 function downloadCard($id, $re_download = false)
 {
     /**
      * @var $law Law
      */
     $law = Law::find($id);
     try {
         // NOTE(review): this unqualified call resolves to a function named
         // downloadCard(), not to this method - confirm that is the intended
         // downloader entry point.
         $card = downloadCard($id, [
             're_download' => $re_download || $this->re_download,
             'check_related' => $law->status == Law::NOT_DOWNLOADED && !max_date(),
         ]);
     } catch (Exceptions\ProxyBanned $e) {
         throw $e;
     } catch (\Exception $e) {
         // Strip the namespace so the job message only carries the short class name.
         $message = str_replace('ShvetsGroup\\Service\\Exceptions\\', '', get_class($e)) . ($e->getMessage() ? ': ' . $e->getMessage() : '');
         throw new JobChangePriorityException($message, -15);
     }
     DB::transaction(function () use ($law, $card) {
         $law->card = $card['card'];
         $law->title = $card['title'];
         $law->date = $card['date'];
         $law->setIssuers($card['meta'][Issuer::FIELD_NAME]);
         $law->setTypes($card['meta'][Type::FIELD_NAME]);
         $law->state = isset($card['meta'][State::FIELD_NAME]) ? reset($card['meta'][State::FIELD_NAME]) : State::STATE_UNKNOWN;
         $law->has_text = $card['has_text'] ? Law::HAS_TEXT : Law::NO_TEXT;

         $has_unknown_revision = false;
         // law_id of the last revision stored below; stays null when the card
         // has no revision with a known date.
         $last_revision_law_id = null;
         // Iterate by value: a by-reference loop variable would stay bound to
         // the last array element and be overwritten by the later foreach
         // that reuses the name $revision.
         foreach ($card['revisions'] as $revision) {
             if ($revision['date'] == '??.??.????') {
                 $has_unknown_revision = true;
                 continue;
             }
             $data = ['law_id' => $revision['law_id'], 'date' => $revision['date'], 'comment' => $revision['comment']];
             $last_revision_law_id = $data['law_id'];
             $revision_has_no_text = isset($revision['no_text']) && $revision['no_text'] && $revision['date'] != $card['active_revision'];
             if ($law->notHasText() || $revision_has_no_text) {
                 $data['status'] = Revision::NO_TEXT;
                 $data['text'] = '';
             }
             $r = Revision::findROrNew($data['law_id'], $data['date']);
             $r->save();
             $r->update($data);
         }

         // We should update revision which has just come into power.
         if ($law->active_revision && $law->active_revision != $card['active_revision']) {
             // Previously this read the loop-leaked $data, which is undefined
             // when no revision was processed; fall back to the law's own id.
             $revision_law_id = $last_revision_law_id !== null ? $last_revision_law_id : $law->id;
             Revision::find($revision_law_id, $card['active_revision'])->update(['status' => Revision::NEEDS_UPDATE]);
         }
         $law->active_revision = $card['active_revision'];

         // The active revision is queued with priority 0, all others with -1.
         foreach ($law->revisions()->where('status', Revision::NEEDS_UPDATE)->get() as $pending) {
             $this->jobsManager->add('command.lawgrabber.download', 'downloadRevision', ['law_id' => $pending->law_id, 'date' => $pending->date], 'download', $pending->date == $law->active_revision ? 0 : -1);
         }

         if (isset($card['changes_laws']) && $card['changes_laws']) {
             // whereIn: the list of ids must be matched as a set, not compared
             // to the column with "=".
             Law::whereIn('id', array_column($card['changes_laws'], 'id'))->update(['status' => Law::DOWNLOADED_BUT_HAS_UNKNOWN_REVISION]);
             foreach ($card['changes_laws'] as $l) {
                 $this->jobsManager->add('command.lawgrabber.download', 'downloadCard', ['id' => $l['id'], 're_download' => true], 'download', 2);
             }
         }

         $law->card_updated = $card['timestamp'];
         $law->status = $has_unknown_revision ? Law::DOWNLOADED_BUT_HAS_UNKNOWN_REVISION : Law::UP_TO_DATE;
         $law->save();
     });
     return $law;
 }
Beispiel #2
0
 /**
  * Execute console command.
  *
  * Walks every law whose status is below Law::SAVED, compares its database
  * state with the files under the downloads directory, counts the
  * inconsistencies found and prints a summary. With the "fix" option the
  * offending directories are removed and/or the law status is reset; with the
  * "old_files" option the command only delegates to move_files().
  *
  * @return mixed Result of move_files() when "old_files" is set, otherwise true.
  */
 public function handle()
 {
     $fix = $this->option('fix');
     $old = $this->option('old_files');
     if ($old) {
         return $this->move_files();
     }
     $downloaded_card = Law::where('status', Law::UP_TO_DATE)->count();
     $downloaded_text = Law::where('status', Law::DOWNLOADED_REVISIONS)->count();
     $downloaded_relations = Law::where('status', Law::DOWNLOADED_RELATIONS)->count();
     $without_text = Law::where('status', '>', Law::NOT_DOWNLOADED)->where('has_text', Law::NO_TEXT)->count();
     $not_downloaded = Law::where('status', Law::NOT_DOWNLOADED)->count();
     $law_dir = $this->getDownloadsDir() . '/zakon.rada.gov.ua/laws/show/';

     // Closures instead of named functions: a named function declared inside
     // a method becomes global the first time the method runs, so a second
     // handle() call in the same process would die with "Cannot redeclare".
     $is_fake = function ($html) {
         return downloader()->detectFakeContent($html);
     };
     $is_403 = function ($html) {
         return downloader()->detectFakeContent($html, '403');
     };
     $remove_dir = function ($dir) {
         // Single-quote the path so whitespace or shell metacharacters cannot
         // alter the command. Quoted by hand rather than with
         // escapeshellarg(), which may strip multibyte characters depending
         // on the process locale.
         exec('rm -rf -- ' . "'" . str_replace("'", "'\\''", $dir) . "'");
     };
     // True when an existing text.html or page.html is detected as fake content.
     $has_fake_file = function ($text_path, $page_path) use ($is_fake) {
         return (file_exists($text_path) && $is_fake(file_get_contents($text_path)))
             || (file_exists($page_path) && $is_fake(file_get_contents($page_path)));
     };

     $result_count = Law::where('status', '<', Law::SAVED)->count();
     $nd_orphaned_dirs = 0;
     $d_broken_card = 0;
     $d_no_files = 0;
     $d_fake_content = 0;
     // NOTE(review): never incremented below, so the summary always reports 0.
     $d_unknown_text_true_content = 0;
     $d_unknown_text_no_text = 0;
     $i = 1;
     Law::where('status', '<', Law::SAVED)->orderBy('id')->chunk(200, function ($laws) use ($fix, $law_dir, $is_403, $has_fake_file, $remove_dir, $result_count, &$i, &$nd_orphaned_dirs, &$d_broken_card, &$d_no_files, &$d_fake_content, &$d_unknown_text_no_text) {
         foreach ($laws as $law) {
             $law_path = $law_dir . $law->id;
             $card_path = $law_path . '/card.html';
             $text_path = $law_path . '/text.html';
             $page_path = $law_path . '/page.html';

             // Directory exists although the law was never downloaded.
             if ($law->status == Law::NOT_DOWNLOADED && is_dir($law_path)) {
                 $nd_orphaned_dirs++;
                 if ($fix) {
                     $remove_dir($law_path);
                 }
                 continue;
             }

             // The checks below are deliberately independent and re-read both
             // the file system and $law->status, because an earlier fix may
             // already have removed the directory or reset the status.

             // Card page missing or detected as a 403 page.
             if ($law->status > Law::NOT_DOWNLOADED && (!file_exists($card_path) || $is_403(file_get_contents($card_path)))) {
                 $d_broken_card++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Marked as having text, but neither text nor page file exists.
             if ($law->status >= Law::DOWNLOADED_REVISIONS && $law->has_text == Law::HAS_TEXT && !file_exists($text_path) && !file_exists($page_path)) {
                 $d_no_files++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Marked as having text, but a stored file is fake content.
             if ($law->status >= Law::DOWNLOADED_REVISIONS && $law->has_text == Law::HAS_TEXT && $has_fake_file($text_path, $page_path)) {
                 $d_fake_content++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Text flag unknown, but a stored file is fake content.
             if ($law->status > Law::NOT_DOWNLOADED && $law->has_text == Law::UNKNOWN && $has_fake_file($text_path, $page_path)) {
                 $d_fake_content++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Text flag unknown, no text/page file, but the card is present:
             // decide from the card whether the law has no text at all.
             if ($law->status >= Law::DOWNLOADED_REVISIONS && $law->has_text == Law::UNKNOWN && !(file_exists($text_path) || file_exists($page_path)) && file_exists($card_path)) {
                 $html = file_get_contents($card_path);
                 if (strpos($html, 'Текст відсутній') !== false) {
                     $d_unknown_text_no_text++;
                     if ($fix) {
                         $law->update(['status' => Law::UP_TO_DATE, 'has_text' => Law::NO_TEXT]);
                     }
                 } else {
                     $d_no_files++;
                     if ($fix) {
                         $law->update(['status' => Law::NOT_DOWNLOADED]);
                     }
                 }
             }

             // Text flag unknown and no files at all: reset without counting.
             if ($law->status > Law::NOT_DOWNLOADED && $law->has_text == Law::UNKNOWN && !(file_exists($text_path) || file_exists($page_path)) && !file_exists($card_path)) {
                 if ($fix) {
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Guard the percentage: the count was taken before iteration started.
             $percent = $result_count > 0 ? floor($i / $result_count * 100) : 0;
             print "\rChecked " . $i . ' of ' . $result_count . ' (' . $percent . '%)';
             $i++;
         }
     });
     print "\n" . 'Downloaded card      : ' . $downloaded_card;
     print "\n" . 'Downloaded text      : ' . $downloaded_text . ' (without text: ' . $without_text . ')';
     print "\n" . 'Downloaded relations : ' . $downloaded_relations;
     print "\n" . 'Not downloaded : ' . $not_downloaded;
     print "\n" . '-------------------------------------------------';
     print "\n" . 'Junk directories           : ' . $nd_orphaned_dirs;
     print "\n" . 'Broken card page           : ' . $d_broken_card;
     print "\n" . 'Missing files for downloads: ' . $d_no_files;
     print "\n" . 'Fake content for downloads : ' . $d_fake_content;
     print "\n" . 'Has text, but not marked   : ' . $d_unknown_text_true_content;
     print "\n" . 'No text, but not marked    : ' . $d_unknown_text_no_text;
     if ($fix) {
         print "\n" . 'ALL PROBLEMS FIXED';
     }
     print "\n";
     return true;
 }