Example #1
0
 /**
  * Checks the formatter output for a revision comment against the expected
  * string supplied by the data provider.
  *
  * A law with the given id, title and type is created first, then an
  * in-memory revision referencing it is handed to the formatter.
  *
  * @dataProvider revisions
  */
 public function testFormatRevisionComment($law_id, $law_type, $law_title, $date, $comment, $add_Links, $expected)
 {
     // The law record is created for its side effect only; the assertion
     // below works on the revision.
     Law::create(['id' => $law_id, 'title' => $law_title])->setTypes([$law_type]);

     $revision = new Revision([
         'law_id' => $law_id,
         'date' => $date,
         'comment' => $comment,
     ]);
     $actual = $this->formatter->formatRevisionComment($revision, $add_Links);

     $this->assertEquals($expected, $actual);
 }
 /**
  * Download a single revision (edition) of a law.
  *
  * Works out the source URL for the requested edition, forces the 'save_as'
  * option to the edition-specific page path and delegates to download().
  *
  * @param string $law_id
  * @param string $date    Revision date in Y-m-d format.
  * @param array  $options Extra options for download(). 'save_as' is always
  *                        overwritten; the other keys set here are only
  *                        added when the caller did not provide them.
  *
  * @return string
  * @throws Exceptions\RevisionDateNotFound
  * @throws Exceptions\WrongDateException
  */
 public function downloadRevision($law_id, $date, $options = [])
 {
     // Hard-wired off; the flag only selects the URL prefix below.
     $opendata = false;

     $law = Law::find($law_id);
     $law_url = ($opendata ? '/go/' : '/laws/show/') . $law_id;
     $edition_part = '/ed' . date_format(date_create_from_format('Y-m-d', $date), 'Ymd');

     // The active revision is fetched from the bare law URL, any other
     // revision from the URL with the edition suffix.
     $url = ($law->active_revision == $date) ? $law_url : ($law_url . $edition_part);

     // Regardless of the source URL, the page is stored under the edition path.
     $options['save_as'] = '/laws/show/' . $law_id . $edition_part . '/page';

     $options += [
         'law_id' => $law_id,
         'date' => $date,
         'law_url' => $law_url,
         'edition_part' => $edition_part,
         'opendata' => $opendata,
     ];

     return $this->download($url, $options);
 }
 /**
  * Convert HTML anchors in a revision text into Markdown links.
  *
  * Anchors with an empty href, or pointing at the law the revision belongs
  * to, are replaced by their plain link text. Links to other laws that exist
  * in the database and have at least one issuer are rewritten to
  * "/<issuer group>/<issuer name>/<law id>.md"; every other link keeps its
  * (url-decoded) href.
  *
  * @param string   $text     Text containing <a> tags.
  * @param Revision $revision Revision the text belongs to.
  *
  * @return string Text with anchors replaced by Markdown links.
  */
 public function formatLinks($text, Revision $revision)
 {
     $text = preg_replace_callback('%<a (?:class="(?:rvts96|rvts99)" )?href="(.*?)(?:/ed[0-9]+)?(?:/paran[0-9]+)?(?:#n[0-9]+)?"(?: target="_blank")?>(.*?)</a>%', function ($matches) use($revision) {
         $url = urldecode($matches[1]);
         $label = $matches[2];

         // Empty links and self-references carry no information: keep the text only.
         if (!$url || $url == '/laws/show/' . $revision->law_id) {
             return $label;
         }

         // Separate variable so the outer match data is not overwritten.
         if (preg_match('%/laws/show/(.*?)(?:$|/ed|#|\\?)%', $url, $law_match)) {
             $law_id = $law_match[1];
             $law = Law::find($law_id);
             if ($law) {
                 $issuers = $law->issuers()->get()->all();
                 $first_issuer = reset($issuers);
                 // reset() returns false for an empty list; in that case the
                 // original URL is kept instead of building a broken path.
                 if ($first_issuer) {
                     $url = '/' . $first_issuer->group_name . '/' . $first_issuer->name . '/' . $law_id . '.md';
                 }
             }
         }

         return "[{$label}]({$url})";
     }, $text);

     // Drop the padding spaces inside parentheses that wrap a single link.
     $text = preg_replace('%\\( \\[(.*?)\\]\\((.*?)\\) \\)%', "([\$1](\$2))", $text);

     return $text;
 }
Example #4
0
 /**
  * Downloads the card and one revision for each sample law and checks the
  * resulting revision text and status against the stored fixtures.
  *
  * Every scenario first asserts that the pages it needs are already
  * available to the downloader.
  */
 public function testDownloadRevision()
 {
     $cases = [
         [
             'law_id' => '254к/96-вр',
             'date' => '2014-05-15',
             'pages' => [
                 '/laws/card/254к/96-вр',
                 '/laws/show/254к/96-вр/ed20140515/page',
                 '/laws/show/254к/96-вр/ed20140515/page2',
                 '/laws/show/254к/96-вр/ed20140515/page3',
                 '/laws/show/254к/96-вр/ed20140515/page4',
             ],
         ],
         [
             'law_id' => '2952-17',
             'date' => '2011-02-01',
             'pages' => [
                 '/laws/card/2952-17',
                 '/laws/show/2952-17/ed20110201/page',
             ],
         ],
     ];

     foreach ($cases as $case) {
         foreach ($case['pages'] as $page) {
             $this->assertTrue(downloader()->isDownloaded($page));
         }

         Law::firstOrCreate(['id' => $case['law_id']]);
         $law = $this->obj->downloadCard($case['law_id']);
         $revision = $this->obj->downloadRevision($case['law_id'], $case['date']);

         $expected_text = file_get_contents(base_path() . '/tests/fixtures/partials/' . $case['law_id'] . '/text.txt');

         // assertEquals() takes the expected value first, so that failure
         // messages report expected/actual the right way round.
         $this->assertEquals($expected_text, $revision->text);
         $this->assertEquals($expected_text, $law->active_revision()->first()->text);
         $this->assertEquals(Revision::UP_TO_DATE, $revision->status);
     }
 }
Example #5
0
 /**
  * Crawl one page of a daily law list, store every law found on it and queue
  * a card download job for each.
  *
  * @param string $law_list_url Law list URL.
  * @param int    $page_num     Page number; forced re-download is only
  *                             considered for pages after the first.
  * @param string $date         List date (Y-m-d); the list is saved only
  *                             when this is not today's date.
  * @param bool   $re_download  Request a re-download (combined with the
  *                             object-wide re_download setting).
  */
 public function discoverDailyLawListPage($law_list_url, $page_num, $date, $re_download = false)
 {
     $force_download = $page_num > 1 && ($re_download || $this->re_download);
     $save_list = $date != date('Y-m-d');

     $data = downloadList($law_list_url, ['re_download' => $force_download, 'save' => $save_list]);

     foreach ($data['laws'] as $id => $law) {
         $record = Law::firstOrCreate(['id' => $id]);
         $record->update(['date' => $law['date']]);

         $this->jobsManager->add('command.lawgrabber.download', 'downloadCard', ['id' => $id], 'download', 1);
     }
 }
Example #6
0
 /**
  * Job failure callback: marks the revision as failed to download.
  *
  * Does nothing when the law or the revision cannot be found, so the
  * failure handler itself does not fail on a null lookup result.
  *
  * @param string     $law_id
  * @param string     $date
  * @param bool|false $re_download Not used by this callback.
  */
 public function downloadRevisionFail($law_id, $date, $re_download = false)
 {
     $law = Law::find($law_id);
     if (!$law) {
         // Nothing to mark: the law is unknown.
         return;
     }

     $revision = $law->getRevision($date);
     if (!$revision) {
         return;
     }

     $revision->update(['status' => Revision::DOWNLOAD_ERROR]);
 }
Example #7
0
 /**
  * Build the URL of a law's Markdown file.
  *
  * - Unknown law: "<base>/<law_id>" (no ".md" suffix).
  * - Raw mode: "<base>/laws/<law_id>.md".
  * - Otherwise: "<base>/<issuer group>/<issuer name>/<law_id>.md", using the
  *   first issuer of the law. A law without issuers falls back to the same
  *   URL as an unknown law.
  *
  * @param string $law_id
  * @param string $base Prefix for the URL; a trailing slash is stripped.
  * @param bool   $raw  Force raw mode for this call. Raw mode is also used
  *                     when the object's is_raw property is set.
  *
  * @return string
  */
 public function getLawURL($law_id, $base = '', $raw = false)
 {
     $root = rtrim($base, '/') . '/';

     $law = Law::find($law_id);
     if (!$law) {
         return $root . $law_id;
     }

     // NOTE(review): $raw used to be accepted but ignored; it is now honoured
     // in addition to $this->is_raw. Confirm no caller relied on it being inert.
     if ($raw || $this->is_raw) {
         // Raw URLs do not depend on issuers, so they are not queried here.
         return $root . 'laws/' . $law_id . '.md';
     }

     $issuers = $law->issuers()->get()->all();
     $first_issuer = reset($issuers);
     if (!$first_issuer) {
         // reset() returns false for an empty list; there is no issuer
         // directory to point at.
         return $root . $law_id;
     }

     return $root . $first_issuer->group_name . '/' . $first_issuer->name . '/' . $law_id . '.md';
 }
Example #8
0
<?php

use LawGrabber\Laws\Law;
use LawGrabber\Laws\Revision;
use LawPages\LawRenderer;
// Law ids may contain Latin and Cyrillic letters, digits, '_', '-' and '/'.
// Because '/' is allowed inside an id, the more specific routes (with the
// "/ed{date}" and "/raw" suffixes) are registered before the generic one.
$law_id_pattern = '[A-Za-z0-9_абвгґдеєжзиіїйклмнопрстуфхцчшщьюяАБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЬЮЯыЫъЪ\\-\\/]+';

// Converts the compact edition date from the URL (Ymd) to Y-m-d.
// The route constraint only guarantees digits, so a value that cannot be
// parsed as Ymd (e.g. wrong length) ends in a 404 instead of a PHP error.
$parse_edition_date = function ($date) {
    $parsed = date_create_from_format('Ymd', $date);
    if ($parsed === false) {
        abort(404);
    }

    return date_format($parsed, 'Y-m-d');
};

// Specific revision, raw rendering.
Route::get('/laws/show/{law_id}/ed{date}/raw', function ($law_id, $date) use ($parse_edition_date) {
    $law = Law::findOrFail($law_id);
    $date = $parse_edition_date($date);
    $revision = Revision::find($law_id, $date);
    return view('lawpages::law_page')->with(['law' => $law, 'revision' => $revision, 'raw' => true]);
})->where(['law_id' => $law_id_pattern, 'date' => '[0-9]+']);

// Specific revision.
Route::get('/laws/show/{law_id}/ed{date}', function ($law_id, $date) use ($parse_edition_date) {
    $law = Law::findOrFail($law_id);
    $date = $parse_edition_date($date);
    $revision = Revision::find($law_id, $date);
    return view('lawpages::law_page')->with(['law' => $law, 'revision' => $revision]);
})->where(['law_id' => $law_id_pattern, 'date' => '[0-9]+']);

// Law page without an explicit revision, raw rendering.
Route::get('/laws/show/{law_id}/raw', function ($law_id) {
    $law = Law::findOrFail($law_id);
    return view('lawpages::law_page')->with(['law' => $law, 'raw' => true]);
})->where(['law_id' => $law_id_pattern]);

// Law page without an explicit revision.
Route::get('/laws/show/{law_id}', function ($law_id) {
    $law = Law::findOrFail($law_id);
    return view('lawpages::law_page')->with(['law' => $law]);
})->where(['law_id' => $law_id_pattern]);
// TODO: Laws with includes.
// TODO: Laws without text, but with files.
// TODO: Laws with tables.
Example #9
0
 /**
  * Execute console command.
  *
  * With the 'old_files' option the call is delegated to move_files().
  * Otherwise every law with a status below Law::SAVED is compared with the
  * files in the downloads directory, inconsistencies are counted and a
  * summary is printed. With the 'fix' option the offending directories are
  * removed and/or the law status is reset so the law is downloaded again.
  *
  * @return mixed Result of move_files() in 'old_files' mode, true otherwise.
  */
 public function handle()
 {
     $fix = $this->option('fix');
     $old = $this->option('old_files');
     if ($old) {
         return $this->move_files();
     }

     $downloaded_card = Law::where('status', Law::UP_TO_DATE)->count();
     $downloaded_text = Law::where('status', Law::DOWNLOADED_REVISIONS)->count();
     $downloaded_relations = Law::where('status', Law::DOWNLOADED_RELATIONS)->count();
     $without_text = Law::where('status', '>', Law::NOT_DOWNLOADED)->where('has_text', Law::NO_TEXT)->count();
     $not_downloaded = Law::where('status', Law::NOT_DOWNLOADED)->count();
     $law_dir = $this->getDownloadsDir() . '/zakon.rada.gov.ua/laws/show/';

     // Helpers are closures, not named functions: a named function declared
     // inside a method is registered on first execution and a second call of
     // handle() in the same process would fail with "cannot redeclare".
     $is_fake = function ($html) {
         return downloader()->detectFakeContent($html);
     };
     $is_403 = function ($html) {
         return downloader()->detectFakeContent($html, '403');
     };
     // Recursive removal without a shell. Law ids become part of the path and
     // contain non-ASCII characters, so the path is never handed to a shell
     // command. A missing path is silently ignored.
     $remove_dir = function ($dir) {
         if (is_link($dir) || is_file($dir)) {
             unlink($dir);
             return;
         }
         if (!is_dir($dir)) {
             return;
         }
         $items = new \RecursiveIteratorIterator(
             new \RecursiveDirectoryIterator($dir, \FilesystemIterator::SKIP_DOTS),
             \RecursiveIteratorIterator::CHILD_FIRST
         );
         foreach ($items as $item) {
             // Children come first, so directories are empty when reached.
             // Symlinks are unlinked, never followed.
             if ($item->isDir() && !$item->isLink()) {
                 rmdir($item->getPathname());
             } else {
                 unlink($item->getPathname());
             }
         }
         rmdir($dir);
     };
     // True when an existing text or page file is detected as fake content.
     $has_fake_content = function ($text_path, $page_path) use ($is_fake) {
         return (file_exists($text_path) && $is_fake(file_get_contents($text_path)))
             || (file_exists($page_path) && $is_fake(file_get_contents($page_path)));
     };

     $result_count = Law::where('status', '<', Law::SAVED)->count();
     $nd_orphaned_dirs = 0;
     $d_broken_card = 0;
     $d_no_files = 0;
     $d_fake_content = 0;
     // NOTE(review): this counter is printed in the summary but nothing
     // increments it, so it always reports 0.
     $d_unknown_text_true_content = 0;
     $d_unknown_text_no_text = 0;
     $i = 1;

     Law::where('status', '<', Law::SAVED)->orderBy('id')->chunk(200, function ($laws) use($fix, $law_dir, $is_403, $remove_dir, $has_fake_content, &$i, &$result_count, &$nd_orphaned_dirs, &$d_broken_card, &$d_no_files, &$d_fake_content, &$d_unknown_text_true_content, &$d_unknown_text_no_text) {
         foreach ($laws as $law) {
             $law_path = $law_dir . $law->id;
             $card_path = $law_dir . $law->id . '/card.html';
             $text_path = $law_dir . $law->id . '/text.html';
             $page_path = $law_dir . $law->id . '/page.html';

             // The checks below run in order and re-read $law->status and the
             // file system each time, because an earlier fix may have reset
             // the status or removed the directory.

             // Not downloaded, yet a directory exists: leftover files.
             if ($law->status == Law::NOT_DOWNLOADED && is_dir($law_path)) {
                 $nd_orphaned_dirs++;
                 if ($fix) {
                     $remove_dir($law_path);
                 }
                 continue;
             }

             // Downloaded, but the card is missing or is a 403 page.
             if ($law->status > Law::NOT_DOWNLOADED && (!file_exists($card_path) || $is_403(file_get_contents($card_path)))) {
                 $d_broken_card++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Marked as having text, but neither text nor page file exists.
             if ($law->status >= Law::DOWNLOADED_REVISIONS && $law->has_text == Law::HAS_TEXT && !file_exists($text_path) && !file_exists($page_path)) {
                 $d_no_files++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Marked as having text, files exist, but the content is fake.
             if ($law->status >= Law::DOWNLOADED_REVISIONS && $law->has_text == Law::HAS_TEXT && $has_fake_content($text_path, $page_path)) {
                 $d_fake_content++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Text presence unknown, files exist, but the content is fake.
             if ($law->status > Law::NOT_DOWNLOADED && $law->has_text == Law::UNKNOWN && $has_fake_content($text_path, $page_path)) {
                 $d_fake_content++;
                 if ($fix) {
                     $remove_dir($law_path);
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             // Text presence unknown, no text/page files, card exists: the
             // card tells whether the law simply has no text.
             if ($law->status >= Law::DOWNLOADED_REVISIONS && $law->has_text == Law::UNKNOWN && !(file_exists($text_path) || file_exists($page_path)) && file_exists($card_path)) {
                 $html = file_get_contents($card_path);
                 if (strpos($html, 'Текст відсутній') !== false) {
                     $d_unknown_text_no_text++;
                     if ($fix) {
                         $law->update(['status' => Law::UP_TO_DATE, 'has_text' => Law::NO_TEXT]);
                     }
                 } else {
                     $d_no_files++;
                     if ($fix) {
                         $law->update(['status' => Law::NOT_DOWNLOADED]);
                     }
                 }
             }

             // Text presence unknown and no files at all: only fixable, not counted.
             if ($law->status > Law::NOT_DOWNLOADED && $law->has_text == Law::UNKNOWN && !(file_exists($text_path) || file_exists($page_path)) && !file_exists($card_path)) {
                 if ($fix) {
                     $law->update(['status' => Law::NOT_DOWNLOADED]);
                 }
             }

             print "\rChecked " . $i . ' of ' . $result_count . ' (' . floor($i / $result_count * 100) . '%)';
             $i++;
         }
     });

     print "\n" . 'Downloaded card      : ' . $downloaded_card;
     print "\n" . 'Downloaded text      : ' . $downloaded_text . ' (without text: ' . $without_text . ')';
     print "\n" . 'Downloaded relations : ' . $downloaded_relations;
     print "\n" . 'Not downloaded : ' . $not_downloaded;
     print "\n" . '-------------------------------------------------';
     print "\n" . 'Junk directories           : ' . $nd_orphaned_dirs;
     print "\n" . 'Broken card page           : ' . $d_broken_card;
     print "\n" . 'Missing files for downloads: ' . $d_no_files;
     print "\n" . 'Fake content for downloads : ' . $d_fake_content;
     print "\n" . 'Has text, but not marked   : ' . $d_unknown_text_true_content;
     print "\n" . 'No text, but not marked    : ' . $d_unknown_text_no_text;
     if ($fix) {
         print "\n" . 'ALL PROBLEMS FIXED';
     }
     print "\n";

     return true;
 }