/**
 * Verifies the output of the formatter's formatRevisionComment().
 *
 * A law is created via Law::create() with the supplied id and title and is
 * given the supplied type; a Revision built from the remaining data is then
 * formatted and compared with the expected string.
 *
 * @dataProvider revisions
 */
public function testFormatRevisionComment($law_id, $law_type, $law_title, $date, $comment, $add_Links, $expected)
{
    $created_law = Law::create([
        'id'    => $law_id,
        'title' => $law_title,
    ]);
    $created_law->setTypes([$law_type]);

    $revision_under_test = new Revision([
        'law_id'  => $law_id,
        'date'    => $date,
        'comment' => $comment,
    ]);
    $formatted = $this->formatter->formatRevisionComment($revision_under_test, $add_Links);

    $this->assertEquals($expected, $formatted);
}
/**
 * Download one revision of a law.
 *
 * The revision matching the law's `active_revision` is requested from the
 * law's base URL; any other revision is requested from the base URL with the
 * "/edYYYYMMDD" suffix appended. In both cases the result is saved under the
 * edition-specific "/laws/show/<id>/edYYYYMMDD/page" name.
 *
 * @param string $law_id
 * @param string $date    Revision date in Y-m-d format.
 * @param array  $options Extra options handed to download(); keys already
 *                        present win over the defaults added here, except
 *                        `save_as`, which is always overwritten.
 *
 * @return string
 * @throws Exceptions\RevisionDateNotFound
 * @throws Exceptions\WrongDateException
 */
public function downloadRevision($law_id, $date, $options = [])
{
    $opendata = false;
    $law = Law::find($law_id);

    $url_prefix = $opendata ? '/go/' : '/laws/show/';
    $law_url = $url_prefix . $law_id;

    $parsed_date = date_create_from_format('Y-m-d', $date);
    $edition_part = '/ed' . date_format($parsed_date, 'Ymd');

    // Only the requested URL depends on whether this is the active revision;
    // the storage name is the same either way.
    $is_active_revision = $law->active_revision == $date;
    $url = $is_active_revision ? $law_url : $law_url . $edition_part;
    $options['save_as'] = '/laws/show/' . $law_id . $edition_part . '/page';

    $options += [
        'law_id'       => $law_id,
        'date'         => $date,
        'law_url'      => $law_url,
        'edition_part' => $edition_part,
        'opendata'     => $opendata,
    ];

    return $this->download($url, $options);
}
/**
 * Convert HTML anchors in a revision text into Markdown links.
 *
 * Anchors with an empty target, or pointing at the revision's own law, are
 * replaced by their plain link text. Targets of the form "/laws/show/<id>"
 * whose law exists locally and has at least one issuer are rewritten to
 * "/<issuer group>/<issuer name>/<id>.md"; every other target is kept as the
 * URL-decoded href (with the edition / paragraph / anchor suffixes stripped by
 * the pattern).
 *
 * @param string   $text     Text containing HTML anchors.
 * @param Revision $revision Revision the text belongs to.
 *
 * @return string Text with anchors rewritten as Markdown.
 */
public function formatLinks($text, Revision $revision)
{
    $anchor_pattern = '%<a (?:class="(?:rvts96|rvts99)" )?href="(.*?)(?:/ed[0-9]+)?(?:/paran[0-9]+)?(?:#n[0-9]+)?"(?: target="_blank")?>(.*?)</a>%';

    $text = preg_replace_callback($anchor_pattern, function ($matches) use ($revision) {
        $url = urldecode($matches[1]);
        $label = $matches[2];

        // Empty targets and links to the law itself carry no information.
        if (!$url || $url == '/laws/show/' . $revision->law_id) {
            return $label;
        }

        if (preg_match('%/laws/show/(.*?)(?:$|/ed|#|\\?)%', $url, $law_matches)) {
            $law_id = $law_matches[1];
            $law = Law::find($law_id);
            if ($law) {
                $issuers = $law->issuers()->get()->all();
                $first_issuer = reset($issuers);
                // reset() yields false for a law without issuers; keep the
                // decoded URL in that case instead of building a path from
                // a non-object.
                if ($first_issuer) {
                    $url = '/' . $first_issuer->group_name . '/' . $first_issuer->name . '/' . $law_id . '.md';
                }
            }
        }

        return "[{$label}]({$url})";
    }, $text);

    // Tighten "( [text](url) )" into "([text](url))".
    $text = preg_replace('%\\( \\[(.*?)\\]\\((.*?)\\) \\)%', "([\$1](\$2))", $text);

    return $text;
}
/**
 * Downloads the card and one revision for two laws and compares the
 * resulting text with the stored fixture.
 *
 * For every scenario the listed pages must already be reported as downloaded
 * by downloader() before the law is created and fetched.
 */
public function testDownloadRevision()
{
    $scenarios = [
        [
            'law_id'    => '254к/96-вр',
            'date'      => '2014-05-15',
            'downloads' => [
                '/laws/card/254к/96-вр',
                '/laws/show/254к/96-вр/ed20140515/page',
                '/laws/show/254к/96-вр/ed20140515/page2',
                '/laws/show/254к/96-вр/ed20140515/page3',
                '/laws/show/254к/96-вр/ed20140515/page4',
            ],
            'fixture'   => '/tests/fixtures/partials/254к/96-вр/text.txt',
        ],
        [
            'law_id'    => '2952-17',
            'date'      => '2011-02-01',
            'downloads' => [
                '/laws/card/2952-17',
                '/laws/show/2952-17/ed20110201/page',
            ],
            'fixture'   => '/tests/fixtures/partials/2952-17/text.txt',
        ],
    ];

    foreach ($scenarios as $scenario) {
        foreach ($scenario['downloads'] as $path) {
            $this->assertTrue(downloader()->isDownloaded($path));
        }

        Law::firstOrCreate(['id' => $scenario['law_id']]);
        $law = $this->obj->downloadCard($scenario['law_id']);
        $revision = $this->obj->downloadRevision($scenario['law_id'], $scenario['date']);
        $text = file_get_contents(base_path() . $scenario['fixture']);

        $this->assertEquals($revision->text, $text);
        $this->assertEquals($law->active_revision()->first()->text, $text);
        $this->assertEquals($revision->status, Revision::UP_TO_DATE);
    }
}
/**
 * Crawl one page of a law list, register every law found on it and queue a
 * card download job for each.
 *
 * The list is re-downloaded only for pages after the first, and only when
 * either the argument or the object's own `re_download` flag is set. The
 * `save` option passed to downloadList() is false when $date equals today's
 * date.
 *
 * @param string $law_list_url Law list URL.
 * @param int    $page_num
 * @param string $date
 * @param bool   $re_download
 */
public function discoverDailyLawListPage($law_list_url, $page_num, $date, $re_download = false)
{
    if ($page_num > 1) {
        $force_download = $re_download || $this->re_download;
    } else {
        $force_download = false;
    }

    $list_options = [
        're_download' => $force_download,
        'save'        => $date != date('Y-m-d'),
    ];
    $data = downloadList($law_list_url, $list_options);

    foreach ($data['laws'] as $id => $law) {
        $record = Law::firstOrCreate(['id' => $id]);
        $record->update(['date' => $law['date']]);

        $this->jobsManager->add('command.lawgrabber.download', 'downloadCard', ['id' => $id], 'download', 1);
    }
}
/**
 * Job failure callback: flags the revision of the given law and date with
 * the Revision::DOWNLOAD_ERROR status.
 *
 * @param $law_id
 * @param $date
 * @param bool|false $re_download Not used by this callback.
 */
public function downloadRevisionFail($law_id, $date, $re_download = false)
{
    Law::find($law_id)
        ->getRevision($date)
        ->update(['status' => Revision::DOWNLOAD_ERROR]);
}
/**
 * Build the URL of the Markdown file for a law.
 *
 * Known laws resolve to "<base>/laws/<id>.md" in raw mode and to
 * "<base>/<issuer group>/<issuer name>/<id>.md" otherwise. Laws that are not
 * found locally, or that have no issuer to build a path from, resolve to
 * "<base>/<id>".
 *
 * @param string $law_id
 * @param string $base   URL prefix; a trailing slash is ignored.
 * @param bool   $raw    Force the raw ("laws/") layout for this call, in
 *                       addition to the object's own `is_raw` flag.
 *
 * @return string
 */
public function getLawURL($law_id, $base = '', $raw = false)
{
    $base = rtrim($base, '/');
    $unresolved_url = $base . '/' . $law_id;

    $law = Law::find($law_id);
    if (!$law) {
        return $unresolved_url;
    }

    // The $raw argument used to be accepted but never read; it now works
    // alongside the instance-level flag.
    if ($raw || $this->is_raw) {
        $prefix = 'laws';
    } else {
        $issuers = $law->issuers()->get()->all();
        $first_issuer = reset($issuers);
        // reset() yields false when the law has no issuers.
        if (!$first_issuer) {
            return $unresolved_url;
        }
        $prefix = $first_issuer->group_name . '/' . $first_issuer->name;
    }

    return $base . '/' . $prefix . '/' . $law_id . '.md';
}
<?php

use LawGrabber\Laws\Law;
use LawGrabber\Laws\Revision;
use LawPages\LawRenderer;

// Specific revision of a law, rendered with the "raw" flag set.
// The {date} segment arrives as Ymd and is converted to Y-m-d for the lookup.
Route::get('/laws/show/{law_id}/ed{date}/raw', function ($law_id, $date) {
    $law = Law::findOrFail($law_id);
    $parsed_date = date_create_from_format('Ymd', $date);
    $date = date_format($parsed_date, 'Y-m-d');
    $revision = Revision::find($law_id, $date);

    return view('lawpages::law_page')
        ->with('law', $law)
        ->with('revision', $revision)
        ->with('raw', true);
})
    ->where('law_id', '[A-Za-z0-9_абвгґдеєжзиіїйклмнопрстуфхцчшщьюяАБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЬЮЯыЫъЪ\\-\\/]+')
    ->where('date', '[0-9]+');

// Specific revision of a law.
Route::get('/laws/show/{law_id}/ed{date}', function ($law_id, $date) {
    $law = Law::findOrFail($law_id);
    $parsed_date = date_create_from_format('Ymd', $date);
    $date = date_format($parsed_date, 'Y-m-d');
    $revision = Revision::find($law_id, $date);

    return view('lawpages::law_page')
        ->with('law', $law)
        ->with('revision', $revision);
})
    ->where('law_id', '[A-Za-z0-9_абвгґдеєжзиіїйклмнопрстуфхцчшщьюяАБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЬЮЯыЫъЪ\\-\\/]+')
    ->where('date', '[0-9]+');

// Law page without an explicit revision, rendered with the "raw" flag set.
Route::get('/laws/show/{law_id}/raw', function ($law_id) {
    $law = Law::findOrFail($law_id);

    return view('lawpages::law_page')->with(['law' => $law, 'raw' => true]);
})->where(['law_id' => '[A-Za-z0-9_абвгґдеєжзиіїйклмнопрстуфхцчшщьюяАБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЬЮЯыЫъЪ\\-\\/]+']);

// Law page without an explicit revision.
Route::get('/laws/show/{law_id}', function ($law_id) {
    $law = Law::findOrFail($law_id);

    return view('lawpages::law_page')->with('law', $law);
})->where('law_id', '[A-Za-z0-9_абвгґдеєжзиіїйклмнопрстуфхцчшщьюяАБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЬЮЯыЫъЪ\\-\\/]+');

// TODO: Laws with includes.
// TODO: Laws without text, but with files.
// TODO: Laws with tables.
/**
 * Execute console command.
 *
 * With the `old_files` option the command delegates to move_files() and
 * returns its result. Otherwise it walks every law whose status is below
 * Law::SAVED, compares the database state with the files under the downloads
 * directory, counts the inconsistencies found and prints a summary. With the
 * `fix` option each inconsistency is also repaired, by removing the law's
 * directory and/or resetting the law's status.
 *
 * The checks for one law run in a fixed order and later checks see the
 * status written by earlier fixes, so the order must not be changed.
 *
 * @return mixed True after a check run, otherwise whatever move_files() returns.
 */
public function handle()
{
    $fix = $this->option('fix');
    $old = $this->option('old_files');
    if ($old) {
        return $this->move_files();
    }

    $downloaded_card = Law::where('status', Law::UP_TO_DATE)->count();
    $downloaded_text = Law::where('status', Law::DOWNLOADED_REVISIONS)->count();
    $downloaded_relations = Law::where('status', Law::DOWNLOADED_RELATIONS)->count();
    $without_text = Law::where('status', '>', Law::NOT_DOWNLOADED)->where('has_text', Law::NO_TEXT)->count();
    $not_downloaded = Law::where('status', Law::NOT_DOWNLOADED)->count();

    $law_dir = $this->getDownloadsDir() . '/zakon.rada.gov.ua/laws/show/';

    // Local closures instead of named functions declared inside the method:
    // a named function would be registered on first execution and make a
    // second handle() call in the same process fail with "cannot redeclare".
    $is_fake = function ($html) {
        return downloader()->detectFakeContent($html);
    };
    $is_403 = function ($html) {
        return downloader()->detectFakeContent($html, '403');
    };
    $remove_dir = function ($dir) {
        // NOTE(review): the path is passed to the shell unescaped. Law ids
        // contain non-ASCII characters, which escapeshellarg() may strip
        // depending on the locale, so escaping is deliberately not added here.
        exec('rm -rf ' . $dir);
    };
    // True when either the text file or the page file exists and is detected
    // as fake content. Existence is re-checked on every call because an
    // earlier fix may have removed the directory.
    $has_fake_content = function ($text_path, $page_path) use ($is_fake) {
        return (file_exists($text_path) && $is_fake(file_get_contents($text_path)))
            || (file_exists($page_path) && $is_fake(file_get_contents($page_path)));
    };

    $result_count = Law::where('status', '<', Law::SAVED)->count();

    $nd_orphaned_dirs = 0;
    $d_broken_card = 0;
    $d_no_files = 0;
    $d_fake_content = 0;
    // Reported in the summary but never incremented by any check below.
    $d_unknown_text_true_content = 0;
    $d_unknown_text_no_text = 0;
    $i = 1;

    Law::where('status', '<', Law::SAVED)->orderBy('id')->chunk(200, function ($laws) use (
        $fix,
        $law_dir,
        $is_403,
        $remove_dir,
        $has_fake_content,
        &$i,
        &$result_count,
        &$nd_orphaned_dirs,
        &$d_broken_card,
        &$d_no_files,
        &$d_fake_content,
        &$d_unknown_text_true_content,
        &$d_unknown_text_no_text
    ) {
        foreach ($laws as $law) {
            $law_path = $law_dir . $law->id;
            $card_path = $law_dir . $law->id . '/card.html';
            $text_path = $law_dir . $law->id . '/text.html';
            $page_path = $law_dir . $law->id . '/page.html';

            // Not downloaded according to the database, yet a directory exists.
            // Such laws skip the remaining checks and the progress counter.
            if ($law->status == Law::NOT_DOWNLOADED && is_dir($law_path)) {
                $nd_orphaned_dirs++;
                if ($fix) {
                    $remove_dir($law_path);
                }
                continue;
            }

            // Downloaded, but the card is missing or is a 403 page.
            if ($law->status > Law::NOT_DOWNLOADED
                && ((file_exists($card_path) && $is_403(file_get_contents($card_path))) || !file_exists($card_path))
            ) {
                $d_broken_card++;
                if ($fix) {
                    $remove_dir($law_path);
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            // Marked as having text, but neither text nor page file exists.
            if ($law->status >= Law::DOWNLOADED_REVISIONS
                && $law->has_text == Law::HAS_TEXT
                && !file_exists($text_path)
                && !file_exists($page_path)
            ) {
                $d_no_files++;
                if ($fix) {
                    $remove_dir($law_path);
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            // Marked as having text, files exist, but the content is fake.
            if ($law->status >= Law::DOWNLOADED_REVISIONS
                && $law->has_text == Law::HAS_TEXT
                && (file_exists($text_path) || file_exists($page_path))
            ) {
                if ($has_fake_content($text_path, $page_path)) {
                    $d_fake_content++;
                    if ($fix) {
                        $remove_dir($law_path);
                        $law->update(['status' => Law::NOT_DOWNLOADED]);
                    }
                }
            }

            // Text presence unknown, files exist, but the content is fake.
            if ($law->status > Law::NOT_DOWNLOADED
                && $law->has_text == Law::UNKNOWN
                && (file_exists($text_path) || file_exists($page_path))
            ) {
                if ($has_fake_content($text_path, $page_path)) {
                    $d_fake_content++;
                    if ($fix) {
                        $remove_dir($law_path);
                        $law->update(['status' => Law::NOT_DOWNLOADED]);
                    }
                }
            }

            // Text presence unknown, no text/page files, but a card exists:
            // the card decides whether the law simply has no text.
            if ($law->status >= Law::DOWNLOADED_REVISIONS
                && $law->has_text == Law::UNKNOWN
                && !(file_exists($text_path) || file_exists($page_path))
                && file_exists($card_path)
            ) {
                $html = file_get_contents($card_path);
                if (strpos($html, 'Текст відсутній') !== false) {
                    $d_unknown_text_no_text++;
                    if ($fix) {
                        $law->update(['status' => Law::UP_TO_DATE, 'has_text' => Law::NO_TEXT]);
                    }
                } else {
                    $d_no_files++;
                    if ($fix) {
                        $law->update(['status' => Law::NOT_DOWNLOADED]);
                    }
                }
            }

            // Text presence unknown and no files at all: reset without counting.
            if ($law->status > Law::NOT_DOWNLOADED
                && $law->has_text == Law::UNKNOWN
                && !(file_exists($text_path) || file_exists($page_path))
                && !file_exists($card_path)
            ) {
                if ($fix) {
                    $law->update(['status' => Law::NOT_DOWNLOADED]);
                }
            }

            print "\rChecked " . $i . ' of ' . $result_count . ' (' . floor($i / $result_count * 100) . '%)';
            $i++;
        }
    });

    print "\n" . 'Downloaded card : ' . $downloaded_card;
    print "\n" . 'Downloaded text : ' . $downloaded_text . ' (without text: ' . $without_text . ')';
    print "\n" . 'Downloaded relations : ' . $downloaded_relations;
    print "\n" . 'Not downloaded : ' . $not_downloaded;
    print "\n" . '-------------------------------------------------';
    print "\n" . 'Junk directories : ' . $nd_orphaned_dirs;
    print "\n" . 'Broken card page : ' . $d_broken_card;
    print "\n" . 'Missing files for downloads: ' . $d_no_files;
    print "\n" . 'Fake content for downloads : ' . $d_fake_content;
    print "\n" . 'Has text, but not marked : ' . $d_unknown_text_true_content;
    print "\n" . 'No text, but not marked : ' . $d_unknown_text_no_text;
    if ($fix) {
        print "\n" . 'ALL PROBLEMS FIXED';
    }
    print "\n";

    return true;
}