/**
 * Execute console command.
 *
 * Steps, in order:
 *  - with the "single" option, force-kills every other process that pgrep
 *    reports for the pattern "^php (.*?)artisan start";
 *  - with the "proxies" option, uses its value as the worker count and
 *    switches proxy usage on;
 *  - connects the proxy manager when proxy usage is enabled;
 *  - with the "job" option, executes that single job (or logs that it was
 *    not found) and returns;
 *  - otherwise creates discovery and download jobs if none exist and
 *    launches the jobs manager with the configured number of workers.
 */
public function handle()
{
    if ($this->option('single')) {
        $output = [];
        exec('pgrep -l -f "^php (.*?)artisan start"', $output);

        $ownPid = getmypid();
        foreach ($output as $line) {
            // Only a leading numeric PID may reach the shell: a line that
            // does not start with digits is skipped instead of being
            // interpolated verbatim into the kill command.
            if (!preg_match('/^\s*([0-9]+)(?:\s|$)/', $line, $matches)) {
                continue;
            }

            $pid = (int) $matches[1];
            if ($pid !== $ownPid) {
                exec("kill -9 {$pid}");
            }
        }
    }

    if ($this->option('proxies')) {
        // The value of the "proxies" option doubles as the worker count.
        $this->workers = $this->option('proxies');
        $this->proxyManager->useProxy(true);
    }

    if ($this->proxyManager->useProxy()) {
        $this->proxyManager->connect($this->workers, $this->option('kill_old_proxies'));
    }

    if ($job_id = $this->option('job')) {
        $job = Job::find($job_id);
        if ($job) {
            $job->execute();
        } else {
            _log("Job {$job_id} is not found.");
        }

        return;
    }

    if (!$this->jobsManager->count()) {
        _log('No jobs found. Initializing a new discovery and download jobs.');
        $this->discoverer->discoverNewLaws();
        $this->downloader->downloadNewLaws();
    }

    $this->jobsManager->launch($this->workers);
}
/**
 * Execute console command.
 *
 * Deletes all jobs when the "jobs" or "all" option is set, resets the proxy
 * when the "proxies" or "all" option is set, and always returns true.
 *
 * @return bool
 */
public function handle()
{
    $wipeJobs = $this->option('jobs') || $this->option('all');
    if ($wipeJobs) {
        $this->jobsManager->deleteAll();
    }

    $wipeProxies = $this->option('proxies') || $this->option('all');
    if ($wipeProxies) {
        $this->proxy->reset();
    }

    return true;
}
/**
 * Validate download result.
 *
 * Runs the checks below in order. Returns false after rewriting $status
 * (to 204 or 206) when the result is not acceptable yet, and true once every
 * check has passed. Problems are reported through download_error() together
 * with the matching exception object.
 * NOTE(review): download_error() is presumably what raises the exceptions
 * listed under @throws - confirm against its definition.
 *
 * @param $path
 * @param $html    Passed by reference; replaced when the document is re-downloaded.
 * @param $status  Passed by reference; may be rewritten to 204 or 206, or replaced on re-download.
 * @param $options The 'opendata' and 'url' keys are read here.
 *
 * @return bool
 * @throws Exceptions\DocumentCantBeDownloaded
 * @throws Exceptions\DocumentHasErrors
 * @throws Exceptions\DocumentIsMissing
 * @throws Exceptions\ProxyBanned
 * @throws Exceptions\UnknownProblem
 */
protected function validate($path, &$html, &$status, $options)
{
    $useOpendata = !empty($options['opendata']);

    // Access denied: ban the current proxy and report it.
    $accessDenied = $status == 403 || $this->detectFakeContent($html, '403');
    if ($accessDenied) {
        $this->proxyManager->banProxy();
        $this->download_error(
            $path,
            $html,
            new Exceptions\ProxyBanned($this->proxyManager->getProxyIp())
        );
    }

    // The document is missing or the server might be down.
    $missingStatuses = [204, 400, 404, 500, 502];
    $looksMissing = in_array($status, $missingStatuses) || $this->detectFakeContent($html, '404');
    if ($looksMissing) {
        if (!$this->identity->switchIdentity()) {
            $this->download_error($path, $html, new Exceptions\DocumentIsMissing());
        } else {
            // Another identity is available: never hand back a plain 200 here.
            if ($status == 200) {
                $status = 204;
            }

            return false;
        }
    }

    // The status is fine, but the document was not loaded completely.
    $isPartial = $status == 206 || strpos($html, '</body>') === false;
    if ($isPartial) {
        $status = 206;

        return false;
    }

    // The status is fine, but the content itself reports errors.
    $contentErrors = $this->detectFakeContent($html, 'error');
    if ($contentErrors) {
        $this->download_error($path, $html, new Exceptions\DocumentHasErrors($contentErrors));
    }

    // The status is fine, but the document is JS protected.
    $protectedUrl = $this->detectJSProtection($html);
    if ($protectedUrl) {
        $fresh = $this->doDownload($protectedUrl, 10, $useOpendata);
        $html = $fresh['html'];
        $status = $fresh['status'];

        if ($this->detectJSProtection($html)) {
            $this->download_error(
                $path,
                $html,
                new Exceptions\DocumentCantBeDownloaded('Strong JS protection.')
            );
        }

        // Run the whole validation again on the fresh content.
        return $this->validate($path, $html, $status, $options);
    }

    $acceptedStatuses = [200, 300, 301, 302, 303, 304, 307, 408];
    if (!in_array($status, $acceptedStatuses)) {
        $this->download_error(
            $path,
            $html,
            new Exceptions\UnknownProblem(
                "Download status is {$status}.",
                $this->shortURL($options['url']),
                isset($html) ? $html : '{NO DATA}'
            )
        );
    }

    return true;
}
/**
 * Work out how many workers to use for a requested count.
 *
 * When the OFFLINE_PRIORITY environment value is truthy the requested count
 * is returned unchanged; otherwise the smaller of the requested count and
 * the proxy manager's count() is returned.
 *
 * @param $workers_count
 *
 * @return mixed
 */
public function realWorkersCount($workers_count)
{
    if (env('OFFLINE_PRIORITY', false)) {
        return $workers_count;
    }

    return min($workers_count, $this->proxyManager->count());
}