/**
 * Download a page, serving it from the local cache when a valid copy exists.
 *
 * Flow:
 *  1. If the page is already cached and `re_download` is not set, the cached
 *     copy is read, validated (with status 200) and processed. Any exception
 *     raised along the way discards the cached file and falls through to a
 *     fresh download.
 *  2. If there is no usable cache entry (or `re_download` is set) and the
 *     OFFLINE_PRIORITY environment flag is truthy, OfflinePriority is thrown
 *     instead of going online.
 *  3. Otherwise the page is fetched through the proxy manager, up to five
 *     times, until it both validates and is processed successfully.
 *
 * @param string $url     URL of the page.
 * @param array  $options
 *  - bool   $re_download Whether or not to re-download the page if it's already in cache.
 *  - bool   $save        Whether or not to cache a local copy of the page.
 *  - string $save_as     Alternative file name for the page; when set it is used
 *                        instead of the URL as the cache key.
 *  - bool   $opendata    Optional flag forwarded to doDownload(); its effect is
 *                        defined there (not visible from this method).
 *  The merged options (including a `url` entry) are passed on to validate()
 *  and doProcess().
 *
 * @param callable $process_callback Optional callback handed to doProcess()
 *                                   together with the page body, status and options.
 *
 * @return mixed The value returned by doProcess(). The previous annotation
 *               declared `string`; TODO confirm against doProcess().
 *
 * @throws Exceptions\OfflinePriority         No usable cache entry while OFFLINE_PRIORITY is enabled.
 * @throws Exceptions\ContentError            Re-thrown from doProcess() without retrying.
 * @throws Exceptions\DocumentCantBeDownloaded Every download attempt failed.
 * @throws Exceptions\DocumentHasErrors       Presumably raised by the helpers called here - not visible in this method.
 * @throws Exceptions\DocumentIsMissing       Presumably raised by the helpers called here - not visible in this method.
 * @throws Exceptions\ProxyBanned             Presumably raised by the helpers called here - not visible in this method.
 * @throws Exceptions\RevisionDateNotFound    Presumably raised by the helpers called here - not visible in this method.
 * @throws Exceptions\UnknownProblem          Presumably raised by the helpers called here - not visible in this method.
 * @throws \Exception
 */
public function download($url, $options = [], callable $process_callback = null)
{
    $default_options = ['url' => $url, 're_download' => false, 'save' => true, 'save_as' => null];
    $options = array_merge($default_options, $options);
    $opendata = !empty($options['opendata']);

    // The cache entry is keyed by the alternative file name when one is given.
    $target = $options['save_as'] ?: $url;
    $max_attempts = 5;

    $output = $this->shortURL($url) . ': ';

    if ($this->isDownloaded($target) && !$options['re_download']) {
        $file_path = $this->URL2path($target);
        $status = 200;
        try {
            // Read inside the try block so that an unreadable cache file is
            // handled like an invalid one instead of aborting the whole call.
            $html = file_get_contents($file_path);
            if ($html === false) {
                throw new \Exception('Can not read saved file.');
            }
            if (!$this->validate($target, $html, $status, $options)) {
                throw new \Exception('Can not validate saved file.');
            }
            $output .= '* ';
            _log($output);

            return $this->doProcess($html, $status, $options, $process_callback);
        } catch (\Exception $e) {
            // Deliberate best effort: a broken cache entry is dropped and the
            // page is fetched again below. The existence check keeps a file
            // that has already vanished from raising a second warning here.
            if (is_file($file_path)) {
                unlink($file_path);
            }
        }
    } elseif (env('OFFLINE_PRIORITY', false)) {
        throw new Exceptions\OfflinePriority();
    }

    try {
        // Proxy details are read inside the try block so that the finally
        // clause always releases the proxy and writes the log line.
        $output = $this->proxyManager->getProxyAddress() . '/' . $this->proxyManager->getProxyIp() . ' → ' . $output . ' @';
        $attempts = 0;

        do {
            $attempts++;
            // The meaning of the second argument is defined by doDownload().
            $download = $this->doDownload($url, 5, $opendata);
            $html = $download['html'];
            $status = $download['status'];

            if (!$this->validate($target, $html, $status, $options)) {
                // Record the failed status right away so that the last
                // attempt shows up in the log as well.
                $output .= '-' . $status;
                continue;
            }

            if ($options['save']) {
                $this->saveFile($target, $html);
            }

            try {
                $processed = $this->doProcess($html, $status, $options, $process_callback);
            } catch (Exceptions\ContentError $e) {
                // Content errors are not retried; hand them to the caller.
                throw $e;
            } catch (\Exception $e) {
                $output .= '-' . $status;
                continue;
            }

            $output .= '-' . $status . '-OK';

            return $processed;
        } while ($attempts < $max_attempts);

        throw new Exceptions\DocumentCantBeDownloaded('Too many failed attempts (' . $attempts . ').');
    } finally {
        $this->proxyManager->releaseProxy();
        _log($output);
    }
}