Esempio n. 1
0
 /**
  * Download a page, preferring a usable cached copy when one exists.
  *
  * When the page is already cached and `re_download` is not set, the cached copy
  * is read, validated and processed. If any of those steps fails, the cached file
  * is deleted and the page is downloaded again. A fresh download is attempted up
  * to five times; each attempt is validated, optionally saved, and processed.
  * The proxy is always released and the attempt log is always written once the
  * download phase has been entered.
  *
  * @param string   $url URL of the page.
  * @param array    $options
  *                      - bool $re_download Whether or not to re-download the page if it's already in cache.
  *                      - bool $save Whether or not to cache a local copy of the page.
  *                      - string $save_as Alternative file name for the page.
  *                      - bool $opendata Optional flag forwarded to doDownload(); treated as false when absent.
  *                      The merged options (including `url`) are forwarded to validate() and doProcess().
  *
  * @param callable $process_callback Forwarded to doProcess() together with the page content and status.
  *
  * @return mixed The value returned by doProcess().
  *               NOTE(review): previously documented as `string` - confirm against doProcess().
  * @throws Exceptions\ContentError When doProcess() raises it for a freshly downloaded page.
  * @throws Exceptions\DocumentCantBeDownloaded When every download attempt failed.
  * @throws Exceptions\DocumentHasErrors
  * @throws Exceptions\DocumentIsMissing
  * @throws Exceptions\OfflinePriority When the page is not cached and OFFLINE_PRIORITY is enabled.
  * @throws Exceptions\ProxyBanned
  * @throws Exceptions\RevisionDateNotFound
  * @throws Exceptions\UnknownProblem
  * @throws \Exception
  */
 public function download($url, $options = [], callable $process_callback = null)
 {
     // Upper bound on download attempts before giving up.
     $max_attempts = 5;

     $default_options = ['url' => $url, 're_download' => false, 'save' => true, 'save_as' => null];
     $options = array_merge($default_options, $options);
     $opendata = !empty($options['opendata']);
     // Name under which the page is cached: the explicit alias when given, the URL otherwise.
     $target = $options['save_as'] ?: $url;
     $output = $this->shortURL($url) . ': ';

     if ($this->isDownloaded($target) && !$options['re_download']) {
         $file_path = $this->URL2path($target);
         $status = 200;
         try {
             // Read inside the try so that an unreadable file is handled exactly
             // like an invalid one: discard the cached copy and download again.
             $html = file_get_contents($file_path);
             if ($html === false) {
                 throw new \Exception('Can not read saved file.');
             }
             if (!$this->validate($target, $html, $status, $options)) {
                 throw new \Exception('Can not validate saved file.');
             }
             $output .= '* ';
             _log($output);

             return $this->doProcess($html, $status, $options, $process_callback);
         } catch (\Exception $e) {
             // The cached copy is unusable; remove it (if it is still there) and
             // fall through to a fresh download.
             if (is_file($file_path)) {
                 unlink($file_path);
             }
         }
     } elseif (env('OFFLINE_PRIORITY', false)) {
         throw new Exceptions\OfflinePriority();
     }

     try {
         $output = $this->proxyManager->getProxyAddress() . '/' . $this->proxyManager->getProxyIp() . ' → ' . $output . ' @';
         $attempts = 0;
         do {
             $attempts++;
             // The second argument is passed through unchanged; its meaning is defined by doDownload().
             $result = $this->doDownload($url, 5, $opendata);
             $html = $result['html'];
             $status = $result['status'];

             if (!$this->validate($target, $html, $status, $options)) {
                 // Record the failed status right away so the last attempt is logged too.
                 $output .= '-' . $status;
                 continue;
             }
             if ($options['save']) {
                 $this->saveFile($target, $html);
             }
             try {
                 $result = $this->doProcess($html, $status, $options, $process_callback);
             } catch (Exceptions\ContentError $e) {
                 // Content errors are not retried; they propagate to the caller.
                 throw $e;
             } catch (\Exception $e) {
                 // Any other processing failure counts as a failed attempt and is retried.
                 $output .= '-' . $status;
                 continue;
             }
             $output .= '-' . $status . '-OK';

             return $result;
         } while ($attempts < $max_attempts);

         throw new Exceptions\DocumentCantBeDownloaded('Too many failed attempts (' . $attempts . ').');
     } finally {
         $this->proxyManager->releaseProxy();
         _log($output);
     }
 }