Esempio n. 1
0
 /**
  * Validate a download result and decide whether it can be accepted.
  *
  * The checks run in a fixed order: access denied, missing document,
  * incomplete document, error content, JS protection, and finally a
  * whitelist of acceptable status codes. Failures are reported through
  * download_error(); that helper is not visible here, but the @throws list
  * below suggests it throws the exception it is given.
  *
  * @param mixed $path    Passed through to download_error() unchanged.
  * @param mixed $html    Downloaded content (by reference). Replaced with the
  *                       re-downloaded content when JS protection is detected.
  *                       May be null, in which case the document is treated as
  *                       incomplete.
  * @param mixed $status  HTTP status (by reference). Rewritten to 204 when an
  *                       identity switch is requested on a 200 response, to
  *                       206 when the document is incomplete, or to the status
  *                       of the re-download after JS protection.
  * @param array $options Reads 'opendata' (truthy flag forwarded to
  *                       doDownload()) and 'url' (used only in the
  *                       UnknownProblem error report).
  *
  * @return bool True when the document passed every check; false when the
  *              identity was switched or the document is incomplete
  *              (presumably a signal for the caller to retry).
  * @throws Exceptions\DocumentCantBeDownloaded
  * @throws Exceptions\DocumentHasErrors
  * @throws Exceptions\DocumentIsMissing
  * @throws Exceptions\ProxyBanned
  * @throws Exceptions\UnknownProblem
  */
 protected function validate($path, &$html, &$status, $options)
 {
     $opendata = !empty($options['opendata']);

     // Access denied: ban the current proxy and report it.
     if ($status == 403 || $this->detectFakeContent($html, '403')) {
         $this->proxyManager->banProxy();
         $this->download_error($path, $html, new Exceptions\ProxyBanned($this->proxyManager->getProxyIp()));
     }

     // Document is missing or the server might be down.
     if (in_array($status, [204, 400, 404, 500, 502]) || $this->detectFakeContent($html, '404')) {
         if ($this->identity->switchIdentity()) {
             // Another identity is available: make sure the status no longer
             // reads as a plain 200 before handing control back.
             $status = $status != 200 ? $status : 204;
             return false;
         }
         $this->download_error($path, $html, new Exceptions\DocumentIsMissing());
     }

     // Status is ok, but the document was not fully loaded. The closing body
     // tag is matched case-insensitively because HTML tag names are
     // case-insensitive; a null body is handled explicitly so it is never
     // passed to a string function.
     if ($status == 206 || $html === null || stripos($html, '</body>') === false) {
         $status = 206;
         return false;
     }

     // Status is ok, but the document content has errors.
     if ($errors = $this->detectFakeContent($html, 'error')) {
         $this->download_error($path, $html, new Exceptions\DocumentHasErrors($errors));
     }

     // Status is ok, but the document is JS protected: follow the detected
     // URL once and validate the fresh content.
     if ($newUrl = $this->detectJSProtection($html)) {
         $result = $this->doDownload($newUrl, 10, $opendata);
         $html = $result['html'];
         $status = $result['status'];
         if ($this->detectJSProtection($html)) {
             $this->download_error($path, $html, new Exceptions\DocumentCantBeDownloaded('Strong JS protection.'));
         }
         // Do a second validation run on the fresh content.
         // NOTE(review): this recursion only terminates on persistent JS
         // protection if download_error() throws - confirm.
         return $this->validate($path, $html, $status, $options);
     }

     // Anything outside the accepted status list is reported as unknown.
     if (!in_array($status, [200, 300, 301, 302, 303, 304, 307, 408])) {
         // Guard the lookup so a missing 'url' option cannot raise a warning
         // that masks the real problem being reported.
         $url = isset($options['url']) ? $options['url'] : null;
         $this->download_error($path, $html, new Exceptions\UnknownProblem("Download status is {$status}.", $this->shortURL($url), isset($html) ? $html : '{NO DATA}'));
     }

     return true;
 }