/**
 * Inspect a download result and decide whether the document can be accepted.
 *
 * The checks run in a fixed order: access denied, missing document / server
 * failure, incomplete document, error content, JS protection, and finally a
 * whitelist of acceptable statuses. Problems are handed to download_error()
 * together with an exception object describing them (download_error() is
 * presumably what raises the exceptions listed below - confirm against its
 * definition).
 *
 * @param mixed $path    Forwarded unchanged to download_error().
 * @param mixed $html    Downloaded content, by reference. Replaced with the
 *                       re-downloaded content when JS protection is detected.
 * @param mixed $status  Download status, by reference. May be rewritten to
 *                       204 or 206, or to the status of the re-download.
 * @param mixed $options Reads the optional 'opendata' flag and the 'url'
 *                       entry (the latter only when reporting an unknown status).
 *
 * @return bool False when the identity was switched after a missing-document
 *              response or when the document is incomplete; true otherwise.
 * @throws Exceptions\DocumentCantBeDownloaded
 * @throws Exceptions\DocumentHasErrors
 * @throws Exceptions\DocumentIsMissing
 * @throws Exceptions\ProxyBanned
 * @throws Exceptions\UnknownProblem
 */
protected function validate($path, &$html, &$status, $options)
{
    $useOpendata = !empty($options['opendata']);

    // Statuses are compared loosely on purpose, in case they arrive as numeric strings.
    $missingStatuses = [204, 400, 404, 500, 502];
    $acceptedStatuses = [200, 300, 301, 302, 303, 304, 307, 408];

    // Access denied: ban the current proxy first, then report which IP was banned.
    if ($status == 403 || $this->detectFakeContent($html, '403')) {
        $this->proxyManager->banProxy();
        $bannedIp = $this->proxyManager->getProxyIp();
        $this->download_error($path, $html, new Exceptions\ProxyBanned($bannedIp));
    }

    // Document is missing or the server might be down.
    if (in_array($status, $missingStatuses) || $this->detectFakeContent($html, '404')) {
        if ($this->identity->switchIdentity()) {
            // A fake "404" page served with status 200 is reported as 204.
            if ($status == 200) {
                $status = 204;
            }

            return false;
        }

        $this->download_error($path, $html, new Exceptions\DocumentIsMissing());
    }

    // Status looks fine, but the document was not loaded completely.
    if ($status == 206 || strpos($html, '</body>') === false) {
        $status = 206;

        return false;
    }

    // Status looks fine, but the content itself reports errors.
    $errors = $this->detectFakeContent($html, 'error');
    if ($errors) {
        $this->download_error($path, $html, new Exceptions\DocumentHasErrors($errors));
    }

    // Status looks fine, but the document is hidden behind JS protection.
    $newUrl = $this->detectJSProtection($html);
    if ($newUrl) {
        $retry = $this->doDownload($newUrl, 10, $useOpendata);
        $html = $retry['html'];
        $status = $retry['status'];

        if ($this->detectJSProtection($html)) {
            $this->download_error(
                $path,
                $html,
                new Exceptions\DocumentCantBeDownloaded('Strong JS protection.')
            );
        }

        // Run the whole validation again on the fresh content.
        return $this->validate($path, $html, $status, $options);
    }

    if (in_array($status, $acceptedStatuses)) {
        return true;
    }

    // Any other status is reported as an unknown problem.
    $this->download_error(
        $path,
        $html,
        new Exceptions\UnknownProblem(
            "Download status is {$status}.",
            $this->shortURL($options['url']),
            isset($html) ? $html : '{NO DATA}'
        )
    );

    return true;
}