private function fetchFile(KalturaBatchJob $job, KalturaImportJobData $data) { KalturaLog::debug("fetchFile({$job->id})"); $jobSubType = $job->jobSubType; if (in_array($jobSubType, array(kFileTransferMgrType::SCP, kFileTransferMgrType::SFTP))) { // use SSH file transfer manager for SFTP/SCP return $this->fetchFileSsh($job, $data); } try { $sourceUrl = $data->srcFileUrl; KalturaLog::debug("sourceUrl [{$sourceUrl}]"); $this->updateJob($job, 'Downloading file header', KalturaBatchJobStatus::QUEUED, 1); $fileSize = null; $resumeOffset = 0; if ($data->destFileLocalPath && file_exists($data->destFileLocalPath)) { $curlWrapper = new KCurlWrapper($sourceUrl, $this->taskConfig->params->curlVerbose); $useNoBody = $job->executionAttempts > 1; // if the process crashed first time, tries with no body instead of range 0-0 $curlHeaderResponse = $curlWrapper->getHeader($useNoBody); if (!$curlHeaderResponse || !count($curlHeaderResponse->headers)) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlWrapper->getErrorNumber(), "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::FAILED); return $job; } if ($curlWrapper->getError()) { KalturaLog::err("Headers error: " . $curlWrapper->getError()); KalturaLog::err("Headers error number: " . $curlWrapper->getErrorNumber()); $curlWrapper->close(); $curlWrapper = new KCurlWrapper($sourceUrl, $this->taskConfig->params->curlVerbose); } if (!$curlHeaderResponse->isGoodCode()) { $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, "HTTP Error: " . $curlHeaderResponse->code . " " . $curlHeaderResponse->codeName, KalturaBatchJobStatus::FAILED); return $job; } if (isset($curlHeaderResponse->headers['content-length'])) { $fileSize = $curlHeaderResponse->headers['content-length']; } $curlWrapper->close(); if ($fileSize) { clearstatcache(); $actualFileSize = kFile::fileSize($data->destFileLocalPath); if ($actualFileSize >= $fileSize) { return $this->moveFile($job, $data->destFileLocalPath, $fileSize); } else { $resumeOffset = $actualFileSize; } } } $curlWrapper = new KCurlWrapper($sourceUrl, $this->taskConfig->params->curlVerbose); $curlWrapper->setTimeout($this->taskConfig->params->curlTimeout); if ($resumeOffset) { $curlWrapper->setResumeOffset($resumeOffset); } else { // creates a temp file path $destFile = $this->getTempFilePath($sourceUrl); KalturaLog::debug("destFile [{$destFile}]"); $data->destFileLocalPath = $destFile; $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING, 2, $data); } KalturaLog::debug("Executing curl"); $res = $curlWrapper->exec($data->destFileLocalPath); KalturaLog::debug("Curl results: {$res}"); if (!$res || $curlWrapper->getError()) { $errNumber = $curlWrapper->getErrorNumber(); if ($errNumber != CURLE_OPERATION_TIMEOUTED) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } else { clearstatcache(); $actualFileSize = kFile::fileSize($data->destFileLocalPath); if ($actualFileSize == $resumeOffset) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } } } $curlWrapper->close(); if (!file_exists($data->destFileLocalPath)) { $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, "Error: output file doesn't exist", KalturaBatchJobStatus::RETRY); return $job; } // check the file size only if its first or second retry // in case it failed few times, taks the file as is if ($fileSize) { clearstatcache(); $actualFileSize = kFile::fileSize($data->destFileLocalPath); if ($actualFileSize < $fileSize) { $percent = floor($actualFileSize * 100 / $fileSize); $this->updateJob($job, "Downloaded size: {$actualFileSize}({$percent}%)", KalturaBatchJobStatus::PROCESSING, $percent, $data); $this->kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType); // $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_WRONG_SIZE, "Expected file size[$fileSize] actual file size[$actualFileSize]", KalturaBatchJobStatus::RETRY); return $job; } } $this->updateJob($job, 'File imported, copy to shared folder', KalturaBatchJobStatus::PROCESSED, 90); $job = $this->moveFile($job, $data->destFileLocalPath, $fileSize); } catch (Exception $ex) { $this->closeJob($job, KalturaBatchJobErrorTypes::RUNTIME, $ex->getCode(), "Error: " . $ex->getMessage(), KalturaBatchJobStatus::FAILED); } return $job; }
private function fetchFile(KalturaBatchJob $job, KalturaImportJobData $data) { KalturaLog::debug("fetchFile({$job->id})"); try { $sourceUrl = $data->srcFileUrl; KalturaLog::debug("sourceUrl [{$sourceUrl}]"); $this->updateJob($job, 'Downloading file header', KalturaBatchJobStatus::QUEUED, 1); $curlWrapper = new KCurlWrapper($sourceUrl); $useNoBody = $job->executionAttempts > 1; // if the process crashed first time, tries with no body instead of range 0-0 $curlHeaderResponse = $curlWrapper->getHeader($useNoBody); if (!$curlHeaderResponse || $curlWrapper->getError()) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlWrapper->getErrorNumber(), "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::FAILED); return $job; } if (!$curlHeaderResponse->isGoodCode()) { $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, "HTTP Error: " . $curlHeaderResponse->code . " " . $curlHeaderResponse->codeName, KalturaBatchJobStatus::FAILED); return $job; } $fileSize = null; if (isset($curlHeaderResponse->headers['content-length'])) { $fileSize = $curlHeaderResponse->headers['content-length']; } $curlWrapper->close(); $resumeOffset = 0; if ($fileSize && $data->destFileLocalPath && file_exists($data->destFileLocalPath)) { clearstatcache(); $actualFileSize = filesize($data->destFileLocalPath); if ($actualFileSize >= $fileSize) { return $this->moveFile($job, $data->destFileLocalPath, $fileSize); } else { $resumeOffset = $actualFileSize; } } $curlWrapper = new KCurlWrapper($sourceUrl); $curlWrapper->setTimeout($this->taskConfig->params->curlTimeout); if ($resumeOffset) { $curlWrapper->setResumeOffset($resumeOffset); } else { // creates a temp file path $rootPath = $this->taskConfig->params->localTempPath; $res = self::createDir($rootPath); if (!$res) { KalturaLog::err("Cannot continue import without temp directory"); die; } $uniqid = uniqid('import_'); $destFile = realpath($rootPath) . "/{$uniqid}"; KalturaLog::debug("destFile [{$destFile}]"); // in case the url has added arguments, remove them (and reveal the URL path) // in order to find the file extension $urlPathEndIndex = strpos($sourceUrl, "?"); if ($urlPathEndIndex !== false) { $sourceUrlPath = substr($sourceUrl, 0, $urlPathEndIndex); } else { $sourceUrlPath = $sourceUrl; } $ext = pathinfo($sourceUrlPath, PATHINFO_EXTENSION); if (strlen($ext)) { $destFile .= ".{$ext}"; } $data->destFileLocalPath = $destFile; $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING, 2, $data); } KalturaLog::debug("Executing curl"); $res = $curlWrapper->exec($data->destFileLocalPath); KalturaLog::debug("Curl results: {$res}"); if (!$res || $curlWrapper->getError()) { $errNumber = $curlWrapper->getErrorNumber(); if ($errNumber != CURLE_OPERATION_TIMEOUTED) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } else { clearstatcache(); $actualFileSize = filesize($data->destFileLocalPath); if ($actualFileSize == $resumeOffset) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } } } $curlWrapper->close(); if (!file_exists($data->destFileLocalPath)) { $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, "Error: output file doesn't exist", KalturaBatchJobStatus::RETRY); return $job; } // check the file size only if its first or second retry // in case it failed few times, taks the file as is if ($fileSize) { clearstatcache(); $actualFileSize = filesize($data->destFileLocalPath); if ($actualFileSize < $fileSize) { $percent = floor($actualFileSize * 100 / $fileSize); $this->updateJob($job, "Downloaded size: {$actualFileSize}({$percent}%)", KalturaBatchJobStatus::PROCESSING, $percent, $data); $this->kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType); // $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_WRONG_SIZE, "Expected file size[$fileSize] actual file size[$actualFileSize]", KalturaBatchJobStatus::RETRY); return $job; } } $this->updateJob($job, 'File imported, copy to shared folder', KalturaBatchJobStatus::PROCESSED, 90); $job = $this->moveFile($job, $data->destFileLocalPath, $fileSize); } catch (Exception $ex) { $this->closeJob($job, KalturaBatchJobErrorTypes::RUNTIME, $ex->getCode(), "Error: " . $ex->getMessage(), KalturaBatchJobStatus::FAILED); } return $job; }
/** * Download a file from $sourceUrl to $fileDestination * @param KalturaBatchJob $job * @param string $sourceUrl * @param string $fileDestination * @param KCurlHeaderResponse $curlHeaderResponse header fetched for the $sourceUrl */ private function fetchFile(KalturaBatchJob &$job, $sourceUrl, $fileDestination, $curlHeaderResponse = null, $fileSize = null) { KalturaLog::debug('fetchFile - job id [' . $job->id . '], source url [' . $sourceUrl . '], destination [' . $fileDestination . ']'); if (!$fileSize) { // fetch header if not given if (!$curlHeaderResponse) { $curlHeaderResponse = $this->fetchHeader($job, $sourceUrl); if (!$curlHeaderResponse) { return false; // job already closed with an error } } // try to get file size from headers $fileSize = null; if (isset($curlHeaderResponse->headers['content-length'])) { $fileSize = $curlHeaderResponse->headers['content-length']; } } // if file already exists - check if we can start from specific offset on exising partial content $resumeOffset = 0; if ($fileSize && $fileDestination && file_exists($fileDestination)) { clearstatcache(); $actualFileSize = filesize($fileDestination); if ($actualFileSize >= $fileSize) { // file download finished ? KalturaLog::debug('File exists with size [' . $actualFileSize . '] - checking if finished...'); return $this->checkFile($job, $fileDestination, $fileSize); } else { // will resume from the current offset KalturaLog::debug('File partialy exists - resume offset set to [' . $actualFileSize . ']'); $resumeOffset = $actualFileSize; } } // get http body $curlWrapper = new KCurlWrapper($sourceUrl); $curlWrapper->setTimeout($this->taskConfig->params->curlTimeout); if ($resumeOffset) { // will resume from the current offset $curlWrapper->setResumeOffset($resumeOffset); } else { // create destination directory if doesn't already exist $res = self::createDir(dirname($fileDestination)); if (!$res) { $msg = 'Error: Cannot create destination directory [' . dirname($fileDestination) . ']'; KalturaLog::err($msg); $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::CANNOT_CREATE_DIRECTORY, $msg, KalturaBatchJobStatus::RETRY); return false; } // about to start downloading $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING); } KalturaLog::debug("Executing curl for downloading file at [{$sourceUrl}]"); $res = $curlWrapper->exec($fileDestination); // download file KalturaLog::debug("Curl results: {$res}"); // handle errors if (!$res || $curlWrapper->getError()) { $errNumber = $curlWrapper->getErrorNumber(); if ($errNumber != CURLE_OPERATION_TIMEOUTED) { // an error other than timeout occured - cannot continue (timeout is handled with resuming) $msg = 'Error: ' . $curlWrapper->getError(); KalturaLog::err($msg); $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, $msg, KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return false; } else { // timeout error occured KalturaLog::debug('Timeout occured'); clearstatcache(); $actualFileSize = filesize($fileDestination); if ($actualFileSize == $resumeOffset) { // no downloading was done at all - error $msg = 'Error: ' . $curlWrapper->getError(); KalturaLog::err($msg); $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, $msg, KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return false; } } } $curlWrapper->close(); if (!file_exists($fileDestination)) { // destination file does not exist for an unknown reason $msg = "Error: output file doesn't exist"; KalturaLog::err($msg); $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, $msg, KalturaBatchJobStatus::RETRY); return false; } if ($fileSize) { clearstatcache(); $actualFileSize = filesize($fileDestination); if ($actualFileSize < $fileSize) { // part of file was downloaded - will resume in next run KalturaLog::debug('File partialy downloaded - will resumt in next run'); $this->updateJob($job, "Downloaded size: {$actualFileSize}", KalturaBatchJobStatus::PROCESSING); $this->kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType); return false; } } KalturaLog::debug('File downloaded completely - will now check if done...'); $this->updateJob($job, 'File downloaded', KalturaBatchJobStatus::PROCESSING); // file downloaded completely - check it return $this->checkFile($job, $fileDestination, $fileSize); }
private function fetchFile(KalturaBatchJob $job, KalturaImportJobData $data) { $jobSubType = $job->jobSubType; $sshProtocols = array(kFileTransferMgrType::SCP, kFileTransferMgrType::SFTP); if (in_array($jobSubType, $sshProtocols)) { // use SSH file transfer manager for SFTP/SCP return $this->fetchFileSsh($job, $data); } try { $sourceUrl = $data->srcFileUrl; $this->updateJob($job, 'Downloading file header', KalturaBatchJobStatus::QUEUED); $fileSize = null; $resumeOffset = 0; if ($data->destFileLocalPath && file_exists($data->destFileLocalPath)) { $curlWrapper = new KCurlWrapper(self::$taskConfig->params); $useNoBody = $job->executionAttempts > 1; // if the process crashed first time, tries with no body instead of range 0-0 $curlHeaderResponse = $curlWrapper->getHeader($sourceUrl, $useNoBody); if (!$curlHeaderResponse || !count($curlHeaderResponse->headers)) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlWrapper->getErrorNumber(), "Couldn't read file. Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::FAILED); return $job; } if ($curlWrapper->getError()) { KalturaLog::err("Headers error: " . $curlWrapper->getError()); KalturaLog::err("Headers error number: " . $curlWrapper->getErrorNumber()); $curlWrapper->close(); $curlWrapper = new KCurlWrapper(self::$taskConfig->params); } if (!$curlHeaderResponse->isGoodCode()) { $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, "Failed while reading file. HTTP Error: " . $curlHeaderResponse->code . " " . $curlHeaderResponse->codeName, KalturaBatchJobStatus::FAILED); $curlWrapper->close(); return $job; } if (isset($curlHeaderResponse->headers['content-length'])) { $fileSize = $curlHeaderResponse->headers['content-length']; } $curlWrapper->close(); if ($fileSize) { clearstatcache(); $actualFileSize = kFile::fileSize($data->destFileLocalPath); if ($actualFileSize >= $fileSize) { return $this->moveFile($job, $data->destFileLocalPath, $fileSize); } else { $resumeOffset = $actualFileSize; } } } $curlWrapper = new KCurlWrapper(self::$taskConfig->params); if (is_null($fileSize)) { // Read file size $curlHeaderResponse = $curlWrapper->getHeader($sourceUrl, true); if ($curlHeaderResponse && count($curlHeaderResponse->headers) && !$curlWrapper->getError() && isset($curlHeaderResponse->headers['content-length'])) { $fileSize = $curlHeaderResponse->headers['content-length']; } //Close the curl used to fetch the header and create a new one. //When fetching headers we set curl options that than are not reset once header is fetched. //Not all servers support all the options so we need to remove them from our headers. $curlWrapper->close(); $curlWrapper = new KCurlWrapper(self::$taskConfig->params); } if ($resumeOffset) { $curlWrapper->setResumeOffset($resumeOffset); } else { // creates a temp file path $destFile = $this->getTempFilePath($sourceUrl); KalturaLog::debug("destFile [{$destFile}]"); $data->destFileLocalPath = $destFile; $data->fileSize = is_null($fileSize) ? -1 : $fileSize; $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING, $data); } $res = $curlWrapper->exec($sourceUrl, $data->destFileLocalPath); KalturaLog::debug("Curl results: {$res}"); if (!$res || $curlWrapper->getError()) { $errNumber = $curlWrapper->getErrorNumber(); if ($errNumber != CURLE_OPERATION_TIMEOUTED) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } else { clearstatcache(); $actualFileSize = kFile::fileSize($data->destFileLocalPath); if ($actualFileSize == $resumeOffset) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "No new information. Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } if (!$fileSize) { $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Received timeout, but no filesize available. Completed size [{$actualFileSize}]" . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY); $curlWrapper->close(); return $job; } } } $curlWrapper->close(); if (!file_exists($data->destFileLocalPath)) { $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, "Error: output file doesn't exist", KalturaBatchJobStatus::RETRY); return $job; } // check the file size only if its first or second retry // in case it failed few times, taks the file as is if ($fileSize) { clearstatcache(); $actualFileSize = kFile::fileSize($data->destFileLocalPath); if ($actualFileSize < $fileSize) { $percent = floor($actualFileSize * 100 / $fileSize); $this->updateJob($job, "Downloaded size: {$actualFileSize}({$percent}%)", KalturaBatchJobStatus::PROCESSING, $data); self::$kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType); // $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_WRONG_SIZE, "Expected file size[$fileSize] actual file size[$actualFileSize]", KalturaBatchJobStatus::RETRY); return $job; } KalturaLog::info("headers " . print_r($curlHeaderResponse, true)); $pluginInstances = KalturaPluginManager::getPluginInstances('IKalturaImportHandler'); foreach ($pluginInstances as $pluginInstance) { /* @var $pluginInstance IKalturaImportHandler */ $data = $pluginInstance->handleImportContent($curlHeaderResponse, $data, KBatchBase::$taskConfig->params); } } $this->updateJob($job, 'File imported, copy to shared folder', KalturaBatchJobStatus::PROCESSED); $job = $this->moveFile($job, $data->destFileLocalPath); } catch (kTemporaryException $tex) { $data->destFileLocalPath = KalturaClient::getKalturaNullValue(); $tex->setData($data); throw $tex; } catch (Exception $ex) { $data->destFileLocalPath = KalturaClient::getKalturaNullValue(); if ($ex->getMessage() == KCurlWrapper::COULD_NOT_CONNECT_TO_HOST_ERROR) { throw new kTemporaryException($ex->getMessage(), $ex->getCode(), $data); } $this->closeJob($job, KalturaBatchJobErrorTypes::RUNTIME, $ex->getCode(), "Error: " . $ex->getMessage(), KalturaBatchJobStatus::FAILED, $data); } return $job; }