/**
 * Downloads the job's source URL into a local file and hands the result to moveFile().
 *
 * SFTP/SCP jobs are delegated to fetchFileSsh(). For every other sub type, when
 * $data->destFileLocalPath already exists on disk the remote headers are read first:
 * a local file that is at least as large as the advertised content-length is moved
 * right away, a smaller one is resumed from its current size.
 *
 * Any exception raised inside the try block closes the job as FAILED.
 *
 * @param KalturaBatchJob $job
 * @param KalturaImportJobData $data
 * @return KalturaBatchJob
 */
private function fetchFile(KalturaBatchJob $job, KalturaImportJobData $data)
{
    KalturaLog::debug("fetchFile({$job->id})");

    $jobSubType = $job->jobSubType;
    if (in_array($jobSubType, array(kFileTransferMgrType::SCP, kFileTransferMgrType::SFTP))) {
        // use SSH file transfer manager for SFTP/SCP
        return $this->fetchFileSsh($job, $data);
    }

    try {
        $sourceUrl = $data->srcFileUrl;
        KalturaLog::debug("sourceUrl [{$sourceUrl}]");

        $this->updateJob($job, 'Downloading file header', KalturaBatchJobStatus::QUEUED, 1);

        $fileSize = null;
        $resumeOffset = 0;
        if ($data->destFileLocalPath && file_exists($data->destFileLocalPath)) {
            $curlWrapper = new KCurlWrapper($sourceUrl, $this->taskConfig->params->curlVerbose);
            // if the process crashed first time, tries with no body instead of range 0-0
            $useNoBody = $job->executionAttempts > 1;
            $curlHeaderResponse = $curlWrapper->getHeader($useNoBody);
            if (!$curlHeaderResponse || !count($curlHeaderResponse->headers)) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlWrapper->getErrorNumber(), "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::FAILED);
                // release the handle only after the error details were read from it
                $curlWrapper->close();
                return $job;
            }

            if ($curlWrapper->getError()) {
                // headers were received despite the error: log it and continue with a fresh wrapper
                KalturaLog::err("Headers error: " . $curlWrapper->getError());
                KalturaLog::err("Headers error number: " . $curlWrapper->getErrorNumber());
                $curlWrapper->close();

                $curlWrapper = new KCurlWrapper($sourceUrl, $this->taskConfig->params->curlVerbose);
            }

            if (!$curlHeaderResponse->isGoodCode()) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, "HTTP Error: " . $curlHeaderResponse->code . " " . $curlHeaderResponse->codeName, KalturaBatchJobStatus::FAILED);
                $curlWrapper->close();
                return $job;
            }

            if (isset($curlHeaderResponse->headers['content-length'])) {
                $fileSize = $curlHeaderResponse->headers['content-length'];
            }
            $curlWrapper->close();

            if ($fileSize) {
                clearstatcache();
                $actualFileSize = kFile::fileSize($data->destFileLocalPath);
                if ($actualFileSize >= $fileSize) {
                    // the local file is already complete
                    return $this->moveFile($job, $data->destFileLocalPath, $fileSize);
                } else {
                    $resumeOffset = $actualFileSize;
                }
            }
        }

        $curlWrapper = new KCurlWrapper($sourceUrl, $this->taskConfig->params->curlVerbose);
        $curlWrapper->setTimeout($this->taskConfig->params->curlTimeout);

        if ($resumeOffset) {
            $curlWrapper->setResumeOffset($resumeOffset);
        } else {
            // creates a temp file path
            $destFile = $this->getTempFilePath($sourceUrl);
            KalturaLog::debug("destFile [{$destFile}]");
            $data->destFileLocalPath = $destFile;
            $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING, 2, $data);
        }

        KalturaLog::debug("Executing curl");
        $res = $curlWrapper->exec($data->destFileLocalPath);
        KalturaLog::debug("Curl results: {$res}");

        if (!$res || $curlWrapper->getError()) {
            $errNumber = $curlWrapper->getErrorNumber();
            if ($errNumber != CURLE_OPERATION_TIMEOUTED) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                $curlWrapper->close();
                return $job;
            } else {
                // a timeout is tolerated only when the local file grew during this attempt
                clearstatcache();
                $actualFileSize = kFile::fileSize($data->destFileLocalPath);
                if ($actualFileSize == $resumeOffset) {
                    $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                    $curlWrapper->close();
                    return $job;
                }
            }
        }
        $curlWrapper->close();

        if (!file_exists($data->destFileLocalPath)) {
            $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, "Error: output file doesn't exist", KalturaBatchJobStatus::RETRY);
            return $job;
        }

        // the size can only be verified when the server advertised a content-length
        if ($fileSize) {
            clearstatcache();
            $actualFileSize = kFile::fileSize($data->destFileLocalPath);
            if ($actualFileSize < $fileSize) {
                // partial download: report progress and reset the attempts counter;
                // the job is deliberately not closed here, it is returned still in PROCESSING status
                $percent = floor($actualFileSize * 100 / $fileSize);
                $this->updateJob($job, "Downloaded size: {$actualFileSize}({$percent}%)", KalturaBatchJobStatus::PROCESSING, $percent, $data);
                $this->kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType);
                return $job;
            }
        }

        $this->updateJob($job, 'File imported, copy to shared folder', KalturaBatchJobStatus::PROCESSED, 90);

        $job = $this->moveFile($job, $data->destFileLocalPath, $fileSize);
    } catch (Exception $ex) {
        $this->closeJob($job, KalturaBatchJobErrorTypes::RUNTIME, $ex->getCode(), "Error: " . $ex->getMessage(), KalturaBatchJobStatus::FAILED);
    }

    return $job;
}
/**
 * Fetches the header for the given $url and closes the job on any errors.
 *
 * The job is closed as FAILED with a CURL error when no header response is returned
 * or the wrapper reports an error, and with an HTTP error when the response code is
 * not a good one.
 *
 * @param KalturaBatchJob $job
 * @param string $url
 * @return false|KCurlHeaderResponse false when the job was closed because of an error
 */
private function fetchHeader(KalturaBatchJob &$job, $url)
{
    KalturaLog::debug('Fetching header for [' . $url . ']');
    $this->updateJob($job, 'Downloading header for [' . $url . ']', KalturaBatchJobStatus::PROCESSING);

    // fetch the http headers
    $curlWrapper = new KCurlWrapper($url);
    $curlHeaderResponse = $curlWrapper->getHeader();
    // read the error details BEFORE closing: the wrapper must not be queried once it is closed
    $curlError = $curlWrapper->getError();
    $curlErrorNumber = $curlWrapper->getErrorNumber();
    $curlWrapper->close();

    if (!$curlHeaderResponse || $curlError) {
        // error fetching headers
        $msg = 'Error: ' . $curlError;
        KalturaLog::err($msg);
        $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlErrorNumber, $msg, KalturaBatchJobStatus::FAILED);
        return false;
    }

    if (!$curlHeaderResponse->isGoodCode()) {
        // some error exists in the response
        $msg = 'HTTP Error: ' . $curlHeaderResponse->code . ' ' . $curlHeaderResponse->codeName;
        KalturaLog::err($msg);
        $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, $msg, KalturaBatchJobStatus::FAILED);
        return false;
    }

    // header fetched successfully - return it
    return $curlHeaderResponse;
}
/**
 * Import handler for Webex recordings.
 *
 * When the downloaded content is a small (content-length below 16000) 'text/html'
 * page, the page is treated as a Webex download landing page: the URLs and the
 * prepare ticket are scraped from it, the prepare URL is polled until it reports
 * 'OKOK', and the recording is then downloaded to the same local path with an
 * '.arf' extension. Any other content is returned untouched.
 *
 * @param object $curlInfo header response of the original download; its `headers` array and
 *                         getCookieValue() method are used
 * @param object $importData import job data; destFileLocalPath and fileSize are updated
 * @param object $params task parameters; optional $params->webex->iterations (default 10)
 *                       and $params->webex->sleep in seconds (default 3) control the polling
 * @return object the (possibly updated) $importData
 * @throws kTemporaryException when the ticket is still not ready after all polling iterations
 * @throws Exception when the page cannot be parsed or the final download fails
 */
public static function handleImportContent($curlInfo, $importData, $params)
{
    // read the headers defensively: a missing header compares exactly like the original
    // null value did, but without raising an "undefined index" notice
    $contentLength = isset($curlInfo->headers['content-length']) ? $curlInfo->headers['content-length'] : null;
    $contentType = isset($curlInfo->headers['content-type']) ? $curlInfo->headers['content-type'] : null;

    if (!($contentLength < 16000 && $contentType == 'text/html')) {
        return $importData;
    }

    KalturaLog::debug('content-length [' . $contentLength . '] content-type [' . $contentType . ']');
    KalturaLog::info('Handle Import data: Webex Plugin');

    $matches = null;
    $recordId = null;
    if (isset($curlInfo->headers['set-cookie'])) {
        $recordId = $curlInfo->getCookieValue($curlInfo->headers['set-cookie'], 'recordId');
        if ($recordId == null) {
            throw new Exception('recordId value not found');
        }
    } else {
        throw new Exception('set-cookie was not found in header');
    }

    $data = file_get_contents($importData->destFileLocalPath);
    KalturaLog::info("data:\n\n{$data}\n\n");
    if (!preg_match("/href='([^']+)';/", $data, $matches)) {
        throw new Exception('Starting URL not found');
    }
    $url2 = $matches[1];

    $curlWrapper = new KCurlWrapper();
    try {
        $curlWrapper->setOpt(CURLOPT_COOKIE, 'DetectionBrowserStatus=3|1|32|1|11|2;' . $curlInfo->headers["set-cookie"]);
        $result = $curlWrapper->exec($url2);
        KalturaLog::info("result:\n\n{$result}\n\n");

        if (!preg_match("/var prepareTicket = '([^']+)';/", $result, $matches)) {
            throw new Exception('prepareTicket parameter not found');
        }
        $prepareTicket = $matches[1];

        if (!preg_match('/function (download\\(\\).+prepareTicket;)/s', $result, $matches)) {
            throw new Exception('download function not found');
        }

        if (!preg_match('/http.+prepareTicket/', $matches[0], $matches)) {
            throw new Exception('prepareTicket URL not found');
        }
        // turn the scraped javascript string concatenation into a plain URL
        $url3 = $matches[0];
        $url3 = str_replace(
            array('"', ' ', '+', 'recordId', 'prepareTicket=prepareTicket'),
            array('', '', '', $recordId, "prepareTicket={$prepareTicket}"),
            $url3
        );

        if (!preg_match("/var downloadUrl = '(http[^']+)' \\+ ticket;/", $result, $matches)) {
            throw new Exception('Download URL not found');
        }
        $url4 = $matches[1];

        $status = null;
        $iterations = isset($params->webex->iterations) && !is_null($params->webex->iterations) ? intval($params->webex->iterations) : 10;
        $sleep = isset($params->webex->sleep) && !is_null($params->webex->sleep) ? intval($params->webex->sleep) : 3;

        // poll the prepare URL until the ticket is ready or the iterations run out
        for ($i = 0; $i < $iterations; $i++) {
            $result = $curlWrapper->exec($url3);
            KalturaLog::info("result ({$i}):\n\n{$result}\n\n");

            if (!preg_match("/window\\.parent\\.func_prepare\\('([^']+)','([^']*)','([^']*)'\\);/", $result, $matches)) {
                KalturaLog::err("Invalid result returned for prepareTicket request - should contain call to the func_prepare method\n {$result}");
                throw new Exception('Invalid result: func_prepare function not found');
            }
            $status = $matches[1];
            if ($status == 'OKOK') {
                break;
            }

            sleep($sleep);
        }

        if ($status != 'OKOK') {
            KalturaLog::info("Invalid result returned for prepareTicket request. Last result:\n " . $result);
            throw new kTemporaryException('Invalid result returned for prepareTicket request');
        }

        // third argument of func_prepare() is the download ticket
        $ticket = $matches[3];
        $url4 .= $ticket;

        $curlWrapper->setOpt(CURLOPT_RETURNTRANSFER, false);

        // swap the extension of the local file for '.arf'; the file name is quoted so that
        // regex meta characters in it are matched literally
        $fileName = pathinfo($importData->destFileLocalPath, PATHINFO_FILENAME);
        $quotedFileName = preg_quote($fileName, '/');
        $destFileLocalPath = preg_replace("/{$quotedFileName}\\.[\\w\\d]+/", "{$fileName}.arf", $importData->destFileLocalPath);
        $importData->destFileLocalPath = $destFileLocalPath;
        KalturaLog::info('destination: ' . $importData->destFileLocalPath);

        $result = $curlWrapper->exec($url4, $importData->destFileLocalPath);
        if (!$result) {
            $code = $curlWrapper->getErrorNumber();
            $message = $curlWrapper->getError();
            throw new Exception($message, $code);
        }
    } catch (Exception $ex) {
        // do not leak the curl handle when scraping, polling or the download fails
        $curlWrapper->close();
        throw $ex;
    }
    $curlWrapper->close();

    $importData->fileSize = kFile::fileSize($importData->destFileLocalPath);
    return $importData;
}
/**
 * Downloads the job's source URL into a local temp file and hands the result to moveFile().
 *
 * The remote headers are always read first. When the server advertises a content-length
 * and $data->destFileLocalPath already exists on disk, a local file that is at least as
 * large is moved right away and a smaller one is resumed from its current size.
 * Otherwise a new temp file path is generated under the configured localTempPath,
 * keeping the extension found in the URL path (query string excluded).
 *
 * Any exception raised inside the try block closes the job as FAILED.
 *
 * @param KalturaBatchJob $job
 * @param KalturaImportJobData $data
 * @return KalturaBatchJob
 */
private function fetchFile(KalturaBatchJob $job, KalturaImportJobData $data)
{
    KalturaLog::debug("fetchFile({$job->id})");

    try {
        $sourceUrl = $data->srcFileUrl;
        KalturaLog::debug("sourceUrl [{$sourceUrl}]");

        $this->updateJob($job, 'Downloading file header', KalturaBatchJobStatus::QUEUED, 1);

        $curlWrapper = new KCurlWrapper($sourceUrl);
        // if the process crashed first time, tries with no body instead of range 0-0
        $useNoBody = $job->executionAttempts > 1;
        $curlHeaderResponse = $curlWrapper->getHeader($useNoBody);
        if (!$curlHeaderResponse || $curlWrapper->getError()) {
            $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlWrapper->getErrorNumber(), "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::FAILED);
            // release the handle only after the error details were read from it
            $curlWrapper->close();
            return $job;
        }

        if (!$curlHeaderResponse->isGoodCode()) {
            $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, "HTTP Error: " . $curlHeaderResponse->code . " " . $curlHeaderResponse->codeName, KalturaBatchJobStatus::FAILED);
            $curlWrapper->close();
            return $job;
        }

        $fileSize = null;
        if (isset($curlHeaderResponse->headers['content-length'])) {
            $fileSize = $curlHeaderResponse->headers['content-length'];
        }
        $curlWrapper->close();

        $resumeOffset = 0;
        if ($fileSize && $data->destFileLocalPath && file_exists($data->destFileLocalPath)) {
            clearstatcache();
            $actualFileSize = filesize($data->destFileLocalPath);
            if ($actualFileSize >= $fileSize) {
                // the local file is already complete
                return $this->moveFile($job, $data->destFileLocalPath, $fileSize);
            } else {
                $resumeOffset = $actualFileSize;
            }
        }

        $curlWrapper = new KCurlWrapper($sourceUrl);
        $curlWrapper->setTimeout($this->taskConfig->params->curlTimeout);

        if ($resumeOffset) {
            $curlWrapper->setResumeOffset($resumeOffset);
        } else {
            // creates a temp file path
            $rootPath = $this->taskConfig->params->localTempPath;

            $res = self::createDir($rootPath);
            if (!$res) {
                // NOTE(review): this terminates the whole process without closing the job - confirm this is intended
                KalturaLog::err("Cannot continue import without temp directory");
                die;
            }

            $uniqid = uniqid('import_');
            $destFile = realpath($rootPath) . "/{$uniqid}";
            KalturaLog::debug("destFile [{$destFile}]");

            // in case the url has added arguments, remove them (and reveal the URL path)
            // in order to find the file extension
            $urlPathEndIndex = strpos($sourceUrl, "?");
            if ($urlPathEndIndex !== false) {
                $sourceUrlPath = substr($sourceUrl, 0, $urlPathEndIndex);
            } else {
                $sourceUrlPath = $sourceUrl;
            }

            $ext = pathinfo($sourceUrlPath, PATHINFO_EXTENSION);
            if (strlen($ext)) {
                $destFile .= ".{$ext}";
            }

            $data->destFileLocalPath = $destFile;
            $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING, 2, $data);
        }

        KalturaLog::debug("Executing curl");
        $res = $curlWrapper->exec($data->destFileLocalPath);
        KalturaLog::debug("Curl results: {$res}");

        if (!$res || $curlWrapper->getError()) {
            $errNumber = $curlWrapper->getErrorNumber();
            if ($errNumber != CURLE_OPERATION_TIMEOUTED) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                $curlWrapper->close();
                return $job;
            } else {
                // a timeout is tolerated only when the local file grew during this attempt
                clearstatcache();
                $actualFileSize = filesize($data->destFileLocalPath);
                if ($actualFileSize == $resumeOffset) {
                    $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                    $curlWrapper->close();
                    return $job;
                }
            }
        }
        $curlWrapper->close();

        if (!file_exists($data->destFileLocalPath)) {
            $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, "Error: output file doesn't exist", KalturaBatchJobStatus::RETRY);
            return $job;
        }

        // the size can only be verified when the server advertised a content-length
        if ($fileSize) {
            clearstatcache();
            $actualFileSize = filesize($data->destFileLocalPath);
            if ($actualFileSize < $fileSize) {
                // partial download: report progress and reset the attempts counter;
                // the job is deliberately not closed here, it is returned still in PROCESSING status
                $percent = floor($actualFileSize * 100 / $fileSize);
                $this->updateJob($job, "Downloaded size: {$actualFileSize}({$percent}%)", KalturaBatchJobStatus::PROCESSING, $percent, $data);
                $this->kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType);
                return $job;
            }
        }

        $this->updateJob($job, 'File imported, copy to shared folder', KalturaBatchJobStatus::PROCESSED, 90);

        $job = $this->moveFile($job, $data->destFileLocalPath, $fileSize);
    } catch (Exception $ex) {
        $this->closeJob($job, KalturaBatchJobErrorTypes::RUNTIME, $ex->getCode(), "Error: " . $ex->getMessage(), KalturaBatchJobStatus::FAILED);
    }

    return $job;
}
/**
 * Downloads the job's source URL into a local file, lets the registered
 * IKalturaImportHandler plugins post-process it, and hands the result to moveFile().
 *
 * SFTP/SCP jobs are delegated to fetchFileSsh(). For every other sub type, when
 * $data->destFileLocalPath already exists on disk the remote headers are read first:
 * a local file that is at least as large as the advertised content-length is moved
 * right away, a smaller one is resumed from its current size. When the size is still
 * unknown, the headers are read once more before the download starts.
 *
 * @param KalturaBatchJob $job
 * @param KalturaImportJobData $data
 * @return KalturaBatchJob
 * @throws kTemporaryException rethrown as-is, or raised when the failure message equals
 *                             KCurlWrapper::COULD_NOT_CONNECT_TO_HOST_ERROR; carries $data
 */
private function fetchFile(KalturaBatchJob $job, KalturaImportJobData $data)
{
    $jobSubType = $job->jobSubType;

    $sshProtocols = array(kFileTransferMgrType::SCP, kFileTransferMgrType::SFTP);
    if (in_array($jobSubType, $sshProtocols)) {
        // use SSH file transfer manager for SFTP/SCP
        return $this->fetchFileSsh($job, $data);
    }

    try {
        $sourceUrl = $data->srcFileUrl;

        $this->updateJob($job, 'Downloading file header', KalturaBatchJobStatus::QUEUED);

        $fileSize = null;
        $resumeOffset = 0;
        if ($data->destFileLocalPath && file_exists($data->destFileLocalPath)) {
            $curlWrapper = new KCurlWrapper(self::$taskConfig->params);
            // if the process crashed first time, tries with no body instead of range 0-0
            $useNoBody = $job->executionAttempts > 1;
            $curlHeaderResponse = $curlWrapper->getHeader($sourceUrl, $useNoBody);
            if (!$curlHeaderResponse || !count($curlHeaderResponse->headers)) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $curlWrapper->getErrorNumber(), "Couldn't read file. Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::FAILED);
                // release the handle only after the error details were read from it
                $curlWrapper->close();
                return $job;
            }

            if ($curlWrapper->getError()) {
                // headers were received despite the error: log it and continue with a fresh wrapper
                KalturaLog::err("Headers error: " . $curlWrapper->getError());
                KalturaLog::err("Headers error number: " . $curlWrapper->getErrorNumber());
                $curlWrapper->close();

                $curlWrapper = new KCurlWrapper(self::$taskConfig->params);
            }

            if (!$curlHeaderResponse->isGoodCode()) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::HTTP, $curlHeaderResponse->code, "Failed while reading file. HTTP Error: " . $curlHeaderResponse->code . " " . $curlHeaderResponse->codeName, KalturaBatchJobStatus::FAILED);
                $curlWrapper->close();
                return $job;
            }

            if (isset($curlHeaderResponse->headers['content-length'])) {
                $fileSize = $curlHeaderResponse->headers['content-length'];
            }
            $curlWrapper->close();

            if ($fileSize) {
                clearstatcache();
                $actualFileSize = kFile::fileSize($data->destFileLocalPath);
                if ($actualFileSize >= $fileSize) {
                    // the local file is already complete
                    return $this->moveFile($job, $data->destFileLocalPath, $fileSize);
                } else {
                    $resumeOffset = $actualFileSize;
                }
            }
        }

        $curlWrapper = new KCurlWrapper(self::$taskConfig->params);

        if (is_null($fileSize)) {
            // Read file size
            $curlHeaderResponse = $curlWrapper->getHeader($sourceUrl, true);
            if ($curlHeaderResponse && count($curlHeaderResponse->headers) && !$curlWrapper->getError() && isset($curlHeaderResponse->headers['content-length'])) {
                $fileSize = $curlHeaderResponse->headers['content-length'];
            }

            // Close the curl used to fetch the header and create a new one.
            // When fetching headers we set curl options that are not reset once the header is fetched.
            // Not all servers support all the options so we need to remove them from our headers.
            $curlWrapper->close();
            $curlWrapper = new KCurlWrapper(self::$taskConfig->params);
        }

        if ($resumeOffset) {
            $curlWrapper->setResumeOffset($resumeOffset);
        } else {
            // creates a temp file path
            $destFile = $this->getTempFilePath($sourceUrl);
            KalturaLog::debug("destFile [{$destFile}]");
            $data->destFileLocalPath = $destFile;
            // -1 marks an unknown size
            $data->fileSize = is_null($fileSize) ? -1 : $fileSize;
            $this->updateJob($job, "Downloading file, size: {$fileSize}", KalturaBatchJobStatus::PROCESSING, $data);
        }

        $res = $curlWrapper->exec($sourceUrl, $data->destFileLocalPath);
        KalturaLog::debug("Curl results: {$res}");

        if (!$res || $curlWrapper->getError()) {
            $errNumber = $curlWrapper->getErrorNumber();
            if ($errNumber != CURLE_OPERATION_TIMEOUTED) {
                $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                $curlWrapper->close();
                return $job;
            } else {
                // a timeout is tolerated only when the local file grew during this attempt
                // and the expected size is known
                clearstatcache();
                $actualFileSize = kFile::fileSize($data->destFileLocalPath);
                if ($actualFileSize == $resumeOffset) {
                    $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "No new information. Error: " . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                    $curlWrapper->close();
                    return $job;
                }

                if (!$fileSize) {
                    $this->closeJob($job, KalturaBatchJobErrorTypes::CURL, $errNumber, "Received timeout, but no filesize available. Completed size [{$actualFileSize}]" . $curlWrapper->getError(), KalturaBatchJobStatus::RETRY);
                    $curlWrapper->close();
                    return $job;
                }
            }
        }
        $curlWrapper->close();

        if (!file_exists($data->destFileLocalPath)) {
            $this->closeJob($job, KalturaBatchJobErrorTypes::APP, KalturaBatchJobAppErrors::OUTPUT_FILE_DOESNT_EXIST, "Error: output file doesn't exist", KalturaBatchJobStatus::RETRY);
            return $job;
        }

        // the size can only be verified when the server advertised a content-length
        if ($fileSize) {
            clearstatcache();
            $actualFileSize = kFile::fileSize($data->destFileLocalPath);
            if ($actualFileSize < $fileSize) {
                // partial download: report progress and reset the attempts counter;
                // the job is deliberately not closed here, it is returned still in PROCESSING status
                $percent = floor($actualFileSize * 100 / $fileSize);
                $this->updateJob($job, "Downloaded size: {$actualFileSize}({$percent}%)", KalturaBatchJobStatus::PROCESSING, $data);
                self::$kClient->batch->resetJobExecutionAttempts($job->id, $this->getExclusiveLockKey(), $job->jobType);
                return $job;
            }

            KalturaLog::info("headers " . print_r($curlHeaderResponse, true));
            // let every import handler plugin inspect and possibly replace the downloaded content
            $pluginInstances = KalturaPluginManager::getPluginInstances('IKalturaImportHandler');
            foreach ($pluginInstances as $pluginInstance) {
                /* @var $pluginInstance IKalturaImportHandler */
                $data = $pluginInstance->handleImportContent($curlHeaderResponse, $data, KBatchBase::$taskConfig->params);
            }
        }

        $this->updateJob($job, 'File imported, copy to shared folder', KalturaBatchJobStatus::PROCESSED);

        $job = $this->moveFile($job, $data->destFileLocalPath);
    } catch (kTemporaryException $tex) {
        // clear the local path before the exception travels up with the job data
        $data->destFileLocalPath = KalturaClient::getKalturaNullValue();
        $tex->setData($data);
        throw $tex;
    } catch (Exception $ex) {
        $data->destFileLocalPath = KalturaClient::getKalturaNullValue();
        if ($ex->getMessage() == KCurlWrapper::COULD_NOT_CONNECT_TO_HOST_ERROR) {
            // connection failures are converted to a temporary exception instead of failing the job
            throw new kTemporaryException($ex->getMessage(), $ex->getCode(), $data);
        }
        $this->closeJob($job, KalturaBatchJobErrorTypes::RUNTIME, $ex->getCode(), "Error: " . $ex->getMessage(), KalturaBatchJobStatus::FAILED, $data);
    }

    return $job;
}