/**
 * Extract the text of a file by running the tool configured under
 * $this->app['catdoc'] and capturing its standard output in a temp file.
 *
 * Depending on TYPO3_VERSION_INTEGER either the namespaced core utilities
 * (>= 7000000) or the legacy t3lib_* classes are used.
 *
 * @param string $file Path of the file to read; shell-escaped before use
 * @return string|false The captured output, or false when the temp file
 *                      does not exist afterwards or the output is empty
 */
public function getContent($file)
{
    $useNamespacedCore = TYPO3_VERSION_INTEGER >= 7000000;

    // Reserve a unique temp file name for the tool's output.
    $outputPath = $useNamespacedCore
        ? TYPO3\CMS\Core\Utility\GeneralUtility::tempnam('doc_files-Indexer')
        : t3lib_div::tempnam('doc_files-Indexer');

    // Remove any file already sitting at that path; the shell redirect
    // below creates it again.
    @unlink($outputPath);

    // Build the command line; stdout is redirected into the temp file.
    $quotedSource = escapeshellarg($file);
    $quotedTarget = escapeshellarg($outputPath);
    $cmd = $this->app['catdoc'] . ' -s8859-1 -dutf-8 ' . $quotedSource . ' > ' . $quotedTarget;

    if ($useNamespacedCore) {
        TYPO3\CMS\Core\Utility\CommandUtility::exec($cmd);
    } else {
        t3lib_utility_Command::exec($cmd);
    }

    // Without an output file there is nothing to return.
    if (!@is_file($outputPath)) {
        return false;
    }

    $extracted = $useNamespacedCore
        ? TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($outputPath)
        : t3lib_div::getUrl($outputPath);
    unlink($outputPath);

    // Empty output counts as "no content".
    return strlen($extracted) ? $extracted : false;
}
/**
 * Extract the text of a file by running the tool configured under
 * $this->app['xls2csv'] and capturing its standard output in a temp file.
 *
 * @param string $file Path of the file to read; shell-escaped before use
 * @return string|false The captured output, or false when the temp file
 *                      does not exist afterwards or the output is empty
 */
public function getContent($file)
{
    // Reserve a unique temp file name for the tool's output.
    $outputPath = TYPO3\CMS\Core\Utility\GeneralUtility::tempnam('xls_files-Indexer');

    // Remove any file already sitting at that path; the shell redirect
    // below creates it again.
    @unlink($outputPath);

    // Build the command line; stdout is redirected into the temp file.
    $quotedSource = escapeshellarg($file);
    $quotedTarget = escapeshellarg($outputPath);
    $cmd = $this->app['xls2csv'] . ' -c \' \' -q 0 -s8859-1 -dutf-8 ' . $quotedSource . ' > ' . $quotedTarget;
    TYPO3\CMS\Core\Utility\CommandUtility::exec($cmd);

    // Without an output file there is nothing to return.
    if (!@is_file($outputPath)) {
        return false;
    }

    $extracted = TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($outputPath);
    unlink($outputPath);

    // Empty output counts as "no content".
    return strlen($extracted) ? $extracted : false;
}
/**
 * Extract the text of a PDF file by running the tool configured under
 * $this->app['pdftotext'], which writes into a temp file.
 *
 * As a side effect $this->fileInfo is replaced by a fresh
 * tx_kesearch_lib_fileinfo instance pointing at $file.
 *
 * @param string $file Path of the PDF file; shell-escaped before use
 * @return string|false The extracted text after removeEndJunk() (based on
 *                      an empty string when no output file was produced),
 *                      or false when getPdfInfo() yields nothing, the page
 *                      count is zero, or $this->isAppArraySet is not set
 */
public function getContent($file)
{
    $this->fileInfo = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_kesearch_lib_fileinfo');
    $this->fileInfo->setFile($file);

    // Read the PDF meta information; give up if none is available.
    $pdfInfo = $this->getPdfInfo($file);
    if (!$pdfInfo) {
        return false;
    }

    // Only continue for documents with at least one page and a usable
    // application configuration.
    if (!(int)$pdfInfo['pages'] || !$this->isAppArraySet) {
        return false;
    }

    // Reserve a unique temp file name and make sure the path is free so the
    // existence check below reflects what the tool produced.
    $outputPath = TYPO3\CMS\Core\Utility\GeneralUtility::tempnam('pdf_files-Indexer');
    @unlink($outputPath);

    // Build the command line; the tool receives the target path as argument.
    $quotedSource = escapeshellarg($file);
    $quotedTarget = escapeshellarg($outputPath);
    $cmd = $this->app['pdftotext'] . ' -enc UTF-8 -q ' . $quotedSource . ' ' . $quotedTarget;
    TYPO3\CMS\Core\Utility\CommandUtility::exec($cmd);

    if (!@is_file($outputPath)) {
        // No output file: record the failure and continue with empty text.
        $this->addError('Content for file ' . $file . ' could not be extracted. Maybe it is encrypted?');
        $extracted = '';
    } else {
        $extracted = TYPO3\CMS\Core\Utility\GeneralUtility::getUrl($outputPath);
        unlink($outputPath);
    }

    return $this->removeEndJunk($extracted);
}