/**
 * Extracts text from a file using Apache Tika.
 *
 * Looks up the Tika service for the extractor named in
 * $this->configuration['extractor'] through the ServiceFactory and hands
 * the file over to that service's extractText() method.
 *
 * @param FileInterface $file The file to extract text from
 * @return string Text extracted from the input file
 */
public function extractText(FileInterface $file)
{
    $extractorName = $this->configuration['extractor'];

    return ServiceFactory::getTika($extractorName)->extractText($file);
}
/**
 * Detects the language of a file using Apache Tika and reports it as meta data.
 *
 * The Tika service is resolved through the ServiceFactory for the extractor
 * named in $this->configuration['extractor'].
 *
 * @param File $file The file whose language is to be detected
 * @param array $previousExtractedData Already extracted/existing data (not used by this method)
 * @return array Array with the single key 'language', holding whatever
 *               the Tika service's detectLanguageFromFile() returned
 */
public function extractMetaData(File $file, array $previousExtractedData = array())
{
    $tikaService = ServiceFactory::getTika($this->configuration['extractor']);
    $detectedLanguage = $tikaService->detectLanguageFromFile($file);

    return array('language' => $detectedLanguage);
}
/**
 * Extracts meta data from a file using Apache Tika.
 *
 * The raw meta data returned by the Tika service (resolved through the
 * ServiceFactory for the extractor named in
 * $this->configuration['extractor']) is passed through
 * $this->normalizeMetaData() before being returned.
 *
 * @param File $file The file to extract meta data from
 * @param array $previousExtractedData Already extracted/existing data (not used by this method)
 * @return array The result of normalizeMetaData() for the extracted meta data
 */
public function extractMetaData(File $file, array $previousExtractedData = array())
{
    $extractorName = $this->configuration['extractor'];
    $rawMetaData = ServiceFactory::getTika($extractorName)->extractMetaData($file);

    return $this->normalizeMetaData($rawMetaData);
}
/**
 * Verifies that ServiceFactory::getTika() throws a \RuntimeException when
 * the extension configuration in $GLOBALS has been replaced with the plain
 * string 'invalid configuration'.
 *
 * The original configuration is put back in exactly one place, before the
 * outcome is evaluated, so it is restored regardless of which exception
 * type (if any) the factory threw.
 *
 * @test
 */
public function getTikaThrowsExceptionForInvalidConfiguration()
{
    $backup = $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'];
    $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'] = 'invalid configuration';

    // Capture any exception instead of reacting inside the catch block, so
    // the global state below is always restored before the test ends.
    $caughtException = null;
    try {
        ServiceFactory::getTika('foo');
    } catch (\Exception $e) {
        $caughtException = $e;
    }

    $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'] = $backup;

    if ($caughtException === null) {
        $this->fail('Did not throw RuntimeException');
    }

    // An exception of any other type is not the expected outcome; let it
    // surface unchanged, exactly as it would have without the capture.
    if (!($caughtException instanceof \RuntimeException)) {
        throw $caughtException;
    }
}
/**
 * Initializes resources commonly needed for several actions.
 *
 * After running the parent initialization, unserializes the extension
 * configuration stored under
 * $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'] into
 * $this->tikaConfiguration and obtains the Tika service for its
 * 'extractor' entry from the ServiceFactory.
 *
 * @return void
 */
protected function initializeAction()
{
    parent::initializeAction();

    $serializedSettings = $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'];
    $settings = unserialize($serializedSettings);

    $this->tikaConfiguration = $settings;
    $this->tikaService = ServiceFactory::getTika($settings['extractor']);
}