Example #1
0
 /**
  * Hands the file to the configured Tika service and returns the text
  * that service extracts from it.
  *
  * The service is resolved through ServiceFactory::getTika() using the
  * 'extractor' entry of this object's configuration.
  *
  * @param FileInterface $file File to extract text from
  * @return string Text extracted from the input file
  */
 public function extractText(FileInterface $file)
 {
     $extractorType = $this->configuration['extractor'];

     return ServiceFactory::getTika($extractorType)->extractText($file);
 }
Example #2
0
 /**
  * Detects the language of a file through the configured Tika service.
  *
  * The returned array contains a single entry, 'language', holding
  * whatever the service's detectLanguageFromFile() returns for the file.
  *
  * @param File $file File to inspect
  * @param array $previousExtractedData Already extracted/existing data (not used by this implementation)
  * @return array Meta data with the key 'language'
  */
 public function extractMetaData(File $file, array $previousExtractedData = array())
 {
     $tikaService = ServiceFactory::getTika($this->configuration['extractor']);

     return array(
         'language' => $tikaService->detectLanguageFromFile($file),
     );
 }
Example #3
0
 /**
  * Fetches a file's meta data from the configured Tika service and
  * returns it after passing it through normalizeMetaData().
  *
  * @param File $file File to extract meta data from
  * @param array $previousExtractedData Already extracted/existing data (not used by this implementation)
  * @return array Result of normalizeMetaData() for the extracted meta data
  */
 public function extractMetaData(File $file, array $previousExtractedData = array())
 {
     $extractorType = $this->configuration['extractor'];
     $rawMetaData = ServiceFactory::getTika($extractorType)->extractMetaData($file);

     return $this->normalizeMetaData($rawMetaData);
 }
Example #4
0
 /**
  * Verifies that ServiceFactory::getTika() throws a \RuntimeException when
  * the extension configuration stored in $GLOBALS is not valid.
  *
  * The global configuration entry is replaced for the duration of the call
  * and put back on every path, including when an unexpected exception
  * escapes, so that later tests never see the invalid value.
  *
  * @test
  */
 public function getTikaThrowsExceptionForInvalidConfiguration()
 {
     $backup = $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'];
     $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'] = 'invalid configuration';

     // Capture whatever is thrown first; deciding what it means happens only
     // after the global state has been restored.
     $caught = null;
     try {
         ServiceFactory::getTika('foo');
     } catch (\Exception $e) {
         $caught = $e;
     }

     $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'] = $backup;

     if ($caught === null) {
         $this->fail('Did not throw RuntimeException');
     }

     if (!($caught instanceof \RuntimeException)) {
         // Not the exception under test: let it surface unchanged.
         throw $caught;
     }
 }
 /**
  * Prepares the state shared by the actions: runs the parent
  * initialization, unserializes the extension configuration stored in
  * $GLOBALS and resolves the Tika service named by its 'extractor' entry.
  *
  * @return void
  */
 protected function initializeAction()
 {
     parent::initializeAction();

     $serializedConfiguration = $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['tika'];
     $this->tikaConfiguration = unserialize($serializedConfiguration);

     $extractorType = $this->tikaConfiguration['extractor'];
     $this->tikaService = ServiceFactory::getTika($extractorType);
 }