示例#1
0
 /**
  * Extracts the text content of a file via Apache Tika.
  *
  * Looks up the Tika service through TikaServiceFactory, selecting it with
  * the 'extractor' entry of this object's configuration, and hands the file
  * over to that service's extractText() method.
  *
  * @param FileInterface $file The file to extract text from
  * @return string Text extracted from the input file, exactly as returned by the Tika service
  */
 public function extractText(FileInterface $file)
 {
     // Delegate entirely to the configured Tika service; no post-processing happens here.
     return TikaServiceFactory::getTika($this->configuration['extractor'])
         ->extractText($file);
 }
示例#2
0
 /**
  * Extracts meta data from a file via Apache Tika and normalizes it.
  *
  * Looks up the Tika service through TikaServiceFactory, selecting it with
  * the 'extractor' entry of this object's configuration, asks it for the
  * file's meta data and passes the result through normalizeMetaData().
  *
  * @param Resource\File $file The file to extract meta data from
  * @param array $previousExtractedData Already extracted/existing data; not read by this method
  * @return array The value returned by normalizeMetaData() for the extracted meta data
  */
 public function extractMetaData(Resource\File $file, array $previousExtractedData = array())
 {
     $tika = TikaServiceFactory::getTika($this->configuration['extractor']);

     // Raw Tika output is normalized before it is handed back to the caller.
     return $this->normalizeMetaData($tika->extractMetaData($file));
 }