/**
 * Extracts text from a file using Apache Tika.
 *
 * The Tika service is obtained from TikaServiceFactory based on the
 * 'extractor' entry of this object's configuration; the extraction itself
 * is delegated entirely to that service.
 *
 * @param FileInterface $file The file to pass to the Tika service
 * @return string Text extracted from the input file
 */
public function extractText(FileInterface $file) {
    return TikaServiceFactory::getTika($this->configuration['extractor'])
        ->extractText($file);
}
/**
 * Extracts meta data from a file using Apache Tika.
 *
 * The raw meta data reported by the Tika service is passed through
 * normalizeMetaData() before it is returned.
 *
 * @param Resource\File $file The file to pass to the Tika service
 * @param array $previousExtractedData Already extracted/existing data (accepted for interface compatibility; not read by this method)
 * @return array The result of normalizeMetaData() for the extracted meta data
 */
public function extractMetaData(Resource\File $file, array $previousExtractedData = array()) {
    $service = TikaServiceFactory::getTika($this->configuration['extractor']);
    $rawMetaData = $service->extractMetaData($file);

    return $this->normalizeMetaData($rawMetaData);
}