/**
 * Returns the extractor class names ordered by their configured 'priority',
 * highest value first.
 *
 * The list is built once and then memoised in self::$sorted_extractor_classes.
 *
 * @return array List of class names
 */
protected static function get_extractor_classes()
{
    // Serve the memoised list when one has already been built.
    if (self::$sorted_extractor_classes) {
        return self::$sorted_extractor_classes;
    }

    // Discover candidates; the first entry is discarded
    // (presumably the FileTextExtractor base class itself - verify against ClassInfo).
    $candidates = ClassInfo::subclassesFor("FileTextExtractor");
    array_shift($candidates);

    // Look up each candidate's configured priority, keyed by class name.
    $priorityByClass = array();
    foreach ($candidates as $candidate) {
        $priorityByClass[$candidate] = Config::inst()->get($candidate, 'priority');
    }

    // arsort() orders by value descending while keeping the class-name keys.
    arsort($priorityByClass);

    // Only the ordered class names are cached and returned.
    self::$sorted_extractor_classes = array_keys($priorityByClass);

    return self::$sorted_extractor_classes;
}
/**
 * Returns the text content of the owning file, using whichever FileTextExtractor
 * handles the file's type. Extracted text is stored in the owner's
 * FileContentCache field and the owner is written.
 *
 * @param boolean $forceParse If false, a non-empty FileContentCache value is returned
 *                            without parsing. If true, the file is always re-parsed.
 * @return string|null The extracted text, or null when no extractor is available
 *                     or the extractor produced no content
 */
function extractFileAsText($forceParse = false)
{
    $file = $this->owner;

    // Reuse the cached text unless a fresh parse was requested.
    if (!$forceParse && $file->FileContentCache) {
        return $file->FileContentCache;
    }

    // Ask the extractor registry for a handler, then pull the content out.
    $path = $file->FullPath;
    $handler = FileTextExtractor::for_file($path);
    $content = $handler ? $handler->getContent($path) : null;

    // Covers both "no handler found" and "handler returned nothing".
    if (!$content) {
        return null;
    }

    // Persist the result on the record so later calls can skip parsing.
    $file->FileContentCache = $content;
    $file->write();

    return $content;
}
/**
 * Returns the text content of the owning file, using whichever FileTextExtractor
 * handles the file's type. Extracted text is saved through the text cache
 * returned by getTextCache().
 *
 * @param boolean $disableCache If false, a non-empty cached value is returned without
 *                              parsing. If true, the cache lookup is skipped and the
 *                              file is always re-parsed.
 * @return string|null The extracted text, or null when no extractor is available
 *                     or the extractor produced no content
 */
public function extractFileAsText($disableCache = false)
{
    $file = $this->owner;

    // Consult the cache first unless the caller opted out.
    if (!$disableCache) {
        $cached = $this->getTextCache()->load($file);
        if ($cached) {
            return $cached;
        }
    }

    // Ask the extractor registry for a handler, then pull the content out.
    $path = $file->FullPath;
    $handler = FileTextExtractor::for_file($path);
    $content = $handler ? $handler->getContent($path) : null;

    // Covers both "no handler found" and "handler returned nothing".
    if (!$content) {
        return null;
    }

    // Store the fresh result so later calls can skip parsing.
    $this->getTextCache()->save($file, $content);

    return $content;
}
/**
 * Finds an extractor able to handle the given file, based on its extension.
 *
 * Candidate classes are tried in the order returned by get_extractor_classes();
 * the first one whose supportedExtensions() contains the file's extension
 * (compared case-insensitively) is instantiated and returned.
 *
 * @param string $path Path (or file name) whose extension selects the extractor
 * @return FileTextExtractor|null Matching extractor instance, or null if none matched
 */
public static function for_file($path)
{
    // Normalise the extension once instead of on every comparison.
    $extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

    // Delegate discovery/sorting to get_extractor_classes() so that only one code
    // path populates self::$sorted_extractor_classes and the cached value always
    // has the same shape (a list of class names).
    foreach (self::get_extractor_classes() as $className) {
        $extractor = new $className();

        foreach ($extractor->supportedExtensions() as $supported) {
            // Strict comparison avoids numeric-string juggling (e.g. "1e3" == "1000").
            if (strtolower($supported) === $extension) {
                return $extractor;
            }
        }
    }

    // No registered extractor supports this extension.
    return null;
}