/**
  * Return the plain text of this record, extracting and caching it on first use.
  *
  * The cache file is "<EContent.fullTextPath>/<recordId>.txt". When it exists its
  * contents are returned as-is. Otherwise, for 'epub' items, every manifest entry
  * that is not an image or stylesheet is loaded, stripped of markup, concatenated,
  * written to the cache file and returned.
  *
  * @return string|false The extracted text, "" when no text is available, or
  *                      whatever file_get_contents() yields for the cache file.
  */
function getFullText()
 {
     global $configArray;
     $fullTextPath = $configArray['EContent']['fullTextPath'];
     $textFile = "{$fullTextPath}/{$this->recordId}.txt";

     //Check to see if the text has already been extracted
     if (file_exists($textFile)) {
         return file_get_contents($textFile);
     }

     if ($this->item_type == 'text') {
         // NOTE(review): at this point $textFile is known not to exist, so this
         // read fails (warning + false). Behaviour is preserved because the
         // intended source of plain-text items is not visible here -- confirm
         // and point this at the right file.
         return file_get_contents($textFile);
     }

     if ($this->item_type != 'epub') {
         // PDF extraction is intentionally disabled (too slow for large files);
         // for every other item type full text is not available.
         return "";
     }

     require_once ROOT_DIR . '/sys/eReader/ebook.php';
     $epubFile = $configArray['EContent']['library'] . '/' . $this->filename;
     $ebook = new ebook($epubFile);
     if ($ebook->readErrorOccurred()) {
         return "";
     }

     // Manifest entries of these types carry no readable text.
     $skippedTypes = array('image/jpeg', 'image/gif', 'image/tif', 'text/css');
     $fullText = "";
     for ($i = 0; $i < $ebook->getManifestSize(); $i++) {
         $manifestType = $ebook->getManifestItem($i, 'type');
         if (in_array($manifestType, $skippedTypes)) {
             continue;
         }
         $manifestId = $ebook->getManifestItem($i, 'id');
         try {
             $fullText .= strip_tags($ebook->getContentById($manifestId));
         } catch (Exception $e) {
             // Best effort: a component that cannot be loaded is skipped so the
             // remaining components are still extracted.
         }
     }

     // Cache the result for later calls. The text is built in memory first so a
     // failed write (unwritable directory, full disk) only costs the cache, not
     // the result of this call.
     file_put_contents($textFile, $fullText);
     return $fullText;
 }
 // Example #2
 // 0
 /**
  * Return a single component of an ebook, prepared for the in-browser reader.
  *
  * The component id is taken from $_REQUEST['component']; anything from the first
  * "#" onwards is dropped. Image and stylesheet components are returned raw with a
  * matching Content-type header. Every other component has its references
  * rewritten (images point back at this method, stylesheet references are blanked,
  * links to other manifest entries call showTocEntry()) and is returned as the
  * JSON document {"result": "<markup>"}.
  *
  * @param ebook $ebook Opened ebook to read the manifest and content from
  * @param string $id Record id, used in the path of generated image URLs
  * @param string $item Passed through as the "item" query parameter of generated URLs
  * @param string $file Passed through as the "file" query parameter of generated URLs
  * @return mixed|string Raw component data, a JSON string, or an error message
  */
 function getComponentCustom($ebook, $id, $item, $file)
 {
     global $configArray;
     $component = '';
     if (isset($_REQUEST['component']) && is_string($_REQUEST['component'])) {
         $component = $_REQUEST['component'];
     }
     // Kept so that component ids arriving with regex-style backslash escaping
     // (as produced when an id has been passed through preg_quote) still resolve.
     $component = stripslashes($component);
     $fragmentPos = strpos($component, "#");
     if ($fragmentPos > 0) {
         $component = substr($component, 0, $fragmentPos);
     }
     try {
         $componentText = $ebook->getContentById($component);
     } catch (Exception $e) {
         // The id comes straight from the request, so escape it before echoing it back.
         return 'Unable to load content for component ' . htmlspecialchars($component, ENT_QUOTES, 'UTF-8');
     }

     // Manifest types that are served as-is instead of being rewritten.
     $resourceTypes = array('image/jpeg', 'image/gif', 'image/tif', 'text/css');

     //Get the componentType of the content we are getting.
     for ($i = 0; $i < $ebook->getManifestSize(); $i++) {
         $manifestId = $ebook->getManifestItem($i, 'id');
         $manifestType = $ebook->getManifestItem($i, 'type');
         if ($manifestId == $component) {
             $componentType = $manifestType;
         }
     }

     if (isset($componentType) && in_array($componentType, $resourceTypes)) {
         header("Content-type: {$componentType}");
         //Do not json encode the data
         return $componentText;
     }

     //After we get the component, we need to do some processing to fix internal links, images, and css files
     //so they display properly.
     //Loop through the manifest to find any files that are referenced
     for ($i = 0; $i < $ebook->getManifestSize(); $i++) {
         $manifestId = $ebook->getManifestItem($i, 'id');
         $manifestHref = $ebook->getManifestItem($i, 'href');
         $manifestType = $ebook->getManifestItem($i, 'type');
         if (in_array($manifestType, $resourceTypes)) {
             //Stylesheet or image
             if (!is_string($manifestHref) || $manifestHref === '') {
                 // An empty href would match at every offset; nothing sensible to rewrite.
                 continue;
             }
             if ($manifestType == 'text/css') {
                 //Ignore css for now
                 $replacement = '';
             } else {
                 // URL-encode each value: these are URL parts, not regex fragments,
                 // so preg_quote-style escaping would put stray backslashes in the link.
                 $replacement = $configArray['Site']['path'] . "/EContent/" . rawurlencode((string) $id)
                     . "/JSON?method=getComponentCustom&item=" . rawurlencode((string) $item)
                     . "&component=" . rawurlencode((string) $manifestId)
                     . "&file=" . rawurlencode((string) $file);
             }
             // Plain literal substitution: no pattern syntax in the href and no
             // backreference syntax in the replacement can be misinterpreted.
             $componentText = str_replace($manifestHref, $replacement, $componentText);
         } else {
             //Link to another location within the document:
             //rewrite both the bare href and the href#fragment form to showTocEntry() calls.
             $quotedManifest = preg_quote($manifestHref, '/');
             $componentText = preg_replace('/<a href=["\']' . $quotedManifest . '["\']/', "<a onclick=\"return showTocEntry('{$manifestId}');\" href=\"#\"", $componentText);
             $componentText = preg_replace('/<a href=["\']' . $quotedManifest . '#(.*?)["\']/', "<a onclick=\"return showTocEntry('{$manifestId}#\\1');\" href=\"#\"", $componentText);
         }
     }

     header('Content-type: text/plain');
     header('Cache-Control: no-cache, must-revalidate');
     // HTTP/1.1
     header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
     // Date in the past
     if (is_null($componentText)) {
         // preg_replace() yields null on failure; fall back to an empty result.
         $componentText = '';
     }
     return json_encode(array('result' => $componentText));
 }