/**
 * Returns the plain-text rendition of this eContent item.
 *
 * The text is cached in "{fullTextPath}/{recordId}.txt". When the cache file
 * already exists its contents are returned directly. Otherwise, for EPUB
 * items, every non-image / non-CSS manifest component is loaded, stripped of
 * markup and appended to the cache file, which is then read back.
 *
 * @return string|false The extracted text; "" when no text is available
 *                      (unreadable EPUB, cache file not writable, PDF or
 *                      unknown item type); false if file_get_contents() fails.
 */
function getFullText() {
	global $configArray;

	$fullTextPath = $configArray['EContent']['fullTextPath'];
	$textFile = "{$fullTextPath}/{$this->recordId}.txt";

	// Check to see if the text has already been extracted
	if (file_exists($textFile)) {
		return file_get_contents($textFile);
	}

	if ($this->item_type == 'text') {
		// NOTE(review): $textFile is known not to exist at this point, so this
		// read fails and yields false. Behaviour kept as-is for callers; the
		// intended source for 'text' items should be confirmed.
		return file_get_contents($textFile);
	}

	if ($this->item_type == 'epub') {
		require_once ROOT_DIR . '/sys/eReader/ebook.php';
		$epubFile = $configArray['EContent']['library'] . '/' . $this->filename;
		$ebook = new ebook($epubFile);
		if ($ebook->readErrorOccurred()) {
			return "";
		}

		$fhnd = fopen($textFile, 'w');
		if ($fhnd === false) {
			// Cache file cannot be created; nothing to extract into.
			return "";
		}

		// Manifest entries of these types carry no readable text.
		$skippedTypes = array('image/jpeg', 'image/gif', 'image/tif', 'text/css');
		for ($i = 0; $i < $ebook->getManifestSize(); $i++) {
			$manifestType = $ebook->getManifestItem($i, 'type');
			if (in_array($manifestType, $skippedTypes)) {
				continue;
			}
			$manifestId = $ebook->getManifestItem($i, 'id');
			try {
				$componentText = $ebook->getContentById($manifestId);
				fwrite($fhnd, strip_tags($componentText));
			} catch (Exception $e) {
				// Best effort: a component that cannot be loaded is skipped so
				// the remaining components are still extracted.
			}
		}
		fclose($fhnd);
		return file_get_contents($textFile);
	}

	if ($this->item_type == 'pdf') {
		// PDF extraction (previously done by shelling out to the PDFBox jar
		// configured in $configArray['EContent']['pdfbox']) is disabled because
		// it takes too long for large files.
		return "";
	}

	// Full text not available
	return "";
}
/**
 * Loads one component of an ebook, chosen by the "component" request
 * parameter, and prepares it for delivery to the reader.
 *
 * Image and CSS components are returned raw, with a matching Content-type
 * header. Any other component has its references rewritten and is returned
 * JSON-encoded as {"result": "..."}:
 *  - image hrefs from the manifest become links back to this method,
 *  - CSS hrefs are blanked out,
 *  - anchors pointing at other manifest entries become showTocEntry() calls.
 *
 * @param ebook  $ebook The opened ebook to read from
 * @param string $id    Record id, used as the path segment after /EContent/
 * @param string $item  Passed through as the "item" query parameter
 * @param string $file  Passed through as the "file" query parameter
 * @return mixed|string Raw component data, a JSON string, or an error message
 *                      when the component cannot be loaded
 */
function getComponentCustom($ebook, $id, $item, $file) {
	global $configArray;

	// The component id comes from the request; tolerate it being absent or
	// not a plain string instead of raising notices / type errors.
	$component = '';
	if (isset($_REQUEST['component']) && is_string($_REQUEST['component'])) {
		$component = $_REQUEST['component'];
	}
	$component = stripslashes($component);

	// Drop a trailing "#fragment" (only when the '#' is not the first character).
	$fragmentPos = strpos($component, "#");
	if ($fragmentPos > 0) {
		$component = substr($component, 0, $fragmentPos);
	}

	try {
		$componentText = $ebook->getContentById($component);
	} catch (Exception $e) {
		return 'Unable to load content for component ' . $component;
	}

	$rawTypes = array('image/jpeg', 'image/gif', 'image/tif', 'text/css');

	// Get the componentType of the content we are getting.
	// Comparisons stay loose on purpose: the concrete type returned by
	// getManifestItem() is not visible from here.
	$componentType = null;
	for ($i = 0; $i < $ebook->getManifestSize(); $i++) {
		if ($ebook->getManifestItem($i, 'id') == $component) {
			$componentType = $ebook->getManifestItem($i, 'type');
		}
	}

	if ($componentType !== null && in_array($componentType, $rawTypes)) {
		// Do not json encode the data
		header("Content-type: {$componentType}");
		return $componentText;
	}

	// After we get the component, we need to do some processing to fix internal
	// links, images, and css files so they display properly.
	// Loop through the manifest to find any files that are referenced.
	for ($i = 0; $i < $ebook->getManifestSize(); $i++) {
		$manifestId = $ebook->getManifestItem($i, 'id');
		$manifestHref = $ebook->getManifestItem($i, 'href');
		$manifestType = $ebook->getManifestItem($i, 'type');

		if (in_array($manifestType, $rawTypes)) {
			$href = (string)$manifestHref;
			if ($href === '') {
				// An empty search string has nothing meaningful to replace.
				continue;
			}
			if ($manifestType == 'text/css') {
				// Ignore css for now
				$replacement = '';
			} else {
				// URL-encode every dynamic piece; the receiving request
				// decodes the query parameters again.
				$replacement = $configArray['Site']['path'] . "/EContent/" . rawurlencode($id)
					. "/JSON?method=getComponentCustom&item=" . urlencode($item)
					. "&component=" . urlencode($manifestId)
					. "&file=" . urlencode($file);
			}
			// Literal replacement: the href is plain text, not a pattern, and
			// the URL must not be scanned for regex backreferences.
			$componentText = str_replace($href, $replacement, $componentText);
		} else {
			// Link to another location within the document.
			// NOTE(review): $manifestId is interpolated into the onclick
			// handler unescaped; confirm manifest ids cannot contain quotes.
			$quotedManifest = preg_quote($manifestHref, '/');
			$componentText = preg_replace('/<a href=["\']' . $quotedManifest . '["\']/', "<a onclick=\"return showTocEntry('{$manifestId}');\" href=\"#\"", $componentText);
			$componentText = preg_replace('/<a href=["\']' . $quotedManifest . '#(.*?)["\']/', "<a onclick=\"return showTocEntry('{$manifestId}#\\1');\" href=\"#\"", $componentText);
		}
	}

	header('Content-type: text/plain');
	header('Cache-Control: no-cache, must-revalidate'); // HTTP/1.1
	header('Expires: Mon, 26 Jul 1997 05:00:00 GMT'); // Date in the past

	// preg_replace() returns null on failure; never encode a null result.
	if (is_null($componentText)) {
		$componentText = '';
	}
	return json_encode(array('result' => $componentText));
}