$tag_post = '</b>';
} elseif ($format == "XML") {
    // XML results: plain XML when $callback is false, otherwise the response
    // is labelled as JavaScript (presumably a JSONP wrapper - confirm with the
    // code that emits the response).
    if (false === $callback) {
        header('Content-type: text/xml');
    } else {
        // header() expects a full "Name: value" line. The bare media type used
        // previously is not a valid header line, so the intended content type
        // was never sent.
        header('Content-type: application/x-javascript');
    }
    // NOTE(review): the closing tag is assigned to $tag_pre and the opening tag
    // to $tag_post, the reverse of the '</b>' assignment above. Left unchanged
    // because the code consuming these is not visible here - confirm intent.
    $tag_pre = '</CONTEXT>';
    $tag_post = '<CONTEXT>';
} else {
    fatal("Unknown format request. ");
}

// Ensure file is readable
checkPrivs($url);

// This looks like where we load the djvu.xml - $$$ and rapidly exhaust memory for large books such as OED
if (!($document = file_get_contents($url))) {
    fatal("could not load {$url}");
}
// Time elapsed since $time0, i.e. up to the point where the document is loaded
$time1 = microtime(true) - $time0;

//// Pass 1 - build up page* arrays with xml fragments corresponding to matches
$pagenumber = 0;
// Splitting on the closing tag yields one chunk per <OBJECT> element
foreach (explode('</OBJECT>', $document) as $page) {
    $pagenumber++;
    // Only chunks that match the search terms and actually contain an <OBJECT> are processed
    if (matches_terms($page, $terms) && strstr($page, '<OBJECT ')) {
        // extract the page value so that we know what page we are on,
        // Brad Neuberg, bkn3@columbia.edu
        if (!preg_match('|<PARAM name="PAGE" value="([^"]*)"\\s*\\/>|', $page, $match)) {
            fatal("page value not set on page number {$pagenumber} in {$page}!");
        }
This file is part of BookReader.

BookReader is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

BookReader is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with BookReader.  If not, see <http://www.gnu.org/licenses/>.

The BookReader source is hosted at http://github.com/openlibrary/bookreader/
*/

//$env = 'LD_LIBRARY_PATH=/petabox/sw/lib/lxml/lib PYTHONPATH=/petabox/sw/lib/lxml/lib/python2.5/site-packages:$PYTHONPATH';

// Read the query parameters defensively. A missing key would raise a warning
// (which may be printed before the headers are sent) and hand null to
// escapeshellarg(); an array value (e.g. ?path[]=x) is not a valid argument
// for it at all. Anything that is not a plain string is treated as ''.
$rawPath     = (isset($_GET['path']) && is_string($_GET['path'])) ? $_GET['path'] : '';
$rawPage     = (isset($_GET['page']) && is_string($_GET['page'])) ? $_GET['page'] : '';
$rawCallback = (isset($_GET['callback']) && is_string($_GET['callback'])) ? $_GET['callback'] : '';

// Refuse the request unless the requested file is readable.
// An empty path is never readable, so a missing 'path' still ends in a 403.
checkPrivs($rawPath);

// Every value is shell-quoted before being placed on the command line.
$path = escapeshellarg($rawPath);
$page = escapeshellarg($rawPage);
// NOTE(review): the callback is forwarded to the Python script without being
// validated as a JavaScript identifier - confirm the script validates it.
$callback = escapeshellarg($rawCallback);

header('Content-Type: application/javascript');
// The script's output is streamed straight to the client.
passthru("python BookReaderGetText.py {$path} {$page} {$callback}");

/**
 * Ends the request with "403 Forbidden" unless the given file is readable.
 *
 * @param string $filename Path of the file about to be served.
 * @return void Returns normally only when the file is readable; otherwise exits.
 */
function checkPrivs($filename) {
    if (!is_readable($filename)) {
        header('HTTP/1.1 403 Forbidden');
        exit(0);
    }
}
checkPrivs($scanDataFile);
$scanData = simplexml_load_file($scanDataFile);
} elseif (file_exists($scanDataZip)) {
    // Scan data packed in a zip: have unzip print scandata.xml to stdout
    // and parse the captured text.
    checkPrivs($scanDataZip);
    $cmd = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
    // exec() appends to an existing array, so start from an empty one to be
    // sure only unzip's output ends up in the XML string.
    $output = array();
    exec($cmd, $output, $retval);
    if ($retval != 0) {
        BRFatal("Could not unzip ScanData!");
    }
    $dump = join("\n", $output);
    $scanData = simplexml_load_string($dump);
} elseif (file_exists("{$itemPath}/scandata.xml")) {
    // For e.g. Scribe v.0 books!
    checkPrivs("{$itemPath}/scandata.xml");
    $scanData = simplexml_load_file("{$itemPath}/scandata.xml");
} else {
    BRFatal("ScanData file not found!");
}

// The simplexml loaders return false when the XML cannot be parsed; report
// that here instead of carrying a non-object forward.
if (isset($scanData) && $scanData === false) {
    BRFatal("Could not parse ScanData!");
}

$metaDataFile = "{$itemPath}/{$id}_meta.xml";
if (!file_exists($metaDataFile)) {
    BRFatal("MetaData file not found!");
}
// Apply the same readability check used for every scandata source above.
checkPrivs($metaDataFile);
$metaData = simplexml_load_file($metaDataFile);

//$firstLeaf = $scanData->pageData->page[0]['leafNum'];
?>
<?php
return preg_match($pattern, $identifier) == 1;
}

/**
 * Stops the request with "403 Forbidden" when the given file cannot be read.
 *
 * @param string $filename Path of the file that is about to be used.
 * @return void Returns normally only for readable files; otherwise exits.
 */
function checkPrivs($filename) {
    if (is_readable($filename)) {
        return;
    }
    header('HTTP/1.1 403 Forbidden');
    exit(0);
}

// Prefer the gzipped ABBYY file and fall back to the zipped one when the
// gzipped file does not exist.
$filename = "{$path}/{$doc}_abbyy.gz";
if (!file_exists($filename)) {
    $filename = "{$path}/{$doc}_abbyy.zip";
}
// Whichever candidate was selected is only privilege-checked if it exists.
if (file_exists($filename)) {
    checkPrivs($filename);
}

// A callback that is supplied must pass validation before anything is sent.
if ($callback && !isValidCallback($callback)) {
    throw new Exception("Invalid callback");
}
// JSON by default; JSONP is not JSON, so it is labelled as JavaScript.
$contentType = $callback ? 'text/javascript' : 'application/json';

header('Content-type: ' . $contentType . ';charset=UTF-8');
header('Access-Control-Allow-Origin: *'); // allow cross-origin requests

$item_id = escapeshellarg($item_id);