// Script fragment: collect reference ids from rdmp_reference, then fetch the
// OCR text of each reference's page ($reference->PageID) and split it into lines.
//
// $sql is assigned three times in a row; only the last assignment (ISSN
// 0007-5167, title NOT matching "^Case [0-9]+$", year > 1980) reaches
// $db->Execute(). The earlier two look like leftover alternatives.
//
// NOTE(review): $ids is not initialised in the visible code, so the foreach
// depends on the query returning at least one row -- confirm.
// NOTE(review): explode("\\n", ...) splits on the two-character sequence
// backslash + n, not on a real newline; presumably the OCR text is stored with
// escaped newlines -- confirm against bhl_fetch_ocr_text().
//
// The trailing block comment (a loop scanning lines for one starting with
// "RULING") is commented out and is not closed within this fragment.
$sql = 'select reference_id from rdmp_reference where issn="0007-5167" AND title REGEXP "^Opinion [0-9]+$"'; $sql = 'select reference_id from rdmp_reference where reference_id=80624'; // Not opinion titles $sql = 'select reference_id from rdmp_reference where issn="0007-5167" AND title NOT REGEXP "^Case [0-9]+$" AND year > 1980'; $result = $db->Execute($sql); if ($result == false) { die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql); } while (!$result->EOF) { $ids[] = $result->fields['reference_id']; $result->MoveNext(); } foreach ($ids as $reference_id) { $reference = db_retrieve_reference($reference_id); // Get text of first page $text = bhl_fetch_ocr_text($reference->PageID); //echo $text; $lines = explode("\\n", $text); /* print_r($lines); $n = count($lines); $i = 1; $done = false; $title = ''; while (($i < $n) && !$done) { //echo $lines[$i] . "\n"; if (preg_match('/^RULING/i', $lines[$i])) { $done = true; }
// Script fragment. The leading closing brace ends a construct opened before
// this fragment.
//
// For each ItemID in the hard-coded $items list (currently only 32774; the
// other candidates are commented out), select the item's PageIDs from
// page joined to bhl_page, ordered by page.SequenceOrder, print the list, and
// then for every page echo its id and call bhl_fetch_page_image() and
// bhl_fetch_ocr_text(). The return values of both calls are discarded.
//
// NOTE(review): the meaning of the extra arguments ('' and 30) passed to
// bhl_fetch_ocr_text() is not visible here -- confirm against its definition.
} // BMNH 12 (Madagascar grasshoppers) //$items = array(19513); //$items = array(32860); // Proceedings of the United States National Museum v47 1915 Rathburn crab plates //$items = array(47571); // Catalogue of colubrine snakes in the collection of the British Museum $items = array(32774); foreach ($items as $ItemID) { // This will hold pages $pages = array(); // Find all pages for this item, ordered by SequenceOrder $sql = 'SELECT page.PageID FROM page INNER JOIN bhl_page USING (PageID) WHERE (page.ItemID = ' . $ItemID . ') ORDER BY page.SequenceOrder '; $result = $db->Execute($sql); if ($result == false) { die("failed [" . __LINE__ . "]: " . $sql); } while (!$result->EOF) { $pages[] = $result->fields['PageID']; $result->MoveNext(); } print_r($pages); foreach ($pages as $page) { echo "{$page}\n"; bhl_fetch_page_image($page); bhl_fetch_ocr_text($page, '', 30); } }
// Script fragment (starts and ends mid-script).
//
// 1. Sets the page-image and search URL templates on $obj->resources and
//    initialises $obj->sections and $obj->annotations to empty arrays.
// 2. Text indexing: looks for a row in rdmp_documentcloud for reference $id.
//    If exactly one row comes back (query uses LIMIT 1) nothing is done;
//    otherwise one row is inserted per entry of $bhl_pages, holding the page
//    number (page_order + 1) and the page's OCR text with every
//    backslash + n sequence replaced by a space.
// 3. Sends a text/plain Content-type header and, when $callback is not empty,
//    echoes the callback name followed by "(".
//
// NOTE(review): $id is concatenated into the SQL unquoted; only the OCR text
// goes through $db->qstr(). Confirm $id is validated as an integer upstream.
// The final if (0) block is disabled code and is not closed in this fragment.
$obj->resources->page->image = 'http://biostor.org/reference/' . $id . '/pages/{page}-{size}'; $obj->resources->search = 'http://biostor.org/dvs/' . $id . '/json?q={query}'; $obj->sections = array(); $obj->annotations = array(); // support text indexing $sql = 'SELECT * FROM rdmp_documentcloud WHERE reference_id=' . $id . ' LIMIT 1'; //echo $sql; $result = $db->Execute($sql); if ($result == false) { die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql); } if ($result->NumRows() == 1) { } else { foreach ($bhl_pages as $p) { //print_r($p); $text = bhl_fetch_ocr_text($p->PageID); $text = str_replace("\\n", " ", $text); $sql = 'INSERT INTO rdmp_documentcloud(reference_id,page,ocr_text) VALUES(' . $id . ',' . ($p->page_order + 1) . ',' . $db->qstr($text) . ')'; $result = $db->Execute($sql); if ($result == false) { die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql); } } } header('Content-type: text/plain'); if ($callback != '') { echo $callback . '('; } if (0) { $obj->title = "a"; $obj->description = "b";
/**
 * Extract coordinates from the OCR text of every page of a reference and
 * record them as localities linked to the page.
 *
 * Pages come from bhl_retrieve_reference_pages(). For each page the OCR text
 * is handed to points_from_text(); every point found is stored through
 * db_store_locality() as a locality with an empty name, and the id that call
 * returns is linked to the page with bhl_store_locality_link(). A page that
 * yields no points is linked to locality id 0 instead.
 *
 * @param mixed $reference_id Identifier passed to bhl_retrieve_reference_pages().
 *
 * @return void
 */
function bhl_geocode_reference($reference_id)
{
    foreach (bhl_retrieve_reference_pages($reference_id) as $page) {
        $coordinates = points_from_text(bhl_fetch_ocr_text($page->PageID));

        if (count($coordinates) < 1) {
            // No localities on this page: link it to locality 0.
            bhl_store_locality_link($page->PageID, 0);
            continue;
        }

        foreach ($coordinates as $coordinate) {
            $locality = new stdClass();
            $locality->name = '';
            $locality->latitude = $coordinate->latitude;
            $locality->longitude = $coordinate->longitude;

            $stored_id = db_store_locality($locality);
            bhl_store_locality_link($page->PageID, $stored_id);
        }
    }
}
/**
 * Build linked thumbnail markup for the pages of a reference that contain a
 * given name and also appear to carry a figure caption.
 *
 * Candidate pages come from bhl_pages_in_reference_with_name(). A page is
 * kept only when at least one line of its OCR text starts with "Fig.",
 * "Figure" or "Figs." (case-insensitive). Lines are obtained by splitting
 * the OCR text on the two-character sequence backslash + n.
 *
 * @param mixed $reference_id Reference identifier; also used in the link URL.
 * @param mixed $NameBankID   Name identifier passed to
 *                            bhl_pages_in_reference_with_name().
 *
 * @return string Concatenated anchor/image markup, or '' when no page qualifies.
 */
function bhl_pages_with_name_thumbnails($reference_id, $NameBankID)
{
    global $config;

    $markup = '';

    foreach (bhl_pages_in_reference_with_name($reference_id, $NameBankID) as $page_id) {
        $ocr_lines = explode("\\n", bhl_fetch_ocr_text($page_id));

        // Keep only pages where some line begins with a figure caption marker.
        $caption_lines = preg_grep('/^(Fig\\.|Figure|Figs\\.)/i', $ocr_lines);
        if (count($caption_lines) == 0) {
            continue;
        }

        $image = bhl_fetch_page_image($page_id);

        $anchor_open = '<a href="' . $config['web_root'] . 'reference/' . $reference_id . '/page/' . $page_id . '">';
        $thumbnail_tag = '<img style="padding:2px;border:1px solid blue;margin:2px;" id="thumbnail_image_' . $page_id . '" src="' . $image->thumbnail->url . '" width="' . $image->thumbnail->width . '" height="' . $image->thumbnail->height . '"/>';

        $markup .= $anchor_open . $thumbnail_tag . '</a>';
    }

    return $markup;
}