/**
 * Resolve a GenBank accession to a stored sequence record.
 *
 * If find_genbank() already knows the accession, the stored JSON is loaded via
 * retrieve_genbank_json(). Otherwise the record is requested from NCBI EFetch as
 * GBSeq XML, converted to JSON with the xsl/gb2JSON.xsl stylesheet and either
 * matched to an existing record through set_gi() or stored with store_genbank().
 *
 * Finally, when the record (or its specimen) carries both a latitude and a
 * longitude, a "MultiPoint" geometry object holding a single [longitude, latitude]
 * pair is attached to $item->source->geometry.
 *
 * @param string $accession Accession to look up.
 * @param mixed  $item      Output: the decoded record. Left untouched when the
 *                          function bails out with 0 on the fetch path.
 *
 * @return mixed The id reported by find_genbank(), set_gi() or store_genbank();
 *               0 when nothing was returned by NCBI, the response is not
 *               well-formed XML, NCBI reported an error, or the response could
 *               not be converted to a JSON object.
 */
function get_sequence($accession, &$item)
{
    $id = find_genbank($accession);

    if ($id == 0) {
        // We don't have this sequence locally (but see the set_gi() step below).
        // The accession is percent-encoded so that stray characters cannot alter the query string.
        $url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id='
            . rawurlencode($accession)
            . '&rettype=gb&retmode=xml';

        $xml = get($url);

        // Nothing returned. Loose comparison is deliberate: it also catches false/null from get().
        if ($xml == '') {
            return 0;
        }

        // Parse the response quietly; a malformed body is treated like "no sequence".
        $previous_libxml_setting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $parsed = $dom->loadXML($xml);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_libxml_setting);

        if (!$parsed) {
            return 0;
        }

        // NCBI error (sequence doesn't exist, or might not be released).
        $xpath = new DOMXPath($dom);
        foreach ($xpath->query('//Error') as $node) {
            // An empty <Error/> element has no first child, so guard before reading it.
            if ($node->firstChild !== null && $node->firstChild->nodeValue != '') {
                return 0;
            }
        }

        // Drop the DOCTYPE declaration before handing the document to the XSLT processor.
        $xml = str_replace('<!DOCTYPE GBSet PUBLIC "-//NCBI//NCBI GBSeq/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_GBSeq.dtd">', '', $xml);

        $xsl = new DOMDocument();
        if (!$xsl->load('xsl/gb2JSON.xsl')) {
            return 0;
        }

        $xp = new XSLTProcessor();
        if (!$xp->importStylesheet($xsl)) {
            return 0;
        }

        $xml_doc = new DOMDocument();
        $xml_doc->loadXML($xml);

        $json = $xp->transformToXML($xml_doc);
        if (!is_string($json)) {
            // Transformation failed.
            return 0;
        }

        $data = json_decode($json);
        if (!is_object($data)) {
            // Stylesheet output was not a JSON object; there is nothing to store.
            return 0;
        }

        // Handle the case where we already have this sequence from EMBL harvesting.
        $id = set_gi(
            isset($data->accession) ? $data->accession : null,
            isset($data->gi) ? $data->gi : null
        );

        if ($id == 0) {
            // New sequence.
            gb_postprocess($data);
            $id = store_genbank($data);
            $item = $data;
        } else {
            // We have this already (from EMBL, per the original author's note).
            $item = json_decode(retrieve_genbank_json($id));
        }
    } else {
        // Already stored locally.
        $item = json_decode(retrieve_genbank_json($id));
    }

    // CouchDB modification: attach a geometry object for the sequence.
    // Coordinates on the sequence itself take precedence; otherwise use the specimen's.
    $located = null;
    if (isset($item->source->latitude)) {
        $located = $item->source;
    } elseif (isset($item->source->specimen->latitude)) {
        $located = $item->source->specimen;
    }

    // Only build a point when both coordinates exist, rather than inventing a longitude of 0.
    if ($located !== null && isset($located->longitude)) {
        $geometry = new stdClass();
        $geometry->type = "MultiPoint";
        $geometry->coordinates = array(
            array((float) $located->longitude, (float) $located->latitude)
        );
        $item->source->geometry = $geometry;
    }

    return $id;
}
// fetch page $url = 'http://localhost/~rpage/biostor/api.php?page=' . $PageID . '&format=html'; $json = get($url); //echo $json; $obj = json_decode($json); if (isset($obj->html)) { $page = html_to_page($obj->html); $hits = array(); if (0) { $hits = array_merge($hits, find_specimens($page->text)); } if (1) { $hits = array_merge($hits, find_points($page->text)); } if (0) { $hits = array_merge($hits, find_genbank($page->text)); } $annotations = annotations_from_hits($uri, $page, $hits); echo "annotations\n"; print_r($annotations); foreach ($annotations as $annotation) { echo json_encode($annotation->data); $h->add_annotation($annotation->data); echo "\n"; } } $page_count++; } ?>