Esempio n. 1
0
/**
 * Fetch one GenBank record from NCBI EFetch and reduce it to a plain object.
 *
 * The record is requested as GBSeq XML, converted to JSON with the local
 * 'xml2json.xslt' stylesheet, decoded, and copied field by field into a
 * stdClass. When the response contains several entries, the object built
 * from the last entry is the one returned.
 *
 * Relies on helpers defined elsewhere: get(), parse_name(),
 * process_lat_lon() and process_locality().
 *
 * @param mixed $id Identifier placed in the EFetch "id" query parameter.
 *
 * @return stdClass|null The sequence object, or null when nothing was
 *                       retrieved or the response could not be parsed.
 */
function fetch_sequence($id)
{
    $genbank_sequence = null;
    // Query URL; the id is percent-encoded so it cannot alter the query string
    $url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=' . rawurlencode((string) $id) . '&rettype=gb&retmode=xml';
    $xml = get($url);
    if ($xml == '') {
        return $genbank_sequence;
    }
    // Convert the XML response to JSON with the local stylesheet
    $xp = new XsltProcessor();
    $xsl = new DomDocument();
    $xsl->load('xml2json.xslt');
    $xp->importStylesheet($xsl);
    $dom = new DOMDocument();
    if (!$dom->loadXML($xml)) {
        // Response was not well-formed XML
        return $genbank_sequence;
    }
    $json = $xp->transformToXML($dom);
    if (!is_string($json)) {
        // Transformation failed
        return $genbank_sequence;
    }
    // fix "-" in variable names so they can be read as object properties
    $json = str_replace(
        array(
            '"GBSeq_feature-table"',
            '"GBSeq_primary-accession"',
            '"GBSeq_other-seqids"',
            '"GBSeq_update-date"',
            '"GBSeq_create-date"',
            '"GBSeq_accession-version"',
        ),
        array(
            '"GBSeq_feature_table"',
            '"GBSeq_primary_accession"',
            '"GBSeq_other_seqids"',
            '"GBSeq_update_date"',
            '"GBSeq_create_date"',
            '"GBSeq_accession_version"',
        ),
        $json
    );
    $sequences = json_decode($json);
    if (!is_object($sequences) || !isset($sequences->GBSet)
        || !(is_array($sequences->GBSet) || is_object($sequences->GBSet))) {
        // Not the expected structure; nothing to extract
        return $genbank_sequence;
    }
    // Properties copied verbatim from "source" feature qualifiers
    $source_qualifiers = array(
        'collection_date',
        'collected_by',
        'country',
        'host',
        'locality',
        'isolation_source',
        'isolate',
        'lat_lon',
        'mol_type',
        'organelle',
        'specimen_voucher',
    );
    foreach ($sequences->GBSet as $GBSet) {
        $genbank_sequence = new stdclass();
        $genbank_sequence->accession = $GBSet->GBSeq_primary_accession;
        $genbank_sequence->accession_version = $GBSet->GBSeq_accession_version;
        $genbank_sequence->organism = $GBSet->GBSeq_organism;
        $genbank_sequence->definition = $GBSet->GBSeq_definition;
        $genbank_sequence->moltype = $GBSet->GBSeq_moltype;
        // dates (only set when present and parseable)
        if (isset($GBSet->GBSeq_update_date)) {
            $updated = strtotime($GBSet->GBSeq_update_date);
            if ($updated !== false) {
                $genbank_sequence->updated = date("Y-m-d", $updated);
            }
        }
        if (isset($GBSet->GBSeq_create_date)) {
            $created = strtotime($GBSet->GBSeq_create_date);
            if ($created !== false) {
                $genbank_sequence->created = date("Y-m-d", $created);
            }
        }
        // keywords: flag the record only when a keyword really is BARCODE
        $genbank_sequence->barcode = false;
        if (!empty($GBSet->GBSeq_keywords)) {
            foreach ($GBSet->GBSeq_keywords as $keyword) {
                if ($keyword === 'BARCODE') {
                    $genbank_sequence->barcode = true;
                    break;
                }
            }
        }
        // GI number, taken from a seqid ending in "gi|<digits>"
        $other_seqids = isset($GBSet->GBSeq_other_seqids) ? $GBSet->GBSeq_other_seqids : array();
        foreach ($other_seqids as $seqids) {
            if (preg_match('/gi\\|(?<gi>\\d+)$/', $seqids, $m)) {
                $genbank_sequence->gi = (int) $m['gi'];
            }
        }
        // references
        $genbank_sequence->references = array();
        $references = isset($GBSet->GBSeq_references) ? $GBSet->GBSeq_references : array();
        foreach ($references as $GBReference) {
            $reference = new stdclass();
            $reference->title = $GBReference->GBReference_title;
            $reference->citation = $GBReference->GBReference_journal;
            if (isset($GBReference->GBReference_authors)) {
                foreach ($GBReference->GBReference_authors as $a) {
                    $parts = parse_name($a);
                    $author = new stdClass();
                    $author->name = $a;
                    if (isset($parts['last'])) {
                        $author->lastname = $parts['last'];
                    }
                    if (isset($parts['first'])) {
                        $author->forename = $parts['first'];
                        if (array_key_exists('middle', $parts)) {
                            $author->forename .= ' ' . $parts['middle'];
                        }
                    }
                    $reference->author[] = $author;
                }
            }
            // Split a citation shaped like "Journal 12 (3), 45-67 (2001)" into its parts
            if (preg_match('/(?<journal>.*)\\s+(?<volume>\\d+)(\\s+\\((?<issue>.*)\\))?,\\s+(?<spage>\\d+)-(?<epage>\\d+)\\s+\\((?<year>[0-9]{4})\\)/', $reference->citation, $m)) {
                $reference->journal = new stdclass();
                $reference->journal->name = $m['journal'];
                $reference->journal->volume = $m['volume'];
                if ($m['issue'] != '') {
                    $reference->journal->issue = $m['issue'];
                }
                $reference->journal->pages = $m['spage'];
                if ($m['epage'] != '') {
                    $reference->journal->pages .= '--' . $m['epage'];
                }
            }
            if (isset($GBReference->GBReference_pubmed)) {
                $identifier = new stdclass();
                $identifier->type = 'pmid';
                $identifier->id = (int) $GBReference->GBReference_pubmed;
                $reference->identifier[] = $identifier;
            }
            if (isset($GBReference->GBReference_xref)) {
                if ($GBReference->GBReference_xref->GBXref->GBXref_dbname == 'doi') {
                    $identifier = new stdclass();
                    $identifier->type = 'doi';
                    $identifier->id = $GBReference->GBReference_xref->GBXref->GBXref_id;
                    $reference->identifier[] = $identifier;
                }
            }
            $genbank_sequence->references[] = $reference;
        }
        // features: only the "source" feature is used
        $feature_tables = isset($GBSet->GBSeq_feature_table) ? $GBSet->GBSeq_feature_table : array();
        foreach ($feature_tables as $feature_table) {
            if ($feature_table->GBFeature_key !== 'source') {
                continue;
            }
            if (!isset($genbank_sequence->source)) {
                $genbank_sequence->source = new stdclass();
            }
            $quals = isset($feature_table->GBFeature_quals) ? $feature_table->GBFeature_quals : array();
            foreach ($quals as $feature_quals) {
                $name = $feature_quals->GBQualifier_name;
                $value = isset($feature_quals->GBQualifier_value) ? $feature_quals->GBQualifier_value : null;
                if ($name === 'db_xref') {
                    // A source may carry several db_xref qualifiers; only the
                    // "taxon:" one is a taxonomy id, so others must not overwrite it.
                    if (is_string($value) && strpos($value, 'taxon:') === 0) {
                        $genbank_sequence->source->tax_id = (int) str_replace("taxon:", '', $value);
                    }
                } elseif (in_array($name, $source_qualifiers, true)) {
                    $genbank_sequence->source->{$name} = $value;
                }
            }
            if (isset($genbank_sequence->source->lat_lon)) {
                process_lat_lon($genbank_sequence);
            }
            process_locality($genbank_sequence);
        }
        // Build a point geometry (longitude first) when a latitude is available
        if (isset($genbank_sequence->source->latitude)) {
            $genbank_sequence->source->geometry = new stdclass();
            $genbank_sequence->source->geometry->type = "Point";
            $genbank_sequence->source->geometry->coordinates = array((double) $genbank_sequence->source->longitude, (double) $genbank_sequence->source->latitude);
        }
    }
    return $genbank_sequence;
}
Esempio n. 2
0
/**
 * Fetch several GenBank records from NCBI EFetch in one request.
 *
 * The records are requested as GBSeq XML, converted to JSON with the local
 * 'xml2json.xslt' stylesheet, decoded, and each entry is reduced to a
 * stdClass appended to the result's "sequences" list. Coordinates of every
 * sequence that ends up with a latitude are collected into a MultiPoint
 * geometry (longitude first).
 *
 * Relies on helpers defined elsewhere: get(), format_date(),
 * process_lat_lon() and process_locality().
 *
 * @param array $ids Identifiers, joined with commas into the EFetch "id" parameter.
 *
 * @return stdClass Object with "ids", "sequences" and "geometry" properties;
 *                  "sequences" stays empty when there are no ids, nothing was
 *                  retrieved, or the response could not be parsed.
 */
function fetch_sequences($ids)
{
    $hits = new stdclass();
    $hits->ids = $ids;
    $hits->sequences = array();
    $hits->geometry = new stdclass();
    $hits->geometry->type = "MultiPoint";
    $hits->geometry->coordinates = array();
    if (empty($ids)) {
        // Nothing to ask for; avoid a request with an empty id list
        return $hits;
    }
    // Query URL; each id is percent-encoded so it cannot alter the query string
    $encoded_ids = array();
    foreach ($ids as $id) {
        $encoded_ids[] = rawurlencode((string) $id);
    }
    $url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=' . implode(",", $encoded_ids) . '&rettype=gb&retmode=xml';
    $xml = get($url);
    if ($xml == '') {
        return $hits;
    }
    // Convert the XML response to JSON with the local stylesheet
    $xp = new XsltProcessor();
    $xsl = new DomDocument();
    $xsl->load('xml2json.xslt');
    $xp->importStylesheet($xsl);
    $dom = new DOMDocument();
    if (!$dom->loadXML($xml)) {
        // Response was not well-formed XML
        return $hits;
    }
    $json = $xp->transformToXML($dom);
    if (!is_string($json)) {
        // Transformation failed
        return $hits;
    }
    // fix "-" in variable names so they can be read as object properties
    $json = str_replace(
        array(
            '"GBSeq_feature-table"',
            '"GBSeq_primary-accession"',
            '"GBSeq_other-seqids"',
            '"GBSeq_create-date"',
            '"GBSeq_update-date"',
        ),
        array(
            '"GBSeq_feature_table"',
            '"GBSeq_primary_accession"',
            '"GBSeq_other_seqids"',
            '"GBSeq_create_date"',
            '"GBSeq_update_date"',
        ),
        $json
    );
    $sequences = json_decode($json);
    if (!is_object($sequences) || !isset($sequences->GBSet)
        || !(is_array($sequences->GBSet) || is_object($sequences->GBSet))) {
        // Not the expected structure; nothing to extract
        return $hits;
    }
    // Properties copied verbatim from "source" feature qualifiers
    $source_qualifiers = array(
        'country',
        'host',
        'locality',
        'isolation_source',
        'isolate',
        'lat_lon',
        'specimen_voucher',
    );
    foreach ($sequences->GBSet as $GBSet) {
        $genbank_sequence = new stdclass();
        $genbank_sequence->accession = $GBSet->GBSeq_primary_accession;
        $genbank_sequence->organism = $GBSet->GBSeq_organism;
        $genbank_sequence->created = format_date($GBSet->GBSeq_create_date);
        $genbank_sequence->updated = format_date($GBSet->GBSeq_update_date);
        // GI number, taken from a seqid ending in "gi|<digits>"
        $other_seqids = isset($GBSet->GBSeq_other_seqids) ? $GBSet->GBSeq_other_seqids : array();
        foreach ($other_seqids as $seqids) {
            if (preg_match('/gi\\|(?<gi>\\d+)$/', $seqids, $m)) {
                $genbank_sequence->gi = $m['gi'];
            }
        }
        // references
        $genbank_sequence->references = array();
        $references = isset($GBSet->GBSeq_references) ? $GBSet->GBSeq_references : array();
        foreach ($references as $GBReference) {
            $reference = new stdclass();
            $reference->title = $GBReference->GBReference_title;
            $reference->citation = $GBReference->GBReference_journal;
            $reference->authors = array();
            if (isset($GBReference->GBReference_authors)) {
                foreach ($GBReference->GBReference_authors as $author) {
                    $reference->authors[] = $author;
                }
            }
            if (isset($GBReference->GBReference_pubmed)) {
                if (!isset($reference->identifiers)) {
                    $reference->identifiers = new stdclass();
                }
                $reference->identifiers->pmid = $GBReference->GBReference_pubmed;
            }
            if (isset($GBReference->GBReference_xref)) {
                if ($GBReference->GBReference_xref->GBXref->GBXref_dbname == 'doi') {
                    if (!isset($reference->identifiers)) {
                        $reference->identifiers = new stdclass();
                    }
                    $reference->identifiers->doi = $GBReference->GBReference_xref->GBXref->GBXref_id;
                }
            }
            $genbank_sequence->references[] = $reference;
        }
        // features: only the "source" feature is used
        $feature_tables = isset($GBSet->GBSeq_feature_table) ? $GBSet->GBSeq_feature_table : array();
        foreach ($feature_tables as $feature_table) {
            if ($feature_table->GBFeature_key !== 'source') {
                continue;
            }
            // The container must exist before properties are assigned to it;
            // assigning through an undefined property is an Error on PHP 8.
            if (!isset($genbank_sequence->source)) {
                $genbank_sequence->source = new stdclass();
            }
            $quals = isset($feature_table->GBFeature_quals) ? $feature_table->GBFeature_quals : array();
            foreach ($quals as $feature_quals) {
                $name = $feature_quals->GBQualifier_name;
                $value = isset($feature_quals->GBQualifier_value) ? $feature_quals->GBQualifier_value : null;
                if ($name === 'db_xref') {
                    // A source may carry several db_xref qualifiers; only the
                    // "taxon:" one is a taxonomy id, so others must not overwrite it.
                    if (is_string($value) && strpos($value, 'taxon:') === 0) {
                        $genbank_sequence->source->tax_id = str_replace("taxon:", '', $value);
                    }
                } elseif (in_array($name, $source_qualifiers, true)) {
                    $genbank_sequence->source->{$name} = $value;
                }
            }
            if (isset($genbank_sequence->source->lat_lon)) {
                process_lat_lon($genbank_sequence);
            }
            process_locality($genbank_sequence);
        }
        // Collect the point (longitude first) when a latitude is available
        if (isset($genbank_sequence->source->latitude)) {
            $hits->geometry->coordinates[] = array($genbank_sequence->source->longitude, $genbank_sequence->source->latitude);
        }
        $hits->sequences[] = $genbank_sequence;
    }
    return $hits;
}