Пример #1
0
 /**
  * Convert the tab-delimited interaction file into RDF statements.
  *
  * The first line read is treated as the header (its first character is
  * dropped) and must split into 54 tab-separated columns. Every following
  * line describes one interaction: its statements are buffered through
  * AddRDF() and flushed with WriteRDFBufferToWriteFile() once per row.
  *
  * Row handling:
  *  - blank lines are skipped;
  *  - a row whose edge type (column 52) is not X, C or Y is reported with a
  *    warning and skipped before anything is buffered for it;
  *  - the method text is reset for every row, so a row without a method
  *    (column 6 is "-") gets no " identified by ..." suffix in its label.
  *
  * @return bool|null false when the header does not have 54 columns;
  *                   otherwise no value is returned.
  */
 function Parse()
 {
     $l = $this->GetReadFile()->Read(100000);
     $header = explode("\t", trim(substr($l, 1)));
     // check # of columns
     if (($c = count($header)) != 54) {
         // trigger_error() (not the misspelled trigger_erorr(), which is an
         // undefined function) so that the caller actually receives false.
         trigger_error("Expecting 54 columns, found {$c}!", E_USER_WARNING);
         return false;
     }

     // qnames that were already labelled/typed during this run
     $defined = array();

     while ($l = $this->GetReadFile()->Read(100000)) {
         $line = trim($l);
         if ($line === '') {
             // nothing to parse on a blank line
             continue;
         }
         $a = explode("\t", $line);

         // 13 is the original identifier
         $ids = explode("|", $a[13], 2);
         $this->GetNS()->ParsePrefixedName($ids[0], $ns, $str);
         $this->Parse4IDLabel($str, $id, $label);
         $id = str_replace('"', '', $id);
         $iid = $this->GetNS()->MapQName("{$ns}:{$id}");

         // get the type; resolved before the first AddRDF() call so that an
         // unknown edge type never reuses the value left over from the
         // previous row and never leaves a half-described row in the buffer
         switch ($a[52]) {
             case "X":
                 $label = "Pairwise interaction between {$a[0]} and {$a[1]}";
                 $type = "Pairwise-Interaction";
                 break;
             case "C":
                 $label = $a[53] . " component complex";
                 $type = "Multimeric-Complex";
                 break;
             case "Y":
                 $label = "homomeric complex composed of {$a[0]}";
                 $type = "Homopolymeric-Complex";
                 break;
             default:
                 trigger_error("Unknown edge type '{$a[52]}' for {$iid}; row skipped", E_USER_WARNING);
                 // "continue 2": leave the switch AND move on to the next row
                 continue 2;
         }

         $this->AddRDF($this->QQuad($iid, "void:inDataset", $this->GetDatasetURI()));
         $this->AddRDF($this->QQuad($iid, "rdf:type", "irefindex_vocabulary:{$type}"));

         // generate the label
         // interaction type[52] by method[6]
         $method = ''; // reset per row; ParseString() fills it in below
         if ($a[6] != '-') {
             $qname = $this->ParseString($a[6], $ns, $id, $method);
             if ($qname) {
                 $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:method", $qname));
             }
         }
         $method_label = '';
         if ($method != '' && $method != 'NA' && $method != '-1') {
             $method_label = " identified by {$method} ";
         }
         $this->AddRDF($this->QQuadL($iid, "rdfs:label", $label . $method_label . " [{$iid}]"));
         $this->AddRDF($this->QQuadO_URL($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

         // set the interactors: index 0 is interactor "a", index 1 is "b"
         foreach (array('a', 'b') as $i => $p) {
             $interactor = $this->ParseString($a[$i], $ns, $id, $label);
             $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:interactor_{$p}", $interactor));

             // biological role (columns 16/17), then experimental role (18/19)
             foreach (array('biological' => 16, 'experimental' => 18) as $kind => $offset) {
                 $role = $a[$offset + $i];
                 if ($role == '-') {
                     continue;
                 }
                 $qname = $this->ParseString($role, $ns, $id, $label);
                 if ($qname != "mi:0000") {
                     $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:interactor_{$p}_{$kind}_role", $qname));
                 }
             }

             // interactor type (columns 20/21)
             $interactor_type = $a[20 + $i];
             if ($interactor_type != '-') {
                 $qname = $this->ParseString($interactor_type, $ns, $id, $label);
                 $this->AddRDF($this->QQuad($interactor, "rdf:type", $qname));
             }
         }

         // add the alternatives through the taxon + seq redundant group
         for ($i = 2; $i <= 3; $i++) {
             $taxid = '';
             $irogid = "irefindex_irogid:" . $a[42 + ($i - 2)];
             if (!isset($defined[$irogid])) {
                 $defined[$irogid] = '';
                 $this->AddRDF($this->QQuadL($irogid, "rdfs:label", "[{$irogid}]"));
                 $this->AddRDF($this->QQuad($irogid, "rdf:type", "irefindex_vocabulary:Taxon-Sequence-Identical-Group"));
                 $tax = $a[9 + ($i - 2)];
                 if ($tax && $tax != '-' && $tax != '-1') {
                     $taxid = $this->ParseString($tax, $ns, $id, $label);
                     $this->AddRDF($this->QQuad($irogid, "irefindex_vocabulary:taxon", $taxid));
                 }
             }
             // NOTE(review): both passes read the same list column (3) even
             // though the group id and taxon above vary with $i -- confirm
             // whether the list column should vary with $i as well.
             $list = explode("|", $a[3]);
             foreach ($list as $item) {
                 $qname = $this->ParseString($item, $ns, $id, $label);
                 if ($ns && $ns != 'irefindex_rogid' && $ns != 'irefindex_irogid') {
                     $this->AddRDF($this->QQuad($qname, "irefindex_vocabulary:taxon-sequence-identical-group", $irogid));
                     if ($taxid && $taxid != '-' && $taxid != '-1') {
                         $this->AddRDF($this->QQuad($qname, "irefindex_vocabulary:taxon", $taxid));
                     }
                 }
             }
         }

         // add the aliases through the canonical group
         for ($i = 4; $i <= 5; $i++) {
             $icrogid = "irefindex_icrogid:" . $a[49 + ($i - 4)];
             if (!isset($defined[$icrogid])) {
                 $defined[$icrogid] = '';
                 $this->AddRDF($this->QQuadL($icrogid, "rdfs:label", "[{$icrogid}]"));
                 $this->AddRDF($this->QQuad($icrogid, "rdf:type", "irefindex_vocabulary:Taxon-Sequence-Similar-Group"));
             }
             // NOTE(review): same fixed list column (3) as in the loop above -- confirm.
             $list = explode("|", $a[3]);
             foreach ($list as $item) {
                 $qname = $this->ParseString($item, $ns, $id, $label);
                 if ($ns && $ns != 'crogid' && $ns != 'icrogid') {
                     $this->AddRDF($this->QQuad($qname, "irefindex_vocabulary:taxon-sequence-similar-group", $icrogid));
                 }
             }
         }

         // publications
         foreach (explode("|", $a[8]) as $item) {
             if ($item == '-') {
                 continue;
             }
             $qname = $this->ParseString($item, $ns, $id, $label);
             $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:article", $qname));
         }

         // MI interaction type
         if ($a[11] != '-' && $a[11] != 'NA') {
             $qname = $this->ParseString($a[11], $ns, $id, $label);
             $this->AddRDF($this->QQuad($iid, "rdf:type", $qname));
             if (!isset($defined[$qname])) {
                 $defined[$qname] = '';
                 $this->AddRDF($this->QQuadL($qname, "rdfs:label", "{$label} [{$qname}]"));
             }
         }

         // source
         if ($a[12] != '-') {
             $qname = $this->ParseString($a[12], $ns, $id, $label);
             $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:source", $qname));
         }

         // confidence
         foreach (explode("|", $a[14]) as $item) {
             $this->ParseString($item, $ns, $id, $label);
             if ($ns == 'lpr') {
                 //  lowest number of distinct interactions that any one article reported
                 $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:minimum-number-interactions-reported", $id));
             } elseif ($ns == 'hpr') {
                 //  higher number of distinct interactions that any one article reports
                 $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:maximum-number-interactions-reported", $id));
             } elseif ($ns == 'np' || $ns == 'hp') {
                 //  total number of unique PMIDs used to support the interaction
                 // This branch used to be the assignment `$ns = 'hp'`, which is
                 // always true and therefore caught every remaining score.
                 // 'np' appears to be the key the iRefIndex format uses for
                 // this count (TODO confirm against the format README); 'hp'
                 // stays accepted because it is the literal the code named.
                 $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:number-supporting-articles", $id));
             }
         }

         // expansion method
         if ($a[15]) {
             $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:expansion-method", $a[15]));
         }

         // host organism
         if ($a[28] != '-') {
             $qname = $this->ParseString($a[28], $ns, $id, $label);
             $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:host-organism", $qname));
         }

         // created
         $this->AddRDF($this->QQuadL($iid, "dc:created", $a[30]));
         // taxon-sequence identical interaction group
         $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:taxon-sequence-identical-interaction-group", "irefindex_irigid:" . $a[44]));
         // taxon-sequence similar interaction group
         $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:taxon-sequence-similar-interaction-group", "irefindex_crigid:" . $a[50]));

         // flush everything buffered for this row
         $this->WriteRDFBufferToWriteFile();
     }
 }
Пример #2
0
 /**
  * Convert the tab-delimited interaction file into RDF statements.
  *
  * The first line read is treated as the header (its first character is
  * dropped) and must split into 54 tab-separated columns. Every following
  * line describes one interaction: its statements are buffered through
  * parent::addRDF() and flushed with parent::writeRDFBufferToWriteFile()
  * once per row.
  *
  * Row handling:
  *  - blank lines are skipped;
  *  - a row whose column 13 does not hold exactly three "|"-separated
  *    entries stops the script (after dumping the entries);
  *  - a row whose edge type (column 52) is not X, C or Y is reported with a
  *    warning and skipped before anything is buffered for it;
  *  - publication entries "-" and "pubmed:0" are ignored.
  *
  * @return bool|null false when the header does not have 54 columns;
  *                   otherwise no value is returned.
  */
 function Parse()
 {
     $l = parent::getReadFile()->read(100000);
     $header = explode("\t", trim(substr($l, 1)));
     // check # of columns
     if (($c = count($header)) != 54) {
         // trigger_error() (not the misspelled trigger_erorr(), which is an
         // undefined function) so that the caller actually receives false.
         trigger_error("Expecting 54 columns, found {$c}!", E_USER_WARNING);
         return false;
     }

     // qnames that were already labelled during this run
     $defined = array();

     while ($l = parent::getReadFile()->read(500000)) {
         $line = trim($l);
         if ($line === '') {
             // nothing to parse on a blank line
             continue;
         }
         $a = explode("\t", $line);

         // vocabulary prefix used for every predicate/class of this row
         // (assumes getVoc() returns the same value on every call -- TODO confirm)
         $voc = parent::getVoc();

         // irefindex identifiers
         $rigid = "irefindex." . $a[34];            // checksum for interaction
         $rogida = "irefindex." . $a[32];           // checksum for A
         $rogidb = "irefindex." . $a[33];           // checksum for B
         $irigid = "irefindex.irigid:" . $a[44];    // integer id for interaction
         $irogida = "irefindex.irogid:" . $a[42];   // integer id for A
         $irogidb = "irefindex.irogid:" . $a[43];   // integer id for B
         $crigid = "irefindex.crigid:" . $a[47];    // checksum for canonical interaction
         $icrigid = "irefindex.icrigid:" . $a[50];  // integer id for canonical interaction
         $crogida = "irefindex.crogid:" . $a[45];   // checksum for A's canonical group
         $crogidb = "irefindex.crogid:" . $a[46];   // checksum for B's canonical group
         $icrogida = "irefindex.icrogid:" . $a[48]; // integer for A's canonical group
         $icrogidb = "irefindex.icrogid:" . $a[49]; // integer for B's canonical group

         // 13 contains the original identifier, the rigid, and the edgetype
         $ids = explode("|", $a[13]);
         if (count($ids) != 3) {
             trigger_error("Expecting 3 entries in column 14");
             print_r($ids);
             exit;
         }
         parent::getRegistry()->parseQName($ids[0], $ns, $id);
         if ($id == '-') {
             // this happens with hprd
             $iid = "hprd:" . substr($ids[1], 6);
         } else {
             $iid = $ns . ":" . $id;
         }

         // get the type; an unknown edge type must not reuse the label/type
         // left over from the previous row, so such rows are skipped
         switch ($a[52]) {
             case "X":
                 $label = "{$a[0]} - {$a[1]} Interaction";
                 $type = "Pairwise-Interaction";
                 break;
             case "C":
                 $label = $a[53] . " component complex"; // num of participants
                 $type = "Multimeric-Complex";
                 break;
             case "Y":
                 $label = "{$a[0]} homomeric complex";
                 $type = "Homopolymeric-Complex";
                 break;
             default:
                 trigger_error("Unknown edge type '{$a[52]}' for {$iid}; row skipped", E_USER_WARNING);
                 // "continue 2": leave the switch AND move on to the next row
                 continue 2;
         }
         parent::addRDF(
             parent::describeIndividual($iid, $label, $voc . $type)
             . parent::describeClass($voc . $type, str_replace("-", " ", $type))
         );

         // interaction type[52] by method[6]
         if ($a[6] != '-') {
             $data = $this->ParseStringArray($a[6]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(
                 parent::triplify($iid, $voc . "method", $qname)
                 . parent::describeClass($qname, $data['label'])
             );
         }
         parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

         // set the interactors: index 0 is interactor "a", index 1 is "b"
         foreach (array('a', 'b') as $i => $p) {
             $data = $this->ParseStringArray($a[$i]);
             $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, $voc . "interactor_{$p}", $interactor));

             // biological role (columns 16/17), then experimental role (18/19)
             foreach (array('biological' => 16, 'experimental' => 18) as $kind => $offset) {
                 $role = $a[$offset + $i];
                 if ($role == '-') {
                     continue;
                 }
                 $data = $this->ParseStringArray($role);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 if ($qname != "mi:0000") {
                     parent::addRDF(
                         parent::triplify($iid, $voc . "interactor_{$p}_{$kind}_role", $qname)
                         . parent::describeClass($qname, $data['label'])
                     );
                 }
             }

             // interactor type (columns 20/21)
             $interactor_type = $a[20 + $i];
             if ($interactor_type != '-') {
                 $data = $this->ParseStringArray($interactor_type);
                 $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(
                     parent::triplify($interactor, "rdf:type", $qname)
                     . parent::describeClass($qname, $data['label'])
                 );
             }
         }

         // add the alternatives through the taxon + seq redundant group
         for ($i = 2; $i <= 3; $i++) {
             $taxid = '';
             $rogid = "irefindex." . $a[32 + ($i - 2)];
             parent::addRDF(
                 parent::describeIndividual($rogid, "", $voc . "Taxon-Sequence-Identical-Group")
                 . parent::describeClass($voc . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group")
             );
             $tax = $a[9 + ($i - 2)];
             if ($tax && $tax != '-' && $tax != '-1') {
                 $data = $this->ParseStringArray($tax);
                 $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                 parent::addRDF(parent::triplify($rogid, $voc . "x-taxonomy", $taxid));
             }
             foreach (explode("|", $a[3 + ($i - 2)]) as $item) {
                 $data = $this->ParseStringArray($item);
                 $ns = trim($data["ns"]);
                 $id = trim($data["id"]);
                 $qname = $ns . ":" . $id;
                 if ($ns && $ns != 'rogid' && $ns != 'irogid' && $id != '-') {
                     parent::addRDF(parent::triplify($rogid, $voc . "has-member", $qname));
                     if ($taxid && $taxid != '-' && $taxid != '-1') {
                         parent::addRDF(parent::triplify($qname, $voc . "x-taxonomy", $taxid));
                     }
                 }
             }
         }

         // publications
         foreach (explode("|", $a[8]) as $item) {
             // The former test `$item == '-' && $item != 'pubmed:0'` could
             // only ever skip "-"; the placeholder "pubmed:0" is skipped too.
             if ($item == '-' || $item == 'pubmed:0') {
                 continue;
             }
             $data = $this->ParseStringArray($item);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, $voc . "article", $qname));
         }

         // MI interaction type
         if ($a[11] != '-' && $a[11] != 'NA') {
             $data = $this->ParseStringArray($a[11]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
             if (!isset($defined[$qname])) {
                 $defined[$qname] = '';
                 parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
             }
         }

         // source
         if ($a[12] != '-') {
             $data = $this->ParseStringArray($a[12]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, $voc . "source", $qname));
         }

         // confidence
         foreach (explode("|", $a[14]) as $item) {
             $data = $this->ParseStringArray($item);
             $ns = trim($data["ns"]);
             $id = trim($data["id"]);
             if ($ns == 'lpr') {
                 //  lowest number of distinct interactions that any one article reported
                 parent::addRDF(parent::triplifyString($iid, $voc . "minimum-number-interactions-reported", $id));
             } elseif ($ns == 'hpr') {
                 //  higher number of distinct interactions that any one article reports
                 parent::addRDF(parent::triplifyString($iid, $voc . "maximum-number-interactions-reported", $id));
             } elseif ($ns == 'np' || $ns == 'hp') {
                 //  total number of unique PMIDs used to support the interaction
                 // This branch used to be the assignment `$ns = 'hp'`, which is
                 // always true and therefore caught every remaining score.
                 // 'np' appears to be the key the iRefIndex format uses for
                 // this count (TODO confirm against the format README); 'hp'
                 // stays accepted because it is the literal the code named.
                 parent::addRDF(parent::triplifyString($iid, $voc . "number-supporting-articles", $id));
             }
         }

         // expansion method: the resource id is derived from an md5 of the text
         if ($a[15]) {
             $expansion_id = parent::getRes() . md5($a[15]);
             parent::addRDF(
                 parent::describeIndividual($expansion_id, $a[15], $voc . "Expansion-Method")
                 . parent::describeClass($voc . "Expansion-Method", "Expansion Method")
                 . parent::triplify($iid, $voc . "expansion-method", $expansion_id)
             );
         }

         // host organism
         if ($a[28] != '-') {
             $data = $this->ParseStringArray($a[28]);
             $qname = trim($data["ns"]) . ":" . trim($data["id"]);
             parent::addRDF(parent::triplify($iid, $voc . "host-organism", $qname));
         }

         // @todo add to record
         // created 2010/05/18
         $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
         parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));

         // taxon-sequence identical interaction group, its interactors, and
         // the canonical (taxon-sequence similar) interaction and groups
         parent::addRDF(
             parent::triplify($iid, $voc . "taxon-sequence-identical-interaction", $rigid)
             . parent::triplify($rigid, "rdf:type", $voc . "Taxon-Sequence-Identical-Interaction")
             . parent::describeClass($voc . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction")
             . parent::triplify($rigid, $voc . "irigid", $irigid)
             . parent::triplify($rigid, $voc . "interactor-a", $rogida)
             . parent::triplify($rogida, $voc . "irogid", $irogida)
             . parent::triplify($rigid, $voc . "interactor-b", $rogidb)
             . parent::triplify($rogidb, $voc . "irogid", $irogidb)
             . parent::triplify($rogida, $voc . "canonical-group", $crogida)
             . parent::triplify($rogidb, $voc . "canonical-group", $crogidb)
             . parent::triplify($rigid, $voc . "taxon-sequence-similar-interaction", $crigid)
             . parent::triplify($crigid, "rdf:type", $voc . "Taxon-Sequence-Canonical-Interaction")
             . parent::describeClass($voc . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction")
             . parent::triplify($crigid, $voc . "icrigid", $icrigid)
             . parent::triplify($crigid, $voc . "interactor-a-canonical-group", $crogida)
             . parent::triplify($crogida, "rdf:type", $voc . "Taxon-Sequence-Similar-Group")
             . parent::triplify($crogida, $voc . "icrogid", $icrogida)
             . parent::triplify($crigid, $voc . "interactor-b-canonical-group", $crogidb)
             . parent::triplify($crogidb, "rdf:type", $voc . "Taxon-Sequence-Similar-Group")
             . parent::triplify($crogidb, $voc . "icrogid", $icrogidb)
             . parent::describeClass($voc . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
         );

         // flush everything buffered for this row
         parent::writeRDFBufferToWriteFile();
     }
 }