/**
 * Convert the tab-delimited iRefIndex file returned by GetReadFile() into RDF quads.
 *
 * The first line read is treated as the header (its first character is dropped
 * before splitting) and must contain 54 tab-separated columns. Each following
 * line is one interaction record; its quads are queued with AddRDF() and the
 * buffer is flushed with WriteRDFBufferToWriteFile() once per record.
 *
 * Records that have fewer than 54 columns, or whose edge type (column 52) is not
 * "X", "C" or "Y", are reported through trigger_error() and skipped before any
 * quad is queued for them.
 *
 * @return bool|null FALSE when the header does not have 54 columns; otherwise no value is returned.
 */
function Parse()
{
    $l = $this->GetReadFile()->Read(100000);
    $header = explode("\t", trim(substr($l, 1)));
    if (($c = count($header)) != 54) {
        // check # of columns
        trigger_error("Expecting 54 columns, found {$c}!");
        return FALSE;
    }

    // Resources that already received their label/type quads, keyed by qname,
    // so those quads are only emitted once per run.
    $defined = array();

    while ($l = $this->GetReadFile()->Read(100000)) {
        $a = explode("\t", trim($l));

        // Every index used below (up to 53) assumes a complete record.
        if (($c = count($a)) < 54) {
            trigger_error("Skipping record with {$c} columns, expecting 54", E_USER_WARNING);
            continue;
        }

        // Column 52 is the edge type; it selects both the vocabulary class and
        // the human-readable label. It is resolved first so an unknown value
        // cannot pick up the class left over from the previous record.
        if ($a[52] == "X") {
            $interaction_label = "Pairwise interaction between {$a[0]} and {$a[1]}";
            $type = "Pairwise-Interaction";
        } elseif ($a[52] == "C") {
            $interaction_label = $a[53] . " component complex";
            $type = "Multimeric-Complex";
        } elseif ($a[52] == "Y") {
            $interaction_label = "homomeric complex composed of {$a[0]}";
            $type = "Homopolymeric-Complex";
        } else {
            trigger_error("Skipping record with unknown edge type '{$a[52]}'", E_USER_WARNING);
            continue;
        }

        // 13 is the original identifier
        $ids = explode("|", $a[13], 2);
        $this->GetNS()->ParsePrefixedName($ids[0], $ns, $str);
        // The label produced here is not used: the edge-type label above replaces it.
        $this->Parse4IDLabel($str, $id, $source_label);
        $id = str_replace('"', '', $id);
        $iid = $this->GetNS()->MapQName("{$ns}:{$id}");

        $this->AddRDF($this->QQuad($iid, "void:inDataset", $this->GetDatasetURI()));
        $this->AddRDF($this->QQuad($iid, "rdf:type", "irefindex_vocabulary:{$type}"));

        // generate the label
        // interaction type[52] by method[6]
        // $method is reset for every record so a "-" method column cannot reuse
        // the method name of the previous record.
        $method = '';
        if ($a[6] != '-') {
            $qname = $this->ParseString($a[6], $ns, $id, $method);
            if ($qname) {
                $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:method", $qname));
            }
        }
        $method_label = '';
        if ((string) $method !== '' && $method != 'NA' && $method != '-1') {
            $method_label = " identified by {$method} ";
        }
        $this->AddRDF($this->QQuadL($iid, "rdfs:label", "{$interaction_label}" . $method_label . " [{$iid}]"));
        $this->AddRDF($this->QQuadO_URL($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

        // set the interactors: columns 0 and 1 hold interactor a and b
        for ($i = 0; $i <= 1; $i++) {
            $p = ($i == 1) ? 'b' : 'a';

            $interactor = $this->ParseString($a[$i], $ns, $id, $label);
            $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:interactor_{$p}", $interactor));

            // biological role
            $role = $a[16 + $i];
            if ($role != '-') {
                $qname = $this->ParseString($role, $ns, $id, $label);
                if ($qname != "mi:0000") {
                    $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:interactor_{$p}" . "_biological_role", $qname));
                }
            }

            // experimental role
            $role = $a[18 + $i];
            if ($role != '-') {
                $qname = $this->ParseString($role, $ns, $id, $label);
                if ($qname != "mi:0000") {
                    $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:interactor_{$p}" . "_experimental_role", $qname));
                }
            }

            // interactor type (kept in its own variable so it does not clobber the edge type)
            $interactor_type = $a[20 + $i];
            if ($interactor_type != '-') {
                $qname = $this->ParseString($interactor_type, $ns, $id, $label);
                $this->AddRDF($this->QQuad($interactor, "rdf:type", $qname));
            }
        }

        // add the alternatives through the taxon + seq redundant group
        // $i is the column holding the alternative identifiers (2 for a, 3 for b);
        // the matching group id is in column 42/43 and the taxon in column 9/10.
        for ($i = 2; $i <= 3; $i++) {
            $taxid = '';
            $irogid = "irefindex_irogid:" . $a[42 + ($i - 2)];
            if (!isset($defined[$irogid])) {
                $defined[$irogid] = '';
                $this->AddRDF($this->QQuadL($irogid, "rdfs:label", "[{$irogid}]"));
                $this->AddRDF($this->QQuad($irogid, "rdf:type", "irefindex_vocabulary:Taxon-Sequence-Identical-Group"));
                $tax = $a[9 + ($i - 2)];
                if ($tax && $tax != '-' && $tax != '-1') {
                    $taxid = $this->ParseString($tax, $ns, $id, $label);
                    $this->AddRDF($this->QQuad($irogid, "irefindex_vocabulary:taxon", $taxid));
                }
            }

            // Read this interactor's own list; a fixed column here would attach
            // the same identifiers to both groups.
            $list = explode("|", $a[$i]);
            foreach ($list as $item) {
                $qname = $this->ParseString($item, $ns, $id, $label);
                if ($ns && $ns != 'irefindex_rogid' && $ns != 'irefindex_irogid') {
                    $this->AddRDF($this->QQuad($qname, "irefindex_vocabulary:taxon-sequence-identical-group", $irogid));
                    if ($taxid && $taxid != '-' && $taxid != '-1') {
                        $this->AddRDF($this->QQuad($qname, "irefindex_vocabulary:taxon", $taxid));
                    }
                }
            }
        }

        // add the aliases through the canonical group
        // $i is the column holding the aliases (4 for a, 5 for b). The integer
        // canonical group ids are read from columns 48 and 49; column 50 is the
        // interaction-level id used for rdfs:seeAlso above and the crigid below.
        for ($i = 4; $i <= 5; $i++) {
            $icrogid = "irefindex_icrogid:" . $a[48 + ($i - 4)];
            if (!isset($defined[$icrogid])) {
                $defined[$icrogid] = '';
                $this->AddRDF($this->QQuadL($icrogid, "rdfs:label", "[{$icrogid}]"));
                $this->AddRDF($this->QQuad($icrogid, "rdf:type", "irefindex_vocabulary:Taxon-Sequence-Similar-Group"));
            }

            $list = explode("|", $a[$i]);
            foreach ($list as $item) {
                $qname = $this->ParseString($item, $ns, $id, $label);
                if ($ns && $ns != 'crogid' && $ns != 'icrogid') {
                    $this->AddRDF($this->QQuad($qname, "irefindex_vocabulary:taxon-sequence-similar-group", $icrogid));
                }
            }
        }

        // publications
        $list = explode("|", $a[8]);
        foreach ($list as $item) {
            if ($item == '-') {
                continue;
            }
            $qname = $this->ParseString($item, $ns, $id, $label);
            $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:article", $qname));
        }

        // MI interaction type
        if ($a[11] != '-' && $a[11] != 'NA') {
            $qname = $this->ParseString($a[11], $ns, $id, $label);
            $this->AddRDF($this->QQuad($iid, "rdf:type", $qname));
            if (!isset($defined[$qname])) {
                $defined[$qname] = '';
                $this->AddRDF($this->QQuadL($qname, "rdfs:label", "{$label} [{$qname}]"));
            }
        }

        // source
        if ($a[12] != '-') {
            $qname = $this->ParseString($a[12], $ns, $id, $label);
            $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:source", $qname));
        }

        // confidence
        $list = explode("|", $a[14]);
        foreach ($list as $item) {
            $this->ParseString($item, $ns, $id, $label);
            if ($ns == 'lpr') {
                // lowest number of distinct interactions that any one article reported
                $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:minimum-number-interactions-reported", $id));
            } elseif ($ns == "hpr") {
                // higher number of distinct interactions that any one article reports
                $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:maximum-number-interactions-reported", $id));
            } elseif ($ns == 'np' || $ns == 'hp') {
                // total number of unique PMIDs used to support the interaction
                // NOTE(review): the iRefIndex README calls this score "np"; "hp" is
                // still accepted because that is the prefix this code named before.
                $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:number-supporting-articles", $id));
            }
        }

        // expansion method
        if ($a[15]) {
            $this->AddRDF($this->QQuadL($iid, "irefindex_vocabulary:expansion-method", $a[15]));
        }

        // host organism
        if ($a[28] != '-') {
            $qname = $this->ParseString($a[28], $ns, $id, $label);
            $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:host-organism", $qname));
        }

        // created
        $this->AddRDF($this->QQuadL($iid, "dc:created", $a[30]));

        // taxon-sequence identical interaction group
        $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:taxon-sequence-identical-interaction-group", "irefindex_irigid:" . $a[44]));

        // taxon-sequence similar interaction group
        $this->AddRDF($this->QQuad($iid, "irefindex_vocabulary:taxon-sequence-similar-interaction-group", "irefindex_crigid:" . $a[50]));

        $this->WriteRDFBufferToWriteFile();
    }
}
/**
 * Convert the tab-delimited iRefIndex file returned by getReadFile() into RDF.
 *
 * The first line read is treated as the header (its first character is dropped
 * before splitting) and must contain 54 tab-separated columns. Each following
 * line is one interaction record; its statements are queued with addRDF() and
 * the buffer is flushed with writeRDFBufferToWriteFile() once per record.
 *
 * Records that have fewer than 54 columns, or whose edge type (column 52) is not
 * "X", "C" or "Y", are reported through trigger_error() and skipped before any
 * statement is queued for them. A record whose column 13 does not split into
 * exactly three "|"-separated entries still terminates the script.
 *
 * @return bool|null FALSE when the header does not have 54 columns; otherwise no value is returned.
 */
function Parse()
{
    $l = parent::getReadFile()->read(100000);
    $header = explode("\t", trim(substr($l, 1)));
    if (($c = count($header)) != 54) {
        // check # of columns
        trigger_error("Expecting 54 columns, found {$c}!");
        return FALSE;
    }

    // MI interaction types whose rdfs:label was already emitted, keyed by qname.
    $defined = array();

    while ($l = parent::getReadFile()->read(500000)) {
        $a = explode("\t", trim($l));

        // Every index used below (up to 53) assumes a complete record.
        if (($c = count($a)) < 54) {
            trigger_error("Skipping record with {$c} columns, expecting 54", E_USER_WARNING);
            continue;
        }

        // irefindex identifiers
        $rigid = "irefindex." . $a[34];            // checksum for interaction
        $rogida = "irefindex." . $a[32];           // checksum for A
        $rogidb = "irefindex." . $a[33];           // checksum for B
        $irigid = "irefindex.irigid:" . $a[44];    // integer id for interaction
        $irogida = "irefindex.irogid:" . $a[42];   // integer id for A
        $irogidb = "irefindex.irogid:" . $a[43];   // integer id for B
        $crigid = "irefindex.crigid:" . $a[47];    // checksum for canonical interaction
        $icrigid = "irefindex.icrigid:" . $a[50];  // integer id for canonical interaction
        $crogida = "irefindex.crogid:" . $a[45];   // checksum for A's canonical group
        $crogidb = "irefindex.crogid:" . $a[46];   // checksum for B's canonical group
        $icrogida = "irefindex.icrogid:" . $a[48]; // integer for A's canonical group
        $icrogidb = "irefindex.icrogid:" . $a[49]; // integer for B's canonical group

        // 13 contains the original identifier, the rigid, and the edgetype
        $ids = explode("|", $a[13]);
        if (count($ids) != 3) {
            trigger_error("Expecting 3 entries in column 14");
            print_r($ids);
            exit;
        }
        parent::getRegistry()->parseQName($ids[0], $ns, $id);
        if ($id == '-') {
            // this happens with hprd
            $iid = "hprd:" . substr($ids[1], 6);
        } else {
            $iid = $ns . ":" . $id;
        }

        // get the type
        // An unknown edge type skips the record, so it cannot reuse the label
        // and class left over from the previous record.
        if ($a[52] == "X") {
            $label = "{$a[0]} - {$a[1]} Interaction";
            $type = "Pairwise-Interaction";
        } elseif ($a[52] == "C") {
            $label = $a[53] . " component complex"; // num of participants
            $type = "Multimeric-Complex";
        } elseif ($a[52] == "Y") {
            $label = "{$a[0]} homomeric complex";
            $type = "Homopolymeric-Complex";
        } else {
            trigger_error("Skipping record with unknown edge type '{$a[52]}'", E_USER_WARNING);
            continue;
        }
        parent::addRDF(
            parent::describeIndividual($iid, $label, parent::getVoc() . $type) .
            parent::describeClass(parent::getVoc() . $type, str_replace("-", " ", $type))
        );

        // interaction type[52] by method[6]
        if ($a[6] != '-') {
            $data = $this->ParseStringArray($a[6]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(
                parent::triplify($iid, parent::getVoc() . "method", $qname) .
                parent::describeClass($qname, $data['label'])
            );
        }

        parent::addRDF(parent::triplify($iid, "rdfs:seeAlso", "http://wodaklab.org/iRefWeb/interaction/show/" . $a[50]));

        // set the interactors: columns 0 and 1 hold interactor a and b
        for ($i = 0; $i <= 1; $i++) {
            $p = ($i == 1) ? 'b' : 'a';

            $data = $this->ParseStringArray($a[$i]);
            $interactor = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "interactor_{$p}", $interactor));

            // biological role
            $role = $a[16 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_biological_role", $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // experimental role
            $role = $a[18 + $i];
            if ($role != '-') {
                $data = $this->ParseStringArray($role);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                if ($qname != "mi:0000") {
                    parent::addRDF(
                        parent::triplify($iid, parent::getVoc() . "interactor_{$p}" . "_experimental_role", $qname) .
                        parent::describeClass($qname, $data['label'])
                    );
                }
            }

            // interactor type (kept in its own variable so it does not clobber the edge type)
            $interactor_type = $a[20 + $i];
            if ($interactor_type != '-') {
                $data = $this->ParseStringArray($interactor_type);
                $qname = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(
                    parent::triplify($interactor, "rdf:type", $qname) .
                    parent::describeClass($qname, $data['label'])
                );
            }
        }

        // add the alternatives through the taxon + seq redundant group
        // $i is the column holding the alternative identifiers (2 for A, 3 for B);
        // the matching checksum is in column 32/33 and the taxon in column 9/10.
        for ($i = 2; $i <= 3; $i++) {
            $taxid = '';
            $rogid = "irefindex." . $a[32 + ($i - 2)];
            parent::addRDF(
                parent::describeIndividual($rogid, "", parent::getVoc() . "Taxon-Sequence-Identical-Group") .
                parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Group", "Taxon + Sequence Identical Group")
            );

            $tax = $a[9 + ($i - 2)];
            if ($tax && $tax != '-' && $tax != '-1') {
                $data = $this->ParseStringArray($tax);
                $taxid = trim($data["ns"]) . ":" . trim($data["id"]);
                parent::addRDF(parent::triplify($rogid, parent::getVoc() . "x-taxonomy", $taxid));
            }

            // NOTE(review): this previously read columns 3 and 4, which pairs A's
            // checksum and taxon with B's alternatives; columns 2 and 3 line up
            // with the A/B offsets used for $rogid and $tax above.
            $list = explode("|", $a[$i]);
            foreach ($list as $item) {
                $data = $this->ParseStringArray($item);
                $member_ns = trim($data["ns"]);
                $member_id = trim($data["id"]);
                $qname = $member_ns . ":" . $member_id;
                if ($member_ns && $member_ns != 'rogid' && $member_ns != 'irogid' && $member_id != '-') {
                    parent::addRDF(parent::triplify($rogid, parent::getVoc() . "has-member", $qname));
                    if ($taxid && $taxid != '-' && $taxid != '-1') {
                        parent::addRDF(parent::triplify($qname, parent::getVoc() . "x-taxonomy", $taxid));
                    }
                }
            }
        }

        // publications
        $list = explode("|", $a[8]);
        foreach ($list as $item) {
            // Skip the empty marker and the pubmed:0 placeholder; the previous
            // "&&" form could only ever skip "-".
            if ($item == '-' || $item == 'pubmed:0') {
                continue;
            }
            $data = $this->ParseStringArray($item);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "article", $qname));
        }

        // MI interaction type
        if ($a[11] != '-' && $a[11] != 'NA') {
            $data = $this->ParseStringArray($a[11]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, "rdf:type", $qname));
            if (!isset($defined[$qname])) {
                $defined[$qname] = '';
                parent::addRDF(parent::triplifyString($qname, "rdfs:label", $data['label']));
            }
        }

        // source
        if ($a[12] != '-') {
            $data = $this->ParseStringArray($a[12]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "source", $qname));
        }

        // confidence
        $list = explode("|", $a[14]);
        foreach ($list as $item) {
            $data = $this->ParseStringArray($item);
            $score_ns = trim($data["ns"]);
            $score_value = trim($data["id"]);
            if ($score_ns == 'lpr') {
                // lowest number of distinct interactions that any one article reported
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "minimum-number-interactions-reported", $score_value));
            } elseif ($score_ns == "hpr") {
                // higher number of distinct interactions that any one article reports
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "maximum-number-interactions-reported", $score_value));
            } elseif ($score_ns == 'np' || $score_ns == 'hp') {
                // total number of unique PMIDs used to support the interaction
                // NOTE(review): the iRefIndex README calls this score "np"; "hp" is
                // still accepted because that is the prefix this code named before.
                parent::addRDF(parent::triplifyString($iid, parent::getVoc() . "number-supporting-articles", $score_value));
            }
        }

        // expansion method: the resource id is derived from the md5 of the column value
        if ($a[15]) {
            $expansion = parent::getRes() . md5($a[15]);
            parent::addRDF(
                parent::describeIndividual($expansion, $a[15], parent::getVoc() . "Expansion-Method") .
                parent::describeClass(parent::getVoc() . "Expansion-Method", "Expansion Method") .
                parent::triplify($iid, parent::getVoc() . "expansion-method", $expansion)
            );
        }

        // host organism
        if ($a[28] != '-') {
            $data = $this->ParseStringArray($a[28]);
            $qname = trim($data["ns"]) . ":" . trim($data["id"]);
            parent::addRDF(parent::triplify($iid, parent::getVoc() . "host-organism", $qname));
        }

        // @todo add to record
        // created 2010/05/18
        $date = str_replace("/", "-", $a[30]) . "T00:00:00Z";
        parent::addRDF(parent::triplifyString($iid, "dc:created", $date, "xsd:dateTime"));

        // taxon-sequence identical interaction group, its interactors, and the
        // canonical (taxon-sequence similar) interaction and groups they map to
        parent::addRDF(
            parent::triplify($iid, parent::getVoc() . "taxon-sequence-identical-interaction", $rigid) .
            parent::triplify($rigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Identical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Identical-Interaction", "Taxon + Sequence Identical Interaction") .
            parent::triplify($rigid, parent::getVoc() . "irigid", $irigid) .
            parent::triplify($rigid, parent::getVoc() . "interactor-a", $rogida) .
            parent::triplify($rogida, parent::getVoc() . "irogid", $irogida) .
            parent::triplify($rigid, parent::getVoc() . "interactor-b", $rogidb) .
            parent::triplify($rogidb, parent::getVoc() . "irogid", $irogidb) .
            parent::triplify($rogida, parent::getVoc() . "canonical-group", $crogida) .
            parent::triplify($rogidb, parent::getVoc() . "canonical-group", $crogidb) .
            parent::triplify($rigid, parent::getVoc() . "taxon-sequence-similar-interaction", $crigid) .
            parent::triplify($crigid, "rdf:type", parent::getVoc() . "Taxon-Sequence-Canonical-Interaction") .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Canonical-Interaction", "Taxon + Sequence Canonical Interaction") .
            parent::triplify($crigid, parent::getVoc() . "icrigid", $icrigid) .
            parent::triplify($crigid, parent::getVoc() . "interactor-a-canonical-group", $crogida) .
            parent::triplify($crogida, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogida, parent::getVoc() . "icrogid", $icrogida) .
            parent::triplify($crigid, parent::getVoc() . "interactor-b-canonical-group", $crogidb) .
            parent::triplify($crogidb, "rdf:type", parent::getVoc() . "Taxon-Sequence-Similar-Group") .
            parent::triplify($crogidb, parent::getVoc() . "icrogid", $icrogidb) .
            parent::describeClass(parent::getVoc() . "Taxon-Sequence-Similar-Group", "Taxon + Sequence Similar Group")
        );

        parent::writeRDFBufferToWriteFile();
    }
}