/**
 * Set up the importer: store the incoming data, parse it when it is raw
 * RDF/XML or Turtle, then initialise the triple index, namespace prefixes,
 * wiki-title settings and the backing RDFIOStore.
 *
 * @param mixed  $importdata Array of triples when $dataformat is
 *                           'triples_array', otherwise unparsed RDF text.
 * @param string $dataformat 'triples_array', 'rdfxml' or 'turtle'. Any other
 *                           value is stored but triggers no parsing here.
 */
function __construct( $importdata, $dataformat ) {
	// $wgRequest has to be imported: the Turtle branch calls
	// $wgRequest->getText(), which was an undefined local before.
	// The two $rdfiogUsePseudoNamespacesFor* names are imported so that the
	// isset() checks further down can actually observe a configured value.
	// NOTE(review): the $rdfiogUseNSPrefixInWikiTitleFor* globals are declared
	// but never read in this constructor - they may be the intended (renamed)
	// settings for those isset() checks; confirm against LocalSettings docs.
	global $wgRequest, $rdfiogExtraNSPrefixes, $rdfiogPropertiesToUseAsWikiTitle,
		$rdfiogUseNSPrefixInWikiTitleForProperties, $rdfiogUseNSPrefixInWikiTitleForEntities,
		$rdfiogUsePseudoNamespacesForProperties, $rdfiogUsePseudoNamespacesForEntities;

	if ( $dataformat == 'triples_array' ) {
		$this->m_triples = $importdata;
	} else {
		$this->m_unparseddata = $importdata;
	}
	$this->m_dataformat = $dataformat;
	$this->m_haserrors = false;
	$this->m_delete = false;

	if ( $this->m_dataformat == 'rdfxml' ) {
		$this->m_unparseddata = $this->cleanupXML( $this->m_unparseddata );
		$this->m_parser = ARC2::getRDFXMLParser();
		$this->parse();
	} elseif ( $this->m_dataformat == 'turtle' ) {
		// Turtle input is re-read from the request rather than taken from
		// $importdata.
		$this->m_unparseddata = $wgRequest->getText( 'importdata' );
		$this->m_parser = ARC2::getTurtleParser();
		$this->parse();
	}

	$this->extractTripleIndex();
	$this->m_nsprefixes = $this->getNSPrefixMappingFromParser();
	if ( $rdfiogExtraNSPrefixes != '' ) {
		$this->addNamespacePrefixes( $rdfiogExtraNSPrefixes );
	}
	$nsprefixes = $this->m_nsprefixes;
	$this->setNSPrefixes( $nsprefixes );
	$this->setTripleIndex( $this->m_tripleindex );

	// Previously in Equiv URI Class
	if ( isset( $rdfiogUsePseudoNamespacesForProperties ) ) {
		// TODO: Change to check options from import screen
		// use parameter set in LocalSettings.php
		$this->m_usenspintitles_properties = $rdfiogUsePseudoNamespacesForProperties;
	}
	if ( isset( $rdfiogUsePseudoNamespacesForEntities ) ) {
		// use parameter set in LocalSettings.php
		$this->m_usenspintitles_entities = $rdfiogUsePseudoNamespacesForEntities;
	}

	if ( !empty( $rdfiogPropertiesToUseAsWikiTitle ) ) {
		$this->m_wikititlepropertyuris = $rdfiogPropertiesToUseAsWikiTitle;
	} else {
		// Default property URIs used when none are configured.
		$this->m_wikititlepropertyuris = array(
			'http://semantic-mediawiki.org/swivt/1.0#page', // Suggestion for new property
			'http://www.w3.org/2000/01/rdf-schema#label',
			'http://purl.org/dc/elements/1.1/title',
			'http://www.w3.org/2004/02/skos/core#preferredLabel',
			'http://xmlns.com/foaf/0.1/name'
		);
	}

	$this->m_store = new RDFIOStore();
}
/**
 * Parse an RDF/XML string into an ARC2 simple index.
 *
 * @param string $rdfxml RDF/XML document text.
 * @return array The parser's getSimpleIndex(0) result, or an empty array
 *               when $rdfxml is empty.
 */
function parse_to_simple_index($rdfxml)
{
    // Nothing to parse: skip creating a parser altogether.
    if (strlen($rdfxml) <= 0) {
        return array();
    }

    $rdfxml_parser = ARC2::getRDFXMLParser();
    $rdfxml_parser->parse(null, $rdfxml);

    return $rdfxml_parser->getSimpleIndex(0);
}
/**
 * Describe a resource and return the response body parsed as a list of
 * ARC2 triples.
 *
 * @deprecated triple lists are deprecated
 * @param string $uri    resource to describe (passed to describe())
 * @param array  $graphs passed through to describe()
 * @return array parsed triples, or an empty array when the response has no body
 */
function describe_to_triple_list($uri, $graphs = array())
{
    $described = $this->describe($uri, $graphs);
    if (!$described->body) {
        return array();
    }

    $rdf_parser = ARC2::getRDFXMLParser(
        array("bnode_prefix" => "genid", "base" => $this->uri)
    );
    $rdf_parser->parse("", $described->body);

    return $rdf_parser->getTriples();
}
/**
 * get_item_uris
 *
 * Parses the RDF/XML document at $this->uri and collects the object of
 * every bigfoot configuration "snapshot" triple. Parser errors are stored
 * in $this->errors.
 *
 * @return array
 * @author Keith Alexander
 **/
public function get_item_uris()
{
    $rdf_parser = ARC2::getRDFXMLParser();
    $rdf_parser->parse($this->uri);
    $all_triples = $rdf_parser->getTriples();
    $this->errors = $rdf_parser->getErrors();

    $snapshot_uris = array();
    foreach ($all_triples as $triple) {
        if ($triple['p'] != 'http://schemas.talis.com/2006/bigfoot/configuration#snapshot') {
            continue;
        }
        $snapshot_uris[] = $triple['o'];
    }

    return $snapshot_uris;
}
/**
 * put_to_network() must send a request body that parses as RDF/XML and
 * contains exactly the one triple added to the resource.
 */
function test_put_to_network_includes_body()
{
    $resource_uri = "http://example.org/res";

    $put_request = new FakeHttpRequest(new HttpResponse(200));
    $factory = new FakeRequestFactory();
    $factory->register('PUT', $resource_uri, $put_request);

    $resource = new NetworkResource("http://example.org/res");
    $resource->add_resource_triple(
        "http://example.org/res",
        "http://example.org/pred",
        "http://example.org/obj"
    );
    $resource->request_factory = $factory;
    $resource->put_to_network();

    // Parse what was actually sent and count the triples in it.
    $rdf_parser = ARC2::getRDFXMLParser(
        array("bnode_prefix" => "genid", "base" => 'http://example.org/')
    );
    $rdf_parser->parse('http://example.org/', $put_request->get_body());
    $sent_triples = $rdf_parser->getTriples();

    $this->assertEquals(1, count($sent_triples));
}
/**
 * get_item_uris
 *
 * Issues a GET for $this->uri asking for application/rdf+xml, parses the
 * response body and collects the object of every bigfoot configuration
 * "snapshot" triple. Parser errors are stored in $this->errors.
 *
 * @return array
 * @author Chris Clarke
 **/
public function get_item_uris()
{
    $http_request = $this->request_factory->make('GET', $this->uri, $this->credentials);
    $http_request->set_accept("application/rdf+xml");
    $http_response = $http_request->execute();

    $rdf_parser = ARC2::getRDFXMLParser();
    $rdf_parser->parse('', $http_response->body);
    $all_triples = $rdf_parser->getTriples();
    $this->errors = $rdf_parser->getErrors();

    $snapshot_uris = array();
    foreach ($all_triples as $triple) {
        if ($triple['p'] != 'http://schemas.talis.com/2006/bigfoot/configuration#snapshot') {
            continue;
        }
        $snapshot_uris[] = $triple['o'];
    }

    return $snapshot_uris;
}
/**
 * Import RDF/XML, e.g. from the RDF Import Special Page.
 *
 * The data is parsed with ARC2, handed to importFromArc2Data(), and the
 * parsed pieces are returned to the caller as well.
 *
 * @param string $importData
 * @return array with keys 'triples', 'tripleIndex' and 'namespaces'
 */
public function importRdfXml($importData)
{
    // Parse RDF/XML to triples
    $parser = ARC2::getRDFXMLParser();
    $parser->parseData($importData);

    // Collect the parser output once; the same values are imported and returned.
    $parsed = array(
        'triples' => $parser->triples,
        'tripleIndex' => $parser->getSimpleIndex(),
        'namespaces' => $parser->nsp,
    );

    $this->importFromArc2Data(
        $parsed['triples'],
        $parsed['tripleIndex'],
        $parsed['namespaces']
    );

    return $parsed;
}
/**
 * Parses the supplied RDF/XML for a URL and stores the resulting triples
 * in the triple store (through the global $ep).
 *
 * @param string $rdf RDF/XML text to parse
 * @param string $url URL the RDF belongs to; also used as the parse base
 * @return Response a response object carrying a status message
 */
function extractOAI($rdf, $url)
{
    global $ep;

    // If the URL is already in the graph, do nothing
    if (graphContainsUrl($url)) {
        return new Response(null, "URL {$url} already visited, skip indexing");
    }

    $rdf_parser = ARC2::getRDFXMLParser();
    $rdf_parser->parse($url, $rdf);

    // triple representation
    $found = $rdf_parser->getTriples();

    // No OAI data was found
    if (count($found) < 1) {
        return new Response(null, "URL {$url} contains no OAI-RDF");
    }

    // insert into the database
    $ep->insert($found, "");

    return new Response(null, "URL {$url}: added " . count($found) . " triples");
}
/**
 * Parse an RDF/XML result document through the supplied reader.
 *
 * On success the simple index is stored in $this->result; when the parser
 * reports errors they are copied to $this->errors and no result is set.
 *
 * @param string $url    document URL handed to the parser
 * @param mixed  $type   not used by this method
 * @param object $reader reader passed (by reference) to the parser's setReader()
 * @return void
 */
function parseRDFXMLResultDoc($url, $type, &$reader)
{
    $rdf_parser = ARC2::getRDFXMLParser();
    $rdf_parser->setReader($reader);
    $rdf_parser->parse($url);

    if (!$rdf_parser->getErrors()) {
        $this->result = $rdf_parser->getSimpleIndex(false);
        return;
    }

    # @@@ Could do with (improved) addError(s)?
    $this->errors = $rdf_parser->errors;
}
/**
 * Read the current (zlib-compressed) input file, parse it as RDF/XML and
 * pass every triple to TriplifyMap(), flushing the RDF buffer to the write
 * file after each triple.
 *
 * @param string $abbv abbreviation; lower-cased before being passed to TriplifyMap()
 * @return void
 */
private function OWL2RDF($abbv)
{
    $source_file = parent::getReadFile()->getFilename();
    $contents = file_get_contents("compress.zlib://" . $source_file);

    $rdf_parser = ARC2::getRDFXMLParser('file://' . $source_file);
    $rdf_parser->parse("http://bio2rdf.org/bioportal#", $contents);

    foreach ($rdf_parser->getTriples() as $triple) {
        $this->TriplifyMap($triple, strtolower($abbv));
        parent::writeRDFBufferToWriteFile();
    }

    parent::clear();
}
/**
 * As for addTurtle but load a string of RDF XML
 *
 * @see addTurtle
 * @param string $base base URI handed to the parser
 * @param string $data RDF/XML text
 * @return mixed 0 when the parser reported errors, otherwise the result of addTriples()
 */
function addRDFXML($base, $data)
{
    $rdf_parser = ARC2::getRDFXMLParser($this->arc2config);
    $rdf_parser->parse($base, $data);

    $problems = $rdf_parser->getErrors();
    $rdf_parser->resetErrors();

    if (!sizeof($problems)) {
        return $this->addTriples($rdf_parser->getTriples());
    }

    // Errors are only printed in debug mode; the caller always gets 0.
    if ($this->debug) {
        print "<h3>Error loading RDFXML string</h3>";
        print "<ul><li>" . join("</li><li>", $problems) . "</li></ul>";
    }

    return 0;
}
// Fetch $u with curl: follow up to 10 redirects, advertise the RDF
// serialisations in order of preference, and return the body as a string.
curl_setopt($ch, CURLOPT_URL, $u);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Accept: text/turtle, text/n3; q=0.9, application/turtle; q=0.8, application/n-triples; q=0.7, application/rdf+xml; q=0.6, application/json; q=0.4, */*; q=0.1"));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$data = curl_exec($ch);
// Keep only the media type: everything before the first ';' of Content-Type.
$_aux = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
$_aux2 = explode(";", $_aux);
$content_type = array_shift($_aux2);
curl_close($ch);
$parser = NULL;
// Table mapping response media types to the ARC2 parser that handles them.
$parsers = array();
$parsers[0]['formats'] = array('text/n3', 'application/x-turtle', 'application/turtle', 'text/turtle');
$parsers[0]['parser'] = ARC2::getTurtleParser();
$parsers[1]['formats'] = array('application/rdf+xml');
$parsers[1]['parser'] = ARC2::getRDFXMLParser();
$parsers[2]['formats'] = array('application/json', 'application/x-javascript', 'text/javascript', 'text/x-javascript', 'text/x-json');
$parsers[2]['parser'] = ARC2::getJSONParser();
foreach ($parsers as $v) {
    if (in_array($content_type, $v['formats'])) {
        $parser = $v['parser'];
    }
}
// If nothing fits, rely on ARC2's generic RDF parser to detect the format.
// NOTE(review): this assignment is unconditional, so the parser picked by the
// loop above is always discarded. The "workaround" marker below suggests this
// is deliberate - confirm before restoring an `if ($parser == NULL)` guard.
$parser = ARC2::getRDFParser();
// end of workaround
// The document's own base is unknown, so the requested URI is used as base.
$parser->parse($u, $data);
$triples = $parser->getTriples();
$first = true;
$c = 0;
/**
 * Parse RDF, either from a string or a URL.
 *
 * The argument is treated as inline RDF/XML when it contains "<rdf"
 * (case-insensitive); otherwise it is handed to the parser as a location
 * to fetch.
 *
 * @param string $url RDF/XML text or a URL
 * @return array the parser's getSimpleIndex(0) result
 */
public function parse($url)
{
    $rdf_parser = @ARC2::getRDFXMLParser();

    $is_inline_rdf = stripos($url, '<rdf') !== false;
    if (!$is_inline_rdf) {
        // Go out and get via the URL
        @$rdf_parser->parse($url);
    } else {
        // Parse an existing string
        $rdf_parser->parse(confirm_slash(base_url()), $url);
    }

    return $rdf_parser->getSimpleIndex(0);
}
/**
 * Add the triples parsed from the supplied RDF/XML to the graph.
 * A falsy $rdfxml is ignored.
 *
 * @param string $rdfxml the RDF/XML to parse
 * @param string $base the base URI against which relative URIs in the RDF/XML document will be resolved
 */
public function add_rdfxml($rdfxml, $base = '')
{
    if (!$rdfxml) {
        return;
    }

    /** @var \ARC2_RDFXMLParser $rdf_parser */
    $rdf_parser = \ARC2::getRDFXMLParser();
    $rdf_parser->parse($base, $rdfxml);
    $parsed_triples = $rdf_parser->getTriples();
    $this->_add_arc2_triple_list($parsed_triples);
    unset($rdf_parser);
}
/**
 * Parse the results of a search on the contentbox.
 *
 * The RSS 1.0 channel described by $uri supplies the list metadata; every
 * rdf:_n member of its items resource becomes one entry in the result's
 * items array. Channel properties missing from the document are left as
 * null instead of raising undefined-index warnings.
 *
 * NOTE(review): the parser base is $this->uri rather than the $uri
 * argument - presumably both are the same for callers; confirm.
 *
 * @param string uri the URI used to obtain the search
 * @param string xml the xml returned from a search request
 * @return ResourceList
 */
function parse_results_xml($uri, $xml)
{
    // fix up unprefixed rdf:resource in rss 1.0 otherwise ARC gets confused
    $xml = preg_replace("~rdf:li resource=~", "rdf:li rdf:resource=", $xml);

    $parser_args = array("bnode_prefix" => "genid", "base" => $this->uri);
    $resources = new ResourceList();
    $resources->items = array();

    $parser = ARC2::getRDFXMLParser($parser_args);
    $parser->parse($this->uri, $xml);
    $triples = $parser->getTriples();
    $index = ARC2::getSimpleIndex($triples, true);

    // The channel node may be absent entirely (empty or unparsable response).
    $channel = (isset($index[$uri]) && is_array($index[$uri])) ? $index[$uri] : array();

    $resources->title = isset($channel[RSS_TITLE][0]) ? $channel[RSS_TITLE][0] : null;
    $resources->description = isset($channel[RSS_DESCRIPTION][0]) ? $channel[RSS_DESCRIPTION][0] : null;
    $resources->start_index = isset($channel[OS_STARTINDEX][0]) ? $channel[OS_STARTINDEX][0] : null;
    $resources->items_per_page = isset($channel[OS_ITEMSPERPAGE][0]) ? $channel[OS_ITEMSPERPAGE][0] : null;
    $resources->total_results = isset($channel[OS_TOTALRESULTS][0]) ? $channel[OS_TOTALRESULTS][0] : null;

    $items_resource = isset($channel[RSS_ITEMS][0]) ? $channel[RSS_ITEMS][0] : null;

    // Only walk the items container when the document actually has one.
    if ($items_resource !== null && isset($index[$items_resource]) && is_array($index[$items_resource])) {
        foreach ($index[$items_resource] as $items_property => $items_property_value) {
            if (strpos($items_property, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#_') === 0) {
                $item_uri = $items_property_value[0];
                // An item with no description in the document still occupies
                // a slot (as null), matching the previous result shape.
                $resources->items[] = isset($index[$item_uri]) ? $index[$item_uri] : null;
            }
        }
    }

    return $resources;
}
/**
 * Replace the stored triples for $uri with those found in the RDF/XML
 * document at $uri.
 *
 * The existing triples are deleted first; the document is then parsed,
 * reduced to the triples whose subject is $uri, serialized and inserted.
 *
 * @param string $uri      document location and subject to keep
 * @param string $graph    passed to deleteTriples() and insert()
 * @param string $endpoint passed to deleteTriples() and insert()
 * @throws Exception when the parser reports errors
 */
static function insertRDF($uri, $graph, $endpoint)
{
    self::deleteTriples($uri, $graph, $endpoint);

    // READ NEW TRIPLES WITH THE SUBJECT $uri
    $rdf_parser = @ARC2::getRDFXMLParser();
    @$rdf_parser->parse($uri);

    $parse_errors = $rdf_parser->getErrors();
    if ($parse_errors) {
        throw new Exception(self::buildMessage($parse_errors));
    }

    // Drop every triple about another subject; surviving keys are left as-is.
    $newTriples = @$rdf_parser->getTriples();
    foreach ($newTriples as $position => $triple) {
        if ($uri != $triple['s']) {
            unset($newTriples[$position]);
        }
    }

    /* Serializer instantiation */
    $serializer = @new FourStore_NTriplesSerializer();

    /* Serialize a triples array */
    $ntriples_doc = @$serializer->getSerializedTriples($newTriples, 1);

    self::insert($ntriples_doc, $graph, $endpoint);
}
/**
 * Add the triples parsed from the supplied RDF/XML to the graph.
 * Nothing happens when $rdfxml is falsy.
 *
 * @param string rdfxml the RDF/XML to parse
 * @param string base the base URI against which relative URIs in the RDF/XML document will be resolved
 */
function add_rdfxml($rdfxml, $base = '')
{
    if (!$rdfxml) {
        return;
    }

    $xml_parser = ARC2::getRDFXMLParser();
    $xml_parser->parse($base, $rdfxml);
    $this->_add_arc2_triple_list($xml_parser->getTriples());
    unset($xml_parser);
}
/**
 * schedule_restore() must POST RDF/XML containing a bigfoot snapshotUri
 * triple whose object is the snapshot URI, typed as a uri.
 */
function test_schedule_restore_posts_rdfxml_with_a_snapshot_uri()
{
    $post_request = new FakeHttpRequest(new HttpResponse());
    $factory = new FakeRequestFactory();
    $factory->register('POST', "http://example.org/store/jobs", $post_request);

    $job_queue = new JobQueue("http://example.org/store/jobs");
    $job_queue->request_factory = $factory;
    $response = $job_queue->schedule_restore('http://example.org/snapshot');

    $rdf_parser = ARC2::getRDFXMLParser(
        array("bnode_prefix" => "genid", "base" => 'http://example.org/')
    );
    $rdf_parser->parse('http://example.org/', $post_request->get_body());

    // Scan the posted triples until the expected one turns up.
    $has_snapshot_triple = false;
    foreach ($rdf_parser->getTriples() as $posted) {
        $is_match = $posted['p'] == 'http://schemas.talis.com/2006/bigfoot/configuration#snapshotUri'
            && $posted['o_type'] == 'uri'
            && $posted['o'] == 'http://example.org/snapshot';
        if ($is_match) {
            $has_snapshot_triple = true;
            break;
        }
    }

    $this->assertTrue($has_snapshot_triple);
}
/**
 * Parse an RDF/XML string and return its triples.
 *
 * @param string $base   base URI handed to the ARC2 parser
 * @param string $rdfxml RDF/XML text
 * @return array the parser's getTriples() result
 */
function parse($base, $rdfxml)
{
    $rdfxml_parser = ARC2::getRDFXMLParser();
    $rdfxml_parser->parse($base, $rdfxml);
    $parsed_triples = $rdfxml_parser->getTriples();

    return $parsed_triples;
}
/**
 * Load RDF from a URI into this graph's subject/predicate ("sp") and
 * object/predicate ("op") indexes.
 *
 * - "data:" URIs are URL-decoded and parsed as Turtle.
 * - Any other URI that was already loaded returns its stored triple count.
 * - When $this->cacheDir is set, the document is fetched (following up to
 *   16 redirects, asking for application/rdf+xml) and written to the cache
 *   if it is missing or older than $this->cacheAge; a cached file is then
 *   parsed as RDF/XML. Otherwise the URI is parsed directly by ARC2.
 *
 * @param string $uri     URI to load; expanded with expandURI() first
 * @param array  $aliases map of URI => replacement applied to the subject,
 *                        predicate and object of every stored triple
 * @param array  $map     subjects (as keys) whose owl:sameAs objects are
 *                        added to $aliases, pointing back at the subject
 * @return int number of triples parsed, the stored count for an already
 *             loaded URI, or 0 when the parser reported errors
 */
public function load($uri, $aliases = array(), $map = array())
{
    $this->forceString($uri);
    $uri = $this->expandURI($uri);

    if (substr($uri, 0, 5) == "data:") {
        $data = urldecode(preg_replace("/^data:[^,]*,/", "", $uri));
        $parser = ARC2::getTurtleParser();
        $parser->parse($uri, $data);
    } else {
        if (isset($this->loaded[$uri])) {
            return $this->loaded[$uri];
        }

        if (isset($this->cacheDir)) {
            $filename = $this->cacheDir . "/" . md5($uri);
            if (!file_exists($filename) || filemtime($filename) + $this->cacheAge < time()) {
                # decache if out of date, even if we fail to re cache.
                if (file_exists($filename)) {
                    unlink($filename);
                }

                $url = $uri;
                $ttl = 16;
                $mime = "";

                # dirty hack to set the accept header without using curl
                $old_user_agent = ini_get('user_agent');
                ini_set('user_agent', "PHP\r\nAccept: application/rdf+xml");

                while ($ttl > 0) {
                    if (!($rdf_fp = fopen($url, 'r'))) {
                        break;
                    }
                    $meta_data = stream_get_meta_data($rdf_fp);
                    $redir = 0;
                    foreach ($meta_data['wrapper_data'] as $response) {
                        if (substr(strtolower($response), 0, 10) == 'location: ') {
                            $newurl = substr($response, 10);
                            // Host-relative redirect: rebuild scheme and host from the current URL.
                            if (substr($newurl, 0, 1) == "/") {
                                $parts = preg_split("/\\//", $url);
                                $newurl = $parts[0] . "//" . $parts[2] . $newurl;
                            }
                            $url = $newurl;
                            $redir = 1;
                        }
                        if (substr(strtolower($response), 0, 14) == 'content-type: ') {
                            $mime = preg_replace("/\\s*;.*\$/", "", substr($response, 14));
                        }
                    }
                    if (!$redir) {
                        break;
                    }
                    $ttl--;
                    fclose($rdf_fp);
                }
                ini_set('user_agent', $old_user_agent);

                if ($ttl > 0 && $mime == "application/rdf+xml" && $rdf_fp) {
                    # candidate for caching!
                    if (!($cache_fp = fopen($filename, 'w'))) {
                        echo "Cannot write file ({$filename})";
                        exit;
                    }
                    while (!feof($rdf_fp)) {
                        fwrite($cache_fp, fread($rdf_fp, 8192));
                    }
                    fclose($cache_fp);
                }

                // $rdf_fp is false when fopen() failed and already closed when
                // the redirect budget ran out; closing it again in either
                // state is an error, so only close a live handle.
                if (is_resource($rdf_fp)) {
                    fclose($rdf_fp);
                }
            }
        }

        if (isset($filename) && file_exists($filename)) {
            $parser = ARC2::getRDFXMLParser();
            $parser->parse($filename);
        } else {
            $parser = ARC2::getRDFParser();
            # Don't try to load the same URI twice!
            if (!isset($this->firstGraphURI)) {
                $this->firstGraphURI = $uri;
            }
            $parser->parse($uri);
        }
    }

    $errors = $parser->getErrors();
    $parser->resetErrors();
    if (sizeof($errors)) {
        if ($this->debug) {
            print "<h3>Error loading: {$uri}</h3>";
            print "<ul><li>" . join("</li><li>", $errors) . "</li></ul>";
        }
        return 0;
    }

    $triples = $parser->getTriples();

    // First pass: collect owl:sameAs aliases for the subjects listed in $map.
    foreach ($triples as $t) {
        $t["s"] = $this->cleanURI($t["s"]);
        if (!isset($map[$t["s"]])) {
            continue;
        }
        $t["p"] = $this->cleanURI($t["p"]);
        if ($t["p"] != "http://www.w3.org/2002/07/owl#sameAs") {
            continue;
        }
        $aliases[$t["o"]] = $t["s"];
    }

    // Second pass: apply the aliases and record every triple in both indexes.
    foreach ($triples as $t) {
        $t["s"] = $this->cleanURI($t["s"]);
        $t["p"] = $this->cleanURI($t["p"]);

        if (isset($aliases[$t["s"]])) {
            $t["s"] = $aliases[$t["s"]];
        }
        if (isset($aliases[$t["p"]])) {
            $t["p"] = $aliases[$t["p"]];
        }
        if (isset($aliases[$t["o"]])) {
            $t["o"] = $aliases[$t["o"]];
        }

        // Literals keep their value, datatype and language; other objects are stored as-is.
        if ($t["o_type"] == "literal") {
            $this->t["sp"][$t["s"]][$t["p"]][] = array("v" => $t["o"], "d" => $t["o_datatype"], "l" => $t["o_lang"]);
        } else {
            $this->t["sp"][$t["s"]][$t["p"]][] = $t["o"];
        }
        $this->t["op"][$t["o"]][$t["p"]][] = $t["s"];
    }

    $this->loaded[$uri] = sizeof($triples);
    return $this->loaded[$uri];
}
/**
 * Get an array of property URIs from the specified ontology,
 * to function as a filter.
 *
 * The vocabulary at $this->m_filtervocaburl is parsed as RDF/XML and the
 * subject of every "rdf:type owl:ObjectProperty" triple is collected.
 *
 * @return array $vocab_p_uri_filter
 */
function getVocabPropertyUriFilter() {
	$rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
	$owl_object_property = 'http://www.w3.org/2002/07/owl#ObjectProperty';

	$parser = ARC2::getRDFXMLParser();
	$parser->parse( $this->m_filtervocaburl );

	$vocab_p_uri_filter = array();
	foreach ( $parser->getTriples() as $triple ) {
		// For OWL vocabularies:
		$declares_object_property = $triple['p'] === $rdf_type
			&& $triple['o'] === $owl_object_property;
		if ( !$declares_object_property ) {
			continue;
		}
		$vocab_p_uri_filter[] = $triple['s'];
	}

	return $vocab_p_uri_filter;
}
/**
 * create_store() must POST RDF/XML that contains exactly one bigfoot
 * storeTemplate triple.
 */
function test_create_store_posts_rdfxml_with_a_single_store_template()
{
    $post_request = new FakeHttpRequest(new HttpResponse());
    $factory = new FakeRequestFactory();
    $factory->register('POST', "http://example.org/stores", $post_request);

    $stores = new StoreCollection("http://example.org/stores");
    $stores->request_factory = $factory;
    $response = $stores->create_store("scooby", "http://example.org/template");

    $rdf_parser = ARC2::getRDFXMLParser(
        array("bnode_prefix" => "genid", "base" => 'http://example.org/')
    );
    $rdf_parser->parse('http://example.org/', $post_request->get_body());

    // Collect the object type of each storeTemplate triple in the posted body.
    $template_object_types = array();
    foreach ($rdf_parser->getTriples() as $posted) {
        if ($posted['p'] != 'http://schemas.talis.com/2006/bigfoot/configuration#storeTemplate') {
            continue;
        }
        $template_object_types[] = $posted['o_type'];
    }

    $this->assertEquals(1, count($template_object_types));
}
$q = "\r\nPREFIX dbpedia-owl-musicgenre: <http://dbpedia.org/ontology/MusicGenre/>\r\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\r\nSELECT DISTINCT ?genre ?genreuri\r\nWHERE\r\n { <{$uri}> dbpedia-owl-musicgenre:stylisticOrigin ?genreuri .\r\n ?genreuri rdfs:label ?genre . \r\n FILTER ( langMatches(lang(?genre), \"EN\") ) }\r\n"; $stylisticOrigins = $dbpedia->query($q, 'rows'); $q = "\r\nPREFIX dbpedia-owl-musicgenre: <http://dbpedia.org/ontology/MusicGenre/>\r\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\r\nSELECT DISTINCT ?genre ?genreuri\r\nWHERE\r\n { ?genreuri dbpedia-owl-musicgenre:musicSubgenre <{$uri}> ;\r\n rdfs:label ?genre . \r\n FILTER ( langMatches(lang(?genre), \"EN\") ) }\r\n"; $parentGenres = $dbpedia->query($q, 'rows'); $q = "\r\nPREFIX dbpedia-owl-musicgenre: <http://dbpedia.org/ontology/MusicGenre/>\r\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\r\nSELECT DISTINCT ?genre ?genreuri\r\nWHERE\r\n { ?genreuri dbpedia-owl-musicgenre:stylisticOrigin <{$uri}> ;\r\n rdfs:label ?genre . \r\n FILTER ( langMatches(lang(?genre), \"EN\") ) }\r\n"; $stylisticChildren = $dbpedia->query($q, 'rows'); /* Now, we finally grab artists. */ $q = "\r\nPREFIX dbpedia-owl-artist: <http://dbpedia.org/ontology/Artist/>\r\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\r\nPREFIX dbpprop: <http://dbpedia.org/property/>\r\nSELECT DISTINCT ?artist ?artisturi ?yearsActive\r\nWHERE\r\n { ?artisturi dbpedia-owl-artist:genre <{$uri}> ;\r\n rdfs:label ?artist .\r\n OPTIONAL { ?artisturi dbpprop:yearsActive ?yearsActive . }\r\n FILTER ( langMatches(lang(?artist), \"EN\") ) }\r\n\r\n"; $artists = $dbpedia->query($q, 'rows'); /* But we need to do some legwork to get the number of albums, since we don't have a sameAs link back to DBTune. We use sameAs.org to go backwards. */ $newArtists = array(); foreach ($artists as $key => $artist) { $sameAsURI = "http://sameas.org/rdf?uri=" . 
urlencode($artist['artisturi']); $parser = ARC2::getRDFXMLParser(); $parser->parse($sameAsURI); $triples = $parser->getTriples(); $dbturi = ''; foreach ($triples as $triple) { if (substr($triple['o'], 0, 46) == 'http://dbtune.org/musicbrainz/resource/artist/') { $dbturi = $triple['o']; break; } } $newArtists[$key] = $artist; $newArtists[$key]['artisturi'] = $dbturi; print_r($newArtists[$key]); if ($dbturi != '') { /* Now that we have the DBTune URI, we can query it for the number of albums. */