/**
 * Extracts RDFa from the document behind a resource's locator and returns
 * the statements about that resource as a wrapper result.
 *
 * Only triples whose subject equals the resource URI and whose predicate is
 * not on the configured ignore list are kept. Objects of type 'uri' and
 * 'literal' are converted; any other object type is skipped.
 *
 * @param object $r        Resource object exposing getUri() and getLocator().
 * @param string $graphUri Not used by this implementation.
 *
 * @return array|false Result array with the keys 'status_description', 'add'
 *                     and 'status_codes', or false when no triples were found.
 */
public function run($r, $graphUri)
{
    include_once 'ARC2/ARC2.php';

    $uri = $r->getUri();
    $url = $r->getLocator();

    // The original built this configuration but never handed it to the
    // parser. NOTE(review): with auto_extract disabled only the explicit
    // 'rdfa' extraction below should contribute triples - confirm against ARC2.
    $config = array('auto_extract' => 0);
    $parser = ARC2::getSemHTMLParser($config);

    // Fetching and parsing remote markup is best effort; warnings are
    // deliberately suppressed and an empty result is reported as false below.
    @$parser->parse($url);
    @$parser->extractRDF('rdfa');
    $triples = $parser->getTriples();

    if (!is_array($triples) || count($triples) == 0) {
        return false;
    }

    // Resolve the ignore list once instead of once per triple, and tolerate
    // a configuration that does not define it.
    $ignoredProperties = isset($this->_config->ignore)
        ? $this->_config->ignore->toArray()
        : array();

    $data = array();
    $data[$uri] = array();

    // transform arc2 triples to RDF_PHP structure
    // TODO: blank nodes?
    // TODO: use a generic fetch preset from linkeddata wrapper
    foreach ($triples as $triple) {
        // only the requested resource and not ignored properties
        if ($triple['s'] != $uri || in_array($triple['p'], $ignoredProperties, true)) {
            continue;
        }

        if ($triple['o_type'] == 'uri') {
            // resource object
            $data[$uri][$triple['p']][] = array(
                'type'  => 'uri',
                'value' => $triple['o']
            );
        } elseif ($triple['o_type'] == 'literal') {
            // literal object, with optional language tag and datatype
            $newObject = array(
                'type'  => 'literal',
                'value' => $triple['o']
            );
            if (isset($triple['o_lang']) && $triple['o_lang'] != '') {
                $newObject['lang'] = $triple['o_lang'];
            }
            if (isset($triple['o_datatype']) && $triple['o_datatype'] != '') {
                $newObject['datatype'] = $triple['o_datatype'];
            }
            $data[$uri][$triple['p']][] = $newObject;
        }
    }

    // set a default class type if configured and no type was extracted
    if (!isset($data[$uri][EF_RDF_TYPE]) && isset($this->_config->defaultClass)) {
        $data[$uri][EF_RDF_TYPE][] = array(
            'type'  => 'uri',
            'value' => $this->_config->defaultClass
        );
    }

    $fullResult = array(
        'status_description' => "RDFa data found for URI {$uri}",
        'add'                => $data,
        'status_codes'       => array(
            Erfurt_Wrapper::NO_MODIFICATIONS,
            Erfurt_Wrapper::RESULT_HAS_ADD
        )
    );

    return $fullResult;
}
/**
 * Extracts the RDFa embedded in an arbitrary HTML page and stores it in the
 * triple store.
 *
 * @param string $url Address of the page to index
 * @return Response A response object carrying a status message
 */
function extractRDFa($url)
{
    global $ep;

    // Nothing to do when the URL is already present in the graph.
    if (graphContainsUrl($url)) {
        return new Response(null, "URL {$url} already visited, skip indexing");
    }

    $rdfaParser = ARC2::getSemHTMLParser();
    $rdfaParser->parse($url);
    $rdfaParser->extractRDF('rdfa');

    // Triple representation of the extracted data.
    $extracted = $rdfaParser->getTriples();

    // The page did not contain any RDFa.
    if (count($extracted) < 1) {
        return new Response(null, "URL {$url} contains no rdfa");
    }

    // Insert into the database, using the URL as second argument.
    $ep->insert($extracted, $url);

    return new Response(null, "URL {$url}: added " . count($extracted) . " triples");
}
<?php
// Request handler: parses the page named in the POST body, stores the
// extracted triples and starts describing a new "Recording" resource.

//error_reporting(0);
include 'config.php';
include ARC_DIR . 'ARC2.php';
include_once BASE_DIR . 'checkLogin.php';
ARC2::inc('PMJ_ResourceGraphPlugin');
//require_once(CLASSES_DIR . 'Recording.php');

header('Content-type: application/json');

$store = ARC2::getStore($config);
if (!$store->isSetUp()) {
    $store->setUp();
}

// The page address comes straight from the client and is handed to the
// parser, which fetches it. Only accept absolute http(s) URLs so a request
// cannot point the parser at anything else.
// NOTE(review): assumes the parser would otherwise also accept non-web
// locations such as local paths - confirm against the ARC2 reader.
$requestedPage = (isset($_POST['page']) && is_string($_POST['page'])) ? $_POST['page'] : '';
$requestedScheme = strtolower((string) parse_url($requestedPage, PHP_URL_SCHEME));
if ($requestedScheme !== 'http' && $requestedScheme !== 'https') {
    header('HTTP/1.1 400 Bad Request');
    echo json_encode(array('error' => 'Parameter "page" must be an absolute http(s) URL.'));
    exit;
}

/* Parse what ARC can from the page */
// this can take a bit long, though.
$parser = ARC2::getSemHTMLParser();
$parser->parse($requestedPage);
$parser->extractRDF('dc rdfa');
$triples = $parser->getTriples();

// Store everything that was extracted via a SPARQL+ INSERT.
$q = 'INSERT INTO <> { ' . $parser->toNTriples($triples) . ' } ';
$parseRS = $store->query($q);

$graph = ARC2::getComponent('PMJ_ResourceGraphPlugin', $graphConfig);
$recordingRes = ARC2::getComponent('PMJ_ResourcePlusPlugin', $graphConfig);

// NOTE(review): $_SESSION['userURI'] and $_POST['rubric'] are still read
// without an existence check; presumably checkLogin.php guarantees the
// former - verify.
$userURI = $_SESSION['userURI'];

// The recording URI is derived from the user URI and the current timestamp.
$recordingURI = 'http://data.rubrick-jetpack.org/Recordings/' . sha1($userURI . time());
$recordingRes->setURI($recordingURI);
$recordingRes->addPropValue('rdf:type', 'r:Recording', 'uri');
$recordingRes->addPropValue('sioc:has_creator', $userURI, 'uri');
$recordingRes->addPropValue('r:hasPage', $requestedPage, 'uri');
$recordingRes->addPropValue('dcterms:created', date('c'), 'literal', 'xsd:dateTime');
$recordingRes->addPropValue('r:hasRubric', $_POST['rubric'], 'uri');
/**
 * Parses the semantic HTML found at $url, serialises the extracted triples
 * as Turtle and passes that document on to to_rdf().
 *
 * @param string $url    Location handed to the parser and to to_rdf()
 * @param mixed  $output Forwarded unchanged to to_rdf()
 * @param string $type   Format selection handed to extractRDF()
 * @return mixed Whatever to_rdf() returns
 */
function semhtml($url, $output, $type = '')
{
    $htmlParser = ARC2::getSemHTMLParser($this->a);
    $htmlParser->parse($url);
    $htmlParser->extractRDF($type);

    $turtle = $htmlParser->toTurtle($htmlParser->getTriples());

    return $this->to_rdf($url, $turtle, $output);
}
/**
 * Sends a pingback message to the pingback service named in a WebID profile.
 *
 * The profile's "pingback:to" value is looked up. If the page behind it
 * declares a http://purl.org/net/pingback/Container in RDFa, the message is
 * POSTed to it; otherwise the service page is shown in an iframe. When the
 * profile names no service, a generic service is shown instead.
 *
 * @param string $to       WebID of the recipient (surrounding whitespace is trimmed)
 * @param string $message  Comment to deliver
 * @param string $base_uri Passed to the MyProfile constructor
 * @param bool   $verbose  When true the generated HTML fragment is returned
 *
 * @return string|null HTML fragment when $verbose is true, nothing otherwise
 */
function sendPing($to, $message, $base_uri, $verbose = false)
{
    $ret = "<br/>\n";
    $to = trim($to);

    // fetch the user's profile
    $person = new MyProfile($to, $base_uri, SPARQL_ENDPOINT);
    $person->load();
    $profile = $person->get_profile();
    $pingback_service = $profile->get("pingback:to");

    // proceed only if the user has defined a pingback:to relation
    if ($pingback_service != null) {
        // The service address ends up inside HTML below, so escape it once.
        $service_html = htmlspecialchars($pingback_service, ENT_QUOTES, 'UTF-8');

        // parse the pingback form (only now that a service is known to exist)
        $config = array('auto_extract' => 0);
        $parser = ARC2::getSemHTMLParser($config);
        $parser->parse($pingback_service);
        $parser->extractRDF('rdfa');
        $triples = $parser->getTriples();

        // A valid pingback resource declares itself as a pingback Container.
        $is_pingback_container = false;
        if (is_array($triples)) {
            foreach ($triples as $triple) {
                if (isset($triple['o']) && $triple['o'] === 'http://purl.org/net/pingback/Container') {
                    $is_pingback_container = true;
                    break;
                }
            }
        }

        if ($is_pingback_container) {
            // set form data
            $fields = array(
                'source'  => $_SESSION['webid'],
                'target'  => $to,
                'comment' => $message
            );

            // Should really replace curl with an ajax call
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $pingback_service);
            curl_setopt($ch, CURLOPT_POST, true);
            // Send a url-encoded string rather than an array: an array makes
            // cURL build a multipart body and, on old PHP versions, treats
            // values starting with "@" as file uploads.
            curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields, '', '&'));
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

            // execute post
            $return = curl_exec($ch);
            $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            // On a transport failure curl_exec() yields false; report the
            // cURL error text instead.
            $details = ($return === false) ? curl_error($ch) : $return;
            curl_close($ch);

            if ($httpCode === 201 || $httpCode === 202) {
                $ret .= success('Message delivered!');
            } else {
                $ret .= error('Something happened and I couldn\'t deliver the message!');
                $ret .= "<p>Details:</p>\n";
                // The response body comes from a remote server: escape it.
                $ret .= "<p>" . htmlspecialchars((string) $details, ENT_QUOTES, 'UTF-8') . "</p>\n";
            }
        } else {
            // Also reached when the page has triples but none declares a
            // pingback Container; previously that case produced no feedback.
            $ret .= " <p>{$service_html} does not comply with semantic pingback standards! \nShowing the pingback service page instead.</p>\n";
            // show frame
            $ret .= " <iframe src=\"{$service_html}\" width=\"100%\" height=\"300\">\n";
            $ret .= " <p>Your browser does not support iframes.</p>\n";
            $ret .= " </iframe>\n";
        }
    } else {
        // no valid pingback service found, fallback to AKSW
        $ret .= " <p>Could not find a pingback service for the given WebID. Here is a generic pingback service provided by http://pingback.aksw.org/.</p>\n";
        $ret .= " <iframe src=\"http://pingback.aksw.org/\" width=\"100%\" height=\"300\">\n";
        $ret .= " <p>Your browser does not support iframes.</p>\n";
        $ret .= " </iframe>\n";
    }

    if ($verbose) {
        return $ret;
    }
}
/**
 * Add the triples parsed from the supplied RDFa to the graph.
 * Nothing happens when $html is empty.
 *
 * @param string html the HTML containing RDFa to parse
 * @param string base the base URI against which relative URIs in the HTML document will be resolved
 */
function add_rdfa($html, $base = '')
{
    if (!$html) {
        return;
    }

    $rdfaParser = ARC2::getSemHTMLParser();
    // The markup is supplied inline; $base is passed as the document location.
    $rdfaParser->parse($base, $html);
    $rdfaParser->extractRDF('rdfa');

    $this->_add_arc2_triple_list($rdfaParser->getTriples());
}
/**
 * Stores the RDF extracted from the RDFa representation of a fragment.
 *
 * For every distinct URI subject one statement of this form is added:
 *     fragmentURI LOOMP:contains resourceURI .
 * In addition, every extracted triple itself is added to the model.
 *
 * @param Resource $fragmentRes Resource Object
 * @param string   $rdfa        RDFa representation of a given fragment
 */
private function _saveResFromRDFa($fragmentRes, $rdfa)
{
    $parser = ARC2::getSemHTMLParser();
    $parser->parse('', $rdfa);
    $parser->extractRDF('rdfa');

    $triples = $parser->getTriples();
    if (!is_array($triples)) {
        return;
    }

    // Subjects already linked to the fragment, kept as array keys so the
    // membership test is a hash lookup instead of a scan per triple.
    $linkedSubjects = array();

    foreach ($triples as $triple) {
        $subject = $triple['s'];

        if ($triple['s_type'] == "uri" && !isset($linkedSubjects[$subject])) {
            $linkedSubjects[$subject] = true;
            $this->rdfModel->add(
                new Statement($fragmentRes, LOOMP::CONTAINS(), new Resource($subject))
            );
        }

        if ($triple['o_type'] == 'literal') {
            // Language and datatype are optional parts of a parsed triple.
            $lang = isset($triple['o_lang']) ? $triple['o_lang'] : null;
            $obj = new Literal($triple['o'], $lang);
            if (!empty($triple['o_datatype'])) {
                $obj->setDatatype($triple['o_datatype']);
            }
        } else {
            // NOTE(review): blank-node objects and subjects are wrapped in
            // Resource as well, exactly as before - confirm this is intended.
            $obj = new Resource($triple['o']);
        }

        $this->rdfModel->add(
            new Statement(new Resource($subject), new Resource($triple['p']), $obj)
        );
    }
}
/**
 * Parses the RDFa of the document at $u and collects the property values
 * found for the subject returned by $this->Base().
 *
 * Properties in the http://opengraphprotocol.org/schema/ or
 * http://ogp.me/ns# namespace are stored under their lower-cased local
 * name; all other properties are stored under their full URI. Blank-node
 * values are skipped.
 *
 * @param string $u Location of the document to parse
 */
public function __construct($u)
{
    $this->_rdfa_parser = ARC2::getSemHTMLParser();
    $this->_rdfa_parser->parse($u);
    $this->_rdfa_parser->extractRDF('rdfa');

    $index = $this->_rdfa_parser->getSimpleIndex(0);
    $base = $this->Base();

    // The document may contain no statements about the base subject at all
    // (fetch failure, no RDFa); there is nothing to collect in that case.
    if (!is_array($index) || !isset($index[$base]) || !is_array($index[$base])) {
        return;
    }

    foreach ($index[$base] as $prop => $values) {
        $matches = array();
        // Both namespaces are handled by one pattern; the dots are escaped
        // so they only match literal dots.
        if (preg_match('!^http://(?:opengraphprotocol\.org/schema/|ogp\.me/ns#)(.+)$!i', $prop, $matches)) {
            $p = strtolower($matches[1]);
        } else {
            $p = $prop;
        }

        foreach ($values as $value) {
            if ($value['type'] == 'bnode') {
                continue;
            }
            $this->_data[$p][] = $value['value'];
        }
    }
}