getSemHTMLParser() static public method

static public getSemHTMLParser ( $a = '' )
Example #1
0
 /**
  * Fetches the document behind a resource's locator, extracts its RDFa and
  * converts the triples about that resource into an RDF/PHP structure.
  *
  * Only triples whose subject equals the resource URI and whose predicate is
  * not on the configured ignore list are kept. Objects that are neither URIs
  * nor literals (e.g. blank nodes) are skipped.
  *
  * @param object $r        Resource object providing getUri() and getLocator()
  * @param string $graphUri Graph URI (not used inside this method)
  * @return array|false Array with the keys 'status_description', 'add' and
  *                     'status_codes', or false when no triples were extracted
  */
 public function run($r, $graphUri)
 {
     include_once 'ARC2/ARC2.php';

     $uri = $r->getUri();
     $url = $r->getLocator();

     // The configuration was previously built but never handed to the parser.
     // With auto extraction switched off, only the explicit RDFa extraction
     // below contributes triples.
     $config = array('auto_extract' => 0);
     $parser = ARC2::getSemHTMLParser($config);

     // Errors are deliberately suppressed: an unreachable or malformed page
     // simply yields no triples and is reported as "false" below.
     @$parser->parse($url);
     @$parser->extractRDF('rdfa');

     $triples = $parser->getTriples();
     if (!is_array($triples) || count($triples) == 0) {
         return false;
     }

     // Resolve the ignore list once instead of on every loop iteration.
     $ignoredProperties = isset($this->_config->ignore)
         ? $this->_config->ignore->toArray()
         : array();

     $data = array();
     $data[$uri] = array();

     // transform arc2 triples to the RDF/PHP structure
     // TODO: blank nodes?
     foreach ($triples as $triple) {
         // only the requested resource is of interest
         // TODO: use a generic fetch preset from linkeddata wrapper
         if ($triple['s'] != $uri) {
             continue;
         }
         // skip properties the configuration asks us to ignore
         if (in_array($triple['p'], $ignoredProperties, true)) {
             continue;
         }

         if ($triple['o_type'] == 'uri') {
             // resource object
             $data[$uri][$triple['p']][] = array('type' => 'uri', 'value' => $triple['o']);
         } elseif ($triple['o_type'] == 'literal') {
             // literal object, optionally with language tag and datatype
             $newObject = array('type' => 'literal', 'value' => $triple['o']);
             if (isset($triple['o_lang']) && $triple['o_lang'] != '') {
                 $newObject['lang'] = $triple['o_lang'];
             }
             if (isset($triple['o_datatype']) && $triple['o_datatype'] != '') {
                 $newObject['datatype'] = $triple['o_datatype'];
             }
             $data[$uri][$triple['p']][] = $newObject;
         }
     }

     // set a default class type if configured and no type was extracted
     if (!isset($data[$uri][EF_RDF_TYPE]) && isset($this->_config->defaultClass)) {
         $data[$uri][EF_RDF_TYPE][] = array('type' => 'uri', 'value' => $this->_config->defaultClass);
     }

     return array(
         'status_description' => "RDFa data found for URI {$uri}",
         'add'                => $data,
         'status_codes'       => array(
             Erfurt_Wrapper::NO_MODIFICATIONS,
             Erfurt_Wrapper::RESULT_HAS_ADD,
         ),
     );
 }
/**
 * Extracts embedded RDFa from an arbitrary HTML page and stores it in the
 * triple store.
 *
 * Pages whose URL is already present in the graph are skipped, as are pages
 * that yield no triples.
 *
 * @param string $url Address of the HTML page to index
 * @return Response A response object carrying a status message
 */
function extractRDFa($url)
{
    global $ep;

    // Nothing to do when the URL is already part of the graph
    if (graphContainsUrl($url)) {
        return new Response(null, "URL {$url} already visited, skip indexing");
    }

    $rdfaParser = ARC2::getSemHTMLParser();
    $rdfaParser->parse($url);
    $rdfaParser->extractRDF('rdfa');

    // triple representation of the extracted RDFa
    $triples = $rdfaParser->getTriples();

    // No RDFa was found on the page
    if (count($triples) < 1) {
        return new Response(null, "URL {$url} contains no rdfa");
    }

    // insert into the database
    $ep->insert($triples, $url);
    $added = count($triples);

    return new Response(null, "URL {$url}: added {$added} triples");
}
Example #3
0
<?php

// Request script: parses the page named in $_POST['page'] with ARC2, inserts the
// extracted triples into the ARC2 store and starts describing a new Recording
// resource for the logged-in user. All output is declared as JSON.
//error_reporting(0);
include 'config.php';
include ARC_DIR . 'ARC2.php';
include_once BASE_DIR . 'checkLogin.php';
ARC2::inc('PMJ_ResourceGraphPlugin');
//require_once(CLASSES_DIR . 'Recording.php');
header('Content-type: application/json');
// NOTE(review): $config and $graphConfig are not defined in this script —
// presumably they come from config.php; verify.
$store = ARC2::getStore($config);
// Set the store up on first use.
if (!$store->isSetUp()) {
    $store->setUp();
}
/* Parse what ARC can from the page */
//this can take a bit long, though.
// NOTE(review): $_POST['page'] is used unvalidated as the location the parser
// loads — confirm that it is restricted elsewhere (e.g. in checkLogin.php).
$parser = ARC2::getSemHTMLParser();
$parser->parse($_POST['page']);
// Extract both the 'dc' and the 'rdfa' formats.
$parser->extractRDF('dc rdfa');
$triples = $parser->getTriples();
// Insert the extracted triples, serialised as N-Triples, via a SPARQL+ INSERT.
// NOTE(review): the target graph IRI is empty (<>) — confirm this is intended.
$q = 'INSERT INTO <>  { ' . $parser->toNTriples($triples) . ' } ';
$parseRS = $store->query($q);
$graph = ARC2::getComponent('PMJ_ResourceGraphPlugin', $graphConfig);
$recordingRes = ARC2::getComponent('PMJ_ResourcePlusPlugin', $graphConfig);
// NOTE(review): $_SESSION['userURI'] is presumably populated by checkLogin.php — verify.
$userURI = $_SESSION['userURI'];
// The recording URI is derived from a SHA-1 of the user URI and the current timestamp.
$recordingURI = 'http://data.rubrick-jetpack.org/Recordings/' . sha1($userURI . time());
$recordingRes->setURI($recordingURI);
// Describe the recording: type, creator, recorded page, creation time and rubric.
// No call that persists $recordingRes is visible in this part of the file.
$recordingRes->addPropValue('rdf:type', 'r:Recording', 'uri');
$recordingRes->addPropValue('sioc:has_creator', $userURI, 'uri');
$recordingRes->addPropValue('r:hasPage', $_POST['page'], 'uri');
$recordingRes->addPropValue('dcterms:created', date('c'), 'literal', 'xsd:dateTime');
$recordingRes->addPropValue('r:hasRubric', $_POST['rubric'], 'uri');
 /**
  * Parses the HTML document at the given URL with ARC2's semantic HTML
  * parser, extracts RDF in the requested format(s), serialises the triples
  * as Turtle and passes that document on to to_rdf().
  *
  * @param string $url    Location of the HTML document
  * @param mixed  $output Output selector, forwarded unchanged to to_rdf()
  * @param string $type   Format list handed to the parser's extractRDF()
  * @return mixed Whatever to_rdf() returns
  */
 function semhtml($url, $output, $type = '')
 {
     $extractor = ARC2::getSemHTMLParser($this->a);
     $extractor->parse($url);
     $extractor->extractRDF($type);

     $turtle = $extractor->toTurtle($extractor->getTriples());

     return $this->to_rdf($url, $turtle, $output);
 }
Example #5
0
/**
 * Sends a semantic pingback to the pingback service advertised in a user's
 * profile.
 *
 * The recipient's profile is loaded and its "pingback:to" value is read. If
 * the service page exposes a pingback Container via RDFa, the message is
 * POSTed to it; otherwise the service page (or a generic fallback service)
 * is embedded in an iframe.
 *
 * @param string $to       WebID of the recipient (surrounding whitespace is trimmed)
 * @param string $message  Comment to deliver
 * @param string $base_uri Base URI passed on to MyProfile
 * @param bool   $verbose  When true the HTML report is returned
 * @return string|null HTML report when $verbose is true, otherwise null
 */
function sendPing($to, $message, $base_uri, $verbose = false)
{
    $ret = "<br/>\n";
    $to = trim($to);

    // fetch the user's profile
    $person = new MyProfile($to, $base_uri, SPARQL_ENDPOINT);
    $person->load();
    $profile = $person->get_profile();
    $pingback_service = $profile->get("pingback:to");

    if ($pingback_service == null) {
        // no valid pingback service found, fallback to AKSW
        // (checked before parsing so that no request is made for a missing service)
        $ret .= "   <p>Could not find a pingback service for the given WebID. Here is a generic pingback service provided by http://pingback.aksw.org/.</p>\n";
        $ret .= "   <iframe src=\"http://pingback.aksw.org/\" width=\"100%\" height=\"300\">\n";
        $ret .= "   <p>Your browser does not support iframes.</p>\n";
        $ret .= "   </iframe>\n";
    } else {
        // The service URL comes from a remote profile, so escape it before
        // placing it into HTML.
        $safe_service = htmlspecialchars((string) $pingback_service, ENT_QUOTES, 'UTF-8');

        // set form data
        $source = $_SESSION['webid'];

        // parse the pingback form
        $config = array('auto_extract' => 0);
        $parser = ARC2::getSemHTMLParser($config);
        $parser->parse($pingback_service);
        $parser->extractRDF('rdfa');

        // load triples
        $triples = $parser->getTriples();

        if (is_array($triples) && count($triples) > 0) {
            foreach ($triples as $triple) {
                // proceed only if we have a valid pingback resource
                if ($triple['o'] != 'http://purl.org/net/pingback/Container') {
                    continue;
                }

                $fields = array('source' => $source, 'target' => $to, 'comment' => $message);

                // Should really replace curl with an ajax call
                $ch = curl_init();
                curl_setopt($ch, CURLOPT_URL, $pingback_service);
                // CURLOPT_POST is a boolean switch, not a field count
                curl_setopt($ch, CURLOPT_POST, true);
                curl_setopt($ch, CURLOPT_POSTFIELDS, $fields);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

                // execute post; $return is false when the transfer itself failed
                $return = curl_exec($ch);
                $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
                $curlError = curl_error($ch);
                curl_close($ch);

                if ($httpCode === 201 || $httpCode === 202) {
                    $ret .= success('Message delivered!');
                } else {
                    $details = ($return === false) ? $curlError : (string) $return;
                    $ret .= error('Something happened and I couldn\'t deliver the message!');
                    $ret .= "<p>Details:</p>\n";
                    // The response body is untrusted remote content: escape it.
                    $ret .= "<p>" . htmlspecialchars($details, ENT_QUOTES, 'UTF-8') . "</p>\n";
                }

                // only the first container is pinged
                break;
            }
        } else {
            $ret .= "   <p>{$safe_service} does not comply with semantic pingback standards! Showing the pingback service page instead.</p>\n";
            // show frame
            $ret .= "   <iframe src=\"{$safe_service}\" width=\"100%\" height=\"300\">\n";
            $ret .= "   <p>Your browser does not support iframes.</p>\n";
            $ret .= "   </iframe>\n";
        }
    }

    if ($verbose) {
        return $ret;
    }
}
 /**
  * Parse the RDFa embedded in an HTML string and add the resulting triples
  * to the graph. Nothing happens when $html is empty.
  * @param string html the HTML containing RDFa to parse
  * @param string base the base URI handed to the parser together with the HTML
  */
 function add_rdfa($html, $base = '')
 {
     // guard: no markup, nothing to add
     if (!$html) {
         return;
     }

     $rdfa_parser = ARC2::getSemHTMLParser();
     $rdfa_parser->parse($base, $html);
     $rdfa_parser->extractRDF('rdfa');

     $extracted = $rdfa_parser->getTriples();
     $this->_add_arc2_triple_list($extracted);

     // release the parser once its triples have been handed over
     unset($rdfa_parser);
 }
Example #7
0
 /**
  * Adds the RDF triples extracted from a fragment's RDFa to the RDF model.
  *
  * For every distinct URI subject one additional statement of the form
  * fragmentURI LOOMP:contains resourceURI .
  * is added, followed by the extracted triple itself.
  *
  * @param  Resource    $fragmentRes   Resource object of the fragment
  * @param  string      $rdfa          RDFa representation of the fragment
  */
 private function _saveResFromRDFa($fragmentRes, $rdfa)
 {
     $rdfaParser = ARC2::getSemHTMLParser();
     $rdfaParser->parse('', $rdfa);
     $rdfaParser->extractRDF('rdfa');

     // URI subjects that already received a "contains" statement
     $linkedSubjects = array();

     foreach ($rdfaParser->getTriples() as $current) {
         $subject = $current['s'];

         // link each URI subject to the fragment exactly once
         if ($current['s_type'] == "uri" && !in_array($subject, $linkedSubjects)) {
             $linkedSubjects[] = $subject;
             $this->rdfModel->add(new Statement($fragmentRes, LOOMP::CONTAINS(), new Resource($subject)));
         }

         // build the object node: everything that is not a literal becomes a resource
         if ($current['o_type'] != 'literal') {
             $object = new Resource($current['o']);
         } else {
             $object = new Literal($current['o'], $current['o_lang']);
             if ($current['o_datatype']) {
                 $object->setDatatype($current['o_datatype']);
             }
         }

         $this->rdfModel->add(new Statement(new Resource($subject), new Resource($current['p']), $object));
     }
 }
 /**
  * Loads the page at the given location, extracts its RDFa and collects the
  * property values of the base resource (as returned by Base()) in $this->_data.
  *
  * Properties in the http://opengraphprotocol.org/schema/ or http://ogp.me/ns#
  * namespaces are stored under their lower-cased local name; every other
  * property is stored under its full URI. Blank-node values are skipped.
  *
  * @param string $u Location of the page to parse
  */
 public function __construct($u)
 {
     $this->_rdfa_parser = ARC2::getSemHTMLParser();
     $this->_rdfa_parser->parse($u);
     $this->_rdfa_parser->extractRDF('rdfa');

     $index = $this->_rdfa_parser->getSimpleIndex(0);
     $base = $this->Base();

     // A page without RDFa about the base resource has no entry in the index;
     // treat that as "no properties" instead of iterating over null.
     $properties = (is_array($index) && isset($index[$base]) && is_array($index[$base]))
         ? $index[$base]
         : array();

     foreach ($properties as $prop => $values) {
         $matches = array();
         // both Open Graph namespaces map to the lower-cased local name
         if (preg_match('!^http://(?:opengraphprotocol\.org/schema/|ogp\.me/ns#)(.+)$!i', $prop, $matches)) {
             $p = strtolower($matches[1]);
         } else {
             $p = $prop;
         }

         foreach ($values as $value) {
             if ($value['type'] == 'bnode') {
                 continue;
             }
             $this->_data[$p][] = $value['value'];
         }
     }

     // kept for callers that invoke __construct() directly; `new` ignores it
     return $this;
 }