/**
 * Load the default graph of a JSON-LD document into an EasyRdf\Graph
 *
 * A JSON-LD document can describe a whole dataset, i.e. several graphs.
 * Only quads of the default graph are loaded here; every quad that
 * belongs to a named graph is skipped (merging all graphs would be the
 * alternative).
 *
 * @param Graph  $graph   the graph to load the data into
 * @param string $data    the RDF document data
 * @param string $format  the format of the input data
 * @param string $baseUri the base URI of the data being parsed
 *
 * @throws Exception          when the JSON-LD processor rejects the document
 * @throws \EasyRdf\Exception when $format is not 'jsonld'
 * @return integer The number of triples added to the graph
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    if ($format != 'jsonld') {
        throw new \EasyRdf\Exception(
            "EasyRdf\\Parser\\JsonLd does not support {$format}"
        );
    }

    try {
        $quads = LD\JsonLD::toRdf($data, array('base' => $baseUri));
    } catch (LD\Exception\JsonLdException $jsonLdError) {
        // Re-throw as a parser exception carrying the processor's message
        throw new Exception($jsonLdError->getMessage());
    }

    foreach ($quads as $quad) {
        // Quads with a graph name belong to a named graph: skip them
        if ($quad->getGraph() !== null) {
            continue;
        }

        // Subject: blank node labels are remapped, IRIs are used as-is
        $subject = (string) $quad->getSubject();
        if (substr($subject, 0, 2) === '_:') {
            $subject = $this->remapBnode($subject);
        }

        $predicate = (string) $quad->getProperty();

        // Object: either a resource (uri / bnode) or a literal
        $node = $quad->getObject();
        if ($node instanceof \ML\IRI\IRI) {
            $iri = (string) $node;
            if (substr($iri, 0, 2) === '_:') {
                $object = array(
                    'type' => 'bnode',
                    'value' => $this->remapBnode($iri)
                );
            } else {
                $object = array('type' => 'uri', 'value' => $iri);
            }
        } else {
            $object = array('type' => 'literal', 'value' => $node->getValue());

            // A literal carries either a language tag or a datatype
            if ($node instanceof LD\LanguageTaggedString) {
                $object['lang'] = $node->getLanguage();
            } else {
                $object['datatype'] = $node->getType();
            }
        }

        $this->addTriple($subject, $predicate, $object);
    }

    return $this->tripleCount;
}
/**
 * Read an N-Triples document line by line into an EasyRdf\Graph
 *
 * The data is split on LF / CRLF. Comment lines (first non-blank
 * character is '#') and blank lines are skipped; every other line must
 * be a single "subject <predicate> object ." statement.
 *
 * @param Graph  $graph   the graph to load the data into
 * @param string $data    the RDF document data
 * @param string $format  the format of the input data
 * @param string $baseUri the base URI of the data being parsed
 *
 * @throws Exception          when a line is not a comment, blank or a statement;
 *                            the 1-based line number is passed to the exception
 * @throws \EasyRdf\Exception when $format is not 'ntriples'
 * @return integer The number of triples added to the graph
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    if ($format != 'ntriples') {
        throw new \EasyRdf\Exception(
            "EasyRdf\\Parser\\Ntriples does not support: {$format}"
        );
    }

    // subject, <predicate>, object, terminated by a dot
    $statementPattern = '/^\\s*(.+?)\\s+<([^<>]+?)>\\s+(.+?)\\s*\\.\\s*$/';

    foreach (preg_split('/\\x0D?\\x0A/', strval($data)) as $offset => $text) {
        // Nothing to do for comments and blank lines
        if (preg_match('/^\\s*#/', $text) || preg_match('/^\\s*$/', $text)) {
            continue;
        }

        // Line numbers reported to the user start at 1
        $lineNum = $offset + 1;

        if (!preg_match($statementPattern, $text, $parts)) {
            throw new Exception("Failed to parse statement", $lineNum);
        }

        $subject = $this->parseNtriplesSubject($parts[1], $lineNum);
        $predicate = $this->unescapeString($parts[2]);
        $object = $this->parseNtriplesObject($parts[3], $lineNum);

        $this->addTriple($subject, $predicate, $object);
    }

    return $this->tripleCount;
}
/**
 * Parse RDFa 1.1 into an EasyRdf\Graph
 *
 * The document is first parsed as strict XML; if that fails it is parsed
 * again as HTML. A <base href="..."> element in the document head (XHTML
 * namespace or no namespace) replaces the base URI of the parser.
 *
 * libxml error buffering is enabled only for the duration of this call;
 * the setting that was active before is restored before returning or
 * re-throwing, so the caller's libxml configuration is left untouched.
 *
 * @param Graph  $graph   the graph to load the data into
 * @param string $data    the RDF document data
 * @param string $format  the format of the input data
 * @param string $baseUri the base URI of the data being parsed
 *
 * @throws \EasyRdf\Exception when $format is not 'rdfa'
 * @return integer The number of triples added to the graph
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    if ($format != 'rdfa') {
        throw new \EasyRdf\Exception(
            "EasyRdf\\Parser\\Rdfa does not support: {$format}"
        );
    }

    // Initialise evaluation context.
    $context = $this->initialContext();

    // Keep libxml from emitting PHP warnings for malformed markup, and
    // remember the previous mode so it can be put back afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);

    try {
        // Parse the document into DOM
        $doc = new \DOMDocument();

        // Attempt to parse the document as strict XML, and fall back to HTML
        // if XML parsing fails.
        if ($doc->loadXML($data, LIBXML_NONET)) {
            if ($this->debug) {
                print "Document was parsed as XML.";
            }
            // Collect all xmlns namespaces defined throughout the document.
            $sxe = simplexml_import_dom($doc);
            $context['xmlns'] = $sxe->getDocNamespaces(true);
            // The default (un-prefixed) namespace is not a usable prefix
            unset($context['xmlns']['']);
        } else {
            $doc->loadHTML($data);
            if ($this->debug) {
                print "Document was parsed as HTML.";
            }
        }

        // Establish the base for both XHTML and HTML documents.
        $xpath = new \DOMXPath($doc);
        $xpath->registerNamespace('xh', "http://www.w3.org/1999/xhtml");
        $nodeList = $xpath->query('/xh:html/xh:head/xh:base');
        if ($node = $nodeList->item(0) and $href = $node->getAttribute('href')) {
            $this->baseUri = new ParsedUri($href);
        }
        $nodeList = $xpath->query('/html/head/base');
        if ($node = $nodeList->item(0) and $href = $node->getAttribute('href')) {
            $this->baseUri = new ParsedUri($href);
        }

        // Remove the fragment from the base URI
        $this->baseUri->setFragment(null);

        // Recursively process XML nodes
        $this->processNode($doc, $context);
    } catch (\Exception $e) {
        // Do not leak the changed libxml mode when parsing aborts
        libxml_use_internal_errors($previousErrorMode);
        throw $e;
    }

    // Restore the caller's libxml error mode. If buffering was off before,
    // switching it off again also discards the errors buffered by this parse.
    libxml_use_internal_errors($previousErrorMode);

    return $this->tripleCount;
}
/**
 * Parse an RDF document into an EasyRdf\Graph
 *
 * Parsing is delegated to the Redland (librdf) bindings: a librdf parser
 * is created for $format, the data is parsed as a statement stream and
 * every statement is added to the graph as a triple.
 *
 * The librdf parser, URI and stream handles are released on every exit
 * path, including when an exception is thrown.
 *
 * @param Graph  $graph   the graph to load the data into
 * @param string $data    the RDF document data
 * @param string $format  the format of the input data
 * @param string $baseUri the base URI of the data being parsed
 *
 * @throws Exception          when the stream cannot be created or the parser
 *                            reports one or more errors
 * @throws \EasyRdf\Exception when the librdf parser or URI cannot be created
 * @return integer The number of triples added to the graph
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    $parser = librdf_new_parser($this->world, $format, null, null);
    if (!$parser) {
        throw new \EasyRdf\Exception(
            "Failed to create librdf_parser of type: {$format}"
        );
    }

    $rdfUri = librdf_new_uri($this->world, $baseUri);
    if (!$rdfUri) {
        // The parser already exists at this point: release it before bailing out
        librdf_free_parser($parser);
        throw new \EasyRdf\Exception(
            "Failed to create librdf_uri from: {$baseUri}"
        );
    }

    $stream = librdf_parser_parse_string_as_stream($parser, $data, $rdfUri);
    if (!$stream) {
        librdf_free_uri($rdfUri);
        librdf_free_parser($parser);
        throw new Exception("Failed to parse RDF stream");
    }

    try {
        do {
            // No current statement (e.g. an empty stream) yields a falsy value
            $statement = librdf_stream_get_object($stream);
            if ($statement) {
                $subject = self::nodeUriString(
                    librdf_statement_get_subject($statement)
                );
                $predicate = self::nodeUriString(
                    librdf_statement_get_predicate($statement)
                );
                $object = self::nodeToRdfPhp(
                    librdf_statement_get_object($statement)
                );

                $this->addTriple($subject, $predicate, $object);
            }
            // librdf_stream_next() returns non-zero once the stream is finished
        } while (!librdf_stream_next($stream));

        // The error count has to be read while the parser is still alive
        $errorCount = $this->parserErrorCount($parser);
    } catch (\Exception $e) {
        librdf_free_uri($rdfUri);
        librdf_free_stream($stream);
        librdf_free_parser($parser);
        throw $e;
    }

    // Release the native handles before reporting errors, so that the
    // error path does not leak them.
    librdf_free_uri($rdfUri);
    librdf_free_stream($stream);
    librdf_free_parser($parser);

    if ($errorCount) {
        throw new Exception("{$errorCount} errors while parsing.");
    }

    return $this->tripleCount;
}
/**
 * Parse an RDF/XML document into an EasyRdf\Graph
 *
 * The XML parser created by initXMLParser() is released again before this
 * method returns or throws.
 *
 * @param Graph  $graph   the graph to load the data into
 * @param string $data    the RDF document data
 * @param string $format  the format of the input data
 * @param string $baseUri the base URI of the data being parsed
 *
 * @throws Exception          when the XML parser reports an error; the line and
 *                            column of the error are passed to the exception
 * @throws \EasyRdf\Exception when $format is not 'rdfxml'
 * @return integer The number of triples added to the graph
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    if ($format != 'rdfxml') {
        throw new \EasyRdf\Exception(
            "EasyRdf\\Parser\\RdfXml does not support: {$format}"
        );
    }

    $this->init($graph, $baseUri);
    $this->resetBnodeMap();

    /* xml parser */
    $this->initXMLParser();

    /* parse */
    // NOTE(review): the third argument (is_final) is false, so the parser is
    // never told that $data is the last chunk - confirm this is intended.
    if (!xml_parse($this->xmlParser, $data, false)) {
        // Collect the diagnostics first: they are read from the parser,
        // which is released before the exception is thrown.
        $message = xml_error_string(xml_get_error_code($this->xmlParser));
        $line = xml_get_current_line_number($this->xmlParser);
        $column = xml_get_current_column_number($this->xmlParser);

        xml_parser_free($this->xmlParser);

        throw new Exception('XML error: "' . $message . '"', $line, $column);
    }

    xml_parser_free($this->xmlParser);

    return $this->tripleCount;
}