/**
 * Import RDF data into wiki pages.
 *
 * The RDF payload is taken either from a remote URL (importSource === 'url')
 * or from the submitted text field (importSource === 'textfield'), and is then
 * handed to the importer matching $requestData->dataFormat ('rdfxml' or 'turtle').
 *
 * @param RDFIORequestData $requestData Request data; the fields read here are
 *        importSource, externalRdfUrl, importData and dataFormat.
 * @return array Array with a single key 'triples', holding the 'triples' entry
 *         of the importer's result.
 * @throws RDFIOException If the source is missing/empty/invalid, the remote
 *         data cannot be fetched, or the data format is not supported.
 */
function importData(RDFIORequestData $requestData) {
    $rdfImporter = new RDFIORDFImporter();

    if ($requestData->importSource === 'url') {
        if ($requestData->externalRdfUrl === '') {
            throw new RDFIOException('URL field is empty!');
        }
        if (!RDFIOUtils::isURI($requestData->externalRdfUrl)) {
            throw new RDFIOException('Invalid URL provided!');
        }
        $rdfData = file_get_contents($requestData->externalRdfUrl);
        // file_get_contents() returns false on failure; do not pass that on
        // to the importer as if it were RDF data.
        if ($rdfData === false) {
            throw new RDFIOException('Could not fetch RDF data from the provided URL!');
        }
    } elseif ($requestData->importSource === 'textfield') {
        if ($requestData->importData === '') {
            throw new RDFIOException('RDF field is empty!');
        }
        $rdfData = $requestData->importData;
    } else {
        throw new RDFIOException('Import source is not selected!');
    }

    switch ($requestData->dataFormat) {
        case 'rdfxml':
            $importInfo = $rdfImporter->importRdfXml($rdfData);
            break;
        case 'turtle':
            $importInfo = $rdfImporter->importTurtle($rdfData);
            break;
        default:
            // Without this branch an unknown format silently skipped the
            // import and returned an undefined variable.
            throw new RDFIOException('Data format is not supported!');
    }

    return array('triples' => $importInfo['triples']);
}
/**
 * Import a line-oriented RDF file, optionally in chunks.
 *
 * Options read via getOption():
 *  - indata:     path of the file to import
 *  - chunksize:  number of lines per import call; 0 imports everything at once
 *  - chunksleep: seconds (may be fractional) to pause after each chunk
 *  - offset:     number of leading lines to skip
 *  - verbose:    print per-line / per-chunk progress
 *
 * Every line at or after the offset is counted as one triple in the progress
 * output, so the input is presumably one-triple-per-line (N-Triples-like)
 * data - TODO confirm against the script's documentation.
 *
 * @throws \RuntimeException If the indata file cannot be opened.
 */
public function execute() {
    $indata_file = $this->getOption('indata', '');
    $chunksize = intval($this->getOption('chunksize', 0));
    $chunksleep = floatval($this->getOption('chunksleep', 0.0));
    $offset = intval($this->getOption('offset', 0));
    $verbose = $this->getOption('verbose', false);

    echo "Starting import from file: {$indata_file}\n";
    if ($offset > 0) {
        echo "Starting with offset {$offset} ...\n";
    }

    $rdfImporter = new RDFIORDFImporter();

    $indata_fh = fopen($indata_file, 'r');
    if ($indata_fh === false) {
        throw new \RuntimeException("Could not open indata file for reading: {$indata_file}");
    }

    $lineinchunk = 1;
    $chunkindex = 1;
    $lineindex = 0;
    $totalimported = 0;
    $importdata = '';

    try {
        // Compare against false explicitly: a bare truthiness test would end
        // the loop early on a falsy line such as a final "0" without newline.
        while (($line = fgets($indata_fh)) !== false) {
            if ($lineindex >= $offset) {
                if ($verbose && $chunksize > 0 && $lineinchunk == 1) {
                    echo "Starting chunk {$chunkindex} ...\n";
                }

                $importdata .= $line;
                if ($verbose) {
                    echo "Appended line {$lineinchunk} in chunk {$chunkindex}, to indata ...\n";
                }
                $totalimported++;

                if ($chunksize != 0 && $lineinchunk == $chunksize) {
                    $rdfImporter->importTurtle($importdata);
                    $totalwithoffset = $totalimported + $offset;
                    echo "Imported {$chunksize} triples in chunk {$chunkindex} ({$totalimported} triples imported in total, and {$totalwithoffset} including offset)!\n";

                    // Reset chunk state; $lineinchunk is bumped back to 1 below.
                    $lineinchunk = 0;
                    $importdata = '';
                    $chunkindex++;

                    if ($verbose) {
                        echo 'Now sleeping for ' . strval($chunksleep) . ' seconds before continuing with next chunk ...';
                    }
                    // sleep() only accepts whole seconds and would truncate a
                    // fractional value (0.5 -> 0), so pause in microseconds.
                    if ($chunksleep > 0) {
                        usleep((int) round($chunksleep * 1000000));
                    }
                }
                $lineinchunk++;
            }
            $lineindex++;
        }
    } finally {
        // Release the handle even if an import call throws.
        fclose($indata_fh);
    }

    // Import any remaining lines (or all lines, if chunksize = 0). Skip the
    // call when nothing is left, e.g. when the last chunk was exactly full.
    if ($importdata !== '') {
        $rdfImporter->importTurtle($importdata);
    }

    echo "Finished importing everything ({$totalimported} triples in total)!\n";
}
/**
 * Import the triples of an already parsed query into the wiki.
 *
 * Does nothing unless checkAllowInsert() passes. The triples are read from
 * the 'construct_triples' entry of the parsed query stored on the request
 * data, and a success message is reported once the importer has run.
 */
function importTriplesInQuery() {
    if (!$this->checkAllowInsert()) {
        return;
    }

    $parsedQuery = $this->requestdata->query_parsed;
    $constructTriples = $parsedQuery['query']['construct_triples'];

    $importer = new RDFIORDFImporter();
    $importer->importTriples($constructTriples);

    $this->successMsg("Successfully imported the triples!");
}
/**
 * Import triples from an external SPARQL endpoint into the wiki.
 *
 * The endpoint URL is read from the 'extsparqlurl' request parameter. A
 * "SELECT DISTINCT * WHERE { ?s ?p ?o }" query with the given OFFSET/LIMIT is
 * sent to it, the XML result is converted to an array of triples, and those
 * are imported and rendered to the output page.
 *
 * NOTE(review): only <uri> values are read from the result bindings, so a
 * result row whose subject, predicate or object is not a URI (e.g. a literal
 * object) raises an Exception - confirm whether literals should be supported.
 *
 * @param int $limit  Maximum number of triples to request.
 * @param int $offset Number of triples to skip in the endpoint's result.
 * @return array Array with the single key 'externalSparqlUrl'.
 * @throws RDFIOException If the endpoint URL is empty or not a valid URI.
 * @throws Exception If a subject, predicate or object URI cannot be extracted.
 */
protected function import($limit = 10, $offset = 0) {
    global $wgOut, $wgRequest;

    $externalSparqlUrl = $wgRequest->getText('extsparqlurl');
    if ($externalSparqlUrl === '') {
        throw new RDFIOException('Empty SPARQL Url provided!');
    }
    if (!RDFIOUtils::isURI($externalSparqlUrl)) {
        throw new RDFIOException('Invalid SPARQL Url provided! (Must start with \'http://\' or \'https://\')');
    }

    // Only integers may end up in the query text.
    $limit = intval($limit);
    $offset = intval($offset);

    $sparqlQuery = urlencode("SELECT DISTINCT * WHERE { ?s ?p ?o } OFFSET {$offset} LIMIT {$limit}");
    $sparqlQueryUrl = $externalSparqlUrl . '/' . '?query=' . $sparqlQuery;

    // A failed fetch yields false; treat it like an unparsable response.
    $sparqlResultXml = file_get_contents($sparqlQueryUrl);
    $sparqlResultXmlObj = ($sparqlResultXml === false) ? false : simplexml_load_string($sparqlResultXml);

    if (!is_object($sparqlResultXmlObj)) {
        // The formatted error was previously built and then discarded, so the
        // user never saw it. Assumes formatErrorHTML() returns the HTML string.
        $wgOut->addHTML(RDFIOUtils::formatErrorHTML("Error", "There was a problem importing from the endpoint. Are you sure that the given URL is a valid SPARQL endpoint?"));
        return array('externalSparqlUrl' => $externalSparqlUrl);
    }

    // Binding name => human readable label used in the error messages.
    $termLabels = array('s' => 'subject', 'p' => 'predicate', 'o' => 'object');

    $importTriples = array();
    foreach ($sparqlResultXmlObj->results->children() as $result) {
        $triple = array();
        foreach ($result as $binding) {
            $name = (string) $binding['name'];
            if (!isset($termLabels[$name])) {
                // Bindings other than s, p and o are ignored.
                continue;
            }

            $value = (string) $binding->uri[0];
            if ($value === '') {
                throw new Exception('Could not extract ' . $termLabels[$name] . ' from empty string (' . print_r($binding->uri, true) . '), in SPARQLImport');
            }

            $triple[$name] = $value;
            $triple[$name . '_type'] = $this->resourceType($value);
            if ($name === 'o') {
                $triple['o_datatype'] = '';
            }
        }
        $importTriples[] = $triple;
    }

    $rdfImporter = new RDFIORDFImporter();
    $rdfImporter->importTriples($importTriples);
    $wgOut->addHTML($rdfImporter->showImportedTriples($importTriples));

    return array('externalSparqlUrl' => $externalSparqlUrl);
}