/**
 * Extracts the triples for one page through the root triple generator.
 *
 * The page URI is set from the title before anything else. If
 * decode_title() yields a value loosely equal to NULL, the still-empty
 * result is returned. Otherwise every generated triple receives an
 * on-delete-cascade annotation pointing at the page URI, is added to the
 * result and is included in a TRACE log dump.
 *
 * Exceptions raised during generation or collection are logged at WARN
 * level and not rethrown; triples added before the failure stay in the
 * returned result.
 *
 * @param mixed $pageId     Page identifier, handed to ExtractionResult and Breadcrumb.
 * @param mixed $pageTitle  Page title, used for the page URI and the title check.
 * @param mixed $pageSource Page source handed to the root triple generator.
 *
 * @return ExtractionResult Result holding the collected triples (possibly none).
 */
public function extractPage($pageId, $pageTitle, $pageSource)
{
    $this->setPageURI($pageTitle);

    $extraction = new ExtractionResult($pageId, $this->language, $this->getExtractorID());

    // Nothing to extract when the title does not decode to anything.
    if ($this->decode_title($pageTitle) == NULL) {
        return $extraction;
    }

    try {
        $crumb = new Breadcrumb($pageId);
        $generated = $this->rootTripleGenerator->generate($crumb, $pageSource);

        $this->log(TRACE, "generated triples:");

        // Serialised form of every triple, joined into one log message below.
        $serialized = array();
        foreach ($generated as $item) {
            $item->addOnDeleteCascadeAnnotation($this->getPageURI());
            $extraction->addTripleObject($item);
            $serialized[] = $item->toNTriples();
        }

        $this->log(TRACE, "\n" . implode("", $serialized));
    } catch (Exception $e) {
        // Best effort: report the problem and return what was collected so far.
        $this->log(WARN, "Caught exception: " . $e->getMessage());
    }

    return $extraction;
}
/**
 * Converts the parser output for one page into RDF triples.
 *
 * parsePage() is invoked after the global $parseResult has been reset to
 * null; the rows found in that global afterwards are turned into triples.
 * Each row is indexed as: [0] subject URI, [1] property URI, [2] object
 * value, [3] object kind ("r" means the object is a URI, anything else a
 * literal), [4] and [5] second and third argument of RDFtriple::literal()
 * (presumably datatype and language -- TODO confirm against parsePage()).
 * An entry [5] that is loosely equal to null falls back to $this->language.
 *
 * Per row:
 *  - a property ending in a single digit (LeaderName1, LeaderName2, ...)
 *    is renamed to the part before that digit, unless the tail of the
 *    name is a percent-encoded byte ("_percent_XX" or "%XX");
 *  - rows whose property is longer than
 *    $GLOBALS['W2RCFG']['maximumPropertyLength'] are dropped;
 *  - rows whose subject, predicate or object URI cannot be built are
 *    dropped and the exception message is echoed to the output.
 *
 * @param mixed $pageID     Page identifier.
 * @param mixed $pageTitle  Not used by this implementation.
 * @param mixed $pageSource Page source handed to parsePage().
 *
 * @return ExtractionResult Result holding all triples that could be built.
 */
public function extractPage($pageID, $pageTitle, $pageSource)
{
    $result = new ExtractionResult($pageID, $this->language, $this->getExtractorID());

    global $parseResult; // Contains the Extraction result
    $parseResult = null;
    $this->parsePage($pageID, $pageSource, $this->language);

    // The global may still be null here; count(null) raises a warning on
    // PHP 7.2+ and a TypeError on PHP 8, so test for emptiness instead.
    if (empty($parseResult)) {
        return $result;
    }

    // Bookkeeping list of property names seen so far, seeded with the
    // property of the first row.
    $knownProperties = array($parseResult[0][1]);

    foreach ($parseResult as $myTriple) {
        try {
            $subject = RDFtriple::URI($myTriple[0]);
        } catch (Exception $e) {
            echo 'Caught exception: ', $e->getMessage(), "\n";
            continue;
        }

        // Rename Properties like LeaderName1, LeaderName2, ... to LeaderName
        if (preg_match('/(.*[^0-9_]+)([0-9])$/', $myTriple[1], $matches)) {
            // If the property consists of letters from a writing system other
            // than Latin, e.g. Korean, the name ends in an encoded byte such as
            // _percent_B1 (or %B1 if language.use_percent_encoding = false).
            // That trailing digit belongs to the encoding and must stay.
            // preg_match() replaces ereg(), which was removed in PHP 7.
            $endsWithEncodedByte =
                substr(substr($myTriple[1], -11), 0, 9) === "_percent_"
                || preg_match('/%[A-F0-9]{2}/', substr($myTriple[1], -3));

            if (!$endsWithEncodedByte) {
                // Strict in_array() instead of a truthiness test on
                // array_search(): a hit at index 0 used to count as "unknown"
                // and produced duplicate entries.
                if (!in_array($matches[1], $knownProperties, true)) {
                    $knownProperties[] = $matches[1];
                }
                $myTriple[1] = $matches[1];
            }
        } elseif (!in_array($myTriple[1], $knownProperties, true)) {
            $knownProperties[] = $myTriple[1];
        }

        // If a property is longer than the maximum configured length, we do
        // not write the triple.
        if (strlen($myTriple[1]) > $GLOBALS['W2RCFG']['maximumPropertyLength']) {
            continue;
        }

        try {
            $predicate = RDFtriple::URI($myTriple[1]);
        } catch (Exception $e) {
            echo 'Caught exception: ', $e->getMessage(), "\n";
            continue;
        }

        if ($myTriple[3] == "r") {
            try {
                $object = RDFtriple::URI($myTriple[2]);
            } catch (Exception $e) {
                echo 'Caught exception: ', $e->getMessage(), "\n";
                continue;
            }
        } else {
            if ($myTriple[5] == null) {
                $myTriple[5] = $this->language;
            }
            $object = RDFtriple::literal($myTriple[2], $myTriple[4], $myTriple[5]);
        }

        $triple = new RDFtriple($subject, $predicate, $object);

        // This is for the db:London/rating subtemplate problem: a subject
        // whose URI starts with the page URI but is strictly longer gets an
        // on-delete-cascade annotation pointing at the page.
        $currentSubject = RDFtriple::page($pageID);
        $small = $currentSubject->getURI();
        $big = $subject->getURI();
        if (strpos($big, $small) === 0 && strlen($big) > strlen($small)) {
            $triple->addOnDeleteCascadeAnnotation($currentSubject);
        }

        $result->addTripleObject($triple);
        $this->allPredicates->addPredicate($myTriple[1]);
    }

    return $result;
}