/**
 * Builds the extraction result for one page by running the root triple
 * generator over the page source.
 *
 * Every generated triple is annotated with an on-delete-cascade reference to
 * the current page URI before it is added to the result. Exceptions raised
 * while generating or collecting triples are logged at WARN level and are
 * not rethrown; the result then holds whatever was added before the failure.
 *
 * @param mixed  $pageId     Page identifier, handed to ExtractionResult and Breadcrumb.
 * @param string $pageTitle  Page title; used to set the page URI and validated via decode_title().
 * @param string $pageSource Page source handed to the root triple generator.
 *
 * @return ExtractionResult Result without triples when the title does not decode
 *                          to a usable value, otherwise the collected triples.
 */
public function extractPage($pageId, $pageTitle, $pageSource)
 {
     $this->setPageURI($pageTitle);

     $extraction = new ExtractionResult($pageId, $this->language, $this->getExtractorID());

     // Guard clause: a title that decodes to a null-like value (loose check)
     // yields a result without triples.
     $decodedTitle = $this->decode_title($pageTitle);
     if ($decodedTitle == NULL) {
         return $extraction;
     }

     try {
         $crumb = new Breadcrumb($pageId);
         $generated = $this->rootTripleGenerator->generate($crumb, $pageSource);

         $this->log(TRACE, "generated triples:");

         // Collect the N-Triples form of each triple for a single trace entry.
         $serialized = array();
         foreach ($generated as $generatedTriple) {
             // Tie the triple to the page URI via its on-delete-cascade annotation.
             $generatedTriple->addOnDeleteCascadeAnnotation($this->getPageURI());
             $extraction->addTripleObject($generatedTriple);
             $serialized[] = $generatedTriple->toNTriples();
         }

         $this->log(TRACE, "\n" . implode('', $serialized));
     } catch (Exception $e) {
         // Best effort: report the problem and return what has been collected so far.
         $this->log(WARN, "Caught exception: " . $e->getMessage());
     }

     return $extraction;
 }
Example #2
0
 /**
  * Builds the extraction result for one page from the output of parsePage().
  *
  * parsePage() is expected to fill the global $parseResult with a list of
  * tuples; this method reads them as:
  *   [0] subject URI, [1] predicate URI, [2] object value,
  *   [3] "r" when the object is a resource URI (anything else means literal),
  *   [4] and [5] second and third argument of RDFtriple::literal()
  *       (presumably datatype and language -- confirm against parsePage()).
  *
  * Tuples whose subject, predicate or resource object cannot be turned into
  * a URI are reported on standard output and skipped. Tuples whose property
  * name exceeds the configured maximum length are skipped silently.
  *
  * @param mixed  $pageID     Page identifier.
  * @param string $pageTitle  Page title (not used by this method).
  * @param string $pageSource Page source handed to parsePage().
  *
  * @return ExtractionResult Result holding every triple that could be built;
  *                          without triples when parsing produced nothing.
  */
 public function extractPage($pageID, $pageTitle, $pageSource)
 {
     $result = new ExtractionResult($pageID, $this->language, $this->getExtractorID());
     global $parseResult;
     // Reset the shared global before parsing; the parse output is read from it below.
     $parseResult = null;
     $this->parsePage($pageID, $pageSource, $this->language);
     // empty() handles the null case first: count(null) warns on PHP 7.2+ and
     // throws a TypeError on PHP 8.
     if (empty($parseResult) || count($parseResult) < 1) {
         return $result;
     }
     $knownProperties = array($parseResult[0][1]);
     foreach ($parseResult as $myTriple) {
         try {
             $subject = RDFtriple::URI($myTriple[0]);
         } catch (Exception $e) {
             echo 'Caught exception: ', $e->getMessage(), "\n";
             continue;
         }
         // Rename properties like LeaderName1, LeaderName2, ... to LeaderName
         if (preg_match("/(.*[^0-9_]+)([0-9])\$/", $myTriple[1], $matches)) {
             // If the property consists of letters from a writing system other than Latin,
             // e.g. Korean, the characters are encoded as e.g. _percent_B1 and must not be
             // changed. With language.use_percent_encoding = false it looks like e.g. %B1.
             // preg_match() replaces ereg(), which was removed in PHP 7.0.
             $hasPercentWord = substr(substr($myTriple[1], -11), 0, 9) == "_percent_";
             $hasPercentByte = preg_match('/%([A-F0-9]{2})/', substr($myTriple[1], -3));
             if (!$hasPercentWord && !$hasPercentByte) {
                 // Strict search compared against false: a hit at index 0 is still a hit.
                 if (array_search($matches[1], $knownProperties, true) === false) {
                     array_push($knownProperties, $matches[1]);
                 }
                 $myTriple[1] = $matches[1];
             }
         } else {
             if (!in_array($myTriple[1], $knownProperties, true)) {
                 array_push($knownProperties, $myTriple[1]);
             }
         }
         // If a property is longer than the maximum configured length,
         // we do not write the triple.
         if (strlen($myTriple[1]) > $GLOBALS['W2RCFG']['maximumPropertyLength']) {
             continue;
         }
         try {
             $predicate = RDFtriple::URI($myTriple[1]);
         } catch (Exception $e) {
             echo 'Caught exception: ', $e->getMessage(), "\n";
             continue;
         }
         if ($myTriple[3] == "r") {
             // Resource object: must be a valid URI.
             try {
                 $object = RDFtriple::URI($myTriple[2]);
             } catch (Exception $e) {
                 echo 'Caught exception: ', $e->getMessage(), "\n";
                 continue;
             }
         } else {
             // Literal object: fall back to the extractor language when none is given.
             if ($myTriple[5] == null) {
                 $myTriple[5] = $this->language;
             }
             $object = RDFtriple::literal($myTriple[2], $myTriple[4], $myTriple[5]);
         }
         // This is for the db:London/rating subtemplate problem: a subject whose URI
         // strictly extends the page URI gets an on-delete-cascade annotation
         // pointing back at the page.
         $triple = new RDFtriple($subject, $predicate, $object);
         $currentSubject = RDFtriple::page($pageID);
         $small = $currentSubject->getURI();
         $big = $subject->getURI();
         if (strpos($big, $small) === 0 && strlen($big) > strlen($small)) {
             $triple->addOnDeleteCascadeAnnotation($currentSubject);
         }
         $result->addTripleObject($triple);
         $this->allPredicates->addPredicate($myTriple[1]);
     }
     return $result;
 }