コード例 #1
0
ファイル: AbstractExtractor.php プロジェクト: ljarray/dbpedia
 /**
  * Fetches the abstract text of a page over HTTP and emits it as quads.
  *
  * The request URL is built from LocalConfiguration::abstractPageUrlFormat,
  * the extractor language and the encoded title of the node's root. When a
  * non-empty response is received, the HTML-decoded text is added to the
  * destination under LONG_PROPERTY; if short() yields a non-empty value, a
  * SHORT_PROPERTY quad is added first.
  *
  * @param mixed $node        Node being extracted; getRoot() and getSourceUri() are called on it.
  * @param mixed $subjectUri  Subject passed to every generated quad.
  * @param mixed $pageContext Not used by this method.
  *
  * @return bool Always true, including when the HTTP request fails.
  */
 public function extract($node, $subjectUri, $pageContext)
 {
     $this->curl = curl_init();
     $url = sprintf(LocalConfiguration::abstractPageUrlFormat, $this->language, $node->getRoot()->getTitle()->encoded());
     curl_setopt($this->curl, CURLOPT_URL, $url);
     curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
     // CURLOPT_BINARYTRANSFER is deliberately not set: it has had no effect
     // since PHP 5.1.3 and the constant is deprecated as of PHP 8.4.
     // Treat HTTP status codes >= 400 as a failed transfer.
     curl_setopt($this->curl, CURLOPT_FAILONERROR, true);

     $text = curl_exec($this->curl);
     if (false === $text) {
         // Report the failure instead of dropping it silently. Extraction stays
         // best-effort: no quads are emitted and the method still returns true.
         error_log(sprintf(
             'AbstractExtractor: call to %s failed (cURL error %d: %s)',
             $url,
             curl_errno($this->curl),
             curl_error($this->curl)
         ));

         return true;
     }

     if (!empty($text)) {
         $text = StringUtil::htmlDecode($text);
         $sourceUri = $node->getSourceUri();

         $shorttext = $this->short($text);
         if (!empty($shorttext)) {
             $quad = new RdfQuad($subjectUri, $this->ontology->getProperty(self::SHORT_PROPERTY), $shorttext, $sourceUri);
             $this->destination->addQuad($quad);
         }

         $quad = new RdfQuad($subjectUri, $this->ontology->getProperty(self::LONG_PROPERTY), $text, $sourceUri);
         $this->destination->addQuad($quad);
     }

     return true;
 }
コード例 #2
0
ファイル: StringParser.php プロジェクト: ljarray/dbpedia
 /**
  * Converts a node to cleaned plain text.
  *
  * The node is stringified with nodeToString(), wiki emphasis markup is
  * removed and HTML entities are decoded.
  *
  * @param Node $node Node to convert.
  *
  * @return mixed The cleaned text, or null when the cleaned text has zero length.
  */
 public function parse(Node $node)
 {
     $cleaned = StringUtil::htmlDecode(
         WikiUtil::removeWikiEmphasis(
             self::nodeToString($node)
         )
     );

     // A zero-length result is reported as null.
     return strlen($cleaned) > 0 ? $cleaned : null;
 }
コード例 #3
0
ファイル: PageContext.php プロジェクト: ljarray/dbpedia
 /**
  * Generates a URI for a node below the given base URI.
  *
  * When $node is set, a URL-encoded label of at most 50 bytes is derived
  * from the node's text and appended to $baseUri after a '__' separator.
  * If the base URI ends with the beginning of that label, the overlapping
  * part is dropped from the label first. When $node is not set, $baseUri
  * itself is used.
  *
  * Each resulting URI is counted in $this->uris; when the same URI has been
  * produced before, '__' followed by the number of earlier occurrences is
  * appended.
  *
  * @param string $baseUri URI the generated URI is based on.
  * @param mixed  $node    Node to derive the label from, or null.
  *
  * @return string The generated URI.
  */
 public function generate($baseUri, $node)
 {
     if (isset($node)) {
         //Retrieve text
         $text = $this->nodeToText($node);
         //Normalize text
         $text = WikiUtil::removeWikiEmphasis($text);
         $text = StringUtil::htmlDecode($text);
         //Remove duplicate spaces
         $text = preg_replace('/ +/', ' ', $text);
         $text = str_replace('(', ' ', $text);
         $text = str_replace(')', ' ', $text);
         $text = strip_tags($text);
         // NOTE(review): substr() counts bytes, so a multi-byte character can
         // be cut at the 50-byte limit - confirm whether that matters here.
         $text = substr($text, 0, 50);
         $text = trim($text);
         $text = str_replace(' ', '_', $text);
         $text = urlencode($text);
         //Test if the base URI ends with a prefix of text
         $baseLen = strlen($baseUri);
         $textLen = strlen($text);
         for ($i = $baseLen - 1; $i > 0 && $baseLen - $i < $textLen; $i--) {
             // Compare only as many bytes as the base URI tail holds. The tail
             // is always shorter than $text inside this loop, so comparing
             // $textLen bytes could never report equality.
             $overlap = $baseLen - $i;
             if (substr_compare($baseUri, $text, $i, $overlap, true) === 0) {
                 $text = substr($text, $overlap);
                 break;
             }
         }
         //Remove leading underscore
         if (!empty($text) && $text[0] === '_') {
             $text = substr($text, 1);
         }
         //Generate URI
         $uri = $baseUri . '__' . $text;
     } else {
         $uri = $baseUri;
     }
     //Resolve collisions
     if (!isset($this->uris[$uri])) {
         //No collision
         $this->uris[$uri] = 1;
     } else {
         //Collision found: append the number of earlier occurrences
         $index = $this->uris[$uri];
         $this->uris[$uri] = $index + 1;
         $uri .= '__' . $index;
     }
     return $uri;
 }