/**
 * Applies a single "key=value" option pair to $this->options.
 *
 * Only the first "=" separates the key from the value, so a value that
 * itself contains "=" is kept intact. A pair without any "=" is treated as
 * having an empty value. Keys that are not listed in the switch below are
 * ignored without any error.
 *
 * @param string $optionKeyValuePair Raw option text, e.g. "lang=en".
 *
 * @return void
 */
private function parseOptionKeyValuePair($optionKeyValuePair)
 {
     // A limit of 2 keeps any further "=" inside the value; array_pad()
     // guarantees that index 1 exists even when the pair has no "=" at all,
     // which avoids an undefined-offset notice and trim(null).
     $keyValue = array_pad(explode('=', $optionKeyValuePair, 2), 2, '');
     $key = trim($keyValue[0]);
     $value = trim($keyValue[1]);
     switch ($key) {
         case 'vextwarning':
             // Anything that is not a recognised boolean spelling becomes false.
             $this->options->setVendorExtensionIssuesAsWarnings(filter_var($value, FILTER_VALIDATE_BOOLEAN));
             break;
         case 'output':
             $this->options->setOutputFormat($value);
             break;
         case 'lang':
             $this->options->setLanguage($value);
             break;
         case 'warning':
             // Non-integer or negative input falls back to the default level 2.
             $warningLevel = filter_var($value, FILTER_VALIDATE_INT, array('options' => array('min_range' => 0, 'default' => 2)));
             $this->options->setWarningLevel($warningLevel);
             break;
         case 'medium':
             $this->options->setMedium($value);
             break;
         case 'profile':
             $this->options->setProfile($value);
             break;
     }
 }
示例#2
0
<?php

/**
 * This file starts the DBpedia extraction process for abstracts.
 *
 * Warning: The script needs several days to complete on an average PC.
 */
// report every error level while the script runs
error_reporting(E_ALL);
// automatically loads required classes
require 'dbpedia.php';
// set $extractionDir and $extractionLanguages (both are read by the loop below)
require 'extractionconfig.php';
// NOTE(review): $manager is not referenced in the visible part of the loop;
// presumably it is used after the jobs are built - confirm.
$manager = new ExtractionManager();
// loop over all languages
foreach ($extractionLanguages as $currLanguage) {
    Options::setLanguage($currLanguage);
    $pageTitles = new ArticlesSqlIterator($currLanguage);
    $job = new ExtractionJob(new DatabaseWikipediaCollection($currLanguage), $pageTitles);
    $extractionDirLang = $extractionDir . '/' . $currLanguage . '/';
    if (!is_dir($extractionDirLang)) {
        mkdir($extractionDirLang);
    }
    // AbstractExtractor has references to its two destinations, see below
    $group = new ExtractionGroup(new NullDestination());
    $shortDestination = new csvNTripleDestination($extractionDirLang . "shortabstract_" . $currLanguage);
    $longDestination = new csvNTripleDestination($extractionDirLang . "longabstract_" . $currLanguage);
    $extractorInstance = new AbstractExtractor();
    $extractorInstance->setDestinations($shortDestination, $longDestination);
    $group->addExtractor($extractorInstance);
    $job->addExtractionGroup($group);
    $date = date(DATE_RFC822);
示例#3
0
 foreach ($it as $key => $metainfo) {
     Timer::start("main::processing");
     //****PREPROCESSING*****
     //print_r($metainfo);
     $pageTitle = $metainfo['pageTitle'];
     $pageTitles = new ArrayObject(array($pageTitle));
     $pageURI = '';
     try {
         $pageURI = RDFtriple::page($pageTitle);
     } catch (Exception $e) {
         Logger::warn('main: invalid uri for ' . $pageTitle);
         continue;
     }
     Logger::info("Title:  {$pageTitle} " . mb_detect_encoding($pageTitle) . "");
     $language = $metainfo['language'];
     Options::setLanguage($language);
     $lastarticlestmp[] = $pageURI->getURI();
     $metainfo['oaiidentifier'] = $metainfo['oaiId'];
     $metainfo['oaiId'] = Util::getOaiIDfromIdentifier($language, $metainfo['oaiidentifier']);
     Logger::info("oaiId " . $metainfo['oaiId']);
     //***MAINTENANCE***
     $count++;
     //log statistics
     if ($count % Options::getOption('printStatInterval') == 0) {
         printAll($lastarticles, $language);
     }
     //50 last articles to statisticdir
     if ($count < 50) {
         $lastarticles = $lastarticlestmp;
     }
     if ($count % 50 == 0) {
示例#4
0
 * the LiveWikipedia. The file outputs the generated triples 
 * directly. This is the best way for developers to verify that
 * their extractors are working. Once the extractor is working on
 * an article, developers should use extract_dataset to produce a
 * full data set and see whether it works in a full extraction.
 *
 * See http://wiki.dbpedia.org/Documentation for an overview of
 * the DBpedia extraction framework.
 *
 * @author Jens Lehmann
 */
// pull in dbpedia.php before any of the classes below are used
include 'dbpedia.php';
// configure settings
// change the Extractor class to your extractor
//this should be done in config/dbpedia.ini
// the language option is hard-coded to 'ko' here and then read back
// through the generic option getter
Options::setLanguage('ko');
$language = Options::getOption('language');
// exactly one extractor assignment is left active; the commented-out lines
// are alternatives a developer can switch to by hand
//$extractor = new ActiveAbstractExtractor();
$extractor = new KoInfoboxExtractor();
/*
$extractor = new InfoboxExtractor();
*/
//$extractor = new SkosCategoriesExtractor();
//these are articles for testing
//$article[] = 'London';
//$article[] = 'Category:Pasta';
// NOTE: $t is assigned twice in a row, so only the second value is in
// effect afterwards; the first assignment is immediately overwritten
$t = '이탈리아';
$t = '서울특별시';
/*
$t = 'Berlin';
*/