<?php

include 'dbpedia.php';
// Report every PHP error level while the script runs.
error_reporting(E_ALL);
// Start the timer labelled "main::Runtime"; it is stopped and reported at the end of the script.
Timer::start("main::Runtime");
// Setup the language version of Wikipedia (read from the 'language' option)
$language = Options::getOption('language');
// Iterator that supplies the pages to process
// (presumably every template page of the SQL dump; confirm in AllTemplatesSqlIterator).
$pageTitles = new AllTemplatesSqlIterator($language);
// Instantiate the ExtractionJob from a DatabaseWikipediaCollection for that language and the page iterator
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);
// A single ExtractionGroup that runs the TemplateRedirectExtractor. Its destination is a
// NullDestination (NOTE(review): presumably the extracted output is discarded; confirm).
$groupInfoboxes = new ExtractionGroup(new NullDestination());
$groupInfoboxes->addExtractor(new TemplateRedirectExtractor());
// Add the ExtractionGroups to the ExtractionJob
$job->addExtractionGroup($groupInfoboxes);
// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);
// Stop the "main::Runtime" timer started above and call Timer::printTime() to report it.
Timer::stop("main::Runtime");
Timer::printTime();
/*
 * Example #2
 * 0
 * 
 */
require_once 'dbpedia.php';
require_once 'extraction/extractTemplates.php';
include "extraction/config.inc.php";
// Class autoloader: on first use of an undefined class, load "<ClassName>.php".
// require_once resolves the file through the include path and aborts the
// script if the file cannot be found.
//
// Registered through spl_autoload_register() because the magic __autoload()
// function was deprecated in PHP 7.2 and declaring it is a fatal error since
// PHP 8.0. A closure is used so that no additional global function name is
// introduced.
//
// $class_name: name of the class PHP is trying to load.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});
// Report every error level. E_NOTICE is already part of E_ALL, so OR-ing it in does not change the mask.
error_reporting(E_ALL |  E_NOTICE);
// NOTE(review): $manager is created here but no $manager->execute(...) call is visible in this
// excerpt; the job below is only configured in the lines shown.
$manager = new ExtractionManager();
// Job for the English ("en") Wikipedia: pages come from a LiveWikipedia source and are enumerated
// by an AllArticlesSqlIterator (presumably every article of the SQL dump; confirm).
$pageTitlesEn = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $pageTitlesEn);
// One ExtractionGroup per extractor, each with its own NTripleDumpDestination
// (the string argument looks like the output file name; TODO confirm).
// Labels -> "articles_label.nt"
$groupArticlesLabelEn = new ExtractionGroup(new NTripleDumpDestination("articles_label.nt"));
$groupArticlesLabelEn->addExtractor(new LabelExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesLabelEn);
// Short abstracts -> "articles_abstract.nt"
$groupArticlesShortAbstractEn = new ExtractionGroup(new NTripleDumpDestination("articles_abstract.nt"));
$groupArticlesShortAbstractEn->addExtractor(new ShortAbstractExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesShortAbstractEn);
// Images -> "articles_image.nt"
$groupImages = new ExtractionGroup(new NTripleDumpDestination("articles_image.nt"));
$groupImages->addExtractor(new ImageExtractor());
$jobEnWiki->addExtractionGroup($groupImages);
// Wikipage links -> "articles_wikipage.nt"
$groupWikipages = new ExtractionGroup(new NTripleDumpDestination("articles_wikipage.nt"));
$groupWikipages->addExtractor(new WikipageExtractor());
$jobEnWiki->addExtractionGroup($groupWikipages);
// Infoboxes: this group is constructed with two destinations, "infoboxes.nt" and
// "infoboxes.properties.nt" (NOTE(review): the role of the second destination is defined by
// ExtractionGroup; confirm there).
$groupInfoboxes = new ExtractionGroup(new NTripleDumpDestination("infoboxes.nt"), new NTripleDumpDestination("infoboxes.properties.nt"));
$groupInfoboxes->addExtractor(new InfoboxExtractor());
$jobEnWiki->addExtractionGroup($groupInfoboxes);
// Semantic extractor -> "semantic.nt"
$groupSemantic = new ExtractionGroup(new NTripleDumpDestination("semantic.nt"));
$groupSemantic->addExtractor(new SemanticExtractor());
$jobEnWiki->addExtractionGroup($groupSemantic);
// Example #3
// 0
    }
}
// var_dump($sourceFiles);
// var_dump($resultFiles);
// Instantiate a new ExtractionJob over the test pages listed in $sourceFiles
$job = new ExtractionJob(new TestWikipedia($language), new ArrayObject($sourceFiles));
// Create one ExtractionGroup for each Extractor
foreach ($extractorObjects as $extractor) {
    // Do not use extractors without test files.
    // empty() is used instead of "== null" so that an extractor ID that is
    // missing from $resultFiles does not raise an undefined-index notice.
    if (empty($resultFiles[$extractor->getExtractorID()])) {
        echo "\ncontinue";
        // Skip this extractor: without the jump it would still be added to
        // the job below, despite the message just printed.
        continue;
    }
    $currentExtractor = $extractor;
    // Each extractor gets its own group and its own TestDestination
    $group = new ExtractionGroup(new TestDestination(SHOW_OUTPUT));
    $group->addExtractor($currentExtractor);
    $job->addExtractionGroup($group);
}
// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);
// Start the test log with a timestamped header line
$testResult = "Logfile for DBpedia Data Extraction (" . date("D M j Y  G:i:s T") . ")\n";
// Cycle over all ExtractionGroups
foreach ($job->getExtractionGroups() as $currentGroup) {
    // Cycle over all extractors
    foreach ($currentGroup->getExtractors() as $extractor) {
        $testResult .= "\nResults for: " . $extractor->getExtractorID() . "\n";
        foreach ($sourceFiles as $key => $page) {
            // Only get files, which are available for the extractor
            if (!isset($resultFiles[$extractor->getExtractorID()][$key])) {
                continue;
// Example #4
// 0
require_once 'dbpedia.php';
// Class autoloader: on first use of an undefined class, load "<ClassName>.php".
// require_once resolves the file through the include path and aborts the
// script if the file cannot be found.
//
// Registered through spl_autoload_register() because the magic __autoload()
// function was deprecated in PHP 7.2 and declaring it is a fatal error since
// PHP 8.0. A closure is used so that no additional global function name is
// introduced.
//
// $class_name: name of the class PHP is trying to load.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});
// Enter the resources (page titles) you want to extract.
// [To extract all Wikipedia articles from an SQL dump, use AllArticlesSqlIterator (see extract.php)]
// When using a MySqlIterator, do not wrap it in "new ArrayObject" at job creation.
$pageTitles = array("Michael_Foot", "Millard_Fillmore");
//, "Michael_Jordan", "Google");
// Setup the language version of Wikipedia
$language = "en";
// Instantiate a new ExtractionJob: a LiveWikipedia source for $language plus the
// page titles wrapped in an ArrayObject
$job = new ExtractionJob(new LiveWikipedia($language), new ArrayObject($pageTitles));
// Create one ExtractionGroup per extractor, each with its own SimpleDumpDestination
// (NOTE(review): presumably dumps the results directly to the output; confirm in SimpleDumpDestination).
$groupInfoboxes = new ExtractionGroup(new SimpleDumpDestination());
$groupInfoboxes->addExtractor(new InfoboxExtractor());
$groupImages = new ExtractionGroup(new SimpleDumpDestination());
$groupImages->addExtractor(new ImageExtractor());
$groupShortAbstracts = new ExtractionGroup(new SimpleDumpDestination());
$groupShortAbstracts->addExtractor(new ShortAbstractExtractor());
$groupLabels = new ExtractionGroup(new SimpleDumpDestination());
$groupLabels->addExtractor(new LabelExtractor());
// Add the ExtractionGroups to the ExtractionJob (infoboxes, images, short abstracts, labels, in that order)
$job->addExtractionGroup($groupInfoboxes);
$job->addExtractionGroup($groupImages);
$job->addExtractionGroup($groupShortAbstracts);
$job->addExtractionGroup($groupLabels);
// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);