<?php

// Extraction driver: runs the TemplateRedirectExtractor over every template
// page of one Wikipedia language edition and times the whole run.
//
// FIX: use require_once instead of include — the rest of this file loads the
// framework bootstrap with require_once, and include would only emit a warning
// (and keep running) if dbpedia.php were missing.
require_once 'dbpedia.php';

error_reporting(E_ALL);

Timer::start("main::Runtime");

// Setup the language version of Wikipedia (read from the configured options).
$language = Options::getOption('language');

// Iterate over all template page titles of that edition; page text comes from
// the local database copy of Wikipedia rather than the live site.
$pageTitles = new AllTemplatesSqlIterator($language);
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);

// NOTE(review): NullDestination presumably discards the extractor output, so
// this job is run only for the extractor's side effects — confirm.
$groupInfoboxes = new ExtractionGroup(new NullDestination());
$groupInfoboxes->addExtractor(new TemplateRedirectExtractor());

// Add the ExtractionGroups to the ExtractionJob
$job->addExtractionGroup($groupInfoboxes);

// Execute the Extraction Job
$manager = new ExtractionManager();
$manager->execute($job);

Timer::stop("main::Runtime");
Timer::printTime();
// Example #2
 * Finally it reads out the ExtractionResults from each extractor and passes it to
 * the respective destination. The finish() methods from extractors and destination
 * are called, in order to close these.
 * 
 */
require_once 'dbpedia.php';
require_once 'extraction/extractTemplates.php';
include "extraction/config.inc.php";
// Autoloader: resolve a class name to a file named "<ClassName>.php".
// FIX: the magic __autoload() function was deprecated in PHP 7.2 and removed
// in PHP 8.0; spl_autoload_register() is the supported, stackable replacement
// with identical lookup behaviour here.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});
// Surface every problem, including notices, while the extraction runs.
error_reporting(E_ALL | E_NOTICE);

$manager = new ExtractionManager();

// Walk every article title from the English SQL dump and fetch the page
// text from the live English Wikipedia.
$titleIterator = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $titleIterator);

// rdfs:label triples for each article.
$labelGroup = new ExtractionGroup(new NTripleDumpDestination("articles_label.nt"));
$labelGroup->addExtractor(new LabelExtractor());
$jobEnWiki->addExtractionGroup($labelGroup);

// Short abstracts.
$abstractGroup = new ExtractionGroup(new NTripleDumpDestination("articles_abstract.nt"));
$abstractGroup->addExtractor(new ShortAbstractExtractor());
$jobEnWiki->addExtractionGroup($abstractGroup);

// Article images.
$imageGroup = new ExtractionGroup(new NTripleDumpDestination("articles_image.nt"));
$imageGroup->addExtractor(new ImageExtractor());
$jobEnWiki->addExtractionGroup($imageGroup);

// Links back to the originating wiki pages.
$wikipageGroup = new ExtractionGroup(new NTripleDumpDestination("articles_wikipage.nt"));
$wikipageGroup->addExtractor(new WikipageExtractor());
$jobEnWiki->addExtractionGroup($wikipageGroup);

// The infobox extractor feeds two destinations: one for the triples and one
// for the extracted property definitions.
$infoboxGroup = new ExtractionGroup(new NTripleDumpDestination("infoboxes.nt"), new NTripleDumpDestination("infoboxes.properties.nt"));
$infoboxGroup->addExtractor(new InfoboxExtractor());
$jobEnWiki->addExtractionGroup($infoboxGroup);
// Example #3
$resultFiles = array();
// NOTE(review): $sourceFiles is filled below but never initialised in this
// chunk — presumably created earlier in the file; confirm.
//
// Get all pages from the files stored in "pageSources" and "expectedResults".
// The candidate source pages do not depend on the extractor, so glob them
// once instead of re-scanning the directory on every loop iteration.
$sourcePath = dirname(__FILE__) . "/pageSources";
$sourcePages = glob($sourcePath . "/*.txt");
if ($sourcePages === false) {
    // glob() returns false on error; treat that like "no test pages".
    $sourcePages = array();
}
foreach ($extractorObjects as $extractor => $extractorObject) {
    // Expected results live in one sub-directory per extractor key.
    $resultPath = dirname(__FILE__) . "/expectedResults/{$extractor}";
    foreach ($sourcePages as $filename) {
        // Keep only pages that have a matching expected-result file.
        if (file_exists($resultPath . "/" . basename($filename))) {
            $sourceFiles[basename($filename)] = basename($filename);
            $resultFiles[$extractorObject->getExtractorID()][basename($filename)] = $resultPath . "/" . basename($filename);
        }
    }
}
// var_dump($sourceFiles);
// var_dump($resultFiles);
// Instantiate a new ExtractionJob that serves the collected test pages from
// the TestWikipedia fixture instead of a live wiki.
$job = new ExtractionJob(new TestWikipedia($language), new ArrayObject($sourceFiles));
// Create one ExtractionGroup for each Extractor
foreach ($extractorObjects as $extractor) {
    // Do not use extractors without test files.
    // BUG FIX: the original only echoed "continue" but never skipped the
    // iteration, so extractors without expected results were still added to
    // the job. empty() also avoids the undefined-index notice that the old
    // "== null" check triggered (and matches its semantics, since both null
    // and an empty array compare loosely equal to null).
    if (empty($resultFiles[$extractor->getExtractorID()])) {
        echo "\ncontinue";
        continue;
    }
    $group = new ExtractionGroup(new TestDestination(SHOW_OUTPUT));
    $group->addExtractor($extractor);
    $job->addExtractionGroup($group);
}
// Execute the ExtractionJob: runs every registered extractor over each test
// page and routes the results into the groups' TestDestinations.
$manager = new ExtractionManager();
$manager->execute($job);
// Cycle over all ExtractionGroups
// NOTE(review): the loop that reads the results back out apparently follows
// this chunk and is not visible here.
// Example #4
include "extraction/config.inc.php";
// Load interfaces
require_once 'dbpedia.php';
// Autoloader: resolve a class name to a file named "<ClassName>.php".
// FIX: the magic __autoload() function was deprecated in PHP 7.2 and removed
// in PHP 8.0; spl_autoload_register() is the supported, stackable replacement
// with identical lookup behaviour here.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});
// Enter the resources you want to extract.
// [To extract all Wikipedia articles from an SQL-Dump use AllArticlesSqlIterator (see extract.php)]
// when using a MySqlIterator, don't use "new ArrayObject" at Job creation
$pageTitles = array("Michael_Foot", "Millard_Fillmore");
// Further candidate titles: "Michael_Jordan", "Google".
// Setup the language version of Wikipedia
$language = "en";
// Instantiate a new ExtractionJob that fetches page text from the live wiki.
$job = new ExtractionJob(new LiveWikipedia($language), new ArrayObject($pageTitles));
// One ExtractionGroup per extractor, each writing to its own
// SimpleDumpDestination (presumably prints the results — confirm), added to
// the job in the same order as before: infoboxes, images, abstracts, labels.
foreach (array(new InfoboxExtractor(), new ImageExtractor(), new ShortAbstractExtractor(), new LabelExtractor()) as $articleExtractor) {
    $extractionGroup = new ExtractionGroup(new SimpleDumpDestination());
    $extractionGroup->addExtractor($articleExtractor);
    $job->addExtractionGroup($extractionGroup);
}
// Execute the ExtractionJob