<?php

include 'dbpedia.php';
error_reporting(E_ALL);

// Start the timer keyed "main::Runtime"; it is stopped and printed at the end.
Timer::start("main::Runtime");

// The Wikipedia language version to process is read from the 'language' option.
$language = Options::getOption('language');

// Build the job from a template iterator and a database-backed collection,
// both created for the selected language.
$templateTitles = new AllTemplatesSqlIterator($language);
$wikipedia = new DatabaseWikipediaCollection($language);
$job = new ExtractionJob($wikipedia, $templateTitles);

// One extraction group: a TemplateRedirectExtractor attached to a NullDestination.
$nullSink = new NullDestination();
$redirectGroup = new ExtractionGroup($nullSink);
$redirectExtractor = new TemplateRedirectExtractor();
$redirectGroup->addExtractor($redirectExtractor);

// Attach the group to the job, then hand the job to the manager for execution.
$job->addExtractionGroup($redirectGroup);
$manager = new ExtractionManager();
$manager->execute($job);

Timer::stop("main::Runtime");
Timer::printTime();
Example #2
0
 * page source to the extractor, triggering its extractPage() method.
 *
 * Finally, it reads the ExtractionResults from each extractor and passes them to
 * the respective destination. The finish() methods of the extractors and
 * destinations are then called in order to close them.
 * 
 */
require_once 'dbpedia.php';
require_once 'extraction/extractTemplates.php';
include "extraction/config.inc.php";
/**
 * Class autoloader: pulls in "<ClassName>.php" the first time an undefined
 * class is referenced. The file is located by require_once's normal lookup
 * for relative paths (include_path first).
 *
 * It is registered through spl_autoload_register() instead of being declared
 * as the magic __autoload() function: __autoload() was deprecated in PHP 7.2
 * and removed in PHP 8.0, where merely declaring it is a fatal error.
 *
 * As before, a class whose file cannot be found ends the script with the
 * fatal error raised by require_once.
 *
 * @param string $class_name Name of the class that has to be loaded.
 */
function dbpediaAutoload($class_name)
{
    require_once $class_name . '.php';
}
spl_autoload_register('dbpediaAutoload');
// Set up an extraction job for the "en" Wikipedia and attach one extraction
// group per output file. The job is only assembled in this part of the script.

// NOTE(review): E_NOTICE is already contained in E_ALL, so the extra flag has
// no effect — possibly E_STRICT was intended; confirm.
error_reporting(E_ALL |  E_NOTICE);
$manager = new ExtractionManager();

// The job combines a LiveWikipedia("en") source with an AllArticlesSqlIterator("en").
$pageTitlesEn = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $pageTitlesEn);

// LabelExtractor -> articles_label.nt
$groupArticlesLabelEn = new ExtractionGroup(new NTripleDumpDestination("articles_label.nt"));
$groupArticlesLabelEn->addExtractor(new LabelExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesLabelEn);

// ShortAbstractExtractor -> articles_abstract.nt
$groupArticlesShortAbstractEn = new ExtractionGroup(new NTripleDumpDestination("articles_abstract.nt"));
$groupArticlesShortAbstractEn->addExtractor(new ShortAbstractExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesShortAbstractEn);

// ImageExtractor -> articles_image.nt
$groupImages = new ExtractionGroup(new NTripleDumpDestination("articles_image.nt"));
$groupImages->addExtractor(new ImageExtractor());
$jobEnWiki->addExtractionGroup($groupImages);

// WikipageExtractor -> articles_wikipage.nt
$groupWikipages = new ExtractionGroup(new NTripleDumpDestination("articles_wikipage.nt"));
$groupWikipages->addExtractor(new WikipageExtractor());
$jobEnWiki->addExtractionGroup($groupWikipages);

// This group is constructed with two destinations (infoboxes.nt and
// infoboxes.properties.nt).
// NOTE(review): the role of the second constructor argument is not visible
// here, and no extractor has been added to this group yet — confirm against
// ExtractionGroup and the rest of the script.
$groupInfoboxes = new ExtractionGroup(new NTripleDumpDestination("infoboxes.nt"), new NTripleDumpDestination("infoboxes.properties.nt"));