<?php
/**
 * Driver script: builds one ExtractionJob with a single ExtractionGroup that
 * runs TemplateRedirectExtractor, executes it through ExtractionManager and
 * prints the timing collected under the "main::Runtime" timer.
 */

// The bootstrap file is mandatory, so load it with require_once: a missing
// file now stops the script immediately instead of producing a warning and
// a later "class not found" failure, and the file is never loaded twice.
// NOTE(review): presumably dbpedia.php provides (or autoloads) the classes
// used below - none of them are defined in this script.
require_once 'dbpedia.php';

error_reporting(E_ALL);

Timer::start("main::Runtime");

// Set up the language version of Wikipedia (read from the 'language' option).
$language = Options::getOption('language');

// Page source and page collection for that language, combined into one job.
$pageTitles = new AllTemplatesSqlIterator($language);
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);

// One extraction group whose results go to a NullDestination.
$groupInfoboxes = new ExtractionGroup(new NullDestination());
$groupInfoboxes->addExtractor(new TemplateRedirectExtractor());

// Add the ExtractionGroups to the ExtractionJob.
$job->addExtractionGroup($groupInfoboxes);

// Execute the Extraction Job.
$manager = new ExtractionManager();
$manager->execute($job);

Timer::stop("main::Runtime");
Timer::printTime();
* page source to the extractor, triggering its extractPage() method.
 *
 * Finally it reads out the ExtractionResults from each extractor and passes it to
 * the respective destination. The finish() methods from extractors and destination
 * are called, in order to close these.
 *
 */

/**
 * Class autoloader: loads "<ClassName>.php" via require_once, resolved
 * through PHP's normal include lookup.
 *
 * Replaces the former magic __autoload() function, which was deprecated in
 * PHP 7.2 and is a compile-time fatal error in PHP 8.
 *
 * @param string $class_name Name of the class that PHP could not resolve.
 */
function loadExtractionClass($class_name)
{
    require_once $class_name . '.php';
}

// Register before the require_once lines below: the old __autoload() was a
// top-level function and therefore already active while those files were
// being loaded, so the replacement must be active at the same point.
spl_autoload_register('loadExtractionClass');

require_once 'dbpedia.php';
require_once 'extraction/extractTemplates.php';
include "extraction/config.inc.php";

// E_NOTICE is already part of E_ALL; the mask is kept as originally written.
error_reporting(E_ALL | E_NOTICE);

$manager = new ExtractionManager();

// English Wikipedia job: article titles from AllArticlesSqlIterator, page
// collection from LiveWikipedia.
$pageTitlesEn = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $pageTitlesEn);

// Labels -> articles_label.nt
$groupArticlesLabelEn = new ExtractionGroup(new NTripleDumpDestination("articles_label.nt"));
$groupArticlesLabelEn->addExtractor(new LabelExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesLabelEn);

// Short abstracts -> articles_abstract.nt
$groupArticlesShortAbstractEn = new ExtractionGroup(new NTripleDumpDestination("articles_abstract.nt"));
$groupArticlesShortAbstractEn->addExtractor(new ShortAbstractExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesShortAbstractEn);

// Images -> articles_image.nt
$groupImages = new ExtractionGroup(new NTripleDumpDestination("articles_image.nt"));
$groupImages->addExtractor(new ImageExtractor());
$jobEnWiki->addExtractionGroup($groupImages);

// Wikipage links -> articles_wikipage.nt
$groupWikipages = new ExtractionGroup(new NTripleDumpDestination("articles_wikipage.nt"));
$groupWikipages->addExtractor(new WikipageExtractor());
$jobEnWiki->addExtractionGroup($groupWikipages);

// Infobox group is constructed with two destinations: infoboxes.nt and
// infoboxes.properties.nt.
$groupInfoboxes = new ExtractionGroup(
    new NTripleDumpDestination("infoboxes.nt"),
    new NTripleDumpDestination("infoboxes.properties.nt")
);