<?php
/*
 * Command-line driver: runs the TemplateRedirectExtractor over the page
 * titles produced by AllTemplatesSqlIterator for the configured language
 * and reports the total runtime.
 *
 * The extraction group writes to a NullDestination, so this script itself
 * does not dump triples anywhere (presumably the extractor persists its
 * findings on its own -- TODO confirm against TemplateRedirectExtractor).
 */

// The framework bootstrap is mandatory: require_once aborts immediately
// with a clear error if it is missing, instead of merely warning and then
// failing later on the first undefined class.
require_once 'dbpedia.php';

error_reporting(E_ALL);

Timer::start("main::Runtime");

// Setup the language version of Wikipedia
$language = Options::getOption('language');

// Page titles to process and the job that reads them from the database
$pageTitles = new AllTemplatesSqlIterator($language);
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);

// Single extraction group holding the template redirect extractor
$groupInfoboxes = new ExtractionGroup(new NullDestination());
$groupInfoboxes->addExtractor(new TemplateRedirectExtractor());

// Add the ExtractionGroups to the ExtractionJob
$job->addExtractionGroup($groupInfoboxes);

// Execute the Extraction Job
$manager = new ExtractionManager();
$manager->execute($job);

Timer::stop("main::Runtime");
Timer::printTime();
* */

require_once 'dbpedia.php';
require_once 'extraction/extractTemplates.php';
include "extraction/config.inc.php";

// Class autoloader: a class named Foo is loaded from "Foo.php" (resolved
// through the include path). Registered via spl_autoload_register because
// a global __autoload() function is deprecated since PHP 7.2 and is a
// fatal error on PHP 8.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});

// E_ALL already contains E_NOTICE, so no extra flag is needed.
error_reporting(E_ALL);

$manager = new ExtractionManager();

// Job over the English Wikipedia: titles come from AllArticlesSqlIterator,
// page sources from LiveWikipedia.
$pageTitlesEn = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $pageTitlesEn);

// One ExtractionGroup per extractor, each writing to its own N-Triples dump.

// Labels
$groupArticlesLabelEn = new ExtractionGroup(new NTripleDumpDestination("articles_label.nt"));
$groupArticlesLabelEn->addExtractor(new LabelExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesLabelEn);

// Short abstracts
$groupArticlesShortAbstractEn = new ExtractionGroup(new NTripleDumpDestination("articles_abstract.nt"));
$groupArticlesShortAbstractEn->addExtractor(new ShortAbstractExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesShortAbstractEn);

// Images
$groupImages = new ExtractionGroup(new NTripleDumpDestination("articles_image.nt"));
$groupImages->addExtractor(new ImageExtractor());
$jobEnWiki->addExtractionGroup($groupImages);

// Wikipedia page links
$groupWikipages = new ExtractionGroup(new NTripleDumpDestination("articles_wikipage.nt"));
$groupWikipages->addExtractor(new WikipageExtractor());
$jobEnWiki->addExtractionGroup($groupWikipages);

// Infoboxes: this group is given two destinations, the second one named
// "infoboxes.properties.nt".
$groupInfoboxes = new ExtractionGroup(
    new NTripleDumpDestination("infoboxes.nt"),
    new NTripleDumpDestination("infoboxes.properties.nt")
);
$groupInfoboxes->addExtractor(new InfoboxExtractor());
$jobEnWiki->addExtractionGroup($groupInfoboxes);

// Semantic annotations
$groupSemantic = new ExtractionGroup(new NTripleDumpDestination("semantic.nt"));
$groupSemantic->addExtractor(new SemanticExtractor());
$jobEnWiki->addExtractionGroup($groupSemantic);
} }

// var_dump($sourceFiles);
// var_dump($resultFiles);

// Instantiate a new ExtractionJob
$job = new ExtractionJob(new TestWikipedia($language), new ArrayObject($sourceFiles));

// Create one ExtractionGroup for each Extractor
foreach ($extractorObjects as $extractor) {
    $extractorId = $extractor->getExtractorID();

    // Do not use extractors without test files. empty() also covers an
    // extractor ID that has no entry in $resultFiles at all, without
    // raising an "undefined index" notice.
    if (empty($resultFiles[$extractorId])) {
        echo "\ncontinue";
        // Actually skip the extractor; previously the message was printed
        // but the extractor was still added to the job.
        continue;
    }

    $group = new ExtractionGroup(new TestDestination(SHOW_OUTPUT));
    $group->addExtractor($extractor);
    $job->addExtractionGroup($group);
}

// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);

// Start the log text with a timestamped header line
$testResult = "Logfile for DBpedia Data Extraction (" . date("D M j Y G:i:s T") . ")\n";

// Cycle over all ExtractionGroups
foreach ($job->getExtractionGroups() as $currentGroup) {
    // Cycle over all extractors
    foreach ($currentGroup->getExtractors() as $extractor) {
        $testResult .= "\nResults for: " . $extractor->getExtractorID() . "\n";
        foreach ($sourceFiles as $key => $page) {
            // Only get files, which are available for the extractor
            if (!isset($resultFiles[$extractor->getExtractorID()][$key])) {
                continue;
require_once 'dbpedia.php';

// Class autoloader: a class named Foo is loaded from "Foo.php" (resolved
// through the include path). Registered via spl_autoload_register because
// a global __autoload() function is deprecated since PHP 7.2 and is a
// fatal error on PHP 8.
spl_autoload_register(function ($class_name) {
    require_once $class_name . '.php';
});

// Enter the resources you want to extract.
// [To extract all Wikipedia articles from an SQL-Dump use AllArticlesSqlIterator (see extract.php)]
// When using a MySqlIterator, don't use "new ArrayObject" at Job creation.
$pageTitles = ["Michael_Foot", "Millard_Fillmore"]; //, "Michael_Jordan", "Google"

// Setup the language version of Wikipedia
$language = "en";

// Instantiate a new ExtractionJob
$job = new ExtractionJob(new LiveWikipedia($language), new ArrayObject($pageTitles));

// Create one ExtractionGroup per extractor, each with its own SimpleDumpDestination
$groupInfoboxes = new ExtractionGroup(new SimpleDumpDestination());
$groupInfoboxes->addExtractor(new InfoboxExtractor());

$groupImages = new ExtractionGroup(new SimpleDumpDestination());
$groupImages->addExtractor(new ImageExtractor());

$groupShortAbstracts = new ExtractionGroup(new SimpleDumpDestination());
$groupShortAbstracts->addExtractor(new ShortAbstractExtractor());

$groupLabels = new ExtractionGroup(new SimpleDumpDestination());
$groupLabels->addExtractor(new LabelExtractor());

// Add the ExtractionGroups to the ExtractionJob
$job->addExtractionGroup($groupInfoboxes);
$job->addExtractionGroup($groupImages);
$job->addExtractionGroup($groupShortAbstracts);
$job->addExtractionGroup($groupLabels);

// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);