<?php include 'dbpedia.php'; error_reporting(E_ALL); Timer::start("main::Runtime"); // Setup the language version of Wikipedia $language = Options::getOption('language'); $pageTitles = new AllTemplatesSqlIterator($language); $job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles); $groupInfoboxes = new ExtractionGroup(new NullDestination()); $groupInfoboxes->addExtractor(new TemplateRedirectExtractor()); // Add the ExtractionGroups to the ExtractionJob $job->addExtractionGroup($groupInfoboxes); //Execute the Extraction Job $manager = new ExtractionManager(); $manager->execute($job); Timer::stop("main::Runtime"); Timer::printTime();
<?php include 'dbpedia.php'; error_reporting(E_ALL); Timer::start("main::Runtime"); $language = Options::getOption('language'); $pageTitles = new AllArticlesSqlIterator($language); $job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles); $destination = new NTripleDumpDestination("c:/dbpedia34/en/redirects_" . $language . ".nt"); $extractor = new RedirectExtractor($language); $groupInfoboxes = new ExtractionGroup($destination); $groupInfoboxes->addExtractor($extractor); $job->addExtractionGroup($groupInfoboxes); $manager = new ExtractionManager(); $manager->execute($job); Timer::stop("main::Runtime"); Timer::printTime();
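/*
 * Portability note: the destination path above is hard-coded to a Windows directory
 * ("c:/dbpedia34/en/..."). A sketch of a language-derived alternative, assuming only
 * the NTripleDumpDestination constructor used above ($outputDir is an assumed example
 * value, not part of the original script):
 *
 *   $outputDir = '/data/dbpedia/' . $language;
 *   if (!is_dir($outputDir)) { mkdir($outputDir, 0777, true); }
 *   $destination = new NTripleDumpDestination($outputDir . '/redirects_' . $language . '.nt');
 */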
            $resultFiles[$extractorObject->getExtractorID()][basename($filename)] = $resultPath . "/" . basename($filename);
        }
    }
}

// var_dump($sourceFiles);
// var_dump($resultFiles);

// Instantiate a new ExtractionJob
$job = new ExtractionJob(new TestWikipedia($language), new ArrayObject($sourceFiles));

// Create one ExtractionGroup for each Extractor
foreach ($extractorObjects as $extractor) {
    // Do not use extractors without test files
    if (!isset($resultFiles[$extractor->getExtractorID()])) {
        echo "\ncontinue";
        continue;
    }
    $group = new ExtractionGroup(new TestDestination(SHOW_OUTPUT));
    $group->addExtractor($extractor);
    $job->addExtractionGroup($group);
}

// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);

// Build the test log: cycle over all ExtractionGroups and their extractors
$testResult = "Logfile for DBpedia Data Extraction (" . date("D M j Y G:i:s T") . ")\n";
foreach ($job->getExtractionGroups() as $currentGroup) {
    foreach ($currentGroup->getExtractors() as $extractor) {
        $testResult .= "\nResults for: " . $extractor->getExtractorID() . "\n";
        foreach ($sourceFiles as $key => $page) {
            // Only get files which are available for the extractor
 * Warning: The script needs several days to complete on an average PC.
 */

error_reporting(E_ALL);

// automatically loads required classes
require 'dbpedia.php';
// sets $extractionDir and $extractionLanguages
require 'extractionconfig.php';

$manager = new ExtractionManager();

// loop over all languages
foreach ($extractionLanguages as $currLanguage) {
    Options::setLanguage($currLanguage);

    $pageTitles = new ArticlesSqlIterator($currLanguage);
    $job = new ExtractionJob(new DatabaseWikipediaCollection($currLanguage), $pageTitles);

    $extractionDirLang = $extractionDir . '/' . $currLanguage . '/';
    if (!is_dir($extractionDirLang)) {
        mkdir($extractionDirLang);
    }

    // AbstractExtractor has references to its two destinations, see below
    $group = new ExtractionGroup(new NullDestination());
    $shortDestination = new csvNTripleDestination($extractionDirLang . "shortabstract_" . $currLanguage);
    $longDestination = new csvNTripleDestination($extractionDirLang . "longabstract_" . $currLanguage);
    $extractorInstance = new AbstractExtractor();
    $extractorInstance->setDestinations($shortDestination, $longDestination);
    $group->addExtractor($extractorInstance);
    $job->addExtractionGroup($group);

    $date = date(DATE_RFC822);
    Logger::info("Starting abstract extraction job for language {$currLanguage} at {$date}\n");
    $manager->execute($job);
    $date = date(DATE_RFC822);
    Logger::info("Finished abstract extraction job for language {$currLanguage} at {$date}\n");
}
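<?php
/*
 * Sketch of a minimal extractionconfig.php, as required by the abstract extraction
 * script above: it only needs to define $extractionDir and $extractionLanguages.
 * The directory and the language list below are assumed example values, not part of
 * the original configuration.
 */
$extractionDir = '/data/dbpedia/extraction';     // assumed base directory for all dump files
$extractionLanguages = array('en', 'de', 'fr');  // assumed set of Wikipedia language editions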
if (isset($sent)) {
    require_once 'dbpedia.php';
    require_once 'extraction/extractTemplates.php';
    require_once 'en-arc_ntriples_serializer.php';

    define("RDFAPI_INCLUDE_DIR", "api/");
    include RDFAPI_INCLUDE_DIR . "RdfAPI.php";
    // require_once 'RDFapi/RdfAPI.php'; // RAP

    function __autoload($class_name)
    {
        require_once $class_name . '.php';
    }

    $pageTitles = array($resource); //, "London", "Paris");
    $job = new ExtractionJob(new LiveWikipedia($lang), new ArrayObject($pageTitles));

    $destination = new WebDebugDestination();
    $group = new ExtractionGroup($destination);
    $group->addExtractor(new LabelExtractor());
    $group->addExtractor(new WikipageExtractor());
    // TODO: does AbstractExtractor work?
    $group->addExtractor(new AbstractExtractor($destination, $destination));
    $group->addExtractor(new ImageExtractor());
    $group->addExtractor(new InfoboxExtractor());
    $group->addExtractor(new PersondataExtractor());
    $job->addExtractionGroup($group);

    $manager = new ExtractionManager();
    $manager->execute($job);
}
?>
</body>
//
// Start of DBpedia extraction process
//
///////////////////////////////////////////

if (isset($sent)) {
    require_once 'extraction/extractTemplates.php';

    function __autoload($class_name)
    {
        require_once $class_name . '.php';
    }

    // Resource to extract
    $pageTitle = array($resource);

    // Instantiate a new ExtractionJob
    $job = new ExtractionJob(new LiveWikipedia($lang), new ArrayObject($pageTitle));

    // Create one ExtractionGroup for all Extractors
    $group = new ExtractionGroup(new WebDebugDestination());
    $group->addExtractor(new InfoboxExtractor());
    $group->addExtractor(new ImageExtractor());
    $group->addExtractor(new ShortAbstractExtractor());
    $group->addExtractor(new LabelExtractor());
    $group->addExtractor(new WikipageExtractor());
    $group->addExtractor(new SemanticExtractor());
    $group->addExtractor(new DBpediaLinkExtractor());
    $group->addExtractor(new WcGeoExtractor());

    // Add the ExtractionGroup to the ExtractionJob
    $job->addExtractionGroup($group);

    // Execute the ExtractionJob
    $manager = new ExtractionManager();
    $manager->execute($job);
}

// Close HTML
<?php

/**
 * Start file for testing an extractor on a specific language.
 * This runs the extractor on the full language database. The
 * main purpose is to produce single data sets, in contrast to
 * performing a full extraction (as in extract.php) or testing
 * an extractor on a specific article only (as in start.php).
 *
 * See http://wiki.dbpedia.org/Documentation for an overview of
 * the DBpedia extraction framework.
 *
 * @author Jens Lehmann
 */

// autoloader
include 'dbpedia.php';

// configure settings
$language = Options::getOption('language');
$extractor = new HomepageExtractor();
$outputFilePrefix = 'test';

// sets up a job and executes it (usually you do not need to change this code)
$pageTitles = new ArticlesSqlIterator($language);
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);
$group = new ExtractionGroup(new csvNTripleDestination($outputFilePrefix));
$group->addExtractor($extractor);
$job->addExtractionGroup($group);
Logger::info("Job created.\n");

$manager = new ExtractionManager();
$manager->execute($job);
Logger::info("Job finished.\n");
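/*
 * Only the "configure settings" block above normally changes between runs. As an
 * illustration (the output prefix below is an assumed example value, not from the
 * original script), an image data set could be produced by editing those three lines to:
 *
 *   $language = Options::getOption('language');
 *   $extractor = new ImageExtractor();
 *   $outputFilePrefix = 'images_' . $language;
 */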
echo "<form name=\"getResource\" action=\"index.php\" method=\"GET\">\r\n\t\tResource: <input type=\"text\" name=\"resource\" value=\"{$resource}\" /><br />\r\n\t\tLanguage: <input type=\"text\" name=\"lang\" value=\"{$lang}\" /> <br />\r\n\t\t<input type=\"submit\" name=\"sent\" value=\"Search\r\n\t\t\" />\r\n\t </form>"; if (isset($sent)) { require_once 'dbpedia.php'; require_once 'extraction/extractTemplates.php'; require_once 'en-arc_ntriples_serializer.php'; define("RDFAPI_INCLUDE_DIR", "api/"); include RDFAPI_INCLUDE_DIR . "RdfAPI.php"; // require_once 'RDFapi/RdfAPI.php'; // RAP function __autoload($class_name) { require_once $class_name . '.php'; } $pageTitles = array($resource); //, "London", "Paris"); $job = new ExtractionJob(new LiveWikipedia($lang), new ArrayObject($pageTitles)); $group = new ExtractionGroup(new WebDebugDestination()); $group->addExtractor(new LabelExtractor()); $group->addExtractor(new WikipageExtractor()); $group->addExtractor(new ShortAbstractExtractor()); $group->addExtractor(new ImageExtractor()); $group->addExtractor(new InfoboxExtractor()); $job->addExtractionGroup($group); $manager = new ExtractionManager(); $manager->execute($job); } ?> </body> </html>
    require_once $class_name . '.php';
}

error_reporting(E_ALL | E_NOTICE);

$manager = new ExtractionManager();

$pageTitlesEn = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $pageTitlesEn);

$groupArticlesLabelEn = new ExtractionGroup(new NTripleDumpDestination("articles_label.nt"));
$groupArticlesLabelEn->addExtractor(new LabelExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesLabelEn);

$groupArticlesShortAbstractEn = new ExtractionGroup(new NTripleDumpDestination("articles_abstract.nt"));
$groupArticlesShortAbstractEn->addExtractor(new ShortAbstractExtractor());
$jobEnWiki->addExtractionGroup($groupArticlesShortAbstractEn);

$groupImages = new ExtractionGroup(new NTripleDumpDestination("articles_image.nt"));
$groupImages->addExtractor(new ImageExtractor());
$jobEnWiki->addExtractionGroup($groupImages);

$groupWikipages = new ExtractionGroup(new NTripleDumpDestination("articles_wikipage.nt"));
$groupWikipages->addExtractor(new WikipageExtractor());
$jobEnWiki->addExtractionGroup($groupWikipages);

$groupInfoboxes = new ExtractionGroup(new NTripleDumpDestination("infoboxes.nt"), new NTripleDumpDestination("infoboxes.properties.nt"));
$groupInfoboxes->addExtractor(new InfoboxExtractor());
$jobEnWiki->addExtractionGroup($groupInfoboxes);

$groupSemantic = new ExtractionGroup(new NTripleDumpDestination("semantic.nt"));
$groupSemantic->addExtractor(new SemanticExtractor());
$jobEnWiki->addExtractionGroup($groupSemantic);

$groupDBpedia = new ExtractionGroup(new NTripleDumpDestination("dbpedia_links.nt"));
$groupDBpedia->addExtractor(new DBpediaLinkExtractor());
$jobEnWiki->addExtractionGroup($groupDBpedia);

$groupGeoCodes = new ExtractionGroup(new NTripleDumpDestination("geocodes.nt"));
$groupGeoCodes->addExtractor(new WcGeoExtractor());
$jobEnWiki->addExtractionGroup($groupGeoCodes);

$manager->execute($jobEnWiki);
require_once 'dbpedia.php';

function __autoload($class_name)
{
    require_once $class_name . '.php';
}

// Enter the resources you want to extract.
// [To extract all Wikipedia articles from an SQL dump, use AllArticlesSqlIterator (see extract.php).]
// When using a MySqlIterator, do not wrap it in "new ArrayObject" at job creation.
$pageTitles = array("Michael_Foot", "Millard_Fillmore"); //, "Michael_Jordan", "Google");

// Set up the language version of Wikipedia
$language = "en";

// Instantiate a new ExtractionJob
$job = new ExtractionJob(new LiveWikipedia($language), new ArrayObject($pageTitles));

// Create one ExtractionGroup for each Extractor
$groupInfoboxes = new ExtractionGroup(new SimpleDumpDestination());
$groupInfoboxes->addExtractor(new InfoboxExtractor());

$groupImages = new ExtractionGroup(new SimpleDumpDestination());
$groupImages->addExtractor(new ImageExtractor());

$groupShortAbstracts = new ExtractionGroup(new SimpleDumpDestination());
$groupShortAbstracts->addExtractor(new ShortAbstractExtractor());

$groupLabels = new ExtractionGroup(new SimpleDumpDestination());
$groupLabels->addExtractor(new LabelExtractor());

// Add the ExtractionGroups to the ExtractionJob
$job->addExtractionGroup($groupInfoboxes);
$job->addExtractionGroup($groupImages);
$job->addExtractionGroup($groupShortAbstracts);
$job->addExtractionGroup($groupLabels);

// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);
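/*
 * To keep the example output instead of only dumping it to the console, the same
 * groups can be pointed at file destinations, as the full extraction scripts do.
 * A minimal sketch, assuming only the NTripleDumpDestination constructor used
 * elsewhere in this codebase (a single output path); the file names are assumed
 * example values:
 *
 *   $groupInfoboxes = new ExtractionGroup(new NTripleDumpDestination("infoboxes_sample.nt"));
 *   $groupLabels    = new ExtractionGroup(new NTripleDumpDestination("labels_sample.nt"));
 */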