<?php

include 'dbpedia.php';
error_reporting(E_ALL);

Timer::start("main::Runtime");

// Wikipedia language edition to process, supplied via the framework options.
$language = Options::getOption('language');

// Walk every template page of the chosen language from the SQL dump.
$templateTitles = new AllTemplatesSqlIterator($language);
$extractionJob = new ExtractionJob(new DatabaseWikipediaCollection($language), $templateTitles);

// Template redirects are processed for their side effects only,
// so the group writes its output to a NullDestination.
$redirectGroup = new ExtractionGroup(new NullDestination());
$redirectGroup->addExtractor(new TemplateRedirectExtractor());
$extractionJob->addExtractionGroup($redirectGroup);

// Run the configured job and report the total runtime.
$jobManager = new ExtractionManager();
$jobManager->execute($extractionJob);

Timer::stop("main::Runtime");
Timer::printTime();
Example #2
0
<?php

include 'dbpedia.php';
error_reporting(E_ALL);

Timer::start("main::Runtime");

// Wikipedia language edition to process, supplied via the framework options.
$language = Options::getOption('language');

// Walk every article page of the chosen language from the SQL dump.
$pageTitles = new AllArticlesSqlIterator($language);
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);

// BUGFIX: the output path previously hardcoded the "en" directory
// ("c:/dbpedia34/en/...") while the file name used $language, so runs for
// any other language wrote their dump into the English folder. Derive the
// directory from $language and create it on first use.
$outputDir = "c:/dbpedia34/" . $language;
if (!is_dir($outputDir)) {
    mkdir($outputDir, 0777, true);
}
$destination = new NTripleDumpDestination($outputDir . "/redirects_" . $language . ".nt");

// One group: redirect triples written straight to the N-Triples dump.
// (Renamed from $groupInfoboxes — this group holds a RedirectExtractor.)
$extractor = new RedirectExtractor($language);
$groupRedirects = new ExtractionGroup($destination);
$groupRedirects->addExtractor($extractor);
$job->addExtractionGroup($groupRedirects);

// Run the job and report the total runtime.
$manager = new ExtractionManager();
$manager->execute($job);

Timer::stop("main::Runtime");
Timer::printTime();
Example #3
0
            // Map result-file basename -> full path, keyed by extractor ID.
            $resultFiles[$extractorObject->getExtractorID()][basename($filename)] = $resultPath . "/" . basename($filename);
        }
    }
}
// var_dump($sourceFiles);
// var_dump($resultFiles);
// Instantiate a new ExtractionJob
$job = new ExtractionJob(new TestWikipedia($language), new ArrayObject($sourceFiles));
// Create one ExtractionGroup for each Extractor
foreach ($extractorObjects as $extractor) {
    // Do not use extractors without test files
    // NOTE(review): this branch only echoes the word "continue" — there is no
    // actual `continue;` statement, so extractors without result files are
    // still added to the job below. Confirm whether a real `continue;` was
    // intended here.
    if ($resultFiles[$extractor->getExtractorID()] == null) {
        echo "\ncontinue";
    }
    $currentExtractor = $extractor;
    // Each extractor gets its own group with a TestDestination; SHOW_OUTPUT
    // presumably toggles console echoing — defined outside this view.
    $group = new ExtractionGroup(new TestDestination(SHOW_OUTPUT));
    $group->addExtractor($currentExtractor);
    $job->addExtractionGroup($group);
}
// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);
// Cycle over all ExtractionGroups
$testResult = "Logfile for DBpedia Data Extraction (" . date("D M j Y  G:i:s T") . ")\n";
// Cycle over all ExtractionGroups
foreach ($job->getExtractionGroups() as $currentGroup) {
    // Cycle over all extractors
    foreach ($currentGroup->getExtractors() as $extractor) {
        // Start a per-extractor section in the log text.
        $testResult .= "\nResults for: " . $extractor->getExtractorID() . "\n";
        foreach ($sourceFiles as $key => $page) {
            // Only get files, which are available for the extractor
Example #4
0
 * Warning: The script needs several days to complete on an average PC.
 */
error_reporting(E_ALL);

// automatically loads required classes
require 'dbpedia.php';
// set $extractionDir and $extractionLanguages
require 'extractionconfig.php';

$manager = new ExtractionManager();

// Run one abstract-extraction job for every configured language.
foreach ($extractionLanguages as $currLanguage) {
    Options::setLanguage($currLanguage);

    // Job input: all articles of this language from the SQL dump.
    $pageTitles = new ArticlesSqlIterator($currLanguage);
    $job = new ExtractionJob(new DatabaseWikipediaCollection($currLanguage), $pageTitles);

    // Per-language output directory, created on first use.
    $extractionDirLang = $extractionDir . '/' . $currLanguage . '/';
    if (!is_dir($extractionDirLang)) {
        mkdir($extractionDirLang);
    }

    // AbstractExtractor has references to its two destinations, see below
    $group = new ExtractionGroup(new NullDestination());
    $shortDestination = new csvNTripleDestination($extractionDirLang . "shortabstract_" . $currLanguage);
    $longDestination = new csvNTripleDestination($extractionDirLang . "longabstract_" . $currLanguage);

    // The extractor bypasses the group's NullDestination and writes the
    // short and long abstracts to the two csv destinations directly.
    $abstractExtractor = new AbstractExtractor();
    $abstractExtractor->setDestinations($shortDestination, $longDestination);
    $group->addExtractor($abstractExtractor);
    $job->addExtractionGroup($group);

    $date = date(DATE_RFC822);
    Logger::info("Starting abstract extraction job for language {$currLanguage} at {$date}\n");

    $manager->execute($job);

    $date = date(DATE_RFC822);
    Logger::info("Finished abstract extraction job for language {$currLanguage} at {$date}\n");
}
Example #5
0
if (isset($sent)) {
    require_once 'dbpedia.php';
    require_once 'extraction/extractTemplates.php';
    require_once 'en-arc_ntriples_serializer.php';
    define("RDFAPI_INCLUDE_DIR", "api/");
    include RDFAPI_INCLUDE_DIR . "RdfAPI.php";
    // require_once 'RDFapi/RdfAPI.php'; // RAP
    function __autoload($class_name)
    {
        require_once $class_name . '.php';
    }
    // Extract a single live article named by the request parameter $resource.
    $pageTitles = array($resource);
    //, "London", "Paris");
    $job = new ExtractionJob(new LiveWikipedia($lang), new ArrayObject($pageTitles));
    // All extractors share one debug destination rendered to the web page.
    $destination = new WebDebugDestination();
    $group = new ExtractionGroup($destination);
    $group->addExtractor(new LabelExtractor());
    $group->addExtractor(new WikipageExtractor());
    // BUGFIX: AbstractExtractor takes no constructor arguments — its short-
    // and long-abstract destinations are wired through setDestinations(),
    // as the batch abstract-extraction script configures it. Passing them
    // to the constructor had no effect (this resolves the old TODO).
    $abstractExtractor = new AbstractExtractor();
    $abstractExtractor->setDestinations($destination, $destination);
    $group->addExtractor($abstractExtractor);
    $group->addExtractor(new ImageExtractor());
    $group->addExtractor(new InfoboxExtractor());
    $group->addExtractor(new PersondataExtractor());
    $job->addExtractionGroup($group);
    // Run the single-article job.
    $manager = new ExtractionManager();
    $manager->execute($job);
}
?>


</body>
Example #6
0
//
// Start of DBpedia extraction process
//
///////////////////////////////////////////
if (isset($sent)) {
    require_once 'extraction/extractTemplates.php';
    function __autoload($class_name)
    {
        require_once $class_name . '.php';
    }
    // Single live article named by the request parameter $resource.
    $pageTitle = array($resource);
    // Job reads the page from the live Wikipedia for language $lang.
    $job = new ExtractionJob(new LiveWikipedia($lang), new ArrayObject($pageTitle));
    // One group; every extractor renders into the same web debug output.
    $group = new ExtractionGroup(new WebDebugDestination());
    foreach (array(
        new InfoboxExtractor(),
        new ImageExtractor(),
        new ShortAbstractExtractor(),
        new LabelExtractor(),
        new WikipageExtractor(),
        new SemanticExtractor(),
        new DBpediaLinkExtractor(),
        new WcGeoExtractor(),
    ) as $singleExtractor) {
        $group->addExtractor($singleExtractor);
    }
    $job->addExtractionGroup($group);
    // Run the single-article job.
    $manager = new ExtractionManager();
    $manager->execute($job);
}
// Close HTML
Example #7
0
<?php

/**
 * Start file for testing an extractor on a specific language.
 * This runs the extractor on the full language database. The
 * main purpose is to produce single data sets in contrast to
 * performing a full extraction (as in extract.php) or testing
 * an extractor on a specific article only (as in start.php).
 *
 * See http://wiki.dbpedia.org/Documentation for an overview of
 * the DBpedia extraction framework.
 *
 * @author Jens Lehmann
 */
// autoloader
include 'dbpedia.php';

// --- configuration: which extractor, which language, where to write ---
$language = Options::getOption('language');
$extractor = new HomepageExtractor();
$outputFilePrefix = 'test';

// --- job setup and execution (usually needs no changes) ---
$pageTitles = new ArticlesSqlIterator($language);
$job = new ExtractionJob(new DatabaseWikipediaCollection($language), $pageTitles);

$group = new ExtractionGroup(new csvNTripleDestination($outputFilePrefix));
$group->addExtractor($extractor);
$job->addExtractionGroup($group);
Logger::info("Job created.\n");

$manager = new ExtractionManager();
$manager->execute($job);
Logger::info("Job finished.\n");
Example #8
0
echo "<form name=\"getResource\" action=\"index.php\" method=\"GET\">\r\n\t\tResource: <input type=\"text\" name=\"resource\" value=\"{$resource}\" /><br />\r\n\t\tLanguage: <input type=\"text\" name=\"lang\" value=\"{$lang}\" /> <br />\r\n\t\t<input type=\"submit\" name=\"sent\" value=\"Search\r\n\t\t\" />\r\n\t  </form>";
if (isset($sent)) {
    require_once 'dbpedia.php';
    require_once 'extraction/extractTemplates.php';
    require_once 'en-arc_ntriples_serializer.php';
    define("RDFAPI_INCLUDE_DIR", "api/");
    include RDFAPI_INCLUDE_DIR . "RdfAPI.php";
    // require_once 'RDFapi/RdfAPI.php'; // RAP
    function __autoload($class_name)
    {
        require_once $class_name . '.php';
    }
    // Single live article named by the request parameter $resource.
    $pageTitles = array($resource);
    //, "London", "Paris");
    $job = new ExtractionJob(new LiveWikipedia($lang), new ArrayObject($pageTitles));
    // One group; every extractor renders into the same web debug output.
    $group = new ExtractionGroup(new WebDebugDestination());
    foreach (array(
        new LabelExtractor(),
        new WikipageExtractor(),
        new ShortAbstractExtractor(),
        new ImageExtractor(),
        new InfoboxExtractor(),
    ) as $singleExtractor) {
        $group->addExtractor($singleExtractor);
    }
    $job->addExtractionGroup($group);
    // Run the single-article job.
    $manager = new ExtractionManager();
    $manager->execute($job);
}
?>


</body>
</html>
Example #9
0
    require_once $class_name . '.php';
}
error_reporting(E_ALL |  E_NOTICE);

/**
 * Wire one extractor to its own N-Triples dump file and register the
 * resulting group with the given job. The class name is passed as a
 * string so the destination is still constructed before the extractor,
 * matching the original statement order.
 */
function addDumpGroup($job, $dumpFile, $extractorClass)
{
    $group = new ExtractionGroup(new NTripleDumpDestination($dumpFile));
    $group->addExtractor(new $extractorClass());
    $job->addExtractionGroup($group);
}

$manager = new ExtractionManager();

// Full extraction over every English article, read from the live Wikipedia.
$pageTitlesEn = new AllArticlesSqlIterator("en");
$jobEnWiki = new ExtractionJob(new LiveWikipedia("en"), $pageTitlesEn);

// Single-destination groups, one dump file per extractor.
addDumpGroup($jobEnWiki, "articles_label.nt", "LabelExtractor");
addDumpGroup($jobEnWiki, "articles_abstract.nt", "ShortAbstractExtractor");
addDumpGroup($jobEnWiki, "articles_image.nt", "ImageExtractor");
addDumpGroup($jobEnWiki, "articles_wikipage.nt", "WikipageExtractor");

// The infobox group is special: it writes triples and property
// definitions to two separate dump files.
$groupInfoboxes = new ExtractionGroup(new NTripleDumpDestination("infoboxes.nt"), new NTripleDumpDestination("infoboxes.properties.nt"));
$groupInfoboxes->addExtractor(new InfoboxExtractor());
$jobEnWiki->addExtractionGroup($groupInfoboxes);

// Remaining single-destination groups.
addDumpGroup($jobEnWiki, "semantic.nt", "SemanticExtractor");
addDumpGroup($jobEnWiki, "dbpedia_links.nt", "DBpediaLinkExtractor");
addDumpGroup($jobEnWiki, "geocodes.nt", "WcGeoExtractor");

$manager->execute($jobEnWiki);
Example #10
0
require_once 'dbpedia.php';
function __autoload($class_name)
{
    require_once $class_name . '.php';
}
// Enter the resources you want to extract.
// [To extract all Wikipedia articles from an SQL-Dump use AllArticlesSqlIterator (see extract.php)]
// when using a MySqlIterator, don't use "new ArrayObject" at Job creation
$pageTitles = array("Michael_Foot", "Millard_Fillmore");
//, "Michael_Jordan", "Google");

// Setup the language version of Wikipedia
$language = "en";

// Job reads the named pages from the live Wikipedia.
$job = new ExtractionJob(new LiveWikipedia($language), new ArrayObject($pageTitles));

// One group per extractor; each dumps its triples to stdout-style output.
$groupInfoboxes = new ExtractionGroup(new SimpleDumpDestination());
$groupInfoboxes->addExtractor(new InfoboxExtractor());
$job->addExtractionGroup($groupInfoboxes);

$groupImages = new ExtractionGroup(new SimpleDumpDestination());
$groupImages->addExtractor(new ImageExtractor());
$job->addExtractionGroup($groupImages);

$groupShortAbstracts = new ExtractionGroup(new SimpleDumpDestination());
$groupShortAbstracts->addExtractor(new ShortAbstractExtractor());
$job->addExtractionGroup($groupShortAbstracts);

$groupLabels = new ExtractionGroup(new SimpleDumpDestination());
$groupLabels->addExtractor(new LabelExtractor());
$job->addExtractionGroup($groupLabels);

// Execute the ExtractionJob
$manager = new ExtractionManager();
$manager->execute($job);