/**
 * Main method, which loads the configuration and maps all source files.
 *
 * In order, this method: creates the extraction context, sets up the file
 * sources, registers a single file-backed quad destination on the context,
 * loads redirects, ontology and mappings, registers the label, geo and
 * abstract extractors, runs the extraction, and finally prints the elapsed
 * wall-clock time.
 *
 * The destination is closed even when extract() throws, so a failed run does
 * not leave the output open; the exception is then re-thrown unchanged.
 *
 * @return void
 * @throws \Exception Whatever extract() (or any earlier step) throws.
 */
public function start()
{
    $startTime = microtime(true);

    $this->context = new ExtractionContext();

    // Create sources
    // NOTE(review): the templates source passes WikiTitle::NS_TEMPLATE where
    // the other sources pass '.svn' -- confirm the meaning of FileSource's
    // second argument.
    $this->ontologySource = new FileSource(self::ONTOLOGY_DIR, array('.svn'));
    $this->mappingsSource = new FileSource(self::MAPPINGS_DIR, array('.svn'));
    $this->pageSource = new FileSource(self::SOURCE_DIR, array('.svn'));
    $this->templatesSource = new FileSource(self::SOURCE_DIR, array(WikiTitle::NS_TEMPLATE));

    // Alternative sources reading from an XML file (disabled):
    //$this->pageSource = new XMLSource("D:\enwiki-latest-pages-articles.xml");
    //$this->templatesSource = new XMLSource("D:\enwiki-latest-pages-articles.xml");

    // Create destinations
    $destination = new FileQuadDestination(self::OUTPUT_FILE);
    $destinations = new SingletonQuadDestinations($destination);
    $this->context->setDestinations($destinations);

    // Load redirects
    $this->redirects = $this->loadRedirects();
    $this->context->setRedirects($this->redirects);

    // Load ontology and mappings
    // NOTE(review): loadMappings() is called without arguments here -- confirm
    // this matches the signature of the loadMappings() defined on this class.
    $this->loadOntology();
    $this->loadMappings();

    // Add extractors
    // NOTE(review): $this->extractor and $this->ontology are not assigned in
    // this method; presumably set by the load*() calls above -- TODO confirm.
    $this->extractor->addExtractor(LabelExtractor::load($this->ontology, $this->context));
    $this->extractor->addExtractor(GeoExtractor::load($this->ontology, $this->context));
    $this->extractor->addExtractor(AbstractExtractor::load($this->ontology, $this->context));

    // Extract
    $destination->open();
    try {
        $this->extract();
    } catch (\Exception $e) {
        // Release the destination before propagating the failure.
        $destination->close();
        throw $e;
    }
    $destination->close();

    $stopTime = microtime(true);
    echo "Total time: " . ($stopTime - $startTime);
}
/**
 * Builds an ExtractionManager populated with the mapping-based extractors.
 *
 * All pages found under $this->mappingDir are loaded, handed to
 * MappingExtractor::load(), and destroyed again once every extractor has
 * been registered.
 *
 * TODO: Create an interface ExtractorBuilder with methods addProperties() and createExtractor().
 *       Give this method an array of ExtractorBuilders.
 *
 * @param $ontology  ontology passed through to each extractor's load()
 * @param $context   extraction context passed through to each extractor's load()
 * @param $abstracts should AbstractExtractor be added?
 * @return ExtractionManager manager holding the mapping and label extractors,
 *                           plus the abstract extractor when $abstracts is truthy
 */
public function loadMappings($ontology, $context, $abstracts = true)
{
    $pages = $this->loadAllPages($this->mappingDir);

    $manager = new ExtractionManager();

    // Template-mapping extractor, built from the loaded mapping pages.
    $mappingExtractor = MappingExtractor::load($pages, $ontology, $context);
    $manager->addExtractor($mappingExtractor);

    // Label extractor is always registered.
    $labelExtractor = \dbpedia\mapping\LabelExtractor::load($ontology, $context);
    $manager->addExtractor($labelExtractor);

    // Abstract extractor is optional.
    if ($abstracts) {
        $abstractExtractor = \dbpedia\mapping\AbstractExtractor::load($ontology, $context);
        $manager->addExtractor($abstractExtractor);
    }

    // The mapping pages are only needed while the extractors are being built.
    $this->destroyAllPages($pages);

    return $manager;
}