/**
 * Runs the wrapped extractor on one page and post-processes its result.
 *
 * If the wrapped extractor reports itself inactive, a freshly constructed
 * ExtractionResult is returned and no timing, validation, statistics or
 * annotation work is done. Otherwise the extraction is timed, validated
 * when the 'validateExtractors' option is set, counted in the statistics
 * and, when the extractor requests it, every triple receives its
 * "modified" and "extracted by" annotations.
 *
 * @param mixed $pageID     Page identifier; also handed to setPageURI().
 * @param mixed $pageTitle  Passed through unchanged to the wrapped extractor.
 * @param mixed $pageSource Passed through unchanged to the wrapped extractor.
 * @return mixed The wrapped extractor's result, or a new ExtractionResult
 *               when the extractor is inactive.
 */
public function extractPage($pageID, $pageTitle, $pageSource) {
    $this->extractor->setPageURI($pageID);

    if (!$this->extractor->isActive()) {
        // NOTE(review): this path asks $this for the extractor ID while the
        // active path asks $this->extractor — confirm both yield the same ID.
        return new ExtractionResult($pageID, $this->extractor->getLanguage(), $this->getExtractorID());
    }

    // The ID is used as timer name, statistics key and annotation value;
    // look it up once instead of once per use and once per triple.
    $extractorID = $this->extractor->getExtractorID();

    Timer::start($extractorID);
    $result = $this->extractor->extractPage($pageID, $pageTitle, $pageSource);
    Timer::stop($extractorID);

    // The 'validation' timer runs even when validation is switched off.
    Timer::start('validation');
    if (Options::getOption('validateExtractors')) {
        ValidateExtractionResult::validate($result, $this->extractor);
    }
    Timer::stop('validation');

    // Read the triples only after validation, as the original did.
    $triples = $result->getTriples();
    $tripleCount = count($triples);
    Statistics::increaseCount($extractorID, 'created_Triples', $tripleCount);
    Statistics::increaseCount('Total', 'created_Triples', $tripleCount);

    if ($this->extractor->isGenerateOWLAxiomAnnotations()) {
        // foreach over an empty list is a no-op, so no size check is needed.
        foreach ($triples as $triple) {
            $triple->addDCModifiedAnnotation();
            $triple->addExtractedByAnnotation($extractorID);
        }
    }

    return $result;
}
/**
 * Assembles the SPARUL statements that replace the triples and axiom
 * annotations of the current subject ($this->uri).
 *
 * Nothing is sent to a store here; the statements are only built as strings.
 *
 * @return array Two entries:
 *               'del' => delete statements keyed by purpose
 *                        ('delete_with_subject_not_static', 'delete_english<N>',
 *                        'delete_corresponding_annotations');
 *               'ins' => 'insert_triples' and 'insert_annotations' (lists of
 *                        single statements) plus 'globalTriplePattern' and
 *                        'globalAnnotationPattern' (one combined statement each).
 */
public function smarterDiffItOWLAxioms() {
    Timer::start('LiveUpdateDestination::diffItOWLAxioms::total');
    Timer::start('LiveUpdateDestination::diffItOWLAxioms::preparation');

    // TripleDiff is given no store; the SPARQLToRDFTriple store is disabled.
    $store = null;

    $languagePredicates = Options::getOption('stringPredicateWithForeignlanguages');
    $dataGraph = Options::getOption('graphURI');
    $annotationGraph = Options::getOption('annotationGraphURI');

    // The language predicates become part of the predicate filter.
    // NOTE(review): this appends to the instance's list on every call, so a
    // second call would add the same predicates again — confirm single use.
    foreach ($languagePredicates as $languagePredicate) {
        $this->predicateFilterList[] = $languagePredicate;
    }

    $tripleDiff = new TripleDiff(
        $this->uri,
        $this->language,
        $this->predicateFilterList,
        $this->objectFilterList,
        $this->predicateObjectFilterList,
        $store
    );
    $nonStaticFilter = $tripleDiff->createFilter(
        $this->predicateFilterList,
        $this->objectFilterList,
        $this->predicateObjectFilterList
    );

    $subject = $this->uri->toSPARULPattern($this->storespecific);

    $preparation = Timer::stop('LiveUpdateDestination::diffItOWLAxioms::preparation');
    $this->log(TRACE, 'prep needed: ' . $preparation);

    $deletes = array();

    // 1) Every triple of the subject that passes the filter.
    $deletes['delete_with_subject_not_static'] = 'DELETE FROM <' . $dataGraph . '> { ' . $subject . ' ?p ?o } WHERE { ' . $subject . ' ?p ?o . FILTER (' . $nonStaticFilter . '). }';

    // 2) For each language predicate, only the objects tagged 'en'.
    // NOTE(review): the tag is fixed to 'en' although $this->language is
    // available — confirm this is intended.
    $suffix = 0;
    foreach ($languagePredicates as $languagePredicate) {
        $predicateURI = new URI($languagePredicate, false);
        $templatePredicate = $predicateURI->toSPARULPattern($this->storespecific);
        $wherePredicate = $predicateURI->toSPARULPattern($this->storespecific);
        $deletes['delete_english' . $suffix] = 'DELETE FROM GRAPH <' . $dataGraph . '> { ' . $subject . ' ' . $templatePredicate . ' ?o } WHERE { ' . $subject . ' ' . $wherePredicate . ' ?o . FILTER ( lang(?o) = \'en\'). }';
        $suffix++;
    }

    // 3) All annotation triples of axioms whose OWL subject is this subject.
    $deletes['delete_corresponding_annotations'] = 'DELETE FROM <' . $annotationGraph . '> { ?axiom ?axp ?axo . } WHERE { ?axiom <' . OWL_SUBJECT . '> ' . $subject . ' . ?axiom ?axp ?axo . }';

    // TODO: anomalies (e.g. source page) are not deleted yet. The infobox
    // extractor would have to annotate subject/rating objects with the
    // subject they belong to so they can be removed here as well.

    Timer::start('LiveUpdateDestination::diffItOWLAxioms::insertSPARULCreation');

    $tripleInserts = array();
    $annotationInserts = array();
    $allTriplePatterns = "";
    $allAnnotationPatterns = "";

    $this->log(DEBUG, 'number of triples: ' . count($this->tripleFromExtractor));
    foreach ($this->tripleFromExtractor as $triple) {
        $triplePattern = $triple->toSPARULPattern($this->storespecific);
        $tripleInserts[] = 'INSERT INTO GRAPH <' . $dataGraph . '> { ' . $triplePattern . ' }';
        $allTriplePatterns .= $triplePattern . "\n";

        $annotations = $triple->getOWLAxiomAnnotations();
        $annotationCount = count($annotations);
        Statistics::increaseCount('Total', 'createdAnnotations', $annotationCount);
        if ($annotationCount <= 0) {
            continue;
        }

        // All annotations of one triple go into a single insert statement.
        $annotationBlock = "";
        foreach ($annotations as $annotation) {
            $annotationPattern = $annotation->toSPARULPattern($this->storespecific);
            $annotationBlock .= $annotationPattern;
            $allAnnotationPatterns .= $annotationPattern . "\n";
        }
        $annotationInserts[] = 'INSERT INTO GRAPH <' . $annotationGraph . '> { ' . $annotationBlock . ' }';
    }
    $this->log(DEBUG, 'number of annotation inserts: ' . count($annotationInserts));

    $inserts = array(
        'insert_triples' => $tripleInserts,
        'insert_annotations' => $annotationInserts,
        'globalAnnotationPattern' => 'INSERT INTO GRAPH <' . $annotationGraph . '> { ' . $allAnnotationPatterns . ' }',
        'globalTriplePattern' => 'INSERT INTO GRAPH <' . $dataGraph . '> { ' . $allTriplePatterns . ' }'
    );

    $this->log(DEBUG, 'length globalTriplePattern: ' . strlen($inserts['globalTriplePattern']));
    $this->log(DEBUG, 'length globalAnnotationPattern: ' . strlen($inserts['globalAnnotationPattern']));
    Timer::stop('LiveUpdateDestination::diffItOWLAxioms::insertSPARULCreation');

    $result = array('del' => $deletes, 'ins' => $inserts);
    Timer::stop('LiveUpdateDestination::diffItOWLAxioms::total');
    return $result;
}
// Build one extraction group for this page, bound to $destination.
$group = new ExtractionGroup($destination);

// Classify the page: namespace id 14 with a title that starts with the
// namespace name is a CATEGORY; otherwise a page whose source is a
// redirect is a REDIRECT; everything else is an ARTICLE.
$namespaceId = $metainfo['namespaceId'];
$pageSource = $collection->getSource($pageTitle);
if ($namespaceId == 14 && strpos($pageTitle, $metainfo['namespaceName']) === 0) {
    $type = CATEGORY;
} elseif (Util::isRedirect($pageSource, $language)) {
    // Redirect source looks like: #REDIRECT [[Blueprint (CSS framework)]]
    $type = REDIRECT;
} else {
    $type = ARTICLE;
}

Statistics::increaseCount(STAT_TOTAL, $type);
Logger::info($type . ": " . $pageURI->getURI() . " (" . $count . ", " . mb_detect_encoding($pageURI->getURI()) . ")");

// Instantiate every extractor configured for this page type, hand it its
// status and the page meta information, and register it with the group.
foreach ($extractors[$type] as $extractor => $status) {
    $extractorClassName = $extractor . EXTRACTOR;
    Logger::debug($extractorClassName . " Status: " . $status);

    $extractorClass = new ReflectionClass($extractorClassName);
    $extractorInstance = $extractorClass->newInstance();
    $extractorInstance->setStatus($status);
    $extractorInstance->addAdditionalInfo($metainfo);
    Statistics::addExtractorMetaArray($extractorInstance->getMetadata());

    $group->addExtractor($extractorInstance);
}

$job->addExtractionGroup($group);
/**
 * Inserts the OWL axiom annotations of the given triples into the
 * annotation graph with a single call to _odbc_ttlp_execute().
 *
 * Returns immediately when inserts are switched off for debugging or when
 * annotation generation is disabled. When the 'debug_run_tests' option is
 * set, the triples matching this subject's annotations are counted before
 * and after the insert and the outcome is logged.
 *
 * @param array $triplesToAdd Triples whose annotations are inserted; each
 *                            element must provide
 *                            getOWLAxiomAnnotationsAsNTriple().
 * @return void
 */
public function _odbc_ttlp_insert_annotations($triplesToAdd) {
    if ($this->debug_turn_off_insert || !$this->generateOWLAxiomAnnotations) {
        return;
    }

    Timer::start('LiveUpdateDestination::_odbc_ttlp_insert_annotations');
    Timer::start('LiveUpdateDestination::_odbc_ttlp_insert_annotations::string_creation');

    // Concatenate the serialised annotations of all triples into one payload.
    $globalAnnotationNTriplePattern = "";
    $annotationCounter = 0;
    foreach ($triplesToAdd as $triple) {
        $annotations = $triple->getOWLAxiomAnnotationsAsNTriple($this->oaiId);
        $annotationCount = count($annotations);
        $globalAnnotationNTriplePattern .= implode('', $annotations);
        Statistics::increaseCount('Total', 'createdAnnotations', $annotationCount);
        $annotationCounter += $annotationCount;
    }
    $this->log(DEBUG, 'number of annotation inserts: ' . $annotationCounter);
    $this->log(DEBUG, 'length globalAnnotationPattern: ' . strlen($globalAnnotationNTriplePattern));
    Timer::stop('LiveUpdateDestination::_odbc_ttlp_insert_annotations::string_creation');

    // Read the debug switch once so the "before" and "after" measurements
    // are always taken together and $countbefore is defined whenever used.
    $runTests = Options::getOption('debug_run_tests');
    $where = null;
    $countbefore = null;
    if ($runTests) {
        $where = 'WHERE { ?s <' . OWL_SUBJECT . '> ' . $this->subjectSPARULpattern . ' . ' . "\n" . '?s ?p ?o} ';
        $countbefore = $this->_testwherepart($where, $this->annotationGraphURI);
    }

    // NOTE(review): the insert is attempted even when no annotations were
    // produced (empty payload) — confirm _odbc_ttlp_execute() tolerates that.
    Timer::start('LiveUpdateDestination::_odbc_ttlp_insert_annotations::insert_operation');
    $globalSuccess = $this->_odbc_ttlp_execute($globalAnnotationNTriplePattern, $this->annotationGraphURI);
    Timer::stop('LiveUpdateDestination::_odbc_ttlp_insert_annotations::insert_operation');

    // There is no per-triple fallback when the bulk insert fails.
    if ($globalSuccess) {
        $this->counterInserts += 1;
    }

    Timer::stop('LiveUpdateDestination::_odbc_ttlp_insert_annotations');

    if ($runTests) {
        $countafter = $this->_testwherepart($where, $this->annotationGraphURI);
        $this->log(INFO, 'TEST _odbc_ttlp_insert_annotations, before: ' . $countbefore . ' after: ' . $countafter . ' triples');
        // The count has to grow when annotations were inserted. The previous
        // check (< 0) only fired if the count shrank, which an insert cannot
        // cause, so a silent no-op insert was reported as SUCCESS.
        if ($countafter - $countbefore <= 0 && $annotationCounter > 0) {
            $this->log(WARN, 'TEST FAILED, INSERT ANNOTATIONS AFTER SHOULD BE BIGGER THAN BEFORE');
        } else {
            $this->log(INFO, 'SUCCESS');
        }
    }
}