Exemplo n.º 1
0
 /**
  * Runs the wrapped extractor on a single page and returns its result.
  *
  * When the extractor is not active, a newly constructed ExtractionResult is
  * returned and no extraction takes place. Otherwise the extraction is timed,
  * validated if the 'validateExtractors' option is set, the number of created
  * triples is added to the statistics, and - if the extractor requests OWL
  * axiom annotations - every triple receives a "modified" and an
  * "extracted by" annotation.
  *
  * @param mixed $pageID     page identifier; also set as the extractor's page URI
  * @param mixed $pageTitle  forwarded unchanged to the wrapped extractor
  * @param mixed $pageSource forwarded unchanged to the wrapped extractor
  *
  * @return mixed the wrapped extractor's result, or a new ExtractionResult
  *               when the extractor is inactive
  */
 public function extractPage($pageID, $pageTitle, $pageSource)
 {
     $this->extractor->setPageURI($pageID);

     // Guard: an inactive extractor only produces a new result object.
     if (!$this->extractor->isActive()) {
         $result = new ExtractionResult($pageID, $this->extractor->getLanguage(), $this->getExtractorID());

         return $result;
     }

     // Timed extraction, keyed by the extractor's own ID.
     Timer::start($this->extractor->getExtractorID());
     $result = $this->extractor->extractPage($pageID, $pageTitle, $pageSource);
     Timer::stop($this->extractor->getExtractorID());

     // The validation timer runs even when validation itself is disabled.
     Timer::start('validation');
     if (Options::getOption('validateExtractors')) {
         ValidateExtractionResult::validate($result, $this->extractor);
     }
     Timer::stop('validation');

     // Count the created triples once per extractor and once globally.
     foreach (array($this->extractor->getExtractorID(), 'Total') as $statisticsGroup) {
         Statistics::increaseCount($statisticsGroup, 'created_Triples', count($result->getTriples()));
     }

     if (!$this->extractor->isGenerateOWLAxiomAnnotations()) {
         return $result;
     }

     $extractedTriples = $result->getTriples();
     if (count($extractedTriples) === 0) {
         return $result;
     }

     foreach ($extractedTriples as $extractedTriple) {
         $extractedTriple->addDCModifiedAnnotation();
         $extractedTriple->addExtractedByAnnotation($this->extractor->getExtractorID());
     }

     return $result;
 }
    /**
     * Builds the SPARUL statements that replace the stored triples and OWL
     * axiom annotations of the current subject ($this->uri) with the triples
     * in $this->tripleFromExtractor.
     *
     * Nothing is executed here; the statements are only assembled as strings.
     *
     * Side effect: the predicates from the 'stringPredicateWithForeignlanguages'
     * option are appended to $this->predicateFilterList, so they are added again
     * on every call.
     *
     * @return array two entries:
     *               'del' => delete statements keyed by name
     *                        ('delete_with_subject_not_static', 'delete_english<N>',
     *                        'delete_corresponding_annotations'),
     *               'ins' => 'insert_triples' and 'insert_annotations' (lists with
     *                        one statement per triple) plus 'globalTriplePattern'
     *                        and 'globalAnnotationPattern' (one aggregated
     *                        statement each)
     */
    public function smarterDiffItOWLAxioms()
    {
        Timer::start('LiveUpdateDestination::diffItOWLAxioms::total');
        Timer::start('LiveUpdateDestination::diffItOWLAxioms::preparation');
        //a store is needed for language here
        //(currently disabled: TripleDiff receives null as its store)
        $store = null;
        //$store = new SPARQLToRDFTriple($this->uri, $this->language);
        $propLangFilter = Options::getOption('stringPredicateWithForeignlanguages');
        $graphURI = Options::getOption('graphURI');
        $annotationGraphURI = Options::getOption('annotationGraphURI');
        //generate the regex filter according to namespaces
        //includes language properties
        //NOTE: this appends to the instance property, not to a local copy
        foreach ($propLangFilter as $one) {
            $this->predicateFilterList[] = $one;
        }
        $tripleDiff = new TripleDiff($this->uri, $this->language, $this->predicateFilterList, $this->objectFilterList, $this->predicateObjectFilterList, $store);
        $filterForNotAnnotatedTriples = $tripleDiff->createFilter($this->predicateFilterList, $this->objectFilterList, $this->predicateObjectFilterList);
        //$langTriples = $store->getRDFTripleForLangProperties($propLangFilter);
        //create a filter for extractors
        //SPARUL representation of the subject, reused in every statement below
        $subjectpattern = $this->uri->toSPARULPattern($this->storespecific);
        /*
                    $extractedByPattern = RDFtriple::URI(DBM_ORIGIN)->toSPARULPattern($this->storespecific);
        
                    $extractorFilter = "";
                    $extTerms = array();
                    foreach ($this->activeExtractors as $one){
                         $u = new URI($one);
                         $extPattern = $u->toSPARULPattern($this->storespecific);
                         $extTerms[] = ' ?extractor = '.$extPattern.' ';
                    }
                    foreach ($this->purgeExtractors as $one){
                         $u = new URI($one);
                         $extPattern = $u->toSPARULPattern($this->storespecific);
                         $extTerms[] = ' ?extractor = '.$extPattern.' ';
                    }
                    $extractorFilter = 'FILTER ( '.TripleDiff::assembleTerms($extTerms,'||').') . ';
        */
        $preparation = Timer::stop('LiveUpdateDestination::diffItOWLAxioms::preparation');
        $this->log(TRACE, 'prep needed: ' . $preparation);
        //***********************
        //DELETE ALL NON STATIC TRIPLES
        //**********************
        //delete all triples with the current subject
        //according to the filters
        //do not delete special properties see below
        //Timer::start('LiveUpdateDestination::diffItOWLAxioms::notAnnotated');
        //$deleteSPARUL is created implicitly as an array by this first assignment
        $deleteSPARUL['delete_with_subject_not_static'] = 'DELETE  FROM <' . $graphURI . '>
    { ' . $subjectpattern . ' ?p ?o }
WHERE {
    ' . $subjectpattern . ' ?p ?o .
    FILTER (' . $filterForNotAnnotatedTriples . ').
}';
        //***********************
        //LANGUAGE
        //***********************
        //delete all triples with the current subject
        //where the lang properties with string object
        //from other language version are given, which should stay
        //one statement per language predicate, keyed 'delete_english0', 'delete_english1', ...
        //the language tag 'en' is hard-coded in the FILTER
        $x = 0;
        //var_dump($langTriples);
        foreach ($propLangFilter as $one) {
            $u = new URI($one, false);
            $deleteSPARUL['delete_english' . $x++] = 'DELETE FROM GRAPH <' . $graphURI . '>
{ ' . $subjectpattern . ' ' . $u->toSPARULPattern($this->storespecific) . ' ?o }
WHERE {
    ' . $subjectpattern . ' ' . $u->toSPARULPattern($this->storespecific) . ' ?o .
    FILTER ( lang(?o) = \'en\').
}';
        }
        //****************************
        //DELETE ANNOTATIONS
        //****************************
        //delete the corresponding annotations
        //i.e. every triple of each axiom whose OWL_SUBJECT is the current subject
        $deleteSPARUL['delete_corresponding_annotations'] = 'DELETE  FROM <' . $annotationGraphURI . '>
    { ?axiom ?axp  ?axo .  }
WHERE {
    ?axiom <' . OWL_SUBJECT . '> ' . $subjectpattern . ' .
    ?axiom ?axp  ?axo .
}';
        //echo $deleteSPARUL['delete_corresponding_annotations'] ;die;
        //***********************
        //MISSING: DELETE ANOMALIES I.E. source Page
        //***********************
        //TODO go to infobox extractor and
        //add an annotation to all subject/rating objects to which subject they belong
        //and then delete them also
        /*
        $deleteSPARUL['delete_anomalies'] =
        'DELETE  FROM <' . $annotationGraphURI . '>
            { ?axiom ?axp  ?axo .  }
        WHERE {
            ?axiom <'.DBM_ONDELETECASCADE.'> '.$subjectpattern.' .
            ?axiom ?axp  ?axo .
        }';
        */
        //**********************
        //GENERATE NEW TRIPLES
        //**********************
        //two forms are produced side by side: one INSERT per triple
        //(and per triple's annotations), and one aggregated INSERT per graph
        Timer::start('LiveUpdateDestination::diffItOWLAxioms::insertSPARULCreation');
        $insertSPARUL = array();
        $insertSPARUL['insert_triples'] = array();
        $insertSPARUL['insert_annotations'] = array();
        $globalannotationpattern = "";
        $globaltriplepattern = "";
        $this->log(DEBUG, 'number of triples: ' . count($this->tripleFromExtractor));
        foreach ($this->tripleFromExtractor as $triple) {
            $pattern = $triple->toSPARULPattern($this->storespecific);
            $insertSPARUL['insert_triples'][] = 'INSERT INTO GRAPH <' . $graphURI . '> { ' . $pattern . ' }';
            $globaltriplepattern .= $pattern . "\n";
            $annotations = $triple->getOWLAxiomAnnotations();
            Statistics::increaseCount('Total', 'createdAnnotations', count($annotations));
            if (count($annotations) > 0) {
                //$pattern is reused: from here on it holds the annotation patterns
                $pattern = "";
                foreach ($annotations as $ann) {
                    $current = $ann->toSPARULPattern($this->storespecific);
                    $pattern .= $current;
                    $globalannotationpattern .= $current . "\n";
                }
                //annotations for one triple are aggregated to one query
                $insertSPARUL['insert_annotations'][] = 'INSERT INTO GRAPH <' . $annotationGraphURI . '> { ' . $pattern . ' }';
            }
        }
        $this->log(DEBUG, 'number of annotation inserts: ' . count($insertSPARUL['insert_annotations']));
        $insertSPARUL['globalAnnotationPattern'] = 'INSERT INTO GRAPH <' . $annotationGraphURI . '> { ' . $globalannotationpattern . ' }';
        $insertSPARUL['globalTriplePattern'] = 'INSERT INTO GRAPH <' . $graphURI . '> { ' . $globaltriplepattern . ' }';
        $this->log(DEBUG, 'length globalTriplePattern: ' . strlen($insertSPARUL['globalTriplePattern']));
        $this->log(DEBUG, 'length globalAnnotationPattern: ' . strlen($insertSPARUL['globalAnnotationPattern']));
        Timer::stop('LiveUpdateDestination::diffItOWLAxioms::insertSPARULCreation');
        //hand both statement sets back to the caller
        $result = array();
        $result['del'] = $deleteSPARUL;
        $result['ins'] = $insertSPARUL;
        Timer::stop('LiveUpdateDestination::diffItOWLAxioms::total');
        return $result;
    }
Exemplo n.º 3
0
 //$destination = new SimpleDumpDestination();
 $group = new ExtractionGroup($destination);

 // Estimate the page type: a page in namespace 14 whose title starts with the
 // namespace name is a category; otherwise it is a redirect or an article.
 $namespaceId = $metainfo['namespaceId'];
 $pageSource = $collection->getSource($pageTitle);
 if ($namespaceId == 14 && strpos($pageTitle, $metainfo['namespaceName']) === 0) {
     $type = CATEGORY;
 } elseif (Util::isRedirect($pageSource, $language)) {
     // e.g. "#REDIRECT [[Blueprint (CSS framework)]]"
     $type = REDIRECT;
 } else {
     $type = ARTICLE;
 }
 Statistics::increaseCount(STAT_TOTAL, $type);
 Logger::info(sprintf(
     '%s: %s (%s, %s)',
     $type,
     $pageURI->getURI(),
     $count,
     mb_detect_encoding($pageURI->getURI())
 ));

 // Instantiate every extractor configured for this page type, hand it its
 // status and the page meta information, and add it to the group.
 foreach ($extractors[$type] as $extractor => $status) {
     $extractorClassName = $extractor . EXTRACTOR;
     Logger::debug(sprintf('%s Status: %s', $extractorClassName, $status));

     $extractorClass = new ReflectionClass($extractorClassName);
     $extractorInstance = $extractorClass->newInstance();
     $extractorInstance->setStatus($status);
     $extractorInstance->addAdditionalInfo($metainfo);

     Statistics::addExtractorMetaArray($extractorInstance->getMetadata());
     $group->addExtractor($extractorInstance);
 }
 $job->addExtractionGroup($group);
 /**
  * Inserts the OWL axiom annotations of the given triples into the
  * annotation graph with a single insert operation.
  *
  * Returns immediately when inserts are switched off for debugging or when
  * annotation generation is disabled. If the 'debug_run_tests' option is set,
  * the annotation triples of the current subject are counted before and after
  * the insert and the outcome is logged.
  *
  * @param mixed $triplesToAdd iterable of triple objects that provide
  *                            getOWLAxiomAnnotationsAsNTriple()
  *
  * @return void
  */
 public function _odbc_ttlp_insert_annotations($triplesToAdd)
 {
     if ($this->debug_turn_off_insert || !$this->generateOWLAxiomAnnotations) {
         return;
     }

     Timer::start('LiveUpdateDestination::_odbc_ttlp_insert_annotations');

     // Step 1: serialise all annotations into one N-Triples string.
     Timer::start('LiveUpdateDestination::_odbc_ttlp_insert_annotations::string_creation');
     $ntripleChunks = array();
     $annotationTotal = 0;
     foreach ($triplesToAdd as $triple) {
         $annotations = $triple->getOWLAxiomAnnotationsAsNTriple($this->oaiId);
         $ntripleChunks[] = implode('', $annotations);

         $createdHere = count($annotations);
         Statistics::increaseCount('Total', 'createdAnnotations', $createdHere);
         $annotationTotal += $createdHere;
     }
     $annotationNTriples = implode('', $ntripleChunks);
     $this->log(DEBUG, 'number of annotation inserts: ' . $annotationTotal);
     $this->log(DEBUG, 'length globalAnnotationPattern: ' . strlen($annotationNTriples));
     Timer::stop('LiveUpdateDestination::_odbc_ttlp_insert_annotations::string_creation');

     // Optional self-test, part 1: count the annotation triples before the insert.
     $testWherePart = 'WHERE { ?s <' . OWL_SUBJECT . '> ' . $this->subjectSPARULpattern . ' . ?s ?p ?o} ';
     if (Options::getOption('debug_run_tests')) {
         $countbefore = $this->_testwherepart($testWherePart, $this->annotationGraphURI);
     }

     // Step 2: run the insert; only a successful one is counted.
     Timer::start('LiveUpdateDestination::_odbc_ttlp_insert_annotations::insert_operation');
     $insertSucceeded = $this->_odbc_ttlp_execute($annotationNTriples, $this->annotationGraphURI);
     Timer::stop('LiveUpdateDestination::_odbc_ttlp_insert_annotations::insert_operation');
     if ($insertSucceeded) {
         $this->counterInserts += 1;
     }

     Timer::stop('LiveUpdateDestination::_odbc_ttlp_insert_annotations');

     // Optional self-test, part 2: count again and compare with the first count.
     if (Options::getOption('debug_run_tests')) {
         $countafter = $this->_testwherepart($testWherePart, $this->annotationGraphURI);
         $this->log(INFO, 'TEST _odbc_ttlp_insert_annotations, before: ' . $countbefore . ' after: ' . $countafter . ' triples');

         // Only a shrinking count combined with a non-empty insert is a failure.
         $countDidNotShrink = $countafter - $countbefore >= 0;
         if ($countDidNotShrink || $annotationTotal <= 0) {
             $this->log(INFO, 'SUCCESS');
         } else {
             $this->log(WARN, 'TEST FAILED, INSERT ANNOTATIONS AFTER SHOULD BE BIGGER THAN BEFORE');
         }
     }
 }