/**
 * Stores the page URI and language and loads the configured filter lists.
 *
 * Three optional settings are read through Options:
 *  - 'predicateFilter':       every entry is trimmed and appended to
 *                             $this->predicateFilterList
 *  - 'objectFilter':          every entry is trimmed and appended to
 *                             $this->objectFilterList
 *  - 'predicateObjectFilter': every entry is appended unchanged to
 *                             $this->predicateFilterList
 *
 * @param mixed $pageID   page identifier, converted with RDFTriple::page()
 * @param mixed $language language value kept on the instance
 */
function __construct($pageID, $language)
{
    $this->uri = RDFTriple::page($pageID);
    $this->language = $language;

    if (Options::isOptionSet('predicateFilter')) {
        foreach (Options::getOption('predicateFilter') as $entry) {
            $this->predicateFilterList[] = trim($entry);
        }
    }

    if (Options::isOptionSet('objectFilter')) {
        foreach (Options::getOption('objectFilter') as $entry) {
            $this->objectFilterList[] = trim($entry);
        }
    }

    if (Options::isOptionSet('predicateObjectFilter')) {
        foreach (Options::getOption('predicateObjectFilter') as $entry) {
            // NOTE(review): entries look like "<predicate> <object>" pairs, yet
            // they are stored untrimmed in the plain predicate list. This keeps
            // the long-standing behaviour; confirm whether a dedicated
            // predicate/object list was intended before changing it.
            $this->predicateFilterList[] = $entry;
        }
    }
}
/**
 * Emits one "wikilink" triple per internal link found in the page source.
 *
 * The links are obtained from $this->extract_internal_links(); each one
 * becomes the object of a triple whose subject is the current page.
 *
 * @param mixed $pageID     identifier of the page being extracted
 * @param mixed $pageTitle  not used by this extractor
 * @param mixed $pageSource page source handed to extract_internal_links()
 * @return ExtractionResult result holding the generated triples
 */
public function extractPage($pageID, $pageTitle, $pageSource)
{
    $extraction = new ExtractionResult($pageID, $this->language, self::extractorID);

    $linkTargets = $this->extract_internal_links($pageSource, $this->language);
    foreach ($linkTargets as $linkTarget) {
        // Fresh node objects per triple, exactly one set per link.
        $subject = RDFTriple::page($pageID);
        $predicate = RDFTriple::predicate("wikilink");
        $object = RDFTriple::page($linkTarget);

        $extraction->addTriple($subject, $predicate, $object);
    }

    return $extraction;
}
/**
 * Produces SKOS triples for category pages.
 *
 * When $pageID contains "Category:", the page gets a skos:prefLabel (the
 * decoded page title), an rdf:type of skos:Concept, and one skos:broader
 * triple for every [[Category:...]] link found in the page source.
 * For any other page the result is returned empty.
 *
 * @param mixed  $pageID     identifier of the page being extracted
 * @param mixed  $pageTitle  title, decoded with $this->decode_title()
 * @param string $pageSource page source scanned for category links
 * @return ExtractionResult result holding the generated triples
 */
public function extractPage($pageID, $pageTitle, $pageSource)
{
    $result = new ExtractionResult($pageID, $this->language, self::extractorID);

    // Only category pages carry SKOS information.
    if (!preg_match('/Category:/', $pageID)) {
        return $result;
    }

    $result->addTriple(
        RDFTriple::page($pageID),
        RDFTriple::URI("http://www.w3.org/2004/02/skos/core#prefLabel"),
        RDFTriple::Literal($this->decode_title($pageTitle), null, $this->language)
    );
    $result->addTriple(
        RDFTriple::page($pageID),
        RDFTriple::URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
        RDFTriple::URI("http://www.w3.org/2004/02/skos/core#Concept")
    );

    // Capture the category name only up to the first "|" or "]":
    //  - a greedy ".*" would merge several links on one line into one match
    //  - everything after "|" is a sort key, not part of the category name
    $linkPattern = '/\[\[Category:([^\]|\n]*)(?:\|[^\]\n]*)?\]\]/';
    if (preg_match_all($linkPattern, $pageSource, $links, PREG_SET_ORDER)) {
        foreach ($links as $link) {
            $parentCategory = trim($link[1]);
            if ($parentCategory === '') {
                // "[[Category:]]" or "[[Category:|x]]" names no category.
                continue;
            }
            $result->addTriple(
                RDFTriple::page($pageID),
                RDFTriple::URI("http://www.w3.org/2004/02/skos/core#broader"),
                RDFTriple::page("Category:" . $parentCategory)
            );
        }
    }

    return $result;
}
/**
 * Generates triples for every template found in $value.
 *
 * The return value is a three-element array:
 *   [0] triples generated from template arguments
 *   [1] meta triples (rdf:type statements and "wiki page uses template"
 *       statements)
 *   [2] names of the used templates, as a map templateName => 1
 *
 * Argument values that are not handled by a triple generator are scanned
 * recursively for sub templates, using a breadcrumb extended by the
 * current template name, template index and property name.
 *
 * @param BreadCrumb $breadCrumb position of $value relative to the root page
 * @param mixed      $value      text handed to SimpleWikiTemplateMatcher::match()
 * @return array the three-element array described above
 */
private function myGenerate(BreadCrumb $breadCrumb, $value)
{
    // triples, meta triples, used templates
    $result = array(array(), array(), array());

    $rootSubjectUri = RDFTriple::page($breadCrumb->getRoot());

    // Related classes are only collected on depth 0.
    $relatedClasses = array();

    // 'parent' means the parent of the value - thus subject and predicate
    $parentName = $this->breadCrumbTransformer->transform($breadCrumb);
    $parentResource = RDFTriple::page($parentName);

    // NOTE(review): $parentPropertyName is not consumed yet; it is only
    // needed for the parent-child link described in the TODO further down.
    $parentPropertyName = null;
    $topNode = $breadCrumb->peekTop(0);
    if (isset($topNode)) {
        $parentPropertyName = $topNode->getPropertyName();
    }

    // Get all templates on this site, indexed by name
    // (there may be multiple templates with the same name)
    $nameToTemplates = SimpleWikiTemplateMatcher::match($value);

    foreach ($nameToTemplates as $templateName => $templates) {
        if (strlen($templateName) < 1) {
            continue;
        }

        $templateName = $this->mediaWikiUtil->toCanonicalWikiCase($templateName);
        if (!$this->templateNameFilter->doesAccept($templateName)) {
            continue;
        }

        $templateUri = RDFTriple::URI(DB_TEMPLATE_NS . $templateName, false);
        $result[2][$templateName] = 1;

        // Get annotations for the template - if there are any.
        // They are only looked up for the page itself (depth 0), so sub
        // templates never get mapped properties or related classes.
        $lookupName = "Template:{$templateName}/doc";
        $ta = null;
        if ($breadCrumb->getDepth() == 0) {
            $ta = $this->templateDb->getTemplateAnnotation($lookupName);
            if (isset($ta)) {
                foreach ($ta->getRelatedClasses() as $item) {
                    $relatedClasses[$item] = 1;
                }
            }
        }

        foreach ($templates as $templateIndex => $template) {
            foreach ($template->getArguments() as $argumentName => $values) {
                // propertyNs defaults to DB_PROPERTY_NS unless there
                // exists a mapping in the templatedb. In that case it
                // is set to DB_ONTOLOGY_NS.
                $propertyNs = DB_PROPERTY_NS;
                $pa = null;
                if (isset($ta)) {
                    $pas = $ta->getPropertyAnnotations();
                    if (array_key_exists($argumentName, $pas)) {
                        $pa = $pas[$argumentName];
                        $propertyNs = DB_ONTOLOGY_NS;
                    }
                }

                // Fake a property mapping if there was none in the db.
                // This maps argumentName back to itself.
                if (!isset($pa)) {
                    // Unmapped properties may be suppressed by an option,
                    // but only on the page itself; sub resources always
                    // keep them.
                    if ($this->allowUnmappedProperties != true && $breadCrumb->getDepth() == 0) {
                        continue;
                    }

                    // Rename numeric argument names (e.g. 1 becomes
                    // property1); this is just cosmetic for the result.
                    if (is_numeric($argumentName)) {
                        $argumentName = "property{$argumentName}";
                    }

                    $pa = new PropertyAnnotation($argumentName);
                    $pa->addMapping(new PropertyMapping($argumentName));
                }

                foreach ($pa->getMappings() as $pm) {
                    $parseHint = $pm->getParseHint();

                    // Every mapping starts from the original argument name.
                    // Writing the renamed value back into $argumentName
                    // would leak it (and the camel-cased/encoded form) into
                    // the following mappings of the same argument.
                    $mappedName = $argumentName;
                    if (!isEmptyString($pm->getRenamedValue())) {
                        $mappedName = $pm->getRenamedValue();
                    }
                    $mappedName = trim($mappedName);

                    // Skip empty properties
                    if (strlen($mappedName) < 1) {
                        continue;
                    }

                    $childBreadcrumb = $breadCrumb->createClone();
                    $childBreadcrumb->push(new BreadcrumbNode($templateName, $templateIndex, $mappedName));
                    $templateChildName = $this->breadCrumbTransformer->transform($childBreadcrumb);

                    // If there is no parse hint we might be able to derive it
                    if (!isset($parseHint)) {
                        $parseHint = $this->deriveParseHintFromName($mappedName);
                    }

                    // Without a triple generator we fall through to the
                    // default handling below.
                    $tripleGenerator = $this->getTripleGenerator($parseHint);

                    $localResult = array(array(), array(), array());
                    if (isset($tripleGenerator)) {
                        foreach ($values as $valueIndex => $argumentValue) {
                            $argumentValue = trim($argumentValue);
                            if ($argumentValue === "") {
                                continue;
                            }

                            $generated = $tripleGenerator->generate($templateChildName, $mappedName, $argumentValue);
                            $localResult[0] = array_merge($localResult[0], $generated);
                        }
                    } else {
                        // No parse hint - default handling.
                        // Turn the argument name into a property name.
                        $propertyLocalName = encodeLocalName(propertyToCamelCase($mappedName));
                        if (in_array($propertyLocalName, $GLOBALS['W2RCFG']['ignoreProperties'])) {
                            continue;
                        }
                        $propertyName = $propertyNs . $propertyLocalName;

                        foreach ($values as $valueIndex => $argumentValue) {
                            $argumentValue = trim($argumentValue);
                            if ($argumentValue === "") {
                                continue;
                            }

                            // Special case for the "date" property: strip
                            // link brackets and normalise the en dash.
                            if ($propertyLocalName === "date") {
                                $argumentValue = str_replace(
                                    array("[", "]", "–"),
                                    array("", "", "-"),
                                    $argumentValue
                                );
                            }

                            // Parse out sub templates first, then the
                            // plain attribute value itself.
                            $subResources = $this->myGenerate($childBreadcrumb, $argumentValue);
                            for ($i = 0; $i < 3; ++$i) {
                                $localResult[$i] = array_merge($localResult[$i], $subResources[$i]);
                            }

                            $localResult[0] = array_merge(
                                $localResult[0],
                                parseAttributeValueWrapper($argumentValue, $templateChildName, $propertyName, $this->language)
                            );
                        }
                    }

                    // For each triple add the ExtractedFromTemplate annotation.
                    // Meta triples live in slot 1 and are therefore not annotated.
                    foreach ($localResult[0] as $triple) {
                        $triple->addExtractedFromTemplateAnnotation($templateUri);
                    }

                    // Add on delete cascade annotation
                    if ($breadCrumb->getDepth() > 1) {
                        foreach ($localResult[0] as $triple) {
                            $triple->addOnDeleteCascadeAnnotation($rootSubjectUri);
                        }
                    }

                    // merge the results
                    for ($i = 0; $i < 3; ++$i) {
                        $result[$i] = array_merge($result[$i], $localResult[$i]);
                    }
                }
            }

            // TODO: how to connect a sub-subject to the root subject?
            // Idea: for depth > 0 add a meta triple
            //   (parentResource, DB_PROPERTY_NS . encodeLocalName(parentPropertyName), page(templateChildName)).
        }
    }

    // rdf:type triples: the related classes, or owl:Thing as a default
    // for the page itself when no related class exists.
    if (count($relatedClasses) > 0) {
        foreach ($relatedClasses as $relatedClass => $dummy) {
            $result[1][] = new RDFTriple(
                $parentResource,
                RDFTriple::URI(RDF_TYPE, false),
                RDFTriple::URI(DB_ONTOLOGY_NS . $relatedClass, false)
            );
        }
    } elseif ($breadCrumb->getDepth() == 0) {
        $result[1][] = new RDFTriple(
            $parentResource,
            RDFTriple::URI(RDF_TYPE, false),
            RDFTriple::URI(OWL_THING, false)
        );
    }

    // Add the wiki page uses template triples - but only on depth 0
    if ($breadCrumb->getDepth() == 0) {
        foreach ($result[2] as $name => $dummy) {
            $result[1][] = new RDFTriple(
                $parentResource,
                self::$wikiPageUsesTemplateUri,
                RDFTriple::URI(DB_TEMPLATE_NS . $name, false)
            );
        }
    }

    $n = count($result[0]) + count($result[1]);
    $this->log(TRACE, "Generated a total of {$n} triples at {$breadCrumb}");
    foreach ($result[0] as $item) {
        $this->log(TRACE, $item);
    }
    foreach ($result[1] as $item) {
        $this->log(TRACE, $item);
    }

    return $result;
}