/**
 * Extracts semantic annotations of the forms [[property::target]] and
 * [[property:=value]] from a page's wiki source.
 *
 * Brace-delimited template blocks are removed from the source first, so
 * annotations inside templates are not reported by this extractor.
 *
 * @param pageID     Page identifier; it is passed unencoded to the
 *                   ExtractionResult and encoded with encodeLocalName() for
 *                   use as the triple subject
 * @param pageTitle  Not used by this extractor
 * @param pageSource Raw wiki markup of the page
 *
 * @return ExtractionResult holding one triple per annotation found
 */
public function extractPage($pageID, $pageTitle, $pageSource)
{
    $result = new ExtractionResult($pageID, $this->language, self::extractorID);
    $pageID = encodeLocalName($pageID);

    // Templates are already handled by the Infobox Extractor: find every
    // balanced {...} block (the pattern recurses via (?R)) and cut it out.
    preg_match_all('~\\{((?>[^{}]+)|(?R))*\\}~x', $pageSource, $subTemplates);
    foreach ($subTemplates[0] as $subTemplate) {
        // Strip the outer {{ }} and re-add them so that exactly the
        // double-braced form is removed from the source.
        $subTemplate = preg_replace("/(^\\{\\{)|(\\}\\}\$)/", "", $subTemplate);
        $pageSource = str_replace('{{' . $subTemplate . '}}', '', $pageSource);
    }

    // Internal semantic links: [[property::target]].
    // The property class is [a-zA-Z0-9\- _]; the former range "A-z" also
    // let "[", "\", "]", "^" and "`" through.
    preg_match_all('/(\\[\\[)([a-zA-Z0-9\\- _]+)(::)([^\\]]+)\\]\\]/', $pageSource, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        $result->addTriple(
            RDFtriple::page($pageID),
            RDFtriple::predicate(encodeLocalName($match[2])),
            RDFtriple::page($match[4])
        );
    }

    // Literals: [[property:=value]].
    preg_match_all('/(\\[\\[)([a-zA-Z\\-_ ]+)(:=)([^\\]]+)\\]\\]/', $pageSource, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        // parseAttributeValue yields: object, object_is, datatype(, language)
        $parsed = parseAttributeValue($match[4], $pageID, $match[2]);
        $lexicalForm = $parsed[0];
        $datatype = $parsed[2];
        $predicate = propertyToCamelCase(encodeLocalName($match[2]));

        // Nothing usable was parsed out of the value
        if ($lexicalForm == null) {
            continue;
        }

        // NOTE(review): the literal language is hard-coded to 'en' rather
        // than taken from $this->language - confirm this is intended.
        $result->addTriple(
            RDFtriple::page($pageID),
            RDFtriple::predicate($predicate),
            RDFtriple::literal($lexicalForm, $datatype, 'en')
        );
    }

    return $result;
}
/**
 * Emits an owl:sameAs link to the DBpedia resource of the same name when the
 * page source carries a {{wikipedia-c}} or {{wikipedia-c-note}} marker.
 *
 * @param pageID     Page identifier; encoded with encodeLocalName() before
 *                   it is used in the triple
 * @param pageTitle  Not used by this extractor
 * @param pageSource Raw wiki markup of the page
 *
 * @return ExtractionResult that is empty, or holds the single sameAs triple
 */
public function extractPage($pageID, $pageTitle, $pageSource)
{
    $extraction = new ExtractionResult($pageID, $this->language, self::extractorID);
    $localName = encodeLocalName($pageID);

    $hasWikipediaMarker = preg_match('/\\{\\{wikipedia\\-c(\\-note)?\\}\\}/', $pageSource);
    if (!$hasWikipediaMarker) {
        return $extraction;
    }

    $extraction->addTriple(
        RDFtriple::page($localName),
        RDFtriple::URI("http://www.w3.org/2002/07/owl#sameAs"),
        RDFtriple::URI("http://dbpedia.org/resource/" . $localName)
    );

    return $extraction;
}
/**
 * Builds one triple per link that the configured parser finds in $value.
 *
 * Each link is put into canonical wiki case, encoded with encodeLocalName()
 * and appended to $this->basePath to form the object URI. The predicate is
 * DB_ONTOLOGY_NS followed by $propertyName.
 *
 * @param subjectName  Name of the subject page
 * @param propertyName Local name of the ontology property
 * @param value        Raw value handed to the parser
 *
 * @return array of RDFtriple objects (empty when the parser yields nothing)
 */
public function generate($subjectName, $propertyName, $value)
{
    $triples = array();

    foreach ($this->parser->parse($value) as $rawLink) {
        $canonicalLink = $this->mediaWikiUtil->toCanonicalWikiCase($rawLink);
        $objectUri = $this->basePath . encodeLocalName($canonicalLink);

        $triples[] = new RDFtriple(
            RDFtriple::page($subjectName),
            RDFtriple::URI(DB_ONTOLOGY_NS . $propertyName),
            RDFtriple::URI($objectUri)
        );
    }

    return $triples;
}
/**
 * Writes the triple + additional information such as language, whether an object is a reference
 * or a literal and the datatype into a global array ($parseResult)
 *
 * Subject and predicate are always passed through encodeLocalName(); the
 * object is encoded unless it is a literal. A reference object that fails
 * URI::validate() after encoding causes the triple to be dropped.
 *
 * @param subject: String containing the triples subject
 * @param predicate: String containing the triples predicate
 * @param object: String containing the triples object
 * @param file: Legacy, should be removed in the future (ignored here)
 * @param object_is: 'r' if object is a reference, 'l' if object is a literal, 'b' if object is a blanknode
 * @param dtype: String containing a literals XSD datatype
 * @param lang: String containing a literals language
 *
 * @return null in every case; the triple is appended to $parseResult as
 *         array(subject, predicate, object, object_is, dtype, lang)
 *
 * TODO: Should encodeLocalName be used for the whole URL? Should URI objects be used?
 */
function writeTripel($subject, $predicate, $object, $file = 'main', $object_is = 'r', $dtype = NULL, $lang = NULL)
{
    global $parseResult;

    // Drop references that do not form a valid URI once encoded
    if ($object_is == 'r' && !URI::validate(encodeLocalName($object))) {
        return null;
    }

    // If $object_is == 'l', encodeLocalName shouldn't be used, the string will be encoded like e.g. \uBC18\uC57C
    if ($object_is != 'l') {
        $object = encodeLocalName($object);
    }

    $predicate = encodeLocalName($predicate);
    if (USE_PERCENT_ENCODING) {
        $predicate = str_replace("%", "_percent_", $predicate);
    } elseif (preg_match('/%[A-F0-9]{2}/', substr($predicate, -3))) {
        // The predicate ends in a %XX escape: append an underscore.
        // (preg_match replaces ereg(), which no longer exists in PHP 7+.)
        $predicate .= "_";
    }

    $parseResult[] = array(encodeLocalName($subject), $predicate, $object, $object_is, $dtype, $lang);
}
/**
 * Generates triples for every template found in $value and recurses into
 * the argument values to pick up nested templates.
 *
 * The return value is a three element array:
 *   [0] the generated (data) triples,
 *   [1] meta triples (rdf:type and wikiPageUsesTemplate, attached to the
 *       resource derived from $breadCrumb),
 *   [2] a map templateName => 1 of the templates that were used.
 *
 * Related-class lookups and the wikiPageUsesTemplate triples are only
 * produced at depth 0 of the bread crumb.
 *
 * @param breadCrumb Position in the page / template / argument hierarchy
 * @param value      Wiki markup to scan for templates
 *
 * @return array(array triples, array metaTriples, array usedTemplates)
 */
private function myGenerate(BreadCrumb $breadCrumb, $value)
{
    // result is the array containing: triples, meta triples, used templates
    $result = array(array(), array(), array());

    $rootSubjectUri = RDFTriple::page($breadCrumb->getRoot());

    // this array is only relevant on depth 0
    $relatedClasses = array();

    // 'parent' means the parent of the value - thus subject and predicate
    $parentName = $this->breadCrumbTransformer->transform($breadCrumb);
    $parentResource = RDFTriple::page($parentName);

    // Only referenced by the (currently disabled) parent-child linking below
    $parentPropertyName = null;
    $topNode = $breadCrumb->peekTop(0);
    if (isset($topNode)) {
        $parentPropertyName = $topNode->getPropertyName();
    }

    // Get all templates on this site, indexed by name
    // (there may be multiple templates with the same name)
    $nameToTemplates = SimpleWikiTemplateMatcher::match($value);

    foreach ($nameToTemplates as $templateName => $templates) {
        if (strlen($templateName) < 1) {
            continue;
        }

        $templateName = $this->mediaWikiUtil->toCanonicalWikiCase($templateName);
        if (!$this->templateNameFilter->doesAccept($templateName)) {
            continue;
        }

        $templateUri = RDFTriple::URI(DB_TEMPLATE_NS . $templateName, false);
        $result[2][$templateName] = 1;

        // Get annotations for the template - if there are any.
        // $ta is only ever assigned at depth 0, so on deeper levels the
        // isset($ta) checks below are always false.
        $lookupName = "Template:{$templateName}/doc";
        if ($breadCrumb->getDepth() == 0) {
            $ta = $this->templateDb->getTemplateAnnotation($lookupName);

            // Collect the classes for "relatesToClass" - but only for the
            // page itself (not for sub templates). If no related class
            // exists, rdf:type owl:Thing is used further down.
            if (isset($ta)) {
                foreach ($ta->getRelatedClasses() as $item) {
                    $relatedClasses[$item] = 1;
                }
            }
        }

        foreach ($templates as $templateIndex => $template) {
            // Iterate over all arguments
            $arguments = $template->getArguments();
            foreach ($arguments as $argumentName => $values) {
                // propertyNs defaults to DB_PROPERTY_NS unless there
                // exists a mapping in the templatedb. In that case it will
                // be set to DB_ONTOLOGY_NS
                $propertyNs = DB_PROPERTY_NS;
                $pa = null;
                if (isset($ta)) {
                    $pas = $ta->getPropertyAnnotations();
                    if (array_key_exists($argumentName, $pas)) {
                        $pa = $pas[$argumentName];
                        $propertyNs = DB_ONTOLOGY_NS;
                    }
                }

                // Fake a property mapping if there was none in the db.
                // This maps argumentName back to itself.
                if (!isset($pa)) {
                    // Unmapped properties may be suppressed by option on
                    // the page itself; they are always allowed on
                    // sub-resources.
                    if ($this->allowUnmappedProperties != true && $breadCrumb->getDepth() == 0) {
                        continue;
                    }

                    // Rename numeric argument names (e.g. 1 becomes
                    // property1) - this is just cosmetic for the result
                    if (is_numeric($argumentName)) {
                        $argumentName = "property{$argumentName}";
                    }

                    $pa = new PropertyAnnotation($argumentName);
                    $pa->addMapping(new PropertyMapping($argumentName));
                }

                foreach ($pa->getMappings() as $pm) {
                    $parseHint = $pm->getParseHint();

                    // Work on a per-mapping copy of the name. Previously
                    // $argumentName itself was overwritten here (and
                    // camel-cased/encoded further down), so a second
                    // mapping started from the first mapping's result
                    // instead of the original argument name.
                    $mappedName = $argumentName;
                    if (!isEmptyString($pm->getRenamedValue())) {
                        $mappedName = $pm->getRenamedValue();
                    }
                    $mappedName = trim($mappedName);

                    // Skip empty properties
                    // FIXME does that even happen?
                    if (strlen($mappedName) < 1) {
                        continue;
                    }

                    $childBreadcrumb = $breadCrumb->createClone();
                    $childBreadcrumb->push(new BreadcrumbNode($templateName, $templateIndex, $mappedName));
                    $templateChildName = $this->breadCrumbTransformer->transform($childBreadcrumb);

                    // If there is no parse hint we might be able to derive it
                    if (!isset($parseHint)) {
                        $parseHint = $this->deriveParseHintFromName($mappedName);
                    }

                    // Attempt to obtain a triple generator; without one we
                    // fall through to the default handling
                    $tripleGenerator = $this->getTripleGenerator($parseHint);

                    $localResult = array(array(), array(), array());

                    if (isset($tripleGenerator)) {
                        foreach ($values as $rawValue) {
                            $rawValue = trim($rawValue);

                            // Skip empty values
                            if ($rawValue == "") {
                                continue;
                            }

                            $generated = $tripleGenerator->generate($templateChildName, $mappedName, $rawValue);
                            $localResult[0] = array_merge($localResult[0], $generated);
                        }
                    } else {
                        // No triple generator - default handling
                        $mappedName = propertyToCamelCase($mappedName);
                        $mappedName = encodeLocalName($mappedName);

                        if (in_array($mappedName, $GLOBALS['W2RCFG']['ignoreProperties'])) {
                            continue;
                        }

                        // turn the argument name into a property name
                        $propertyName = $propertyNs . $mappedName;

                        foreach ($values as $rawValue) {
                            $rawValue = trim($rawValue);

                            // Skip empty values
                            if ($rawValue == "") {
                                continue;
                            }

                            // Special case for "date": drop link brackets
                            // and normalise the en dash to a hyphen
                            if ($mappedName == "date") {
                                $rawValue = str_replace("[", "", $rawValue);
                                $rawValue = str_replace("]", "", $rawValue);
                                $rawValue = str_replace("–", "-", $rawValue);
                            }

                            // Parse out sub templates (recursion one level
                            // deeper) and merge all three result parts
                            $subResources = $this->myGenerate($childBreadcrumb, $rawValue);
                            for ($i = 0; $i < 3; ++$i) {
                                $localResult[$i] = array_merge($localResult[$i], $subResources[$i]);
                            }

                            $localResult[0] = array_merge(
                                $localResult[0],
                                parseAttributeValueWrapper($rawValue, $templateChildName, $propertyName, $this->language)
                            );
                        }
                    }

                    // For each triple add the ExtractedFromTemplate-Annotation
                    foreach ($localResult[0] as $triple) {
                        $triple->addExtractedFromTemplateAnnotation($templateUri);
                    }

                    // Add on delete cascade annotation
                    if ($breadCrumb->getDepth() > 1) {
                        foreach ($localResult[0] as $triple) {
                            $triple->addOnDeleteCascadeAnnotation($rootSubjectUri);
                        }
                    }

                    // merge the results
                    for ($i = 0; $i < 3; ++$i) {
                        $result[$i] = array_merge($result[$i], $localResult[$i]);
                    }
                }
            }

            // Open question (code disabled): how to connect a sub-subject
            // to the root subject? The idea was a triple
            // (parentResource, DB_PROPERTY_NS . parentPropertyName,
            // templateChildName) for depth > 0.
        }
    }

    if (count($relatedClasses) > 0) {
        foreach ($relatedClasses as $relatedClass => $dummy) {
            $result[1][] = new RDFtriple(
                $parentResource,
                RDFtriple::URI(RDF_TYPE, false),
                RDFtriple::URI(DB_ONTOLOGY_NS . $relatedClass, false)
            );
        }
    } else {
        if ($breadCrumb->getDepth() == 0) {
            $result[1][] = new RDFtriple(
                $parentResource,
                RDFtriple::URI(RDF_TYPE, false),
                RDFtriple::URI(OWL_THING, false)
            );
        }
    }

    // Add the wiki page uses template triples - but only on depth 0
    if ($breadCrumb->getDepth() == 0) {
        foreach ($result[2] as $name => $dummy) {
            $result[1][] = new RDFTriple(
                $parentResource,
                self::$wikiPageUsesTemplateUri,
                RDFTriple::URI(DB_TEMPLATE_NS . $name, false)
            );
        }
    }

    $n = count($result[0]) + count($result[1]);
    $this->log(TRACE, "Generated a total of {$n} triples at {$breadCrumb}");
    foreach ($result[0] as $item) {
        $this->log(TRACE, $item);
    }
    foreach ($result[1] as $item) {
        $this->log(TRACE, $item);
    }

    return $result;
}
/**
 * Parses internal Links:
 * - If a Link is found: links to currencies are replaced with the respective symbol, external links are removed
 *   (these are usually references), links to dates are removed (if more than one link was found).
 * - If only digits and currencies are at the beginning of the String, anything else is removed and the number
 *   is parsed for its type (int, float, unit)
 * - In any other cases, where internal links are mixed with text, the function compares the aggregated word-length
 *   of the links, with the length of text items. If the links are longer, the String is parsed as a link list, else
 *   the brackets are removed and the String is recognized as text.
 *
 * Triples are emitted through writeTripel(); nothing is returned besides a status.
 *
 * @param o     Object string (by reference); rewritten while links are stripped or replaced
 * @param s     Subject passed on to writeTripel()
 * @param p     Predicate; the configured propertyBase prefix is cut off to compare against known link lists
 * @param dtype Datatype (by reference); overwritten when the value is parsed as a number
 *
 * @return false if $o contains no internal link, true once triples were handled,
 *         null if the remaining string starts with neither text nor a link
 */
function catchLinkList(&$o, $s, $p, &$dtype)
{
    // Match for any Link
    $foundLink = preg_match_all("/(\\[{2})([^\\]]+)(\\]{2})/", $o, $matches);
    if (!$foundLink) {
        return false;
    }

    // Initialize object-type with literal
    $object_is = 'l';

    // Test whether property is included in known Linklists and parse Links
    $knownLinkLists = $GLOBALS['linklistpredicates'];

    // Remove DBpedia Base URI
    $propertyName = substr($p, strlen($GLOBALS['W2RCFG']['propertyBase']), strlen($p));

    // Compare property-name with known LinkList properties
    foreach ($knownLinkLists as $linkList) {
        if ($linkList == $propertyName) {
            preg_match_all("/(\\[{2})([^\\]]+)(\\]{2})/", $o, $matches);
            foreach ($matches[2] as $l) {
                if (strlen($l) > 1) {
                    // Extract internal links of type [[abc|def]]
                    $pos = stripos($l, "|");
                    if ($pos) {
                        $l = substr($l, 0, $pos);
                    }
                    $object = $GLOBALS['W2RCFG']['wikipediaBase'] . ucwords(encodeLocalName($l));
                    $object_is = 'r';
                    writeTripel($s, $p, $object, 'main', $object_is);
                }
            }
            return true;
        }
    }

    // $weight: If text is mixed with links, this is the weight assigned to the links
    // in order to decide whether the composite link/text String is parsed as link-list or text-literal.
    // Any value > 1 gives more weight to links, any value between 0 and 1 more to the text part.
    $weight = 1.25;

    // Replace Links to currencies with the respective Symbol.
    // The keys are regex fragments; the values are literal symbols.
    $currencies = array(
        "U.S. (D|d)ollar" => "\$",
        "United States (D|d)ollar" => "\$",
        "Dollar" => "\$",
        "Euro" => "€",
        "Yen" => "¥",
        "Pound" => "£"
    );
    foreach ($currencies as $key => $currency) {
        // The symbol is spliced into patterns, so it must be quoted:
        // an unquoted "$" acts as an end-of-string anchor and the guard
        // below could never match for dollars.
        $symbolPattern = preg_quote($currency, '/');

        // Do not match real Links to currencies e.g. United_States: currency = [[United States Dollar]] ($)
        if (preg_match('/^\\s*\\[{2}' . $key . '\\s?\\|?[^\\]]*\\]{2}[\\(\\s ]*' . $symbolPattern . '[\\)\\s]*$/', $o)) {
            break;
        }
        // Replace the currency link only if the symbol does not already precede it
        $o = trim(preg_replace('/(^[^' . $symbolPattern . ']*)(\\[{2}' . $key . '\\s?\\|?[^\\]]*\\]{2})/', '\\1' . $currency, $o));
    }

    // Remove External Links (these are usually references)
    $o = trim(preg_replace("/\\[http:\\/\\/[^\\]]+\\]/", "", $o));

    // Remove links in parentheses.
    // Known bug: destroys Links with "()" inside an internal Link. e.g. Boris_Becker: birthplace
    $o = trim(preg_replace("/\\([^\\[\\]]*\\[{2}[^\\)\\]]*\\]{2}[^\\)]*\\)/", "", $o));

    // If Link is a Date and more than one Link was found, remove Link
    if ($foundLink > 1) {
        // "October" was missing from this list, so October dates were never stripped
        $months = array("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December");
        foreach ($months as $month) {
            $o = trim(preg_replace("/\\[{2}{$month} [0-9]{1,2}\\]{2},?[\\s]*(,?[\\s]*(\\(?\\[{2}|\\()[0-9]{4}(\\]{2}\\)?|\\)))?/", "", $o));
        }
        // If Link is a year, remove Link
        $o = trim(preg_replace("/\\(?\\[{1,2}[0-9]{4}\\]{1,2}\\)?/", "", $o));
    }

    // Initialize ResultString
    $resultstring = "";

    if (preg_match("/^([^\\[]+)(\\[{2})*/", $o, $stringStart)) {
        // String begins with Text and is followed by one or more Links

        if (preg_match("/\\]{2}\$/", $o)) {
            // String ends with a Link -> "abc [[def | jjj ]] ghi [[ xyz ]]" (Problem "abc [[def]][[xyz]]")
            // Match Text, followed by a Link
            preg_match_all("/([^\\[\\]]+)(\\[{2})([^\\]]+)(\\]{2})/", $o, $matches);
            $linkPos = 3; // Position of Links in $matches
            $textPos = 1; // Position of Text in $matches
        } else {
            // String ends with Text -> "abc [[def | jjj ]] ghi [[ xyz ]] klm"
            // Initialize ResultString with "abc "
            $resultstring = $stringStart[1];
            // Match Link, followed by Text
            preg_match_all("/(\\[{2})([^\\]]+)(\\]{2})([^\\[]+)/", $o, $matches);
            $linkPos = 2; // Position of Links in $matches
            $textPos = 4; // Position of Text in $matches
        }

        // String starts with numbers and/or currency Symbols
        if (preg_match("/(^[\\s]*([0-9\$€£¥]+[\\.,][0-9\$€£¥]+|[0-9\$€£¥]+)[\\s]*(((B|b)illion)?|((M|m)illion)?|((T|t)rillion)?|((Q|q)uadrillion)?))(.*)/", $o, $numberMatch) && strlen(trim($numberMatch[1])) > 2 || preg_match("/^([0-9\$€£¥]+[\\.,][0-9\$€£¥]+|[0-9\$€£¥]+)([\\s]*\$)/", $o)) {
            // CodeBlock for parsing Numbers

            // Read Links to numbers, e.g., BMW: revenue => € 4.9 [[10000000 (number)| billion]]
            if (preg_match("/(^[\\s]*([0-9\$€£¥]+[\\.,][0-9\$€£¥]+|[0-9\$€£¥]+)[\\s]*)(\\[{2}[^\\]\\|]*\\(number\\)[^\\]\\|]*\\|)([^\\]]+)(\\]{2})(.*\$)/", $o, $numberMatch)) {
                $o = trim($numberMatch[1]) . " " . trim($numberMatch[4]);
            } else {
                if ($startPos = strpos($o, "[[")) {
                    // Remove anything after first Link
                    $o = substr($o, 0, $startPos);
                }
            }

            // Remove year: e.g. p:revenue = "22 billion $ (2004)" => "22 billion $"
            if (preg_match("/^[\\s]*[0-9\$€£¥]+[^\\(]+\\([0-9]{4}\\).*/", $o)) {
                $o = trim(preg_replace("/\\([0-9]{4}\\)/", "", $o));
            }
            $o = trim($o);

            // Note: $o and $dtype are reference parameters and are overwritten here
            list($o, $o_is, $dtype, $lang) = parseAttributeValue($o, $s, $p);
            if ($o !== NULL) {
                writeTripel($s, $p, $o, 'main', $o_is, $dtype, $lang);
            }
            return true;
        } else {
            // Calculate aggregate length of text and Links (whitespace is not counted)
            $lengthLink = 0;
            $lengthText = strlen($resultstring);
            foreach ($matches[$linkPos] as $match) {
                if ($pos = strpos($match, "|")) {
                    // Piped link: count from the "|" onwards (the pipe itself is included)
                    $lengthLink += strlen(preg_replace("/\\s/", "", substr($match, $pos, strlen($match) - $pos)));
                } else {
                    $lengthLink += strlen(preg_replace("/\\s/", "", $match));
                }
            }
            foreach ($matches[$textPos] as $match) {
                $lengthText += strlen(preg_replace("/\\s/", "", $match));
            }

            // compare aggregated length of links and literals ($weight is defined at the top of this function)
            if ($weight * $lengthLink >= $lengthText) {
                // CodeBlock for Start with Text and more Links than Text
                foreach ($matches[$linkPos] as $l) {
                    if (strlen($l) > 1) {
                        // Extract internal links of type [[abc|def]]
                        $pos = stripos($l, "|");
                        if ($pos) {
                            $l = substr($l, 0, $pos);
                        }
                        $object = $GLOBALS['W2RCFG']['wikipediaBase'] . ucwords(encodeLocalName($l));
                        $object_is = 'r';
                        writeTripel($s, $p, $object, 'main', $object_is);
                    }
                }
                return true;
            } else {
                // CodeBlock for Start with Text and more Text than Links
                // Replace Links with their Labels
                $o = preg_replace_callback("/(\\[{2}[^\\|^\\]]+)(\\|)([^\\]]+)(\\]{2})/", 'getLabelForLink', $o);
                // Replace simple links with their link-text
                $o = preg_replace("/\\[{2}|\\]{2}/", "", $o);
                writeTripel($s, $p, $o, 'main', $object_is);
                return true;
            }
        }
    } elseif (preg_match("/^(\\[{2})([^\\]]+)(\\]{2})/", $o, $stringStart)) {
        // String begins with Links and is followed by Text or Links

        if (preg_match("/\\]{2}\$/", $o)) {
            // String ends with a Link -> "[[def | jjj ]] ghi [[ xyz ]]"
            // Initialize ResultString with the content of the first link
            $resultstring = $stringStart[2];
            // Match Text, followed by a Link
            $found = preg_match_all("/([^\\[\\]]+)(\\[{2})([^\\]]+)(\\]{2})/", $o, $matches);
            $linkPos = 3; // Position of Links in $matches
            $textPos = 1; // Position of Text in $matches
        } else {
            // String ends with Text -> "[[def | jjj ]] ghi [[ xyz ]] klm"
            // Match Link, followed by Text
            $found = preg_match_all("/(\\[{2})([^\\]]+)(\\]{2})([^\\[]+)/", $o, $matches);
            $linkPos = 2; // Position of Links in $matches
            $textPos = 4; // Position of Text in $matches
        }

        if (!$found) {
            // String is composed only of Links -> "[[abc]][[def]]"
            preg_match_all("/(\\[{2})([^\\]]+)(\\]{2})/", $o, $matches);
            foreach ($matches[2] as $l) {
                if (strlen($l) > 1) {
                    // Extract internal links of type [[abc|def]]
                    $pos = stripos($l, "|");
                    if ($pos) {
                        $l = substr($l, 0, $pos);
                    }
                    $object = $GLOBALS['W2RCFG']['wikipediaBase'] . ucwords(encodeLocalName($l));
                    $object_is = 'r';
                    writeTripel($s, $p, $object, 'main', $object_is);
                }
            }
            return true;
        } else {
            // Calculate aggregate length of text and Links.
            // If String starts and ends with a Link, the first Link is counted too.
            $lengthLink = strlen($resultstring);
            if ($lengthLink > 0) {
                // NOTE(review): for a piped first link the label part is added
                // on top of the full length (not instead of it) - left unchanged.
                if ($pos = strpos($resultstring, "|")) {
                    $lengthLink += strlen(preg_replace("/\\s/", "", substr($resultstring, $pos, strlen($resultstring) - $pos)));
                }
            }
            $lengthText = 0;

            // add length of each link to the aggregate (for [[abc|def]] the part from "|" onwards counts)
            foreach ($matches[$linkPos] as $match) {
                if ($pos = strpos($match, "|")) {
                    $lengthLink += strlen(preg_replace("/\\s/", "", substr($match, $pos, strlen($match) - $pos)));
                } else {
                    $lengthLink += strlen(preg_replace("/\\s/", "", $match));
                }
            }

            // add length of literals to aggregate text-length
            foreach ($matches[$textPos] as $match) {
                $lengthText += strlen(preg_replace("/\\s/", "", $match));
            }

            // compare aggregated length of links and literals ($weight is defined at the top of this function)
            if ($weight * $lengthLink >= $lengthText) {
                // CodeBlock for Start with Link and more Links than Text
                if (strlen($resultstring) > 1) {
                    // The leading link is not part of $matches in this case; put it first
                    array_unshift($matches[$linkPos], $resultstring);
                }
                foreach ($matches[$linkPos] as $l) {
                    if (strlen($l) > 1) {
                        // Extract internal links of type [[abc|def]]
                        $pos = stripos($l, "|");
                        if ($pos) {
                            $l = substr($l, 0, $pos);
                        }
                        $object = $GLOBALS['W2RCFG']['wikipediaBase'] . ucwords(encodeLocalName($l));
                        $object_is = 'r';
                        writeTripel($s, $p, $object, 'main', $object_is);
                    }
                }
                return true;
            } else {
                // CodeBlock for Start with Link and more Text than Links
                // Replace Links with their Labels
                $o = preg_replace_callback("/(\\[{2}[^\\|^\\]]+)(\\|)([^\\]]+)(\\]{2})/", 'getLabelForLink', $o);
                // Replace simple links with their link-text
                $o = preg_replace("/\\[{2}|\\]{2}/", "", $o);
                writeTripel($s, $p, $o, 'main', $object_is);
                return true;
            }
        }
    }

    // Neither text nor a link at the start of the remaining string
    // (same result as the former implicit fall-through)
    return null;
}
/**
 * Emits the explicit typing triple(s) for a category, template or predicate
 * the first time it is seen for a given file, and remembers it afterwards.
 *
 * Seen names are kept per $filename in a static registry, keyed by the name
 * plus a kind suffix (':Cat', ':Temp', ':Pred'). A stored object kind is
 * only recorded when $object_is differs from 'n'.
 *
 * @param name      Name of the category, template or predicate
 * @param filename  Key under which seen names are grouped
 * @param name_is   'c' for category, 't' for template, 'p' for predicate
 * @param object_is Object kind of a predicate ('l' selects the datatype
 *                  property base, anything else the object property base);
 *                  'n' means "not given"
 *
 * @return null on first sight (after writing the triples); otherwise the
 *         object kind stored for that name and file
 */
function printexplicitTyping($name, $filename, $name_is, $object_is = 'n')
{
    static $seenNames = array();
    static $storedKinds = array();

    // Build the registry key; the suffix encodes what kind of name this is
    if ($name_is == 'c') {
        $registryKey = $name . ':Cat';
    }
    if ($name_is == 't') {
        $registryKey = $name . ':Temp';
    }
    if ($name_is == 'p') {
        $registryKey = $name . ':Pred';
    }

    // Already handled for this file: just report what was stored
    if (arrayMultiSearch($registryKey, $seenNames[$filename])) {
        return $storedKinds[$registryKey][$filename]['is'];
    }

    $seenNames[$filename][] = $registryKey;
    if ($object_is != 'n') {
        $storedKinds[$registryKey][$filename]['is'] = $object_is;
    }

    // A template/category goes to the 'type' file exactly when its property
    // equals the configured rdf:type property; otherwise to 'main'.
    $templateIsTyped = ($GLOBALS['rdftypeProperty'] == $GLOBALS['W2RCFG']['templateProperty']);
    $categoryIsTyped = ($GLOBALS['rdftypeProperty'] == $GLOBALS['W2RCFG']['categoryProperty']);

    if ($name_is == 'c') {
        if ($categoryIsTyped) {
            writeTripel($name, $GLOBALS['W2RCFG']['categoryProperty'], $GLOBALS['W2RCFG']['classBase'], 'type');
        } else {
            // Type the category property itself as an object property
            printexplicitTyping($GLOBALS['W2RCFG']['categoryProperty'], $filename, 'p', 'r');
        }
    }

    if ($name_is == 't') {
        if ($templateIsTyped) {
            $templateSubject = $GLOBALS['W2RCFG']['wikipediaBase'] . $GLOBALS['templateLabel'] . ':' . encodeLocalName($name);
            writeTripel($templateSubject, $GLOBALS['rdftypeProperty'], $GLOBALS['W2RCFG']['classBase'], 'type');
        } else {
            // Type the template property itself as an object property
            printexplicitTyping($GLOBALS['W2RCFG']['templateProperty'], $filename, 'p', 'r');
        }
    }

    if ($name_is == 'p') {
        if ($object_is == 'l') {
            $propertyClass = $GLOBALS['W2RCFG']['datatypePropertyBase'];
        } else {
            $propertyClass = $GLOBALS['W2RCFG']['objectPropertyBase'];
        }
        writeTripel($name, $GLOBALS['rdftypeProperty'], $propertyClass, 'type');
    }

    return;
}