/**
 * This method will be called before an article is displayed or previewed.
 * For display and preview we strip out the semantic properties and append them
 * at the end of the article.
 *
 * @param Parser $parser
 * @param string $text
 */
public static function onInternalParseBeforeLinks( &$parser, &$text ) {
	global $smwgStoreAnnotations, $smwgLinksInValues;

	SMWParseData::stripMagicWords( $text, $parser );

	// Store the results if enabled (we have to parse them in any case,
	// in order to clean the wiki source for further processing).
	$smwgStoreAnnotations = smwfIsSemanticsProcessed( $parser->getTitle()->getNamespace() );
	SMWParserExtensions::$mTempStoreAnnotations = true; // used for [[SMW::on]] and [[SMW::off]]

	// Process redirects, if any (it seems that there is indeed no more direct
	// way of getting this info from MW).
	if ( $smwgStoreAnnotations ) {
		$rt = Title::newFromRedirect( $text );

		if ( !is_null( $rt ) ) {
			$p = new SMWDIProperty( '_REDI' );
			$di = SMWDIWikiPage::newFromTitle( $rt, '__red' );
			SMWParseData::getSMWData( $parser )->addPropertyObjectValue( $p, $di );
		}
	}

	// Only used in subsequent callbacks, forgotten afterwards.
	SMWParserExtensions::$mTempParser = $parser;

	// In the regexp matches below, a leading ':' escapes the markup, as known from Categories.
	// Parse links to extract semantic properties.
	if ( $smwgLinksInValues ) {
		// More complex regexp -- lib PCRE may cause segfaults if text is long :-(
		$semanticLinkPattern = '/\[\[                 # Beginning of the link
		                        (?:([^:][^]]*):[=:])+ # Property name (or a list of those)
		                        (                     # After that:
		                          (?:[^|\[\]]         #   either normal text (without |, [ or ])
		                          |\[\[[^]]*\]\]      #   or a [[link]]
		                          |\[[^]]*\]          #   or an [external link]
		                        )*)                   # all this zero or more times
		                        (?:\|([^]]*))?        # Display text (like "text" in [[link|text]]), optional
		                        \]\]                  # End of link
		                        /xu';
		$text = preg_replace_callback( $semanticLinkPattern,
			array( 'SMWParserExtensions', 'parsePropertiesCallback' ), $text );
	} else {
		// Simpler regexp -- no segfaults found for it, but no links in values.
		$semanticLinkPattern = '/\[\[                 # Beginning of the link
		                        (?:([^:][^]]*):[=:])+ # Property name (or a list of those)
		                        ([^\[\]]*)            # Content: anything but [ or ]
		                        \]\]                  # End of link
		                        /xu';
		$text = preg_replace_callback( $semanticLinkPattern,
			array( 'SMWParserExtensions', 'simpleParsePropertiesCallback' ), $text );
	}

	// Add a link to the RDF export to the HTML header.
	// TODO: do escaping via the Html or Xml class.
	SMWOutputs::requireHeadItem(
		'smw_rdf',
		'<link rel="alternate" type="application/rdf+xml" title="' .
			htmlspecialchars( $parser->getTitle()->getPrefixedText() ) . '" href="' .
			htmlspecialchars(
				SpecialPage::getTitleFor( 'ExportRDF', $parser->getTitle()->getPrefixedText() )
					->getLocalUrl( 'xmlmime=rdf' )
			) . "\" />"
	);

	SMWOutputs::commitToParser( $parser );

	return true; // always return true, in order not to stop MW's hook processing!
}
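// A minimal usage sketch: the handler above follows MediaWiki's standard hook
// calling convention, so it would typically be registered as below. The exact
// registration site in this codebase is an assumption, not taken from the source.
$wgHooks['InternalParseBeforeLinks'][] = 'SMWParserExtensions::onInternalParseBeforeLinks';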
/**
 * @since 2.0 made protected; use printAllToFile or printAllToOutput
 */
protected function printAll( $ns_restriction = false, $delay = 0, $delayeach = 0 ) {
	$linkCache = LinkCache::singleton();
	$db = wfGetDB( DB_SLAVE );

	$this->delay_flush = 10;

	$this->serializer->startSerialization();
	$this->serializer->serializeExpData( SMWExporter::getInstance()->getOntologyExpData( '' ) );

	$end = $db->selectField( 'page', 'max(page_id)', false, __METHOD__ );
	$a_count = 0; // DEBUG
	$d_count = 0; // DEBUG
	$delaycount = $delayeach;

	for ( $id = 1; $id <= $end; $id += 1 ) {
		$title = Title::newFromID( $id );

		if ( is_null( $title ) || !smwfIsSemanticsProcessed( $title->getNamespace() ) ) {
			continue;
		}

		if ( !self::fitsNsRestriction( $ns_restriction, $title->getNamespace() ) ) {
			continue;
		}

		$a_count += 1; // DEBUG

		$diPage = SMWDIWikiPage::newFromTitle( $title );
		$this->queuePage( $diPage, 1 );

		while ( count( $this->element_queue ) > 0 ) {
			$diPage = reset( $this->element_queue );
			$this->serializePage( $diPage, $diPage->recdepth );

			// Resolve dependencies that will otherwise not be printed.
			foreach ( $this->element_queue as $key => $diaux ) {
				if ( !smwfIsSemanticsProcessed( $diaux->getNamespace() ) ||
				     !self::fitsNsRestriction( $ns_restriction, $diaux->getNamespace() ) ) {
					// Note: we do not need to check the cache to guess if an element was already
					// printed. If so, it would not be included in the queue in the first place.
					// Don't carry values that you do not want to export (yet).
					unset( $this->element_queue[$key] );
					$d_count += 1; // DEBUG
				}
			}

			// Every $delayeach pages, sleep for $delay µs to be nice to the server.
			if ( $delaycount-- < 0 && $delayeach != 0 ) {
				usleep( $delay );
				$delaycount = $delayeach;
			}
		}

		$this->flush();
		$linkCache->clear();
	}

	$this->serializer->finishSerialization();
	$this->flush( true );
}
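/**
 * A hedged sketch of the public wrapper that the docblock above points callers
 * to. The prepareSerialization() helper and the exact signature are assumptions
 * based on the surrounding class, not verbatim from this codebase.
 */
public function printAllToFile( $outfile, $ns_restriction = false, $delay = 0, $delayeach = 0 ) {
	if ( !$this->prepareSerialization( $outfile ) ) {
		return; // could not open the target file for writing
	}

	$this->printAll( $ns_restriction, $delay, $delayeach );
}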
/**
 * @see smwfIsSemanticsProcessed
 *
 * FIXME Delete this test in 1.11
 *
 * @since 1.9
 */
public function testSmwfIsSemanticsProcessed() {
	$result = smwfIsSemanticsProcessed( NS_MAIN );

	$this->assertInternalType( 'boolean', $result );
	$this->assertTrue( $result );
}
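// For reference, a minimal sketch of the function under test: as used throughout
// this section, it reduces to a lookup of the namespace in the
// $smwgNamespacesWithSemanticLinks setting. The exact body is an assumption;
// only the behavior is implied by the source above.
function smwfIsSemanticsProcessed( $namespace ) {
	global $smwgNamespacesWithSemanticLinks;
	return !empty( $smwgNamespacesWithSemanticLinks[$namespace] );
}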
/**
 * This function takes care of storing the collected semantic data and of
 * clearing out any outdated entries for the processed page. It assumes that
 * parsing has happened and that all relevant data is contained in the provided
 * parser output.
 *
 * Optionally, this function also takes care of triggering indirect updates that might be
 * needed for overall database consistency. If the saved page describes a property or data type,
 * the method checks whether the property type, the data type, the allowed values, or the
 * conversion factors have changed. If so, it triggers SMWUpdateJobs for the relevant articles,
 * which then asynchronously update the semantic data in the database.
 *
 * @param $parseroutput ParserOutput object that contains the results of parsing which will
 * be stored.
 * @param $title Title object specifying the page that should be saved.
 * @param $makejobs Bool stating whether jobs should be created to trigger further updates if
 * this appears to be necessary after this update.
 *
 * @todo FIXME: Some job generations here might create too many jobs at once on a large wiki. Use incremental jobs instead.
 */
public static function storeData( $parseroutput, Title $title, $makejobs = true ) {
	global $smwgEnableUpdateJobs, $smwgDeclarationProperties, $smwgPageSpecialProperties;

	$semdata = $parseroutput->mSMWData;
	$namespace = $title->getNamespace();
	$processSemantics = smwfIsSemanticsProcessed( $namespace );

	if ( !isset( $semdata ) ) { // no data at all?
		$semdata = new SMWSemanticData( SMWDIWikiPage::newFromTitle( $title ) );
	}

	if ( $processSemantics ) {
		$props = array();

		foreach ( $smwgPageSpecialProperties as $propId ) {
			// Do not calculate the same property again.
			if ( array_key_exists( $propId, $props ) ) {
				continue;
			}

			// Remember that the property is processed.
			$props[$propId] = true;
			$prop = new SMWDIProperty( $propId );

			if ( count( $semdata->getPropertyValues( $prop ) ) > 0 ) {
				continue;
			}

			// Calculate the property value.
			$value = null;

			switch ( $propId ) {
				case '_MDAT':
					$timestamp = Revision::getTimestampFromId( $title, $title->getLatestRevID() );
					$value = self::getDataItemFromMWTimestamp( $timestamp );
					break;
				case '_CDAT':
					$timestamp = $title->getFirstRevision()->getTimestamp();
					$value = self::getDataItemFromMWTimestamp( $timestamp );
					break;
				case '_NEWP':
					$value = new SMWDIBoolean( $title->isNewPage() );
					break;
				case '_LEDT':
					$revision = Revision::newFromId( $title->getLatestRevID() );
					$user = User::newFromId( $revision->getUser() );
					$value = SMWDIWikiPage::newFromTitle( $user->getUserPage() );
					break;
			}

			if ( !is_null( $value ) ) {
				$semdata->addPropertyObjectValue( $prop, $value );
			} // Issue error or warning?
		} // foreach
	} else { // data found, but do all operations as if it was empty
		$semdata = new SMWSemanticData( $semdata->getSubject() );
	}

	// Check whether the semantic data has been changed; set $updatejobflag to true if so.
	// Careful: storage access must happen *before* the storage update;
	// even finding uses of a property fails after its type was changed.
	$updatejobflag = false;
	$jobs = array();

	if ( $makejobs && $smwgEnableUpdateJobs && $namespace == SMW_NS_PROPERTY ) {
		// If it is a property, we need to check whether the type or the allowed values have changed.
		$ptype = new SMWDIProperty( '_TYPE' );
		$oldtype = smwfGetStore()->getPropertyValues( $semdata->getSubject(), $ptype );
		$newtype = $semdata->getPropertyValues( $ptype );

		if ( !self::equalDatavalues( $oldtype, $newtype ) ) {
			$updatejobflag = true;
		} else {
			foreach ( $smwgDeclarationProperties as $prop ) {
				$pv = new SMWDIProperty( $prop );
				$oldvalues = smwfGetStore()->getPropertyValues( $semdata->getSubject(), $pv );
				$newvalues = $semdata->getPropertyValues( $pv );
				$updatejobflag = !self::equalDatavalues( $oldvalues, $newvalues );

				if ( $updatejobflag ) {
					// One changed declaration property is enough; without this break,
					// a later, unchanged property would reset the flag to false.
					break;
				}
			}
		}

		if ( $updatejobflag ) {
			$prop = new SMWDIProperty( $title->getDBkey() );
			$subjects = smwfGetStore()->getAllPropertySubjects( $prop );

			foreach ( $subjects as $subject ) {
				$subjectTitle = $subject->getTitle();

				if ( !is_null( $subjectTitle ) ) {
					// wikia change start - jobqueue migration
					$task = new \Wikia\Tasks\Tasks\JobWrapperTask();
					$task->call( 'SMWUpdateJob', $subjectTitle );
					$jobs[] = $task;
					// wikia change end
				}
			}

			wfRunHooks( 'smwUpdatePropertySubjects', array( &$jobs ) );

			$subjects = smwfGetStore()->getPropertySubjects( new SMWDIProperty( '_ERRP' ), $semdata->getSubject() );

			foreach ( $subjects as $subject ) {
				$subjectTitle = $subject->getTitle();

				if ( !is_null( $subjectTitle ) ) {
					// wikia change start - jobqueue migration
					$task = new \Wikia\Tasks\Tasks\JobWrapperTask();
					$task->call( 'SMWUpdateJob', $subjectTitle );
					$jobs[] = $task;
					// wikia change end
				}
			}
		}
	} elseif ( $makejobs && $smwgEnableUpdateJobs && $namespace == SMW_NS_TYPE ) {
		// If it is a type, we need to check whether the conversion factors have changed.
		$pconv = new SMWDIProperty( '_CONV' );
		$ptype = new SMWDIProperty( '_TYPE' );

		$oldfactors = smwfGetStore()->getPropertyValues( $semdata->getSubject(), $pconv );
		$newfactors = $semdata->getPropertyValues( $pconv );
		$updatejobflag = !self::equalDatavalues( $oldfactors, $newfactors );

		if ( $updatejobflag ) {
			$store = smwfGetStore();

			/// FIXME: this will kill large wikis! Use incremental updates!
			$dv = SMWDataValueFactory::newTypeIdValue( '__typ', $title->getDBkey() );
			$proppages = $store->getPropertySubjects( $ptype, $dv );

			foreach ( $proppages as $proppage ) {
				$propertyTitle = $proppage->getTitle();

				if ( !is_null( $propertyTitle ) ) {
					// wikia change start - jobqueue migration
					$task = new \Wikia\Tasks\Tasks\JobWrapperTask();
					$task->call( 'SMWUpdateJob', $propertyTitle );
					$jobs[] = $task;
					// wikia change end
				}

				$prop = new SMWDIProperty( $proppage->getDBkey() );
				$subjects = $store->getAllPropertySubjects( $prop );

				foreach ( $subjects as $subject ) {
					$subjectTitle = $subject->getTitle();

					if ( !is_null( $subjectTitle ) ) {
						// wikia change start - jobqueue migration
						$task = new \Wikia\Tasks\Tasks\JobWrapperTask();
						$task->call( 'SMWUpdateJob', $subjectTitle );
						$jobs[] = $task;
						// wikia change end
					}
				}

				$subjects = smwfGetStore()->getPropertySubjects( new SMWDIProperty( '_ERRP' ), $prop->getWikiPageValue() );

				foreach ( $subjects as $subject ) {
					$subjectTitle = $subject->getTitle();

					if ( !is_null( $subjectTitle ) ) {
						// wikia change start - jobqueue migration
						$task = new \Wikia\Tasks\Tasks\JobWrapperTask();
						$task->call( 'SMWUpdateJob', $subjectTitle );
						$jobs[] = $task;
						// wikia change end
					}
				}
			}
		}
	}

	// Actually store the semantic data, or at least clear it if needed.
	if ( $processSemantics ) {
		smwfGetStore()->updateData( $semdata );
	} else {
		smwfGetStore()->clearData( $semdata->getSubject() );
	}

	// Finally, trigger the relevant update jobs if necessary.
	if ( $updatejobflag ) {
		// wikia change start - jobqueue migration
		\Wikia\Tasks\Tasks\BaseTask::batch( $jobs );
		// wikia change end
	}

	return true;
}
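/**
 * A hedged sketch of a typical call site for storeData(): once MediaWiki has
 * parsed a page, the resulting ParserOutput (which carries mSMWData) is handed
 * over for storage. The function name and wiring here are illustrative
 * assumptions; only SMWParseData::storeData() itself comes from the source above.
 */
function smwfExampleStoreAfterParse( ParserOutput $parserOutput, Title $title ) {
	// Store the annotations; storeData() queues the follow-up SMWUpdateJobs
	// itself whenever a property or type declaration changed.
	SMWParseData::storeData( $parserOutput, $title, true );
}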
				$doit = true;
			}

			if ( $properties && $ns == SMW_NS_PROPERTY ) {
				$doit = true;
			}

			if ( $types && $ns == SMW_NS_TYPE ) {
				$doit = true;
			}

			if ( !$doit ) {
				continue;
			}
		}

		if ( $verbose ) {
			print "({$num_files}) Processing page with ID " . $id . " ...\n";
		}

		if ( smwfIsSemanticsProcessed( $title->getNamespace() ) ) {
			$revision = Revision::newFromTitle( $title );

			if ( $revision === null ) {
				continue;
			}

			$wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getID() );
			// The call here was originally SMWFactbox::storeData( $title, true ), which does
			// not match the storeData() signature defined above; assuming its boolean maps to
			// $makejobs, the call aligned with that API passes the fresh ParserOutput first.
			SMWParseData::storeData( $wgParser->getOutput(), $title, true );

			// Sleep to be nice to the server.
			if ( $delay !== false && ( $num_files + 1 ) % 100 === 0 ) {
				usleep( $delay );
			}
		} else {
			smwfGetStore()->deleteSubject( $title );

			// Sleep to be nice to the server (for this case, sleep only every 1000 pages,
			// so that large chunks of e.g. messages are processed more quickly).
			if ( $delay !== false && ( $num_files + 1 ) % 1000 === 0 ) {
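// Usage sketch for the refresh loop above. Assumption: this fragment belongs to
// SMW's data-refresh maintenance script, whose conventional flags map onto the
// $verbose/$delay/$properties/$types variables used here; flag names are inferred,
// not taken from this excerpt:
//
//   php SMW_refreshData.php -v -d 50    # verbose output, 50 µs pause between pages
//   php SMW_refreshData.php -p -t       # restrict the run to property and type pages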