/**
 * This function takes care of storing the collected semantic data and takes
 * care of clearing out any outdated entries for the processed page. It assumes that
 * parsing has happened and that all relevant data is contained in the provided parser
 * output.
 *
 * Optionally, this function also takes care of triggering indirect updates that might be
 * needed for overall database consistency. If the saved page describes a property or data type,
 * the method checks whether the property type, the data type, the allowed values, or the
 * conversion factors have changed. If so, it triggers SMWUpdateJobs for the relevant articles,
 * which then asynchronously update the semantic data in the database.
 *
 * @param $parseroutput ParserOutput object that contains the results of parsing which will
 * be stored.
 * @param $title Title object specifying the page that should be saved.
 * @param $makejobs Bool stating whether jobs should be created to trigger further updates if
 * this appears to be necessary after this update.
 *
 * @return bool Always true.
 *
 * @todo FIXME: Some job generations here might create too many jobs at once on a large wiki. Use incremental jobs instead.
 */
public static function storeData($parseroutput, Title $title, $makejobs = true) {
    global $smwgEnableUpdateJobs, $smwgDeclarationProperties, $smwgPageSpecialProperties;

    // Test the property itself so that a parser output without SMW data does not raise a notice.
    $semdata = isset($parseroutput->mSMWData) ? $parseroutput->mSMWData : null;
    $namespace = $title->getNamespace();
    $processSemantics = smwfIsSemanticsProcessed($namespace);

    if (!isset($semdata)) { // no data at all?
        $semdata = new SMWSemanticData(SMWDIWikiPage::newFromTitle($title));
    }

    if ($processSemantics) {
        $props = array();

        foreach ($smwgPageSpecialProperties as $propId) {
            // Do not calculate the same property again.
            if (array_key_exists($propId, $props)) {
                continue;
            }

            // Remember the property is processed.
            $props[$propId] = true;
            $prop = new SMWDIProperty($propId);

            // Values that are already present are left untouched.
            if (count($semdata->getPropertyValues($prop)) > 0) {
                continue;
            }

            // Calculate property value.
            $value = null;

            switch ($propId) {
                case '_MDAT':
                    $timestamp = Revision::getTimeStampFromID($title, $title->getLatestRevID());
                    $value = self::getDataItemFromMWTimestamp($timestamp);
                    break;
                case '_CDAT':
                    // There may be no first revision (e.g. the page does not exist yet).
                    $firstRevision = $title->getFirstRevision();
                    if (!is_null($firstRevision)) {
                        $value = self::getDataItemFromMWTimestamp($firstRevision->getTimestamp());
                    }
                    break;
                case '_NEWP':
                    $value = new SMWDIBoolean($title->isNewPage());
                    break;
                case '_LEDT':
                    // The latest revision may not be loadable; skip the property in that case.
                    $revision = Revision::newFromId($title->getLatestRevID());
                    if (!is_null($revision)) {
                        $user = User::newFromId($revision->getUser());
                        $value = SMWDIWikiPage::newFromTitle($user->getUserPage());
                    }
                    break;
            }

            if (!is_null($value)) {
                $semdata->addPropertyObjectValue($prop, $value);
            } // Issue error or warning?
        } // foreach
    } else { // data found, but do all operations as if it was empty
        $semdata = new SMWSemanticData($semdata->getSubject());
    }

    // Check if the semantic data has been changed.
    // Sets the updateflag to true if so.
    // Careful: storage access must happen *before* the storage update;
    // even finding uses of a property fails after its type was changed.
    $updatejobflag = false;
    $jobs = array();

    if ($makejobs && $smwgEnableUpdateJobs && $namespace == SMW_NS_PROPERTY) {
        // If it is a property, then we need to check if the type or the allowed values have been changed.
        $ptype = new SMWDIProperty('_TYPE');
        $oldtype = smwfGetStore()->getPropertyValues($semdata->getSubject(), $ptype);
        $newtype = $semdata->getPropertyValues($ptype);

        if (!self::equalDatavalues($oldtype, $newtype)) {
            $updatejobflag = true;
        } else {
            foreach ($smwgDeclarationProperties as $prop) {
                $pv = new SMWDIProperty($prop);
                $oldvalues = smwfGetStore()->getPropertyValues($semdata->getSubject(), $pv);
                $newvalues = $semdata->getPropertyValues($pv);

                // Stop at the first difference: a later, unchanged declaration
                // property must not reset the flag.
                if (!self::equalDatavalues($oldvalues, $newvalues)) {
                    $updatejobflag = true;
                    break;
                }
            }
        }

        if ($updatejobflag) {
            $prop = new SMWDIProperty($title->getDBkey());
            $subjects = smwfGetStore()->getAllPropertySubjects($prop);
            $jobs = array_merge($jobs, self::createUpdateTasksForPages($subjects));

            // The hook runs after the property subjects were queued and before
            // the subjects of error annotations are added.
            wfRunHooks('smwUpdatePropertySubjects', array(&$jobs));

            $subjects = smwfGetStore()->getPropertySubjects(new SMWDIProperty('_ERRP'), $semdata->getSubject());
            $jobs = array_merge($jobs, self::createUpdateTasksForPages($subjects));
        }
    } elseif ($makejobs && $smwgEnableUpdateJobs && $namespace == SMW_NS_TYPE) {
        // if it is a type we need to check if the conversion factors have been changed
        $pconv = new SMWDIProperty('_CONV');
        $ptype = new SMWDIProperty('_TYPE');

        $oldfactors = smwfGetStore()->getPropertyValues($semdata->getSubject(), $pconv);
        $newfactors = $semdata->getPropertyValues($pconv);
        $updatejobflag = !self::equalDatavalues($oldfactors, $newfactors);

        if ($updatejobflag) {
            $store = smwfGetStore();

            /// FIXME: this will kill large wikis! Use incremental updates!
            $dv = SMWDataValueFactory::newTypeIdValue('__typ', $title->getDBkey());
            $proppages = $store->getPropertySubjects($ptype, $dv);

            foreach ($proppages as $proppage) {
                // The property page itself ...
                $jobs = array_merge($jobs, self::createUpdateTasksForPages(array($proppage)));

                // ... every page using that property ...
                $prop = new SMWDIProperty($proppage->getDBkey());
                $subjects = $store->getAllPropertySubjects($prop);
                $jobs = array_merge($jobs, self::createUpdateTasksForPages($subjects));

                // ... and every page carrying an error annotation for it.
                $subjects = smwfGetStore()->getPropertySubjects(new SMWDIProperty('_ERRP'), $prop->getWikiPageValue());
                $jobs = array_merge($jobs, self::createUpdateTasksForPages($subjects));
            }
        }
    }

    // Actually store semantic data, or at least clear it if needed
    if ($processSemantics) {
        smwfGetStore()->updateData($semdata);
    } else {
        smwfGetStore()->clearData($semdata->getSubject());
    }

    // Finally trigger relevant Updatejobs if necessary
    if ($updatejobflag) {
        // wikia change start - jobqueue migration
        \Wikia\Tasks\Tasks\BaseTask::batch($jobs);
        // wikia change end
    }

    return true;
}

/**
 * Builds one SMWUpdateJob wrapper task for each given page that has a title.
 *
 * @param $pages array|Traversable of objects offering getTitle(); entries whose
 * getTitle() returns null are skipped.
 *
 * @return array List of \Wikia\Tasks\Tasks\JobWrapperTask objects, in input order.
 */
private static function createUpdateTasksForPages($pages) {
    $tasks = array();

    foreach ($pages as $page) {
        $pageTitle = $page->getTitle();

        if (!is_null($pageTitle)) {
            // wikia change start - jobqueue migration
            $task = new \Wikia\Tasks\Tasks\JobWrapperTask();
            $task->call('SMWUpdateJob', $pageTitle);
            $tasks[] = $task;
            // wikia change end
        }
    }

    return $tasks;
}
/**
 * Main function, that takes an array of RDFIOWikiPage objects, and writes to
 * MediaWiki using the WikiObjectModel extension.
 *
 * For every page the current wikitext (if any) is scanned for inline property
 * annotations, category links and template calls. The categories and facts of
 * the incoming page data are then merged into that text: facts that map to a
 * template parameter update the template call, facts already annotated inline
 * are replaced in place, and everything else is appended. The result is written
 * with writeToArticle(). Entries whose key is not a valid title are skipped.
 *
 * @param array $wikiPages Page data objects keyed by wiki page title text; each
 * object must offer getCategories() and getFacts().
 */
public function import($wikiPages) {
    foreach ($wikiPages as $wikiTitle => $wikiPage) {
        $mwTitleObj = Title::newFromText($wikiTitle);

        // An invalid title yields no Title object; nothing can be read or written for it.
        if (!is_object($mwTitleObj)) {
            continue;
        }

        $titleExists = $mwTitleObj->exists();

        // Reset all per-page state so nothing leaks over from the previous page.
        $oldWikiContent = '';
        $newTemplateCalls = '';
        $mwProperties = array();
        $mwCategories = array();
        $mwTemplates = array();
        $updatedTemplateCalls = array();

        // If page exists, get its data
        if ($titleExists) {
            $mwPageObj = WikiPage::factory($mwTitleObj);
            $oldWikiContent = (string)$mwPageObj->getText();

            preg_match('/^\\s?$/', $oldWikiContent, $isBlank);

            // Find all the properties stored in the conventional way within the page
            preg_match_all('/\\[\\[(.*)::(.*)\\]\\]/', $oldWikiContent, $propertyMatches);
            foreach ($propertyMatches[1] as $index => $propertyName) {
                $mwProperties[$propertyName] = array(
                    'value' => $propertyMatches[2][$index],
                    'wikitext' => $propertyMatches[0][$index]
                );
            }

            // Find all the categories, in the same way
            preg_match_all('/\\[\\[Category:(.*)\\]\\]/', $oldWikiContent, $categoryMatches);
            foreach ($categoryMatches[1] as $index => $categoryName) {
                $mwCategories[$categoryName] = array('wikitext' => $categoryMatches[0][$index]);
            }

            // Find all the templates
            preg_match_all('/\\{\\{\\s?([^#][a-zA-Z0-9]+)\\s?\\|(.*)\\}\\}/U', $oldWikiContent, $templateMatches);
            foreach ($templateMatches[1] as $index => $templateName) {
                $mwTemplates[$templateName]['templateCallText'] = $templateMatches[0][$index];
                $mwTemplates[$templateName]['templateParamsValues'] = $templateMatches[2][$index];
            }

            // A blank page gets the (empty) template calls belonging to its categories.
            if (!empty($isBlank)) {
                $newTemplates = $this->getTemplatesForCategories($wikiPage);
                foreach ($newTemplates as $name => $callText) {
                    $mwTemplates[$name]['templateCallText'] = $callText;
                    $newTemplateCalls .= $callText . "\n";
                }
            }

            // Extract the wikitext from each template
            foreach (array_keys($mwTemplates) as $templateName) {
                $mwTemplates[$templateName]['properties'] = array();
                $mwTemplates[$templateName]['parameters'] = array();

                $mwTemplatePageTitle = Title::newFromText($templateName, NS_TEMPLATE);
                if (!is_object($mwTemplatePageTitle)) {
                    continue;
                }

                $mwTemplateObj = WikiPage::factory($mwTemplatePageTitle);
                $mwTemplateText = (string)$mwTemplateObj->getText();
                $mwTemplates[$templateName]['wikitext'] = $mwTemplateText;

                // Get the properties and parameter names used in the templates
                preg_match_all('/\\[\\[(.*)::\\{\\{\\{(.*)\\|?\\}\\}\\}\\]\\]/', $mwTemplateText, $templateParameterMatches);
                foreach ($templateParameterMatches[2] as $index => $rawParameter) {
                    // The greedy capture also swallows a trailing "|" or "|default";
                    // the parameter name is the part in front of the first pipe.
                    $parameterParts = explode('|', $rawParameter, 2);
                    $templateParameter = $parameterParts[0];
                    $propertyNameInTemplate = $templateParameterMatches[1][$index];

                    // Store parameter-property pairings both ways round for easy lookup
                    $mwTemplates[$templateName]['parameters'][$templateParameter]['property'] = $propertyNameInTemplate;
                    $mwTemplates[$templateName]['properties'][$propertyNameInTemplate] = $templateParameter;
                }

                // Get the parameter values used in the templates
                if (array_key_exists('templateParamsValues', $mwTemplates[$templateName])) {
                    $templateParameterValues = explode('|', $mwTemplates[$templateName]['templateParamsValues']);
                    foreach ($templateParameterValues as $paramPair) {
                        // Split on the first "=" only, so values containing "=" stay intact.
                        $paramValueArray = explode('=', $paramPair, 2);
                        if (count($paramValueArray) < 2) {
                            // Positional parameter without a name; nothing to map.
                            continue;
                        }
                        $mwTemplates[$templateName]['parameters'][$paramValueArray[0]]['value'] = $paramValueArray[1];
                    }
                }
            }

            // put existing template calls into an array for updating more than one fact
            foreach ($mwTemplates as $name => $templateData) {
                $updatedTemplateCalls[$name] = $templateData['templateCallText'];
            }
        }

        $newWikiContent = $oldWikiContent; // using new variable to separate extraction from editing

        if (!$titleExists) {
            // if page doesn't exist, check for categories in the wikipage data, and add an empty template call to the page wikitext
            $newTemplates = $this->getTemplatesForCategories($wikiPage);
            foreach ($newTemplates as $name => $callText) {
                $mwTemplates[$name]['templateCallText'] = $callText;
                $updatedTemplateCalls[$name] = $callText;
                $newTemplateCalls .= $callText . "\n";
            }
        }

        if ($newTemplateCalls) {
            $newWikiContent .= $newTemplateCalls;
        }

        // Add categories to the wiki text
        // The new wikitext is actually added to the page at the end.
        // This allows us to add a template call associated with the category and then populate it with parameters in the facts section
        $newCategoriesAsWikiText = "\n";
        foreach ($wikiPage->getCategories() as $category) {
            $categoryTitle = Title::newFromText($category, NS_CATEGORY);
            if (!is_object($categoryTitle)) {
                continue;
            }

            $categoryTitleWikified = $categoryTitle->getText();
            if (!array_key_exists($categoryTitleWikified, $mwCategories)) {
                $newCategoriesAsWikiText .= '[[Category:' . $categoryTitleWikified . "]]\n"; // Is there an inbuilt class method to do this? Can't find one in Category.
            }
        }

        // Add facts (properties) to the wiki text
        $newPropertiesAsWikiText = "\n";
        foreach ($wikiPage->getFacts() as $fact) {
            $pred = $fact['p'];
            $obj = $fact['o'];

            $predTitle = Title::newFromText($pred);
            if (!is_object($predTitle)) {
                // A predicate that is no valid title cannot be written as a property.
                continue;
            }
            $predTitleWikified = $predTitle->getText();

            $isEquivURI = strpos($pred, "Equivalent URI") !== false;
            $hasLocalUrl = strpos($obj, "Special:URIResolver") !== false;

            // Don't update Equivalent URI if the URL is a local URL (thus containing
            // "Special:URIResolver").
            if ($hasLocalUrl && $isEquivURI) {
                continue;
            }

            // Find whether the property is in any template(s) on the page
            $templatesWithProperty = array();
            foreach ($mwTemplates as $templateName => $templateData) {
                if (isset($templateData['properties']) && array_key_exists($predTitleWikified, $templateData['properties'])) {
                    $templatesWithProperty[] = $templateName;
                }
            }

            $isInPage = array_key_exists($predTitleWikified, $mwProperties);

            // Set new value - this will be used in different ways depending on whether property is inside or outside template
            if ($isEquivURI) {
                // FIXME: Should be done for all "URL type" facts, not just
                // Equivalent URI:s
                // Since this is a URL, it should not be made into a WikiTitle
                $newSMWValue = SMWDataValueFactory::newTypeIdValue('_uri', $obj);
                $newValueText = $newSMWValue->getWikiValue();
            } else {
                // Create an updated property
                $objTitle = Title::newFromText($obj);
                if (is_object($objTitle)) {
                    $newSMWValue = SMWWikiPageValue::makePageFromTitle($objTitle);
                    $newValueText = $newSMWValue->getWikiValue();
                } else {
                    // Not a valid page title; fall back to the raw object text.
                    $newValueText = $obj;
                }
            }

            // Handle updating differently depending on whether property exists in/outside template
            if (!empty($templatesWithProperty)) {
                // Code to update/add property to template call(s)
                foreach ($templatesWithProperty as $templateName) {
                    $oldTemplateCall = $updatedTemplateCalls[$templateName]; // use temp value as may be updated more than once
                    $parameter = $mwTemplates[$templateName]['properties'][$predTitleWikified];

                    $hasOldValue = isset($mwTemplates[$templateName]['parameters'][$parameter])
                        && array_key_exists('value', $mwTemplates[$templateName]['parameters'][$parameter]);

                    $newParamValueText = $parameter . '=' . $newValueText;
                    $newTemplateCall = $oldTemplateCall;

                    if ($hasOldValue) {
                        // if the parameter already had a value and there's a new value, replace this value in the template call
                        $oldValue = $mwTemplates[$templateName]['parameters'][$parameter]['value'];
                        if ($newValueText !== $oldValue) {
                            $oldParamValueText = $parameter . '=' . $oldValue;
                            $newTemplateCall = str_replace($oldParamValueText, $newParamValueText, $oldTemplateCall);
                        }
                    } else {
                        // if the parameter wasn't previously populated, add it to the parameter list in the template call
                        preg_match('/(\\{\\{\\s?.*\\s?\\|?.?)(\\}\\})/', $oldTemplateCall, $templateCallMatch);
                        if (!empty($templateCallMatch)) {
                            $newTemplateCall = $templateCallMatch[1] . '|' . $newParamValueText . $templateCallMatch[2];
                        }
                    }

                    // Apply the change per template, so every template that
                    // carries the property is updated, not only the last one.
                    if ($newTemplateCall !== $oldTemplateCall) {
                        // if the template call has been updated, change it in the page wikitext and update the placeholder variable
                        $newWikiContent = str_replace($oldTemplateCall, $newTemplateCall, $newWikiContent);
                        $updatedTemplateCalls[$templateName] = $newTemplateCall;
                    }
                }
            } elseif ($isInPage) {
                // if it's a standard property in the page, replace value with new one if different
                $oldPropertyText = $mwProperties[$predTitleWikified]['wikitext']; // Store the old wiki text for the fact, in order to replace later
                $newPropertyText = '[[' . $predTitleWikified . '::' . $newValueText . ']]';

                // Replace the existing property with new value
                if ($newPropertyText !== $oldPropertyText) {
                    $newWikiContent = str_replace($oldPropertyText, $newPropertyText, $newWikiContent);
                }
            } else {
                // If property isn't in the page (outside of templates) ...
                $newPropertyAsWikiText = '[[' . $predTitleWikified . '::' . $obj . ']]';
                $newPropertiesAsWikiText .= $newPropertyAsWikiText . "\n";
            }
        }

        $newWikiContent .= $newPropertiesAsWikiText;
        $newWikiContent .= $newCategoriesAsWikiText;

        // Write to wiki
        $this->writeToArticle($wikiTitle, $newWikiContent, 'Update by RDFIO');
    }
}