/**
 * Extracts category tags from wikitext and returns a hash with an array
 * of categories data and a modified version of the wikitext with the category
 * tags removed.
 *
 * The result is memoized in self::$data. Unless $force is true, a previously
 * computed result is returned as-is, without looking at $wikitext or $lang.
 *
 * @param String $wikitext wikitext to extract the category tags from
 * @param Boolean $force default === false; set it to true to skip the cache
 * @param String|null $lang passed through to self::getDefaultNamespaces()
 *        (presumably a language code -- confirm against that method)
 *
 * @return Array hash with keys 'categories' (result of self::parseNode()) and
 *         'wikitext' (right-trimmed wikitext rebuilt from the DOM tree)
 *
 * @throws Exception anything thrown while processing is rethrown unchanged,
 *         after the CategorySelectEnabled flag has been switched back off
 */
public static function extractCategoriesFromWikitext($wikitext, $force = false, $lang = null) {
	wfProfileIn(__METHOD__);

	if (!$force && is_array(self::$data)) {
		wfProfileOut(__METHOD__);
		return self::$data;
	}

	$app = F::app();

	try {
		// enable changes in Preprocessor and Parser
		$app->wg->CategorySelectEnabled = true;

		// prepare Parser
		$app->wg->Parser->startExternalParse($app->wg->Title, new ParserOptions(), OT_WIKI);

		// get DOM tree [PPNode_DOM class] as an XML string
		$xml = $app->wg->Parser->preprocessToDom($wikitext)->__toString();

		// disable changes in Preprocessor and Parser
		$app->wg->CategorySelectEnabled = false;

		// add encoding information
		$xml = '<?xml version="1.0" encoding="UTF-8"?>' . $xml;

		// init variables
		self::$nodeLevel = 0;
		self::getDefaultNamespaces($lang);

		// we will ignore categories added inside following list of tags (BugId:8208)
		self::$tagsWhiteList = array_keys($app->wg->Parser->mTagHooks);

		// and includeonly tags (BugId:99450)
		self::$tagsWhiteList[] = 'includeonly';

		// create XML DOM document from provided XML
		// NOTE(review): the return value of loadXML() is not checked; if the XML
		// were malformed, $root below would presumably be null -- confirm whether
		// that case needs explicit handling.
		$dom = new DOMDocument();
		$dom->loadXML($xml);

		// get everything under main node
		$root = $dom->getElementsByTagName('root')->item(0);

		self::$frame = $app->wg->Parser->getPreprocessor()->newFrame();

		$categories = self::parseNode($root);

		// make wikitext from DOM tree
		$modifiedWikitext = self::$frame->expand($root, PPFrame::NO_TEMPLATES | PPFrame::RECOVER_COMMENTS);

		// replace markers back to wikitext
		$modifiedWikitext = $app->wg->Parser->mStripState->unstripBoth($modifiedWikitext);

		self::$data = array('categories' => $categories, 'wikitext' => rtrim($modifiedWikitext));
	} catch (Exception $e) {
		// Never leave the global flag switched on, otherwise every later parse in
		// this request would run with the CategorySelect changes enabled.
		// Catch-and-rethrow is used instead of "finally" so that the code keeps
		// working on PHP versions older than 5.5.
		$app->wg->CategorySelectEnabled = false;

		// keep the profiling section balanced on the failure path as well
		wfProfileOut(__METHOD__);

		throw $e;
	}

	wfProfileOut(__METHOD__);

	return self::$data;
}