/**
 * Fetches the requested page's markup, cleans it up and returns the page DOM
 * together with the elements and metadata collected from it.
 *
 * The 'BSUEModulePDFbeforeGetPage' hook runs first and receives $aParams by
 * reference; the 'BSUEModulePDFgetPage' hook runs right before returning and
 * receives the result array and $aParams by reference.
 *
 * @param array $aParams Needs the 'article-id' or 'title' key to be set and
 * valid. An optional 'display-title' overrides the bookmark name, the text of
 * the first heading and the 'title' entry of the returned metadata.
 * @return array With the keys 'resources', 'dom', 'firstheading-element',
 * 'bodycontent-element', 'toc-ul-element', 'bookmarks-dom',
 * 'bookmark-element' and 'meta'. The three XPath-derived entries
 * ('firstheading-element', 'bodycontent-element', 'toc-ul-element') are null
 * when the page DOM contains no matching node.
 * @throws \InvalidArgumentException If neither 'article-id' nor 'title'
 * resolves to a Title.
 */
public static function getPage($aParams) {
	wfRunHooks('BSUEModulePDFbeforeGetPage', array(&$aParams));

	$oBookmarksDOM = new DOMDocument();
	$oBookmarksDOM->loadXML('<bookmarks></bookmarks>');

	// Resolve the page: the article id takes precedence, the title text is
	// only the fallback.
	$oTitle = null;
	if (isset($aParams['article-id'])) {
		$oTitle = Title::newFromID($aParams['article-id']);
	}
	if (!($oTitle instanceof Title) && isset($aParams['title'])) {
		//HINT: This is probably the wrong place for urldecode(); Should be
		//done by caller. I.e. BookExportModulePDF
		$oTitle = Title::newFromText(urldecode($aParams['title']));
	}
	if (!($oTitle instanceof Title)) {
		// Fail early with a clear message instead of handing null to the
		// content provider and the collect/cleanup helpers below.
		throw new \InvalidArgumentException(
			__METHOD__ . ': neither "article-id" nor "title" resolves to a valid title.'
		);
	}

	$oPCP = new BsPageContentProvider();
	// Caller supplied params win; 'follow-redirects' is only a default.
	$oPageDOM = $oPCP->getDOMDocumentContentFor(
		$oTitle,
		$aParams + array('follow-redirects' => true)
	); // TODO RBV (06.12.11 17:09): Follow Redirect... setting or default?

	//Collect Metadata
	$aData = self::collectData($oTitle, $oPageDOM, $aParams);

	//Cleanup DOM
	self::cleanUpDOM($oTitle, $oPageDOM, $aParams);

	$oBookmarkNode = BsUniversalExportHelper::getBookmarkElementForPageDOM($oPageDOM);
	// The node has to be imported into the bookmarks document before it can
	// be appended there. Note that the imported copy lives in
	// $oBookmarksDOM, while $oBookmarkNode remains the original node.
	//HINT: http://www.mm-newmedia.de/blog/2010/05/wrong-document-error-wtf/
	$oBookmarksDOM->documentElement->appendChild(
		$oBookmarksDOM->importNode($oBookmarkNode, true)
	);

	// item(0) yields null when the query matches nothing, so each of the
	// three elements below may be null.
	$oDOMXPath = new DOMXPath($oPageDOM);
	$oFirstHeading = $oDOMXPath->query("//*[contains(@class, 'firstHeading')]")->item(0);
	$oBodyContent = $oDOMXPath->query("//*[contains(@class, 'bodyContent')]")->item(0);
	// TODO RBV (01.02.12 11:28): What if no TOC?
	$oTOCULElement = $oDOMXPath->query("//*[contains(@class, 'toc')]//ul")->item(0);

	if (isset($aParams['display-title'])) {
		$oBookmarkNode->setAttribute('name', $aParams['display-title']);
		// Pages without a first heading element must not break the export.
		// NOTE(review): writing nodeValue may let libxml interpret entity
		// references, so a raw '&' in the display title could get mangled -
		// confirm whether callers pass pre-escaped text.
		if ($oFirstHeading !== null) {
			$oFirstHeading->nodeValue = $aParams['display-title'];
		}
		$aData['meta']['title'] = $aParams['display-title'];
	}

	$aPage = array(
		'resources' => $aData['resources'],
		'dom' => $oPageDOM,
		'firstheading-element' => $oFirstHeading,
		'bodycontent-element' => $oBodyContent,
		'toc-ul-element' => $oTOCULElement,
		'bookmarks-dom' => $oBookmarksDOM,
		'bookmark-element' => $oBookmarkNode,
		'meta' => $aData['meta']
	);

	wfRunHooks('BSUEModulePDFgetPage', array($oTitle, &$aPage, &$aParams, $oDOMXPath));

	return $aPage;
}