/**
 *
 */
function wfSpecialExport($page = '') {
	global $wgOut, $wgLang, $wgRequest;

	if ($wgRequest->getVal('action') == 'submit') {
		$page = $wgRequest->getText('pages');
		$curonly = $wgRequest->getCheck('curonly');
	} else {
		# Pre-check the 'current version only' box in the UI
		$curonly = true;
	}

	if ($page != '') {
		$wgOut->disable();
		header("Content-type: application/xml; charset=utf-8");
		$pages = explode("\n", $page);

		$db =& wfGetDB(DB_SLAVE);
		$history = $curonly ? MW_EXPORT_CURRENT : MW_EXPORT_FULL;
		$exporter = new WikiExporter($db, $history);
		$exporter->openStream();
		$exporter->pagesByName($pages);
		$exporter->closeStream();
		return;
	}

	$wgOut->addWikiText(wfMsg("exporttext"));
	$titleObj = Title::makeTitle(NS_SPECIAL, "Export");
	$action = $titleObj->escapeLocalURL('action=submit');
	$wgOut->addHTML("
<form method='post' action=\"{$action}\">
<input type='hidden' name='action' value='submit' />
<textarea name='pages' cols='40' rows='10'></textarea><br />
<label><input type='checkbox' name='curonly' value='true' checked='checked' />
" . wfMsg("exportcuronly") . "</label><br />
<input type='submit' />
</form>
");
}
/**
 * @covers WikiExporter::pageByTitle
 */
public function testPageByTitle() {
	global $wgContLang;

	$pageTitle = 'UTPage';

	$exporter = new WikiExporter($this->db, WikiExporter::FULL);
	$title = Title::newFromText($pageTitle);

	$sink = new DumpStringOutput();
	$exporter->setOutputSink($sink);
	$exporter->openStream();
	$exporter->pageByTitle($title);
	$exporter->closeStream();

	// This throws an error if the XML output is invalid
	$xmlObject = simplexml_load_string($sink);

	// Check namespaces match xml
	$xmlNamespaces = (array) $xmlObject->siteinfo->namespaces->namespace;
	$xmlNamespaces = str_replace(' ', '_', $xmlNamespaces);
	unset($xmlNamespaces['@attributes']);
	foreach ($xmlNamespaces as &$namespaceObject) {
		if (is_object($namespaceObject)) {
			$namespaceObject = '';
		}
	}

	$actualNamespaces = (array) $wgContLang->getNamespaces();
	$actualNamespaces = array_values($actualNamespaces);
	$this->assertEquals($actualNamespaces, $xmlNamespaces);

	// Check xml page title correct
	$xmlTitle = (array) $xmlObject->page->title;
	$this->assertEquals($pageTitle, $xmlTitle[0]);

	// Check xml page text is not empty
	$text = (array) $xmlObject->page->revision->text;
	$this->assertNotEquals('', $text[0]);
}
public function testExportDemo() {
	$fname = "../../docs/export-demo.xml";
	$version = WikiExporter::schemaVersion();

	$dom = new DomDocument();
	$dom->load($fname);

	// Ensure the demo is for the current version
	$this->assertEquals(
		$dom->documentElement->getAttribute('version'),
		$version,
		'export-demo.xml should have the current version'
	);

	$this->assertTrue(
		$dom->schemaValidate("../../docs/export-" . $version . ".xsd"),
		"schemaValidate has found an error"
	);
}
/**
 * Validates an XML file against the XSD.
 *
 * The validation is slow, because PHP has to read the XSD on each call.
 *
 * @param $fname string: name of file to validate
 */
protected function validateXmlFileAgainstXsd($fname) {
	$version = WikiExporter::schemaVersion();

	$dom = new DomDocument();
	$dom->load($fname);

	try {
		$this->assertTrue(
			$dom->schemaValidate("../../docs/export-" . $version . ".xsd"),
			"schemaValidate has found an error"
		);
	} catch (Exception $e) {
		$this->fail("xml not valid against xsd: " . $e->getMessage());
	}
}
/**
 * Validates an XML file against the XSD.
 *
 * The validation is slow, because PHP has to read the XSD on each call.
 *
 * @param $fname string: name of file to validate
 */
protected function validateXmlFileAgainstXsd($fname) {
	$version = WikiExporter::schemaVersion();

	$dom = new DomDocument();
	$dom->load($fname);

	// Ensure the dump is for the current schema version
	$this->assertEquals(
		$dom->documentElement->getAttribute('version'),
		$version,
		$fname . ' should have the current version'
	);

	try {
		$this->assertTrue(
			$dom->schemaValidate("../../docs/export-" . $version . ".xsd"),
			"schemaValidate has found an error"
		);
	} catch (Exception $e) {
		$this->fail("xml not valid against xsd: " . $e->getMessage());
	}
}
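/*
 * A hedged usage sketch (not from MediaWiki itself) showing how the
 * validateXmlFileAgainstXsd() helper above could be driven from a test:
 * capture an export with DumpStringOutput, spill it to a temp file, and
 * validate it against the bundled XSD. The test name and the temp-file
 * handling are illustrative assumptions.
 */
public function testFreshExportValidatesAgainstXsd() {
	$exporter = new WikiExporter($this->db, WikiExporter::FULL);
	$sink = new DumpStringOutput();
	$exporter->setOutputSink($sink);

	$exporter->openStream();
	$exporter->pageByTitle(Title::newFromText('UTPage'));
	$exporter->closeStream();

	// The helper takes a file name, so write the captured XML to disk first.
	$fname = tempnam(sys_get_temp_dir(), 'export');
	file_put_contents($fname, $sink);
	$this->validateXmlFileAgainstXsd($fname);
	unlink($fname);
}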
/**
 * @covers WikiExporter::pageByTitle
 */
public function testPageByTitle() {
	global $wgContLang;

	$pageTitle = 'UTPage';

	$exporter = new WikiExporter($this->db, WikiExporter::FULL);
	$title = Title::newFromText($pageTitle);

	ob_start();
	$exporter->openStream();
	$exporter->pageByTitle($title);
	$exporter->closeStream();
	$xmlString = ob_get_clean();

	// This throws an error if the XML output is invalid
	$xmlObject = simplexml_load_string($xmlString);

	/**
	 * Check namespaces match xml
	 * FIXME: PHP 5.3 support. When we don't support PHP 5.3,
	 * add ->namespace to object and remove from array
	 */
	$xmlNamespaces = (array) $xmlObject->siteinfo->namespaces;
	$xmlNamespaces = str_replace(' ', '_', $xmlNamespaces['namespace']);
	unset($xmlNamespaces['@attributes']);
	foreach ($xmlNamespaces as &$namespaceObject) {
		if (is_object($namespaceObject)) {
			$namespaceObject = '';
		}
	}

	$actualNamespaces = (array) $wgContLang->getNamespaces();
	$actualNamespaces = array_values($actualNamespaces);
	$this->assertEquals($actualNamespaces, $xmlNamespaces);

	// Check xml page title correct
	$xmlTitle = (array) $xmlObject->page->title;
	$this->assertEquals($pageTitle, $xmlTitle[0]);

	// Check xml page text is not empty
	$text = (array) $xmlObject->page->revision->text;
	$this->assertNotEquals('', $text[0]);
}
function dump($history) {
	# This shouldn't happen if on console... ;)
	header('Content-type: text/html; charset=UTF-8');

	# Notice messages will foul up your XML output even if they're
	# relatively harmless.
	ini_set('display_errors', false);

	$this->startTime = wfTime();

	$dbr =& wfGetDB(DB_SLAVE);
	$this->maxCount = $dbr->selectField('page', 'MAX(page_id)', '', 'BackupDumper::dump');
	$this->startTime = wfTime();

	$db =& $this->backupDb();
	$exporter = new WikiExporter($db, $history, MW_EXPORT_STREAM);
	$exporter->setPageCallback(array(&$this, 'reportPage'));
	$exporter->setRevisionCallback(array(&$this, 'revCount'));

	$exporter->openStream();
	if (is_null($this->pages)) {
		$exporter->allPages();
	} else {
		$exporter->pagesByName($this->pages);
	}
	$exporter->closeStream();

	$this->report(true);
}
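/*
 * A hedged usage note: this dump() method is normally driven by the
 * dumpBackup.php maintenance script rather than called directly. Assuming a
 * standard MediaWiki checkout, a dump is streamed to a file from the command
 * line like so (page-selection flags vary by version):
 *
 *   php maintenance/dumpBackup.php --current > current-revisions.xml
 *   php maintenance/dumpBackup.php --full > full-history.xml
 */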
function __construct( &$db ) {
	parent::__construct( $db, WikiExporter::FULL );
}
/**
 *
 */
function wfSpecialExport($page = '') {
	global $wgOut, $wgRequest, $wgExportAllowListContributors;
	global $wgExportAllowHistory, $wgExportMaxHistory;

	$curonly = true;
	if ($wgRequest->getVal('action') == 'submit') {
		$page = $wgRequest->getText('pages');
		$curonly = $wgRequest->getCheck('curonly');
	}
	if ($wgRequest->getCheck('history')) {
		$curonly = false;
	}
	if (!$wgExportAllowHistory) {
		// Override
		$curonly = true;
	}

	$list_authors = $wgRequest->getCheck('listauthors');
	if (!$curonly || !$wgExportAllowListContributors) {
		$list_authors = false;
	}

	if ($page != '') {
		$wgOut->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		while ($status = ob_get_status()) {
			ob_end_clean();
			if ($status['name'] == 'ob_gzhandler') {
				header('Content-Encoding:');
			}
		}
		header("Content-type: application/xml; charset=utf-8");
		$pages = explode("\n", $page);

		$db =& wfGetDB(DB_SLAVE);
		$history = $curonly ? MW_EXPORT_CURRENT : MW_EXPORT_FULL;
		$exporter = new WikiExporter($db, $history);
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		foreach ($pages as $page) {
			if ($wgExportMaxHistory && !$curonly) {
				$title = Title::newFromText($page);
				if ($title) {
					$count = Revision::countByTitle($db, $title);
					if ($count > $wgExportMaxHistory) {
						wfDebug(__FUNCTION__ . ": Skipped {$page}, {$count} revisions too big\n");
						continue;
					}
				}
			}
			$exporter->pageByName($page);
		}

		$exporter->closeStream();
		return;
	}

	$wgOut->addWikiText(wfMsg("exporttext"));
	$titleObj = Title::makeTitle(NS_SPECIAL, "Export");

	$form = wfOpenElement('form', array('method' => 'post', 'action' => $titleObj->getLocalUrl()));
	$form .= wfOpenElement('textarea', array('name' => 'pages', 'cols' => 40, 'rows' => 10)) . '</textarea><br />';
	if ($wgExportAllowHistory) {
		$form .= wfCheck('curonly', true, array('value' => 'true', 'id' => 'curonly'));
		$form .= wfLabel(wfMsg('exportcuronly'), 'curonly') . '<br />';
	} else {
		$wgOut->addWikiText(wfMsg('exportnohistory'));
	}
	$form .= wfHidden('action', 'submit');
	$form .= wfSubmitButton(wfMsg('export-submit')) . '</form>';
	$wgOut->addHtml($form);
}
/**
 *
 */
function wfSpecialExport($page = '') {
	global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
	global $wgExportAllowHistory, $wgExportMaxHistory;

	$curonly = true;
	$doexport = false;

	if ($wgRequest->getCheck('addcat')) {
		$page = $wgRequest->getText('pages');
		$catname = $wgRequest->getText('catname');

		if ($catname !== '' && $catname !== NULL && $catname !== false) {
			$t = Title::makeTitleSafe(NS_MAIN, $catname);
			if ($t) {
				/**
				 * @fixme This can lead to hitting memory limit for very large
				 * categories. Ideally we would do the lookup synchronously
				 * during the export in a single query.
				 */
				$catpages = wfExportGetPagesFromCategory($t);
				if ($catpages) {
					$page .= "\n" . implode("\n", $catpages);
				}
			}
		}
	} elseif ($wgRequest->wasPosted() && $page == '') {
		$page = $wgRequest->getText('pages');
		$curonly = $wgRequest->getCheck('curonly');
		$rawOffset = $wgRequest->getVal('offset');
		if ($rawOffset) {
			$offset = wfTimestamp(TS_MW, $rawOffset);
		} else {
			$offset = null;
		}
		$limit = $wgRequest->getInt('limit');
		$dir = $wgRequest->getVal('dir');
		$history = array(
			'dir' => 'asc',
			'offset' => false,
			'limit' => $wgExportMaxHistory,
		);
		$historyCheck = $wgRequest->getCheck('history');
		if ($curonly) {
			$history = WikiExporter::CURRENT;
		} elseif (!$historyCheck) {
			if ($limit > 0 && $limit < $wgExportMaxHistory) {
				$history['limit'] = $limit;
			}
			if (!is_null($offset)) {
				$history['offset'] = $offset;
			}
			if (strtolower($dir) == 'desc') {
				$history['dir'] = 'desc';
			}
		}

		if ($page != '') {
			$doexport = true;
		}
	} else {
		// Default to current-only for GET requests
		$page = $wgRequest->getText('pages', $page);
		$historyCheck = $wgRequest->getCheck('history');
		if ($historyCheck) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ($page != '') {
			$doexport = true;
		}
	}

	if (!$wgExportAllowHistory) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	$list_authors = $wgRequest->getCheck('listauthors');
	if (!$curonly || !$wgExportAllowListContributors) {
		$list_authors = false;
	}

	if ($doexport) {
		$wgOut->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		header("Content-type: application/xml; charset=utf-8");

		if ($wgRequest->getCheck('wpDownload')) {
			// Provide a sane filename suggestion
			$filename = urlencode($wgSitename . '-' . wfTimestampNow() . '.xml');
			$wgRequest->response()->header("Content-disposition: attachment;filename={$filename}");
		}

		/* Split up the input and look up linked pages */
		$inputPages = array_filter(explode("\n", $page), 'wfFilterPage');
		$pageSet = array_flip($inputPages);

		if ($wgRequest->getCheck('templates')) {
			$pageSet = wfExportGetTemplates($inputPages, $pageSet);
		}

		/*
		// Enable this when we can do something useful exporting/importing image information. :)
		if( $wgRequest->getCheck( 'images' ) ) {
			$pageSet = wfExportGetImages( $inputPages, $pageSet );
		}
		*/

		$pages = array_keys($pageSet);

		/* Ok, let's get to it... */
		if ($history == WikiExporter::CURRENT) {
			$lb = false;
			$db = wfGetDB(DB_SLAVE);
			$buffer = WikiExporter::BUFFER;
		} else {
			// Use an unbuffered query; histories may be very long!
			$lb = wfGetLBFactory()->newMainLB();
			$db = $lb->getConnection(DB_SLAVE);
			$buffer = WikiExporter::STREAM;

			// This might take a while... :D
			wfSuppressWarnings();
			set_time_limit(0);
			wfRestoreWarnings();
		}

		$exporter = new WikiExporter($db, $history, $buffer);
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		foreach ($pages as $page) {
			/*
			if( $wgExportMaxHistory && !$curonly ) {
				$title = Title::newFromText( $page );
				if( $title ) {
					$count = Revision::countByTitle( $db, $title );
					if( $count > $wgExportMaxHistory ) {
						wfDebug( __FUNCTION__ . ": Skipped $page, $count revisions too big\n" );
						continue;
					}
				}
			}
			*/

			# Bug 8824: Only export pages the user can read
			$title = Title::newFromText($page);
			if (is_null($title)) {
				continue; # TODO: perhaps output an <error> tag or something.
			}
			if (!$title->userCanRead()) {
				continue; # TODO: perhaps output an <error> tag or something.
			}

			$exporter->pageByTitle($title);
		}

		$exporter->closeStream();
		if ($lb) {
			$lb->closeAll();
		}
		return;
	}

	$self = SpecialPage::getTitleFor('Export');
	$wgOut->addHTML(wfMsgExt('exporttext', 'parse'));

	$form = Xml::openElement('form', array(
		'method' => 'post',
		'action' => $self->getLocalUrl('action=submit'),
	));
	$form .= Xml::inputLabel(wfMsg('export-addcattext'), 'catname', 'catname', 40) . ' ';
	$form .= Xml::submitButton(wfMsg('export-addcat'), array('name' => 'addcat')) . '<br />';

	$form .= Xml::openElement('textarea', array('name' => 'pages', 'cols' => 40, 'rows' => 10));
	$form .= htmlspecialchars($page);
	$form .= Xml::closeElement('textarea');
	$form .= '<br />';

	if ($wgExportAllowHistory) {
		$form .= Xml::checkLabel(wfMsg('exportcuronly'), 'curonly', 'curonly', true) . '<br />';
	} else {
		$wgOut->addHTML(wfMsgExt('exportnohistory', 'parse'));
	}
	$form .= Xml::checkLabel(wfMsg('export-templates'), 'templates', 'wpExportTemplates', false) . '<br />';
	// Enable this when we can do something useful exporting/importing image information. :)
	//$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
	$form .= Xml::checkLabel(wfMsg('export-download'), 'wpDownload', 'wpDownload', true) . '<br />';
	$form .= Xml::submitButton(wfMsg('export-submit'), array('accesskey' => 's'));
	$form .= Xml::closeElement('form');
	$wgOut->addHTML($form);
}
/**
 *
 */
function wfSpecialExport($page = '') {
	global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
	global $wgExportAllowHistory, $wgExportMaxHistory;

	$curonly = true;
	$doexport = false;

	if ($wgRequest->getCheck('addcat')) {
		$page = $wgRequest->getText('pages');
		$catname = $wgRequest->getText('catname');

		if ($catname !== '' && $catname !== NULL && $catname !== false) {
			$t = Title::makeTitleSafe(NS_CATEGORY, $catname);
			if ($t) {
				$catpages = wfExportGetPagesFromCategory($t);
				if ($catpages) {
					$page .= "\n" . implode("\n", $catpages);
				}
			}
		}
	} elseif ($wgRequest->wasPosted() && $page == '') {
		$page = $wgRequest->getText('pages');
		$curonly = $wgRequest->getCheck('curonly');
		$rawOffset = $wgRequest->getVal('offset');
		if ($rawOffset) {
			$offset = wfTimestamp(TS_MW, $rawOffset);
		} else {
			$offset = null;
		}
		$limit = $wgRequest->getInt('limit');
		$dir = $wgRequest->getVal('dir');
		$history = array(
			'dir' => 'asc',
			'offset' => false,
			'limit' => $wgExportMaxHistory,
		);
		$historyCheck = $wgRequest->getCheck('history');
		if ($curonly) {
			$history = WikiExporter::CURRENT;
		} elseif (!$historyCheck) {
			if ($limit > 0 && $limit < $wgExportMaxHistory) {
				$history['limit'] = $limit;
			}
			if (!is_null($offset)) {
				$history['offset'] = $offset;
			}
			if (strtolower($dir) == 'desc') {
				$history['dir'] = 'desc';
			}
		}

		if ($page != '') {
			$doexport = true;
		}
	} else {
		// Default to current-only for GET requests
		$page = $wgRequest->getText('pages', $page);
		$historyCheck = $wgRequest->getCheck('history');
		if ($historyCheck) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ($page != '') {
			$doexport = true;
		}
	}

	if (!$wgExportAllowHistory) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	$list_authors = $wgRequest->getCheck('listauthors');
	if (!$curonly || !$wgExportAllowListContributors) {
		$list_authors = false;
	}

	if ($doexport) {
		$wgOut->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		header("Content-type: application/xml; charset=utf-8");

		if ($wgRequest->getCheck('wpDownload')) {
			// Provide a sane filename suggestion
			$filename = urlencode($wgSitename . '-' . wfTimestampNow() . '.xml');
			$wgRequest->response()->header("Content-disposition: attachment;filename={$filename}");
		}

		$pages = explode("\n", $page);

		$db = wfGetDB(DB_SLAVE);
		$exporter = new WikiExporter($db, $history);
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		foreach ($pages as $page) {
			/*
			if( $wgExportMaxHistory && !$curonly ) {
				$title = Title::newFromText( $page );
				if( $title ) {
					$count = Revision::countByTitle( $db, $title );
					if( $count > $wgExportMaxHistory ) {
						wfDebug( __FUNCTION__ . ": Skipped $page, $count revisions too big\n" );
						continue;
					}
				}
			}
			*/

			# Bug 8824: Only export pages the user can read
			$title = Title::newFromText($page);
			if (is_null($title)) {
				continue; # TODO: perhaps output an <error> tag or something.
			}
			if (!$title->userCan('read')) {
				continue; # TODO: perhaps output an <error> tag or something.
			}

			$exporter->pageByTitle($title);
		}

		$exporter->closeStream();
		return;
	}

	$self = SpecialPage::getTitleFor('Export');
	$wgOut->addHtml(wfMsgExt('exporttext', 'parse'));

	$form = Xml::openElement('form', array(
		'method' => 'post',
		'action' => $self->getLocalUrl('action=submit'),
	));
	$form .= Xml::inputLabel(wfMsg('export-addcattext'), 'catname', 'catname', 40) . ' ';
	$form .= Xml::submitButton(wfMsg('export-addcat'), array('name' => 'addcat')) . '<br />';

	$form .= Xml::openElement('textarea', array('name' => 'pages', 'cols' => 40, 'rows' => 10));
	$form .= htmlspecialchars($page);
	$form .= Xml::closeElement('textarea');
	$form .= '<br />';

	if ($wgExportAllowHistory) {
		$form .= Xml::checkLabel(wfMsg('exportcuronly'), 'curonly', 'curonly', true) . '<br />';
	} else {
		$wgOut->addHtml(wfMsgExt('exportnohistory', 'parse'));
	}
	$form .= Xml::checkLabel(wfMsg('export-download'), 'wpDownload', 'wpDownload', true) . '<br />';
	$form .= Xml::submitButton(wfMsg('export-submit'));
	$form .= Xml::closeElement('form');
	$wgOut->addHtml($form);
}
/**
 * Do the actual page exporting
 *
 * @param $page String: user input on what page(s) to export
 * @param $history Mixed: one of the WikiExporter history export constants
 * @param $list_authors Boolean: Whether to add distinct author list (when
 *   not returning full history)
 * @param $exportall Boolean: Whether to export everything
 */
private function doExport($page, $history, $list_authors, $exportall) {
	// If we are grabbing everything, enable full history and ignore the rest
	if ($exportall) {
		$history = WikiExporter::FULL;
	} else {
		$pageSet = array(); // Inverted index of all pages to look up

		// Split up and normalize input
		foreach (explode("\n", $page) as $pageName) {
			$pageName = trim($pageName);
			$title = Title::newFromText($pageName);
			if ($title && $title->getInterwiki() == '' && $title->getText() !== '') {
				// Only record each page once!
				$pageSet[$title->getPrefixedText()] = true;
			}
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys($pageSet);

		// Look up any linked pages if asked...
		if ($this->templates) {
			$pageSet = $this->getTemplates($inputPages, $pageSet);
		}
		$linkDepth = $this->pageLinkDepth;
		if ($linkDepth) {
			$pageSet = $this->getPageLinks($inputPages, $pageSet, $linkDepth);
		}

		/*
		// Enable this when we can do something useful exporting/importing image information. :)
		if( $this->images ) ) {
			$pageSet = $this->getImages( $inputPages, $pageSet );
		}
		*/

		$pages = array_keys($pageSet);

		// Normalize titles to the same format and remove dupes, see bug 17374
		foreach ($pages as $k => $v) {
			$pages[$k] = str_replace(" ", "_", $v);
		}
		$pages = array_unique($pages);
	}

	/* Ok, let's get to it... */
	if ($history == WikiExporter::CURRENT) {
		$lb = false;
		$db = wfGetDB(DB_SLAVE);
		$buffer = WikiExporter::BUFFER;
	} else {
		// Use an unbuffered query; histories may be very long!
		$lb = wfGetLBFactory()->newMainLB();
		$db = $lb->getConnection(DB_SLAVE);
		$buffer = WikiExporter::STREAM;

		// This might take a while... :D
		wfSuppressWarnings();
		set_time_limit(0);
		wfRestoreWarnings();
	}

	$exporter = new WikiExporter($db, $history, $buffer);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ($exportall) {
		$exporter->allPages();
	} else {
		foreach ($pages as $page) {
			/*
			if( $wgExportMaxHistory && !$this->curonly ) {
				$title = Title::newFromText( $page );
				if( $title ) {
					$count = Revision::countByTitle( $db, $title );
					if( $count > $wgExportMaxHistory ) {
						wfDebug( __FUNCTION__ . ": Skipped $page, $count revisions too big\n" );
						continue;
					}
				}
			}
			*/

			# Bug 8824: Only export pages the user can read
			$title = Title::newFromText($page);
			if (is_null($title)) {
				continue; # TODO: perhaps output an <error> tag or something.
			}
			if (!$title->userCan('read', $this->getUser())) {
				continue; # TODO: perhaps output an <error> tag or something.
			}

			$exporter->pageByTitle($title);
		}
	}

	$exporter->closeStream();
	if ($lb) {
		$lb->closeAll();
	}
}
/**
 * Appends an element for each page in the current pageSet with the
 * most general information (id, title), plus any title normalizations
 * and missing or invalid title/pageids/revids.
 */
private function outputGeneralPageInfo() {
	$pageSet = $this->getPageSet();
	$result = $this->getResult();

	# We don't check for a full result set here because we can't be adding
	# more than 380K. The maximum revision size is in the megabyte range,
	# and the maximum result size must be even higher than that.

	// Title normalizations
	$normValues = array();
	foreach ($pageSet->getNormalizedTitles() as $rawTitleStr => $titleStr) {
		$normValues[] = array(
			'from' => $rawTitleStr,
			'to' => $titleStr,
		);
	}
	if (count($normValues)) {
		$result->setIndexedTagName($normValues, 'n');
		$result->addValue('query', 'normalized', $normValues);
	}

	// Interwiki titles
	$intrwValues = array();
	foreach ($pageSet->getInterwikiTitles() as $rawTitleStr => $interwikiStr) {
		$intrwValues[] = array(
			'title' => $rawTitleStr,
			'iw' => $interwikiStr,
		);
	}
	if (count($intrwValues)) {
		$result->setIndexedTagName($intrwValues, 'i');
		$result->addValue('query', 'interwiki', $intrwValues);
	}

	// Show redirect information
	$redirValues = array();
	foreach ($pageSet->getRedirectTitles() as $titleStrFrom => $titleStrTo) {
		$redirValues[] = array(
			'from' => strval($titleStrFrom),
			'to' => $titleStrTo,
		);
	}
	if (count($redirValues)) {
		$result->setIndexedTagName($redirValues, 'r');
		$result->addValue('query', 'redirects', $redirValues);
	}

	//
	// Missing revision elements
	//
	$missingRevIDs = $pageSet->getMissingRevisionIDs();
	if (count($missingRevIDs)) {
		$revids = array();
		foreach ($missingRevIDs as $revid) {
			$revids[$revid] = array('revid' => $revid);
		}
		$result->setIndexedTagName($revids, 'rev');
		$result->addValue('query', 'badrevids', $revids);
	}

	//
	// Page elements
	//
	$pages = array();

	// Report any missing titles
	foreach ($pageSet->getMissingTitles() as $fakeId => $title) {
		$vals = array();
		ApiQueryBase::addTitleInfo($vals, $title);
		$vals['missing'] = '';
		$pages[$fakeId] = $vals;
	}
	// Report any invalid titles
	foreach ($pageSet->getInvalidTitles() as $fakeId => $title) {
		$pages[$fakeId] = array('title' => $title, 'invalid' => '');
	}
	// Report any missing page ids
	foreach ($pageSet->getMissingPageIDs() as $pageid) {
		$pages[$pageid] = array(
			'pageid' => $pageid,
			'missing' => '',
		);
	}

	// Output general page information for found titles
	foreach ($pageSet->getGoodTitles() as $pageid => $title) {
		$vals = array();
		$vals['pageid'] = $pageid;
		ApiQueryBase::addTitleInfo($vals, $title);
		$pages[$pageid] = $vals;
	}

	if (count($pages)) {
		if ($this->params['indexpageids']) {
			$pageIDs = array_keys($pages);
			// json treats all map keys as strings - converting to match
			$pageIDs = array_map('strval', $pageIDs);
			$result->setIndexedTagName($pageIDs, 'id');
			$result->addValue('query', 'pageids', $pageIDs);
		}
		$result->setIndexedTagName($pages, 'page');
		$result->addValue('query', 'pages', $pages);
	}

	if ($this->params['export']) {
		$exporter = new WikiExporter($this->getDB());
		// WikiExporter writes to stdout, so catch its
		// output with an ob
		ob_start();
		$exporter->openStream();
		foreach (@$pageSet->getGoodTitles() as $title) {
			if ($title->userCanRead()) {
				$exporter->pageByTitle($title);
			}
		}
		$exporter->closeStream();
		$exportxml = ob_get_contents();
		ob_end_clean();

		// Don't check the size of exported stuff
		// It's not continuable, so it would cause more
		// problems than it'd solve
		$result->disableSizeCheck();
		if ($this->params['exportnowrap']) {
			$result->reset();
			// Raw formatter will handle this
			$result->addValue(null, 'text', $exportxml);
			$result->addValue(null, 'mime', 'text/xml');
		} else {
			$r = array();
			ApiResult::setContent($r, $exportxml);
			$result->addValue('query', 'export', $r);
		}
		$result->enableSizeCheck();
	}
}
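/*
 * A hedged usage note for the export branch above: the 'export' and
 * 'exportnowrap' values it reads correspond to api.php query parameters, so
 * a raw XML export of a page could be fetched with a request along these
 * lines (host and title are placeholders):
 *
 *   api.php?action=query&titles=Main%20Page&export&exportnowrap
 */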
/**
 * @param ApiPageSet $pageSet Pages to be exported
 * @param ApiResult $result Result to output to
 */
private function doExport($pageSet, $result) {
	$exportTitles = [];
	$titles = $pageSet->getGoodTitles();
	if (count($titles)) {
		$user = $this->getUser();
		/** @var $title Title */
		foreach ($titles as $title) {
			if ($title->userCan('read', $user)) {
				$exportTitles[] = $title;
			}
		}
	}

	$exporter = new WikiExporter($this->getDB());
	$sink = new DumpStringOutput();
	$exporter->setOutputSink($sink);
	$exporter->openStream();
	foreach ($exportTitles as $title) {
		$exporter->pageByTitle($title);
	}
	$exporter->closeStream();

	// Don't check the size of exported stuff
	// It's not continuable, so it would cause more
	// problems than it'd solve
	if ($this->mParams['exportnowrap']) {
		$result->reset();
		// Raw formatter will handle this
		$result->addValue(null, 'text', $sink, ApiResult::NO_SIZE_CHECK);
		$result->addValue(null, 'mime', 'text/xml', ApiResult::NO_SIZE_CHECK);
	} else {
		$result->addValue('query', 'export', $sink, ApiResult::NO_SIZE_CHECK);
		$result->addValue('query', ApiResult::META_BC_SUBELEMENTS, ['export']);
	}
}
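/*
 * A minimal standalone sketch of the sink pattern used above, assuming $db
 * is an open replica connection. DumpStringOutput collects the exporter's
 * XML in memory, which is what makes the ob_start()/ob_get_clean() capture
 * used by the older variants in this section unnecessary.
 */
$exporter = new WikiExporter($db, WikiExporter::CURRENT);
$sink = new DumpStringOutput();
$exporter->setOutputSink($sink);

$exporter->openStream();
$exporter->pageByTitle(Title::newFromText('Main Page'));
$exporter->closeStream();

$xml = (string)$sink; // the complete <mediawiki> document as one string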
/**
 * Opens the XML output stream's root "<mediawiki>" element.
 * This does not include an xml directive, so is safe to include
 * as a subelement in a larger XML stream. Namespace and XML Schema
 * references are included.
 *
 * Output will be encoded in UTF-8.
 *
 * @return string
 */
function openStream() {
	global $wgLanguageCode;
	$ver = WikiExporter::schemaVersion();
	return Xml::element(
		'mediawiki',
		array(
			'xmlns' => "http://www.mediawiki.org/xml/export-{$ver}/",
			'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-{$ver}/ " .
				"http://www.mediawiki.org/xml/export-{$ver}.xsd",
			'version' => $ver,
			'xml:lang' => $wgLanguageCode,
		),
		null
	) . "\n" . $this->siteInfo();
}
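/*
 * A hedged illustration of the element openStream() emits. For an English
 * wiki on schema version 0.10 (the version number here is an assumption;
 * schemaVersion() supplies the real one), the opening tag looks roughly
 * like this, followed by the <siteinfo> block from siteInfo():
 *
 *   <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.10/"
 *              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 *              xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.10/
 *                  http://www.mediawiki.org/xml/export-0.10.xsd"
 *              version="0.10" xml:lang="en">
 */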
/**
 * Do the actual page exporting
 *
 * @param string $page User input on what page(s) to export
 * @param int $history One of the WikiExporter history export constants
 * @param bool $list_authors Whether to add distinct author list (when
 *   not returning full history)
 * @param bool $exportall Whether to export everything
 */
private function doExport($page, $history, $list_authors, $exportall) {
	// If we are grabbing everything, enable full history and ignore the rest
	if ($exportall) {
		$history = WikiExporter::FULL;
	} else {
		$pageSet = []; // Inverted index of all pages to look up

		// Split up and normalize input
		foreach (explode("\n", $page) as $pageName) {
			$pageName = trim($pageName);
			$title = Title::newFromText($pageName);
			if ($title && !$title->isExternal() && $title->getText() !== '') {
				// Only record each page once!
				$pageSet[$title->getPrefixedText()] = true;
			}
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys($pageSet);

		// Look up any linked pages if asked...
		if ($this->templates) {
			$pageSet = $this->getTemplates($inputPages, $pageSet);
		}
		$linkDepth = $this->pageLinkDepth;
		if ($linkDepth) {
			$pageSet = $this->getPageLinks($inputPages, $pageSet, $linkDepth);
		}

		$pages = array_keys($pageSet);

		// Normalize titles to the same format and remove dupes, see bug 17374
		foreach ($pages as $k => $v) {
			$pages[$k] = str_replace(" ", "_", $v);
		}
		$pages = array_unique($pages);
	}

	/* Ok, let's get to it... */
	if ($history == WikiExporter::CURRENT) {
		$lb = false;
		$db = wfGetDB(DB_REPLICA);
		$buffer = WikiExporter::BUFFER;
	} else {
		// Use an unbuffered query; histories may be very long!
		$lb = wfGetLBFactory()->newMainLB();
		$db = $lb->getConnection(DB_REPLICA);
		$buffer = WikiExporter::STREAM;

		// This might take a while... :D
		MediaWiki\suppressWarnings();
		set_time_limit(0);
		MediaWiki\restoreWarnings();
	}

	$exporter = new WikiExporter($db, $history, $buffer);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ($exportall) {
		$exporter->allPages();
	} else {
		foreach ($pages as $page) {
			# Bug 8824: Only export pages the user can read
			$title = Title::newFromText($page);
			if (is_null($title)) {
				// @todo Perhaps output an <error> tag or something.
				continue;
			}
			if (!$title->userCan('read', $this->getUser())) {
				// @todo Perhaps output an <error> tag or something.
				continue;
			}

			$exporter->pageByTitle($title);
		}
	}

	$exporter->closeStream();
	if ($lb) {
		$lb->closeAll();
	}
}
/**
 * Do the actual page exporting
 *
 * @param string $page User input on what page(s) to export
 * @param mixed $history One of the WikiExporter history export constants
 * @param bool $list_authors Whether to add distinct author list (when
 *   not returning full history)
 */
private function doExport($page, $history, $list_authors) {
	global $wgExportMaxHistory;

	/* Split up the input and look up linked pages */
	$inputPages = array_filter(explode("\n", $page), array($this, 'filterPage'));
	$pageSet = array_flip($inputPages);

	if ($this->templates) {
		$pageSet = $this->getTemplates($inputPages, $pageSet);
	}

	if ($linkDepth = $this->pageLinkDepth) {
		$pageSet = $this->getPageLinks($inputPages, $pageSet, $linkDepth);
	}

	/*
	// Enable this when we can do something useful exporting/importing image information. :)
	if( $this->images ) ) {
		$pageSet = $this->getImages( $inputPages, $pageSet );
	}
	*/

	$pages = array_keys($pageSet);

	/* Ok, let's get to it... */
	if ($history == WikiExporter::CURRENT) {
		$lb = false;
		$db = wfGetDB(DB_SLAVE);
		$buffer = WikiExporter::BUFFER;
	} else {
		// Use an unbuffered query; histories may be very long!
		$lb = wfGetLBFactory()->newMainLB();
		$db = $lb->getConnection(DB_SLAVE);
		$buffer = WikiExporter::STREAM;

		// This might take a while... :D
		wfSuppressWarnings();
		set_time_limit(0);
		wfRestoreWarnings();
	}

	$exporter = new WikiExporter($db, $history, $buffer);
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	foreach ($pages as $page) {
		/*
		if( $wgExportMaxHistory && !$this->curonly ) {
			$title = Title::newFromText( $page );
			if( $title ) {
				$count = Revision::countByTitle( $db, $title );
				if( $count > $wgExportMaxHistory ) {
					wfDebug( __FUNCTION__ . ": Skipped $page, $count revisions too big\n" );
					continue;
				}
			}
		}
		*/

		# Bug 8824: Only export pages the user can read
		$title = Title::newFromText($page);
		if (is_null($title)) {
			continue; # TODO: perhaps output an <error> tag or something.
		}
		if (!$title->userCanRead()) {
			continue; # TODO: perhaps output an <error> tag or something.
		}

		$exporter->pageByTitle($title);
	}

	$exporter->closeStream();
	if ($lb) {
		$lb->closeAll();
	}
}
/**
 * @param $pageSet ApiPageSet Pages to be exported
 * @param $result ApiResult Result to output to
 */
private function doExport($pageSet, $result) {
	$exportTitles = array();
	$titles = $pageSet->getGoodTitles();
	if (count($titles)) {
		$user = $this->getUser();
		/** @var $title Title */
		foreach ($titles as $title) {
			if ($title->userCan('read', $user)) {
				$exportTitles[] = $title;
			}
		}
	}

	$exporter = new WikiExporter($this->getDB());
	// WikiExporter writes to stdout, so catch its
	// output with an ob
	ob_start();
	$exporter->openStream();
	foreach ($exportTitles as $title) {
		$exporter->pageByTitle($title);
	}
	$exporter->closeStream();
	$exportxml = ob_get_contents();
	ob_end_clean();

	// Don't check the size of exported stuff
	// It's not continuable, so it would cause more
	// problems than it'd solve
	$result->disableSizeCheck();
	if ($this->mParams['exportnowrap']) {
		$result->reset();
		// Raw formatter will handle this
		$result->addValue(null, 'text', $exportxml);
		$result->addValue(null, 'mime', 'text/xml');
	} else {
		$r = array();
		ApiResult::setContent($r, $exportxml);
		$result->addValue('query', 'export', $r);
	}
	$result->enableSizeCheck();
}
/**
 *
 */
function wfSpecialExport($page = '') {
	global $wgOut, $wgRequest, $wgExportAllowListContributors;
	global $wgExportAllowHistory, $wgExportMaxHistory;

	$curonly = true;
	$fullHistory = array(
		'dir' => 'asc',
		'offset' => false,
		'limit' => $wgExportMaxHistory,
	);
	if ($wgRequest->wasPosted()) {
		$page = $wgRequest->getText('pages');
		$curonly = $wgRequest->getCheck('curonly');
		$rawOffset = $wgRequest->getVal('offset');
		if ($rawOffset) {
			$offset = wfTimestamp(TS_MW, $rawOffset);
		} else {
			$offset = null;
		}
		$limit = $wgRequest->getInt('limit');
		$dir = $wgRequest->getVal('dir');
		$history = array(
			'dir' => 'asc',
			'offset' => false,
			'limit' => $wgExportMaxHistory,
		);
		$historyCheck = $wgRequest->getCheck('history');
		if ($curonly) {
			$history = WikiExporter::CURRENT;
		} elseif (!$historyCheck) {
			if ($limit > 0 && $limit < $wgExportMaxHistory) {
				$history['limit'] = $limit;
			}
			if (!is_null($offset)) {
				$history['offset'] = $offset;
			}
			if (strtolower($dir) == 'desc') {
				$history['dir'] = 'desc';
			}
		}
	} else {
		// Default to current-only for GET requests
		$page = $wgRequest->getText('pages', $page);
		$historyCheck = $wgRequest->getCheck('history');
		if ($historyCheck) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}
	}

	if (!$wgExportAllowHistory) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	$list_authors = $wgRequest->getCheck('listauthors');
	if (!$curonly || !$wgExportAllowListContributors) {
		$list_authors = false;
	}

	if ($page != '') {
		$wgOut->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		while ($status = ob_get_status()) {
			ob_end_clean();
			if ($status['name'] == 'ob_gzhandler') {
				header('Content-Encoding:');
			}
		}
		header("Content-type: application/xml; charset=utf-8");
		$pages = explode("\n", $page);

		$db =& wfGetDB(DB_SLAVE);
		$exporter = new WikiExporter($db, $history);
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		foreach ($pages as $page) {
			/*
			if( $wgExportMaxHistory && !$curonly ) {
				$title = Title::newFromText( $page );
				if( $title ) {
					$count = Revision::countByTitle( $db, $title );
					if( $count > $wgExportMaxHistory ) {
						wfDebug( __FUNCTION__ . ": Skipped $page, $count revisions too big\n" );
						continue;
					}
				}
			}
			*/
			$exporter->pageByName($page);
		}

		$exporter->closeStream();
		return;
	}

	$wgOut->addWikiText(wfMsg("exporttext"));
	$titleObj = Title::makeTitle(NS_SPECIAL, "Export");

	$form = wfOpenElement('form', array('method' => 'post', 'action' => $titleObj->getLocalUrl()));
	$form .= wfOpenElement('textarea', array('name' => 'pages', 'cols' => 40, 'rows' => 10)) . '</textarea><br />';
	if ($wgExportAllowHistory) {
		$form .= wfCheck('curonly', true, array('value' => 'true', 'id' => 'curonly'));
		$form .= wfLabel(wfMsg('exportcuronly'), 'curonly') . '<br />';
	} else {
		$wgOut->addWikiText(wfMsg('exportnohistory'));
	}
	$form .= wfHidden('action', 'submit');
	$form .= wfSubmitButton(wfMsg('export-submit')) . '</form>';
	$wgOut->addHtml($form);
}
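/*
 * A hedged usage note for the Special:Export variant above: the pages,
 * curonly, history, offset, limit, and dir values it reads arrive as form
 * fields of a POST to the special page. Omitting curonly and history
 * engages the bounded-history branch, e.g. (host and page name are
 * placeholders):
 *
 *   curl -d 'pages=Main_Page' -d 'offset=20060123000000' -d 'limit=50' \
 *        -d 'dir=desc' 'https://example.org/index.php?title=Special:Export'
 */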