/**
 * Populates the search index with content from all pages.
 *
 * Walks the page table in windows of self::RTI_CHUNK_SIZE page IDs, starting
 * at 0 and continuing up to and including the highest page_id, and runs a
 * SearchUpdate for the latest revision of every page found in each window.
 * Revisions whose content cannot be deserialized are reported and skipped.
 */
protected function populateSearchIndex() {
    $res = $this->db->select('page', 'MAX(page_id) AS count');
    $s = $this->db->fetchObject($res);
    // MAX() yields NULL on an empty page table; normalise it to 0 so the
    // progress message and the loop bound are well defined.
    $count = (int) $s->count;
    $this->output("Rebuilding index fields for {$count} pages...\n");

    $fields = array_merge(
        Revision::selectPageFields(),
        Revision::selectFields(),
        Revision::selectTextFields()
    );

    // $count is the highest page ID, not a row count, and every window is
    // inclusive on both ends. The bound therefore has to be "<=": with "<"
    // the page whose ID is an exact multiple of the chunk size (and is also
    // the maximum) would never fall into any window.
    for ($n = 0; $n <= $count; $n += self::RTI_CHUNK_SIZE) {
        if ($n) {
            $this->output($n . "\n");
        }
        $end = $n + self::RTI_CHUNK_SIZE - 1;

        $res = $this->db->select(
            ['page', 'revision', 'text'],
            $fields,
            [
                "page_id BETWEEN {$n} AND {$end}",
                'page_latest = rev_id',
                'rev_text_id = old_id',
            ],
            __METHOD__
        );

        foreach ($res as $s) {
            // Built outside the try block so it is always available to the
            // error message in the catch handler.
            $title = Title::makeTitle($s->page_namespace, $s->page_title);
            try {
                $rev = new Revision($s);
                $content = $rev->getContent();

                $u = new SearchUpdate($s->page_id, $title, $content);
                $u->doUpdate();
            } catch (MWContentSerializationException $ex) {
                $this->output(
                    "Failed to deserialize content of revision {$s->rev_id} of page "
                    . "`" . $title->getPrefixedDBkey() . "`!\n"
                );
            }
        }
    }
}
/**
 * Lists revisions across all pages, ordered by timestamp and revision ID.
 *
 * When no page set is supplied the revisions are written to the API result,
 * grouped per page. When a page set is supplied the method acts as a
 * generator and fills it with either page IDs or revision IDs.
 *
 * @param ApiPageSet $resultPageSet Page set to populate in generator mode, or null
 * @return void
 */
protected function run(ApiPageSet $resultPageSet = null) {
    $db = $this->getDB();
    $params = $this->extractRequestParams(false);
    $result = $this->getResult();

    $this->requireMaxOneParameter($params, 'user', 'excludeuser');

    // A namespace filter is usually wanted but cannot be done efficiently in
    // SQL, so in miser mode it is applied to the fetched rows instead.
    $miserNamespaces = null;
    $joinPage = false;
    $namespaces = $params['namespace'];
    if ($namespaces !== null) {
        $namespaces = array_unique($namespaces);
        sort($namespaces);
        $params['namespace'] = $namespaces;

        if ($namespaces != MWNamespace::getValidNamespaces()) {
            $joinPage = true;
            if ($this->getConfig()->get('MiserMode')) {
                $miserNamespaces = $namespaces;
            } else {
                $this->addWhere(['page_namespace' => $namespaces]);
            }
        }
    }

    $pageJoin = ['page' => ['INNER JOIN', ['rev_page = page_id']]];
    $generating = $resultPageSet !== null;

    $this->addTables('revision');
    if (!$generating) {
        $this->parseParameters($params);
        $this->addTables('page');
        $this->addJoinConds($pageJoin);
        $this->addFields(Revision::selectFields());
        $this->addFields(Revision::selectPageFields());

        // Review this depending on the outcome of T113901
        $this->addOption('STRAIGHT_JOIN');
    } else {
        $this->limit = $this->getParameter('limit') ?: 10;
        $this->addFields(['rev_timestamp', 'rev_id']);
        if ($params['generatetitles']) {
            $this->addFields(['rev_page']);
        }

        if ($joinPage) {
            $this->addTables('page');
            $this->addJoinConds($pageJoin);
            $this->addFieldsIf(['page_namespace'], (bool) $miserNamespaces);

            // Review this depending on the outcome of T113901
            $this->addOption('STRAIGHT_JOIN');
        }
    }

    if ($this->fld_tags) {
        $this->addTables('tag_summary');
        $this->addJoinConds(['tag_summary' => ['LEFT JOIN', ['rev_id=ts_rev_id']]]);
        $this->addFields('ts_tags');
    }

    if ($this->fetchContent) {
        $this->addTables('text');
        $this->addJoinConds(['text' => ['INNER JOIN', ['rev_text_id=old_id']]]);
        $this->addFields('old_id');
        $this->addFields(Revision::selectTextFields());
    }

    // Filter on (or against) a user: by ID when the name resolves to one,
    // otherwise by the stored user text.
    $includeUser = $params['user'];
    $excludeUser = $params['excludeuser'];
    if ($includeUser !== null) {
        $id = User::idFromName($includeUser);
        if ($id) {
            $this->addWhereFld('rev_user', $id);
        } else {
            $this->addWhereFld('rev_user_text', $includeUser);
        }
    } elseif ($excludeUser !== null) {
        $id = User::idFromName($excludeUser);
        if ($id) {
            $this->addWhere('rev_user != ' . $id);
        } else {
            $this->addWhere('rev_user_text != ' . $db->addQuotes($excludeUser));
        }
    }

    if ($includeUser !== null || $excludeUser !== null) {
        // Paranoia: avoid brute force searches (bug 17342)
        $bitmask = 0;
        if (!$this->getUser()->isAllowed('deletedhistory')) {
            $bitmask = Revision::DELETED_USER;
        } elseif (!$this->getUser()->isAllowedAny('suppressrevision', 'viewsuppressed')) {
            $bitmask = Revision::DELETED_USER | Revision::DELETED_RESTRICTED;
        }
        if ($bitmask) {
            $this->addWhere($db->bitAnd('rev_deleted', $bitmask) . " != {$bitmask}");
        }
    }

    $dir = $params['dir'];

    if ($params['continue'] !== null) {
        // The continuation value has the form "<timestamp>|<rev_id>".
        $op = ($dir == 'newer') ? '>' : '<';
        $parts = explode('|', $params['continue']);
        $this->dieContinueUsageIf(count($parts) != 2);
        $ts = $db->addQuotes($db->timestamp($parts[0]));
        $fromRevId = (int) $parts[1];
        $this->dieContinueUsageIf(strval($fromRevId) !== $parts[1]);
        $this->addWhere(
            "rev_timestamp {$op} {$ts} OR (rev_timestamp = {$ts} AND rev_id {$op}= {$fromRevId})"
        );
    }

    // One extra row is requested so that "more results exist" can be detected.
    $this->addOption('LIMIT', $this->limit + 1);

    // Targeting index rev_timestamp, user_timestamp, or usertext_timestamp.
    // 'user' is always constant for the latter two, so it doesn't matter here.
    $sort = ($dir == 'newer') ? '' : ' DESC';
    $this->addOption('ORDER BY', ["rev_timestamp {$sort}", "rev_id {$sort}"]);

    $res = $this->select(__METHOD__);

    $indexByPage = []; // Maps rev_page to array index
    $seen = 0;
    $nextSlot = 0;
    $collected = [];
    $resultPath = ['query', $this->getModuleName()];

    foreach ($res as $row) {
        $continueFrom = "{$row->rev_timestamp}|{$row->rev_id}";

        if (++$seen > $this->limit) {
            // We've had enough
            $this->setContinueEnumParameter('continue', $continueFrom);
            break;
        }

        // Miser mode namespace check
        if ($miserNamespaces !== null && !in_array($row->page_namespace, $miserNamespaces)) {
            continue;
        }

        if ($generating) {
            if ($params['generatetitles']) {
                $collected[$row->rev_page] = $row->rev_page;
            } else {
                $collected[] = $row->rev_id;
            }
            continue;
        }

        $revision = Revision::newFromRow($row);
        $rev = $this->extractRevisionInfo($revision, $row);

        if (isset($indexByPage[$row->rev_page])) {
            // Page already has an entry: append to its revision list.
            $slot = $indexByPage[$row->rev_page];
            $fit = $result->addValue(
                ['query', $this->getModuleName(), $slot, 'revisions'],
                null,
                $rev
            );
        } else {
            // First revision seen for this page: create the page entry.
            $slot = $nextSlot++;
            $indexByPage[$row->rev_page] = $slot;

            $title = $revision->getTitle();
            $entry = [
                'pageid' => $title->getArticleID(),
                'revisions' => [$rev],
            ];
            ApiResult::setIndexedTagName($entry['revisions'], 'rev');
            ApiQueryBase::addTitleInfo($entry, $title);

            $fit = $result->addValue($resultPath, $slot, $entry);
        }

        if (!$fit) {
            $this->setContinueEnumParameter('continue', $continueFrom);
            break;
        }
    }

    if (!$generating) {
        $result->addIndexedTagName($resultPath, 'page');
    } elseif ($params['generatetitles']) {
        $resultPageSet->populateFromPageIDs($collected);
    } else {
        $resultPageSet->populateFromRevisionIDs($collected);
    }
}
/**
 * Builds and runs the revisions query for the requested page set and adds
 * every resulting revision to the API result as a 'rev' sub-item of its page.
 *
 * Works in one of three modes:
 *  - enumeration mode (any list option given): revisions of a single page;
 *  - revids mode: exactly the revisions named in the page set;
 *  - titles mode: the latest revision of each page in the page set.
 *
 * Dies with a usage error on invalid parameter combinations.
 */
public function execute() {
    $params = $this->extractRequestParams(false);

    // If any of those parameters are used, work in 'enumeration' mode.
    // Enum mode can only be used when exactly one page is provided.
    // Enumerating revisions on multiple pages make it extremely
    // difficult to manage continuations and require additional SQL indexes
    $enumRevMode = !is_null($params['user']) || !is_null($params['excludeuser'])
        || !is_null($params['limit']) || !is_null($params['startid'])
        || !is_null($params['endid']) || $params['dir'] === 'newer'
        || !is_null($params['start']) || !is_null($params['end']);

    $pageSet = $this->getPageSet();
    $pageCount = $pageSet->getGoodTitleCount();
    $revCount = $pageSet->getRevisionCount();

    // Optimization -- nothing to do
    if ($revCount === 0 && $pageCount === 0) {
        return;
    }

    if ($revCount > 0 && $enumRevMode) {
        $this->dieUsage('The revids= parameter may not be used with the list options (limit, startid, endid, dirNewer, start, end).', 'revids');
    }

    if ($pageCount > 1 && $enumRevMode) {
        $this->dieUsage('titles, pageids or a generator was used to supply multiple pages, but the limit, startid, endid, dirNewer, user, excludeuser, start and end parameters may only be used on a single page.', 'multpages');
    }

    if (!is_null($params['difftotext'])) {
        $this->difftotext = $params['difftotext'];
    } elseif (!is_null($params['diffto'])) {
        if ($params['diffto'] === 'cur') {
            $params['diffto'] = 0;
        }

        // Classify the value with strict comparisons. Comparing a
        // non-numeric string such as 'prev' against the integer 0 with
        // ==/!= gives different answers on PHP 7 and PHP 8, so the loose
        // form would reject 'cur' and try to load revision 'prev' on PHP 8.
        $isRelative = $params['diffto'] === 'prev' || $params['diffto'] === 'next';
        $isRevId = $params['diffto'] === 0 || ctype_digit((string) $params['diffto']);
        if (!$isRelative && !$isRevId) {
            $this->dieUsage('rvdiffto must be set to a non-negative number, "prev", "next" or "cur"', 'diffto');
        }

        // Check whether the revision exists and is readable,
        // DifferenceEngine returns a rather ambiguous empty
        // string if that's not the case
        if ($isRevId && (int) $params['diffto'] !== 0) {
            $difftoRev = Revision::newFromID($params['diffto']);
            if (!$difftoRev) {
                $this->dieUsageMsg(array('nosuchrevid', $params['diffto']));
            }
            if ($difftoRev->isDeleted(Revision::DELETED_TEXT)) {
                $this->setWarning("Couldn't diff to r{$difftoRev->getID()}: content is hidden");
                $params['diffto'] = null;
            }
        }
        $this->diffto = $params['diffto'];
    }

    $db = $this->getDB();
    $this->addTables('page');
    $this->addFields(Revision::selectFields());
    $this->addWhere('page_id = rev_page');

    $prop = array_flip($params['prop']);

    // Optional fields
    $this->fld_ids = isset($prop['ids']);
    // $this->addFieldsIf('rev_text_id', $this->fld_ids); // should this be exposed?
    $this->fld_flags = isset($prop['flags']);
    $this->fld_timestamp = isset($prop['timestamp']);
    $this->fld_comment = isset($prop['comment']);
    $this->fld_parsedcomment = isset($prop['parsedcomment']);
    $this->fld_size = isset($prop['size']);
    $this->fld_sha1 = isset($prop['sha1']);
    $this->fld_userid = isset($prop['userid']);
    $this->fld_user = isset($prop['user']);
    // Must be assigned before the limit caps below are chosen: they depend
    // on whether content was requested.
    $this->fld_content = isset($prop['content']);
    $this->token = $params['token'];

    // Possible indexes used
    $index = array();

    // Requests that return content get the smaller caps.
    $userMax = $this->fld_content ? ApiBase::LIMIT_SML1 : ApiBase::LIMIT_BIG1;
    $botMax = $this->fld_content ? ApiBase::LIMIT_SML2 : ApiBase::LIMIT_BIG2;
    $limit = $params['limit'];
    if ($limit === 'max') {
        $limit = $this->getMain()->canApiHighLimits() ? $botMax : $userMax;
        $this->getResult()->setParsedLimit($this->getModuleName(), $limit);
    }

    if (!is_null($this->token) || $pageCount > 0) {
        $this->addFields(Revision::selectPageFields());
    }

    if (isset($prop['tags'])) {
        $this->fld_tags = true;
        $this->addTables('tag_summary');
        $this->addJoinConds(array('tag_summary' => array('LEFT JOIN', array('rev_id=ts_rev_id'))));
        $this->addFields('ts_tags');
    }

    if (!is_null($params['tag'])) {
        $this->addTables('change_tag');
        $this->addJoinConds(array('change_tag' => array('INNER JOIN', array('rev_id=ct_rev_id'))));
        $this->addWhereFld('ct_tag', $params['tag']);
        global $wgOldChangeTagsIndex;
        $index['change_tag'] = $wgOldChangeTagsIndex ? 'ct_tag' : 'change_tag_tag_id';
    }

    if (isset($prop['content']) || !is_null($this->difftotext)) {
        // For each page we will request, the user must have read rights for that page
        foreach ($pageSet->getGoodTitles() as $title) {
            if (!$title->userCan('read')) {
                $this->dieUsage(
                    'The current user is not allowed to read ' . $title->getPrefixedText(),
                    'accessdenied'
                );
            }
        }

        $this->addTables('text');
        $this->addWhere('rev_text_id=old_id');
        $this->addFields('old_id');
        $this->addFields(Revision::selectTextFields());

        $this->expandTemplates = $params['expandtemplates'];
        $this->generateXML = $params['generatexml'];
        $this->parseContent = $params['parse'];
        if ($this->parseContent) {
            // Must manually initialize unset limit
            if (is_null($limit)) {
                $limit = 1;
            }
            // We are only going to parse 1 revision per request
            $this->validateLimit('limit', $limit, 1, 1, 1);
        }
        if (isset($params['section'])) {
            $this->section = $params['section'];
        } else {
            $this->section = false;
        }
    }

    // add user name, if needed
    if ($this->fld_user) {
        $this->addTables('user');
        $this->addJoinConds(array('user' => Revision::userJoinCond()));
        $this->addFields(Revision::selectUserFields());
    }

    // Bug 24166 - API error when using rvprop=tags
    $this->addTables('revision');

    if ($enumRevMode) {
        // This is mostly to prevent parameter errors (and optimize SQL?)
        if (!is_null($params['startid']) && !is_null($params['start'])) {
            $this->dieUsage('start and startid cannot be used together', 'badparams');
        }

        if (!is_null($params['endid']) && !is_null($params['end'])) {
            $this->dieUsage('end and endid cannot be used together', 'badparams');
        }

        if (!is_null($params['user']) && !is_null($params['excludeuser'])) {
            $this->dieUsage('user and excludeuser cannot be used together', 'badparams');
        }

        // Continuing effectively uses startid. But we can't use rvstartid
        // directly, because there is no way to tell the client to ''not''
        // send rvstart if it sent it in the original query. So instead we
        // send the continuation startid as rvcontinue, and ignore both
        // rvstart and rvstartid when that is supplied.
        if (!is_null($params['continue'])) {
            $params['startid'] = $params['continue'];
            // Null the key rather than unset() it: it is still read below,
            // and a missing key would raise an undefined-index notice.
            $params['start'] = null;
        }

        // This code makes an assumption that sorting by rev_id and rev_timestamp produces
        // the same result. This way users may request revisions starting at a given time,
        // but to page through results use the rev_id returned after each page.
        // Switching to rev_id removes the potential problem of having more than
        // one row with the same timestamp for the same page.
        // The order needs to be the same as start parameter to avoid SQL filesort.
        if (is_null($params['startid']) && is_null($params['endid'])) {
            $this->addTimestampWhereRange('rev_timestamp', $params['dir'],
                $params['start'], $params['end']);
        } else {
            $this->addWhereRange('rev_id', $params['dir'],
                $params['startid'], $params['endid']);
            // One of start and end can be set
            // If neither is set, this does nothing
            $this->addTimestampWhereRange('rev_timestamp', $params['dir'],
                $params['start'], $params['end'], false);
        }

        // must manually initialize unset limit
        if (is_null($limit)) {
            $limit = 10;
        }
        $this->validateLimit('limit', $limit, 1, $userMax, $botMax);

        // There is only one ID, use it
        $ids = array_keys($pageSet->getGoodTitles());
        $this->addWhereFld('rev_page', reset($ids));

        if (!is_null($params['user'])) {
            $this->addWhereFld('rev_user_text', $params['user']);
        } elseif (!is_null($params['excludeuser'])) {
            $this->addWhere('rev_user_text != ' . $db->addQuotes($params['excludeuser']));
        }
        if (!is_null($params['user']) || !is_null($params['excludeuser'])) {
            // Paranoia: avoid brute force searches (bug 17342)
            $this->addWhere($db->bitAnd('rev_deleted', Revision::DELETED_USER) . ' = 0');
        }
    } elseif ($revCount > 0) {
        $max = $this->getMain()->canApiHighLimits() ? $botMax : $userMax;
        $revs = $pageSet->getRevisionIDs();
        if (self::truncateArray($revs, $max)) {
            $this->setWarning("Too many values supplied for parameter 'revids': the limit is {$max}");
        }

        // Get all revision IDs
        $this->addWhereFld('rev_id', array_keys($revs));

        if (!is_null($params['continue'])) {
            $this->addWhere('rev_id >= ' . intval($params['continue']));
        }
        $this->addOption('ORDER BY', 'rev_id');

        // assumption testing -- we should never get more than $revCount rows.
        $limit = $revCount;
    } elseif ($pageCount > 0) {
        $max = $this->getMain()->canApiHighLimits() ? $botMax : $userMax;
        $titles = $pageSet->getGoodTitles();
        if (self::truncateArray($titles, $max)) {
            $this->setWarning("Too many values supplied for parameter 'titles': the limit is {$max}");
        }

        // When working in multi-page non-enumeration mode,
        // limit to the latest revision only
        $this->addWhere('page_id=rev_page');
        $this->addWhere('page_latest=rev_id');

        // Get all page IDs
        $this->addWhereFld('page_id', array_keys($titles));
        // Every time someone relies on equality propagation, god kills a kitten :)
        $this->addWhereFld('rev_page', array_keys($titles));

        if (!is_null($params['continue'])) {
            // The continuation value has the form "<page_id>|<rev_id>".
            $cont = explode('|', $params['continue']);
            if (count($cont) != 2) {
                $this->dieUsage('Invalid continue param. You should pass the original ' .
                    'value returned by the previous query', '_badcontinue');
            }
            $pageid = intval($cont[0]);
            $revid = intval($cont[1]);
            $this->addWhere(
                "rev_page > {$pageid} OR " .
                "(rev_page = {$pageid} AND " .
                "rev_id >= {$revid})"
            );
        }
        $this->addOption('ORDER BY', array('rev_page', 'rev_id'));

        // assumption testing -- we should never get more than $pageCount rows.
        $limit = $pageCount;
    } else {
        ApiBase::dieDebug(__METHOD__, 'param validation?');
    }

    // One extra row is requested so that "more results exist" can be detected.
    $this->addOption('LIMIT', $limit + 1);
    $this->addOption('USE INDEX', $index);

    $count = 0;
    $res = $this->select(__METHOD__);

    foreach ($res as $row) {
        if (++$count > $limit) {
            // We've reached the one extra which shows that there are
            // additional pages to be had. Stop here...
            if (!$enumRevMode) {
                ApiBase::dieDebug(__METHOD__, 'Got more rows then expected'); // bug report
            }
            $this->setContinueEnumParameter('continue', intval($row->rev_id));
            break;
        }

        $fit = $this->addPageSubItem($row->rev_page, $this->extractRowInfo($row), 'rev');
        if (!$fit) {
            // The result is full: record where to resume, in the format
            // the matching mode above expects.
            if ($enumRevMode) {
                $this->setContinueEnumParameter('continue', intval($row->rev_id));
            } elseif ($revCount > 0) {
                $this->setContinueEnumParameter('continue', intval($row->rev_id));
            } else {
                $this->setContinueEnumParameter('continue', intval($row->rev_page) .
                    '|' . intval($row->rev_id));
            }
            break;
        }
    }
}
/**
 * Queries revisions for the module's page set and either adds them to the
 * API result or, in generator mode, feeds their IDs into $resultPageSet.
 *
 * Three query shapes are used: enumeration of a single page's history when
 * any list option is given, lookup of the live revision IDs in the page set,
 * or the latest revision of each page in the page set.
 *
 * @param ApiPageSet $resultPageSet Page set to populate in generator mode, or null
 * @return void
 */
protected function run(ApiPageSet $resultPageSet = null) {
    $params = $this->extractRequestParams(false);

    // If any of those parameters are used, work in 'enumeration' mode.
    // Enum mode can only be used when exactly one page is provided.
    // Enumerating revisions on multiple pages make it extremely
    // difficult to manage continuations and require additional SQL indexes
    $enumRevMode = $params['user'] !== null
        || $params['excludeuser'] !== null
        || $params['limit'] !== null
        || $params['startid'] !== null
        || $params['endid'] !== null
        || $params['dir'] === 'newer'
        || $params['start'] !== null
        || $params['end'] !== null;

    $pageSet = $this->getPageSet();
    $pageCount = $pageSet->getGoodTitleCount();
    $revCount = $pageSet->getRevisionCount();

    if ($revCount === 0 && $pageCount === 0) {
        // Nothing to do
        return;
    }
    if ($revCount > 0 && count($pageSet->getLiveRevisionIDs()) === 0) {
        // We're in revisions mode but all given revisions are deleted
        return;
    }

    if ($enumRevMode) {
        if ($revCount > 0) {
            $this->dieUsage(
                'The revids= parameter may not be used with the list options ' .
                    '(limit, startid, endid, dirNewer, start, end).',
                'revids'
            );
        }
        if ($pageCount > 1) {
            $this->dieUsage(
                'titles, pageids or a generator was used to supply multiple pages, ' .
                    'but the limit, startid, endid, dirNewer, user, excludeuser, start ' .
                    'and end parameters may only be used on a single page.',
                'multpages'
            );
        }
    } else {
        // In non-enum mode, rvlimit can't be directly used. Use the maximum
        // allowed value.
        $this->setParsedLimit = false;
        $params['limit'] = 'max';
    }

    $db = $this->getDB();
    $generating = $resultPageSet !== null;

    $this->addTables(['revision', 'page']);
    $this->addJoinConds(['page' => ['INNER JOIN', ['page_id = rev_page']]]);

    if ($generating) {
        $this->limit = $this->getParameter('limit') ?: 10;
        $this->addFields(['rev_id', 'rev_page']);
    } else {
        $this->parseParameters($params);
        $this->token = $params['token'];
        $this->addFields(Revision::selectFields());
        if ($this->token !== null || $pageCount > 0) {
            $this->addFields(Revision::selectPageFields());
        }
    }

    if ($this->fld_tags) {
        $this->addTables('tag_summary');
        $this->addJoinConds(['tag_summary' => ['LEFT JOIN', ['rev_id=ts_rev_id']]]);
        $this->addFields('ts_tags');
    }

    if ($params['tag'] !== null) {
        $this->addTables('change_tag');
        $this->addJoinConds(['change_tag' => ['INNER JOIN', ['rev_id=ct_rev_id']]]);
        $this->addWhereFld('ct_tag', $params['tag']);
    }

    if ($this->fetchContent) {
        // For each page we will request, the user must have read rights for that page
        $user = $this->getUser();
        /** @var $title Title */
        foreach ($pageSet->getGoodTitles() as $title) {
            if (!$title->userCan('read', $user)) {
                $this->dieUsage(
                    'The current user is not allowed to read ' . $title->getPrefixedText(),
                    'accessdenied'
                );
            }
        }

        $this->addTables('text');
        $this->addJoinConds(['text' => ['INNER JOIN', ['rev_text_id=old_id']]]);
        $this->addFields('old_id');
        $this->addFields(Revision::selectTextFields());
    }

    // add user name, if needed
    if ($this->fld_user) {
        $this->addTables('user');
        $this->addJoinConds(['user' => Revision::userJoinCond()]);
        $this->addFields(Revision::selectUserFields());
    }

    if ($enumRevMode) {
        // This is mostly to prevent parameter errors (and optimize SQL?)
        if ($params['startid'] !== null && $params['start'] !== null) {
            $this->dieUsage('start and startid cannot be used together', 'badparams');
        }
        if ($params['endid'] !== null && $params['end'] !== null) {
            $this->dieUsage('end and endid cannot be used together', 'badparams');
        }
        if ($params['user'] !== null && $params['excludeuser'] !== null) {
            $this->dieUsage('user and excludeuser cannot be used together', 'badparams');
        }

        // Continuing effectively uses startid. But we can't use rvstartid
        // directly, because there is no way to tell the client to ''not''
        // send rvstart if it sent it in the original query. So instead we
        // send the continuation startid as rvcontinue, and ignore both
        // rvstart and rvstartid when that is supplied.
        if ($params['continue'] !== null) {
            $params['startid'] = $params['continue'];
            $params['start'] = null;
        }

        // This code makes an assumption that sorting by rev_id and rev_timestamp produces
        // the same result. This way users may request revisions starting at a given time,
        // but to page through results use the rev_id returned after each page.
        // Switching to rev_id removes the potential problem of having more than
        // one row with the same timestamp for the same page.
        // The order needs to be the same as start parameter to avoid SQL filesort.
        $byTimestampOnly = $params['startid'] === null && $params['endid'] === null;
        if ($byTimestampOnly) {
            $this->addTimestampWhereRange(
                'rev_timestamp', $params['dir'], $params['start'], $params['end']
            );
        } else {
            $this->addWhereRange(
                'rev_id', $params['dir'], $params['startid'], $params['endid']
            );
            // One of start and end can be set
            // If neither is set, this does nothing
            $this->addTimestampWhereRange(
                'rev_timestamp', $params['dir'], $params['start'], $params['end'], false
            );
        }

        // There is only one ID, use it
        $ids = array_keys($pageSet->getGoodTitles());
        $this->addWhereFld('rev_page', reset($ids));

        if ($params['user'] !== null) {
            $this->addWhereFld('rev_user_text', $params['user']);
        } elseif ($params['excludeuser'] !== null) {
            $this->addWhere('rev_user_text != ' . $db->addQuotes($params['excludeuser']));
        }

        if ($params['user'] !== null || $params['excludeuser'] !== null) {
            // Paranoia: avoid brute force searches (bug 17342)
            $bitmask = 0;
            if (!$this->getUser()->isAllowed('deletedhistory')) {
                $bitmask = Revision::DELETED_USER;
            } elseif (!$this->getUser()->isAllowedAny('suppressrevision', 'viewsuppressed')) {
                $bitmask = Revision::DELETED_USER | Revision::DELETED_RESTRICTED;
            }
            if ($bitmask) {
                $this->addWhere($db->bitAnd('rev_deleted', $bitmask) . " != {$bitmask}");
            }
        }
    } elseif ($revCount > 0) {
        // Get all revision IDs
        $revs = $pageSet->getLiveRevisionIDs();
        $this->addWhereFld('rev_id', array_keys($revs));

        if ($params['continue'] !== null) {
            $this->addWhere('rev_id >= ' . intval($params['continue']));
        }
        $this->addOption('ORDER BY', 'rev_id');
    } elseif ($pageCount > 0) {
        $pageIds = array_keys($pageSet->getGoodTitles());

        // When working in multi-page non-enumeration mode,
        // limit to the latest revision only
        $this->addWhere('page_latest=rev_id');

        // Get all page IDs
        $this->addWhereFld('page_id', $pageIds);
        // Every time someone relies on equality propagation, god kills a kitten :)
        $this->addWhereFld('rev_page', $pageIds);

        if ($params['continue'] !== null) {
            // The continuation value has the form "<page_id>|<rev_id>".
            $cont = explode('|', $params['continue']);
            $this->dieContinueUsageIf(count($cont) != 2);
            $pageid = intval($cont[0]);
            $revid = intval($cont[1]);
            $this->addWhere(
                "rev_page > {$pageid} OR (rev_page = {$pageid} AND rev_id >= {$revid})"
            );
        }
        $this->addOption('ORDER BY', ['rev_page', 'rev_id']);
    } else {
        ApiBase::dieDebug(__METHOD__, 'param validation?');
    }

    // One extra row is requested so that "more results exist" can be detected.
    $this->addOption('LIMIT', $this->limit + 1);

    // Value to hand back as the continue parameter for a given row: a bare
    // rev_id in enumeration and revids modes, "page|rev" in titles mode.
    $continueValue = function ($row) use ($enumRevMode, $revCount) {
        if ($enumRevMode || $revCount > 0) {
            return intval($row->rev_id);
        }
        return intval($row->rev_page) . '|' . intval($row->rev_id);
    };

    $seen = 0;
    $generated = [];
    $res = $this->select(__METHOD__);

    foreach ($res as $row) {
        if (++$seen > $this->limit) {
            // We've reached the one extra which shows that there are
            // additional pages to be had. Stop here...
            $this->setContinueEnumParameter('continue', $continueValue($row));
            break;
        }

        if ($generating) {
            $generated[] = $row->rev_id;
            continue;
        }

        $revision = new Revision($row);
        $rev = $this->extractRevisionInfo($revision, $row);

        if ($this->token !== null) {
            $title = $revision->getTitle();
            $tokenFunctions = $this->getTokenFunctions();
            foreach ($this->token as $t) {
                $val = call_user_func(
                    $tokenFunctions[$t], $title->getArticleID(), $title, $revision
                );
                if ($val === false) {
                    $this->setWarning("Action '{$t}' is not allowed for the current user");
                } else {
                    $rev[$t . 'token'] = $val;
                }
            }
        }

        if (!$this->addPageSubItem($row->rev_page, $rev, 'rev')) {
            // The result is full; remember where to resume.
            $this->setContinueEnumParameter('continue', $continueValue($row));
            break;
        }
    }

    if ($generating) {
        $resultPageSet->populateFromRevisionIDs($generated);
    }
}
/**
 * Builds and runs the revisions query for the requested page set and adds
 * every resulting revision under query/pages/<page id>/revisions.
 *
 * Works in one of three modes:
 *  - enumeration mode (any list option given): revisions of a single page;
 *  - revids mode: exactly the revisions named in the page set;
 *  - titles mode: the latest revision of each page in the page set.
 *
 * Dies with a usage error on invalid parameter combinations.
 */
public function execute() {
    $params = $this->extractRequestParams(false);

    // If any of those parameters are used, work in 'enumeration' mode.
    // Enum mode can only be used when exactly one page is provided.
    // Enumerating revisions on multiple pages make it extremely
    // difficult to manage continuations and require additional SQL indexes
    $enumRevMode = !is_null($params['user']) || !is_null($params['excludeuser'])
        || !is_null($params['limit']) || !is_null($params['startid'])
        || !is_null($params['endid']) || $params['dir'] === 'newer'
        || !is_null($params['start']) || !is_null($params['end']);

    $pageSet = $this->getPageSet();
    $pageCount = $pageSet->getGoodTitleCount();
    $revCount = $pageSet->getRevisionCount();

    // Optimization -- nothing to do
    if ($revCount === 0 && $pageCount === 0) {
        return;
    }

    if ($revCount > 0 && $enumRevMode) {
        $this->dieUsage('The revids= parameter may not be used with the list options (limit, startid, endid, dirNewer, start, end).', 'revids');
    }

    if ($pageCount > 1 && $enumRevMode) {
        $this->dieUsage('titles, pageids or a generator was used to supply multiple pages, but the limit, startid, endid, dirNewer, user, excludeuser, start and end parameters may only be used on a single page.', 'multpages');
    }

    $db = $this->getDB();

    $this->addTables('revision');
    $this->addFields(Revision::selectFields());
    $this->addTables('page');
    $this->addWhere('page_id = rev_page');

    $prop = array_flip($params['prop']);

    // Optional fields
    $this->fld_ids = isset($prop['ids']);
    // $this->addFieldsIf('rev_text_id', $this->fld_ids); // should this be exposed?
    $this->fld_flags = isset($prop['flags']);
    $this->fld_timestamp = isset($prop['timestamp']);
    $this->fld_comment = isset($prop['comment']);
    $this->fld_size = isset($prop['size']);
    $this->fld_user = isset($prop['user']);
    $this->token = $params['token'];

    if (!is_null($this->token) || $pageCount > 0) {
        $this->addFields(Revision::selectPageFields());
    }

    if (isset($prop['content'])) {
        // For each page we will request, the user must have read rights for that page
        foreach ($pageSet->getGoodTitles() as $title) {
            if (!$title->userCanRead()) {
                $this->dieUsage(
                    'The current user is not allowed to read ' . $title->getPrefixedText(),
                    'accessdenied'
                );
            }
        }

        $this->addTables('text');
        $this->addWhere('rev_text_id=old_id');
        $this->addFields('old_id');
        $this->addFields(Revision::selectTextFields());

        $this->fld_content = true;

        $this->expandTemplates = $params['expandtemplates'];
        $this->generateXML = $params['generatexml'];
        if (isset($params['section'])) {
            $this->section = $params['section'];
        } else {
            $this->section = false;
        }
    }

    // Requests that return content get the smaller caps.
    $userMax = $this->fld_content ? ApiBase::LIMIT_SML1 : ApiBase::LIMIT_BIG1;
    $botMax = $this->fld_content ? ApiBase::LIMIT_SML2 : ApiBase::LIMIT_BIG2;
    $limit = $params['limit'];
    if ($limit === 'max') {
        $limit = $this->getMain()->canApiHighLimits() ? $botMax : $userMax;
        $this->getResult()->addValue('limits', $this->getModuleName(), $limit);
    }

    if ($enumRevMode) {
        // This is mostly to prevent parameter errors (and optimize SQL?)
        if (!is_null($params['startid']) && !is_null($params['start'])) {
            $this->dieUsage('start and startid cannot be used together', 'badparams');
        }

        if (!is_null($params['endid']) && !is_null($params['end'])) {
            $this->dieUsage('end and endid cannot be used together', 'badparams');
        }

        if (!is_null($params['user']) && !is_null($params['excludeuser'])) {
            $this->dieUsage('user and excludeuser cannot be used together', 'badparams');
        }

        // This code makes an assumption that sorting by rev_id and rev_timestamp produces
        // the same result. This way users may request revisions starting at a given time,
        // but to page through results use the rev_id returned after each page.
        // Switching to rev_id removes the potential problem of having more than
        // one row with the same timestamp for the same page.
        // The order needs to be the same as start parameter to avoid SQL filesort.
        if (is_null($params['startid']) && is_null($params['endid'])) {
            $this->addWhereRange('rev_timestamp', $params['dir'],
                $params['start'], $params['end']);
        } else {
            $this->addWhereRange('rev_id', $params['dir'],
                $params['startid'], $params['endid']);
        }

        // must manually initialize unset limit
        if (is_null($limit)) {
            $limit = 10;
        }
        $this->validateLimit('limit', $limit, 1, $userMax, $botMax);

        // There is only one ID, use it
        $this->addWhereFld('rev_page', current(array_keys($pageSet->getGoodTitles())));

        if (!is_null($params['user'])) {
            $this->addWhereFld('rev_user_text', $params['user']);
        } elseif (!is_null($params['excludeuser'])) {
            $this->addWhere('rev_user_text != ' . $db->addQuotes($params['excludeuser']));
        }
        if (!is_null($params['user']) || !is_null($params['excludeuser'])) {
            // Paranoia: avoid brute force searches (bug 17342). Without this
            // a user filter would reveal revisions whose author is hidden.
            $this->addWhere($db->bitAnd('rev_deleted', Revision::DELETED_USER) . ' = 0');
        }
    } elseif ($revCount > 0) {
        $max = $this->getMain()->canApiHighLimits() ? $botMax : $userMax;
        $revs = $pageSet->getRevisionIDs();
        if (self::truncateArray($revs, $max)) {
            $this->setWarning("Too many values supplied for parameter 'revids': the limit is {$max}");
        }

        // Get all revision IDs
        $this->addWhereFld('rev_id', array_keys($revs));

        // assumption testing -- we should never get more than $revCount rows.
        $limit = $revCount;
    } elseif ($pageCount > 0) {
        $max = $this->getMain()->canApiHighLimits() ? $botMax : $userMax;
        $titles = $pageSet->getGoodTitles();
        if (self::truncateArray($titles, $max)) {
            $this->setWarning("Too many values supplied for parameter 'titles': the limit is {$max}");
        }

        // When working in multi-page non-enumeration mode,
        // limit to the latest revision only
        $this->addWhere('page_id=rev_page');
        $this->addWhere('page_latest=rev_id');

        // Get all page IDs
        $this->addWhereFld('page_id', array_keys($titles));

        // assumption testing -- we should never get more than $pageCount rows.
        $limit = $pageCount;
    } else {
        ApiBase::dieDebug(__METHOD__, 'param validation?');
    }

    // One extra row is requested so that "more results exist" can be detected.
    $this->addOption('LIMIT', $limit + 1);

    $count = 0;
    $res = $this->select(__METHOD__);

    while ($row = $db->fetchObject($res)) {
        if (++$count > $limit) {
            // We've reached the one extra which shows that there are
            // additional pages to be had. Stop here...
            if (!$enumRevMode) {
                ApiBase::dieDebug(__METHOD__, 'Got more rows then expected'); // bug report
            }
            $this->setContinueEnumParameter('startid', intval($row->rev_id));
            break;
        }

        $revision = new Revision($row);
        $this->getResult()->addValue(
            array('query', 'pages', $revision->getPage(), 'revisions'),
            null,
            $this->extractRowInfo($revision)
        );
    }
    $db->freeResult($res);

    // Ensure that all revisions are shown as '<rev>' elements
    $result = $this->getResult();
    if ($result->getIsRawMode()) {
        $data =& $result->getData();
        // Guard against a result that holds no pages, which would otherwise
        // raise an undefined-index notice and an invalid foreach argument.
        if (isset($data['query']['pages']) && is_array($data['query']['pages'])) {
            foreach ($data['query']['pages'] as &$page) {
                if (is_array($page) && array_key_exists('revisions', $page)) {
                    $result->setIndexedTagName($page['revisions'], 'rev');
                }
            }
            // Break the reference so $page no longer aliases the last entry.
            unset($page);
        }
    }
}
/**
 * Build the revision query for the current request, run it and emit the rows.
 *
 * Three query shapes are possible:
 *  - enumeration mode (any list option given): revisions of one single page,
 *    paged by timestamp and revision ID;
 *  - revids mode: the live revisions that were requested explicitly;
 *  - titles mode: only the latest revision of each requested page.
 *
 * @param ApiPageSet $resultPageSet When not null, only revision IDs are collected
 *   and passed to this page set instead of being added to the API result
 * @return void
 */
protected function run(ApiPageSet $resultPageSet = null) {
    $params = $this->extractRequestParams(false);

    // Any of these options switches to 'enumeration' mode, which is only
    // valid for exactly one page: enumerating revisions of several pages
    // makes continuation very hard and would need additional SQL indexes.
    $enumRevMode = $params['user'] !== null
        || $params['excludeuser'] !== null
        || $params['limit'] !== null
        || $params['startid'] !== null
        || $params['endid'] !== null
        || $params['dir'] === 'newer'
        || $params['start'] !== null
        || $params['end'] !== null;

    $pageSet = $this->getPageSet();
    $pageCount = $pageSet->getGoodTitleCount();
    $revCount = $pageSet->getRevisionCount();

    if ($revCount === 0 && $pageCount === 0) {
        // Neither pages nor revisions were supplied: nothing to query
        return;
    }
    if ($revCount > 0 && count($pageSet->getLiveRevisionIDs()) === 0) {
        // Revisions were requested, but every one of them is deleted
        return;
    }

    if ($enumRevMode) {
        if ($revCount > 0) {
            $this->dieUsage(
                'The revids= parameter may not be used with the list options '
                    . '(limit, startid, endid, dirNewer, start, end).',
                'revids'
            );
        }
        if ($pageCount > 1) {
            $this->dieUsage(
                'titles, pageids or a generator was used to supply multiple pages, '
                    . 'but the limit, startid, endid, dirNewer, user, excludeuser, start '
                    . 'and end parameters may only be used on a single page.',
                'multpages'
            );
        }
    } else {
        // rvlimit cannot be used directly outside enumeration mode,
        // so fall back to the maximum allowed value.
        $this->setParsedLimit = false;
        $params['limit'] = 'max';
    }

    $db = $this->getDB();
    $this->addTables(['revision', 'page']);
    $this->addJoinConds(['page' => ['INNER JOIN', ['page_id = rev_page']]]);

    if ($resultPageSet !== null) {
        // Generator mode: only the identifying columns are needed
        $this->limit = $this->getParameter('limit') ?: 10;
        $this->addFields(['rev_id', 'rev_timestamp', 'rev_page']);
    } else {
        $this->parseParameters($params);
        $this->token = $params['token'];
        $this->addFields(Revision::selectFields());
        if ($this->token !== null || $pageCount > 0) {
            $this->addFields(Revision::selectPageFields());
        }
    }

    if ($this->fld_tags) {
        $this->addTables('tag_summary');
        $this->addJoinConds(['tag_summary' => ['LEFT JOIN', ['rev_id=ts_rev_id']]]);
        $this->addFields('ts_tags');
    }

    if ($params['tag'] !== null) {
        $this->addTables('change_tag');
        $this->addJoinConds(['change_tag' => ['INNER JOIN', ['rev_id=ct_rev_id']]]);
        $this->addWhereFld('ct_tag', $params['tag']);
    }

    if ($this->fetchContent) {
        // Content is only served when the user may read every requested page
        $reader = $this->getUser();
        /** @var $goodTitle Title */
        foreach ($pageSet->getGoodTitles() as $goodTitle) {
            if (!$goodTitle->userCan('read', $reader)) {
                $this->dieUsage(
                    'The current user is not allowed to read ' . $goodTitle->getPrefixedText(),
                    'accessdenied'
                );
            }
        }
        $this->addTables('text');
        $this->addJoinConds(['text' => ['INNER JOIN', ['rev_text_id=old_id']]]);
        $this->addFields('old_id');
        $this->addFields(Revision::selectTextFields());
    }

    // Join the user table when the user name was requested
    if ($this->fld_user) {
        $this->addTables('user');
        $this->addJoinConds(['user' => Revision::userJoinCond()]);
        $this->addFields(Revision::selectUserFields());
    }

    if ($enumRevMode) {
        // Indexes targeted:
        //  page_timestamp if we don't have rvuser
        //  page_user_timestamp if we have a logged-in rvuser
        //  page_timestamp or usertext_timestamp if we have an IP rvuser

        // Reject contradictory parameter combinations up front
        if ($params['startid'] !== null && $params['start'] !== null) {
            $this->dieUsage('start and startid cannot be used together', 'badparams');
        }
        if ($params['endid'] !== null && $params['end'] !== null) {
            $this->dieUsage('end and endid cannot be used together', 'badparams');
        }
        if ($params['user'] !== null && $params['excludeuser'] !== null) {
            $this->dieUsage('user and excludeuser cannot be used together', 'badparams');
        }

        if ($params['continue'] !== null) {
            // Continuation value has the form "<timestamp>|<rev_id>"
            $continueParts = explode('|', $params['continue']);
            $this->dieContinueUsageIf(count($continueParts) != 2);
            if ($params['dir'] === 'newer') {
                $op = '>';
            } else {
                $op = '<';
            }
            $continueTimestamp = $db->addQuotes($db->timestamp($continueParts[0]));
            $continueId = (int) $continueParts[1];
            $this->dieContinueUsageIf($continueId != $continueParts[1]);
            $this->addWhere(
                "rev_timestamp {$op} {$continueTimestamp} OR "
                    . "(rev_timestamp = {$continueTimestamp} AND "
                    . "rev_id {$op}= {$continueId})"
            );
        }

        $this->addTimestampWhereRange(
            'rev_timestamp',
            $params['dir'],
            $params['start'],
            $params['end']
        );
        $this->addWhereRange('rev_id', $params['dir'], $params['startid'], $params['endid']);

        // Enumeration mode guarantees a single page, so take its ID
        $pageIds = array_keys($pageSet->getGoodTitles());
        $this->addWhereFld('rev_page', reset($pageIds));

        // Filter by user ID when the name resolves to an account,
        // otherwise by the stored user text.
        if ($params['user'] !== null) {
            $wantedUser = User::newFromName($params['user']);
            if ($wantedUser && $wantedUser->getId() > 0) {
                $this->addWhereFld('rev_user', $wantedUser->getId());
            } else {
                $this->addWhereFld('rev_user_text', $params['user']);
            }
        } elseif ($params['excludeuser'] !== null) {
            $unwantedUser = User::newFromName($params['excludeuser']);
            if ($unwantedUser && $unwantedUser->getId() > 0) {
                $this->addWhere('rev_user != ' . $unwantedUser->getId());
            } else {
                $this->addWhere('rev_user_text != ' . $db->addQuotes($params['excludeuser']));
            }
        }

        if ($params['user'] !== null || $params['excludeuser'] !== null) {
            // Paranoia: avoid brute force searches (bug 17342)
            $bitmask = 0;
            if (!$this->getUser()->isAllowed('deletedhistory')) {
                $bitmask = Revision::DELETED_USER;
            } elseif (!$this->getUser()->isAllowedAny('suppressrevision', 'viewsuppressed')) {
                $bitmask = Revision::DELETED_USER | Revision::DELETED_RESTRICTED;
            }
            if ($bitmask) {
                $this->addWhere($db->bitAnd('rev_deleted', $bitmask) . " != {$bitmask}");
            }
        }
    } elseif ($revCount > 0) {
        // Always targets the PRIMARY index
        $liveRevisions = $pageSet->getLiveRevisionIDs();

        // Restrict to the requested revision IDs
        $this->addWhereFld('rev_id', array_keys($liveRevisions));

        if ($params['continue'] !== null) {
            $this->addWhere('rev_id >= ' . intval($params['continue']));
        }
        $this->addOption('ORDER BY', 'rev_id');
    } elseif ($pageCount > 0) {
        // Always targets the rev_page_id index
        $goodTitles = $pageSet->getGoodTitles();

        // Multi-page, non-enumeration mode: latest revision of each page only
        $this->addWhere('page_latest=rev_id');

        // Restrict to the requested pages; the condition is stated on both
        // page_id and rev_page instead of relying on equality propagation.
        $this->addWhereFld('page_id', array_keys($goodTitles));
        $this->addWhereFld('rev_page', array_keys($goodTitles));

        if ($params['continue'] !== null) {
            // Continuation value has the form "<page_id>|<rev_id>"
            $continueParts = explode('|', $params['continue']);
            $this->dieContinueUsageIf(count($continueParts) != 2);
            $pageid = intval($continueParts[0]);
            $revid = intval($continueParts[1]);
            $this->addWhere(
                "rev_page > {$pageid} OR "
                    . "(rev_page = {$pageid} AND "
                    . "rev_id >= {$revid})"
            );
        }
        $this->addOption('ORDER BY', ['rev_page', 'rev_id']);
    } else {
        ApiBase::dieDebug(__METHOD__, 'param validation?');
    }

    // Ask for one row more than needed to detect whether more results exist
    $this->addOption('LIMIT', $this->limit + 1);

    // Continuation value for a row; its format depends on the query mode
    $continueValueFor = static function ($row) use ($enumRevMode, $revCount) {
        if ($enumRevMode) {
            return $row->rev_timestamp . '|' . intval($row->rev_id);
        }
        if ($revCount > 0) {
            return intval($row->rev_id);
        }
        return intval($row->rev_page) . '|' . intval($row->rev_id);
    };

    $rowsSeen = 0;
    $generated = [];
    $hookData = [];
    $res = $this->select(__METHOD__, [], $hookData);

    foreach ($res as $row) {
        if (++$rowsSeen > $this->limit) {
            // This is the extra row: more results exist, so record where to resume
            $this->setContinueEnumParameter('continue', $continueValueFor($row));
            break;
        }

        if ($resultPageSet !== null) {
            $generated[] = $row->rev_id;
            continue;
        }

        $revision = new Revision($row);
        $revInfo = $this->extractRevisionInfo($revision, $row);

        if ($this->token !== null) {
            $title = $revision->getTitle();
            $tokenFunctions = $this->getTokenFunctions();
            foreach ($this->token as $tokenName) {
                $tokenValue = call_user_func(
                    $tokenFunctions[$tokenName],
                    $title->getArticleID(),
                    $title,
                    $revision
                );
                if ($tokenValue !== false) {
                    $revInfo[$tokenName . 'token'] = $tokenValue;
                } else {
                    $this->setWarning("Action '{$tokenName}' is not allowed for the current user");
                }
            }
        }

        $fit = $this->processRow($row, $revInfo, $hookData)
            && $this->addPageSubItem($row->rev_page, $revInfo, 'rev');
        if (!$fit) {
            // The result could not take this row: resume from it next time
            $this->setContinueEnumParameter('continue', $continueValueFor($row));
            break;
        }
    }

    if ($resultPageSet !== null) {
        $resultPageSet->populateFromRevisionIDs($generated);
    }
}
/**
 * Rebuild the translation memory from the current translation pages.
 *
 * For every non-meta message group the source-language definitions are
 * loaded. For each definition, pages whose title starts with "<key>/" in the
 * group's namespace are looked up together with their latest revision text.
 * When at least one such page exists, the definition is stored in `sources`
 * (an identical existing row is reused) and each page's text is written to
 * `targets` with the language code derived from the page title.
 * The write handle is committed once per group.
 *
 * @return void
 */
public function execute() {
    global $wgContLang;

    $dbw = TranslationMemoryUpdater::getDatabaseHandle();
    if ( $dbw === null ) {
        $this->error( "Database file not configured" );
        // NOTE(review): exit() is not defined in the code visible here;
        // confirm that this class (or a parent) actually provides it.
        $this->exit();
    }
    $dbw->setFlag( DBO_TRX ); // HUGE speed improvement

    // These do not depend on the group or message, so build them only once.
    $dbr = wfGetDB( DB_SLAVE );
    $tables = array( 'page', 'revision', 'text' );
    // Revision::selectFields() is included because the rows are handed to
    // the Revision constructor below.
    $vars = array_merge(
        Revision::selectTextFields(),
        array( 'page_title' ),
        Revision::selectFields()
    );

    $groups = MessageGroups::singleton()->getGroups();
    foreach ( $groups as $id => $group ) {
        if ( $group->isMeta() ) {
            continue;
        }

        $this->output( "Processing: {$group->getLabel()} ", $id );

        $namespace = $group->getNamespace();
        $sourceLanguage = $group->getSourceLanguage();
        $capitalized = MWNamespace::isCapitalized( $namespace );
        $ns_text = $wgContLang->getNsText( $namespace );

        $definitions = $group->load( $sourceLanguage );
        foreach ( $definitions as $key => $definition ) {
            // TODO: would be nice to do key normalisation closer to the
            // message groups, to avoid transforming back and forth.
            // But how to preserve the original keys...
            $key = strtr( $key, ' ', '_' );
            $key = $capitalized ? $wgContLang->ucfirst( $key ) : $key;

            $conds = array(
                'page_latest = rev_id',
                'rev_text_id = old_id',
                'page_namespace' => $namespace,
                'page_title ' . $dbr->buildLike( "$key/", $dbr->anyString() ),
            );
            $res = $dbr->select( $tables, $vars, $conds, __METHOD__ );

            // Skip messages that have no translation page at all
            if ( $res->numRows() < 1 ) {
                continue;
            }

            $insert = array(
                'text' => $definition,
                'context' => "$ns_text:$key",
                'length' => strlen( $definition ),
                'lang' => $sourceLanguage,
            );

            // Reuse an identical source row when there is one
            $source_id = $dbw->selectField( '`sources`', 'sid', $insert, __METHOD__ );
            if ( $source_id === false ) {
                $dbw->insert( '`sources`', $insert, __METHOD__ );
                $source_id = $dbw->insertId();
            }

            $this->output( ' ', $id );

            foreach ( $res as $row ) {
                list( , $code ) = TranslateUtils::figureMessage( $row->page_title );
                $revision = new Revision( $row );
                $insert = array(
                    'text' => $revision->getText(),
                    'lang' => $code,
                    'time' => wfTimestamp(),
                    'sid' => $source_id,
                );
                // We only do SQlite which doesn't need to know unique indexes
                $dbw->replace( '`targets`', null, $insert, __METHOD__ );
            }

            $this->output( "{$res->numRows()}", $id );
        }

        $dbw->commit();
    }
}
/**
 * Collect the users listed on the "<code>/translators" portal pages.
 *
 * The latest revision text of each portal page is scanned for
 * {{User|name}} / {{user|name}} template calls. Every user found is also
 * added, together with the matching talk page, to a LinkBatch that is
 * executed before returning.
 *
 * @param array $natives Array whose keys are language codes; values are ignored
 * @return array Map of lower-cased language code to a map of user name
 *   (underscores replaced by spaces) to -1. Empty when $natives is empty.
 */
public function fetchTranslatorsPortal( $natives ) {
    $titles = array();
    foreach ( $natives as $code => $_ ) {
        $titles[] = Title::capitalize( $code, NS_PORTAL ) . '/translators';
    }

    // Without any language there is no page to look up. Bail out early,
    // because an empty value list is not a valid 'page_title' condition.
    if ( !$titles ) {
        return array();
    }

    $dbr = wfGetDB( DB_SLAVE );
    $tables = array( 'page', 'revision', 'text' );
    $vars = array_merge(
        Revision::selectTextFields(),
        array( 'page_title', 'page_namespace' ),
        Revision::selectFields()
    );
    $conds = array(
        'page_latest = rev_id',
        'rev_text_id = old_id',
        'page_namespace' => NS_PORTAL,
        'page_title' => $titles,
    );

    $res = $dbr->select( $tables, $vars, $conds, __METHOD__ );

    $users = array();
    $lb = new LinkBatch;

    foreach ( $res as $row ) {
        $rev = new Revision( $row );
        $text = $rev->getText();
        // The language code is the page title without the "/translators" suffix
        $code = strtolower( preg_replace( '!/translators$!', '', $row->page_title ) );

        preg_match_all( '!{{[Uu]ser\|([^}|]+)!', $text, $matches, PREG_SET_ORDER );
        foreach ( $matches as $match ) {
            $user = Title::capitalize( $match[1], NS_USER );
            $lb->add( NS_USER, $user );
            $lb->add( NS_USER_TALK, $user );

            if ( !isset( $users[$code] ) ) {
                $users[$code] = array();
            }
            $users[$code][strtr( $user, '_', ' ' )] = -1;
        }
    }

    $lb->execute();

    return $users;
}
/**
 * Fetch an article from this or another local MediaWiki database.
 * This is probably *very* fragile, and shouldn't be used perhaps.
 *
 * The page, revision and text tables are read directly from the database
 * of the given wiki, so this assumes that old_text is the actual text or
 * that the external store system is at least unified.
 *
 * @param string $wiki Wiki identifier passed to wfGetDB() to select the database
 * @param string $article Title text of the page to fetch
 * @return string|bool Text of the page's latest revision, or false when the
 *   title is invalid or no matching page row is found
 */
function getArticleText($wiki, $article) {
    wfDebugLog('SpamBlacklist', "Fetching {$this->getBlacklistType()} blacklist from '{$article}' on '{$wiki}'...\n");

    $title = Title::newFromText($article);
    if (!$title) {
        // Invalid title text cannot match any page; report it like a missing page
        return false;
    }

    // Load all the relevant tables from the correct DB.
    $row = wfGetDB(DB_SLAVE, array(), $wiki)->selectRow(
        array('page', 'revision', 'text'),
        array_merge(
            Revision::selectFields(),
            Revision::selectPageFields(),
            Revision::selectTextFields()
        ),
        array(
            'page_namespace' => $title->getNamespace(),
            'page_title' => $title->getDBkey(),
            'rev_id=page_latest',
            'old_id=rev_text_id'
        ),
        __METHOD__
    );

    return $row ? Revision::newFromRow($row)->getText() : false;
}