/**
 * Dispatch a filter request to the filter implementation matching $type.
 *
 * @param string $type          Type of filter, compared against the FILTER_* static properties.
 * @param mixed  $request       Filter content. For the combined tag + text type it is read as
 *                              index 0 (tags) and index 1 (search term).
 * @param bool   $casesensitive Optional: forwarded to the tag filter.
 * @param bool   $privateonly   Optional: forwarded to every filter except the hash and day filters.
 *
 * @return array filtered link list.
 */
public function filter($type, $request, $casesensitive = false, $privateonly = false)
{
    switch ($type) {
        case self::$FILTER_HASH:
            return $this->filterSmallHash($request);

        case self::$FILTER_TAG | self::$FILTER_TEXT:
            // Nothing to search for: behave as if no filter had been requested.
            if (empty($request)) {
                return $this->noFilter($privateonly);
            }

            // Tags are applied first; the text search then runs on that intermediate result.
            $matches = isset($request[0])
                ? $this->filterTags($request[0], $casesensitive, $privateonly)
                : $this->links;

            if (isset($request[1])) {
                $textFilter = new LinkFilter($matches);
                $matches = $textFilter->filterFulltext($request[1], $privateonly);
            }

            return $matches;

        case self::$FILTER_TEXT:
            return $this->filterFulltext($request, $privateonly);

        case self::$FILTER_TAG:
            return $this->filterTags($request, $casesensitive, $privateonly);

        case self::$FILTER_DAY:
            return $this->filterDay($request);

        default:
            return $this->noFilter($privateonly);
    }
}
/**
 * Check whether $content contains a link to $filterEntry.
 *
 * Only TextContent is inspected; every other content type is reported as "no match".
 *
 * @param Content $content Content to check
 * @param string $filterEntry Domainparts, see makeRegex() for more details
 * @return int 0 if no match or 1 if there's at least one match (the preg_match() result)
 */
static function matchEntry(Content $content, $filterEntry)
{
    if ($content instanceof TextContent) {
        $haystack = $content->getNativeData();

        return preg_match(LinkFilter::makeRegex($filterEntry), $haystack);
    }

    // TODO: handle other types of content too.
    //      Maybe create ContentHandler::matchFilter( LinkFilter ).
    //      Think about a common base class for LinkFilter and MagicWord.
    return 0;
}
/**
 * Return an appropriately formatted LIKE query and the clause (field) to match it against.
 *
 * LinkFilter::makeLike() is tried first; its result is matched against el_index.
 * When it returns false and the query is a dotted group of numbers ending in a
 * wildcard (optionally with a port), a LIKE pattern for el_to is built here instead.
 *
 * @param string $query Search pattern
 * @param string $prot Protocol prefix; prepended verbatim when the pattern is built here
 * @return array Two elements: the LIKE pattern (false if the query could not be handled)
 *               and the field name ('el_index' or 'el_to')
 */
static function mungeQuery($query, $prot)
{
    $field = 'el_index';
    $rv = LinkFilter::makeLike($query, $prot);

    if ($rv === false) {
        // makeLike doesn't handle wildcard in IP, so we'll have to munge here.
        // The groups are non-capturing "(?:"; the earlier "(:?" spelling instead
        // allowed an optional literal colon in front of every octet.
        $wildcardIp = '/^(?:[0-9]{1,3}\\.)+\\*\\s*$|^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}:[0-9]*\\*\\s*$/';
        if (preg_match($wildcardIp, $query)) {
            // Drop the trailing wildcard/blanks and append the SQL wildcard.
            $rv = $prot . rtrim($query, " \t*") . '%';
            $field = 'el_to';
        }
    }

    return array($rv, $field);
}
/**
 * Revert one page to its newest revision whose text does not match the regex
 * built from $domain, or blank the page when every revision matches.
 *
 * Progress is reported with print.
 *
 * @param int $id Page ID
 * @param string $domain Domain spec passed to LinkFilter::makeRegex() and to the edit summaries
 */
function cleanupArticle($id, $domain)
{
    $title = Title::newFromID($id);
    if (!$title) {
        print "Internal error: no page for ID {$id}\n";
        return;
    }

    print $title->getPrefixedDBkey() . " ...";

    $rev = Revision::newFromTitle($title);
    if (!$rev) {
        // Without a current revision there is nothing to inspect or revert.
        print "Internal error: no revision for page ID {$id}\n";
        return;
    }

    $revId = $rev->getId();
    $currentRevId = $revId;
    $regex = LinkFilter::makeRegex($domain);

    // Walk backwards through the history until a revision without the link is found.
    while ($rev && preg_match($regex, $rev->getText())) {
        # Revision::getPrevious can't be used in this way before MW 1.6 (Revision.php 1.26)
        #$rev = $rev->getPrevious();
        $revId = $title->getPreviousRevisionID($revId);
        if ($revId) {
            $rev = Revision::newFromTitle($title, $revId);
        } else {
            $rev = false;
        }
    }

    if ($revId == $currentRevId) {
        // The regex didn't match the current article text
        // This happens e.g. when a link comes from a template rather than the page itself
        print "False match\n";
    } else {
        $dbw =& wfGetDB(DB_MASTER);
        $dbw->immediateBegin();
        if (!$rev) {
            // Didn't find a non-spammy revision, blank the page
            print "blanking\n";
            $article = new Article($title);
            $article->updateArticle('', wfMsg('spam_blanking', $domain), false, false);
        } else {
            // Revert to this revision
            print "reverting\n";
            $article = new Article($title);
            $article->updateArticle($rev->getText(), wfMsg('spam_reverting', $domain), false, false);
        }
        $dbw->immediateCommit();
        wfDoUpdates();
    }
}
/**
 * Describe the link search query as a tables/fields/conds/options array.
 *
 * The munged query is stored in $this->mMungedQuery. When it is false the
 * returned description uses the condition '0=1'.
 *
 * @return array Query description
 */
function getQueryInfo()
{
    global $wgMiserMode;

    $dbr = wfGetDB(DB_SLAVE);

    // strip everything past first wildcard, so that
    // index-based-only lookup would be done
    list($this->mMungedQuery, $clause) = self::mungeQuery($this->mQuery, $this->mProt);
    if ($this->mMungedQuery === false) {
        // Invalid query; return no results
        return array('tables' => 'page', 'fields' => 'page_id', 'conds' => '0=1');
    }

    $likeExpr = $dbr->buildLike(LinkFilter::keepOneWildcard($this->mMungedQuery));

    $conds = array('page_id = el_from', $clause . ' ' . $likeExpr);
    // The namespace restriction is only added when a namespace is set and miser mode is off.
    if (isset($this->mNs) && !$wgMiserMode) {
        $conds['page_namespace'] = $this->mNs;
    }

    return array(
        'tables' => array('page', 'externallinks'),
        'fields' => array(
            'namespace' => 'page_namespace',
            'title' => 'page_title',
            'value' => 'el_index',
            'url' => 'el_to',
        ),
        'conds' => $conds,
        // The field name returned by mungeQuery() is also used as the index name.
        'options' => array('USE INDEX' => $clause),
    );
}
/**
 * Build the raw SELECT statement for the link search.
 *
 * @return string SQL selecting namespace, title, value and url for pages whose
 *                external links match the munged query
 */
function getSQL()
{
    global $wgMiserMode;

    $dbr = wfGetDB(DB_SLAVE);
    $pageTable = $dbr->tableName('page');
    $linksTable = $dbr->tableName('externallinks');

    /* strip everything past first wildcard, so that index-based-only lookup would be done */
    list($munged, $clause) = self::mungeQuery($this->mQuery, $this->mProt);
    $likeExpr = $dbr->buildLike(LinkFilter::keepOneWildcard($munged));

    // Optional namespace restriction, skipped in miser mode.
    $nsCond = (isset($this->mNs) && !$wgMiserMode)
        ? 'AND page_namespace=' . $dbr->addQuotes($this->mNs)
        : '';

    // The field name returned by mungeQuery() is also used as the index name.
    $indexHint = $dbr->useIndexClause($clause);

    return "SELECT\n"
        . "\t\t\t\tpage_namespace AS namespace,\n"
        . "\t\t\t\tpage_title AS title,\n"
        . "\t\t\t\tel_index AS value,\n"
        . "\t\t\t\tel_to AS url\n"
        . "\t\t\tFROM\n"
        . "\t\t\t\t{$pageTable},\n"
        . "\t\t\t\t{$linksTable} {$indexHint}\n"
        . "\t\t\tWHERE\n"
        . "\t\t\t\tpage_id=el_from\n"
        . "\t\t\t\tAND {$clause} {$likeExpr}\n"
        . "\t\t\t\t{$nsCond}";
}
/**
 * Filter links according to search parameters.
 *
 * Both search values are passed through escape() before use. The filter type
 * is derived from which of them are non-empty.
 *
 * @param array $filterRequest Search request content. Supported keys:
 *                              - searchtags: list of tags
 *                              - searchterm: term search
 * @param bool  $casesensitive Optional: Perform case sensitive filter
 * @param bool  $privateonly   Optional: Returns private links only if true.
 *
 * @return array filtered links, all links if no suitable filter was provided.
 */
public function filterSearch($filterRequest = array(), $casesensitive = false, $privateonly = false)
{
    // Filter link database according to parameters.
    $searchtags = !empty($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
    $searchterm = !empty($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';

    // The former "empty($type)" guard tested a variable that had not been assigned yet,
    // so it was always true and has been dropped.
    if (!empty($searchtags) && !empty($searchterm)) {
        // Search tags + fullsearch.
        $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT;
        $request = array($searchtags, $searchterm);
    } elseif (!empty($searchtags)) {
        // Search by tags only.
        $type = LinkFilter::$FILTER_TAG;
        $request = $searchtags;
    } elseif (!empty($searchterm)) {
        // Fulltext search only.
        $type = LinkFilter::$FILTER_TEXT;
        $request = $searchterm;
    } else {
        // No usable search parameter: empty type and request.
        $type = '';
        $request = '';
    }

    $linkFilter = new LinkFilter($this->_links);

    return $linkFilter->filter($type, $request, $casesensitive, $privateonly);
}
/**
 * Check whether $text contains a link to $filterEntry
 *
 * The entry is turned into a regex by makeRegex() and applied with preg_match().
 *
 * @param $text String: text to check
 * @param $filterEntry String: domainparts, see makeRegex() for more details
 * @return Integer: 0 if no match or 1 if there's at least one match
 */
static function matchEntry($text, $filterEntry)
{
    return preg_match(LinkFilter::makeRegex($filterEntry), $text);
}
/**
 * Describe the link search query as a tables/fields/conds/options array.
 *
 * The munged query is stored in $this->mungedQuery. When it is false the
 * returned description uses the condition '0=1'.
 *
 * @return array Query description
 */
public function getQueryInfo()
{
    $dbr = wfGetDB(DB_SLAVE);

    // strip everything past first wildcard, so that
    // index-based-only lookup would be done
    list($this->mungedQuery, $clause) = self::mungeQuery($this->mQuery, $this->mProt);
    if ($this->mungedQuery === false) {
        // Invalid query; return no results
        return ['tables' => 'page', 'fields' => 'page_id', 'conds' => '0=1'];
    }

    $likeExpr = $dbr->buildLike(LinkFilter::keepOneWildcard($this->mungedQuery));

    $conds = ['page_id = el_from', $clause . ' ' . $likeExpr];
    // The namespace restriction is only added when a namespace is set and MiserMode is off.
    if ($this->mNs !== null && !$this->getConfig()->get('MiserMode')) {
        $conds['page_namespace'] = $this->mNs;
    }

    return [
        'tables' => ['page', 'externallinks'],
        'fields' => [
            'namespace' => 'page_namespace',
            'title' => 'page_title',
            'value' => 'el_index',
            'url' => 'el_to',
        ],
        'conds' => $conds,
        // The field name returned by mungeQuery() is also used as the index name.
        'options' => ['USE INDEX' => $clause],
    ];
}
/**
 * Build and run the external-links query, then publish the rows.
 *
 * With a page set, each row is handed to $resultPageSet->processDbRow().
 * Without one, rows are added to the API result under array('query', <module name>).
 * One row more than the limit is fetched to detect whether a continuation is needed.
 *
 * @param object|null $resultPageSet Page set to fill, or null to emit results directly
 */
private function run($resultPageSet = null)
{
    $params = $this->extractRequestParams();

    $protocol = $params['protocol'];
    $query = $params['query'];

    // Find the right prefix
    global $wgUrlProtocols;
    if ($protocol && !in_array($protocol, $wgUrlProtocols)) {
        foreach ($wgUrlProtocols as $p) {
            if (substr($p, 0, strlen($protocol)) === $protocol) {
                $protocol = $p;
                break;
            }
        }
    } else {
        // NOTE(review): a protocol that is already an exact entry of $wgUrlProtocols
        // also ends up here and is discarded - confirm this is intended.
        $protocol = null;
    }

    $db = $this->getDB();
    $this->addTables(array('page', 'externallinks')); // must be in this order for 'USE INDEX'
    $this->addOption('USE INDEX', 'el_index');
    $this->addWhere('page_id=el_from');
    $this->addWhereFld('page_namespace', $params['namespace']);

    // Both checks must hold. With "||" an empty-string query was still handed to
    // LinkFilter::makeLikeArray() instead of falling through to the protocol-only filter.
    if ($query !== null && $query !== '') {
        if (is_null($protocol)) {
            $protocol = 'http://';
        }

        $likeQuery = LinkFilter::makeLikeArray($query, $protocol);
        if (!$likeQuery) {
            $this->dieUsage('Invalid query', 'bad_query');
        }

        $likeQuery = LinkFilter::keepOneWildcard($likeQuery);
        $this->addWhere('el_index ' . $db->buildLike($likeQuery));
    } elseif (!is_null($protocol)) {
        // No query text: match every link that starts with the protocol.
        $this->addWhere('el_index ' . $db->buildLike("{$protocol}", $db->anyString()));
    }

    $prop = array_flip($params['prop']);
    $fld_ids = isset($prop['ids']);
    $fld_title = isset($prop['title']);
    $fld_url = isset($prop['url']);

    if (is_null($resultPageSet)) {
        $this->addFields(array('page_id', 'page_namespace', 'page_title'));
        $this->addFieldsIf('el_to', $fld_url);
    } else {
        $this->addFields($resultPageSet->getPageTableFields());
    }

    $limit = $params['limit'];
    $offset = $params['offset'];
    // Ask for one extra row so that "more results exist" can be detected.
    $this->addOption('LIMIT', $limit + 1);
    if (isset($offset)) {
        $this->addOption('OFFSET', $offset);
    }

    $res = $this->select(__METHOD__);

    $result = $this->getResult();
    $count = 0;
    foreach ($res as $row) {
        if (++$count > $limit) {
            // We've reached the one extra which shows that there are additional pages to be had. Stop here...
            $this->setContinueEnumParameter('offset', $offset + $limit);
            break;
        }

        if (is_null($resultPageSet)) {
            $vals = array();
            if ($fld_ids) {
                $vals['pageid'] = intval($row->page_id);
            }
            if ($fld_title) {
                $title = Title::makeTitle($row->page_namespace, $row->page_title);
                ApiQueryBase::addTitleInfo($vals, $title);
            }
            if ($fld_url) {
                $vals['url'] = $row->el_to;
            }
            $fit = $result->addValue(array('query', $this->getModuleName()), null, $vals);
            if (!$fit) {
                // The row was not added: resume from this row next time.
                $this->setContinueEnumParameter('offset', $offset + $count - 1);
                break;
            }
        } else {
            $resultPageSet->processDbRow($row);
        }
    }

    if (is_null($resultPageSet)) {
        $result->setIndexedTagName_internal(array('query', $this->getModuleName()), $this->getModulePrefix());
    }
}
/**
 * Rename tags starting with a '-' to work with tag exclusion search.
 *
 * Every link returned by an unfiltered search has the leading dashes of each
 * space-separated tag removed and its tags de-duplicated; the link is written
 * back under its 'linkdate' key and the database is saved.
 *
 * @return bool Always true.
 */
public function updateMethodRenameDashTags()
{
    $linklist = $this->linkDB->filterSearch();
    foreach ($linklist as $link) {
        // Strip the whole run of leading dashes: removing a single one would turn
        // "--tag" into "-tag", which still starts with a dash.
        $link['tags'] = preg_replace('/(^| )\\-+/', '$1', $link['tags']);
        $link['tags'] = implode(' ', array_unique(LinkFilter::tagsStrToArray($link['tags'], true)));
        $this->linkDB[$link['linkdate']] = $link;
    }
    $this->linkDB->savedb($this->config['config']['PAGECACHE']);

    return true;
}
/**
 * Return an appropriately formatted LIKE query.
 *
 * Delegates to LinkFilter::makeLike() and returns its result unchanged.
 *
 * @param string $query Search pattern
 * @param string $prot Protocol
 * @return mixed Whatever LinkFilter::makeLike() returns for these arguments
 */
static function mungeQuery($query, $prot)
{
    $likeQuery = LinkFilter::makeLike($query, $prot);

    return $likeQuery;
}
/**
 * Revert one page to its newest revision whose text does not match $domain,
 * or blank the page when every revision matches.
 *
 * @param int $id Page ID
 * @param string $domain Domain spec passed to LinkFilter::matchEntry() and to the edit summaries
 */
private function cleanupArticle($id, $domain)
{
    $title = Title::newFromID($id);
    if (!$title) {
        $this->error("Internal error: no page for ID {$id}");
        return;
    }

    $this->output($title->getPrefixedDBkey() . " ...");

    $rev = Revision::newFromTitle($title);
    if (!$rev) {
        // Without a current revision there is nothing to inspect or revert.
        $this->error("Internal error: no revision for page ID {$id}");
        return;
    }

    $revId = $rev->getId();
    $currentRevId = $revId;

    // Walk backwards through the history until a revision without the link is found.
    while ($rev && LinkFilter::matchEntry($rev->getText(), $domain)) {
        # Revision::getPrevious can't be used in this way before MW 1.6 (Revision.php 1.26)
        #$rev = $rev->getPrevious();
        $revId = $title->getPreviousRevisionID($revId);
        if ($revId) {
            $rev = Revision::newFromTitle($title, $revId);
        } else {
            $rev = false;
        }
    }

    if ($revId == $currentRevId) {
        // The regex didn't match the current article text
        // This happens e.g. when a link comes from a template rather than the page itself
        $this->output("False match\n");
    } else {
        $dbw = wfGetDB(DB_MASTER);
        $dbw->begin();
        if (!$rev) {
            // Didn't find a non-spammy revision, blank the page
            $this->output("blanking\n");
            $article = new Article($title);
            $article->updateArticle('', wfMsg('spam_blanking', $domain), false, false);
        } else {
            // Revert to this revision
            $this->output("reverting\n");
            $article = new Article($title);
            $article->updateArticle($rev->getText(), wfMsg('spam_reverting', $domain), false, false);
        }
        $dbw->commit();
        wfDoUpdates();
    }
}
/**
 * testMakeLikeArrayWithInvalidPatterns()
 *
 * Asserts that LinkFilter::makeLikeArray($pattern) returns false for each
 * pattern supplied by the data provider.
 *
 * @dataProvider provideInvalidPatterns
 *
 * @param string $pattern Invalid search pattern
 */
function testMakeLikeArrayWithInvalidPatterns($pattern)
{
    $likeArray = LinkFilter::makeLikeArray($pattern);
    $failureMessage = "'{$pattern}' is not a valid pattern and should be rejected";

    $this->assertFalse($likeArray, $failureMessage);
}
/**
 * Build and run the external-links query, then publish the rows.
 *
 * With a page set, each row is handed to $resultPageSet->processDbRow().
 * Without one, rows are collected and added to the API result under
 * 'query' / <module name>. One row more than the limit is fetched to detect
 * whether a continuation is needed.
 *
 * @param object|null $resultPageSet Page set to fill, or null to emit results directly
 */
private function run($resultPageSet = null)
{
    $params = $this->extractRequestParams();

    $protocol = $params['protocol'];
    $query = $params['query'];
    if (is_null($query)) {
        $this->dieUsage('Missing required query parameter', 'params');
    }

    // Find the right prefix
    global $wgUrlProtocols;
    foreach ($wgUrlProtocols as $p) {
        if (substr($p, 0, strlen($protocol)) === $protocol) {
            $protocol = $p;
            break;
        }
    }

    $likeQuery = LinkFilter::makeLike($query, $protocol);
    if (!$likeQuery) {
        $this->dieUsage('Invalid query', 'bad_query');
    }
    // Keep everything up to and including the first '%'.
    // NOTE(review): if the pattern contained no '%', strpos() would return false
    // and only the first character would be kept - confirm makeLike() always emits one.
    $likeQuery = substr($likeQuery, 0, strpos($likeQuery, '%') + 1);

    $this->addTables(array('page', 'externallinks')); // must be in this order for 'USE INDEX'
    $this->addOption('USE INDEX', 'el_index');

    $db = $this->getDB();
    $this->addWhere('page_id=el_from');
    $this->addWhere('el_index LIKE ' . $db->addQuotes($likeQuery));
    $this->addWhereFld('page_namespace', $params['namespace']);

    $prop = array_flip($params['prop']);
    $fld_ids = isset($prop['ids']);
    $fld_title = isset($prop['title']);
    $fld_url = isset($prop['url']);

    if (is_null($resultPageSet)) {
        $this->addFields(array('page_id', 'page_namespace', 'page_title'));
        $this->addFieldsIf('el_to', $fld_url);
    } else {
        $this->addFields($resultPageSet->getPageTableFields());
    }

    $limit = $params['limit'];
    $offset = $params['offset'];
    // Ask for one extra row so that "more results exist" can be detected.
    $this->addOption('LIMIT', $limit + 1);
    if (isset($offset)) {
        $this->addOption('OFFSET', $offset);
    }

    $res = $this->select(__METHOD__);

    $data = array();
    $count = 0;
    while ($row = $db->fetchObject($res)) {
        if (++$count > $limit) {
            // We've reached the one extra which shows that there are additional pages to be had. Stop here...
            // The extra row itself was not returned, so the next request must start
            // exactly at it: $offset + $limit (adding 1 more would skip that row).
            $this->setContinueEnumParameter('offset', $offset + $limit);
            break;
        }

        if (is_null($resultPageSet)) {
            $vals = array();
            if ($fld_ids) {
                $vals['pageid'] = intval($row->page_id);
            }
            if ($fld_title) {
                $title = Title::makeTitle($row->page_namespace, $row->page_title);
                $vals['ns'] = intval($title->getNamespace());
                $vals['title'] = $title->getPrefixedText();
            }
            if ($fld_url) {
                $vals['url'] = $row->el_to;
            }
            $data[] = $vals;
        } else {
            $resultPageSet->processDbRow($row);
        }
    }
    $db->freeResult($res);

    if (is_null($resultPageSet)) {
        $result = $this->getResult();
        $result->setIndexedTagName($data, $this->getModulePrefix());
        $result->addValue('query', $this->getModuleName(), $data);
    }
}
/**
 * Revert one page to its newest revision that is neither text-deleted nor
 * matching $domain. When no such revision exists the page is deleted if the
 * 'delete' option is set, otherwise it is replaced with empty content.
 *
 * @param int $id Page ID
 * @param string $domain Domain spec passed to LinkFilter::matchEntry() and to the edit summaries
 */
private function cleanupArticle($id, $domain)
{
    $title = Title::newFromID($id);
    if (!$title) {
        $this->error("Internal error: no page for ID {$id}");
        return;
    }

    $this->output($title->getPrefixedDBkey() . " ...");

    $rev = Revision::newFromTitle($title);
    if (!$rev) {
        // Without a current revision there is nothing to inspect or revert.
        $this->error("Internal error: no revision for page ID {$id}");
        return;
    }
    $currentRevId = $rev->getId();

    // Walk backwards through the history, skipping revisions whose text is
    // deleted or still contains the link.
    while ($rev && ($rev->isDeleted(Revision::DELETED_TEXT)
        || LinkFilter::matchEntry($rev->getContent(Revision::RAW), $domain))
    ) {
        $rev = $rev->getPrevious();
    }

    if ($rev && $rev->getId() == $currentRevId) {
        // The regex didn't match the current article text
        // This happens e.g. when a link comes from a template rather than the page itself
        $this->output("False match\n");
    } else {
        $dbw = wfGetDB(DB_MASTER);
        $dbw->begin(__METHOD__);
        $page = WikiPage::factory($title);
        if ($rev) {
            // Revert to this revision
            $content = $rev->getContent(Revision::RAW);

            $this->output("reverting\n");
            $page->doEditContent(
                $content,
                wfMessage('spam_reverting', $domain)->inContentLanguage()->text(),
                EDIT_UPDATE,
                $rev->getId()
            );
        } elseif ($this->hasOption('delete')) {
            // Didn't find a non-spammy revision, delete the page
            $this->output("deleting\n");
            $page->doDeleteArticle(
                wfMessage('spam_deleting', $domain)->inContentLanguage()->text()
            );
        } else {
            // Didn't find a non-spammy revision, blank the page
            $handler = ContentHandler::getForTitle($title);
            $content = $handler->makeEmptyContent();

            $this->output("blanking\n");
            $page->doEditContent(
                $content,
                wfMessage('spam_blanking', $domain)->inContentLanguage()->text()
            );
        }
        $dbw->commit(__METHOD__);
    }
}
/**
 * Find pages linking to $phrase and either revert them or list them,
 * depending on $this->mDo; the scope depends on $this->mMode ('this' or 'all').
 *
 * Validation failures re-display the form via showForm() and return early.
 *
 * @param string $phrase Link phrase to look for; matched as "%//{$phrase}%" against el_to
 * @param string $database Database name; received from the recursive call in 'all' mode
 *                         but not read in this method
 */
function cleanUp($phrase, $database)
{
    // $wgLocalDatabases has to be declared here, otherwise the check below only ever
    // sees an undefined local variable.
    global $wgOut, $wgUser, $wgLocalDatabases;

    // Caller name handed to the database layer.
    $fname = __METHOD__;
    // Set once any wiki in 'all' mode reports matching links.
    $found = false;

    if (!isset($phrase) || "" == $phrase) {
        $this->showForm(wfMsg('cleanupspam_error_empty'));
        return;
    }

    /* do a check whether something actually _is_ inside $wgLocalDatabases */
    if (!is_array($wgLocalDatabases) && 'local' == $this->mMode) {
        $this->showForm(wfMsg('cleanupspam_no_local'));
        return;
    }

    // makeLike() is used purely to validate the phrase; the queries below match
    // el_to against the raw phrase.
    if (!LinkFilter::makeLike($phrase)) {
        $this->showForm(wfMsg('cleanupspam_error_not_valid') . ": " . $phrase);
        return;
    }

    $dbr =& wfGetDB(DB_SLAVE);

    switch ($this->mMode) {
        case 'this':
            /* Clean up spam just on this wiki */
            $res = $dbr->select(
                'externallinks',
                array('DISTINCT el_from'),
                array('el_to LIKE ' . $dbr->addQuotes("%//{$phrase}%")),
                $fname
            );
            $count = $dbr->numRows($res);
            if ($count) {
                $wgOut->addWikiText("Found {$count} article(s) containing links to '''{$phrase}'''.\n");
                while ($row = $dbr->fetchObject($res)) {
                    if ('revert' == $this->mDo) {
                        /* have eyes on this */
                        $this->cleanupArticle($row->el_from, $phrase);
                    } else {
                        /* just add more data and that should be fine */
                        $this->writeupArticle($row->el_from, $phrase, $row->el_to);
                    }
                }
            } else {
                $wgOut->addWikiText(wfMsg('cleanupspam_count_zero', "'''" . $phrase . "'''."));
            }
            break;

        case 'all':
            /* todo check for no wikis in city_list */
            $wikis = $this->fetchWikias();
            if (!is_array($wikis)) {
                return;
            }
            $wgOut->addWikiText("Finding spam on all (" . count($wikis) . ") wikis.\n");
            foreach ($wikis as $db) {
                $count = $dbr->selectField(
                    "`" . $db->city_dbname . "`.externallinks",
                    'COUNT(*)',
                    array('el_to LIKE ' . $dbr->addQuotes("%//{$phrase}%")),
                    $fname
                );
                if ($count) {
                    $found = true;
                    // NOTE(review): nothing in this method changes $this->mMode before this
                    // recursive call and $database is never read, so the call appears to
                    // re-enter the 'all' branch - verify against the rest of the class.
                    $this->cleanUp($phrase, $db->city_dbname);
                }
            }
            if ('revert' == $this->mDo) {
                if ($found) {
                    $wgOut->addWikiText(wfMsg('cleanupspam_cleanup_finished'));
                } else {
                    $wgOut->addWikiText(wfMsg('cleanupspam_none_found', $phrase));
                }
            }
            break;
    }

    if ('revert' == $this->mDo) {
        $wgOut->addWikiText(wfMsg('cleanupspam_cleanup_finished'));
    }

    // Link back to the special page.
    $sk = $wgUser->getSkin();
    $titleObj = Title::makeTitle(NS_SPECIAL, 'Cleanupspam');
    $link_back = $sk->makeKnownLinkObj($titleObj, '<b>here</b>');
    $wgOut->addHtml("<br/>" . wfMsg('cleanupspam_link_back') . " " . $link_back . ".");
}
/**
 * Build the el_index LIKE condition for a URL search.
 *
 * With a non-empty query the protocol defaults to 'http://' and the query is
 * converted through LinkFilter; an invalid query ends in dieUsage(). With only
 * a protocol, every link starting with that protocol is matched.
 *
 * @param $query String
 * @param $protocol String
 * @return null|string SQL condition, or null when neither a query nor a protocol is given
 */
public function prepareUrlQuerySearchString($query = null, $protocol = null)
{
    $db = $this->getDb();

    // Both checks must hold. With "||" an empty-string query was still handed to
    // LinkFilter::makeLikeArray() instead of falling through to the protocol-only case.
    if ($query !== null && $query !== '') {
        if (is_null($protocol)) {
            $protocol = 'http://';
        }

        $likeQuery = LinkFilter::makeLikeArray($query, $protocol);
        if (!$likeQuery) {
            $this->dieUsage('Invalid query', 'bad_query');
        }

        $likeQuery = LinkFilter::keepOneWildcard($likeQuery);

        return 'el_index ' . $db->buildLike($likeQuery);
    } elseif (!is_null($protocol)) {
        return 'el_index ' . $db->buildLike("{$protocol}", $db->anyString());
    }

    return null;
}
/**
 * Filter links.
 *
 * An array request is joined with single spaces; the request is then trimmed
 * and handed to a LinkFilter built on this database's links.
 *
 * @param string $type          Type of filter.
 * @param mixed  $request       Search request, string or array.
 * @param bool   $casesensitive Optional: Perform case sensitive filter
 * @param bool   $privateonly   Optional: Returns private links only if true.
 *
 * @return array filtered links
 */
public function filter($type, $request, $casesensitive = false, $privateonly = false)
{
    $linkFilter = new LinkFilter($this->_links);

    if (is_array($request)) {
        $request = implode(' ', $request);
    }

    return $linkFilter->filter($type, trim($request), $casesensitive, $privateonly);
}