Example #1
0
 /**
  * Dispatch a filtering request to the matching filter method.
  *
  * The type is compared loosely (==) against the filter constants, in this
  * order: hash, combined tag + text, text, tag, day. Any other type falls
  * back to noFilter().
  *
  * @param string $type          Type of filter (eg. tags, permalink, etc.).
  * @param mixed  $request       Filter content. For the combined tag + text
  *                              filter, index 0 is handed to the tag filter and
  *                              index 1 to the full text filter.
  * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
  * @param bool   $privateonly   Optional: Only returns private links if true.
  *
  * @return array filtered link list.
  */
 public function filter($type, $request, $casesensitive = false, $privateonly = false)
 {
     if ($type == self::$FILTER_HASH) {
         return $this->filterSmallHash($request);
     }

     if ($type == (self::$FILTER_TAG | self::$FILTER_TEXT)) {
         // Nothing to search for: behave as if no filter was requested.
         if (empty($request)) {
             return $this->noFilter($privateonly);
         }

         $result = $this->links;
         if (isset($request[0])) {
             $result = $this->filterTags($request[0], $casesensitive, $privateonly);
         }
         if (isset($request[1])) {
             // Run the full text search on top of the links kept so far.
             $textFilter = new LinkFilter($result);
             $result = $textFilter->filterFulltext($request[1], $privateonly);
         }

         return $result;
     }

     if ($type == self::$FILTER_TEXT) {
         return $this->filterFulltext($request, $privateonly);
     }

     if ($type == self::$FILTER_TAG) {
         return $this->filterTags($request, $casesensitive, $privateonly);
     }

     if ($type == self::$FILTER_DAY) {
         return $this->filterDay($request);
     }

     return $this->noFilter($privateonly);
 }
Example #2
0
 /**
  * Test whether the text of $content matches the regex built from $filterEntry.
  *
  * Content that is not a TextContent is never inspected and yields 0.
  *
  * @param Content $content Content to check
  * @param string $filterEntry Domainparts, see makeRegex() for more details
  * @return int Result of preg_match(): 0 if no match or 1 if there's at least one match
  */
 static function matchEntry(Content $content, $filterEntry)
 {
     if ($content instanceof TextContent) {
         $haystack = $content->getNativeData();
         $pattern = LinkFilter::makeRegex($filterEntry);

         return preg_match($pattern, $haystack);
     }

     // TODO: handle other types of content too.
     //      Maybe create ContentHandler::matchFilter( LinkFilter ).
     //      Think about a common base class for LinkFilter and MagicWord.
     return 0;
 }
Example #3
0
 /**
  * Return an appropriately formatted LIKE query and the field it applies to.
  *
  * The query is first handed to LinkFilter::makeLike(). When that returns
  * false and the query looks like an IPv4 address with a trailing wildcard
  * (optionally with a port), a prefix pattern is built by hand and matched
  * against 'el_to' instead of 'el_index'.
  *
  * @param string $query Search pattern
  * @param string $prot  Protocol prefix, prepended to the hand-built pattern
  * @return array Two elements: the LIKE pattern (or false if the query could
  *               not be handled) and the name of the field to compare against
  */
 static function mungeQuery($query, $prot)
 {
     $field = 'el_index';
     $rv = LinkFilter::makeLike($query, $prot);
     if ($rv === false) {
         // makeLike doesn't handle wildcard in IP, so we'll have to munge here.
         // The groups are non-capturing "(?:"; the previous "(:?" was a capturing
         // group with an optional leading colon and accepted e.g. ":1.:2.*".
         $ipWildcard = '/^(?:[0-9]{1,3}\\.)+\\*\\s*$|^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}:[0-9]*\\*\\s*$/';
         if (preg_match($ipWildcard, $query)) {
             // Drop the trailing wildcard/whitespace and use a SQL prefix match.
             $rv = $prot . rtrim($query, " \t*") . '%';
             $field = 'el_to';
         }
     }
     return array($rv, $field);
 }
Example #4
0
/**
 * Revert a page to its most recent revision whose text does not match the
 * link regex for $domain, or blank it if every revision matches.
 *
 * Progress is printed to standard output.
 *
 * @param int    $id     Page ID
 * @param string $domain Domain pattern passed to LinkFilter::makeRegex()
 */
function cleanupArticle($id, $domain)
{
    $title = Title::newFromID($id);
    if (!$title) {
        print "Internal error: no page for ID {$id}\n";
        return;
    }
    $rev = Revision::newFromTitle($title);
    if (!$rev) {
        // Without a current revision there is nothing to inspect or revert;
        // bail out instead of calling getId() on a non-object.
        print "Internal error: no revision for page ID {$id}\n";
        return;
    }
    print $title->getPrefixedDBkey() . " ...";
    $revId = $rev->getId();
    $currentRevId = $revId;
    $regex = LinkFilter::makeRegex($domain);
    // Walk back through the history until a revision without a match is found.
    while ($rev && preg_match($regex, $rev->getText())) {
        # Revision::getPrevious can't be used in this way before MW 1.6 (Revision.php 1.26)
        #$rev = $rev->getPrevious();
        $revId = $title->getPreviousRevisionID($revId);
        if ($revId) {
            $rev = Revision::newFromTitle($title, $revId);
        } else {
            $rev = false;
        }
    }
    if ($revId == $currentRevId) {
        // The regex didn't match the current article text
        // This happens e.g. when a link comes from a template rather than the page itself
        print "False match\n";
    } else {
        $dbw =& wfGetDB(DB_MASTER);
        $dbw->immediateBegin();
        if (!$rev) {
            // Didn't find a non-spammy revision, blank the page
            print "blanking\n";
            $article = new Article($title);
            $article->updateArticle('', wfMsg('spam_blanking', $domain), false, false);
        } else {
            // Revert to this revision
            print "reverting\n";
            $article = new Article($title);
            $article->updateArticle($rev->getText(), wfMsg('spam_reverting', $domain), false, false);
        }
        $dbw->immediateCommit();
        wfDoUpdates();
    }
}
Example #5
0
 /**
  * Build the query description (tables, fields, conditions, options) for the
  * link search.
  *
  * Stores the munged query in $this->mMungedQuery. When the query is invalid
  * a description that can never match ('0=1') is returned.
  *
  * @return array Query info array
  */
 function getQueryInfo()
 {
     global $wgMiserMode;
     $dbr = wfGetDB(DB_SLAVE);
     list($this->mMungedQuery, $clause) = self::mungeQuery($this->mQuery, $this->mProt);
     if ($this->mMungedQuery === false) {
         // Invalid query; return no results
         return array('tables' => 'page', 'fields' => 'page_id', 'conds' => '0=1');
     }

     // strip everything past first wildcard, so that
     // index-based-only lookup would be done
     $prefixOnly = LinkFilter::keepOneWildcard($this->mMungedQuery);
     $likeSql = $dbr->buildLike($prefixOnly);

     $conditions = array('page_id = el_from', "{$clause} {$likeSql}");
     if (isset($this->mNs) && !$wgMiserMode) {
         // The namespace restriction is only added outside miser mode.
         $conditions['page_namespace'] = $this->mNs;
     }

     $columns = array(
         'namespace' => 'page_namespace',
         'title' => 'page_title',
         'value' => 'el_index',
         'url' => 'el_to',
     );

     return array(
         'tables' => array('page', 'externallinks'),
         'fields' => $columns,
         'conds' => $conditions,
         'options' => array('USE INDEX' => $clause),
     );
 }
Example #6
0
 /**
  * Build the raw SQL statement for the link search.
  *
  * Selects namespace, title, el_index and el_to from the page and
  * externallinks tables, restricted by the LIKE clause derived from the
  * current query and, outside miser mode, by the requested namespace.
  *
  * @return string SQL SELECT statement
  */
 function getSQL()
 {
     global $wgMiserMode;
     $dbr = wfGetDB(DB_SLAVE);
     $pageTable = $dbr->tableName('page');
     $linksTable = $dbr->tableName('externallinks');

     /* strip everything past first wildcard, so that index-based-only lookup would be done */
     list($munged, $clause) = self::mungeQuery($this->mQuery, $this->mProt);
     $likeSql = $dbr->buildLike(LinkFilter::keepOneWildcard($munged));

     // Optional namespace restriction, skipped entirely in miser mode.
     $nsFilter = (isset($this->mNs) && !$wgMiserMode)
         ? 'AND page_namespace=' . $dbr->addQuotes($this->mNs)
         : '';

     $indexHint = $dbr->useIndexClause($clause);

     return "SELECT\n\t\t\t\tpage_namespace AS namespace,\n\t\t\t\tpage_title AS title,\n\t\t\t\tel_index AS value,\n\t\t\t\tel_to AS url\n\t\t\tFROM\n\t\t\t\t{$pageTable},\n\t\t\t\t{$linksTable} {$indexHint}\n\t\t\tWHERE\n\t\t\t\tpage_id=el_from\n\t\t\t\tAND {$clause} {$likeSql}\n\t\t\t\t{$nsFilter}";
 }
Example #7
0
 /**
  * Filter links according to search parameters.
  *
  * Picks the filter type from the keys present in the request: both keys
  * select the combined tag + text filter, a single key selects the tag or
  * text filter, and an empty request passes an empty type to LinkFilter.
  *
  * @param array  $filterRequest Search request content. Supported keys:
  *                                - searchtags: list of tags
  *                                - searchterm: term search
  * @param bool   $casesensitive Optional: Perform case sensitive filter
  * @param bool   $privateonly   Optional: Returns private links only if true.
  *
  * @return array filtered links, all links if no suitable filter was provided.
  */
 public function filterSearch($filterRequest = array(), $casesensitive = false, $privateonly = false)
 {
     // Filter link database according to parameters.
     $searchtags = !empty($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
     $searchterm = !empty($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';
     // Search tags + fullsearch.
     // (A former "empty($type)" test was dropped here: $type is not defined
     // before this point, so that test was always true.)
     if (!empty($searchtags) && !empty($searchterm)) {
         $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT;
         $request = array($searchtags, $searchterm);
     } elseif (!empty($searchtags)) {
         $type = LinkFilter::$FILTER_TAG;
         $request = $searchtags;
     } elseif (!empty($searchterm)) {
         $type = LinkFilter::$FILTER_TEXT;
         $request = $searchterm;
     } else {
         $type = '';
         $request = '';
     }
     $linkFilter = new LinkFilter($this->_links);
     return $linkFilter->filter($type, $request, $casesensitive, $privateonly);
 }
Example #8
0
 /**
  * Test $text against the regex built from $filterEntry.
  *
  * @param $text String: text to check
  * @param $filterEntry String: domainparts, see makeRegex() for more details
  * @return Integer: result of preg_match(), 0 if no match or 1 if there's at least one match
  */
 static function matchEntry($text, $filterEntry)
 {
     return preg_match(LinkFilter::makeRegex($filterEntry), $text);
 }
 /**
  * Build the query description (tables, fields, conditions, options) for the
  * link search.
  *
  * Stores the munged query in $this->mungedQuery. When the query is invalid
  * a description that can never match ('0=1') is returned.
  *
  * @return array Query info array
  */
 public function getQueryInfo()
 {
     $dbr = wfGetDB(DB_SLAVE);
     list($this->mungedQuery, $clause) = self::mungeQuery($this->mQuery, $this->mProt);
     if ($this->mungedQuery === false) {
         // Invalid query; return no results
         return ['tables' => 'page', 'fields' => 'page_id', 'conds' => '0=1'];
     }

     // strip everything past first wildcard, so that
     // index-based-only lookup would be done
     $prefixOnly = LinkFilter::keepOneWildcard($this->mungedQuery);
     $likeSql = $dbr->buildLike($prefixOnly);

     $conditions = ['page_id = el_from', "{$clause} {$likeSql}"];
     if ($this->mNs !== null && !$this->getConfig()->get('MiserMode')) {
         // The namespace restriction is only added outside miser mode.
         $conditions['page_namespace'] = $this->mNs;
     }

     return [
         'tables' => ['page', 'externallinks'],
         'fields' => [
             'namespace' => 'page_namespace',
             'title' => 'page_title',
             'value' => 'el_index',
             'url' => 'el_to',
         ],
         'conds' => $conditions,
         'options' => ['USE INDEX' => $clause],
     ];
 }
 /**
  * Build and execute the external-link usage query, then either add the rows
  * to the API result or feed them to the given page set.
  *
  * @param object|null $resultPageSet When null, rows are written to the module's
  *                                   result; otherwise each row is passed to
  *                                   $resultPageSet->processDbRow().
  */
 private function run($resultPageSet = null)
 {
     $params = $this->extractRequestParams();
     $protocol = $params['protocol'];
     $query = $params['query'];
     // Find the right prefix
     global $wgUrlProtocols;
     if ($protocol && !in_array($protocol, $wgUrlProtocols)) {
         // Expand the given protocol to the first configured entry it is a prefix of.
         foreach ($wgUrlProtocols as $p) {
             if (substr($p, 0, strlen($protocol)) === $protocol) {
                 $protocol = $p;
                 break;
             }
         }
     } else {
         $protocol = null;
     }
     $db = $this->getDB();
     $this->addTables(array('page', 'externallinks'));
     // must be in this order for 'USE INDEX'
     $this->addOption('USE INDEX', 'el_index');
     $this->addWhere('page_id=el_from');
     $this->addWhereFld('page_namespace', $params['namespace']);
     // Only search by URL when a non-empty query was given. This used to be
     // "!is_null($query) || $query != ''", which sent an empty string down the
     // query path (ending in 'Invalid query') instead of the protocol-only path.
     if ($query !== null && $query !== '') {
         if (is_null($protocol)) {
             $protocol = 'http://';
         }
         $likeQuery = LinkFilter::makeLikeArray($query, $protocol);
         if (!$likeQuery) {
             $this->dieUsage('Invalid query', 'bad_query');
         }
         $likeQuery = LinkFilter::keepOneWildcard($likeQuery);
         $this->addWhere('el_index ' . $db->buildLike($likeQuery));
     } elseif (!is_null($protocol)) {
         // No query: list every link that uses the requested protocol.
         $this->addWhere('el_index ' . $db->buildLike("{$protocol}", $db->anyString()));
     }
     $prop = array_flip($params['prop']);
     $fld_ids = isset($prop['ids']);
     $fld_title = isset($prop['title']);
     $fld_url = isset($prop['url']);
     if (is_null($resultPageSet)) {
         $this->addFields(array('page_id', 'page_namespace', 'page_title'));
         $this->addFieldsIf('el_to', $fld_url);
     } else {
         $this->addFields($resultPageSet->getPageTableFields());
     }
     $limit = $params['limit'];
     $offset = $params['offset'];
     // Fetch one row more than requested to detect whether more results exist.
     $this->addOption('LIMIT', $limit + 1);
     if (isset($offset)) {
         $this->addOption('OFFSET', $offset);
     }
     $res = $this->select(__METHOD__);
     $result = $this->getResult();
     $count = 0;
     foreach ($res as $row) {
         if (++$count > $limit) {
             // We've reached the one extra which shows that there are additional pages to be had. Stop here...
             $this->setContinueEnumParameter('offset', $offset + $limit);
             break;
         }
         if (is_null($resultPageSet)) {
             $vals = array();
             if ($fld_ids) {
                 $vals['pageid'] = intval($row->page_id);
             }
             if ($fld_title) {
                 $title = Title::makeTitle($row->page_namespace, $row->page_title);
                 ApiQueryBase::addTitleInfo($vals, $title);
             }
             if ($fld_url) {
                 $vals['url'] = $row->el_to;
             }
             $fit = $result->addValue(array('query', $this->getModuleName()), null, $vals);
             if (!$fit) {
                 // The row was not added; continue from this row next time.
                 $this->setContinueEnumParameter('offset', $offset + $count - 1);
                 break;
             }
         } else {
             $resultPageSet->processDbRow($row);
         }
     }
     if (is_null($resultPageSet)) {
         $result->setIndexedTagName_internal(array('query', $this->getModuleName()), $this->getModulePrefix());
     }
 }
Example #11
0
 /**
  * Strip the leading '-' from tags so they work with tag exclusion search.
  *
  * Every link returned by filterSearch() gets its tag string cleaned and
  * de-duplicated, is written back under its 'linkdate' key, and the
  * database is saved afterwards.
  *
  * @return bool Always true.
  */
 public function updateMethodRenameDashTags()
 {
     foreach ($this->linkDB->filterSearch() as $entry) {
         // Remove a dash at the start of the string or right after a space.
         $entry['tags'] = preg_replace('/(^| )\\-/', '$1', $entry['tags']);

         $distinctTags = array_unique(LinkFilter::tagsStrToArray($entry['tags'], true));
         $entry['tags'] = implode(' ', $distinctTags);

         $this->linkDB[$entry['linkdate']] = $entry;
     }

     $this->linkDB->savedb($this->config['config']['PAGECACHE']);

     return true;
 }
 /**
  * Build the LIKE query for $query and $prot.
  *
  * Thin wrapper that forwards both arguments to LinkFilter::makeLike() and
  * returns its result unchanged.
  */
 static function mungeQuery($query, $prot)
 {
     $likeQuery = LinkFilter::makeLike($query, $prot);

     return $likeQuery;
 }
Example #13
0
 /**
  * Revert a page to its most recent revision whose text does not match
  * $domain, or blank it if every revision matches.
  *
  * @param int    $id     Page ID
  * @param string $domain Domain pattern passed to LinkFilter::matchEntry()
  */
 private function cleanupArticle($id, $domain)
 {
     $title = Title::newFromID($id);
     if (!$title) {
         $this->error("Internal error: no page for ID {$id}");
         return;
     }
     $rev = Revision::newFromTitle($title);
     if (!$rev) {
         // Without a current revision there is nothing to inspect or revert;
         // bail out instead of calling getId() on a non-object.
         $this->error("Internal error: no revision for page ID {$id}");
         return;
     }
     $this->output($title->getPrefixedDBkey() . " ...");
     $revId = $rev->getId();
     $currentRevId = $revId;
     // Walk back through the history until a revision without a match is found.
     while ($rev && LinkFilter::matchEntry($rev->getText(), $domain)) {
         # Revision::getPrevious can't be used in this way before MW 1.6 (Revision.php 1.26)
         #$rev = $rev->getPrevious();
         $revId = $title->getPreviousRevisionID($revId);
         if ($revId) {
             $rev = Revision::newFromTitle($title, $revId);
         } else {
             $rev = false;
         }
     }
     if ($revId == $currentRevId) {
         // The regex didn't match the current article text
         // This happens e.g. when a link comes from a template rather than the page itself
         $this->output("False match\n");
     } else {
         $dbw = wfGetDB(DB_MASTER);
         $dbw->begin();
         if (!$rev) {
             // Didn't find a non-spammy revision, blank the page
             $this->output("blanking\n");
             $article = new Article($title);
             $article->updateArticle('', wfMsg('spam_blanking', $domain), false, false);
         } else {
             // Revert to this revision
             $this->output("reverting\n");
             $article = new Article($title);
             $article->updateArticle($rev->getText(), wfMsg('spam_reverting', $domain), false, false);
         }
         $dbw->commit();
         wfDoUpdates();
     }
 }
Example #14
0
 /**
  * testMakeLikeArrayWithInvalidPatterns()
  *
  * Asserts that LinkFilter::makeLikeArray($pattern) returns false for each
  * invalid search pattern supplied by the data provider.
  *
  * @dataProvider provideInvalidPatterns
  *
  * @param string $pattern Invalid search pattern
  */
 function testMakeLikeArrayWithInvalidPatterns($pattern)
 {
     $likeArray = LinkFilter::makeLikeArray($pattern);
     $failureMessage = "'{$pattern}' is not a valid pattern and should be rejected";

     $this->assertFalse($likeArray, $failureMessage);
 }
Example #15
0
 /**
  * Build and execute the external-link usage query, then either add the rows
  * to the API result or feed them to the given page set.
  *
  * Dies with a usage error when the query parameter is missing or cannot be
  * turned into a LIKE pattern.
  *
  * @param object|null $resultPageSet When null, rows are collected and added to
  *                                   the module's result; otherwise each row is
  *                                   passed to $resultPageSet->processDbRow().
  */
 private function run($resultPageSet = null)
 {
     $params = $this->extractRequestParams();
     $protocol = $params['protocol'];
     $query = $params['query'];
     if (is_null($query)) {
         $this->dieUsage('Missing required query parameter', 'params');
     }
     // Find the right prefix
     global $wgUrlProtocols;
     foreach ($wgUrlProtocols as $p) {
         if (substr($p, 0, strlen($protocol)) === $protocol) {
             $protocol = $p;
             break;
         }
     }
     $likeQuery = LinkFilter::makeLike($query, $protocol);
     if (!$likeQuery) {
         $this->dieUsage('Invalid query', 'bad_query');
     }
     // Keep only the part up to and including the first '%'.
     $likeQuery = substr($likeQuery, 0, strpos($likeQuery, '%') + 1);
     $this->addTables(array('page', 'externallinks'));
     // must be in this order for 'USE INDEX'
     $this->addOption('USE INDEX', 'el_index');
     $db = $this->getDB();
     $this->addWhere('page_id=el_from');
     $this->addWhere('el_index LIKE ' . $db->addQuotes($likeQuery));
     $this->addWhereFld('page_namespace', $params['namespace']);
     $prop = array_flip($params['prop']);
     $fld_ids = isset($prop['ids']);
     $fld_title = isset($prop['title']);
     $fld_url = isset($prop['url']);
     if (is_null($resultPageSet)) {
         $this->addFields(array('page_id', 'page_namespace', 'page_title'));
         $this->addFieldsIf('el_to', $fld_url);
     } else {
         $this->addFields($resultPageSet->getPageTableFields());
     }
     $limit = $params['limit'];
     $offset = $params['offset'];
     // Fetch one row more than requested to detect whether more results exist.
     $this->addOption('LIMIT', $limit + 1);
     if (isset($offset)) {
         $this->addOption('OFFSET', $offset);
     }
     $res = $this->select(__METHOD__);
     $data = array();
     $count = 0;
     while ($row = $db->fetchObject($res)) {
         if (++$count > $limit) {
             // We've reached the one extra which shows that there are additional pages to be had. Stop here...
             // The extra row sits at position $offset + $limit and has not been
             // returned, so the next request must start exactly there; adding 1
             // more would skip it.
             $this->setContinueEnumParameter('offset', $offset + $limit);
             break;
         }
         if (is_null($resultPageSet)) {
             $vals = array();
             if ($fld_ids) {
                 $vals['pageid'] = intval($row->page_id);
             }
             if ($fld_title) {
                 $title = Title::makeTitle($row->page_namespace, $row->page_title);
                 $vals['ns'] = intval($title->getNamespace());
                 $vals['title'] = $title->getPrefixedText();
             }
             if ($fld_url) {
                 $vals['url'] = $row->el_to;
             }
             $data[] = $vals;
         } else {
             $resultPageSet->processDbRow($row);
         }
     }
     $db->freeResult($res);
     if (is_null($resultPageSet)) {
         $result = $this->getResult();
         $result->setIndexedTagName($data, $this->getModulePrefix());
         $result->addValue('query', $this->getModuleName(), $data);
     }
 }
Example #16
0
 /**
  * Revert a page to its most recent revision that is neither text-deleted
  * nor matching $domain. If no such revision exists, the page is deleted
  * when the 'delete' option is set, otherwise it is blanked.
  *
  * @param int    $id     Page ID
  * @param string $domain Domain pattern passed to LinkFilter::matchEntry()
  */
 private function cleanupArticle($id, $domain)
 {
     $title = Title::newFromID($id);
     if (!$title) {
         $this->error("Internal error: no page for ID {$id}");
         return;
     }
     $rev = Revision::newFromTitle($title);
     if (!$rev) {
         // Without a current revision there is nothing to inspect or revert;
         // bail out instead of calling getId() on a non-object.
         $this->error("Internal error: no revision for page ID {$id}");
         return;
     }
     $this->output($title->getPrefixedDBkey() . " ...");
     $currentRevId = $rev->getId();
     // Walk back through the history, skipping text-deleted and matching revisions.
     while ($rev && ($rev->isDeleted(Revision::DELETED_TEXT) || LinkFilter::matchEntry($rev->getContent(Revision::RAW), $domain))) {
         $rev = $rev->getPrevious();
     }
     if ($rev && $rev->getId() == $currentRevId) {
         // The regex didn't match the current article text
         // This happens e.g. when a link comes from a template rather than the page itself
         $this->output("False match\n");
     } else {
         $dbw = wfGetDB(DB_MASTER);
         $dbw->begin(__METHOD__);
         $page = WikiPage::factory($title);
         if ($rev) {
             // Revert to this revision
             $content = $rev->getContent(Revision::RAW);
             $this->output("reverting\n");
             $page->doEditContent($content, wfMessage('spam_reverting', $domain)->inContentLanguage()->text(), EDIT_UPDATE, $rev->getId());
         } elseif ($this->hasOption('delete')) {
             // Didn't find a non-spammy revision, delete the page
             $this->output("deleting\n");
             $page->doDeleteArticle(wfMessage('spam_deleting', $domain)->inContentLanguage()->text());
         } else {
             // Didn't find a non-spammy revision, blank the page
             $handler = ContentHandler::getForTitle($title);
             $content = $handler->makeEmptyContent();
             $this->output("blanking\n");
             $page->doEditContent($content, wfMessage('spam_blanking', $domain)->inContentLanguage()->text());
         }
         $dbw->commit(__METHOD__);
     }
 }
Example #17
0
 /**
  * Find pages linking to $phrase and, depending on $this->mDo, revert them or
  * write them up; results and a link back to the form are added to $wgOut.
  *
  * Mode 'this' works on the current wiki's externallinks table; mode 'all'
  * counts matches in every wiki returned by fetchWikias() and calls cleanUp()
  * again for each wiki that has matches.
  *
  * NOTE(review): $database is never read in this method, and the recursive
  * call in 'all' mode does not change $this->mMode here - confirm how the
  * per-wiki pass is meant to terminate.
  *
  * @param string $phrase   Domain/phrase to look for; validated with LinkFilter::makeLike()
  * @param string $database Database name (currently unused, see note above)
  */
 function cleanUp($phrase, $database)
 {
     // $wgLocalDatabases must be declared global, otherwise the check below
     // always sees an undefined local variable and 'local' mode always fails.
     global $wgOut, $wgUser, $wgLocalDatabases;
     if (!isset($phrase) || "" == $phrase) {
         $this->showForm(wfMsg('cleanupspam_error_empty'));
         return;
     }
     /* do a check whether something actually _is_ inside $wgLocalDatabases */
     if (!is_array($wgLocalDatabases) && 'local' == $this->mMode) {
         $this->showForm(wfMsg('cleanupspam_no_local'));
         return;
     }
     // makeLike() is only used to validate the phrase; its result is not
     // used for the queries below.
     $like = LinkFilter::makeLike($phrase);
     if (!$like) {
         $this->showForm(wfMsg('cleanupspam_error_not_valid') . ": " . $phrase);
         return;
     }
     $dbr =& wfGetDB(DB_SLAVE);
     switch ($this->mMode) {
         case 'this':
             /* Clean up spam just on this wiki */
             $res = $dbr->select('externallinks', array('DISTINCT el_from'), array('el_to LIKE ' . $dbr->addQuotes("%//{$phrase}%")), __METHOD__);
             $count = $dbr->numRows($res);
             if ($count) {
                 $wgOut->addWikiText("Found {$count} article(s) containing links to '''{$phrase}'''.\n");
                 while ($row = $dbr->fetchObject($res)) {
                     if ('revert' == $this->mDo) {
                         /* have eyes on this */
                         $this->cleanupArticle($row->el_from, $phrase);
                     } else {
                         /* just add more data and that should be fine */
                         $this->writeupArticle($row->el_from, $phrase, $row->el_to);
                     }
                 }
             } else {
                 $wgOut->addWikiText(wfMsg('cleanupspam_count_zero', "'''" . $phrase . "'''."));
             }
             break;
         case 'all':
             /* todo check for no wikis in city_list */
             $wikis = $this->fetchWikias();
             if (!is_array($wikis)) {
                 return;
             }
             $wgOut->addWikiText("Finding spam on all (" . count($wikis) . ") wikis.\n");
             // Initialised up front so the check after the loop never reads
             // an undefined variable when no wiki has a match.
             $found = false;
             foreach ($wikis as $db) {
                 $count = $dbr->selectField("`" . $db->city_dbname . "`.externallinks", 'COUNT(*)', array('el_to LIKE ' . $dbr->addQuotes("%//{$phrase}%")), __METHOD__);
                 if ($count) {
                     $found = true;
                     $this->cleanUp($phrase, $db->city_dbname);
                 }
             }
             if ('revert' == $this->mDo) {
                 if ($found) {
                     $wgOut->addWikiText(wfMsg('cleanupspam_cleanup_finished'));
                 } else {
                     $wgOut->addWikiText(wfMsg('cleanupspam_none_found', $phrase));
                 }
             }
             break;
     }
     if ('revert' == $this->mDo) {
         $wgOut->addWikiText(wfMsg('cleanupspam_cleanup_finished'));
     }
     $sk = $wgUser->getSkin();
     $titleObj = Title::makeTitle(NS_SPECIAL, 'Cleanupspam');
     $link_back = $sk->makeKnownLinkObj($titleObj, '<b>here</b>');
     $wgOut->addHtml("<br/>" . wfMsg('cleanupspam_link_back') . " " . $link_back . ".");
 }
Example #18
0
 /**
  * Build the SQL condition on el_index for a URL query and/or protocol.
  *
  * With a non-empty query, the condition is built from
  * LinkFilter::makeLikeArray() (protocol defaults to 'http://'); an invalid
  * query ends in dieUsage(). With only a protocol, the condition matches
  * every link starting with that protocol. With neither, null is returned.
  *
  * @param string|null $query    URL search pattern
  * @param string|null $protocol Protocol prefix
  * @return null|string SQL condition, or null when there is nothing to filter on
  */
 public function prepareUrlQuerySearchString($query = null, $protocol = null)
 {
     $db = $this->getDb();
     // Only search by URL when a non-empty query was given. This used to be
     // "!is_null($query) || $query != ''", which sent an empty string down the
     // query path (ending in 'Invalid query') instead of the protocol-only path.
     if ($query !== null && $query !== '') {
         if (is_null($protocol)) {
             $protocol = 'http://';
         }
         $likeQuery = LinkFilter::makeLikeArray($query, $protocol);
         if (!$likeQuery) {
             $this->dieUsage('Invalid query', 'bad_query');
         }
         $likeQuery = LinkFilter::keepOneWildcard($likeQuery);
         return 'el_index ' . $db->buildLike($likeQuery);
     } elseif (!is_null($protocol)) {
         return 'el_index ' . $db->buildLike("{$protocol}", $db->anyString());
     }
     return null;
 }
Example #19
0
 /**
  * Filter links through a LinkFilter built on this object's links.
  *
  * An array request is joined with single spaces; the request is trimmed
  * before being handed to LinkFilter::filter().
  *
  * @param string $type          Type of filter.
  * @param mixed  $request       Search request, string or array.
  * @param bool   $casesensitive Optional: Perform case sensitive filter
  * @param bool   $privateonly   Optional: Returns private links only if true.
  *
  * @return array filtered links
  */
 public function filter($type, $request, $casesensitive = false, $privateonly = false)
 {
     $linkFilter = new LinkFilter($this->_links);

     if (is_array($request)) {
         $flatRequest = implode(' ', $request);
     } else {
         $flatRequest = $request;
     }
     $flatRequest = trim($flatRequest);

     return $linkFilter->filter($type, $flatRequest, $casesensitive, $privateonly);
 }