/**
 * Search the sef url table for urls similar to the requested one.
 *
 * A row of #__sh404sef_urls is a candidate when its newurl is not empty and
 * its oldurl either has the same SQL soundex as the requested path or
 * contains at least one of the requested words. Feed urls and urls containing
 * "vmchk" are always left out; pdf and print urls are left out unless enabled
 * in $params; the user-defined exclusion word lists are applied as well.
 * At most 500 candidates are fetched, then ranked by
 * shRankSimilarUrlsSimilarText() and cut down to the configured maximum.
 *
 * @param array  $bits         segment of the requested url, broken down to each individual word in the url
 * @param array  $originalBits segment of the requested url, broken down only by slashes
 * @param mixed  $searchedPath handed unchanged to shRankSimilarUrlsSimilarText() (presumably the requested path)
 * @param object $params       settings object providing get($name, $default)
 * @return array ranked url records (oldurl, newurl, id, dateadd); empty when there is nothing to search for or nothing was found
 */
function shSearchSimilarUrls($bits, $originalBits, $searchedPath, $params)
{
    // do we have data to work with ?
    if (empty($bits)) {
        return array();
    }

    // how many urls to display, max ? (cast: settings may come back as strings)
    $limit = (int) $params->get('max_number_of_urls', 5);
    // include pdf ?
    $includePdf = $params->get('include_pdf', 0);
    // include printable ?
    $includePrint = $params->get('include_print', 0);

    // plain assignment: taking a function result by reference raises a notice on current PHP
    $db = JFactory::getDBO();

    // '' instead of "" so the query also works when MySQL runs in ANSI_QUOTES mode
    $sql = "select oldurl, newurl, id, dateadd from  #__sh404sef_urls where newurl <> '' ";
    // virtuemart hack
    $sql .= ' AND oldurl not like ' . $db->Quote('%vmchk%');
    // never include feed results
    $sql .= ' AND newurl not like ' . $db->Quote('%format=feed%');
    // user-set conditions
    if (!$includePdf) {
        $sql .= ' AND newurl not like ' . $db->Quote('%format=pdf%');
    }
    if (!$includePrint) {
        $sql .= ' AND newurl not like ' . $db->Quote('%print=1%');
    }

    // exclusion lists: one word per line, each list filters its own column
    $exclusionLists = array(
        'oldurl' => $params->get('excluded_words_sef', ''),
        'newurl' => $params->get('excluded_words_non_sef', ''),
    );
    foreach ($exclusionLists as $column => $rawList) {
        foreach (explode("\n", (string) $rawList) as $word) {
            $word = trim($word);
            // strict comparison so that a word such as "0" is not silently dropped
            if ($word !== '') {
                $sql .= ' AND ' . $column . ' not like ' . $db->Quote('%' . $word . '%');
            }
        }
    }

    // similarity test: same soundex as the whole path, or any single word found in oldurl
    $wordMatches = array();
    foreach ($bits as $bit) {
        $wordMatches[] = ' oldurl like ' . $db->Quote('%' . $bit . '%');
    }
    $sql .= ' AND ( ';
    $sql .= ' soundex(oldurl) = soundex(' . $db->Quote(implode('/', $originalBits)) . ')';
    $sql .= ' OR ';
    $sql .= implode(' OR ', $wordMatches);
    $sql .= ')';

    // group and limit result set
    $sql .= ' GROUP BY oldurl';
    $sql .= ' limit 500';

    // perform query; anything empty coming back is normalised to an empty array
    $db->setQuery($sql);
    $urlList = $db->loadObjectList();
    if (empty($urlList)) {
        $urlList = array();
    }

    // rank them, trying to have the best one near the top of the list
    $urlList = shRankSimilarUrlsSimilarText($urlList, $searchedPath);

    // only keep a limited number
    return array_slice($urlList, 0, $limit);
}
Esempio n. 2
0
/**
 * Search the redirection table for urls similar to the requested one and
 * render them as an html unordered list of links.
 *
 * Two queries are run against #__redirection, each limited to 30 rows:
 * the first keeps rows whose oldurl contains at least one of the requested
 * words, the second keeps rows whose oldurl has the same SQL soundex as the
 * requested path and was not already returned by the first query. Both
 * ignore rows with an empty newurl, urls containing "vmchk", feed urls and,
 * unless enabled in $params, pdf and print urls. The merged rows are ranked
 * by shRankSimilarUrlsSimilarText() and cut down to the configured maximum.
 *
 * @param array  $bits         segment of the requested url, broken down to each individual word in the url
 * @param array  $originalBits segment of the requested url, broken down only by slashes
 * @param object $params       settings object providing get($name, $default)
 * @return string a <ul> list with one link per url found, or an empty string when there is nothing to show
 */
function shSearchSimilarUrls($bits, $originalBits, $params)
{
    // do we have data to work with ?
    if (empty($bits)) {
        return '';
    }

    // how many urls to display, max ? (cast: settings may come back as strings)
    $limit = (int) $params->get('max_number_of_urls', 5);
    // include pdf ?
    $includePdf = $params->get('include_pdf', 0);
    // include printable ?
    $includePrint = $params->get('include_print', 0);

    // plain assignment: taking a function result by reference raises a notice on current PHP
    $db = JFactory::getDBO();

    // conditions and result-set limits shared by both queries
    // virtuemart hack
    $commonTail = ' AND oldurl not like ' . $db->Quote('%vmchk%');
    // never include feed results
    $commonTail .= ' AND newurl not like ' . $db->Quote('%format=feed%');
    // user-set conditions
    if (!$includePdf) {
        $commonTail .= ' AND newurl not like ' . $db->Quote('%format=pdf%');
    }
    if (!$includePrint) {
        $commonTail .= ' AND newurl not like ' . $db->Quote('%print=1%');
    }
    $commonTail .= ' GROUP BY oldurl';
    $commonTail .= ' limit 30';

    // '' instead of "" so the queries also work when MySQL runs in ANSI_QUOTES mode
    $baseSql = "select oldurl, newurl from  #__redirection where newurl <> ''";

    // method # 1 : any of the requested words appears in oldurl
    $wordMatches = array();
    foreach ($bits as $bit) {
        $wordMatches[] = ' oldurl like ' . $db->Quote('%' . $bit . '%');
    }
    $sql = $baseSql . ' AND (' . implode(' OR ', $wordMatches) . ')' . $commonTail;
    $db->setQuery($sql);
    $urlList1 = $db->loadObjectList();
    if (empty($urlList1)) {
        $urlList1 = array();
    }

    // method # 2 : soundex
    // the path comes from the request, so it must go through Quote() like every other value
    $sql = $baseSql . ' AND soundex(oldurl) = soundex(' . $db->Quote(implode('/', $originalBits)) . ')';
    // don't accept urls twice
    if (!empty($urlList1)) {
        $alreadyFound = array();
        foreach ($urlList1 as $url) {
            $alreadyFound[] = $db->Quote($url->oldurl);
        }
        $sql .= ' AND oldurl not in (' . implode(', ', $alreadyFound) . ')';
    }
    $sql .= $commonTail;
    $db->setQuery($sql);
    $urlList2 = $db->loadObjectList();
    if (empty($urlList2)) {
        $urlList2 = array();
    }

    // now group both solutions array
    $urlList = array_merge($urlList1, $urlList2);
    // rank them, trying to have the best one near the top of the list
    $urlList = shRankSimilarUrlsSimilarText($urlList);
    // only keep a limited number
    $urlList = array_slice($urlList, 0, $limit);

    if (empty($urlList)) {
        return '';
    }

    // build an unordered list with the remaining solutions
    $items = '';
    foreach ($urlList as $url) {
        // link text is escaped so a stored url can never inject markup;
        // the href is left to JRoute, which is not escaped a second time here
        $label = htmlspecialchars($url->oldurl, ENT_QUOTES, 'UTF-8');
        $items .= '<li><a href="' . JRoute::_($url->newurl) . '">' . $label . '</a></li>';
    }

    return '<ul>' . $items . '</ul>';
}