/**
 * Search the sef urls table for urls similar to the requested one.
 *
 * A stored url is a candidate when its sef form (oldurl) either "sounds like"
 * the requested path (sql soundex() comparison) or contains at least one of
 * the requested words. Feed urls, the virtuemart "vmchk" urls, and - unless
 * enabled in params - pdf and print urls are never returned, nor are urls
 * matching one of the user-set excluded words.
 * Candidates are handed over to shRankSimilarUrlsSimilarText() for ranking,
 * and only the first few are returned.
 *
 * @param array  $bits         segments of the requested url, broken down to each individual word in the url
 * @param array  $originalBits segments of the requested url, broken down only by slashes
 * @param string $searchedPath passed untouched to shRankSimilarUrlsSimilarText()
 *                             (presumably the requested path, used as ranking reference - confirm against caller)
 * @param object $params       object exposing get($name, $default). Keys read here: max_number_of_urls,
 *                             include_pdf, include_print, excluded_words_sef, excluded_words_non_sef
 * @return array at most max_number_of_urls items, taken from the top of the list returned by
 *               shRankSimilarUrlsSimilarText(); an empty array when $bits is empty
 */
function shSearchSimilarUrls($bits, $originalBits, $searchedPath, $params)
{
    // do we have data to work with ?
    if (empty($bits)) {
        return array();
    }

    // how many urls to return, max ? Cast and clamp: the value comes from
    // user configuration and ends up as the length argument of array_slice()
    $limit = max(0, (int) $params->get('max_number_of_urls', 5));
    // include pdf ?
    $includePdf = $params->get('include_pdf', 0);
    // include printable ?
    $includePrint = $params->get('include_print', 0);

    // get db instance
    $db = JFactory::getDBO();

    // only urls that do have a non-sef counterpart
    $sql = 'select oldurl, newurl, id, dateadd from #__sh404sef_urls where newurl <> "" ';

    // virtuemart hack
    $sql .= ' AND oldurl not like ' . $db->Quote('%vmchk%');

    // never include feed results
    $sql .= ' AND newurl not like ' . $db->Quote('%format=feed%');

    // additional user-set conditions
    if (!$includePdf) {
        $sql .= ' AND newurl not like ' . $db->Quote('%format=pdf%');
    }
    if (!$includePrint) {
        $sql .= ' AND newurl not like ' . $db->Quote('%print=1%');
    }

    // apply exclusion lists: one word per line, each list applies to one column
    $exclusionLists = array(
        'excluded_words_sef' => 'oldurl',
        'excluded_words_non_sef' => 'newurl',
    );
    foreach ($exclusionLists as $paramName => $column) {
        $words = explode("\n", (string) $params->get($paramName, ''));
        foreach ($words as $word) {
            // trim() also gets rid of the "\r" left over from windows line endings
            $word = trim($word);
            // strict comparison: empty() would wrongly discard the word "0"
            if ($word !== '') {
                $sql .= ' AND ' . $column . ' not like ' . $db->Quote('%' . $word . '%');
            }
        }
    }

    // similarity conditions: sounds like the full path, or contains one of the words
    $matches = array(
        ' soundex(oldurl) = soundex(' . $db->Quote(implode('/', (array) $originalBits)) . ')',
    );
    foreach ((array) $bits as $bit) {
        // an empty word would produce "like '%%'", which matches every single row
        if (trim((string) $bit) === '') {
            continue;
        }
        $matches[] = ' oldurl like ' . $db->Quote('%' . $bit . '%');
    }
    $sql .= ' AND ( ' . implode(' OR ', $matches) . ')';

    // group and limit result set: ranking is done in php, so cap the number of candidates
    $sql .= ' GROUP BY oldurl';
    $sql .= ' limit 500';

    // perform query; loadObjectList() result is normalized to an array
    $db->setQuery($sql);
    $urlList = $db->loadObjectList();
    if (empty($urlList)) {
        $urlList = array();
    }

    // rank them, trying to have the best one near the top of the list
    $urlList = shRankSimilarUrlsSimilarText($urlList, $searchedPath);

    // only keep a limited number, and return whatever we found
    return array_slice($urlList, 0, $limit);
}
/**
 * Search the sef urls table for urls similar to the requested one, and
 * render the best ones as an html unordered list of links.
 *
 * Two lookups are performed against the redirection table:
 *  1. urls whose sef form (oldurl) contains at least one of the requested words
 *  2. urls whose sef form "sounds like" the requested path (sql soundex()),
 *     excluding those already found by lookup 1
 * Feed urls, the virtuemart "vmchk" urls, and - unless enabled in params -
 * pdf and print urls are never returned.
 * Both result sets are merged, handed over to shRankSimilarUrlsSimilarText()
 * for ranking, and only the first few are rendered.
 *
 * @param array  $bits         segments of the requested url, broken down to each individual word in the url
 * @param array  $originalBits segments of the requested url, broken down only by slashes
 * @param object $params       object exposing get($name, $default). Keys read here: max_number_of_urls,
 *                             include_pdf, include_print
 * @return string a <ul> list with one <li><a> per url kept (href is the routed non-sef url,
 *                text is the sef url), or an empty string when there is nothing to show
 */
function shSearchSimilarUrls($bits, $originalBits, $params)
{
    // init result
    $urls = '';

    // do we have data to work with ?
    if (empty($bits)) {
        return $urls;
    }

    // how many urls to display, max ? Cast and clamp: the value comes from
    // user configuration and ends up as the length argument of array_slice()
    $limit = max(0, (int) $params->get('max_number_of_urls', 5));
    // include pdf ?
    $includePdf = $params->get('include_pdf', 0);
    // include printable ?
    $includePrint = $params->get('include_print', 0);

    // get db instance
    $db = JFactory::getDBO();

    // parts shared by both lookups
    // only urls that do have a non-sef counterpart
    $select = 'select oldurl, newurl from #__redirection where newurl <> ""';

    // virtuemart hack
    $filters = ' AND oldurl not like ' . $db->Quote('%vmchk%');
    // never include feed results
    $filters .= ' AND newurl not like ' . $db->Quote('%format=feed%');
    // additional user-set conditions
    if (!$includePdf) {
        $filters .= ' AND newurl not like ' . $db->Quote('%format=pdf%');
    }
    if (!$includePrint) {
        $filters .= ' AND newurl not like ' . $db->Quote('%print=1%');
    }

    // group and limit result set
    $tail = ' GROUP BY oldurl limit 30';

    // method # 1 : urls containing at least one of the requested words
    $likes = array();
    foreach ((array) $bits as $bit) {
        // an empty word would produce "like '%%'", which matches every single row
        if (trim((string) $bit) === '') {
            continue;
        }
        $likes[] = ' oldurl like ' . $db->Quote('%' . $bit . '%');
    }
    $urlList1 = array();
    if (!empty($likes)) {
        $db->setQuery($select . ' AND (' . implode(' OR ', $likes) . ')' . $filters . $tail);
        $urlList1 = $db->loadObjectList();
        $urlList1 = empty($urlList1) ? array() : $urlList1;
    }

    // method # 2 : soundex
    // the requested path is user input: it must go through Quote(),
    // just like every other value inserted in these queries
    $sql = $select . ' AND soundex(oldurl) = soundex(' . $db->Quote(implode('/', (array) $originalBits)) . ')';

    // don't accept urls twice
    if (!empty($urlList1)) {
        $alreadyFound = array();
        foreach ($urlList1 as $url) {
            $alreadyFound[] = $db->Quote($url->oldurl);
        }
        $sql .= ' AND oldurl not in (' . implode(', ', $alreadyFound) . ')';
    }

    // perform query
    $db->setQuery($sql . $filters . $tail);
    $urlList2 = $db->loadObjectList();
    $urlList2 = empty($urlList2) ? array() : $urlList2;

    // now group both solutions array
    $urlList = array_merge($urlList1, $urlList2);

    // rank them, trying to have the best one near the top of the list
    $urlList = shRankSimilarUrlsSimilarText($urlList);

    // only keep a limited number
    $urlList = array_slice($urlList, 0, $limit);

    // now build an unordered list with the remaining solutions
    if (!empty($urlList)) {
        foreach ($urlList as $url) {
            // the sef url is displayed as text: escape it so that it cannot inject markup
            $label = htmlspecialchars($url->oldurl, ENT_QUOTES, 'UTF-8');
            $urls .= '<li><a href="' . JRoute::_($url->newurl) . '">' . $label . '</a></li>';
        }
        $urls = '<ul>' . $urls . '</ul>';
    }

    // return whatever we found
    return $urls;
}