/** ************************************************
 * Stores the URL this object works with.
 *
 * Surrounding whitespace is trimmed first, the trimmed value is
 * validated with WebUtils::isValidURL(), and any trailing '/' is
 * removed before the value is written to $this->sURL.
 *
 * Trimming happens before validation so that the whitespace
 * handling promised here does not depend on the validator
 * tolerating padded input.
 *
 * @param: $url string; the URL to store
 * @return: null
 * @throws: Exception when $url is not a string or is rejected
 *          by WebUtils::isValidURL()
 ** ***********************************************/
public function setURL($url)
{
    if (is_string($url)) {
        $url = trim($url);

        if (WebUtils::isValidURL($url)) {
            $this->sURL = rtrim($url, '/');
            return;
        }
    }

    throw new Exception('LogParser.setURL accepts valid URLs only.');
}
/** ************************************************
 * Applies a relative path ('.', '..' and plain segments) to the
 * path of the given URL and returns the resulting URL.
 *
 * The URL's path (WebUtils::getPathOnly) is split on '/', then each
 * '/'-separated step of $relPath is applied in order:
 *   '.'   leaves the path unchanged,
 *   '..'  removes the last path segment,
 *   anything else is appended as a new segment.
 * The result is WebUtils::getBaseURL($sUrl) followed by the segments
 * joined with '/', with trailing '/' characters stripped. No '/' is
 * inserted between the base URL and the path.
 *
 * A '..' step fails (the function returns false) when there is no
 * segment left to remove OR when the removed segment is the empty
 * string. NOTE(review): the empty-string case presumably stops
 * traversal above the root when the path starts with '/' - confirm
 * against what getPathOnly() returns.
 *
 * @param: $sUrl string; a url
 * @param: $relPath string; a relative path
 * @return: string; the url after it was traversed by
 *          relPath. Or false if $sUrl is not a valid URL or the
 *          relative path could not be traversed
 * TODO: Do a better job of adding / in the return string
 ***************************************************/
public static function traversRelPath($sUrl, $relPath)
{
    if (!WebUtils::isValidURL($sUrl)) {
        return false;
    }

    $baseU = WebUtils::getBaseURL($sUrl);
    $segments = explode('/', WebUtils::getPathOnly($sUrl));

    foreach (explode('/', $relPath) as $step) {
        if ($step === '.') {
            // "current directory": nothing to do
            continue;
        }

        if ($step !== '..') {
            // ordinary segment: descend into it
            $segments[] = $step;
            continue;
        }

        // "parent directory": drop the last segment. array_pop() yields
        // null on an empty array; an empty-string segment is treated as
        // a failure too.
        $dropped = array_pop($segments);
        if ($dropped === null || $dropped === '') {
            return false;
        }
    }

    return rtrim($baseU . implode('/', $segments), '/');
}
/** ************************************************
 * Put a valid URL on the stack. The url is used as the key of
 * $this->aLinkStack, which eliminates duplicates. The URL is only
 * stored (and announced on the output) when
 * $this->isWithInDomain() accepts it; otherwise it is silently
 * ignored.
 *
 * The announced link is HTML-escaped because it is written into
 * markup. When the argument is rejected, the exception message
 * shows the value itself if it can be converted to a string, or
 * its type name otherwise, so that an array or plain object
 * argument still produces the documented Exception.
 *
 * @param: $sLink string; A url
 * @return: null
 * @throws: Exception when $sLink is not a string or is rejected
 *          by WebUtils::isValidURL()
 * @deprecated use pushTraversed
 ***************************************************/
public function pushToHash($sLink)
{
    if (!is_string($sLink) || !WebUtils::isValidURL($sLink)) {
        // Arrays and objects without __toString() cannot be concatenated
        // into the message; describe them by type instead.
        if (is_array($sLink) || (is_object($sLink) && !method_exists($sLink, '__toString'))) {
            $shown = gettype($sLink);
        } else {
            $shown = (string) $sLink;
        }

        throw new Exception('Spiderman.pushToHash only accepts valid URL strings \'' . $shown . '\'');
    }

    if ($this->isWithInDomain($sLink)) {
        $this->aLinkStack[$sLink] = $sLink;
        echo '<span style="margin-left:20px;">Starting with: </span>'
            . htmlspecialchars($sLink, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . '<br/>';
    }
}
/** ************************************************
 * Rewrites $this->aAnchors so that every kept entry is an
 * absolute URL. Progress is reported through $this->sSay().
 *
 * Each anchor is trimmed and then handled by the first rule
 * that applies:
 *   1. WebUtils::isValidURL() accepts it: kept, minus trailing '/'.
 *   2. It is empty, or containsJS() / containsHash() /
 *      containsMailTo() flags it: dropped.
 *   3. It starts with '/': appended to $this->sBaseURL.
 *   4. It starts with './' or '../': resolved against $this->sURL
 *      with WebUtils::traversRelPath(); kept only if the result is
 *      a valid URL.
 *   5. Anything else: appended to $this->sCurrentPathURL . '/'.
 *
 * @return: null
 * Notes:
 * Different forms of urls
 *     /dir/
 *     /dir
 *     /dir/index.php
 *     dir/index.php
 *     dir/index.php?var=v
 *     index.php
 *     /index.php
 *     ./dir
 *     ../../dir
 *     <a href="javascript:;" class="prev"></a>
 ***************************************************/
private function processURLs()
{
    $resolved = array();

    foreach ($this->aAnchors as $anchor) {
        $anchor = trim($anchor);
        echo $this->sSay('I\'m going to process url: ', $anchor);

        // Rule 1: already an absolute URL.
        if (WebUtils::isValidURL($anchor)) {
            $resolved[] = rtrim($anchor, '/');
            continue;
        }

        // Rule 2: nothing usable to follow.
        // TODO: Not sure about the hash check, might not need it. URLs can
        // contain hashes but link to a different page?
        if (strlen($anchor) < 1
            || $this->containsJS($anchor)
            || $this->containsHash($anchor)
            || $this->containsMailTo($anchor)
        ) {
            continue;
        }

        // Classify before stripping the trailing '/', otherwise '/' and
        // './' would no longer match their prefixes.
        $isRootRelative = strpos($anchor, '/') === 0;
        $isDotRelative = strpos($anchor, './') === 0 || strpos($anchor, '../') === 0;
        $anchor = rtrim($anchor, '/');

        // Rule 3: path relative to the site root.
        if ($isRootRelative) {
            $resolved[] = $this->sBaseURL . $anchor;
            continue;
        }

        // Rule 4: dot-relative path, try to traverse it.
        if ($isDotRelative) {
            echo $this->sSay('I\'ve found a relative path ', $anchor);
            $target = WebUtils::traversRelPath($this->sURL, $anchor);
            echo $this->sSay('I\'ve tried to travers it and arrived at ', $target);

            if (!WebUtils::isValidURL($target)) {
                echo $this->sSay('Relative url parse failed. url is', $target);
                continue;
            }

            $resolved[] = $target;
            continue;
        }

        // Rule 5: unknown form, assume it is relative to the current path.
        echo $this->sSay('Not sure what to do with this url: ', $anchor);
        $guess = $this->sCurrentPathURL . '/' . $anchor;
        $resolved[] = $guess;
        echo $this->sSay('So I\'ll add on the current path: ', $guess);
    }

    $this->aAnchors = $resolved;
}