Ejemplo n.º 1
0
 /** ************************************************
  * Stores the URL this object works against.
  *
  * Surrounding whitespace is trimmed first, the trimmed value is
  * validated with WebUtils::isValidURL(), and any trailing '/'
  * characters are removed before the result is stored in $this->sURL.
  *
  * @param string $url The URL to store.
  * @return void
  * @throws Exception If $url is not a string, or is not accepted by
  *                   WebUtils::isValidURL() after trimming.
  ** ***********************************************/
 public function setURL($url)
 {
     if (!is_string($url)) {
         throw new Exception('LogParser.setURL accepts valid URLs only.');
     }

     // Trim BEFORE validating, so the validator sees the same value that
     // is stored; validating the untrimmed input first could reject a URL
     // only because of the whitespace this method is meant to strip.
     $url = trim($url);
     if (!WebUtils::isValidURL($url)) {
         throw new Exception('LogParser.setURL accepts valid URLs only.');
     }

     $this->sURL = rtrim($url, '/');
 }
Ejemplo n.º 2
0
 /** ************************************************
  * Applies a relative path (containing '.' and '..' segments) to the
  * path of the given URL and returns the resulting URL.
  *
  * The URL's path (WebUtils::getPathOnly()) is split on '/', then each
  * segment of $relPath is applied in order:
  *   - '.'  leaves the path unchanged,
  *   - '..' removes the last path segment,
  *   - anything else is appended as a new segment.
  * The result is WebUtils::getBaseURL() followed by the re-joined path,
  * with trailing '/' characters removed.
  *
  * NOTE(review): the exact shape of what getBaseURL()/getPathOnly()
  * return (leading/trailing slashes) is not visible here, so slash
  * handling in the returned string is unchanged from the original.
  *
  * @param string $sUrl    An absolute URL.
  * @param string $relPath A relative path such as './dir' or '../../dir'.
  * @return string|false The traversed URL, or false when $sUrl is not a
  *                      valid URL or a '..' cannot be applied (the path
  *                      is exhausted or its last segment is empty).
  * TODO: Do a better job of adding / in the return string
  ***************************************************/
 public static function traversRelPath($sUrl, $relPath)
 {
     if (!WebUtils::isValidURL($sUrl)) {
         return false;
     }

     $baseU = WebUtils::getBaseURL($sUrl);
     $path = explode('/', WebUtils::getPathOnly($sUrl));

     foreach (explode('/', $relPath) as $segment) {
         if ($segment === '.') {
             // Current directory: nothing to do.
             continue;
         }

         if ($segment === '..') {
             // Going up removes the last segment. Popping nothing (null)
             // or an empty segment means we cannot go any higher; this
             // is the same condition the original expressed as a loose
             // "== null" comparison.
             $removed = array_pop($path);
             if ($removed === null || $removed === '') {
                 return false;
             }
             continue;
         }

         $path[] = $segment;
     }

     return rtrim($baseU . implode('/', $path), '/');
 }
Ejemplo n.º 3
0
 /** ************************************************
  * Puts a valid URL on the link stack, using the URL itself as the
  * array key so duplicates collapse into one entry. The URL is only
  * stored (and reported on the page) when $this->isWithInDomain()
  * accepts it; otherwise it is silently ignored.
  *
  * @param string $sLink A URL.
  * @return void
  * @throws Exception If $sLink is not a string or is not accepted by
  *                   WebUtils::isValidURL().
  * @deprecated Use pushTraversed instead.
  ***************************************************/
 public function pushToHash($sLink)
 {
     if (!is_string($sLink) || !WebUtils::isValidURL($sLink)) {
         // Build the message without blindly concatenating $sLink: an
         // array or object here would otherwise raise a conversion
         // warning/error of its own and mask the intended Exception.
         $shown = (is_scalar($sLink) || $sLink === null) ? (string) $sLink : gettype($sLink);
         throw new Exception('Spiderman.pushToHash only accepts valid URL strings \'' . $shown . '\'');
     }

     if ($this->isWithInDomain($sLink)) {
         $this->aLinkStack[$sLink] = $sLink;
         // The link comes from crawled content, so escape it before
         // writing it into the HTML output.
         echo '<span style="margin-left:20px;">Starting with: </span>'
             . htmlspecialchars($sLink, ENT_QUOTES, 'UTF-8') . '<br/>';
     }
 }
Ejemplo n.º 4
0
 /** ************************************************
  * Rewrites $this->aAnchors so that every kept entry is an absolute
  * URL. Each anchor is trimmed and then handled as follows:
  *   - already a valid URL: kept, minus trailing '/'
  *   - empty, or flagged by containsJS / containsHash /
  *     containsMailTo: dropped
  *   - starts with '/': appended to $this->sBaseURL
  *   - starts with './' or '../': resolved against $this->sURL with
  *     WebUtils::traversRelPath(); kept only if the result is valid
  *   - anything else: appended to $this->sCurrentPathURL
  * Progress messages are echoed through $this->sSay() along the way.
  *
  * @return void
  * Notes:
  *   Different forms of urls
  *       /dir/
  *       /dir
  *       /dir/index.php
  *       dir/index.php
  *       dir/index.php?var=v
  *       index.php
  *       /index.php
  *       ./dir
  *       ../../dir
  *       <a href="javascript:;" class="prev"></a>
  ***************************************************/
 private function processURLs()
 {
     $absolute = array();

     foreach ($this->aAnchors as $anchor) {
         $anchor = trim($anchor);
         echo $this->sSay('I\'m going to process url: ', $anchor);

         // Already absolute: only normalise the trailing slash.
         if (WebUtils::isValidURL($anchor)) {
             $absolute[] = rtrim($anchor, '/');
             continue;
         }

         // Nothing useful to crawl. The checks short-circuit in the
         // same order as before.
         // TODO: the hash check may be too strict; a URL can contain a
         // hash and still link to a different page.
         $skip = $anchor === ''
             || $this->containsJS($anchor)
             || $this->containsHash($anchor)
             || $this->containsMailTo($anchor);
         if ($skip) {
             continue;
         }

         $stripped = rtrim($anchor, '/');

         // Root-relative link: hang it off the site's base URL.
         if ($anchor[0] === '/') {
             $absolute[] = $this->sBaseURL . $stripped;
             continue;
         }

         // Dot-relative link: try to walk it from the current URL.
         if (strncmp($anchor, './', 2) === 0 || strncmp($anchor, '../', 3) === 0) {
             echo $this->sSay('I\'ve found a relative path ', $stripped);
             $resolved = WebUtils::traversRelPath($this->sURL, $stripped);
             echo $this->sSay('I\'ve tried to travers it and arrived at ', $resolved);
             if (WebUtils::isValidURL($resolved)) {
                 $absolute[] = $resolved;
             } else {
                 echo $this->sSay('Relative url parse failed. url is', $resolved);
             }
             continue;
         }

         // Unrecognised form: assume it is relative to the current path.
         echo $this->sSay('Not sure what to do with this url: ', $stripped);
         $guess = $this->sCurrentPathURL . '/' . $stripped;
         $absolute[] = $guess;
         echo $this->sSay('So I\'ll add on the current path: ', $guess);
     }

     $this->aAnchors = $absolute;
 }