예제 #1
0
 /**
  * Table-driven check of SweteTools::absoluteUrl().
  *
  * Every case supplies a base URL ('base'), the URL to resolve against it
  * ('in') and the absolute URL that is expected back ('out').
  */
 function testAbsoluteUrl()
 {
     $cases = array(
         // Relative and root-relative inputs against bases with a trailing slash.
         array('base' => 'http://solutions.weblite.ca/', 'in' => 'index.html', 'out' => 'http://solutions.weblite.ca/index.html'),
         array('base' => 'http://solutions.weblite.ca/', 'in' => '/index.html', 'out' => 'http://solutions.weblite.ca/index.html'),
         array('base' => 'http://solutions.weblite.ca/foo/', 'in' => '/index.html', 'out' => 'http://solutions.weblite.ca/index.html'),
         // Bases without a trailing slash are expected to behave like directories.
         array('base' => 'http://solutions.weblite.ca', 'in' => 'index.html', 'out' => 'http://solutions.weblite.ca/index.html'),
         array('base' => 'http://solutions.weblite.ca/foo', 'in' => 'index.html', 'out' => 'http://solutions.weblite.ca/foo/index.html'),
         array('base' => 'http://solutions.weblite.ca/foo/', 'in' => 'index.html', 'out' => 'http://solutions.weblite.ca/foo/index.html'),
         // Already-absolute input is returned untouched; a protocol-relative
         // input picks up the scheme of the base.
         array('base' => 'https://foo.bar.com/foo/', 'in' => 'http://foo.bar.com/foo/index.html', 'out' => 'http://foo.bar.com/foo/index.html'),
         array('base' => 'https://foo.bar.com/foo/', 'in' => '//foo.bar.com/foo/index.html', 'out' => 'https://foo.bar.com/foo/index.html'),
         // The query string of the input is carried over.
         array('base' => 'http://foo.bar.com/foo/', 'in' => 'index.html?go=1&bar=2', 'out' => 'http://foo.bar.com/foo/index.html?go=1&bar=2'),
     );
     foreach ($cases as $case) {
         $actual = SweteTools::absoluteUrl($case['in'], $case['base']);
         $message = 'Converting ' . $case['in'] . ', ' . $case['base'];
         $this->assertEquals($case['out'], $actual, $message);
     }
 }
예제 #2
0
 /**
  * @private
  *
  * @brief Callback used by preg_replace when converting CSS.
  *
  * Resolves the captured URL ($match[2]) against the current base
  * ($this->_currBase), passes it to processResource() and emits a new
  * url(...) token built from that result wrapped in $match[1] and $match[3]
  * (presumably the optional opening/closing quotes; confirm against the
  * pattern used by the caller).
  *
  * @param array $match The regex match: [0] full match, [1] text before the
  *   URL, [2] the URL, [3] text after the URL.
  * @return string The rewritten url(...) token, or the original matched text
  *   unchanged when processResource() throws an Exception.
  */
 public function _cssCallback($match)
 {
     $resourceUrl = SweteTools::absoluteUrl($match[2], $this->_currBase);
     try {
         $replacement = $this->processResource($resourceUrl, true);
         return "url({$match[1]}{$replacement}{$match[3]})";
     } catch (Exception $ignored) {
         // Best effort: a resource that cannot be processed keeps its
         // original reference.
         return $match[0];
     }
 }
예제 #3
0
 /**
  * @brief Processes the HTML found at a specified URL.  The page itself is
  * registered as a node, and every link (a[href]) that resolves to a URL
  * inside the target site is added so it can be processed in later rounds.
  *
  * Markup that the parser rejects (str_get_html() returns false for empty or
  * oversized input) still gets its page node registered; it simply
  * contributes no links.
  *
  * @param string $html The HTML to be processed.
  * @param string $url The URL of the webpage that stored the html in $html
  * @return void
  * @throws Exception If $url does not start with the site URL.
  */
 public function processHtml($html, $url)
 {
     require_once 'lib/simple_html_dom.php';
     $siteUrl = $this->site->getSiteUrl();
     if (strpos($url, $siteUrl) !== 0) {
         throw new Exception("HTML could not be processed because the url {$url} lies outside the target site: " . $siteUrl);
     }

     // Default base: the page URL without its query string / fragment and,
     // when it does not already end in a slash, without its last path
     // segment.  Only slashes located after the "scheme://" prefix count as
     // path separators, so a bare host such as "http://host" is left intact
     // instead of being truncated to "http:/".
     $base = (string) substr($url, 0, strcspn($url, '?#'));
     if (substr($base, -1) !== '/') {
         $schemeEnd = strpos($base, '://');
         $pathStart = ($schemeEnd === false) ? 0 : $schemeEnd + 3;
         $lastSlash = strrpos($base, '/');
         if ($lastSlash !== false && $lastSlash >= $pathStart) {
             $base = substr($base, 0, $lastSlash);
         }
     }

     // Pull everything that is needed out of the DOM up front so that the
     // parser's memory can be released before the nodes are registered.
     $hrefs = array();
     $dom = str_get_html($html);
     if (is_object($dom)) {
         foreach ($dom->find('base[href]') as $baseTag) {
             $declared = $baseTag->href;
             if (!is_string($declared) || $declared === '') {
                 // Empty or valueless href: nothing usable to override with.
                 continue;
             }
             // A <base href> overrides the default; when several are present
             // the last one wins.
             // NOTE(review): a trailing slash is appended even when the href
             // looks like a file name -- confirm that this is intended.
             $base = $declared;
             if (substr($base, -1) !== '/') {
                 $base .= '/';
             }
         }
         foreach ($dom->find('a[href]') as $link) {
             $hrefs[] = $link->href;
         }
         // Break the parser's internal circular references.
         $dom->clear();
     }
     unset($dom);

     // Now that we have our base, we can begin to fire away.
     $node = $this->addNodeAtAbsoluteUrl($url, array('type' => 'page', 'url' => $url));
     if ($this->loadContent) {
         $this->addNodeAtAbsoluteUrl($url, array('content' => $html));
     }
     if (isset($this->delegate) and method_exists($this->delegate, 'loadContent')) {
         $this->delegate->loadContent($node, $html);
     }

     // Register the harvested links; anything that resolves outside of the
     // target site does not belong in our tree and is skipped.
     foreach ($hrefs as $rawHref) {
         $href = SweteTools::absoluteUrl($rawHref, $base);
         if (strpos($href, $siteUrl) === 0) {
             $this->addNodeAtAbsoluteUrl($href);
         }
     }
 }