/**
 * Checks that lmbUri splits a file:// URI that carries credentials and a
 * host into protocol, user, password, host and path.
 *
 * Two variants are covered: a Unix-style path and a Windows-style path
 * (drive letter plus backslashes) placed after the host.
 */
function testCreate_FileProtocolWithHost()
{
  // Unix-style path. The credentials in the URI must be the literal
  // 'user' / 'pass' values that the assertions below expect.
  $str = 'file://user:pass@localhost/dir/file';

  $uri = new lmbUri($str);

  $this->assertEqual($uri->getProtocol(), 'file');
  $this->assertEqual($uri->getUser(), 'user');
  $this->assertEqual($uri->getPassword(), 'pass');
  $this->assertEqual($uri->getHost(), 'localhost');
  $this->assertEqual($uri->getPath(), '/dir/file');

  // Windows-style path after the host ('\\' is a single backslash inside
  // a single-quoted PHP string).
  $str = 'file://user:pass@localhost/c:\\dir\\file';

  $uri = new lmbUri($str);

  $this->assertEqual($uri->getProtocol(), 'file');
  $this->assertEqual($uri->getUser(), 'user');
  $this->assertEqual($uri->getPassword(), 'pass');
  $this->assertEqual($uri->getHost(), 'localhost');

  // Open question from the original author: should the path be just
  // c:\dir\file (without the leading slash)? The test pins the current
  // expectation, which keeps the leading slash.
  $this->assertEqual($uri->getPath(), '/c:\\dir\\file');
}
// Command-line entry point: crawls a site starting from the URI given as the
// first argument and feeds every fetched page to the full-text search indexer.

// The starting URI is mandatory.
if (!isset($argv[1])) {
  die("index starting uri not specified!\n");
}

// The project directory comes from the environment; without it the path to
// setup.php would collapse to '/setup.php' and fail with an unclear error.
if (!isset($_SERVER['LIMB_PROJECT_DIR']) || $_SERVER['LIMB_PROJECT_DIR'] === '') {
  die("LIMB_PROJECT_DIR is not set!\n");
}

$path = $_SERVER['LIMB_PROJECT_DIR'];
require_once $path . '/setup.php';

lmb_require('limb/net/src/lmbUri.class.php');
lmb_require('limb/web_spider/src/lmbWebSpider.class.php');
lmb_require('limb/web_spider/src/lmbUriFilter.class.php');
lmb_require('limb/web_spider/src/lmbContentTypeFilter.class.php');
lmb_require('limb/web_spider/src/lmbSearchIndexingObserver.class.php');
lmb_require('limb/search/src/indexer/lmbFullTextSearchIndexer.class.php');
lmb_require('limb/search/src/indexer/lmbSearchTextNormalizer.class.php');
lmb_require('limb/web_spider/src/lmbUriNormalizer.class.php');

$uri = new lmbUri($argv[1]);

// Indexer wrapped in an observer so the spider can notify it about pages.
$indexer = new lmbFullTextSearchIndexer(new lmbSearchTextNormalizer());
$indexer->useNOINDEX();

$observer = new lmbSearchIndexingObserver($indexer);

// Only HTML documents are accepted.
$content_type_filter = new lmbContentTypeFilter();
$content_type_filter->allowContentType('text/html');

// Restrict crawling to the starting URI's host over http; any path matches.
$uri_filter = new lmbUriFilter();
$uri_filter->allowHost($uri->getHost());
$uri_filter->allowProtocol('http');
$uri_filter->allowPathRegex('~.*~');

// Strip the PHPSESSID query item from URIs during normalization.
$normalizer = new lmbUriNormalizer();
$normalizer->stripQueryItem('PHPSESSID');

$spider = new lmbWebSpider();
$spider->setContentTypeFilter($content_type_filter);
$spider->setUriFilter($uri_filter);
$spider->setUriNormalizer($normalizer);
$spider->registerObserver($observer);

$spider->crawl($uri);