/**
 * Smoke test for SiteCrawler: saves an English-to-French website record
 * pointing at http://solutions.weblite.ca/, loads the matching SweteSite
 * and crawls it from the site root with a depth of 3.
 *
 * The test makes no assertions, so it can only fail if one of the calls
 * below raises an error.
 */
function testCrawlSite() {
    // Persist a website record so it receives a website_id we can load by.
    $websiteRecord = new Dataface_Record('websites', array());
    $websiteRecord->setValues(array(
        'website_url' => 'http://solutions.weblite.ca/',
        'source_language' => 'en',
        'target_language' => 'fr',
        'website_name' => 'Site 1 french',
        'active' => 1,
        'base_path' => '/fr/',
        'host' => 'localhost'
    ));
    $websiteRecord->save();

    $sweteSite = SweteSite::loadSiteById($websiteRecord->val('website_id'));

    $siteCrawler = new SiteCrawler();
    $siteCrawler->site = $sweteSite;
    $siteCrawler->startingPoint = 'http://solutions.weblite.ca/';
    $siteCrawler->depth = 3;
    $siteCrawler->crawl();

    // Debugging aid: uncomment to dump the crawled tree.
    // print_r($siteCrawler->root);
}
/**
 * @brief Performs the import by crawling the site and handing the crawl
 * result to importNode().
 *
 * Steps, in order:
 *  - resets the pagesAdded and pagesUpdated properties to empty arrays;
 *  - configures a SiteCrawler with this importer's site, loadContent flag,
 *    starting point and depth;
 *  - runs the crawl and passes the crawler's root node to importNode().
 *
 * The crawl starts at $this->startingPoint, falling back to the site's
 * URL (getSiteUrl()) when no starting point is set. The crawl depth is
 * $this->depth when that property is set, otherwise 3.
 *
 * NOTE(review): the previous docblock declared "@throws Exception if the
 * page already exists, or the HTTP response code is not in the list of
 * acceptable HTTP codes". Nothing in this method throws directly; confirm
 * whether crawl() or importNode() can raise before relying on that.
 */
public function doImport() {
    // Start each import run with fresh bookkeeping.
    $this->pagesAdded = array();
    $this->pagesUpdated = array();

    $siteCrawler = new SiteCrawler();
    $siteCrawler->site = $this->site;
    $siteCrawler->loadContent = $this->loadContent;

    // Fall back to the site's own URL when no explicit starting point is set.
    $startUrl = $this->startingPoint;
    if (!$startUrl) {
        $startUrl = $this->site->getSiteUrl();
    }
    $siteCrawler->startingPoint = $startUrl;

    // Default crawl depth is 3 unless the importer specifies its own.
    $siteCrawler->depth = isset($this->depth) ? $this->depth : 3;

    $siteCrawler->crawl();
    $this->importNode($siteCrawler->root);
}