Esempio n. 1
0
 /**
  * Saves a 'websites' record, loads the corresponding SweteSite by its
  * website_id, and runs a SiteCrawler against it.
  *
  * The test makes no assertions; it only exercises the crawl call.
  */
 function testCrawlSite()
 {
     // Field values for the website record that the crawler will run against.
     $siteValues = array(
         'website_url' => 'http://solutions.weblite.ca/',
         'source_language' => 'en',
         'target_language' => 'fr',
         'website_name' => 'Site 1 french',
         'active' => 1,
         'base_path' => '/fr/',
         'host' => 'localhost',
     );

     $websiteRecord = new Dataface_Record('websites', array());
     $websiteRecord->setValues($siteValues);
     $websiteRecord->save();

     // Reload the saved record as a SweteSite via its website_id value.
     $loadedSite = SweteSite::loadSiteById($websiteRecord->val('website_id'));

     $siteCrawler = new SiteCrawler();
     $siteCrawler->site = $loadedSite;
     $siteCrawler->startingPoint = 'http://solutions.weblite.ca/';
     $siteCrawler->depth = 3;
     $siteCrawler->crawl();
     // Debugging aid: print_r($siteCrawler->root) to inspect the crawl result.
 }
Esempio n. 2
0
 /**
  * @brief Crawls the site and imports the result of the crawl.
  *
  * Resets the pagesAdded and pagesUpdated lists, runs a SiteCrawler
  * configured from this object's site, loadContent, startingPoint and
  * depth properties, then hands the crawler's root to importNode().
  *
  * The crawl starts at the startingPoint property, or at the site's URL
  * (getSiteUrl()) when startingPoint is empty. The crawl depth is the
  * depth property when it is set, otherwise 3.
  *
  * @throws Exception presumably propagated from SiteCrawler::crawl() or
  * importNode(), neither of which is visible here -- TODO confirm the
  * exact conditions.
  */
 public function doImport()
 {
     // Every run starts with empty result lists.
     $this->pagesAdded = array();
     $this->pagesUpdated = array();

     $siteCrawler = new SiteCrawler();
     $siteCrawler->site = $this->site;
     $siteCrawler->loadContent = $this->loadContent;

     // Use the configured starting point, falling back to the site URL.
     $entryUrl = $this->startingPoint;
     if (!$entryUrl) {
         $entryUrl = $this->site->getSiteUrl();
     }
     $siteCrawler->startingPoint = $entryUrl;

     // A depth of 3 applies unless this importer has its own depth set.
     $siteCrawler->depth = isset($this->depth) ? $this->depth : 3;

     $siteCrawler->crawl();
     $this->importNode($siteCrawler->root);
 }