Exemplo n.º 1
0
 /**
  * Scrape the page at $this->url using a pattern.
  *
  * The page is requested through Curlib::singleRequest() with a fixed set of
  * cURL options (browser-like user agent, redirects followed, cookies kept in
  * a file), and the "content" entry of the result is handed to the pattern.
  *
  * @param Pattern $p The pattern used to scrape the webpage.
  * @return array The array having all the information handled by the pattern.
  */
 public function scrape(Pattern $p)
 {
     // NOTE(review): relative path, so the cookie file presumably lands in the
     // current working directory of the running process - confirm this is intended.
     $cookieStore = "cookie.txt";
     $userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17';

     $curlOptions = array(
         CURLOPT_HEADER => 0,
         CURLOPT_RETURNTRANSFER => 1,
         CURLOPT_FOLLOWLOCATION => 1,
         // NOTE(review): TLS peer verification is switched off here, which
         // exposes the request to man-in-the-middle tampering.
         CURLOPT_SSL_VERIFYPEER => false,
         CURLOPT_USERAGENT => $userAgent,
         CURLOPT_COOKIESESSION => true,
         // The same file is used both to load cookies and to store them.
         CURLOPT_COOKIEFILE => $cookieStore,
         CURLOPT_COOKIEJAR => $cookieStore,
     );

     $response = Curlib::singleRequest($this->url, $curlOptions);
     $page = $response["content"];

     return $p->getInfo($page);
 }
Exemplo n.º 2
0
 /**
  * Check the strings Curlib::formatBytes() produces for plain byte counts,
  * KB and MB values, an explicit second argument of 5, and a null input.
  *
  * PHPUnit's assertEquals() takes the expected value first and the actual
  * value second; keeping that order makes failure messages report the
  * "expected" and "actual" sides correctly.
  */
 public function testFormatBytes()
 {
     // Value below 1024: no unit suffix is expected.
     $this->assertEquals("10", Curlib::formatBytes(10));

     // Exactly 1024 bytes.
     $this->assertEquals("1KB", Curlib::formatBytes(1024));

     // Megabyte range, called without the optional second argument.
     $this->assertEquals("1.24MB", Curlib::formatBytes(1300234));

     // Second argument 5: five decimal places are expected in the result.
     $this->assertEquals("126.97559KB", Curlib::formatBytes(130023, 5));

     // A null input is expected to be reported as "0".
     $this->assertEquals("0", Curlib::formatBytes(null));
 }
Exemplo n.º 3
0
 /**
  * Get the description and size of each product page in $urls.
  *
  * All pages are requested through Curlib::multiRequest(). For every returned
  * page, the content of its <meta name="description"> tag is appended to
  * $this->products["description"] and the formatted byte size of the page is
  * appended to $this->products["size"]. Both lists are created if missing.
  *
  * A page without a matching meta description tag gets a null description, so
  * the two lists always grow by the same number of entries.
  *
  * @param array $urls The array of links to take description and size
  *
  */
 private function getProductsDescAndSize($urls)
 {
     // NOTE(review): relative path, so the cookie file presumably lands in the
     // current working directory of the running process - confirm this is intended.
     $cookie_file = "cookie.txt";
     $options = array(
         CURLOPT_HEADER => 0,
         CURLOPT_RETURNTRANSFER => 1,
         CURLOPT_FOLLOWLOCATION => 1,
         // NOTE(review): TLS peer verification is switched off here, which
         // exposes the requests to man-in-the-middle tampering.
         CURLOPT_SSL_VERIFYPEER => false,
         CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17',
         CURLOPT_COOKIESESSION => true,
         CURLOPT_COOKIEFILE => $cookie_file,
         CURLOPT_COOKIEJAR => $cookie_file,
     );
     $contents = Curlib::multiRequest($urls, $options);

     if (!isset($this->products["description"])) {
         $this->products["description"] = array();
     }
     if (!isset($this->products["size"])) {
         $this->products["size"] = array();
     }

     foreach ($contents as $content) {
         // preg_match() returns 1 only on a successful match; on 0 (no match)
         // or false (regex error) $match has no index 1, so reading it would
         // raise an "undefined offset/array key" notice or warning.
         $found = preg_match("/<meta name=\"description\" content=\"(.*?)\"/", $content, $match);
         $description = ($found === 1) ? $match[1] : null;

         $this->products["description"][] = $description;
         $this->products["size"][] = Curlib::formatBytes(Curlib::strBytes($content));
     }
 }