/**
 * Scrape the page at $this->url using a pattern.
 *
 * The page is requested through Curlib::singleRequest() and the "content"
 * entry of its result is passed to the pattern for extraction.
 *
 * @param Pattern $p The pattern used to scrape the webpage.
 * @return array The information extracted by the pattern
 *               (whatever $p->getInfo() returns).
 */
public function scrape(Pattern $p)
{
    // Relative path: the same file is used both to read and to store cookies.
    $cookieStore = "cookie.txt";

    // NOTE(review): peer certificate verification is switched off here.
    $requestOptions = array(
        CURLOPT_HEADER => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17',
        CURLOPT_COOKIESESSION => true,
        CURLOPT_COOKIEFILE => $cookieStore,
        CURLOPT_COOKIEJAR => $cookieStore
    );

    $response = Curlib::singleRequest($this->url, $requestOptions);
    $pageBody = $response["content"];

    return $p->getInfo($pageBody);
}
/**
 * Check Curlib::formatBytes() against a set of known byte counts:
 * a value below 1024, an exact kilobyte, a megabyte-range value with the
 * default precision, a custom precision of 5, and a null input.
 */
public function testFormatBytes()
{
    // assertEquals() takes the expected value first and the actual value
    // second; keeping that order makes failure messages report correctly.
    $this->assertEquals("10", Curlib::formatBytes(10));
    $this->assertEquals("1KB", Curlib::formatBytes(1024));
    $this->assertEquals("1.24MB", Curlib::formatBytes(1300234));

    // The second argument is passed as 5 and five decimals are expected.
    $this->assertEquals("126.97559KB", Curlib::formatBytes(130023, 5));

    // A null byte count is expected to be rendered as "0".
    $this->assertEquals("0", Curlib::formatBytes(null));
}
/**
 * Fetch every URL in $urls and record each page's description and size.
 *
 * The pages are requested through Curlib::multiRequest(). For each returned
 * content one entry is appended to $this->products["description"] (the
 * value of the page's description meta tag, or null when the page has
 * none) and one to $this->products["size"] (the formatted result of
 * Curlib::strBytes() for that content), so both lists stay index-aligned.
 * Either list is created first when it does not exist yet.
 *
 * @param array $urls The array of links to take description and size from.
 * @return void
 */
private function getProductsDescAndSize($urls)
{
    // Relative path: the same file is used both to read and to store cookies.
    $cookie_file = "cookie.txt";

    // NOTE(review): peer certificate verification is switched off here.
    $options = array(
        CURLOPT_HEADER => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17',
        CURLOPT_COOKIESESSION => true,
        CURLOPT_COOKIEFILE => $cookie_file,
        CURLOPT_COOKIEJAR => $cookie_file
    );

    $contents = Curlib::multiRequest($urls, $options);

    if (!isset($this->products["description"])) {
        $this->products["description"] = array();
    }
    if (!isset($this->products["size"])) {
        $this->products["size"] = array();
    }

    foreach ($contents as $content) {
        // Only read the capture group when the pattern actually matched;
        // a page without a description meta tag would otherwise trigger an
        // "undefined offset" error on $match[1]. null keeps the description
        // list aligned with the size list.
        $description = null;
        if (preg_match('/<meta name="description" content="(.*?)"/', $content, $match) === 1) {
            $description = $match[1];
        }

        $this->products["description"][] = $description;
        $this->products["size"][] = Curlib::formatBytes(Curlib::strBytes($content));
    }
}