/**
 * Queries the Twitter search endpoint and records the reported total.
 *
 * The query always carries `q`, `page` and `rpp` (100); `since` (normalised
 * to Y-m-d), `locale` and `geocode` are added only when the matching
 * property is non-empty. Pages 1 through 9 are requested in turn, the URL of
 * each request is kept in $this->url, and the loop stops at the first
 * response whose decoded JSON exposes a `total` member.
 *
 * @return mixed $this->result: the `total` value of the first response that
 *               has one, otherwise whatever the property held beforehand.
 */
public function run()
{
    $query = array('q' => $this->q, 'page' => 1, 'rpp' => 100);

    if (!empty($this->since)) {
        $query['since'] = date('Y-m-d', strtotime($this->since));
    }

    // The remaining optional filters are forwarded verbatim under their own name.
    foreach (array('locale', 'geocode') as $optional) {
        if (!empty($this->{$optional})) {
            $query[$optional] = $this->{$optional};
        }
    }

    foreach (range(1, 9) as $pageNumber) {
        $query['page'] = $pageNumber;
        $this->url = 'http://search.twitter.com/search.json?' . http_build_query($query, '', '&');

        $decoded = json_decode(Scraper::scrape($this->url));
        if (!$decoded || !isset($decoded->total)) {
            // Undecodable or total-less response: try the next page.
            continue;
        }

        $this->result = $decoded->total;
        break;
    }

    return $this->result;
}
/**
 * Scrapes the hard-coded Sainsbury's category listing and writes the
 * resulting JSON to the console output.
 *
 * The `prettyprint` input option is cast to a boolean and forwarded as the
 * second argument of Scraper::construct_json().
 *
 * NOTE(review): if this is a Symfony Console command running on Symfony 5+,
 * execute() is expected to return an int exit code - confirm against the
 * installed version before adding a return value.
 *
 * @param InputInterface  $input  Source of the `prettyprint` option.
 * @param OutputInterface $output Receives the JSON via writeln().
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $default_url = "http://www.sainsburys.co.uk/webapp/wcs/stores/servlet/CategoryDisplay?listView=true&orderBy=FAVOURITES_FIRST&parent_category_rn=12518&top_category=12518&langId=44&beginIndex=0&pageSize=20&catalogId=10137&searchTerm=&categoryId=185749&listId=&storeId=10151&promotionId=#langId=44&storeId=10151&catalogId=10137&categoryId=185749&parent_category_rn=12518&top_category=12518&pageSize=20&orderBy=FAVOURITES_FIRST&searchTerm=&beginIndex=0&hideFilters=true";

    $scraper = new Scraper($default_url);
    $scraped_products = $scraper->scrape();

    // Any truthy option value enables pretty printing.
    $pretty_print = (bool) $input->getOption('prettyprint');

    // The flag is passed positionally: writing `$pretty = ...` inside the
    // call would be an assignment expression, not a named argument.
    $json = $scraper->construct_json($scraped_products, $pretty_print);

    $output->writeln($json);
}
/**
 * Looks up the position of the test group's host in Google web search results.
 *
 * Seven result pages are requested at offsets 0, 8, ..., 48. Each result URL
 * is matched against the quoted host of $this->TestGroup; the first hit ends
 * the search.
 *
 * @return int|false $this->result: offset + result key + 1 for the first
 *                   matching result (a 1-based rank, assuming the results
 *                   are a zero-indexed list), or false when nothing matched.
 */
public function run()
{
    // Google Search API endpoint and default parameters
    $endpoint = 'http://ajax.googleapis.com/ajax/services/search/web?';
    $query = array('v' => '1.0', 'rsz' => 'large', 'q' => $this->search);

    if (!empty($this->save)) {
        $query['save'] = $this->save;
    }
    if (!empty($this->language)) {
        $query['hl'] = $this->language;
    }
    if (!empty($this->country)) {
        if ($this->country !== true) {
            $query['gl'] = $this->country;
        } elseif (!empty($this->language)) {
            // country === true means "reuse the language code as the country"
            $query['gl'] = $this->language;
        }
    }

    // The parameters no longer change, so the query string is built once.
    $queryString = http_build_query($query, '', '&');

    $pageSize = 8;
    $this->result = false;

    foreach (range(0, 6) as $pageIndex) {
        $offset = $pageIndex * $pageSize;

        $raw = Scraper::scrape($endpoint . $queryString . '&start=' . $offset);
        $decoded = is_string($raw) ? json_decode($raw) : null;

        // Skip pages that failed to load, failed to decode or carry no result set.
        if (!is_object($decoded) || !isset($decoded->responseData->results)) {
            continue;
        }

        // Find the hostname in the result set.
        foreach ($decoded->responseData->results as $position => $hit) {
            if (preg_match('/' . preg_quote($this->TestGroup->host, '/') . '.*/', $hit->url)) {
                $this->result = $offset + $position + 1;
                break 2;
            }
        }
    }

    return $this->result;
}
/**
 * Scrapes $this->url and extracts a value from it with $this->regexp.
 *
 * The first occurrence of the named group `match` is used when the pattern
 * defines one, otherwise the first capture group, otherwise the whole match.
 * Every "." and "," is removed from the captured text. If what remains looks
 * like an integer (optional minus sign, at most one whitespace character,
 * digits) it is converted to a float.
 *
 * @return mixed $this->result: the extracted value, or the property's
 *               previous content when the pattern did not match.
 */
public function run()
{
    $response = Scraper::scrape($this->url);

    if (!preg_match_all($this->regexp, $response, $found)) {
        return $this->result;
    }

    if (isset($found['match'])) {
        $value = $found['match'][0];
    } elseif (isset($found[1])) {
        $value = $found[1][0];
    } else {
        // Pattern without capture groups: use the complete match instead of
        // reading an undefined offset.
        $value = $found[0][0];
    }

    $value = preg_replace('@[.,]@', '', $value);

    // type conversion
    if (preg_match('@^-?\\s?\\d+$@', $value)) {
        // The check above tolerates whitespace after the sign ("- 123"), but a
        // numeric cast of such a string yields 0, so strip whitespace first.
        $value = (float) preg_replace('@\\s@', '', $value);
    }

    $this->result = $value;

    return $this->result;
}
<?php
// Standalone entry point: scrapes the hard-coded Sainsbury's category listing
// and prints the JSON built by Scraper::construct_json().

require_once 'vendor/autoload.php';
require_once './classes/Scraper.class.php';

$default_url = "http://www.sainsburys.co.uk/webapp/wcs/stores/servlet/CategoryDisplay?listView=true&orderBy=FAVOURITES_FIRST&parent_category_rn=12518&top_category=12518&langId=44&beginIndex=0&pageSize=20&catalogId=10137&searchTerm=&categoryId=185749&listId=&storeId=10151&promotionId=#langId=44&storeId=10151&catalogId=10137&categoryId=185749&parent_category_rn=12518&top_category=12518&pageSize=20&orderBy=FAVOURITES_FIRST&searchTerm=&beginIndex=0&hideFilters=true";

$scraper = new Scraper($default_url);
$scraped_products = $scraper->scrape();

// The second argument (presumably the pretty-print flag) is passed
// positionally: `$pretty = True` inside the call was an assignment that
// created a stray global, not a named argument.
print_r($scraper->construct_json($scraped_products, true));
return $string; }

    /**
     * Tells whether $word begins with the first word of $name.
     *
     * The general rule is that the first word of the name may start with
     * part of the SKU, so the first space-delimited word is extracted from
     * the name and looked for, case-insensitively, at the very start of
     * $word. The string $name should be cleaned before calling this function.
     *
     * @param string $name Cleaned product name.
     * @param string $word Text (the SKU) that may begin with the name's first word.
     *
     * @return bool True only when the first word is non-empty and $word starts with it.
     */
    private function startsWithPartofWord($name, $word)
    {
        $firstWord = current(explode(" ", $name));

        // An empty first word (empty name or leading space) cannot identify a
        // prefix; stripos() also treats an empty needle differently across
        // PHP versions, so it is rejected explicitly.
        if ($firstWord === '' || $firstWord === false) {
            return false;
        }

        // Strict comparison is required: stripos() returns false when the
        // needle is absent, and `false == 0` would report a match.
        return stripos($word, $firstWord) === 0;
    }

    /**
     * Removes the first space-delimited word from a string.
     *
     * After dropping the word, surrounding whitespace is trimmed, then "-"
     * characters at either end, then whitespace once more.
     *
     * @param string $string Text whose first word is to be dropped.
     *
     * @return string The remaining text.
     */
    private function deleteFirstWord($string)
    {
        $elements = explode(" ", $string);
        array_shift($elements);
        $shortString = trim(implode(" ", $elements));

        // Also trim off any starting hyphen and spaces.
        $shortString = trim($shortString, "-");
        $shortString = trim($shortString);

        return $shortString;
    }
}

if ($argc < 4) {
    // Report misuse on stderr with a non-zero status; die() with a string
    // argument would exit with status 0.
    fwrite(STDERR, "Usage: php scraper.php inputFile outputFile errorFile" . PHP_EOL);
    exit(1);
}

$scraper = new Scraper($argv[1], $argv[2], $argv[3]);
$scraper->scrape();