/**
 * Query the Twitter search endpoint and record the reported result total.
 *
 * Builds the query from $this->q plus the optional $this->since,
 * $this->locale and $this->geocode filters, then requests pages 1 through 9
 * until a response decodes to JSON carrying a `total` field. The URL of the
 * last request made is kept in $this->url.
 *
 * @return mixed $this->result: the `total` from the first page that reports
 *               one, otherwise whatever value the property already held.
 */
public function run()
 {
     $params = array('q' => $this->q, 'page' => 1, 'rpp' => 100);
     if (!empty($this->since)) {
         // strtotime() returns false for text it cannot parse; feeding that
         // to date() would silently send "1970-01-01", so the filter is only
         // added when the value really is a date.
         $sinceTimestamp = strtotime($this->since);
         if ($sinceTimestamp !== false) {
             $params['since'] = date('Y-m-d', $sinceTimestamp);
         }
     }
     if (!empty($this->locale)) {
         $params['locale'] = $this->locale;
     }
     if (!empty($this->geocode)) {
         $params['geocode'] = $this->geocode;
     }
     for ($page = 1; $page < 10; $page++) {
         $params['page'] = $page;
         $this->url = 'http://search.twitter.com/search.json?' . http_build_query($params, '', '&');
         $response = Scraper::scrape($this->url);
         $json = json_decode($response);
         // Stop at the first page whose payload exposes a total.
         if ($json && isset($json->total)) {
             $this->result = $json->total;
             break;
         }
     }
     return $this->result;
 }
 /**
  * Scrape the default product listing and write it to the console as JSON.
  *
  * @param InputInterface  $input  Console input; a truthy `prettyprint`
  *                                option requests pretty-printed JSON.
  * @param OutputInterface $output Console output that receives the JSON.
  *
  * @return int Exit status; always 0.
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $default_url = "http://www.sainsburys.co.uk/webapp/wcs/stores/servlet/CategoryDisplay?listView=true&orderBy=FAVOURITES_FIRST&parent_category_rn=12518&top_category=12518&langId=44&beginIndex=0&pageSize=20&catalogId=10137&searchTerm=&categoryId=185749&listId=&storeId=10151&promotionId=#langId=44&storeId=10151&catalogId=10137&categoryId=185749&parent_category_rn=12518&top_category=12518&pageSize=20&orderBy=FAVOURITES_FIRST&searchTerm=&beginIndex=0&hideFilters=true";
     $scraper = new Scraper($default_url);
     $scraped_products = $scraper->scrape();
     // Normalise the option to a real boolean.
     $pretty_print = (bool) $input->getOption('prettyprint');
     // The flag is positional: writing `$pretty = ...` inside the call does
     // not name the argument, it only assigns a throwaway local variable.
     $json = $scraper->construct_json($scraped_products, $pretty_print);
     $output->writeln($json);
     // NOTE(review): this looks like a Symfony Console command, whose
     // execute() is expected to return an integer status; 0 signals success.
     return 0;
 }
 /**
  * Look up the search position of the test group's host via the Google
  * AJAX web search endpoint.
  *
  * Requests seven result pages (offsets 0, 8, ..., 48) and scans each result
  * URL for $this->TestGroup->host. Pages that are not strings, do not decode
  * to an object, or lack responseData->results are skipped.
  *
  * @return int|false 1-based position of the first result whose URL contains
  *                   the host, or false when none of the pages has one.
  */
 public function run()
 {
     $endpoint = 'http://ajax.googleapis.com/ajax/services/search/web?';

     // Fixed request parameters, extended by the optional settings below.
     $query = array('v' => '1.0', 'rsz' => 'large', 'q' => $this->search);
     if (!empty($this->save)) {
         $query['save'] = $this->save;
     }
     if (!empty($this->language)) {
         $query['hl'] = $this->language;
     }
     if (!empty($this->country)) {
         if ($this->country !== true) {
             $query['gl'] = $this->country;
         } elseif (!empty($this->language)) {
             // country === true means "reuse the language code as country".
             $query['gl'] = $this->language;
         }
     }

     // Only the start offset changes between pages, so encode the rest once.
     $queryString = http_build_query($query, '', '&');
     $pageSize = 8;
     $this->result = false;

     foreach (range(0, 6) as $page) {
         $offset = $page * $pageSize;
         $body = Scraper::scrape($endpoint . $queryString . '&start=' . $offset);

         $decoded = is_string($body) ? json_decode($body) : null;
         if (!is_object($decoded) || !isset($decoded->responseData->results)) {
             continue;
         }

         foreach ($decoded->responseData->results as $position => $hit) {
             if (preg_match('/' . preg_quote($this->TestGroup->host, '/') . '.*/', $hit->url)) {
                 // Ranks are 1-based across all pages.
                 $this->result = $offset + $position + 1;
                 break 2;
             }
         }
     }

     return $this->result;
 }
 /**
  * Fetch $this->url and extract a value from it with $this->regexp.
  *
  * The value is taken from the first occurrence of the named group `match`
  * if the pattern defines one, otherwise from the first capture group, and
  * otherwise from the whole match. Dots and commas are removed from it
  * (presumably thousands separators; confirm against the patterns in use),
  * and a value that then looks like an integer is converted to a float.
  *
  * @return mixed $this->result: the extracted value (float when numeric,
  *               string otherwise), or the property's previous value when
  *               the pattern does not match.
  */
 public function run()
 {
     $response = Scraper::scrape($this->url);
     if (preg_match_all($this->regexp, $response, $found)) {
         if (isset($found['match'])) {
             $value = $found['match'][0];
         } elseif (isset($found[1])) {
             $value = $found[1][0];
         } else {
             // Pattern without capture groups: use the whole match instead
             // of reading an undefined offset.
             $value = $found[0][0];
         }
         $value = preg_replace('@[.,]@', '', $value);
         // type conversion
         if (preg_match('@^-?\\s?\\d+$@', $value)) {
             // The check tolerates whitespace after the sign ("- 5"), but a
             // numeric cast of such a string yields 0; drop the whitespace
             // first so the sign is kept.
             $value = (float) preg_replace('@\\s@', '', $value);
         }
         $this->result = $value;
     }
     return $this->result;
 }
Esempio n. 5
0
<?php

require_once 'vendor/autoload.php';
require_once './classes/Scraper.class.php';
// Scrape the default product listing and print it as pretty-printed JSON.
$default_url = "http://www.sainsburys.co.uk/webapp/wcs/stores/servlet/CategoryDisplay?listView=true&orderBy=FAVOURITES_FIRST&parent_category_rn=12518&top_category=12518&langId=44&beginIndex=0&pageSize=20&catalogId=10137&searchTerm=&categoryId=185749&listId=&storeId=10151&promotionId=#langId=44&storeId=10151&catalogId=10137&categoryId=185749&parent_category_rn=12518&top_category=12518&pageSize=20&orderBy=FAVOURITES_FIRST&searchTerm=&beginIndex=0&hideFilters=true";
$scraper = new Scraper($default_url);
$scraped_products = $scraper->scrape();
// The pretty flag is positional; `$pretty = True` inside the call would only
// create a stray global variable rather than name the argument.
print_r($scraper->construct_json($scraped_products, true));
Esempio n. 6
0
        return $string;
    }
    /**
     * Tell whether $word begins with the first word of $name
     * (case-insensitive).
     *
     * The general rule is that the first word of the name may start with
     * part of the SKU, so the first space-separated word is taken from the
     * name and looked for at the very start of the SKU. $name should be
     * cleaned before calling this function.
     *
     * @param string $name Product name; only its first word is used.
     * @param string $word Text (the SKU) that must start with that word.
     *
     * @return bool True when $word starts with the first word of $name.
     */
    private function startsWithPartofWord($name, $word)
    {
        $firstWord = current(explode(" ", $name));
        if ($firstWord === '') {
            // An empty needle is handled differently across PHP versions;
            // keep the long-standing outcome (a match) explicit.
            return true;
        }
        // stripos() returns false when the needle is absent, and
        // false == 0 is true, so the comparison has to be strict.
        return stripos($word, $firstWord) === 0;
    }
    /**
     * Remove the first space-separated word from a string.
     *
     * What remains is trimmed of surrounding whitespace, then of hyphens at
     * either end, then of whitespace once more, so a separator such as
     * "- " left behind by the removed word disappears as well.
     *
     * @param string $string Text whose first word should be removed.
     *
     * @return string The remaining text, or "" when there was only one word.
     */
    private function deleteFirstWord($string)
    {
        // Split once: element 0 is the first word, element 1 everything else.
        $pieces = explode(" ", $string, 2);
        $remainder = isset($pieces[1]) ? $pieces[1] : "";

        return trim(trim(trim($remainder), "-"));
    }
}
// Command-line entry point: expects the input, output and error file paths.
if ($argc < 4) {
    // Report usage errors on STDERR with a non-zero status so calling
    // scripts can detect the failure; die() with a string exits with 0.
    fwrite(STDERR, "Usage: php scraper.php inputFile outputFile errorFile" . PHP_EOL);
    exit(1);
}
$scraper = new Scraper($argv[1], $argv[2], $argv[3]);
$scraper->scrape();