コード例 #1
0
 /**
  * End-to-end check of the scrape -> serialize -> save cycle for the four
  * scrapers built from $this->engines[0] .. $this->engines[3].
  *
  * For every scraper the test asserts that calls with unacceptable
  * arguments return false, that accepted calls change the fetched-page,
  * keyword and serialized-page counts as expected, and that every file
  * expected in the scraper's output directory after save() can be linted
  * as JSON or opened by XMLReader.
  *
  * The assertion sequence is deliberately spelled out per engine: the
  * scenarios are similar but not identical (serialization order, page
  * counts, extra checks), so they are not folded into one shared routine.
  */
 public function testScrape()
 {
     $parser = new \Seld\JsonLint\JsonParser();

     // ---- engines[0] / 'google': JSON first, XML second -----------------
     $googleScraper = Builder::create($this->engines[0], array(array('foo', 'baz'), 'google'));
     $outDir = $googleScraper->getOutDir();
     // Each of these calls is expected to be rejected (returns false).
     $this->assertFalse($googleScraper->scrape('bar'));
     $this->assertFalse($googleScraper->scrape('baz', 100));
     $this->assertFalse($googleScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($googleScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($googleScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     // serialize() must also return false here, before any successful scrape.
     $this->assertFalse($googleScraper->serialize('json'));
     $this->assertTrue($googleScraper->scrape('foo', 2, true, 'Europe/Berlin'));
     $this->assertCount(2, $googleScraper->getFetchedPages());
     $this->assertCount(1, $googleScraper->getKeywords());
     $this->assertTrue($googleScraper->scrape('baz', 2, true));
     $this->assertCount(4, $googleScraper->getFetchedPages());
     $this->assertCount(0, $googleScraper->getKeywords());
     // With no keywords left, scrapeAll() is expected to return false.
     $this->assertFalse($googleScraper->scrapeAll());
     $this->assertTrue($googleScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($googleScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $googleScraper->getFetchedPages());
     $this->assertCount(0, $googleScraper->getKeywords());
     $this->assertFalse($googleScraper->serialize('baz'));
     $this->assertTrue($googleScraper->serialize('json', true));
     $this->assertCount(0, $googleScraper->getFetchedPages());
     $this->assertCount(8, $googleScraper->getSerializedPages());
     // File names are collected before save(), as in every scenario below.
     $savedNames = $this->serializedFileNames($googleScraper);
     $this->assertTrue($googleScraper->save(true));
     $this->assertSavedJsonFilesAreValid($parser, $outDir, $savedNames);
     $this->assertTrue($googleScraper->addKeywords(array('foo bad')));
     $this->assertTrue($googleScraper->scrapeAll(3, true));
     $this->assertCount(3, $googleScraper->getFetchedPages());
     $this->assertTrue($googleScraper->serialize('xml', true));
     $this->assertCount(0, $googleScraper->getFetchedPages());
     $this->assertCount(3, $googleScraper->getSerializedPages());
     $savedNames = $this->serializedFileNames($googleScraper);
     $this->assertTrue($googleScraper->save(true));
     $this->assertSavedXmlFilesAreValid($outDir, $savedNames);

     // ---- engines[1] / 'ask': XML first, JSON second --------------------
     $askScraper = Builder::create($this->engines[1], array(array('foo', 'baz'), 'ask'));
     $outDir = $askScraper->getOutDir();
     $this->assertFalse($askScraper->scrape('bar'));
     $this->assertFalse($askScraper->scrape('baz', 100));
     $this->assertFalse($askScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($askScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($askScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertTrue($askScraper->scrape('foo', 2, true, 'Europe/Rome'));
     $this->assertCount(2, $askScraper->getFetchedPages());
     $this->assertCount(1, $askScraper->getKeywords());
     $this->assertTrue($askScraper->scrape('baz', 2, true));
     $this->assertCount(4, $askScraper->getFetchedPages());
     $this->assertCount(0, $askScraper->getKeywords());
     $this->assertFalse($askScraper->scrapeAll());
     $this->assertTrue($askScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($askScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $askScraper->getFetchedPages());
     $this->assertCount(0, $askScraper->getKeywords());
     $this->assertFalse($askScraper->serialize('baz'));
     $this->assertTrue($askScraper->serialize('xml', true));
     $this->assertCount(0, $askScraper->getFetchedPages());
     $this->assertCount(8, $askScraper->getSerializedPages());
     $savedNames = $this->serializedFileNames($askScraper);
     $this->assertTrue($askScraper->save(true));
     // After save() the serialized pages are expected to be gone.
     $this->assertCount(0, $askScraper->getSerializedPages());
     $this->assertSavedXmlFilesAreValid($outDir, $savedNames);
     $this->assertTrue($askScraper->addKeywords(array('foobaz')));
     $this->assertTrue($askScraper->scrapeAll(3, true));
     $this->assertTrue($askScraper->serialize('json', true));
     $savedNames = $this->serializedFileNames($askScraper);
     $this->assertTrue($askScraper->save(true));
     $this->assertSavedJsonFilesAreValid($parser, $outDir, $savedNames);

     // ---- engines[2] / 'bing': JSON first, XML second -------------------
     $bingScraper = Builder::create($this->engines[2], array(array('foo', 'baz'), 'bing'));
     $outDir = $bingScraper->getOutDir();
     $this->assertFalse($bingScraper->scrape('bar'));
     $this->assertFalse($bingScraper->scrape('baz', 100));
     $this->assertFalse($bingScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($bingScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($bingScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertFalse($bingScraper->serialize('json'));
     $this->assertTrue($bingScraper->scrape('foo', 2, true, 'Europe/Berlin'));
     $this->assertCount(2, $bingScraper->getFetchedPages());
     $this->assertCount(1, $bingScraper->getKeywords());
     $this->assertTrue($bingScraper->scrape('baz', 2, true));
     $this->assertCount(4, $bingScraper->getFetchedPages());
     $this->assertCount(0, $bingScraper->getKeywords());
     $this->assertFalse($bingScraper->scrapeAll());
     $this->assertTrue($bingScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($bingScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $bingScraper->getFetchedPages());
     $this->assertCount(0, $bingScraper->getKeywords());
     $this->assertFalse($bingScraper->serialize('baz'));
     $this->assertTrue($bingScraper->serialize('json', true));
     $this->assertCount(0, $bingScraper->getFetchedPages());
     $this->assertCount(8, $bingScraper->getSerializedPages());
     $savedNames = $this->serializedFileNames($bingScraper);
     $this->assertTrue($bingScraper->save(true));
     $this->assertSavedJsonFilesAreValid($parser, $outDir, $savedNames);
     $this->assertTrue($bingScraper->addKeywords(array('foo bad')));
     $this->assertTrue($bingScraper->scrapeAll(2, true));
     $this->assertCount(2, $bingScraper->getFetchedPages());
     $this->assertTrue($bingScraper->serialize('xml', true));
     $this->assertCount(0, $bingScraper->getFetchedPages());
     $this->assertCount(2, $bingScraper->getSerializedPages());
     $savedNames = $this->serializedFileNames($bingScraper);
     $this->assertTrue($bingScraper->save(true));
     $this->assertSavedXmlFilesAreValid($outDir, $savedNames);

     // ---- engines[3] / 'yahoo': XML first, JSON second ------------------
     $yahooScraper = Builder::create($this->engines[3], array(array('foo', 'baz'), 'yahoo'));
     $outDir = $yahooScraper->getOutDir();
     $this->assertFalse($yahooScraper->scrape('bar'));
     $this->assertFalse($yahooScraper->scrape('baz', 100));
     $this->assertFalse($yahooScraper->scrape('baz', 1, 'baz'));
     $this->assertFalse($yahooScraper->scrape('baz', 1, true, 'foobad'));
     $this->assertFalse($yahooScraper->scrape('baz', 1, true, 'UTC', 'faz'));
     $this->assertTrue($yahooScraper->scrape('foo', 2, true, 'Europe/Rome'));
     $this->assertCount(2, $yahooScraper->getFetchedPages());
     $this->assertCount(1, $yahooScraper->getKeywords());
     $this->assertTrue($yahooScraper->scrape('baz', 2, true));
     $this->assertCount(4, $yahooScraper->getFetchedPages());
     $this->assertCount(0, $yahooScraper->getKeywords());
     $this->assertFalse($yahooScraper->scrapeAll());
     $this->assertTrue($yahooScraper->addKeywords(array('foobaz', 'foobar')));
     $this->assertTrue($yahooScraper->scrapeAll(2, true, 'America/Los_Angeles'));
     $this->assertCount(8, $yahooScraper->getFetchedPages());
     $this->assertCount(0, $yahooScraper->getKeywords());
     $this->assertFalse($yahooScraper->serialize('baz'));
     $this->assertTrue($yahooScraper->serialize('xml', true));
     $this->assertCount(0, $yahooScraper->getFetchedPages());
     $this->assertCount(8, $yahooScraper->getSerializedPages());
     $savedNames = $this->serializedFileNames($yahooScraper);
     $this->assertTrue($yahooScraper->save(true));
     $this->assertCount(0, $yahooScraper->getSerializedPages());
     $this->assertSavedXmlFilesAreValid($outDir, $savedNames);
     $this->assertTrue($yahooScraper->addKeywords(array('foobaz')));
     $this->assertTrue($yahooScraper->scrapeAll(3, true));
     $this->assertTrue($yahooScraper->serialize('json', true));
     $savedNames = $this->serializedFileNames($yahooScraper);
     $this->assertTrue($yahooScraper->save(true));
     $this->assertSavedJsonFilesAreValid($parser, $outDir, $savedNames);
 }

 /**
  * Maps the keys of the scraper's serialized pages through
  * FileSystemHelper::generateFileName.
  *
  * Must be called before save(): for some scrapers this test asserts that
  * getSerializedPages() is empty once save() has returned.
  *
  * @param object $scraper Scraper exposing getSerializedPages().
  *
  * @return array File names generated from the serialized page keys.
  */
 private function serializedFileNames($scraper)
 {
     return array_map(
         'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
         array_keys($scraper->getSerializedPages())
     );
 }

 /**
  * Asserts that every named file exists in $outDir and lints as JSON.
  *
  * @param \Seld\JsonLint\JsonParser $parser    Linter shared across checks.
  * @param string                    $outDir    Directory the files were saved to.
  * @param array                     $fileNames File names relative to $outDir.
  */
 private function assertSavedJsonFilesAreValid($parser, $outDir, array $fileNames)
 {
     foreach ($fileNames as $fileName) {
         $path = $outDir . DIRECTORY_SEPARATOR . $fileName;
         // Fail with a clear message instead of feeding `false` to the linter.
         $this->assertFileExists($path);
         // The linter is expected to return null for a document without errors.
         $this->assertNull($parser->lint(file_get_contents($path)));
     }
 }

 /**
  * Asserts that every named file in $outDir can be opened by XMLReader and
  * is reported as valid with the VALIDATE parser property enabled.
  *
  * NOTE(review): isValid() is queried without any read() call, exactly as
  * the original checks did; confirm whether validating the whole document
  * was intended.
  *
  * @param string $outDir    Directory the files were saved to.
  * @param array  $fileNames File names relative to $outDir.
  */
 private function assertSavedXmlFilesAreValid($outDir, array $fileNames)
 {
     foreach ($fileNames as $fileName) {
         $reader = new \XMLReader();
         $this->assertTrue($reader->open($outDir . DIRECTORY_SEPARATOR . $fileName));
         $reader->setParserProperty(\XMLReader::VALIDATE, true);
         $isValid = $reader->isValid();
         // Release the file handle before asserting so a failure cannot leave it open.
         $reader->close();
         $this->assertTrue($isValid);
     }
 }
コード例 #2
0
 /**
  * Checks the object graph produced by Builder::create() for engines[3]:
  * the scraper is a YahooScraper whose parent class is SerpScraper, it
  * exposes a Throttler and a YahooFetcher, and the directories named by its
  * DEFAULT_* class constants exist on disk.
  */
 public function testYahooScraper()
 {
     $yahooScraper = Builder::create($this->engines[3], array(array('baz')));

     // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
     $this->assertSame('Franzip\\SerpScraper\\Scrapers\\SerpScraper', get_parent_class($yahooScraper));
     $this->assertInstanceOf('Franzip\\SerpScraper\\Scrapers\\YahooScraper', $yahooScraper);
     $this->assertInstanceOf('Franzip\\Throttler\\Throttler', $yahooScraper->getThrottler());
     $this->assertInstanceOf('Franzip\\SerpFetcher\\Fetchers\\YahooFetcher', $yahooScraper->getFetcher());

     $defaultDirs = array(
         $yahooScraper::DEFAULT_OUTPUT_DIR,
         $yahooScraper::DEFAULT_FETCHER_CACHE_DIR,
         $yahooScraper::DEFAULT_SERIALIZER_CACHE_DIR,
     );
     foreach ($defaultDirs as $dir) {
         // is_dir() is false for a missing path, so no separate file_exists() check is needed.
         $this->assertTrue(is_dir($dir), sprintf('Expected default directory "%s" to exist', $dir));
     }
 }