/**
 * Registers a new URL as a Page record.
 *
 * Reads the "url" argument, rejects it when it is not a valid URL or when a
 * Page with the same URL is already stored, and otherwise persists a new
 * Page with depth 0.
 *
 * @param InputInterface  $input  Console input; provides the "url" argument.
 * @param OutputInterface $output Console output used for the result message.
 */
protected function execute(InputInterface $input, OutputInterface $output)
 {
     $doctrine = $this->getContainer()->get('doctrine');
     $entityManager = $doctrine->getManager();
     $pages = $doctrine->getRepository('S2bCrawlerBundle:Page');
     $candidateUrl = $input->getArgument('url');

     $isValidUrl = filter_var($candidateUrl, FILTER_VALIDATE_URL) !== false;
     if (!$isValidUrl) {
         $output->writeln('<error>Invalid url</error>');
         return;
     }

     // Refuse URLs that already have a Page record.
     if ($pages->findOneByUrl($candidateUrl)) {
         $output->writeln('<error>Url already in database</error>');
         return;
     }

     // The depth is always 0 here; no "depth" option is read by this command.
     $newPage = new Page();
     $newPage->setUrl($candidateUrl);
     $newPage->setDepth(0);

     $entityManager->persist($newPage);
     $entityManager->flush();

     $output->writeln('Url added successfully');
 }
 /**
  * Crawls a single stored page and queues the links found on it.
  *
  * Looks the page up by the "id" argument, downloads its URL, stores the body
  * and the filtered links in a PageCrawled record, and persists every link
  * that is not yet in the database as a new Page one level deeper than the
  * crawled page.
  *
  * A 404 response marks the page as crawled without content. A 503 response
  * is retried after a one-second pause, up to three attempts in total.
  *
  * @param InputInterface  $input  Console input; provides the "id" argument.
  * @param OutputInterface $output Console output used for progress and errors.
  *
  * @throws ClientException When the request keeps failing with 503, or fails
  *                         with any other status than 404.
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $doctrine = $this->getContainer()->get('doctrine');
     $em = $doctrine->getManager();
     $pageRepository = $doctrine->getRepository('S2bCrawlerBundle:Page');
     $id = $input->getArgument('id');
     $page = $pageRepository->findOneById($id);
     if (!$page) {
         $output->writeln("<error>Page not found</error>");
         return;
     }
     if ($page->isCrawled()) {
         $output->writeln("<error>Page already crawled</error>");
         return;
     }
     $client = new Client();
     // Total number of requests made before a repeated 503 is given up on.
     $maxAttempts = 3;
     $attempt = 0;
     // Loop until a response is obtained; every failure path below either
     // returns, retries, or rethrows, so $response is always set afterwards.
     while (true) {
         $attempt++;
         try {
             $response = $client->get($page->getUrl());
             break;
         } catch (ClientException $e) {
             $status = (int) $e->getResponse()->getStatusCode();
             if ($status === 404) {
                 // The page does not exist: mark it as crawled so it is not
                 // picked up again, but store no content for it.
                 $output->writeln('<error>Not found ' . $page->getUrl() . '</error>');
                 $page->setCrawledAt(new \DateTime());
                 $em->persist($page);
                 $em->flush();
                 return;
             }
             // NOTE(review): if Client is Guzzle, 5xx responses are usually
             // raised as a different exception class than ClientException, so
             // this branch may never run - confirm against the imports.
             if ($status === 503 && $attempt < $maxAttempts) {
                 sleep(1);
                 continue;
             }
             // Any other status (or an exhausted retry) cannot be handled
             // here; rethrow instead of continuing without a response.
             throw $e;
         }
     }
     // Read the body once and reuse it for link parsing and storage.
     $content = (string) $response->getBody();
     $links = $this->parseLinks($content, $page->getUrl());
     $links = $this->filterLinks($links);
     $crawled = new PageCrawled();
     $crawled->setPage($page)->setContent($content)->setLinks($links);
     $page->setCrawled($crawled)->setCrawledAt(new \DateTime());
     if ($links) {
         $links_added = 0;
         foreach ($links as $i => $link) {
             if ($pageRepository->findOneByUrl($link)) {
                 if ($output->isVeryVerbose()) {
                     $output->writeln("Skipped #" . $i . ' ' . $link . ' - already in database');
                 }
                 continue;
             }
             $linkPage = new Page();
             $linkPage->setUrl($link)->setDepth($page->getDepth() + 1);
             $em->persist($linkPage);
             // Flushed per link (as before) so that a URL occurring twice in
             // $links is already stored when the lookup above sees it again.
             $em->flush();
             $links_added++;
             if ($output->isVerbose()) {
                 $output->writeln("Added #" . $i . ' ' . $link);
             }
         }
         $output->writeln('Added ' . $links_added . '/' . count($links) . ' links');
     }
     $em->persist($crawled);
     $em->persist($page);
     $em->flush();
     $output->writeln('Crawled ' . $page->getUrl());
 }