Example #1
0
 /**
  * Runs a crawl of http://localhost/kayalion/scrape with the anchor, CSS,
  * image and JavaScript bites attached.
  *
  * The method itself contains no assertions; it only drives the crawl.
  *
  * @return null
  */
 public function testSpider()
 {
     $crawler = new Spider('http://localhost/kayalion/scrape');

     // Bites are registered in the order listed here.
     $bites = array(
         new AnchorSpiderBite(),
         new CssSpiderBite(),
         new ImageSpiderBite(),
         new JsSpiderBite(),
     );
     foreach ($bites as $bite) {
         $crawler->addBite($bite);
     }

     $crawler->crawl();
 }
 /**
  * Shows the spider form and, when it is submitted, launches a crawl.
  *
  * Flow, as implemented below:
  * - any extra action argument results in a 404;
  * - a cancelled form writes a cancel marker file for the current session
  *   and returns without setting a view;
  * - a valid submission builds a Spider, serializes it to the session's
  *   spider file and starts a background PHP process for
  *   "spider/crawl/<session id>/<delay>", then returns without setting a view;
  * - otherwise (initial display or validation failure) the form view is set;
  *   when a spider file already exists for the session, the URL field and
  *   the submit button are disabled.
  *
  * @return null
  */
 public function indexAction()
 {
     if (func_get_args()) {
         $this->setError404();
         return;
     }

     $id = $this->getSession()->getId();
     $fileSpider = new File(self::PATH_DATA, $id . self::SUFFIX_SPIDER);

     $baseUrl = $this->request->getBaseUrl();
     $basePath = $this->request->getBasePath();

     // When the action lives at the root of the base URL, the form action
     // gets a trailing slash.
     $formAction = $basePath;
     if ($basePath == $baseUrl) {
         $formAction .= '/';
     }

     $form = new SpiderForm($formAction);
     if ($form->isSubmitted()) {
         if ($form->isCancelled()) {
             // Drop a marker file for this session to signal the cancel.
             $fileCancel = new File(self::PATH_DATA, $id . self::SUFFIX_CANCEL);
             $fileCancel->write('1');
             return;
         }

         try {
             $form->validate();

             $url = $form->getUrl();
             $delay = $form->getDelay();
             $ignore = explode("\n", $form->getIgnore());

             $spider = new Spider($url);

             // One ignore pattern per line; blank lines are skipped.
             foreach ($ignore as $ignoreRegex) {
                 $ignoreRegex = trim($ignoreRegex);
                 // Compare against the empty string explicitly: a plain
                 // truthiness check would also discard the pattern "0".
                 if ($ignoreRegex === '') {
                     continue;
                 }

                 $spider->addIgnoreRegex($ignoreRegex);
             }

             $spider->addBite(new AnchorSpiderBite());
             $spider->addBite(new CssSpiderBite());
             $spider->addBite(new CssImageSpiderBite());
             $spider->addBite(new CssImportSpiderBite());
             $spider->addBite(new ImageSpiderBite());
             $spider->addBite(new JsSpiderBite());

             $spider->addReport(new ErrorReport());
             $spider->addReport(new RedirectReport());
             $spider->addReport(new SuccessReport());
             $spider->addReport(new ImageReport());
             $spider->addReport(new CssReport());
             $spider->addReport(new JsReport());
             $spider->addReport(new ExternalReport());
             $spider->addReport(new MailtoReport());
             $spider->addReport(new IgnoredReport());

             // Make sure the data directory exists before persisting the spider.
             $parent = $fileSpider->getParent();
             $parent->create();
             $fileSpider->write(serialize($spider));

             $php = Zibo::getInstance()->getConfigValue(self::CONFIG_PHP_COMMAND, self::DEFAULT_PHP_COMMAND);

             // The script path, session id and user-supplied delay are
             // shell-escaped so they cannot inject extra commands and so a
             // script path containing spaces keeps working. The configured
             // PHP command is left as is because it may carry its own options.
             $command = $php
                 . ' ' . escapeshellarg($_SERVER['SCRIPT_FILENAME'])
                 . ' ' . escapeshellarg('spider/crawl/' . $id . '/' . $delay)
                 . ' > /dev/null 2> /dev/null & echo $!';

             // Output is discarded and the process is backgrounded; "echo $!"
             // prints the PID of the background process.
             System::execute($command);

             return;
         } catch (ValidationException $exception) {
             // Fall through and redisplay the form with the validation errors.
             $form->setValidationException($exception);
         }
     }

     if ($fileSpider->exists()) {
         // NOTE(review): unserialize() instantiates whatever the file
         // describes; this is only safe as long as nothing but this
         // application can write to the data directory - confirm.
         $spider = unserialize($fileSpider->read());

         // The file can be unreadable or incomplete, in which case
         // unserialize() returns false; only use the result when it really
         // is a Spider.
         if ($spider instanceof Spider) {
             $form->setUrl($spider->getBaseUrl());
         }

         // A spider file exists for this session, so starting another crawl
         // is blocked regardless of whether the file could be read.
         $form->setIsDisabled(true, SpiderForm::FIELD_URL);
         $form->setIsDisabled(true, SpiderForm::BUTTON_SUBMIT);
     }

     $statusUrl = $basePath . '/status/' . $id;
     $reportUrl = $basePath . '/report/' . $id;

     $view = new SpiderView($form, $statusUrl, $reportUrl);
     $view->setTitle('spider.title', true);

     $this->response->setView($view);
 }