/**
 * Smoke-runs a crawl against the local scrape fixture site.
 *
 * Registers the anchor, CSS, image and JS bites (in that order) on a fresh
 * spider and starts the crawl. No assertions are made on the outcome.
 */
public function testSpider() {
    $crawler = new Spider('http://localhost/kayalion/scrape');

    $bites = array(
        new AnchorSpiderBite(),
        new CssSpiderBite(),
        new ImageSpiderBite(),
        new JsSpiderBite(),
    );

    foreach ($bites as $bite) {
        $crawler->addBite($bite);
    }

    $crawler->crawl();
}
/**
 * Default action: shows the spider form and, on submit, launches a crawl
 * in a background PHP process.
 *
 * Flow:
 * - Any extra action arguments result in a 404.
 * - A cancelled form writes '1' to the session's cancel file and returns
 *   without setting a view.
 * - A valid submission serializes a configured Spider to the session's
 *   spider file, starts the background crawl command and returns without
 *   setting a view.
 * - Otherwise (first visit, or validation failure) the form view is set on
 *   the response. When a spider file already exists for this session, the
 *   URL field and submit button are disabled.
 *
 * @return null
 */
public function indexAction() {
    if (func_get_args()) {
        $this->setError404();
        return;
    }

    $id = $this->getSession()->getId();
    $fileSpider = new File(self::PATH_DATA, $id . self::SUFFIX_SPIDER);

    $baseUrl = $this->request->getBaseUrl();
    $basePath = $this->request->getBasePath();

    // When the base path equals the base URL, the form action gets a trailing slash.
    $formAction = $basePath;
    if ($basePath == $baseUrl) {
        $formAction .= '/';
    }

    $form = new SpiderForm($formAction);
    if ($form->isSubmitted()) {
        if ($form->isCancelled()) {
            // Write the cancel marker file for this session.
            $fileCancel = new File(self::PATH_DATA, $id . self::SUFFIX_CANCEL);
            $fileCancel->write('1');
            return;
        }

        try {
            $form->validate();

            $url = $form->getUrl();
            $delay = $form->getDelay();
            $ignore = explode("\n", $form->getIgnore());

            $spider = new Spider($url);

            // One ignore regex per line; blank lines are skipped.
            foreach ($ignore as $ignoreRegex) {
                $ignoreRegex = trim($ignoreRegex);
                if (!$ignoreRegex) {
                    continue;
                }

                $spider->addIgnoreRegex($ignoreRegex);
            }

            $spider->addBite(new AnchorSpiderBite());
            $spider->addBite(new CssSpiderBite());
            $spider->addBite(new CssImageSpiderBite());
            $spider->addBite(new CssImportSpiderBite());
            $spider->addBite(new ImageSpiderBite());
            $spider->addBite(new JsSpiderBite());

            $spider->addReport(new ErrorReport());
            $spider->addReport(new RedirectReport());
            $spider->addReport(new SuccessReport());
            $spider->addReport(new ImageReport());
            $spider->addReport(new CssReport());
            $spider->addReport(new JsReport());
            $spider->addReport(new ExternalReport());
            $spider->addReport(new MailtoReport());
            $spider->addReport(new IgnoredReport());

            // Make sure the data directory exists before persisting the spider.
            $parent = $fileSpider->getParent();
            $parent->create();
            $fileSpider->write(serialize($spider));

            $php = Zibo::getInstance()->getConfigValue(self::CONFIG_PHP_COMMAND, self::DEFAULT_PHP_COMMAND);

            // The script path, session id and delay are quoted so that neither
            // whitespace nor shell metacharacters in them can alter the command.
            // The configured PHP command is left as is since it may carry its
            // own arguments.
            $command = $php
                . ' ' . escapeshellarg($_SERVER['SCRIPT_FILENAME'])
                . ' ' . escapeshellarg('spider/crawl/' . $id . '/' . $delay)
                . ' > /dev/null 2> /dev/null & echo $!';

            System::execute($command);

            return;
        } catch (ValidationException $exception) {
            // Fall through and render the form with the validation errors.
            $form->setValidationException($exception);
        }
    }

    if ($fileSpider->exists()) {
        // NOTE(review): unserialize() on file contents is only safe as long as
        // nothing but this controller can write to the data directory - confirm.
        $fileSpiderContent = $fileSpider->read();
        $spider = unserialize($fileSpiderContent);

        $form->setUrl($spider->getBaseUrl());
        $form->setIsDisabled(true, SpiderForm::FIELD_URL);
        $form->setIsDisabled(true, SpiderForm::BUTTON_SUBMIT);
    }

    $statusUrl = $basePath . '/status/' . $id;
    $reportUrl = $basePath . '/report/' . $id;

    $view = new SpiderView($form, $statusUrl, $reportUrl);
    $view->setTitle('spider.title', true);

    $this->response->setView($view);
}