}

/**
 * Set up the logger object
 *
 * The logger type is defined in the configuration file - if it contains an
 * underscore in its name, it is considered to be a user-defined logger - if
 * no underscore is found, the default 'Spizer_Logger_' prefix is added to
 * the class name.
 *
 * The logger's 'options' section is optional: when it is absent the logger
 * is constructed with an empty options array rather than aborting with a
 * fatal "call to a member function toArray() on null" error.
 */
$type = $config->logger->type;
if (strpos($type, '_') === false) {
    $type = 'Spizer_Logger_' . $type;
}
Zend_Loader::loadClass($type);

$loggerOptions = $config->logger->options;
$loggerOptions = ($loggerOptions === null) ? array() : $loggerOptions->toArray();

$logger = new $type($loggerOptions);
$engine->setLogger($logger);

// Set up the handler objects. Handler class names are resolved through a
// plugin loader that searches the registered class prefixes / paths below.
$handlerLoader = new Zend_Loader_PluginLoader(array(
    'Spizer_Handler_' => 'Spizer/Handler',
    'Kumo_Handler'    => 'Kumo/Handler'
));

if ($config->handlers) {
    foreach ($config->handlers as $name => $hconf) {
        $type = $hconf->type;
        if (!$type) {
            // Silently ignore badly-defined handlers (@todo: Throw exception?)
            continue;
        }

        $handlerClass = $handlerLoader->load($type);

        // As with the logger, a handler may be declared without any options;
        // fall back to an empty array instead of calling toArray() on null.
        $handlerOptions = $hconf->options;
        $handlerOptions = ($handlerOptions === null) ? array() : $handlerOptions->toArray();

        $handler = new $handlerClass($handlerOptions);
        $handler->setHandlerName($name);
        $engine->addHandler($handler);
    }
}
exit(1);
}

// When the pcntl functions are available, route the termination-style
// signals listed below to do_exit()
if (function_exists('pcntl_signal')) {
    declare (ticks=1);

    foreach (array(SIGABRT, SIGHUP, SIGQUIT, SIGINT, SIGTERM) as $signal) {
        pcntl_signal($signal, 'do_exit');
    }
}

// Build the Spizer engine from the run-time settings
$engineOptions = array(
    'delay'       => $delay,
    'savecookies' => $opts->savecookies,
    'lifo'        => true
);
$spizer = new Spizer_Engine($engineOptions);

// Attach a Sqlite logger, passing $log as its 'dbfile' option
$logger = new Spizer_Logger_Sqlite(array('dbfile' => $log));
$spizer->setLogger($logger);

// Register the link appender (makes the spider follow links, hrefs, images
// and script references), passing the start URL's host as its 'domain' option
$startHost = parse_url($url, PHP_URL_HOST);
$spizer->addHandler(new Spizer_Handler_LinkAppender(array('domain' => $startHost)));

// Register one case-insensitive string-match handler per keyword, each
// limited to 200 OK + text/html pages
foreach (array('error', 'warning') as $needle) {
    $spizer->addHandler(new Spizer_Handler_StringMatch(array(
        'match'        => $needle,
        'matchcase'    => false,
        'status'       => 200,
        'content-type' => 'text/html'
    )));
}

// Go!
$spizer->run($url);
do_exit();

// -- end here --

// Some functions
function spizer_usage()
{
    if (!isset($argv)) {
        $argv = $_SERVER['argv'];
    }