// When the pcntl extension is available, route the usual termination signals
// to do_exit() instead of letting them end the script directly.
if (function_exists('pcntl_signal')) {
    // Ticks give PHP a chance to deliver pending signals between statements.
    declare (ticks=1);
    foreach (array(SIGABRT, SIGHUP, SIGQUIT, SIGINT, SIGTERM) as $exitSignal) {
        pcntl_signal($exitSignal, 'do_exit');
    }
    // Do not leave the loop temporary behind in the global scope.
    unset($exitSignal);
}

// Create the Spizer engine from the delay and cookie settings, with LIFO enabled.
$spizer = new Spizer_Engine(array(
    'delay'       => $delay,
    'savecookies' => $opts->savecookies,
    'lifo'        => true,
));

// Log to the SQLite database file named by $log.
$logger = new Spizer_Logger_Sqlite(array('dbfile' => $log));
$spizer->setLogger($logger);

// Link appender handler, configured with the host part of the start URL.
$spizer->addHandler(new Spizer_Handler_LinkAppender(array(
    'domain' => parse_url($url, PHP_URL_HOST),
)));

// Case-insensitive string-match handlers for 200 OK + text/html pages:
// first "error", then "warning".
foreach (array('error', 'warning') as $needle) {
    $spizer->addHandler(new Spizer_Handler_StringMatch(array(
        'match'        => $needle,
        'matchcase'    => false,
        'status'       => 200,
        'content-type' => 'text/html',
    )));
}
unset($needle);

// Start at $url, then run the exit routine once the engine returns.
$spizer->run($url);
do_exit();
// -- end here --
// Some functions
function spizer_usage()
{
    if (!isset($argv)) {
        $argv = $_SERVER['argv'];
    }
    echo <<<USAGE
Spizer - the flexible web spider, v. 0.1
Beispiel #2
0
// Load the class named by $type and use an instance of it, built from the
// logger options in $config, as the engine's logger.
Zend_Loader::loadClass($type);
$logger = new $type($config->logger->options->toArray());
$engine->setLogger($logger);

// Set up the handler objects - same underscore rules apply here as well.
$handlerLoader = new Zend_Loader_PluginLoader(array(
    'Spizer_Handler_' => 'Spizer/Handler',
    'Kumo_Handler'    => 'Kumo/Handler',
));
if ($config->handlers) {
    foreach ($config->handlers as $name => $hconf) {
        $type = $hconf->type;
        // Silently ignore handler entries without a type (@todo: throw an exception?)
        if ($type) {
            // Resolve the handler class through the plugin loader, build it from
            // its options, name it after its config key and register it.
            $handlerClass = $handlerLoader->load($type);
            $handler = new $handlerClass($hconf->options->toArray());
            $handler->setHandlerName($name);
            $engine->addHandler($handler);
        }
    }
}

// When the pcntl extension is available, route the usual termination signals
// to do_exit().
if (function_exists('pcntl_signal')) {
    // Ticks give PHP a chance to deliver pending signals between statements.
    declare (ticks=1);
    foreach (array(SIGABRT, SIGHUP, SIGQUIT, SIGINT, SIGTERM) as $exitSignal) {
        pcntl_signal($exitSignal, 'do_exit');
    }
    // Do not leave the loop temporary behind in the global scope.
    unset($exitSignal);
}

// Start at $url.
$engine->run($url);
// Run queues
//var_dump($queue);
Beispiel #3
0
// When the pcntl extension is available, route the usual termination signals
// to do_exit() instead of letting them end the script directly.
if (function_exists('pcntl_signal')) {
    // Ticks give PHP a chance to deliver pending signals between statements.
    declare (ticks=1);
    foreach (array(SIGABRT, SIGHUP, SIGQUIT, SIGINT, SIGTERM) as $exitSignal) {
        pcntl_signal($exitSignal, 'do_exit');
    }
    // Do not leave the loop temporary behind in the global scope.
    unset($exitSignal);
}

// Create the Spizer engine from the delay and cookie settings, with LIFO enabled.
$spizer = new Spizer_Engine(array(
    'delay'       => $delay,
    'savecookies' => $opts->savecookies,
    'lifo'        => true,
));

// Log to the SQLite database file named by $log.
$logger = new Spizer_Logger_Sqlite(array('dbfile' => $log));
$spizer->setLogger($logger);

// Link appender handler, configured with the host part of the start URL.
$spizer->addHandler(new Spizer_Handler_LinkAppender(array(
    'domain' => parse_url($url, PHP_URL_HOST),
)));

// Debug handler with its 'do' option switched on.
$spizer->addHandler(new Kumo_Handler_Debug(array('do' => true)));

/*
 * Disabled example: a scrape-and-request-sender handler.
 *
$spizer->addHandler(new Kumo_Handler_ScrapeAndRequestSender(array(
    'queueAdapter' => 'Array',
    'queueOptions' => array(
        'name' => 'test'),
    'expression' => '//img',
    'type' => '@src',
)));
 */

// Start at $url, then run the exit routine once the engine returns.
$spizer->run($url);
do_exit();
// -- end here --
// Some functions