/**
 * Perform one cron 'tick' of crawl processing.
 *
 * Starts a new crawl (seeding the queue and opening a history record) when no
 * crawl is in progress, then keeps processing the robot's queue until either
 * the queue reports nothing more to do or the soft time limit
 * ($config->maxcrontime seconds) for this tick has elapsed. Finally the
 * crawl statistics for this tick are written to the history record.
 *
 * @param bool $verbose passed straight through to the robot's process_queue()
 * @return void
 */
function local_linkchecker_robot_crawl($verbose = false) {
    global $CFG, $DB;

    $robot = new \local_linkchecker_robot\robot\crawler();
    $config = $robot::get_config();
    $crawlstart = $config->crawlstart;
    $crawlend = $config->crawlend;

    $history = false;
    if (!$crawlstart || $crawlstart <= $crawlend) {
        // No crawl has ever started, or the last one has finished:
        // start a new crawl and push the seed url into the crawl queue.
        $crawlstart = time();
        set_config('crawlstart', $crawlstart, 'local_linkchecker_robot');
        $robot->mark_for_crawl($CFG->wwwroot . '/', $config->seedurl);
    } else {
        // A crawl is in progress, so continue with its history record.
        // get_record() returns false when no matching row exists.
        $history = $DB->get_record('linkchecker_history', array('startcrawl' => $crawlstart));
    }

    if (!$history) {
        // Either a brand new crawl, or the record of the running crawl is
        // missing. Create one so the updates below always have a valid row
        // to write to instead of failing on a false value.
        $history = new stdClass();
        $history->startcrawl = $crawlstart;
        $history->urls = 0;
        $history->links = 0;
        $history->broken = 0;
        $history->oversize = 0;
        $history->cronticks = 0;
        $history->id = $DB->insert_record('linkchecker_history', $history);
    }

    // While we are not exceeding the maxcron time, and the queue is not empty
    // find the next url in the queue and crawl it.
    $cronstop = time() + $config->maxcrontime;
    $hasmore = true;
    $hastime = true;
    while ($hasmore && $hastime) {
        $hasmore = $robot->process_queue($verbose);
        $hastime = time() < $cronstop;
        // Record a heartbeat after every processed queue item.
        set_config('crawltick', time(), 'local_linkchecker_robot');
    }

    if (!$hasmore) {
        // The queue reported nothing more to process, so mark the crawl as
        // ended. This is checked directly rather than inferred from leftover
        // time, so a queue that drains just as the time budget expires is
        // still closed in this tick. One timestamp keeps the history record
        // and the plugin config in agreement.
        $now = time();
        $history->endcrawl = $now;
        set_config('crawlend', $now, 'local_linkchecker_robot');
    }

    // Persist the robot's counters and count this tick.
    $history->urls = $robot->get_processed();
    $history->links = $robot->get_num_links();
    $history->broken = $robot->get_num_broken_urls();
    $history->oversize = $robot->get_num_oversize();
    $history->cronticks++;
    $DB->update_record('linkchecker_history', $history);
}