/**
 * Checks how is_web_crawler() classifies a set of sample user agent strings.
 *
 * Every sample is written to $_SERVER['HTTP_USER_AGENT'] before is_web_crawler()
 * is called (presumably that is where the function reads the agent from - confirm
 * against its implementation). Regular browser agents must yield false and known
 * search engine agents must yield true.
 */
public function test_is_web_crawler() {
    $browsers = array(
        'Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/18.0 Firefox/18.0',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/412 (KHTML, like Gecko) Safari/412',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10',
        'Opera/9.0 (Windows NT 5.1; U; en)',
        'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 –Nexus',
        'Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5',
    );

    $crawlers = array(
        'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
        'Googlebot-Image/1.0',
        'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
        'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
        'Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)',
        'msnbot/2.1',
        'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
        'Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)',
        'AltaVista V2.0B crawler@evreka.com',
        'ZoomSpider - wrensoft.com [ZSEBOT]',
        'Baiduspider+(+http://www.baidu.com/search/spider_jp.html)',
        'Baiduspider+(+http://www.baidu.com/search/spider.htm)',
        'BaiDuSpider',
        'User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma)',
    );

    // Remember the incoming value so the samples below do not stay behind in $_SERVER.
    $hadagent = array_key_exists('HTTP_USER_AGENT', $_SERVER);
    $originalagent = $hadagent ? $_SERVER['HTTP_USER_AGENT'] : null;

    foreach ($browsers as $agent) {
        $_SERVER['HTTP_USER_AGENT'] = $agent;
        // Name the agent in the message, otherwise a failure does not tell which sample broke.
        $this->assertFalse(is_web_crawler(), "{$agent} should not be considered a search engine");
    }

    foreach ($crawlers as $agent) {
        $_SERVER['HTTP_USER_AGENT'] = $agent;
        $this->assertTrue(is_web_crawler(), "{$agent} should be considered a search engine");
    }

    // Put the original user agent back; this point is only reached when every assertion passed.
    if ($hadagent) {
        $_SERVER['HTTP_USER_AGENT'] = $originalagent;
    } else {
        unset($_SERVER['HTTP_USER_AGENT']);
    }
}
/**
 * Initialise $_SESSION, handles google access
 * and sets up not-logged-in user properly.
 *
 * The PHP session id is matched against the 'sessions' table. A session that already
 * carries a user id and has not timed out only gets its tracking record refreshed.
 * In every other case the session is (re)started, either as guest (search engine
 * access) or as an empty not-logged-in session, and a new tracking record is added.
 *
 * WARNING: $USER and $SESSION are set up later, do not use them yet!
 *
 * @param bool $newsid is this a new session in first http request?
 */
protected static function initialise_user_session($newsid) {
    global $CFG, $DB;

    $sid = session_id();
    if (!$sid) {
        // No session id at all, very weird - log it and continue with an empty session.
        error_log('Missing session ID, session not started!');
        self::init_empty_session();
        return;
    }

    $fields = 'id, sid, state, userid, lastip, timecreated, timemodified';
    $record = $DB->get_record('sessions', array('sid' => $sid), $fields);
    if (!$record) {
        if (!$newsid) {
            if (!empty($_SESSION['USER']->id)) {
                // Should not happen; only log it, we MUST not produce any output here!
                error_log("Cannot find session record {$sid} for user " . $_SESSION['USER']->id . ", creating new session.");
            }
            // Switch to a fresh id to prevent session fixation attacks.
            session_regenerate_id(true);
        }
        $_SESSION = array();
    }
    unset($sid);

    if (isset($_SESSION['USER']->id)) {
        // When a real user is recorded in the session, that id is the one tracked.
        $userid = empty($_SESSION['USER']->realuser) ? $_SESSION['USER']->id : $_SESSION['USER']->realuser;

        // Verify timeout first. Guests and not-logged-in users never time out,
        // there is very little risk there.
        $maxlifetime = $CFG->sessiontimeout;
        $timeout = false;
        if (!isguestuser($userid) && !empty($userid) && $record->timemodified < time() - $maxlifetime) {
            $timeout = true;
            // Any enabled auth plugin, asked in sequence, may veto the timeout.
            foreach (get_enabled_auth_plugins() as $authname) {
                $authplugin = get_auth_plugin($authname);
                if ($authplugin->ignore_timeout_hook($_SESSION['USER'], $record->sid, $record->timecreated, $record->timemodified)) {
                    $timeout = false;
                    break;
                }
            }
        }

        if (!$timeout) {
            // Still valid: refresh the session tracking record and stop here.
            $changes = array();

            if ($record->userid != $userid) {
                $changes['userid'] = $record->userid = $userid;
            }

            $ip = getremoteaddr();
            if ($record->lastip != $ip) {
                $changes['lastip'] = $record->lastip = $ip;
            }

            $updatefreq = empty($CFG->session_update_timemodified_frequency) ? 20 : $CFG->session_update_timemodified_frequency;
            // The first update of an existing record is always done; afterwards the
            // modified time is written at most once per $updatefreq seconds (20 by default).
            if ($record->timemodified == $record->timecreated || $record->timemodified < time() - $updatefreq) {
                $changes['timemodified'] = $record->timemodified = time();
            }

            if ($changes) {
                $changes['id'] = $record->id;
                $DB->update_record('sessions', (object)$changes);
            }
            return;
        }

        // Timed out: throw away the session together with its tracking record.
        session_regenerate_id(true);
        $_SESSION = array();
        $DB->delete_records('sessions', array('id' => $record->id));

    } else if ($record) {
        // This happens when people switch session handlers...
        session_regenerate_id(true);
        $_SESSION = array();
        $DB->delete_records('sessions', array('id' => $record->id));
    }
    unset($record);

    // A missing SESSION object on a sid that is not new is reported as a timeout.
    $timedout = false;
    if (!isset($_SESSION['SESSION'])) {
        $_SESSION['SESSION'] = new \stdClass();
        $timedout = !$newsid;
    }

    $user = null;
    if (!empty($CFG->opentogoogle)) {
        if (is_web_crawler()) {
            $user = guest_user();
        }
        if (!empty($CFG->guestloginbutton) && !$user && !empty($_SERVER['HTTP_REFERER'])) {
            // Automatically log in users coming from search engine results.
            $referer = $_SERVER['HTTP_REFERER'];
            if (strpos($referer, 'google') !== false || strpos($referer, 'altavista') !== false) {
                $user = guest_user();
            }
        }
    }

    // Setup $USER and insert the session tracking record.
    if ($user) {
        self::set_user($user);
        self::add_session_record($user->id);
    } else {
        self::init_empty_session();
        self::add_session_record(0);
    }

    // Flag the timeout only now, after the session has been set up above.
    if ($timedout) {
        $_SESSION['SESSION']->has_timed_out = true;
    }
}
/**
 * Initialise $USER object, handles google access
 * and sets up not logged in user properly.
 *
 * Does nothing when the session already holds a user id. Otherwise the user handed to
 * session_set_user() is either the guest user (search engine access) or a
 * placeholder object with id 0.
 *
 * @return void
 */
protected function check_user_initialised() {
    global $CFG;

    if (isset($_SESSION['USER']->id)) {
        // The user is already set up, nothing left to do.
        return;
    }

    $user = null;
    if (!empty($CFG->opentogoogle) && !NO_MOODLE_COOKIES) {
        if (is_web_crawler()) {
            $user = guest_user();
        }
        if (!empty($CFG->guestloginbutton) && !$user && !empty($_SERVER['HTTP_REFERER'])) {
            // Automatically log in users coming from search engine results.
            $referer = $_SERVER['HTTP_REFERER'];
            if (strpos($referer, 'google') !== false || strpos($referer, 'altavista') !== false) {
                $user = guest_user();
            }
        }
    }

    if (!$user) {
        $user = new stdClass();
        // Id 0 enables proper function of the $CFG->notloggedinroleid hack.
        $user->id = 0;
        // Use the configured local mnet host id, or 1 when it is not set.
        $user->mnethostid = isset($CFG->mnet_localhost_id) ? $CFG->mnet_localhost_id : 1;
    }

    session_set_user($user);
}
case 'posts' : $searchterms = array('userid:'.$user->id); $extrasql = ''; break; default: $searchterms = array('userid:'.$user->id); $extrasql = 'AND p.parent = 0'; break; } echo '<div class="user-content">'; if ($course->id == SITEID) { $searchcourse = SITEID; if (empty($CFG->forceloginforprofiles) or (isloggedin() and !isguestuser() and !is_web_crawler())) { // Search throughout the whole site. $searchcourse = 0; } } else { // Search only for posts the user made in this course. $searchcourse = $course->id; } // Get the posts. if ($posts = forum_search_posts($searchterms, $searchcourse, $page*$perpage, $perpage, $totalcount, $extrasql)) { require_once($CFG->dirroot.'/rating/lib.php'); $baseurl = new moodle_url('user.php', array('id' => $user->id, 'course' => $course->id, 'mode' => $mode, 'perpage' => $perpage)); echo $OUTPUT->paging_bar($totalcount, $page, $perpage, $baseurl);