/**
 * Tell whether the base domain of a URL is on the banned list.
 *
 * Reads two globals:
 *  - $IGNORE_BANNED_DOMAINS: when truthy, the check is skipped and false is returned.
 *  - $PHP_INCLUDE_PATH: prefix used to locate parse_domain.php.
 *
 * @param string $url URL handed to get_base_domain().
 * @return boolean true when lookup_banned() yields at least one row, false otherwise.
 */
function is_banned_domain($url)
{
    // PHP function scope does not see globals unless they are imported
    // explicitly; without this line both variables are always undefined here,
    // so the "ignore" switch never fires and the include path prefix is empty.
    global $IGNORE_BANNED_DOMAINS, $PHP_INCLUDE_PATH;

    if ($IGNORE_BANNED_DOMAINS) {
        return false;
    }

    // Presumably the file that provides get_base_domain() -- confirm.
    require_once $PHP_INCLUDE_PATH . "parse_domain.php";

    $domain = get_base_domain($url);

    // Loose comparison kept on purpose: a failed lookup makes mysql_num_rows()
    // return a falsy value, which is treated as "not banned" as before.
    return mysql_num_rows(lookup_banned($domain)) != 0;
}
/**
 * Resolve (once) and return the ID of the referer's base domain in T_basedomains.
 *
 * The value is cached in $this->referer_domain_ID; later calls return the
 * cached value without querying again. When the base domain is not yet in
 * the table, a row is inserted with a dom_type derived from the hit.
 *
 * @return integer (may be NULL, but should never).
 */
function get_referer_domain_ID()
{
    if (isset($this->referer_domain_ID)) {
        // Already resolved on a previous call.
        return $this->referer_domain_ID;
    }

    global $DB;

    $base_domain = get_base_domain($this->referer);
    if (!$base_domain) {
        // The referer yields no base domain: nothing to look up or insert.
        return $this->referer_domain_ID;
    }

    // Have we met this base domain before?
    $known_row = $DB->get_row(' SELECT dom_ID FROM T_basedomains WHERE dom_name = ' . $DB->quote($base_domain));
    if (!empty($known_row->dom_ID)) {
        // Yes: reuse the stored ID.
        // fp> The blacklist handling that was here made no sense.
        $this->referer_domain_ID = $known_row->dom_ID;

        return $this->referer_domain_ID;
    }

    // First visit from this base domain: classify it before storing.
    if ($this->agent_type == 'robot' || $this->hit_type == 'rss') {
        $this->dom_type = 'aggregator';
    } elseif ($this->referer_type == 'search') {
        $this->dom_type = 'searcheng';
    } elseif ($this->referer_type == 'referer' || $this->referer_type == 'self') {
        $this->dom_type = 'normal';
    }

    // The INSERT can fail, probably when two simultaneous hits race
    // (seen in the demo logfiles), hence the saved/restored error state.
    $DB->save_error_state();

    $inserted = $DB->query(' INSERT INTO T_basedomains( dom_name, dom_type) VALUES( ' . $DB->quote($base_domain) . ', ' . $DB->quote($this->dom_type) . ' )');
    if ($inserted) {
        // INSERTed ok:
        $this->referer_domain_ID = $DB->insert_id;
    } else {
        // INSERT failed: try to select again (may become/stay NULL).
        $this->referer_domain_ID = $DB->get_var(' SELECT dom_ID FROM T_basedomains WHERE dom_name = ' . $DB->quote($base_domain));
    }

    $DB->restore_error_state();

    return $this->referer_domain_ID;
}
/**
 * Test {@link get_base_domain()}
 *
 * Table-driven: every row names the assertion method to call on this test
 * case, the input handed to get_base_domain(), and the expected result.
 * Rows run in the order listed.
 */
function test_get_base_domain()
{
    $this->change_global('evo_charset', 'iso-8859-1');

    $long_tail = '/path/1/2/3/page.html?param=hello#location';

    $cases = array(
        array('assertEqual', '', ''), // Example: empty referer
        array('assertEqual', 'hostname', 'hostname'),
        array('assertEqual', 'http://hostname', 'hostname'),
        array('assertEqual', 'www.example.com', 'example.com'),
        // We no longer treat www2.ex.com equal to ex.com
        array('assertEqual', 'www2.example.com', 'www2.example.com'),
        array('assertEqual', 'subdom.example.com', 'subdom.example.com'),
        array('assertEqual', 'https://www.hello.example.com' . $long_tail, 'hello.example.com'),
        array('assertEqual', 'https://www.sub1.hello.example.com' . $long_tail, 'hello.example.com'),
        array('assertEqual', 'https://sub1.hello.example.com' . $long_tail, 'hello.example.com'),
        array('assertEqual', 'https://hello.example.com' . $long_tail, 'hello.example.com'),
        array('assertEqual', 'https://hello.example.com:8080' . $long_tail, 'hello.example.com'),

        // Anchor after domain name, used by spammers:
        array('assertEqual', 'http://example.com#anchor', 'example.com'),
        array('assertEqual', 'http://example.com/#anchor', 'example.com'),

        // "-" is a valid char:
        array('assertEqual', 'host-name', 'host-name'),
        array('assertEqual', 'www-2.host-name.tld', 'www-2.host-name.tld'),

        // IDN:
        // NOTE(review): these literals are reproduced exactly as found; they
        // may be affected by the file's encoding -- confirm against evo_charset.
        array('assertEqual', 'kдse', 'kдse'),
        array('assertEqual', 'цl.de', 'цl.de'),
        array('assertEqual', 'www-цl.kдse-цl.de', 'www-цl.kдse-цl.de'),
        array('assertEqual', 'sub1.sub2.prцhl.de', 'sub2.prцhl.de'),

        // Numerical, should be kept:
        array('assertIdentical', '123.123.123.123', '123.123.123.123'),
        array('assertIdentical', '123.123.123.123:8080', '123.123.123.123'),

        // Invalid, but ok:
        // fp> This function is called get_base_domain(), not validate_domain().
        // If we receive a domain starting with a _, then it is not a problem
        // to keep it in the base domain.
        array('assertEqual', '_host', '_host'),

        // The following may not be valid in the future but seem good enough for now:
        array('assertEqual', '.de', 'de'),
        array('assertEqual', '.....de', 'de'),
        array('assertIdentical', '...', ''),
        array('assertIdentical', '1..', ''),
        array('assertIdentical', chr(0), ''),
    );

    foreach ($cases as $case) {
        list($assertion, $input, $expected) = $case;
        $this->$assertion(get_base_domain($input), $expected);
    }
}
/**
 * Constructor
 *
 * Collects the data describing the current hit: client IP, referer (and its
 * type), the referer's base domain with its ID in T_basedomains, and the
 * user agent. The gathered values are written to the debug log.
 */
function Hit()
{
    global $Debuglog, $DB;

    // First IP in the list of REMOTE_ADDR and HTTP_X_FORWARDED_FOR.
    $this->IP = get_ip_list(true);

    // Check the referer and determine referer_type.
    $this->detect_referer();

    // Resolve the base domain of the referer, if it has one.
    $this->referer_basedomain = get_base_domain($this->referer);

    if ($this->referer_basedomain) {
        // Check if we have met this base domain before.
        $known_row = $DB->get_row(' SELECT dom_ID FROM T_basedomains WHERE dom_name = ' . $DB->quote($this->referer_basedomain));

        if (empty($known_row->dom_ID)) {
            // First visit from this base domain.
            // The INSERT below can fail, probably if we get two simultaneous
            // hits (seen in the demo logfiles), so DB errors are silenced
            // for its duration and the previous settings put back afterwards.
            $saved_halt_on_error = $DB->halt_on_error;
            $saved_show_errors = $DB->show_errors;
            $DB->halt_on_error = false;
            $DB->show_errors = false;

            $inserted = $DB->query(' INSERT INTO T_basedomains( dom_name ) VALUES( ' . $DB->quote($this->referer_basedomain) . ' )');
            if ($inserted) {
                // INSERTed ok:
                $this->referer_domain_ID = $DB->insert_id;
            } else {
                // INSERT failed: try to select again (may become/stay NULL).
                $this->referer_domain_ID = $DB->get_var(' SELECT dom_ID FROM T_basedomains WHERE dom_name = ' . $DB->quote($this->referer_basedomain));
            }

            $DB->halt_on_error = $saved_halt_on_error;
            $DB->show_errors = $saved_show_errors;
        } else {
            // This base domain has visited before: reuse its ID.
            // fp> The blacklist handling that was here made no sense.
            $this->referer_domain_ID = $known_row->dom_ID;
        }
    }

    $this->detect_useragent();

    // Trace what was detected for this hit.
    $Debuglog->add('IP: ' . $this->IP, 'hit');
    $Debuglog->add('UserAgent: ' . $this->user_agent, 'hit');
    $Debuglog->add('Referer: ' . var_export($this->referer, true) . '; type=' . $this->referer_type, 'hit');
    $Debuglog->add('Remote Host: ' . $this->get_remote_host(false), 'hit');
}