/**
 * \brief Check FQDN label
 *
 * Check a label against section 2.1 of RFC 1123: between 1 and 63 characters,
 * made only of ASCII letters, digits and minus ('-'). Minus is allowed only if
 * it is neither the first nor the last character of the label.
 *
 * A label that is not plain ASCII is converted with Punycode first and the
 * converted form is checked against the same rule (Internationalized Domain
 * Names).
 *
 * @param string $label the label to check
 *
 * @return boolean true if the label is valid, false otherwise
 **/
static function checkFQDNLabel($label) {

    // A one-byte label can only be a single ASCII letter or digit.
    if (strlen($label) == 1) {
        return preg_match('/^[0-9A-Za-z]$/D', $label) === 1;
    }

    // The "D" modifier makes "$" match only at the very end of the subject.
    // Without it "$" also matches just before a trailing newline, so a label
    // such as "host\n" was accepted as valid.
    $fqdn_regex = '/^(?!-)[A-Za-z0-9-]{1,63}(?<!-)$/D';

    if (preg_match($fqdn_regex, $label) === 1) {
        return true;
    }

    // Punycode only rewrites non-ASCII code points, so a pure-ASCII label that
    // failed the check above cannot become valid by encoding it. Skipping the
    // encoder here also avoids handing it empty or over-long ASCII input.
    if (preg_match('/[^\x00-\x7F]/', $label) !== 1) {
        return false;
    }

    // Check also Internationalized Domain Names.
    $punycode = new TrueBV\Punycode();
    $idn      = $punycode->encode($label);

    return preg_match($fqdn_regex, $idn) === 1;
}
/**
 * Turn an IDNA (punycode) domain name back into its UTF-8 form.
 *
 * The native idn_to_utf8() function is used when it is available; otherwise
 * the TrueBV\Punycode decoder is used instead.
 *
 * @param string $domain Domain to convert
 * @return string Converted string
 */
public static function decodeIdna($domain)
{
    // No native implementation: fall back to the pure-PHP decoder.
    if (!function_exists('idn_to_utf8')) {
        $punycode = new TrueBV\Punycode();

        return $punycode->decode($domain);
    }

    return idn_to_utf8($domain);
}
/**
 * nv_check_domain()
 *
 * Validate a host name and return it in a form that is safe to use.
 *
 * Accepted as-is: a lower-case ASCII domain ending in one of the known TLDs,
 * the literal 'localhost', or anything FILTER_VALIDATE_IP accepts.
 * Otherwise the name is converted to its ASCII (punycode) form and accepted
 * when that form starts with 'xn--' and ends in a known ASCII or IDN TLD.
 * As a last resort the value of NV_SERVER_NAME itself is accepted.
 *
 * @param string $domain
 * @return string the accepted domain (ASCII form for IDN input), or '' when rejected
 */
function nv_check_domain($domain)
{
    // ASCII TLDs, shared by both patterns below (previously duplicated verbatim).
    $tlds = 'ac|ad|ae|aero|af|ag|ai|al|am|an|ao|aq|ar|arpa|as|asia|at|au|aw|ax|az|'
        . 'ba|bb|bd|be|bf|bg|bh|bi|biz|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|'
        . 'ca|cat|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|com|coop|cr|cu|cv|cw|cx|cy|cz|'
        . 'de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|'
        . 'ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|'
        . 'id|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|'
        . 'ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|'
        . 'ma|mc|md|me|mf|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|mz|'
        . 'na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|'
        . 'pa|pe|pf|pg|ph|pk|pl|pm|pn|post|pr|pro|ps|pt|pw|py|qa|re|ro|rs|ru|rw|'
        . 'sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|'
        . 'tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv|tw|tz|'
        . 'ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xxx|ye|yt|za|zm|zw';

    // Internationalized TLDs, only meaningful for the punycode pattern.
    $idn_tlds = 'xn--0zwm56d|xn--11b5bs3a9aj6g|xn--3e0b707e|xn--45brj9c|xn--54b7fta0cc|xn--80akhbyknj4f|xn--80ao21a|'
        . 'xn--90a3ac|xn--9t4b11yi5a|xn--clchc0ea0b2g2a9gcd|xn--deba0ad|xn--fiqs8s|xn--fiqz9s|xn--fpcrj9c3d|'
        . 'xn--fzc2c9e2c|xn--g6w251d|xn--gecrj9c|xn--h2brj9c|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--j1amh|'
        . 'xn--j6w193g|xn--jxalpdlp|xn--kgbechtv|xn--kprw13d|xn--kpry57d|xn--l1acc|xn--lgbbat1ad8j|xn--mgb9awbf|'
        . 'xn--mgba3a4f16a|xn--mgbaam7a8h|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbbh1a71e|xn--mgbc0a9azcg|'
        . 'xn--mgberp4a5d4ar|xn--mgbx4cd0ab|xn--node|xn--o3cw4h|xn--ogbpf8fl|xn--p1ai|xn--pgbs0dh|xn--s9brj9c|'
        . 'xn--wgbh1c|xn--wgbl6a|xn--xkc2al3hye2a|xn--xkc2dl3a5ee0h|xn--yfro4i67o|xn--ygbi2ammx|xn--zckzah';

    // The "D" modifier anchors "$" to the real end of the subject. Without it
    // "$" also matches before a trailing newline, so "example.com\n" was
    // accepted and returned with the newline still attached.
    $ascii_pattern = '/^([a-z0-9]+)([a-z0-9\\-\\.]+)\\.(' . $tlds . ')$/D';

    if (preg_match($ascii_pattern, $domain) or $domain === 'localhost' or filter_var($domain, FILTER_VALIDATE_IP)) {
        return $domain;
    }

    // Not a plain ASCII domain: try its IDNA/punycode form.
    if (function_exists('idn_to_ascii')) {
        $domain_ascii = idn_to_ascii($domain);
    } else {
        $Punycode = new TrueBV\Punycode();
        $domain_ascii = $Punycode->encode($domain);
    }

    $idn_pattern = '/^xn\\-\\-([a-z0-9\\-\\.]+)\\.(' . $tlds . '|' . $idn_tlds . ')$/D';

    // idn_to_ascii() returns false on failure; never hand that to preg_match().
    if (is_string($domain_ascii) and preg_match($idn_pattern, $domain_ascii)) {
        return $domain_ascii;
    }

    // The configured server name is always acceptable, whatever its shape.
    if ($domain === NV_SERVER_NAME) {
        return $domain;
    }

    return '';
}
/**
 * Normalise a URL into the form stored in the list: hostname plus path,
 * lower-cased, without surrounding slashes or whitespace, punycode-encoded
 * and finally passed through iconv from UTF-8 to ISO-8859-1.
 *
 * When $list is non-empty, and the root domain of the URL differs from its
 * hostname, and that root domain is already in $list, the URL is blanked
 * before normalisation.
 *
 * @param string $url  URL to normalise
 * @param array  $list root domains already known (optional)
 * @return string|false result of iconv()
 */
function clean($url, $list = [])
{
    // Keep only "hostname + path".
    $url = Domainparser::getHostname($url) . Domainparser::getPath($url);

    // Blank the entry when its root domain is already listed and the entry
    // itself is not that root domain.
    $rootDomain = Domainparser::getRootDomain($url);
    if (!empty($list)) {
        if ($rootDomain !== Domainparser::getHostname($url) && in_array($rootDomain, $list)) {
            $url = '';
        }
    }

    // Strip surrounding slashes, lower-case, then strip surrounding whitespace.
    $url = trim(strtolower(trim($url, '/')));

    $encoder = new \TrueBV\Punycode();

    return iconv("UTF-8", "ISO-8859-1", $encoder->encode($url));
}
} } // NOTE(review): these two braces close blocks opened before this excerpt
// Report how many entries this source contributed, then add them to the combined list.
echo $source . " contains " . count($list) . " source(s)\n";
$spammers = array_merge($spammers, $list);
} // NOTE(review): closes a block opened before this excerpt — presumably the per-source loop; confirm

// only top-level domains: every entry is replaced by Domainparser::getRootDomain() of itself
foreach ($spammers as &$spammer) {
    $spammer = \Nabble\SemaltBlocker\Domainparser::getRootDomain($spammer);
}

// merge & cleanup spammers
// When $includeOldList is truthy, the entries from Blocker::getBlocklist() are placed in front of the new ones.
if ($includeOldList) {
    $spammers = array_merge(\Nabble\SemaltBlocker\Blocker::getBlocklist(), $spammers);
}
// Lower-case every entry, then strip surrounding whitespace.
$spammers = array_map('strtolower', $spammers);
$spammers = array_map('trim', $spammers);
// Punycode-encode every entry and pass the result through iconv (UTF-8 -> ISO-8859-1).
// NOTE(review): $spammer is still a reference left over from the loop above and is
// never unset(). Re-binding it by reference here is harmless, but a later plain
// assignment to $spammer would overwrite the last element of $spammers.
$punicode = new \TrueBV\Punycode();
foreach ($spammers as &$spammer) {
    $spammer = iconv("UTF-8", "ISO-8859-1", $punicode->encode($spammer));
}
// Remove duplicates, drop falsy entries (array_filter without a callback), then sort.
$spammers = array_unique($spammers);
$spammers = array_filter($spammers);
sort($spammers);

// echo some info
echo "New list: " . count($spammers) . " sources\n";

// write: one entry per line, only when the list is not empty
if (count($spammers)) {
    file_put_contents('../domains/blocked', implode("\n", $spammers) . PHP_EOL);
}
// NOTE(review): this message is printed even when the list was empty and nothing was written.
echo "Updated blocklist\n";

// readme: load the current README contents
$readme = file_get_contents('../README.md');