Example #1
0
 /**
  * \brief Check FQDN label
  * Check a label against section 2.1 of RFC 1123: at most 63 characters and no other
  * characters than alphanumerics. Minus ('-') is allowed if it is neither at the
  * beginning nor at the end of the label.
  *
  * A label that fails this check and contains non-ASCII bytes is treated as a
  * candidate internationalized label: it is punycode-encoded and checked again.
  *
  * @param $label        the label to check
  *
  * @return boolean true if the label (or its punycode form) is valid, false otherwise
  **/
 static function checkFQDNLabel($label)
 {
     // "D" (PCRE_DOLLAR_ENDONLY) keeps "$" from matching before a trailing newline,
     // so a label such as "example\n" is rejected instead of silently accepted.
     $fqdn_regex = '/^(?!-)[A-Za-z0-9-]{1,63}(?<!-)$/D';

     if (preg_match($fqdn_regex, $label)) {
         return true;
     }

     // A one-byte label was never sent through the punycode fallback, and punycode
     // copies ASCII code points through literally, so an ASCII-only label that failed
     // the check above cannot pass it after encoding either.
     if (strlen($label) == 1 || !preg_match('/[^\x00-\x7F]/', $label)) {
         return false;
     }

     // check also Internationalized domain name
     try {
         $punycode = new TrueBV\Punycode();
         $idn = $punycode->encode($label);
     } catch (\Exception $e) {
         // NOTE(review): the encoder may throw on labels it cannot encode (presumably
         // out-of-range lengths - confirm against the installed library version);
         // for a validator that simply means "not a valid label".
         return false;
     }

     return preg_match($fqdn_regex, $idn) === 1;
 }
Example #2
0
 /**
  * Decode an IDNA (punycode) domain name to its UTF-8 representation.
  *
  * The intl extension's idn_to_utf8() is used when it is available; otherwise the
  * conversion is delegated to the TrueBV\Punycode decoder.
  *
  * @param string $domain Domain to convert
  * @return string Converted string (whatever the selected decoder returns)
  */
 public static function decodeIdna($domain)
 {
     // No intl extension: fall back to the pure-PHP decoder.
     if (!function_exists('idn_to_utf8')) {
         $fallback = new TrueBV\Punycode();

         return $fallback->decode($domain);
     }

     return idn_to_utf8($domain);
 }
Example #3
0
/**
 * nv_check_domain()
 *
 * Validate a domain name and return it in a form that is safe to use.
 *
 * The input is returned unchanged when it is a lower-case ASCII domain ending in one
 * of the listed TLDs, the literal 'localhost', or a valid IP address. Otherwise it is
 * converted to its ASCII (punycode) form - with idn_to_ascii() when available, else
 * with TrueBV\Punycode - and that form is returned if it starts with "xn--" and ends
 * in a listed TLD. As a last resort the input is returned if it equals NV_SERVER_NAME.
 *
 * @param string $domain
 * @return string $domain_ascii the accepted domain, or '' when it is rejected
 */
function nv_check_domain($domain)
{
    // Both patterns carry the "D" modifier (PCRE_DOLLAR_ENDONLY): without it "$" also
    // matches before a trailing newline, so "example.com\n" would be accepted and
    // returned with the newline still attached.
    $plain_pattern = '/^([a-z0-9]+)([a-z0-9\\-\\.]+)\\.(ac|ad|ae|aero|af|ag|ai|al|am|an|ao|aq|ar|arpa|as|asia|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|biz|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|com|coop|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|post|pr|pro|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xxx|ye|yt|za|zm|zw)$/D';
    $idn_pattern = '/^xn\\-\\-([a-z0-9\\-\\.]+)\\.(ac|ad|ae|aero|af|ag|ai|al|am|an|ao|aq|ar|arpa|as|asia|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|biz|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|com|coop|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|post|pr|pro|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xxx|ye|yt|za|zm|zw|xn--0zwm56d|xn--11b5bs3a9aj6g|xn--3e0b707e|xn--45brj9c|xn--54b7fta0cc|xn--80akhbyknj4f|xn--80ao21a|xn--90a3ac|xn--9t4b11yi5a|xn--clchc0ea0b2g2a9gcd|xn--deba0ad|xn--fiqs8s|xn--fiqz9s|xn--fpcrj9c3d|xn--fzc2c9e2c|xn--g6w251d|xn--gecrj9c|xn--h2brj9c|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--j1amh|xn--j6w193g|xn--jxalpdlp|xn--kgbechtv|xn--kprw13d|xn--kpry57d|xn--l1acc|xn--lgbbat1ad8j|xn--mgb9awbf|xn--mgba3a4f16a|xn--mgbaam7a8h|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbbh1a71e|xn--mgbc0a9azcg|xn--mgberp4a5d4ar|xn--mgbx4cd0ab|xn--node|xn--o3cw4h|xn--ogbpf8fl|xn--p1ai|xn--pgbs0dh|xn--s9brj9c|xn--wgbh1c|xn--wgbl6a|xn--xkc2al3hye2a|xn--xkc2dl3a5ee0h|xn--yfro4i67o|xn--ygbi2ammx|xn--zckzah)$/D';

    // Already acceptable as-is: plain ASCII domain, localhost, or an IP address.
    if (preg_match($plain_pattern, $domain) || $domain == 'localhost' || filter_var($domain, FILTER_VALIDATE_IP)) {
        return $domain;
    }

    // Otherwise try the ASCII-compatible (punycode) encoding of the name.
    if (function_exists('idn_to_ascii')) {
        $domain_ascii = idn_to_ascii($domain);
    } else {
        $Punycode = new TrueBV\Punycode();
        $domain_ascii = $Punycode->encode($domain);
    }

    // idn_to_ascii() returns false on failure; only match a real string.
    if (is_string($domain_ascii) && preg_match($idn_pattern, $domain_ascii)) {
        return $domain_ascii;
    }

    // The server's own configured name is always accepted.
    if ($domain == NV_SERVER_NAME) {
        return $domain;
    }

    return '';
}
Example #4
0
/**
 * Normalize a URL to a lower-case, punycode-encoded "hostname + path" string.
 *
 * When $list is non-empty and the URL is a subdomain whose root domain already
 * appears in $list, the URL is considered redundant and is blanked before the
 * remaining normalization steps run.
 *
 * @param string $url  URL to normalize
 * @param array  $list Root domains already known; used to drop redundant subdomains
 * @return string Result of iconv() on the punycode-encoded, normalized URL
 */
function clean($url, $list = [])
{
    // Keep only the hostname and the path.
    $stripped = Domainparser::getHostname($url) . Domainparser::getPath($url);

    // A subdomain of a root domain that is already listed adds nothing.
    $rootDomain = Domainparser::getRootDomain($stripped);
    $isRedundant = !empty($list)
        && $rootDomain !== Domainparser::getHostname($stripped)
        && in_array($rootDomain, $list);

    $normalized = $isRedundant ? '' : $stripped;

    // Strip surrounding slashes, lower-case, then strip surrounding whitespace.
    $normalized = trim(strtolower(trim($normalized, '/')));

    $encoder = new \TrueBV\Punycode();

    return iconv("UTF-8", "ISO-8859-1", $encoder->encode($normalized));
}
Example #5
0
        }
    }
    echo $source . " contains " . count($list) . " source(s)\n";
    $spammers = array_merge($spammers, $list);
}
// only top-level domains: reduce every collected entry to its root domain
foreach ($spammers as &$spammer) {
    $spammer = \Nabble\SemaltBlocker\Domainparser::getRootDomain($spammer);
}
// Break the reference left behind by the by-reference loop; otherwise $spammer
// stays bound to the last array element and a later write to it would silently
// modify that element.
unset($spammer);
// merge & cleanup spammers
if ($includeOldList) {
    $spammers = array_merge(\Nabble\SemaltBlocker\Blocker::getBlocklist(), $spammers);
}
$spammers = array_map('strtolower', $spammers);
$spammers = array_map('trim', $spammers);
// punycode-encode every entry
// NOTE(review): punycode output is presumably plain ASCII, which would make the
// iconv() call a pass-through - confirm before removing it.
$punicode = new \TrueBV\Punycode();
foreach ($spammers as &$spammer) {
    $spammer = iconv("UTF-8", "ISO-8859-1", $punicode->encode($spammer));
}
unset($spammer); // same dangling-reference guard as above
// de-duplicate, drop empty entries, and sort
$spammers = array_unique($spammers);
$spammers = array_filter($spammers);
sort($spammers);
// echo some info
echo "New list: " . count($spammers) . " sources\n";
// write (skipped when the list is empty so an existing file is not overwritten)
if (count($spammers)) {
    file_put_contents('../domains/blocked', implode("\n", $spammers) . PHP_EOL);
}
echo "Updated blocklist\n";
// readme
$readme = file_get_contents('../README.md');