/**
 * URI pickup
 *
 * Return an array of the URIs found in $string.
 *  [OK] http://nasty.example.org#nasty_string
 *  [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar
 *  [OK] ftp://nasty.example.org:80/dfsdfs
 *  [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986)
 * Not available for: IDN (ignored)
 *
 * Each entry of the result keeps the index of its regex match and is an
 * array of strings keyed by 'scheme', 'userinfo', 'host', 'port', 'path',
 * 'file', 'query' and 'fragment', plus 'area' => array('offset' => N) where
 * N is the byte offset of that URI (of its scheme) within $string.
 *
 * @param string $string Text to scan
 * @return array Empty when $string is not a string or contains no URI
 */
static function uri_pickup($string = '')
{
    if (!is_string($string)) {
        return array();
    }

    $array = array();
    $found = preg_match_all(
        // (1) scheme, then ':' and one or more slashes or backslashes
        '#(\\b[a-z][a-z0-9.+-]{1,8}):[/\\\\]+' .
        '(?:' .
        // (2) optional userinfo, terminated by '@'
        '([^\\s<>"\'\\[\\]/\\#?@]*)' .
        '@)?' .
        // (3) host: bracketed literal, dotted quad, or host name
        '(' .
        '\\[[0-9a-f:.]+\\]' .
        '|' .
        '(?:[0-9]{1,3}\\.){3}[0-9]{1,3}' .
        '|' .
        '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' .
        ')' .
        // (4) optional port
        '(?::([0-9]*))?' .
        // (5) optional path (directory part, ends with a slash)
        '((?:/+[^\\s<>"\'\\[\\]/\\#?]+)*/+)?' .
        // (6) optional file
        '([^\\s<>"\'\\[\\]\\#?]+)?' .
        // (7) optional query
        '(?:\\?([^\\s<>"\'\\[\\]\\#]+))?' .
        // (8) optional fragment
        '(?:\\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' .
        '#i',
        $string,
        $array,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );
    if ($found === FALSE || $found === 0) {
        return array(); // PCRE error, or nothing matched
    }

    // Reformat the $array: capture-group number => part name
    static $parts = array(
        1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port',
        5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment',
    );
    // Stand-in (string, offset) pair; presumably what array_rename_keys()
    // fills in for groups that did not take part in the match -- confirm
    // against SpamUtility.
    $default = array(0 => '', 1 => -1);

    foreach (array_keys($array) as $uri) {
        $_uri =& $array[$uri];
        SpamUtility::array_rename_keys($_uri, $parts, TRUE, $default);

        // Scheme's offset = URI's offset. It must be taken per URI, before
        // the offsets are stripped; keeping it in a variable shared across
        // all URIs would stamp every entry with the last match's offset.
        $offset = $_uri['scheme'][1];

        foreach (array_keys($_uri) as $part) {
            $_uri[$part] = $_uri[$part][0]; // Remove offsets
        }

        if ($_uri['scheme'] === '') {
            unset($array[$uri]); // Considered harmless
            continue;
        }

        unset($_uri[0]); // Matched string itself
        $_uri['area']['offset'] = $offset; // Area offset for area_measure()
    }
    unset($_uri); // Do not leave a reference into $array behind

    return $array;
}
/**
 * Summarize $progress['hosts'] as var_export()-like text, grouped by the
 * value SpamPickup::whois_responsibility() returns for each host.
 *
 * Output shape (one line per group, wrapped in "array (" ... ")"):
 *   'example.com',                        -- only the bare domain was seen
 *   'example.com' => 'A.foo.bar. , B. ',  -- sub-labels (and/or the domain)
 *
 * @param array $progress Only $progress['hosts'] (an array of host names) is read
 * @return string '' when there is no non-empty 'hosts' array
 */
private static function summarize_detail_newtral($progress = array())
{
    if (!isset($progress['hosts']) || !is_array($progress['hosts']) || empty($progress['hosts'])) {
        return '';
    }

    // Generate a responsible $trie: array(responsible => array(rest => NULL))
    $trie = array();
    foreach ($progress['hosts'] as $value) {
        // e.g. 'A.foo.bar.example.com' => 'example.com' (per the original
        // inline examples; exact semantics live in SpamPickup)
        $resp = SpamPickup::whois_responsibility($value);
        if (empty($resp)) {
            // No responsible part reported: file the host under itself
            $resp = strval($value);
            $rest = '';
        } else {
            // Strip the responsible suffix and the dot before it: 'A.foo.bar'
            $rest = rtrim(substr($value, 0, -strlen($resp)), '.');
        }
        $trie = SpamUtility::array_merge_leaves($trie, array($resp => array($rest => NULL)), FALSE);
    }

    // Format: var_export_shrink() -like output
    $result = array();
    ksort_by_domain($trie);
    foreach (array_keys($trie) as $key) {
        ksort_by_domain($trie[$key]);
        $labels = array_keys($trie[$key]);

        // Strict comparison on purpose: a label '0' is stored as integer
        // key 0, and 0 == '' is TRUE before PHP 8, which would report
        // '0.example.com' as the bare domain.
        if (count($labels) === 1 && $labels[0] === '') {
            // Just one 'responsibility.example.com'
            $result[] = ' \'' . $key . '\',';
        } else {
            // One subdomain-or-host, or several ones
            $subs = array();
            foreach ($labels as $sub) {
                if ($sub === '') {
                    $subs[] = $key; // 'example.com'
                } else {
                    $subs[] = $sub . '. '; // 'A.foo.bar. '
                }
            }
            $result[] = ' \'' . $key . '\' => \'' . implode(', ', $subs) . '\',';
        }

        unset($trie[$key]); // Release each group once it has been formatted
    }

    return 'array (' . "\n" . implode("\n", $result) . "\n" . ')';
}