/**
 * Checks the text-normalisation helpers against fixed input/output pairs.
 *
 * Each helper is called with a literal input and the result is compared
 * strictly (===) with the expected literal.
 */
public function testNorm()
{
    // normLatin: accented / Greek / Cyrillic letters are mapped to plain Latin.
    $latin = normLatin('ÁΓă');
    $this->assertTrue("AGa" === $latin);

    $latin = normLatin('ђÜẽ');
    $this->assertTrue("djUe" === $latin);

    $latin = normLatin('Màl Śir');
    $this->assertTrue("Mal Sir" === $latin);

    // normLatinRu: Russian text is transliterated to Latin.
    $translit = normLatinRu('привет мир');
    $this->assertTrue("privet mir" === $translit);

    $translit = normLatinRu('щука ямка хрен');
    $this->assertTrue("shchuka iamka khren" === $translit);

    // normSpace collapses inner whitespace runs; normTrim only strips the ends.
    $padded = " \t \n Привет \t \n мир! \t \n ";

    $spaced = normSpace($padded);
    $this->assertTrue("Привет мир!" === $spaced);

    $trimmed = normTrim($padded);
    $this->assertTrue("Привет \t \n мир!" === $trimmed);

    // normEn / normRu: lower-case the text and drop punctuation.
    $english = normEn("Hello, world!");
    $this->assertTrue("hello world" === $english);

    $russian = normRu("Привет, мир!");
    $this->assertTrue("привет мир" === $russian);
}
/**
 * Aggregates the raw hit logs selected by $stamp into per-host statistics
 * and stores one `stat` row per host (via replace()).
 *
 * For every host the following is collected, always keyed by visitor IP so
 * that the final figures are unique-IP counts:
 *  - hits      total number of log records;
 *  - ips       distinct IPs;
 *  - google    distinct IPs whose referrer host contains "google";
 *  - yandex    distinct IPs whose referrer host contains "yandex";
 *  - top_all   top 100 URLs (static assets excluded);
 *  - top_serp  top 100 URLs reached from google/yandex;
 *  - top_ref   top 1000 "URL, referrer" pairs from other external sites;
 *  - kws       top 1000 "keyword, URL" pairs taken from the search query.
 *
 * @param int|string $stamp  Numeric value <= 0: day offset relative to today
 *                           (0 = today, -1 = yesterday, ...). Positive numeric
 *                           value: used unchanged as a Unix timestamp.
 *                           Anything else is parsed with strtotime().
 * @param bool       $delete When true, the processed log files are unlinked
 *                           after the statistics have been stored.
 *
 * @return void
 */
public static function analyze($stamp, $delete = false)
{
    // STAMP TO UNIXTIME (a positive numeric stamp is already a timestamp).
    if (!is_numeric($stamp)) {
        $stamp = strtotime($stamp);
    } elseif (intval($stamp) <= 0) {
        $stamp = strtotime("today " . intval($stamp) . " day");
    }

    $db = self::$db;
    $paths = self::getPath($stamp, true);

    // Requests for these extensions are assets, not pages, and are left out
    // of every "top" list.
    $asset_exts = array('jpg', 'jpeg', 'png', 'gif', 'bmp', 'css', 'js', 'ico');

    $hosts = array();
    foreach ($paths as $path) {
        $content = file_get_contents($path);
        if ($content === false) {
            // Unreadable log file: nothing to aggregate from it.
            continue;
        }

        foreach (nsplit($content) as $line) {
            // NOTE(review): unserialize() must only ever see data written by
            // this application; if the log files can be influenced from
            // outside, switch the log format to JSON.
            $hit = unserialize($line);
            if (!$hit || !is_array($hit)) {
                continue;
            }

            $url = isset($hit['URL']) ? urldecode($hit['URL']) : '';
            $ref = isset($hit['REF']) ? urldecode($hit['REF']) : '';
            $ip = isset($hit['IP']) ? $hit['IP'] : '';

            // Hosts are compared lower-cased and without a leading "www.".
            $host = isset($hit['HOST']) ? strtolower($hit['HOST']) : '';
            $host = preg_replace("~^www\\.~", "", $host);

            if (!isset($hosts[$host])) {
                $hosts[$host] = array(
                    'hits' => 0,
                    'ips' => array(),
                    'google' => array(),
                    'yandex' => array(),
                    'serp' => array(),
                    'top_all' => array(),
                    'top_serp' => array(),
                    'top_ref' => array(),
                    'kws' => array(),
                );
            }

            // Computed once per hit; assumes host() has no side effects.
            $ref_host = host($ref);
            $is_google = strpos($ref_host, 'google') !== false;
            $is_yandex = strpos($ref_host, 'yandex') !== false;
            $is_serp = ($is_google || $is_yandex);
            $is_page = !in_array(file_get_ext($url), $asset_exts, true);

            // HITS / IPS
            $hosts[$host]['hits'] += 1;
            $hosts[$host]['ips'][$ip] = true;

            // GOOGLE / YANDEX
            if ($is_google) {
                $hosts[$host]['google'][$ip] = true;
            }
            if ($is_yandex) {
                $hosts[$host]['yandex'][$ip] = true;
            }

            if ($is_page) {
                // TOP_ALL
                $hosts[$host]['top_all'][$url][$ip] = true;

                // TOP_SERP
                if ($is_serp) {
                    $hosts[$host]['top_serp'][$url][$ip] = true;
                }

                // TOP_REF: external, non-search referrers only.
                $is_same = ($ref_host === $host || $ref_host === "www.{$host}");
                if (!$is_serp && $ref_host && !$is_same) {
                    $hosts[$host]['top_ref']["{$url}, {$ref}"][$ip] = true;
                }
            }

            // KWS: google passes the search phrase in "q", yandex in "text".
            if ($is_serp) {
                $query = parse_url($ref, PHP_URL_QUERY);
                if ($query) {
                    parse_str($query, $params);

                    $kw = null;
                    if ($is_google && isset($params['q']) && $params['q']) {
                        $kw = $params['q'];
                    } elseif ($is_yandex && isset($params['text']) && $params['text']) {
                        $kw = $params['text'];
                    }

                    // Array-valued parameters ("q[]=...") and missing
                    // phrases must not reach the normalisers.
                    if (is_string($kw)) {
                        $kw = normRu(normLatin($kw));
                        if ($kw) {
                            $hosts[$host]['kws']["{$kw}, {$url}"][$ip] = true;
                        }
                    }
                }
            }
        }
    }

    // Turns a map "key => set of IPs" into at most $limit lines of the form
    // "<unique IP count>, <key>", ordered by count, highest first.
    $rank = function (array $buckets, $limit) {
        $counts = array_map('count', $buckets);
        arsort($counts);

        $lines = array();
        foreach (array_slice($counts, 0, $limit, true) as $key => $count) {
            $lines[] = "{$count}, {$key}";
        }

        return implode("\n", $lines);
    };

    foreach ($hosts as $host => $stat) {
        $bean = $db->bean('stat');
        $bean->import(array(
            'host' => $host,
            'stamp' => date(SQL_FORMAT_DATE, $stamp),
            'hits' => $stat['hits'],
            'ips' => count($stat['ips']),
            'google' => count($stat['google']),
            'yandex' => count($stat['yandex']),
            // NOTE(review): the per-host 'serp' bucket is never filled; the
            // figure is the sum of both engines, so an IP that arrived from
            // google and from yandex is counted twice — confirm intent.
            'serp' => count($stat['google']) + count($stat['yandex']),
            'top_all' => $rank($stat['top_all'], 100),
            'top_serp' => $rank($stat['top_serp'], 100),
            'top_ref' => $rank($stat['top_ref'], 1000),
            'kws' => $rank($stat['kws'], 1000),
        ))->replace();
    }

    if ($delete) {
        foreach ($paths as $path) {
            unlink($path);
        }
    }
}