Exemple #1
0
 /**
  * Exercises the norm* string helpers against fixed samples.
  *
  * Every check compares the helper's result to a literal with `===`, so
  * both the value and the type (string) have to match.
  */
 public function testNorm()
 {
     // normLatin(): accented, Greek and Cyrillic letters are expected to come back as plain ASCII.
     $this->assertTrue("AGa" === normLatin('ÁΓă'));
     $this->assertTrue("djUe" === normLatin('ђÜẽ'));
     $this->assertTrue("Mal Sir" === normLatin('Màl Śir'));

     // normLatinRu(): Russian text is expected to come back transliterated.
     $this->assertTrue("privet mir" === normLatinRu('привет мир'));
     $this->assertTrue("shchuka iamka khren" === normLatinRu('щука ямка хрен'));

     // Same padded sample for both whitespace helpers: normSpace() is expected
     // to also collapse the inner run, normTrim() to strip only the outer ends.
     $padded = " \t \n Привет \t \n мир! \t \n ";
     $this->assertTrue("Привет мир!" === normSpace($padded));
     $this->assertTrue("Привет \t \n мир!" === normTrim($padded));

     // normEn() / normRu(): for these samples the result is lower-cased with punctuation removed.
     $this->assertTrue("hello world" === normEn("Hello, world!"));
     $this->assertTrue("привет мир" === normRu("Привет, мир!"));
 }
Exemple #2
0
 /**
  * Aggregates raw hit logs into per-host statistics and stores one `stat`
  * bean per host.
  *
  * The log files are the ones returned by self::getPath($stamp, true). Each
  * line (as split by nsplit()) is a serialized array read through the keys
  * URL, REF, HOST and IP. For every host (lower-cased, leading "www."
  * stripped) the method collects:
  *  - hits:     number of log lines;
  *  - ips:      number of distinct IPs;
  *  - google / yandex: distinct IPs whose referrer host contains that word;
  *  - serp:     google + yandex counts added together;
  *  - top_all:  up to 100 URLs ranked by distinct IPs (static assets excluded);
  *  - top_serp: same, restricted to hits referred by Google/Yandex;
  *  - top_ref:  up to 1000 "URL, REF" pairs for external, non-search referrers;
  *  - kws:      up to 1000 "keyword, URL" pairs taken from the `q` (Google)
  *              or `text` (Yandex) referrer query parameter.
  * The ranked lists are stored as newline-separated "count, key" lines.
  *
  * @param int|string $stamp  Numeric value <= 0: day offset relative to today.
  *                           Numeric value > 0: used as a Unix timestamp.
  *                           Anything else: parsed with strtotime().
  * @param bool       $delete When true, every log file that was read
  *                           successfully is unlinked after the stats are stored.
  * @return void
  */
 public static function analyze($stamp, $delete = false)
 {
     // STAMP TO UNIXTIME
     if (is_numeric($stamp)) {
         $number = intval($stamp);
         $stamp = $number <= 0 ? strtotime("today {$number} day") : $number;
     } else {
         $stamp = strtotime($stamp);
     }
     //
     $db = self::$db;
     $paths = self::getPath($stamp, true);
     // Hits on these extensions are counted in hits/ips but kept out of the top lists.
     $assetExts = array('jpg', 'jpeg', 'png', 'gif', 'bmp', 'css', 'js', 'ico');
     $hosts = array();
     $readPaths = array();
     foreach ($paths as $path) {
         $contents = file_get_contents($path);
         if ($contents === false) {
             // Unreadable file: nothing to aggregate, and it must not be
             // deleted below because its hits were never counted.
             continue;
         }
         $readPaths[] = $path;
         foreach (nsplit($contents) as $line) {
             // NOTE(review): unserialize() can instantiate arbitrary classes; if
             // these logs can contain attacker-controlled bytes, switch the log
             // format to JSON or restrict allowed classes.
             $hit = unserialize($line);
             if (!is_array($hit) || !$hit) {
                 continue;
             }
             // Missing fields behave like empty strings instead of raising notices.
             $hit += array('URL' => '', 'REF' => '', 'HOST' => '', 'IP' => '');
             $url = urldecode($hit['URL']);
             $ref = urldecode($hit['REF']);
             $ip = $hit['IP'];
             //
             $host = preg_replace("~^www\\.~", "", strtolower($hit['HOST']));
             if (!isset($hosts[$host])) {
                 $hosts[$host] = array('hits' => 0, 'ips' => array(), 'google' => array(), 'yandex' => array(), 'top_all' => array(), 'top_serp' => array(), 'top_ref' => array(), 'kws' => array());
             }
             $isValidExt = !in_array(file_get_ext($url), $assetExts, true);
             // Resolved once per hit; assumes host() is a pure helper — TODO confirm.
             $refHost = (string) host($ref);
             $isGoogle = strpos($refHost, 'google') !== false;
             $isYandex = strpos($refHost, 'yandex') !== false;
             $isSerp = $isGoogle || $isYandex;
             // HITS
             $hosts[$host]['hits'] += 1;
             // IPS (the IP is the array key, so repeated visitors collapse into one entry)
             $hosts[$host]['ips'][$ip] = true;
             // GOOGLE
             if ($isGoogle) {
                 $hosts[$host]['google'][$ip] = true;
             }
             // YANDEX
             if ($isYandex) {
                 $hosts[$host]['yandex'][$ip] = true;
             }
             // TOP_ALL
             if ($isValidExt) {
                 $hosts[$host]['top_all'][$url][$ip] = true;
             }
             // TOP_SERP
             if ($isSerp && $isValidExt) {
                 $hosts[$host]['top_serp'][$url][$ip] = true;
             }
             // TOP_REF: external referrers only (no search engines, no self-referrals)
             $isSame = ($refHost === $host || $refHost === "www.{$host}");
             if (!$isSerp && $refHost && $isValidExt && !$isSame) {
                 $hosts[$host]['top_ref']["{$url}, {$ref}"][$ip] = true;
             }
             // KWS
             if ($isSerp) {
                 $kw = null;
                 $query = parse_url($ref, PHP_URL_QUERY);
                 if ($query) {
                     parse_str($query, $params);
                     if ($isGoogle && !empty($params['q'])) {
                         $kw = $params['q'];
                     } elseif ($isYandex && !empty($params['text'])) {
                         $kw = $params['text'];
                     }
                 }
                 // parse_str() yields arrays for "q[]=..." style parameters; only plain strings are keywords.
                 if (is_string($kw)) {
                     $kw = normRu(normLatin($kw));
                     if ($kw) {
                         $hosts[$host]['kws']["{$kw}, {$url}"][$ip] = true;
                     }
                 }
             }
         }
     }
     // Turns a "key => set of IPs" map into newline-separated "count, key"
     // lines, highest count first, keeping at most $limit lines.
     $rank = function (array $ipsByKey, $limit) {
         $counts = array_map('count', $ipsByKey);
         arsort($counts);
         $lines = array();
         // preserve_keys keeps numeric-looking keys intact while slicing.
         foreach (array_slice($counts, 0, $limit, true) as $key => $count) {
             $lines[] = "{$count}, {$key}";
         }
         return implode("\n", $lines);
     };
     $day = date(SQL_FORMAT_DATE, $stamp);
     foreach ($hosts as $host => $stat) {
         $googleIps = count($stat['google']);
         $yandexIps = count($stat['yandex']);
         $bean = $db->bean('stat');
         // NOTE(review): 'serp' adds the two counts, so an IP that arrived from
         // both engines is counted twice — confirm this is intended.
         $bean->import(array('host' => $host, 'stamp' => $day, 'hits' => $stat['hits'], 'ips' => count($stat['ips']), 'google' => $googleIps, 'yandex' => $yandexIps, 'serp' => $googleIps + $yandexIps, 'top_all' => $rank($stat['top_all'], 100), 'top_serp' => $rank($stat['top_serp'], 100), 'top_ref' => $rank($stat['top_ref'], 1000), 'kws' => $rank($stat['kws'], 1000)))->replace();
     }
     if ($delete) {
         foreach ($readPaths as $path) {
             unlink($path);
         }
     }
 }