예제 #1
0
 /**
  * Tries to determine the language of a document from its URL and a
  * sample of its text.
  *
  * The URL is consulted first through UrlParser::getLang(); if that yields
  * a non-empty result it is returned immediately. Otherwise the sample text
  * is split into words on whitespace and PUNCT, and the document is guessed
  * to be English when the fraction of words whose byte length equals their
  * character length (i.e. words without multi-byte characters) is greater
  * than EN_RATIO. Empty tokens produced by adjacent delimiters are not
  * counted as words.
  *
  * @param string $sample_text sample text to try to guess the language from
  * @param string $url url of the web page; checked before the sample text
  *
  * @return string language tag for the guessed language, or NULL when no
  *     guess could be made
  */
 static function calculateLang($sample_text = NULL, $url = NULL)
 {
     if ($url != NULL) {
         $lang = UrlParser::getLang($url);
         if ($lang != NULL) {
             return $lang;
         }
     }
     if ($sample_text == NULL) {
         return NULL;
     }
     $words = mb_split("[[:space:]]|" . PUNCT, $sample_text);
     if (!is_array($words)) {
         // mb_split() returns false on failure; there is nothing to analyse
         return NULL;
     }
     $num_words = 0;
     $ascii_count = 0;
     foreach ($words as $word) {
         // Adjacent delimiters (e.g. ", " or a trailing ".") yield empty
         // tokens. They would pass the byte-length test below and wrongly
         // inflate the ASCII share, so they are skipped.
         if ($word === '') {
             continue;
         }
         $num_words++;
         // Byte length equals character length only if the word contains
         // no multi-byte characters.
         if (strlen($word) == mb_strlen($word)) {
             $ascii_count++;
         }
     }
     if ($num_words == 0) {
         // Only delimiters were present; avoid dividing by zero
         return NULL;
     }
     // crude, but let's guess ASCII == english
     if ($ascii_count / $num_words > EN_RATIO) {
         return 'en';
     }
     return NULL;
 }