/**
 * Tries to determine the language of a document from the $url and
 * $sample_text provided.
 *
 * The url is consulted first: if UrlParser::getLang() yields a non-empty
 * result for it, that result is returned and the sample text is never
 * examined. Otherwise the sample text is split into words and, if the
 * fraction of words made up solely of single-byte characters exceeds
 * EN_RATIO, the language is guessed to be English.
 *
 * @param string $sample_text sample text to try to guess the language from
 * @param string $url url of the web-page; used as a first hint (per
 *      the original documentation, the country in the url is used to
 *      figure out the language)
 *
 * @return string language tag for the guessed language, or NULL if no
 *      guess could be made
 */
static function calculateLang($sample_text = NULL, $url = NULL)
{
    if ($url != NULL) {
        $lang = UrlParser::getLang($url);
        if ($lang != NULL) {
            return $lang;
        }
    }
    if ($sample_text == NULL) {
        return NULL;
    }
    $tokens = mb_split("[[:space:]]|" . PUNCT, $sample_text);
    if ($tokens === false) {
        // the split pattern could not be applied, so there is nothing
        // to base a guess on
        return NULL;
    }
    $num_words = 0;
    $ascii_count = 0;
    foreach ($tokens as $word) {
        // mb_split emits empty strings for leading, trailing and
        // back-to-back delimiters; these are not words and would
        // otherwise each be counted as an ASCII word
        if ($word === '') {
            continue;
        }
        $num_words++;
        // byte length equals character length only when no character
        // in the word is multi-byte
        if (strlen($word) == mb_strlen($word)) {
            $ascii_count++;
        }
    }
    if ($num_words == 0) {
        // text consisted only of delimiters; avoid dividing by zero
        return NULL;
    }
    // crude, but let's guess ASCII == english
    if ($ascii_count / $num_words > EN_RATIO) {
        return 'en';
    }
    return NULL;
}