/**
 * Computes how the characters of a string are distributed across
 * unicode blocks.
 *
 * To look up the block of one single character, call unicodeBlockName()
 * instead.
 *
 * Both arguments are coerced rather than validated: $str is cast to
 * string and $skip_symbols is cast to bool before being handed to the
 * parser.
 *
 * @param string $str          text to inspect; expected to be ascii or utf8
 * @param bool   $skip_symbols when true, ascii digits, symbols and
 *                             non-printing characters (spaces, newlines
 *                             and common punctuation included) are skipped
 *
 * @return array unicode block distribution as reported by the parser
 * @throws Text_LanguageDetect_Exception not raised directly in this method;
 *                                       presumably propagated from the parser
 */
public function detectUnicodeBlocks($str, $skip_symbols)
{
    $parser = new Text_LanguageDetect_Parser((string) $str);

    // Configure the parser for a unicode pass; trigram preparation is
    // called with false (presumably turning trigram analysis off).
    $parser->prepareUnicode();
    $parser->prepareTrigram(false);
    $parser->setUnicodeSkipSymbols((bool) $skip_symbols);

    $parser->analyze();

    return $parser->getUnicodeBlocks();
}
/**
 * Returns the distribution of unicode blocks in a given utf8 string
 *
 * For the block name of a single char, use unicodeBlockName()
 *
 * Arguments are validated strictly: a non-boolean $skip_symbols or a
 * non-string $str is rejected and a PEAR_Error is RETURNED (not thrown),
 * so callers should test the result before using it as an array.
 *
 * @access public
 * @param  string $str          input string. Must be ascii or utf8
 * @param  bool   $skip_symbols if true, skip ascii digits, symbols and
 *                              non-printing characters. Includes spaces,
 *                              newlines and common punctuation characters.
 * @return array|PEAR_Error     unicode block distribution reported by the
 *                              parser, or a PEAR_Error on invalid arguments
 */
function detectUnicodeBlocks($str, $skip_symbols)
{
    // input check
    if (!is_bool($skip_symbols)) {
        return PEAR::raiseError('Second parameter must be boolean');
    }

    if (!is_string($str)) {
        return PEAR::raiseError('First parameter was not a string');
    }

    $sample_obj = new Text_LanguageDetect_Parser($str);
    $sample_obj->prepareUnicode();
    $sample_obj->prepareTrigram(false);
    $sample_obj->setUnicodeSkipSymbols($skip_symbols);
    $sample_obj->analyze();

    // Plain assignment instead of "=&": this method returns by value, so a
    // reference binding gains nothing, and binding the result of a method
    // that does not return by reference triggers a PHP notice.
    $blocks = $sample_obj->getUnicodeBlocks();
    unset($sample_obj);

    return $blocks;
}