/**
 * Decode gb18030 encoded string
 *
 * The string is returned untouched when sq_is8bit() reports no 8bit symbols.
 * Otherwise conversion is attempted with recode_string() first, then with
 * iconv() followed by charset_decode_utf_8(). When neither is usable (or
 * iconv() fails) the original string is returned undecoded.
 *
 * @param string $string gb18030 string
 * @param boolean $save_html don't html encode special characters if true
 * @return string $string decoded string
 */
function charset_decode_gb18030($string, $save_html = false)
{
    // don't do decoding when there are no 8bit symbols
    if (!sq_is8bit($string, 'gb18030')) {
        return $string;
    }

    // Entities written by htmlspecialchars() and the characters they stand for.
    // '&amp;' has to stay last: str_replace() works through the pairs in order,
    // so '&amp;lt;' must become '&lt;' and not '<'.
    $entities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $specials = array('"', '<', '>', '&');

    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        // if string is already sanitized, undo htmlspecial chars
        if (!$save_html) {
            $string = str_replace($entities, $specials, $string);
        }

        $string = recode_string("gb18030..html", $string);

        // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
        if ($save_html) {
            $string = str_replace($entities, $specials, $string);
        }

        return $string;
    }

    /*
     * iconv does not support html target, but internal utf-8 decoding is faster
     * than pure php implementation.
     */
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $utf8 = iconv('gb18030', 'utf-8', $string);
        // iconv() returns false when the input cannot be converted; in that
        // case fall through and hand back the undecoded string.
        if ($utf8 !== false) {
            return charset_decode_utf_8($utf8);
        }
    }

    // mbstring does not support gb18030
    // pure php decoding is not implemented.
    return $string;
}
/**
 * Decode euc-kr encoded string
 *
 * The string is returned untouched when sq_is8bit() reports no 8bit symbols.
 * Otherwise conversion is attempted, in this order, with recode_string(),
 * with iconv() followed by charset_decode_utf_8(), and with
 * mb_convert_encoding(). When none of them is usable the original string is
 * returned undecoded.
 *
 * @param string $string euc-kr string
 * @param boolean $save_html don't html encode special characters if true
 * @return string $string decoded string
 */
function charset_decode_euc_kr($string, $save_html = false)
{
    // don't do decoding when there are no 8bit symbols
    if (!sq_is8bit($string, 'euc-kr')) {
        return $string;
    }

    // Entities written by htmlspecialchars() and the characters they stand for.
    // '&amp;' has to stay last: str_replace() works through the pairs in order,
    // so '&amp;lt;' must become '&lt;' and not '<'.
    $entities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $specials = array('"', '<', '>', '&');

    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        // if string is already sanitized, undo htmlspecial chars
        if (!$save_html) {
            $string = str_replace($entities, $specials, $string);
        }

        $string = recode_string("euc-kr..html", $string);

        // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
        if ($save_html) {
            $string = str_replace($entities, $specials, $string);
        }

        return $string;
    }

    /*
     * iconv does not support html target, but internal utf-8 decoding is faster
     * than pure php implementation.
     */
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $utf8 = iconv('euc-kr', 'utf-8', $string);
        // iconv() returns false when the input cannot be converted; in that
        // case give mbstring a chance below instead of decoding "false".
        if ($utf8 !== false) {
            return charset_decode_utf_8($utf8);
        }
    }

    // try mbstring
    if (function_exists('mb_convert_encoding')
        && function_exists('sq_mb_list_encodings')
        && check_php_version(4, 3, 0)
        && in_array('euc-kr', sq_mb_list_encodings())
    ) {
        return mb_convert_encoding($string, 'HTML-ENTITIES', 'EUC-KR');
    }

    return $string;
}
/**
 * Converts iso-2022-kr texts
 *
 * Conversion is attempted with recode_string() first, then with iconv()
 * followed by charset_decode_utf_8(). When neither is usable (or iconv()
 * fails) the string is only passed through htmlspecialchars().
 *
 * @param string $string iso-2022-kr encoded string
 * @param boolean $save_html don't html encode special characters if true
 * @return string html encoded string
 */
function charset_decode_iso_2022_kr($string, $save_html = false)
{
    // Entities written by htmlspecialchars() and the characters they stand for.
    // '&amp;' has to stay last: str_replace() works through the pairs in order,
    // so '&amp;lt;' must become '&lt;' and not '<'.
    $entities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $specials = array('"', '<', '>', '&');

    // undo htmlspecial chars (they can break iso-2022-kr)
    if (!$save_html) {
        $string = str_replace($entities, $specials, $string);
    }

    // recode
    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        $string = recode_string("iso-2022-kr..html", $string);

        // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
        if ($save_html) {
            $string = str_replace($entities, $specials, $string);
        }

        return $string;
    }

    // iconv does not support html target, but internal utf-8 decoding is faster than iso-2022-kr.
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $utf8 = iconv('iso-2022-kr', 'utf-8', $string);
        // iconv() returns false when the input cannot be converted; in that
        // case use the plain fallback below instead of decoding "false".
        if ($utf8 !== false) {
            // redo htmlspecial chars
            if (!$save_html) {
                $utf8 = htmlspecialchars($utf8);
            }
            return charset_decode_utf_8($utf8);
        }
    }

    // aggressive decoding disabled. iso-2022-kr is not supported by iso_2022_support.php,
    // so the table based decoder is not used here and the string is only html encoded.
    // NOTE(review): this encodes even when $save_html is true - confirm that is intended.
    return htmlspecialchars($string);
}
/**
 * Collects the body text of the files selected by $param.
 *
 * $complete_id is split on '-' into a collection id and an optional package
 * id; together with DCTL_PROJECT_PATH they name the directory that is
 * scanned. Entries whose name matches the selector taken from $param (used as
 * a regular expression anchored at the start of the name) are read in sorted
 * order. From each file only the text between the '<!-- %BEGIN% -->' and
 * '<!-- %END% -->' markers is kept, whitespace runs are collapsed, and the
 * pieces are concatenated.
 *
 * @param string $complete_id "collection" or "collection-package" identifier
 * @param string $param selector, optionally written as item="..."
 * @return string concatenated content, empty string when nothing matched
 */
function dctl_insertContent($complete_id, $param)
{
    $idParts = explode('-', $complete_id);
    $collection_id = $idParts[0];
    // the package part is optional; do not read a missing array offset
    $package_id = isset($idParts[1]) ? $idParts[1] : '';
    $text = '';

    // reduce 'item="selector"' to the bare selector
    $fSelectorX = trim($param);
    $fSelectorX = str_ireplace('"', '', $fSelectorX);
    $fSelectorX = str_ireplace('item=', '', $fSelectorX);

    $dPath = DCTL_PROJECT_PATH . $collection_id;
    if ($package_id != '') {
        $dPath .= SYS_PATH_SEP . $package_id;
    }

    // a '$' in the selector marks a multi part selection
    $isMultiPart = stripos($fSelectorX, '$') !== FALSE;
    if ($isMultiPart) {
        $regexp = str_ireplace(DCTL_PACKAGE_BODY_REGEXP1, DCTL_PACKAGE_BODY_REGEXP2, $fSelectorX);
    } else {
        $regexp = $fSelectorX;
    }

    // list every entry of the directory that does not start with a dot
    $variants = array();
    if (is_dir($dPath)) {
        $handle = opendir($dPath);
        if ($handle !== false) {
            // compare with false explicitly: an entry named "0" is falsy and
            // would otherwise end the listing early
            while (($entry = readdir($handle)) !== false) {
                if (substr($entry, 0, 1) != '.') {
                    $variants[] = $entry;
                }
            }
            closedir($handle);
        }
    }

    // preg_grep() yields false when the selector is not a valid pattern
    $matched = preg_grep('/^' . $regexp . '/', $variants);
    $variants = ($matched === false) ? array() : array_values($matched);
    sort($variants);

    $max = count($variants) - 1;
    foreach ($variants as $vKey => $fSelector) {
        // multi part selections leave out their first and last match
        if ($isMultiPart && ($vKey <= 0 || $vKey >= $max)) {
            continue;
        }

        $fPath = $dPath . SYS_PATH_SEP . $fSelector;
        $textContent = cleanUpIndentation(charset_decode_utf_8(file_get_contents($fPath)));

        // keep only what lies between the two markers
        // NOTE(review): when a marker is missing stripos() returns false, which
        // is used as offset/length 0 here - confirm this is the wanted result.
        $header = '<!-- %BEGIN% -->';
        $textContent = substr($textContent, stripos($textContent, $header) + strlen($header));
        $footer = '<!-- %END% -->';
        $textContent = substr($textContent, 0, stripos($textContent, $footer));

        // drop inner BEGIN/END comments and collapse whitespace runs
        $textContent = preg_replace('/(<!--' . WS . '*BEGIN' . WS . '*-->)/', '', $textContent);
        $textContent = preg_replace('/(<!--' . WS . '*END' . WS . '*-->)/', '', $textContent);
        $textContent = preg_replace('/' . WS . '+/', ' ', $textContent);
        $textContent = forceUTF8($textContent);

        // parse a copy with "prefix:name" reduced to "name" to find out whether
        // the fragment contains any element at all
        $checkContent = preg_replace('/\\w+:(\\w+)/', '$1', $textContent);
        $checkContent = '<?xml version="1.0" encoding="UTF-8" ?><test>'
            . translateLiteral2NumericEntities($checkContent)
            . '</test>';
        $e = simplexml_load_string($checkContent, 'SimpleXMLElement', DCTL_XML_LOADER);
        // truthiness check kept from the original on purpose
        if ($e) {
            if (count($e->children()) < 1) {
                $textContent = '<div type="part" />';
            }
        }

        $text .= $textContent;
    }

    return strval($text);
}
mysql_query($insert) or die(mysql_error() . " on {$insert}\n<br>"); } } } else { if ($rawdata == "'nd'") { continue; } } // translation // NULL not determined/no data // -1 no growth // 0 no (most not)/minus // 1 yes (most do)/plus // 2 variable // import raw data $rawhtml = trim(stripslashes(mysql_escape_string(charset_decode_utf_8($rawdata))), "'"); $conv_from = array('/^[dv]$/', '/^ng$/', '/^\\+$/', '/^\\&\\#8722;$/', '/^\\(\\&\\#8722\\;\\)$/', '/^\\(\\+\\)$/', '/^ng$/', '/^[vd]\\&\\#8722;$/', '/^[vd]\\+$/', '/^\\-/$', '/^[vd]\\-$/'); $conv_to = array('2', '-2', '1', '0', '2', '2', '-2', '2', '2', '0', '2'); $trans = preg_replace($conv_from, $conv_to, $rawhtml); if (!mysql_num_rows(mysql_query("SELECT `src_id` FROM `{$tbl}` WHERE `src_id`='{$src_id}' AND `taxa_id`='{$taxa_id}' AND `char_id`='{$char_id}' AND `raw`='{$rawhtml}';"))) { echo "<br>inserting data {$rawhtml}\t{$trans}\t{$src_id}\t{$taxa_id}\t{$char_id}\n"; $insert = "INSERT INTO {$tbl} (`src_id`,`taxa_id`,`char_id`,`raw`,`data`) VALUES ('{$src_id}','{$taxa_id}','{$char_id}','{$rawhtml}','{$trans}');"; mysql_query($insert) or die(mysql_error() . " on {$insert}\n<br>"); } } } } } } } }