Example #1
0
/**
 * Decode gb18030 encoded string
 *
 * Converters are tried in this order: recode, then iconv combined with the
 * internal utf-8 decoder. When no converter is available, or the available
 * one fails to convert the input, the string is returned undecoded.
 *
 * @param string $string gb18030 string
 * @param boolean $save_html don't html encode special characters if true
 * @return string $string decoded string
 */
function charset_decode_gb18030($string, $save_html = false)
{
    // don't do decoding when there are no 8bit symbols
    if (!sq_is8bit($string, 'gb18030')) {
        return $string;
    }

    // entity <-> character pairs used to undo htmlspecialchars()
    $html_entities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $html_chars = array('"', '<', '>', '&');

    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        // if string is already sanitized, undo htmlspecial chars before recoding
        $plain = $save_html ? $string : str_replace($html_entities, $html_chars, $string);
        $recoded = recode_string("gb18030..html", $plain);
        // recode_string() returns false when the request cannot be performed;
        // in that case keep the untouched input and try the next decoder.
        if ($recoded !== false) {
            // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
            if ($save_html) {
                $recoded = str_replace($html_entities, $html_chars, $recoded);
            }
            return $recoded;
        }
    }

    /*
     * iconv does not support html target, but internal utf-8 decoding is faster
     * than pure php implementation.
     */
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $utf8 = iconv('gb18030', 'utf-8', $string);
        // iconv() returns false on failure (e.g. illegal input sequence);
        // never hand that to the utf-8 decoder.
        if ($utf8 !== false) {
            return charset_decode_utf_8($utf8);
        }
    }

    // mbstring does not support gb18030
    // pure php decoding is not implemented.
    return $string;
}
Example #2
0
/**
 * Decode euc-kr encoded string
 *
 * Converters are tried in this order: recode, iconv combined with the
 * internal utf-8 decoder, then mbstring. A converter that fails is skipped
 * and the next one is tried; when none succeeds the string is returned
 * undecoded.
 *
 * @param string $string euc-kr string
 * @param boolean $save_html don't html encode special characters if true
 * @return string $string decoded string
 */
function charset_decode_euc_kr($string, $save_html = false)
{
    // don't do decoding when there are no 8bit symbols
    if (!sq_is8bit($string, 'euc-kr')) {
        return $string;
    }

    // entity <-> character pairs used to undo htmlspecialchars()
    $html_entities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $html_chars = array('"', '<', '>', '&');

    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        // if string is already sanitized, undo htmlspecial chars before recoding
        $plain = $save_html ? $string : str_replace($html_entities, $html_chars, $string);
        $recoded = recode_string("euc-kr..html", $plain);
        // recode_string() returns false when the request cannot be performed;
        // in that case keep the untouched input and try the next decoder.
        if ($recoded !== false) {
            // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
            if ($save_html) {
                $recoded = str_replace($html_entities, $html_chars, $recoded);
            }
            return $recoded;
        }
    }

    /*
     * iconv does not support html target, but internal utf-8 decoding is faster
     * than pure php implementation.
     */
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $utf8 = iconv('euc-kr', 'utf-8', $string);
        // iconv() returns false on failure (e.g. illegal input sequence);
        // never hand that to the utf-8 decoder, fall through to mbstring instead.
        if ($utf8 !== false) {
            return charset_decode_utf_8($utf8);
        }
    }

    // try mbstring
    if (function_exists('mb_convert_encoding') && function_exists('sq_mb_list_encodings') && check_php_version(4, 3, 0) && in_array('euc-kr', sq_mb_list_encodings())) {
        return mb_convert_encoding($string, 'HTML-ENTITIES', 'EUC-KR');
    }

    // no usable decoder
    return $string;
}
Example #3
0
/**
 * Converts iso-2022-kr texts
 *
 * The input is first un-sanitized (unless $save_html is set), because html
 * entities can break iso-2022-kr sequences. It is then converted with recode
 * or, failing that, with iconv plus the internal utf-8 decoder. When neither
 * converter is usable the text is only passed through htmlspecialchars();
 * iso-2022-kr is not supported by iso_2022_support.php, so there is no pure
 * php decoder.
 *
 * @param string $string iso-2022-kr encoded string
 * @param boolean $save_html don't html encode special characters if true
 * @return string html encoded string
 */
function charset_decode_iso_2022_kr($string, $save_html = false)
{
    // entity <-> character pairs used to undo htmlspecialchars()
    $html_entities = array('&quot;', '&lt;', '&gt;', '&amp;');
    $html_chars = array('"', '<', '>', '&');

    // undo htmlspecial chars (they can break iso-2022-kr)
    if (!$save_html) {
        $string = str_replace($html_entities, $html_chars, $string);
    }

    // this is CPU intensive task. Use recode functions if they are available.
    if (function_exists('recode_string')) {
        $recoded = recode_string("iso-2022-kr..html", $string);
        // recode_string() returns false when the request cannot be performed;
        // in that case try the next decoder.
        if ($recoded !== false) {
            // if string sanitizing is not needed, undo htmlspecialchars applied by recode.
            if ($save_html) {
                $recoded = str_replace($html_entities, $html_chars, $recoded);
            }
            return $recoded;
        }
    }

    // iconv does not support html target, but internal utf-8 decoding is faster than iso-2022-kr.
    if (function_exists('iconv') && file_exists(SM_PATH . 'functions/decode/utf_8.php')) {
        include_once SM_PATH . 'functions/decode/utf_8.php';
        $utf8 = iconv('iso-2022-kr', 'utf-8', $string);
        // iconv() returns false on failure; fall through to the plain
        // htmlspecialchars() fallback instead of decoding a boolean.
        if ($utf8 !== false) {
            // redo htmlspecial chars
            if (!$save_html) {
                $utf8 = htmlspecialchars($utf8);
            }
            return charset_decode_utf_8($utf8);
        }
    }

    // No converter could handle the text: return it html encoded.
    // NOTE(review): this fallback escapes even when $save_html is true,
    // unlike the iconv branch above - confirm that this is intended.
    return htmlspecialchars($string);
}
Example #4
0
/**
 * Collects the marked content of the files selected by $param inside a
 * collection (or collection/package) directory and returns it as one string.
 *
 * The directory is DCTL_PROJECT_PATH followed by the collection id and, when
 * present, the package id. Entries not starting with "." whose name matches
 * the selector (used as an anchored regular expression) are processed in
 * sorted order. For each processed file the text between the
 * "<!-- %BEGIN% -->" and "<!-- %END% -->" markers is extracted, inner
 * BEGIN/END comments are removed and whitespace runs are collapsed.
 *
 * When the selector contains "$" it is treated as multi-part: it is rewritten
 * with DCTL_PACKAGE_BODY_REGEXP1/2 and only the files strictly between the
 * first and the last match are included.
 *
 * @param string $complete_id "collection" or "collection-package" identifier
 * @param string $param selector, optionally written as item="..."
 * @return string concatenated content, empty when nothing matches
 */
function dctl_insertContent($complete_id, $param)
{
    $complete_id = explode('-', $complete_id);
    $collection_id = $complete_id[0];
    // the package part is optional; do not read a missing array index
    $package_id = isset($complete_id[1]) ? $complete_id[1] : '';
    $text = '';

    // strip the quotes and the item= prefix from the selector
    $fSelectorX = trim($param);
    $fSelectorX = str_ireplace('"', '', $fSelectorX);
    $fSelectorX = str_ireplace('item=', '', $fSelectorX);

    $dPath = DCTL_PROJECT_PATH . $collection_id;
    if ($package_id != '') {
        $dPath .= SYS_PATH_SEP . $package_id;
    }

    $isMultiPart = stripos($fSelectorX, '$') !== FALSE;
    if ($isMultiPart) {
        $regexp = str_ireplace(DCTL_PACKAGE_BODY_REGEXP1, DCTL_PACKAGE_BODY_REGEXP2, $fSelectorX);
    } else {
        $regexp = $fSelectorX;
    }

    // list the visible directory entries
    $variants = array();
    if (is_dir($dPath)) {
        $handle = opendir($dPath);
        if ($handle !== false) {
            // compare against false explicitly: an entry named "0" is falsy
            // and would otherwise end the loop early
            while (false !== ($entry = readdir($handle))) {
                if (substr($entry, 0, 1) != '.') {
                    $variants[] = $entry;
                }
            }
            closedir($handle);
        }
    }

    // the selector is used as a regular expression anchored at the start of the name
    $variants = preg_grep('/^' . $regexp . '/', $variants);
    if ($variants === false) {
        // invalid pattern: nothing can match
        $variants = array();
    }
    // sort() also reindexes the matches from 0
    sort($variants);
    $max = count($variants) - 1;

    foreach ($variants as $vKey => $fSelector) {
        // multi-part selections skip the first and the last matching file
        if ($isMultiPart && !($vKey > 0 && $vKey < $max)) {
            continue;
        }
        $fPath = $dPath . SYS_PATH_SEP . $fSelector;
        $textContent = cleanUpIndentation(charset_decode_utf_8(file_get_contents($fPath)));

        // keep only what lies between the %BEGIN% and %END% markers
        // NOTE(review): when a marker is missing stripos() yields false, so the
        // first strlen($header) bytes are dropped / the content becomes empty -
        // confirm whether files without markers can occur.
        $header = '<!-- %BEGIN% -->';
        $textContent = substr($textContent, stripos($textContent, $header) + strlen($header));
        $footer = '<!-- %END% -->';
        $textContent = substr($textContent, 0, stripos($textContent, $footer));

        // WS is a project constant used as a regex fragment
        // (presumably a whitespace class - confirm)
        $textContent = preg_replace('/(<!--' . WS . '*BEGIN' . WS . '*-->)/', '', $textContent);
        $textContent = preg_replace('/(<!--' . WS . '*END' . WS . '*-->)/', '', $textContent);
        $textContent = preg_replace('/' . WS . '+/', ' ', $textContent);
        $textContent = forceUTF8($textContent);

        // parse a copy with "prefix:name" shortened to "name" to see whether
        // the fragment holds any element at all
        $checkContent = preg_replace('/\\w+:(\\w+)/', '$1', $textContent);
        $checkContent = '<?xml version="1.0" encoding="UTF-8" ?><test>' . translateLiteral2NumericEntities($checkContent) . '</test>';
        // NOTE(review): the truthiness test also skips a wrapper that SimpleXML
        // considers empty, not only parse failures - confirm this is intended.
        $e = simplexml_load_string($checkContent, 'SimpleXMLElement', DCTL_XML_LOADER);
        if ($e) {
            if (count($e->children()) < 1) {
                // no child element: substitute an empty part placeholder
                $textContent = '<div type="part" />';
            }
        }
        $text .= $textContent;
    }

    return strval($text);
}
Example #5
0
                                         mysql_query($insert) or die(mysql_error() . " on {$insert}\n<br>");
                                     }
                                 }
                             } else {
                                 if ($rawdata == "'nd'") {
                                     continue;
                                 }
                             }
                             //		translation
                             //		NULL	not determined/no data
                             //		-1	no growth
                             //		0	no (most not)/minus
                             //		1	yes (most do)/plus
                             //		2	variable
                             // 	      import raw data
                             $rawhtml = trim(stripslashes(mysql_escape_string(charset_decode_utf_8($rawdata))), "'");
                             $conv_from = array('/^[dv]$/', '/^ng$/', '/^\\+$/', '/^\\&\\#8722;$/', '/^\\(\\&\\#8722\\;\\)$/', '/^\\(\\+\\)$/', '/^ng$/', '/^[vd]\\&\\#8722;$/', '/^[vd]\\+$/', '/^\\-/$', '/^[vd]\\-$/');
                             $conv_to = array('2', '-2', '1', '0', '2', '2', '-2', '2', '2', '0', '2');
                             $trans = preg_replace($conv_from, $conv_to, $rawhtml);
                             if (!mysql_num_rows(mysql_query("SELECT `src_id` FROM `{$tbl}` WHERE `src_id`='{$src_id}' AND `taxa_id`='{$taxa_id}' AND `char_id`='{$char_id}' AND `raw`='{$rawhtml}';"))) {
                                 echo "<br>inserting data {$rawhtml}\t{$trans}\t{$src_id}\t{$taxa_id}\t{$char_id}\n";
                                 $insert = "INSERT INTO {$tbl} (`src_id`,`taxa_id`,`char_id`,`raw`,`data`) VALUES ('{$src_id}','{$taxa_id}','{$char_id}','{$rawhtml}','{$trans}');";
                                 mysql_query($insert) or die(mysql_error() . " on {$insert}\n<br>");
                             }
                         }
                     }
                 }
             }
         }
     }
 }