/**
 * Returns the given text unchanged.
 *
 * The original (Spanish) comment described this as "encoding to ISO 8859-1",
 * but the body returned its argument before any conversion took place: the
 * branch that picked Encoding::toUTF8() or Encoding::toLatin1() depending on
 * whether Run::ENCODING contains "utf" sat behind an unconditional return
 * and could never execute. That dead branch is removed here so the method
 * reads as what it actually does; reinstate the conversion deliberately if
 * it is wanted again.
 *
 * @param mixed $text Value to pass through.
 *
 * @return mixed The same value, untouched.
 */
public function encoding($text)
{
    return $text;
}
// Parse the annotation XML that belongs to the current file and read the
// document-level attributes stored on its first <feature> element.
// $authors and $title are not used anywhere in the visible part of the script.
$xml_content = simplexml_load_file($dir . basename($file));
$authors = $xml_content->feature[0]['authors'];
$title = $xml_content->feature[0]['title'];

// Walk the <feature> elements starting at index 2; indexes 0 and 1 are skipped
// (presumably document metadata rather than annotations - confirm).
// NOTE(review): count($xml_content) counts the children of the root element and
// is re-evaluated on every iteration; it only matches the feature index if all
// root children are <feature> elements - confirm.
// The loop body continues beyond the end of this excerpt.
for ($i = 2; $i < count($xml_content); $i++) {
    // Attributes describing the annotated pair: kind of case, obfuscation flag,
    // and the language of each side.
    $type = (string) $xml_content->feature[$i]['type'];
    $obfuscation = (string) $xml_content->feature[$i]['manual_obfuscation'];
    $this_lang = (string) $xml_content->feature[$i]['this_language'];
    $src_lang = (string) $xml_content->feature[$i]['source_language'];

    // Offsets and lengths of the passage in this document and in the source.
    $this_offset = intval($xml_content->feature[$i]['this_offset']);
    $this_lenght = intval($xml_content->feature[$i]['this_length']);
    $src_offset = intval($xml_content->feature[$i]['source_offset']);
    $src_lenght = intval($xml_content->feature[$i]['source_length']);

    // Source file name, its id (name without the .txt suffix) and its content.
    $src_txt = (string) $xml_content->feature[$i]['source_reference'];
    $src_ref = (string) basename($src_txt, '.txt');
    $src_content = loadFile($src_dir . $src_txt);

    // Cut both passages out of their documents. The text goes through toLatin1()
    // before the byte-wise substr() and back through toUTF8() afterwards
    // (presumably so the offsets address one byte per character - confirm).
    // NOTE(review): the source slice uses offset + 1 and length + 1 while the
    // suspicious slice uses the raw values - confirm this asymmetry is intended.
    $this_feature_content = $encode->toUTF8(substr($encode->toLatin1($this_content), $this_offset, $this_lenght));
    $src_feature_content = $encode->toUTF8(substr($encode->toLatin1($src_content), $src_offset + 1, $src_lenght + 1));

    // Write each passage to its language directory; the file name carries $j and
    // the zero-based annotation number ($i - 2).
    writeFile($this_lang . '/SUSP_' . $j . '_' . ($i - 2) . '.txt', $this_feature_content);
    writeFile($src_lang . '/SRC_' . $j . '_' . ($i - 2) . '.txt', $src_feature_content);

    // Noise heuristic: count the space-delimited tokens "vna", "vn" and "i" in the
    // suspicious passage; five or more occurrences mark it as noisy. The flag is
    // kept as the string 'true' / 'false', not as a boolean.
    $noiseNumber = substr_count($this_feature_content, ' vna ');
    $noiseNumber += substr_count($this_feature_content, ' vn ');
    $noiseNumber += substr_count($this_feature_content, ' i ');
    $noise = $noiseNumber >= 5 ? 'true' : 'false';

    $textualEquivalent = new TextualEquivalent($name, $this_lang, $this_offset, $this_lenght, $this_feature_content, $src_ref, $src_lang, $src_offset, $src_lenght, $src_feature_content, $type, $obfuscation, $noise);

    // Build the <info> XML document by string concatenation; chr(10) is "\n".
    // NOTE(review): the values are inserted without XML escaping, so a passage
    // containing "&" or "<" would produce malformed XML - consider escaping.
    $string = '<?xml version="1.0" encoding="UTF-8"?>' . chr(10);
    $string .= '<info>' . chr(10);
    $string .= '<sourceIDDoc>' . $src_ref . '</sourceIDDoc>' . chr(10);
    $string .= '<sourceTextLanguage>' . $src_lang . '</sourceTextLanguage>' . chr(10);
    $string .= '<sourceTextOffset>' .
        $src_offset . '</sourceTextOffset>' . chr(10);
    $string .= '<sourceTextLength>' . $src_lenght . '</sourceTextLength>' . chr(10);
    $string .= '<sourceTextContent>' . $src_feature_content . '</sourceTextContent>' . chr(10);
/**
 * Converts text to the encoding named by the given label.
 *
 * The label is first passed through self::normalizeEncoding(). A normalized
 * value of 'UTF-8' dispatches to Encoding::toUTF8(), 'ISO-8859-1' dispatches
 * to Encoding::toLatin1(); any other value yields null.
 *
 * @param mixed $encodingLabel Encoding label, handed to self::normalizeEncoding().
 * @param mixed $text          Value handed to the selected converter.
 *
 * @return mixed Whatever the selected converter returns, or null when the
 *               normalized label matches neither supported encoding.
 */
public static function encode($encodingLabel, $text)
{
    // switch compares loosely, exactly like the == checks it stands in for.
    switch (self::normalizeEncoding($encodingLabel)) {
        case 'UTF-8':
            return Encoding::toUTF8($text);
        case 'ISO-8859-1':
            return Encoding::toLatin1($text);
        default:
            return null;
    }
}
<?php
// Browser-viewable smoke test for the Encoding class: each check prints one
// HTML row with a green "Pass" or red "Failed" badge.
require_once "../autoload.php";

/**
 * Prints one result row and reports whether the check passed.
 *
 * @param mixed $desc Description echoed after the badge.
 * @param mixed $a    Actual value.
 * @param mixed $b    Expected value; compared to $a with ===.
 *
 * @return bool True when $a and $b are identical.
 */
function is($desc, $a, $b)
{
    $passed = ($a === $b);
    $badge = $passed
        ? '<span style="background-color: green;"> Pass </span> '
        : '<span style="background-color: red;"> Failed </span> ';
    echo $badge . $desc . "<br/>\n";

    return $passed;
}

// NOTE(review): the fixtures below look like deliberately mixed-encoding byte
// sequences and may contain non-printable bytes; edit them only with a tool
// that preserves raw bytes - confirm against the upstream copy of this test.
$orig = "αΓ‘ΒΓ‘Ε‘αιν";

// toUTF8(): converts whatever is not yet UTF-8 and leaves existing UTF-8 alone.
is(
    "Encoding::toUTF8() convierte a UTF-8 todos los caracteres que no son ya UTF-8, teniendo en cuenta los que son Windows-1252, pero dejando sin cambios a los que ya son UTF-8",
    Encoding::toUTF8($orig),
    "Γ‘Ε‘Γ‘ΒÑőÑéΓ"
);

$utf8_from_iso = "ΒΒΒΒΒΒ ΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒΒ";
$utf8_from_win = "β¬ΒβΖββ¦β β‘Λβ°Ε βΉΕΒΕ½ΒΒβββββ’ββΛβ’Ε‘βΊΕΒΕΎΕΈ";

// UTF8FixWin1252Chars(): UTF-8 to UTF-8, repairing Windows-1252 special characters.
is(
    "Encoding::UTF8FixWin1252Chars() convierte UTF-8 a UTF-8 corrigiendo caracteres especiales de Windows-1252",
    Encoding::UTF8FixWin1252Chars($utf8_from_iso),
    $utf8_from_win
);

// toLatin1(): the three fixtures converted back to single-byte text.
is(
    "Encoding::toLatin1() convierte los UTF-8 a Windows-1252",
    Encoding::toLatin1($orig),
    "ααααιν"
);
is(
    "Encoding::toLatin1() convierte los UTF-8 con caracteres convertidos desde Windows-1252 a Windows-1252",
    Encoding::toLatin1($utf8_from_win),
    " "
);
is(
    "Encoding::toLatin1() convierte los UTF-8 con caracteres convertidos desde ISO8859-1 a Windows-1252",
    Encoding::toLatin1($utf8_from_iso),
    " "
);

// toWin1252(): same three fixtures and the same expected values as above.
is(
    "Encoding::toWin1252() convierte los UTF-8 a Windows-1252",
    Encoding::toWin1252($orig),
    "ααααιν"
);
is(
    "Encoding::toWin1252() convierte los UTF-8 con caracteres convertidos desde Windows-1252 a Windows-1252",
    Encoding::toWin1252($utf8_from_win),
    " "
);
is(
    "Encoding::toWin1252() convierte los UTF-8 con caracteres convertidos desde ISO8859-1 a Windows-1252",
    Encoding::toWin1252($utf8_from_iso),
    " "
);

// fixUTF8(): undoes three rounds of utf8_encode() applied on top of the fixture.
// NOTE(review): utf8_encode() is deprecated as of PHP 8.2.
is(
    "Encoding::fixUTF8() corrige UTF-8 convertido repetidamente",
    Encoding::fixUTF8(utf8_encode(utf8_encode(utf8_encode($orig)))),
    "ÑőÑőÑőÑéΓ"
);
/**
 * Asserts that Encoding::toLatin1() applied to the $utf8 fixture equals the
 * $iso88591 fixture.
 */
public function testEncodeToLatin1()
{
    $this->assertEquals(
        $this->iso88591,
        Encoding::toLatin1($this->utf8)
    );
}