/**
  * Populates the static lookup tables that translate between HTML named
  * entities and the characters they stand for.
  *
  * For every entity name listed below, the character is obtained with
  * html_entity_decode(), converted to UTF-32, and stored in both directions:
  * self::$_characterToEntityMap (character => name) and
  * self::$_entityToCharacterMap (name => character). The method also records
  * the length of the longest entity name plus 2 in self::$_longestEntity and
  * sets self::$_mapIsInitialized to true.
  *
  * Calling the method again after it has completed is a no-op, so repeated
  * calls cannot inflate self::$_longestEntity.
  *
  * @return void
  */
 private function _initializeMaps()
 {
     // A second run would rebuild identical maps but add another 2 to
     // $_longestEntity each time, so stop here once the maps exist.
     if (self::$_mapIsInitialized) {
         return;
     }

     $entityNames = array(
         "quot", "amp", "lt", "gt", "nbsp", "iexcl", "cent", "pound", "curren", "yen",
         "brvbar", "sect", "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
         "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot", "cedil", "sup1",
         "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
         "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
         "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
         "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
         "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
         "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
         "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
         "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
         "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml",
         "OElig", "oelig", "Scaron", "scaron", "Yuml", "fnof", "circ", "tilde",
         "Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta", "Eta", "Theta",
         "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Omicron", "Pi",
         "Rho", "Sigma", "Tau", "Upsilon", "Phi", "Chi", "Psi", "Omega",
         "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
         "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi",
         "rho", "sigmaf", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega",
         "thetasym", "upsih", "piv", "ensp", "emsp", "thinsp", "zwnj", "zwj", "lrm", "rlm",
         "ndash", "mdash", "lsquo", "rsquo", "sbquo", "ldquo", "rdquo", "bdquo",
         "dagger", "Dagger", "bull", "hellip", "permil", "prime", "Prime",
         "lsaquo", "rsaquo", "oline", "frasl", "euro", "image", "weierp", "real",
         "trade", "alefsym", "larr", "uarr", "rarr", "darr", "harr", "crarr",
         "lArr", "uArr", "rArr", "dArr", "hArr", "forall", "part", "exist",
         "empty", "nabla", "isin", "notin", "ni", "prod", "sum", "minus",
         "lowast", "radic", "prop", "infin", "ang", "and", "or", "cap", "cup", "int",
         "there4", "sim", "cong", "asymp", "ne", "equiv", "le", "ge",
         "sub", "sup", "nsub", "sube", "supe", "oplus", "otimes", "perp", "sdot",
         "lceil", "rceil", "lfloor", "rfloor", "lang", "rang", "loz",
         "spades", "clubs", "hearts", "diams"
     );

     // Start from the current static value so the first call yields exactly
     // what the previous implementation produced.
     $longestName = self::$_longestEntity;

     foreach ($entityNames as $entityName) {
         $character = html_entity_decode('&' . $entityName . ';', ENT_QUOTES, 'UTF-8');
         // Normalize encoding to UTF-32
         $character = mb_convert_encoding($character, 'UTF-32', 'UTF-8');

         self::$_characterToEntityMap[$character] = $entityName;
         self::$_entityToCharacterMap[$entityName] = $character;

         // Track the length of the longest entity name seen so far.
         $nameLength = mb_strlen($entityName, 'UTF-8');
         if ($nameLength > $longestName) {
             $longestName = $nameLength;
         }
     }

     // NOTE(review): the extra 2 presumably covers the leading '&' and the
     // trailing ';' of a complete entity reference - confirm against callers.
     self::$_longestEntity = $longestName + 2;
     self::$_mapIsInitialized = true;
 }
Example #2
0
 /**
  * Checks that decode() returns a string that is UTF-8 encoded throughout
  * when the input mixes plain ASCII with the numeric entities &#x9;,
  * &#xa0;, &#xa1; and &#xa2;.
  *
  * The expected value is first verified byte by byte, so a failure of the
  * fixture itself is reported separately from a failure of decode().
  *
  * @return void
  */
 public function testDecodeDoesNotProduceMixedCharacterEncoding()
 {
     $codec = new HTMLEntityCodec();

     // Build the expected UTF-8 string from explicit ISO-8859-1 bytes.
     // Hex escapes are used for 0xA0, 0xA1 and 0xA2 so the fixture does not
     // depend on the encoding this source file happens to be saved in.
     $expected = mb_convert_encoding("a b c d e f\tg h i j\xa0k\xa1l\xa2m", 'UTF-8', 'ISO-8859-1');

     // check that the encoding conversion went well and the expected string is correct
     // (unpack('C*', ...) returns a 1-based array of byte values)
     $expected_unpacked = array(
         1 => 0x61, 2 => 0x20, 3 => 0x62, 4 => 0x20, 5 => 0x63, 6 => 0x20, 7 => 0x64,
         8 => 0x20, 9 => 0x65, 10 => 0x20, 11 => 0x66, 12 => 0x9, 13 => 0x67, 14 => 0x20,
         15 => 0x68, 16 => 0x20, 17 => 0x69, 18 => 0x20, 19 => 0x6a, 20 => 0xc2, 21 => 0xa0,
         22 => 0x6b, 23 => 0xc2, 24 => 0xa1, 25 => 0x6c, 26 => 0xc2, 27 => 0xa2, 28 => 0x6d
     );
     $unpacked = unpack('C*', $expected);
     $this->assertSame($expected_unpacked, $unpacked, 'Ensuring expected value was correctly encoded to UTF-8 - %s');

     // decode and hope we get $expected!
     $this->assertEquals($expected, $codec->decode('a b c d e f&#x9;g h i j&#xa0;k&#xa1;l&#xa2;m'));
 }