コード例 #1
0
/**
 * Validate a string field and throw when it cannot be accepted.
 *
 * Two checks run in order: the length of the value as reported by strlen()
 * against $max_length, then phutil_is_utf8_with_only_bmp_characters().
 *
 * @param string $value      Value to validate.
 * @param string $field_name Field name echoed in the error message.
 * @param int    $line_no    Line number echoed in the error message.
 * @param int    $max_length Upper bound compared against strlen($value).
 *
 * @return void
 *
 * @throws Exception When the value is longer than $max_length, or when it is
 *                   rejected by phutil_is_utf8_with_only_bmp_characters().
 */
function check_string_value($value, $field_name, $line_no, $max_length)
{
    $exceeds_limit = strlen($value) > $max_length;
    if ($exceeds_limit) {
        $message = "{$field_name} '{$value}' defined on line #{$line_no} is too long, "
            . "maximum {$field_name} length is {$max_length} characters.";
        throw new Exception($message);
    }

    // Nothing more to do once the value also passes the character-set check.
    $is_acceptable = phutil_is_utf8_with_only_bmp_characters($value);
    if ($is_acceptable) {
        return;
    }

    $message = "{$field_name} '{$value}' defined on line #{$line_no} is not a valid "
        . "UTF-8 string, "
        . "it should contain only UTF-8 characters.";
    throw new Exception($message);
}
コード例 #2
0
/**
 * Validate a string field and throw when it cannot be accepted.
 *
 * The length (as reported by strlen()) is checked first, then the value is
 * passed to phutil_is_utf8_with_only_bmp_characters(). Error messages are
 * built through pht().
 *
 * @param string $value      Value to validate.
 * @param string $field_name Field name echoed in the error message.
 * @param int    $line_no    Line number echoed in the error message.
 * @param int    $max_length Upper bound compared against strlen($value).
 *
 * @return void
 *
 * @throws Exception When the value is longer than $max_length, or when it is
 *                   rejected by phutil_is_utf8_with_only_bmp_characters().
 */
function check_string_value($value, $field_name, $line_no, $max_length)
{
    $exceeds_limit = strlen($value) > $max_length;
    if ($exceeds_limit) {
        $message = pht(
            "%s '%s' defined on line #%d is too long, "
                . "maximum %s length is %d characters.",
            $field_name,
            $value,
            $line_no,
            $field_name,
            $max_length
        );
        throw new Exception($message);
    }

    // Nothing more to do once the value also passes the character-set check.
    $is_acceptable = phutil_is_utf8_with_only_bmp_characters($value);
    if ($is_acceptable) {
        return;
    }

    $message = pht(
        "%s '%s' defined on line #%d is not a valid "
            . "UTF-8 string, it should contain only UTF-8 characters.",
        $field_name,
        $value,
        $line_no
    );
    throw new Exception($message);
}
コード例 #3
0
 /**
  * Round-trip a buffer of BMP byte sequences through the scratch table.
  *
  * The intent is to show that a string which passes our BMP check comes back
  * from MySQL unchanged, i.e. no silent truncation happens on insert.
  */
 public function testMySQLAgreesWithUsAboutBMP()
 {
     // Continuation bytes shared by the multi-byte sequences below.
     $continuation = range(0x80, 0xbf);
     $payload = '';

     // One-byte sequences: \x01 through \x7F.
     foreach (range(0x1, 0x7f) as $byte) {
         $payload .= chr($byte);
     }

     // Two-byte sequences: \xC2\xZZ through \xDF\xZZ.
     foreach (range(0xc2, 0xdf) as $lead) {
         $prefix = chr($lead);
         foreach ($continuation as $second) {
             $payload .= $prefix . chr($second);
         }
     }

     // Three-byte sequences: \xE0\xA0\xZZ upward for the \xE0 lead byte, then
     // \xE1\xZZ\xZZ through \xEF\xZZ\xZZ with the full continuation range.
     foreach (range(0xe0, 0xef) as $lead) {
         $second_min = ($lead === 0xe0) ? 0xa0 : 0x80;
         foreach (range($second_min, 0xbf) as $second) {
             $prefix = chr($lead) . chr($second);
             foreach ($continuation as $third) {
                 $payload .= $prefix . chr($third);
             }
         }
     }

     // Sanity-check the buffer before involving the database.
     $this->assertEqual(194431, strlen($payload));
     $this->assertTrue(phutil_is_utf8_with_only_bmp_characters($payload));

     // Write the buffer, load the row back by ID, and compare.
     $written = id(new HarbormasterScratchTable())
         ->setData('all.utf8.bmp')
         ->setBigData($payload)
         ->save();
     $loaded = id(new HarbormasterScratchTable())->load($written->getID());

     $this->assertEqual($payload, $loaded->getBigData());
 }
コード例 #4
0
 /**
  * Table-driven check of phutil_is_utf8() and
  * phutil_is_utf8_with_only_bmp_characters() against fixed inputs.
  */
 public function testUTF8BMP()
 {
     // Map of input string => array(expected phutil_is_utf8() result,
     // expected BMP-only result, label used in the assertion message).
     //
     // NOTE(review): several keys below render as U+FFFD replacement
     // characters in this copy; the original presumably held raw invalid byte
     // sequences. If any of these keys are byte-identical, PHP keeps only the
     // last entry for that key -- confirm against the upstream file.
     $tests = array("" => array(true, true, "empty string"), "a" => array(true, true, "a"), "a͠͠" => array(true, true, "a with combining"), "☃" => array(true, true, "snowman"), "￿" => array(true, true, "U+FFFF"), "��" => array(false, false, "Invalid, byte range."), "𐀀" => array(true, false, "U+10000"), "𝄞" => array(true, false, "gclef"), "musical 𝄞 g-clef" => array(true, false, "gclef text"), "�" => array(false, false, "Invalid, truncated."), "���" => array(false, false, "Nonminimal 3-byte character."), "�" => array(false, false, "Partial 2-byte character."), "�" => array(false, false, "Partial BMP 0xE0 character."), "�" => array(false, false, "Partial BMP cahracter."));
     foreach ($tests as $input => $test) {
         list($expect_utf8, $expect_bmp, $test_name) = $test;
         // General UTF-8 validity.
         $this->assertEqual($expect_utf8, phutil_is_utf8($input), pht('is_utf(%s)', $test_name));
         // Stricter check: valid UTF-8 restricted to BMP characters.
         $this->assertEqual($expect_bmp, phutil_is_utf8_with_only_bmp_characters($input), pht('is_utf_bmp(%s)', $test_name));
     }
 }
コード例 #5
0
 /**
  * Guard against query strings containing characters outside the BMP.
  *
  * MySQL silently truncates data at such characters even under its strictest
  * settings, which can cause data loss and security problems, so the string
  * is rejected up front instead.
  *
  * @param string $string String about to be placed into a query.
  *
  * @return void
  *
  * @throws AphrontQueryCharacterSetException When the string fails
  *         phutil_is_utf8_with_only_bmp_characters().
  */
 protected function validateUTF8String($string)
 {
     $is_safe = phutil_is_utf8_with_only_bmp_characters($string);

     if (!$is_safe) {
         $message = pht(
             'Attempting to construct a query containing characters outside of '
                 . 'the Unicode Basic Multilingual Plane. MySQL will silently truncate '
                 . 'this data if it is inserted into a `utf8` column. Use the `%%B` '
                 . 'conversion to escape binary strings data.'
         );
         throw new AphrontQueryCharacterSetException($message);
     }
 }
コード例 #6
0
 /**
  * Table-driven check of phutil_is_utf8(), phutil_is_utf8_slowly() and
  * phutil_is_utf8_with_only_bmp_characters() against fixed inputs.
  */
 public function testUTF8BMP()
 {
     // Map of input string => array(expected UTF-8 validity, expected
     // BMP-only result, label used in the assertion message).
     //
     // NOTE(review): several keys below render as U+FFFD replacement
     // characters in this copy; the original presumably held raw invalid byte
     // sequences. If any of these keys are byte-identical, PHP keeps only the
     // last entry for that key -- confirm against the upstream file.
     $tests = array('' => array(true, true, pht('empty string')), 'a' => array(true, true, 'a'), "a͠͠" => array(true, true, pht('%s with combining', 'a')), "☃" => array(true, true, pht('snowman')), "￿" => array(true, true, 'U+FFFF'), "��" => array(false, false, pht('Invalid, byte range.')), "����" => array(false, false, pht('Nonminimal 4-byte character.')), "𐀀" => array(true, false, 'U+10000'), "𝄞" => array(true, false, 'gclef'), "musical 𝄞 g-clef" => array(true, false, pht('gclef text')), "�" => array(false, false, pht('Invalid, truncated.')), "���" => array(false, false, pht('Nonminimal 3-byte character.')), "�" => array(false, false, pht('Partial 2-byte character.')), "�" => array(false, false, pht('Partial BMP 0xE0 character.')), "�" => array(false, false, pht('Partial BMP cahracter.')));
     foreach ($tests as $input => $test) {
         list($expect_utf8, $expect_bmp, $test_name) = $test;
         // Depending on what's installed on the system, this may use an
         // extension.
         $this->assertEqual($expect_utf8, phutil_is_utf8($input), pht('is_utf(%s)', $test_name));
         // Also test this against the pure PHP implementation, explicitly.
         $this->assertEqual($expect_utf8, phutil_is_utf8_slowly($input), pht('is_utf_slowly(%s)', $test_name));
         // Stricter check: valid UTF-8 restricted to BMP characters.
         $this->assertEqual($expect_bmp, phutil_is_utf8_with_only_bmp_characters($input), pht('is_utf_bmp(%s)', $test_name));
     }
 }