コード例 #1
0
 /**
  * Check the table returned by EncodingHelper::utf16MagicStrings().
  *
  * Every entry must be keyed by a string and hold an AbstractEncodingUtf
  * instance. Converting the key with its own encoding must produce either
  * an empty string or exactly "0 HEAD".
  *
  * @covers \Fisharebest\LibGedcom\Encodings\EncodingHelper::utf16MagicStrings
  */
 public function testUtf16MagicStrings()
 {
     $encoding_helper = new EncodingHelper();
     foreach ($encoding_helper->utf16MagicStrings() as $key => $value) {
         // The key is the magic string itself, so it must survive as a string array key.
         $this->assertTrue(is_string($key));
         $this->assertInstanceOf(AbstractEncodingUtf::class, $value);
         // Feed the magic string back through the encoding it is paired with.
         $convert = $value->toUtf8($key);
         // Only two results are acceptable: nothing at all, or the literal "0 HEAD".
         $this->assertRegExp('/^(|0 HEAD)$/', $convert);
     }
 }
コード例 #2
0
 /**
  * Detect the character encoding of GEDCOM data from the start of the record.
  *
  * Detection order:
  *  1. If the record starts with one of the helper's UTF-16 magic strings,
  *     the encoding paired with that magic string is returned.
  *  2. Otherwise the header's CHAR value (plus "/TYPE" when a TYPE line
  *     directly follows it) is upper-cased, trimmed and looked up in the
  *     helper's character-set table.
  *  3. If nothing matches, the problem is logged and ASCII is assumed.
  *
  * @param string $gedcom_record Data from the start of the file; not yet normalized.
  *
  * @return EncodingInterface The detected encoding, or an AsciiEncoding as the fallback.
  */
 private function detectEncodingFromHeader(string $gedcom_record) : EncodingInterface
 {
     $encoding_helper = new EncodingHelper();
     // UTF encodings are unambiguous
     foreach ($encoding_helper->utf16MagicStrings() as $magic_string => $encoding) {
         // PHP converts numeric-looking array keys to integers; always compare as a string.
         $magic_string = (string) $magic_string;
         // strncmp() is a plain prefix test. substr_compare() with offset 0 can emit
         // a warning and return false for an empty record on older PHP versions.
         if (strncmp($gedcom_record, $magic_string, strlen($magic_string)) === 0) {
             $this->logger->info(self::CHARSET_DETECTED, [$encoding::ENCODING_NAME]);
             return $encoding;
         }
     }
     // Use a very loose interpretation of GEDCOM, as this data is not yet normalized.
     preg_match('/^\\s*0+\\s*HEAD(?:ER)?[^\\r\\n]*' . '(?:[\\r\\n]\\s*0*[1-9] [^\\r\\n]*)*' . '(?:[\\r\\n]\\s*0*1 CHAR(?:ACTER)? (?P<CHAR>[^\\r\\n]*))' . '(?:[\\r\\n]\\s*0*2 TYPE (?P<TYPE>[^\\r\\n]*))?' . '/', $gedcom_record, $match);
     // When the pattern does not match, the named groups are absent and both values become ''.
     $char = trim(strtoupper($match['CHAR'] ?? ''));
     $type = trim(strtoupper($match['TYPE'] ?? ''));
     if ($type !== '') {
         $char .= '/' . $type;
     }
     foreach ($encoding_helper->characterSetsEncodings() as $character_sets_encoding) {
         list($character_sets, $encoding) = $character_sets_encoding;
         // Strict comparison: loose comparison treats numeric-looking names
         // (e.g. "0850" and "850") as equal and would pick the wrong entry.
         if (in_array($char, $character_sets, true)) {
             if ($char === $encoding::ENCODING_NAME) {
                 $this->logger->info(self::CHARSET_DETECTED, [$char]);
             } else {
                 // A recognised alias rather than the encoding's own name: report it
                 // as invalid and record which encoding is used instead.
                 $this->logger->error(self::CHARSET_INVALID, [$char]);
                 $this->logger->notice(self::CHARSET_ASSUMED, [$encoding::ENCODING_NAME]);
             }
             return $encoding;
         }
     }
     // Nothing matched: distinguish "no charset given" from "unknown charset".
     if ($char === '') {
         $this->logger->error(self::CHARSET_MISSING);
     } else {
         $this->logger->error(self::CHARSET_INVALID, [$char]);
     }
     $this->logger->notice(self::CHARSET_ASSUMED, [AsciiEncoding::ENCODING_NAME]);
     return new AsciiEncoding();
 }