コード例 #1
0
ファイル: IPTC.php プロジェクト: claudinec/galan-wiki
 /**
  * Convert a single IPTC value to UTF-8 (inner helper used by the charset conversion code).
  *
  * With a charset given, the value is converted via iconv() and trimmed; a failed
  * conversion is logged to the 'iptc' debug log and yields an empty string.
  * Without a charset, the value is returned unchanged when the UTF-8 validator
  * leaves it untouched; otherwise it is re-processed as Windows-1252.
  *
  * @param string $data The raw IPTC value (passed directly to iconv() and trim())
  * @param string $charset Source charset name; a falsy value means "unknown"
  *
  * @return string The value as UTF-8
  */
 private static function convIPTCHelper($data, $charset)
 {
     if (!$charset) {
         // No declared charset (no 1:90 tag): in practice the data is then
         // ASCII, Latin-1 or UTF-8. Run the validator on a copy; the code relies
         // on it rewriting its argument in place when the input is not clean UTF-8.
         $validated = $data;
         UtfNormal\Validator::quickIsNFCVerify($validated);
         if ($validated === $data) {
             // Validation changed nothing, so treat the input as UTF-8 already.
             // NOTE(review): this path returns the value untrimmed, unlike the
             // converted paths below - confirm whether that is intentional.
             return $data;
         }
         // Not valid UTF-8: pretend it is Windows-1252 and convert from that.
         return self::convIPTCHelper($data, 'Windows-1252');
     }

     // iconv() may emit notices on malformed input; silence them around the call.
     MediaWiki\suppressWarnings();
     $converted = iconv($charset, "UTF-8//IGNORE", $data);
     MediaWiki\restoreWarnings();

     if ($converted === false) {
         wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset {$charset} to utf-8");
         return "";
     }

     return trim($converted);
 }
コード例 #2
0
ファイル: Exif.php プロジェクト: mb720/mediawiki
 /**
  * Decode "character code" Exif strings such as UserComment (see p. 34 of the
  * Exif standard): the first 8 bytes identify the charset, the rest is the value.
  *
  * The decoded, trimmed UTF-8 value replaces $this->mFilteredExifData[$prop].
  * The property is removed instead when the raw value is 8 bytes or shorter,
  * when charset conversion fails, or when the result is empty after trimming.
  *
  * This has not been tested on any Shift-JIS strings.
  *
  * @param string $prop Prop name
  */
 private function charCodeString($prop)
 {
     if (!isset($this->mFilteredExifData[$prop])) {
         return;
     }

     $raw = $this->mFilteredExifData[$prop];
     if (strlen($raw) <= 8) {
         // invalid. Must be at least 9 bytes long.
         $this->debug($raw, __FUNCTION__, false);
         unset($this->mFilteredExifData[$prop]);
         return;
     }

     $charCode = substr($raw, 0, 8);
     $val = substr($raw, 8);

     // The character code is always exactly 8 bytes, NUL padded. The padding is
     // spelled out with escapes so the comparison does not depend on invisible
     // bytes in (or the encoding of) this source file.
     if ($charCode === "JIS\x00\x00\x00\x00\x00") {
         $charset = "Shift-JIS";
     } elseif ($charCode === "UNICODE\x00") {
         $charset = "UTF-16" . $this->byteOrder;
     } else {
         // ascii or undefined.
         $charset = "";
     }

     if ($charset) {
         MediaWiki\suppressWarnings();
         $val = iconv($charset, 'UTF-8//IGNORE', $val);
         MediaWiki\restoreWarnings();
     } else {
         // if valid utf-8, assume that, otherwise assume windows-1252.
         // The validator is run on a copy; a changed copy means the input
         // was not clean UTF-8.
         $valCopy = $val;
         UtfNormal\Validator::quickIsNFCVerify($valCopy);
         if ($valCopy !== $val) {
             MediaWiki\suppressWarnings();
             $val = iconv('Windows-1252', 'UTF-8//IGNORE', $val);
             MediaWiki\restoreWarnings();
         }
     }

     if ($val === false) {
         // iconv() failed. Drop the property explicitly rather than letting
         // false fall through trim() and be reported as "only whitespace".
         $this->debug($raw, __FUNCTION__, "{$prop}: Charset conversion failed");
         unset($this->mFilteredExifData[$prop]);
         return;
     }

     // trim and check to make sure not only whitespace.
     $val = trim($val);
     if (strlen($val) === 0) {
         // only whitespace.
         $this->debug($raw, __FUNCTION__, "{$prop}: Is only whitespace");
         unset($this->mFilteredExifData[$prop]);
         return;
     }

     // all's good.
     $this->mFilteredExifData[$prop] = $val;
 }
コード例 #3
0
 /**
  * Extract the metadata segments of interest from a jpeg file
  * (based on GIFMetadataExtractor).
  *
  * We can almost use getimagesize to do this, but gis doesn't support having
  * multiple app1 segments, and so can't extract xmp on files containing both
  * exif and xmp data.
  *
  * The returned array always has the list-valued keys 'XMP_ext', 'COM' and
  * 'PSIR'. 'XMP' (string) and 'byteOrder' ('BE' or 'LE') are only set when the
  * corresponding APP1 segment is found; XMP segments are only collected when
  * XMPReader::isSupported() is true.
  *
  * All marker bytes and NUL-terminated segment signatures are written with
  * explicit \x escapes so the comparisons are independent of this source
  * file's encoding. The file handle is closed on every exit path (uses
  * try/finally, PHP >= 5.5).
  *
  * @param string $filename Name of jpeg file
  * @return array Array of interesting segments.
  * @throws MWException If given invalid file.
  */
 static function segmentSplitter($filename)
 {
     $showXMP = XMPReader::isSupported();
     $segmentCount = 0;
     $segments = ['XMP_ext' => [], 'COM' => [], 'PSIR' => []];
     if (!$filename) {
         throw new MWException("No filename specified for " . __METHOD__);
     }
     if (!file_exists($filename) || is_dir($filename)) {
         throw new MWException("Invalid file {$filename} passed to " . __METHOD__);
     }
     $fh = fopen($filename, "rb");
     if (!$fh) {
         throw new MWException("Could not open file {$filename}");
     }
     try {
         // Every jpeg starts with the SOI marker FF D8.
         $buffer = fread($fh, 2);
         if ($buffer !== "\xFF\xD8") {
             throw new MWException("Not a jpeg, no SOI");
         }
         while (!feof($fh)) {
             $buffer = fread($fh, 1);
             $segmentCount++;
             if ($segmentCount > self::MAX_JPEG_SEGMENTS) {
                 // this is just a sanity check
                 throw new MWException('Too many jpeg segments. Aborting');
             }
             while ($buffer !== "\xFF" && !feof($fh)) {
                 // In theory JPEG files are not allowed to contain anything between the sections,
                 // but in practice they sometimes do. It's customary to ignore the garbage data.
                 $buffer = fread($fh, 1);
             }
             if ($buffer !== "\xFF") {
                 // Hit end of file while looking for the next marker; without the
                 // feof() check above this would loop forever on a truncated file.
                 break;
             }
             $buffer = fread($fh, 1);
             while ($buffer === "\xFF" && !feof($fh)) {
                 // Skip through any 0xFF padding bytes.
                 $buffer = fread($fh, 1);
             }
             if ($buffer === "\xFE") {
                 // COM section -- file comment
                 // First see if valid utf-8,
                 // if not try to convert it to windows-1252.
                 $com = $oldCom = trim(self::jpegExtractMarker($fh));
                 UtfNormal\Validator::quickIsNFCVerify($com);
                 // turns $com to valid utf-8.
                 // thus if no change, its utf-8, otherwise its something else.
                 if ($com !== $oldCom) {
                     MediaWiki\suppressWarnings();
                     $com = $oldCom = iconv('windows-1252', 'UTF-8//IGNORE', $oldCom);
                     MediaWiki\restoreWarnings();
                 }
                 // Try it again, if its still not a valid string, then probably
                 // binary junk or some really weird encoding, so don't extract.
                 // A failed iconv() (false) is treated as garbage as well, so a
                 // non-string can never end up in the COM list.
                 UtfNormal\Validator::quickIsNFCVerify($com);
                 if ($oldCom !== false && $com === $oldCom) {
                     $segments["COM"][] = $oldCom;
                 } else {
                     wfDebug(__METHOD__ . " Ignoring JPEG comment as is garbage.\n");
                 }
             } elseif ($buffer === "\xE1") {
                 // APP1 section (Exif, XMP, and XMP extended)
                 // only extract if XMP is enabled.
                 $temp = self::jpegExtractMarker($fh);
                 // check what type of app segment this is.
                 // Each signature is NUL terminated; the substr() lengths include the NUL.
                 if (substr($temp, 0, 29) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP) {
                     $segments["XMP"] = substr($temp, 29);
                 } elseif (substr($temp, 0, 35) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP) {
                     $segments["XMP_ext"][] = substr($temp, 35);
                 } elseif (substr($temp, 0, 29) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP) {
                     // Some images (especially flickr images) seem to have this.
                     // I really have no idea what the deal is with them, but
                     // whatever...
                     $segments["XMP"] = substr($temp, 29);
                     wfDebug(__METHOD__ . ' Found XMP section with wrong app identifier ' . "Using anyways.\n");
                 } elseif (substr($temp, 0, 6) === "Exif\x00\x00") {
                     // Just need to find out what the byte order is.
                     // because php's exif plugin sucks...
                     // This is a II for little Endian, MM for big. Not a unicode BOM.
                     $byteOrderMarker = substr($temp, 6, 2);
                     if ($byteOrderMarker === 'MM') {
                         $segments['byteOrder'] = 'BE';
                     } elseif ($byteOrderMarker === 'II') {
                         $segments['byteOrder'] = 'LE';
                     } else {
                         wfDebug(__METHOD__ . " Invalid byte ordering?!\n");
                     }
                 }
             } elseif ($buffer === "\xED") {
                 // APP13 - PSIR. IPTC and some photoshop stuff
                 $temp = self::jpegExtractMarker($fh);
                 if (substr($temp, 0, 14) === "Photoshop 3.0\x00") {
                     $segments["PSIR"][] = $temp;
                 }
             } elseif ($buffer === "\xD9" || $buffer === "\xDA") {
                 // EOI - end of image or SOS - start of scan. either way we're past any interesting segments
                 return $segments;
             } else {
                 // segment we don't care about, so skip
                 $size = wfUnpack("nint", fread($fh, 2), 2);
                 if ($size['int'] < 2) {
                     throw new MWException("invalid marker size in jpeg");
                 }
                 // The stored size includes the two size bytes just read.
                 fseek($fh, $size['int'] - 2, SEEK_CUR);
             }
         }
         // shouldn't get here.
         throw new MWException("Reached end of jpeg file unexpectedly");
     } finally {
         // Release the handle whether we return the segments or throw.
         fclose($fh);
     }
 }