Esempio n. 1
0
 /**
  * Have a high offset to simulate a missing packet,
  * which should cause it to ignore the ExtendedXMP packet.
  *
  * @covers XMPReader::parseExtended
  */
 public function testExtendedXMPMissingPacket()
 {
     $fixtureDir = __DIR__ . '/../../data/xmp/';
     $mainPacket = file_get_contents($fixtureDir . 'xmpExt.xmp');
     $extensionBody = file_get_contents($fixtureDir . 'xmpExt2.xmp');

     // Header of the extended packet: the md5 sum of xmpExt2.xmp, followed by
     // the payload length and the chunk offset, each packed as a 32-bit
     // big-endian integer. The non-zero offset is what simulates the gap.
     $guid = '28C74E0AC2D796886759006FBE2E57B7';
     $packedLength = pack('N', strlen($extensionBody));
     $packedOffset = pack('N', 2048);
     $header = $guid . $packedLength . $packedOffset;

     $xmpReader = new XMPReader();
     $xmpReader->parse($mainPacket);
     $xmpReader->parseExtended($header . $extensionBody);

     // The results are expected to contain exactly these two properties.
     $this->assertEquals(
         array('xmp-exif' => array('DigitalZoomRatio' => '0/10', 'Flash' => 9)),
         $xmpReader->getResults()
     );
 }
 /**
  * Build the metadata array for a GIF image.
  *
  * GIFs don't really have native metadata, so this just merges together
  * the XMP data and the image comments.
  *
  * @param string $filename full path to file
  * @return Array metadata array
  */
 public static function GIF($filename)
 {
     $handler = new self();
     $extracted = GIFMetadataExtractor::getMetadata($filename);

     // File comments are stored under the 'native' metadata type.
     $comments = $extracted['comment'];
     if (count($comments) > 0) {
         $handler->addMetadata(array('GIFFileComment' => $comments), 'native');
     }

     // Only attempt XMP parsing when there is a packet to parse and the
     // xml_parser_create_ns() function is available.
     $rawXmp = $extracted['xmp'];
     if ($rawXmp !== '' && function_exists('xml_parser_create_ns')) {
         $reader = new XMPReader();
         $reader->parse($rawXmp);
         foreach ($reader->getResults() as $group => $properties) {
             $handler->addMetadata($properties, $group);
         }
     }

     // The raw comment/XMP entries are replaced by the merged 'metadata' entry.
     unset($extracted['comment'], $extracted['xmp']);
     $extracted['metadata'] = $handler->getMetadataArray();
     $extracted['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

     return $extracted;
 }
Esempio n. 3
0
 /**
  * Test for multi-section, hostile XML
  * @covers XMPReader::checkParseSafety
  */
 public function testCheckParseSafety()
 {
     $xmpPath = __DIR__ . '/../../data/xmp/';

     // Test for detection. The file is fed to the reader in 10-byte chunks
     // so that the parser only ever sees fragments of the document.
     $file = fopen($xmpPath . 'doctype-included.xmp', 'rb');
     $valid = false;
     $reader = new XMPReader();
     do {
         $chunk = fread($file, 10);
         $valid = $reader->parse($chunk, feof($file));
     } while (!feof($file));
     // Close before asserting: a failing assertion throws, which would
     // otherwise skip the fclose() and leave the handle open.
     fclose($file);
     $this->assertFalse($valid, 'Check that doctype is detected in fragmented XML');
     $this->assertEquals(array(), $reader->getResults(), 'Check that doctype is detected in fragmented XML');
     unset($reader);

     // Test for false positives, using the same chunked feeding.
     $file = fopen($xmpPath . 'doctype-not-included.xmp', 'rb');
     $valid = false;
     $reader = new XMPReader();
     do {
         $chunk = fread($file, 10);
         $valid = $reader->parse($chunk, feof($file));
     } while (!feof($file));
     fclose($file);
     $this->assertTrue($valid, 'Check for false-positive detecting doctype in fragmented XML');
     $this->assertEquals(
         array('xmp-exif' => array('DigitalZoomRatio' => '0/10', 'Flash' => '9')),
         $reader->getResults(),
         'Check that metadata is extracted from fragmented XML without a doctype'
     );
 }
 /**
  * Postprocess the metadata (convert xmp into useful form, etc)
  *
  * This is used to generate the metadata table at the bottom
  * of the image description page.
  *
  * @param array $data Metadata
  * @return array Post-processed metadata
  */
 protected function postProcessDump(array $data)
 {
     $meta = new BitmapMetadataHandler();
     $items = array();
     // Map the keys found in $data onto the metadata field names used for
     // the 'native' metadata type. Keys not listed here are ignored.
     foreach ($data as $key => $val) {
         switch ($key) {
             case 'Title':
                 $items['ObjectName'] = $val;
                 break;
             case 'Subject':
                 $items['ImageDescription'] = $val;
                 break;
             case 'Keywords':
                 // Sometimes we have empty keywords. This seems
                 // to be a product of how pdfinfo deals with keywords
                 // with spaces in them. Filter such empty keywords.
                 // Compare against '' explicitly: a bare array_filter()
                 // would also discard the legitimate keyword "0".
                 // array_values() keeps the result a gap-free list.
                 $keyList = array_values(array_filter(
                     explode(' ', $val),
                     function ($keyword) {
                         return $keyword !== '';
                     }
                 ));
                 if (count($keyList) > 0) {
                     $items['Keywords'] = $keyList;
                 }
                 break;
             case 'Author':
                 $items['Artist'] = $val;
                 break;
             case 'Creator':
                 // Program used to create file.
                 // Different from program used to convert to pdf.
                 $items['Software'] = $val;
                 break;
             case 'Producer':
                 // Conversion program
                 $items['pdf-Producer'] = $val;
                 break;
             case 'ModTime':
                 $timestamp = wfTimestamp(TS_EXIF, $val);
                 if ($timestamp) {
                     // 'if' is just paranoia
                     $items['DateTime'] = $timestamp;
                 }
                 break;
             case 'CreationTime':
                 $timestamp = wfTimestamp(TS_EXIF, $val);
                 if ($timestamp) {
                     $items['DateTimeDigitized'] = $timestamp;
                 }
                 break;
             // These last two (version and encryption) I was unsure
             // if we should include in the table, since they aren't
             // all that useful to editors. I leaned on the side
             // of including. However not including if file
             // is optimized/linearized since that is really useless
             // to an editor.
             case 'PDF version':
                 $items['pdf-Version'] = $val;
                 break;
             case 'Encrypted':
                 // @todo: The value isn't i18n-ised. The appropriate
                 // place to do that is in FormatMetadata.php
                 // should add a hook a there.
                 // For reference, if encrypted this fields value looks like:
                 // "yes (print:yes copy:no change:no addNotes:no)"
                 $items['pdf-Encrypted'] = $val;
                 break;
             // Note 'pages' and 'Pages' are different keys (!)
             case 'pages':
                 // A pdf document can have multiple sized pages in it.
                 // (However 95% of the time, all pages are the same size)
                 // get a list of all the unique page sizes in document.
                 // This doesn't do anything with rotation as of yet,
                 // mostly because I am unsure of what a good way to
                 // present that information to the user would be.
                 $pageSizes = array();
                 foreach ($val as $page) {
                     if (isset($page['Page size'])) {
                         // Used as a key so that duplicates collapse.
                         $pageSizes[$page['Page size']] = true;
                     }
                 }
                 $pageSizeArray = array_keys($pageSizes);
                 if (count($pageSizeArray) > 0) {
                     $items['pdf-PageSize'] = $pageSizeArray;
                 }
                 break;
         }
     }
     $meta->addMetadata($items, 'native');
     if (isset($data['xmp']) && function_exists('xml_parser_create_ns')) {
         // func exists verifies that the xml extension required for XMPReader
         // is present (Almost always is present)
         // @todo: This only handles generic xmp properties. Would be improved
         // by handling pdf xmp properties (pdf and pdfx) via XMPInfo hook.
         $xmp = new XMPReader(LoggerFactory::getInstance('XMP'));
         $xmp->parse($data['xmp']);
         $xmpRes = $xmp->getResults();
         foreach ($xmpRes as $type => $xmpSection) {
             $meta->addMetadata($xmpSection, $type);
         }
     }
     // The raw XMP entry is replaced by the merged result.
     unset($data['xmp']);
     $data['mergedMetadata'] = $meta->getMetadataArray();
     return $data;
 }
 /**
  * Extract the metadata segments of interest from a JPEG file.
  * Based on GIFMetadataExtractor.
  *
  * getimagesize() can almost do this, but it doesn't support multiple
  * APP1 segments, so it can't extract XMP from files that contain both
  * Exif and XMP data.
  *
  * @param string $filename Name of jpeg file
  * @return array Array of interesting segments.
  * @throws MWException If given invalid file.
  */
 static function segmentSplitter($filename)
 {
     $showXMP = XMPReader::isSupported();
     $segmentCount = 0;
     $segments = array('XMP_ext' => array(), 'COM' => array(), 'PSIR' => array());
     if (!$filename) {
         throw new MWException("No filename specified for " . __METHOD__);
     }
     if (!file_exists($filename) || is_dir($filename)) {
         throw new MWException("Invalid file {$filename} passed to " . __METHOD__);
     }
     $fh = fopen($filename, "rb");
     if (!$fh) {
         throw new MWException("Could not open file {$filename}");
     }
     // Everything that reads from $fh lives inside this try block so that
     // the handle is closed on every exit path, including exceptions.
     try {
         // All marker bytes and signatures below are written with explicit
         // escape sequences so they do not depend on this file's encoding.
         $buffer = fread($fh, 2);
         if ($buffer !== "\xFF\xD8") {
             throw new MWException("Not a jpeg, no SOI");
         }
         while (!feof($fh)) {
             $buffer = fread($fh, 1);
             $segmentCount++;
             if ($segmentCount > self::MAX_JPEG_SEGMENTS) {
                 // this is just a sanity check
                 throw new MWException('Too many jpeg segments. Aborting');
             }
             if ($buffer !== "\xFF") {
                 throw new MWException("Error reading jpeg file marker. " . "Expected 0xFF but got " . bin2hex($buffer));
             }
             $buffer = fread($fh, 1);
             while ($buffer === "\xFF" && !feof($fh)) {
                 // Skip through any 0xFF padding bytes.
                 $buffer = fread($fh, 1);
             }
             if ($buffer === "\xFE") {
                 // COM section -- file comment
                 // First see if valid utf-8,
                 // if not try to convert it to windows-1252.
                 $com = $oldCom = trim(self::jpegExtractMarker($fh));
                 UtfNormal::quickIsNFCVerify($com);
                 // turns $com to valid utf-8.
                 // thus if no change, its utf-8, otherwise its something else.
                 if ($com !== $oldCom) {
                     wfSuppressWarnings();
                     $com = $oldCom = iconv('windows-1252', 'UTF-8//IGNORE', $oldCom);
                     wfRestoreWarnings();
                 }
                 // Try it again, if its still not a valid string, then probably
                 // binary junk or some really weird encoding, so don't extract.
                 UtfNormal::quickIsNFCVerify($com);
                 if ($com === $oldCom) {
                     $segments["COM"][] = $oldCom;
                 } else {
                     wfDebug(__METHOD__ . " Ignoring JPEG comment as is garbage.\n");
                 }
             } elseif ($buffer === "\xE1") {
                 // APP1 section (Exif, XMP, and XMP extended)
                 // only extract if XMP is enabled.
                 $temp = self::jpegExtractMarker($fh);
                 // check what type of app segment this is.
                 // Each signature ends in a NUL byte, which is why the
                 // compared prefix is one byte longer than the visible text.
                 if (substr($temp, 0, 29) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP) {
                     $segments["XMP"] = substr($temp, 29);
                 } elseif (substr($temp, 0, 35) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP) {
                     $segments["XMP_ext"][] = substr($temp, 35);
                 } elseif (substr($temp, 0, 29) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP) {
                     // Some images (especially flickr images) seem to have this.
                     // I really have no idea what the deal is with them, but
                     // whatever...
                     $segments["XMP"] = substr($temp, 29);
                     wfDebug(__METHOD__ . ' Found XMP section with wrong app identifier ' . "Using anyways.\n");
                 } elseif (substr($temp, 0, 6) === "Exif\x00\x00") {
                     // Just need to find out what the byte order is.
                     // because php's exif plugin sucks...
                     // This is a II for little Endian, MM for big. Not a unicode BOM.
                     $byteOrderMarker = substr($temp, 6, 2);
                     if ($byteOrderMarker === 'MM') {
                         $segments['byteOrder'] = 'BE';
                     } elseif ($byteOrderMarker === 'II') {
                         $segments['byteOrder'] = 'LE';
                     } else {
                         wfDebug(__METHOD__ . " Invalid byte ordering?!\n");
                     }
                 }
             } elseif ($buffer === "\xED") {
                 // APP13 - PSIR. IPTC and some photoshop stuff
                 $temp = self::jpegExtractMarker($fh);
                 if (substr($temp, 0, 14) === "Photoshop 3.0\x00") {
                     $segments["PSIR"][] = $temp;
                 }
             } elseif ($buffer === "\xD9" || $buffer === "\xDA") {
                 // EOI - end of image or SOS - start of scan. either way we're past any interesting segments
                 fclose($fh);
                 return $segments;
             } else {
                 // segment we don't care about, so skip
                 $size = wfUnpack("nint", fread($fh, 2), 2);
                 // The length field counts its own two bytes, so 2 is the
                 // smallest legal value (a segment with an empty payload).
                 if ($size['int'] < 2) {
                     throw new MWException("invalid marker size in jpeg");
                 }
                 fseek($fh, $size['int'] - 2, SEEK_CUR);
             }
         }
         // shouldn't get here.
         throw new MWException("Reached end of jpeg file unexpectedly");
     } catch (Exception $e) {
         // Release the handle, then let the original exception propagate
         // unchanged so callers see the same exception type as before.
         fclose($fh);
         throw $e;
     }
 }
 /**
  * Build the metadata array for a GIF image.
  *
  * GIFs don't really have native metadata, so this just merges together
  * the XMP data and the image comments.
  *
  * @param string $filename Full path to file
  * @return array Metadata array
  */
 public static function GIF($filename)
 {
     $collector = new self();
     $gifData = GIFMetadataExtractor::getMetadata($filename);

     // File comments are stored under the 'native' metadata type.
     $fileComments = $gifData['comment'];
     if (count($fileComments) > 0) {
         $collector->addMetadata(['GIFFileComment' => $fileComments], 'native');
     }

     // Only attempt XMP parsing when there is a packet to parse and
     // XMPReader reports that it is supported.
     $xmpPacket = $gifData['xmp'];
     if ($xmpPacket !== '' && XMPReader::isSupported()) {
         $xmpReader = new XMPReader(LoggerFactory::getInstance('XMP'));
         $xmpReader->parse($xmpPacket);
         foreach ($xmpReader->getResults() as $sectionType => $sectionData) {
             $collector->addMetadata($sectionData, $sectionType);
         }
     }

     // The raw comment/XMP entries are replaced by the merged 'metadata' entry.
     unset($gifData['comment'], $gifData['xmp']);
     $gifData['metadata'] = $collector->getMetadataArray();
     $gifData['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION;

     return $gifData;
 }