Beispiel #1
0
 /**
  * Read the metadata of a JPEG file and hand it back in serialized form.
  *
  * @param mixed $image Not consulted by this implementation
  * @param string $filename Passed to BitmapMetadataHandler::Jpeg()
  * @return mixed serialize()d metadata array, or ExifBitmapHandler::BROKEN_FILE
  *  when an MWException is raised during extraction
  */
 function getMetadata($image, $filename)
 {
     try {
         $jpegMeta = BitmapMetadataHandler::Jpeg($filename);
         if (is_array($jpegMeta)) {
             $jpegMeta['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
             return serialize($jpegMeta);
         }
         // Not expected to be reachable; kept purely as a paranoia check.
         throw new MWException('Metadata array is not an array');
     } catch (MWException $failure) {
         // BitmapMetadataHandler raises an exception in certain exceptional
         // situations, for instance when the file does not exist.
         $logLine = __METHOD__ . ': ' . $failure->getMessage() . "\n";
         wfDebug($logLine);
         /* Historical note: 0 (ExifBitmapHandler::OLD_BROKEN_FILE) used to be
          * returned both when
          * 	* the file carried no metadata, and
          * 	* the file itself was broken.
          * Once metadata support grows, a stored 0 can no longer be attributed
          * to one cause or the other. A broken file tends to stay broken, whereas
          * a file without props may gain props when extraction improves.
          * Hence -1 now stands for a broken file only, and "no props" is
          * represented by an array holding nothing but MEDIAWIKI_EXIF_VERSION.
          */
         return ExifBitmapHandler::BROKEN_FILE;
     }
 }
Beispiel #2
0
 /**
  * Extract the metadata of a GIF file and return it serialized.
  *
  * @param mixed $image Not consulted by this implementation
  * @param string $filename Passed to BitmapMetadataHandler::GIF()
  * @return mixed serialize()d result of BitmapMetadataHandler::GIF(), or
  *  self::BROKEN_FILE when that call throws
  */
 function getMetadata($image, $filename)
 {
     try {
         $gifMeta = BitmapMetadataHandler::GIF($filename);
     } catch (Exception $failure) {
         // Most likely a broken file: log the reason and flag it as such.
         $logLine = __METHOD__ . ': ' . $failure->getMessage() . "\n";
         wfDebug($logLine);
         return self::BROKEN_FILE;
     }
     // Serialization deliberately stays outside the try block.
     return serialize($gifMeta);
 }
Beispiel #3
0
 /**
  * Build the serialized metadata for a TIFF file.
  *
  * Extraction only runs when $wgShowEXIF is truthy; otherwise an empty
  * string is returned. Any exception raised while extracting is logged via
  * wfDebug() and reported as ExifBitmapHandler::BROKEN_FILE rather than
  * being propagated to the caller.
  *
  * @param File $image
  * @param string $filename
  * @return string
  */
 function getMetadata($image, $filename)
 {
     global $wgShowEXIF;
     if (!$wgShowEXIF) {
         return '';
     }
     try {
         $tiffMeta = BitmapMetadataHandler::Tiff($filename);
         if (is_array($tiffMeta)) {
             $tiffMeta['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
             return serialize($tiffMeta);
         }
         // Not expected to be reachable; kept purely as a paranoia check.
         throw new MWException('Metadata array is not an array');
     } catch (Exception $failure) {
         // BitmapMetadataHandler raises an exception in certain exceptional
         // situations, for instance when the file does not exist.
         $logLine = __METHOD__ . ': ' . $failure->getMessage() . "\n";
         wfDebug($logLine);
         return ExifBitmapHandler::BROKEN_FILE;
     }
 }
 /**
  * getTiffByteOrder() is expected to report 'LE' for the test.tiff fixture.
  */
 public function testTiffByteOrder()
 {
     $metadataHandler = new BitmapMetadataHandler();
     $tiffPath = $this->filePath . 'test.tiff';
     $this->assertEquals('LE', $metadataHandler->getTiffByteOrder($tiffPath));
 }
 /**
  * Postprocess the metadata (convert xmp into useful form, etc)
  *
  * This is used to generate the metadata table at the bottom
  * of the image description page.
  *
  * Recognised native keys are copied into a new array under the property
  * names used below and registered with a BitmapMetadataHandler as 'native'
  * metadata. If an 'xmp' entry is present (and the xml extension is loaded)
  * its parsed sections are registered as well. The raw 'xmp' entry is removed
  * and the handler's combined result is stored under 'mergedMetadata'.
  *
  * @param array $data metadata
  * @return array post-processed metadata
  */
 protected function postProcessDump(array $data)
 {
     $meta = new BitmapMetadataHandler();
     $items = array();
     foreach ($data as $key => $val) {
         switch ($key) {
             case 'Title':
                 $items['ObjectName'] = $val;
                 break;
             case 'Subject':
                 $items['ImageDescription'] = $val;
                 break;
             case 'Keywords':
                 // Sometimes we have empty keywords. This seems
                 // to be a product of how pdfinfo deals with keywords
                 // with spaces in them. Filter such empty keywords.
                 // Only empty strings are dropped: array_filter() without a
                 // callback would also discard a legitimate keyword "0".
                 // array_values() re-indexes the result so it stays a plain
                 // list without gaps in its keys.
                 $keyList = array_values(array_filter(
                     explode(' ', $val),
                     function ($keyword) {
                         return $keyword !== '';
                     }
                 ));
                 if (count($keyList) > 0) {
                     $items['Keywords'] = $keyList;
                 }
                 break;
             case 'Author':
                 $items['Artist'] = $val;
                 break;
             case 'Creator':
                 // Program used to create file.
                 // Different from program used to convert to pdf.
                 $items['Software'] = $val;
                 break;
             case 'Producer':
                 // Conversion program
                 $items['pdf-Producer'] = $val;
                 break;
             case 'ModTime':
                 $timestamp = wfTimestamp(TS_EXIF, $val);
                 if ($timestamp) {
                     // 'if' is just paranoia
                     $items['DateTime'] = $timestamp;
                 }
                 break;
             case 'CreationTime':
                 $timestamp = wfTimestamp(TS_EXIF, $val);
                 if ($timestamp) {
                     $items['DateTimeDigitized'] = $timestamp;
                 }
                 break;
             // These last two (version and encryption) I was unsure
             // if we should include in the table, since they aren't
             // all that useful to editors. I leaned on the side
             // of including. However not including if file
             // is optimized/linearized since that is really useless
             // to an editor.
             case 'PDF version':
                 $items['pdf-Version'] = $val;
                 break;
             case 'Encrypted':
                 // @todo: The value isn't i18n-ised. The appropriate
                 // place to do that is in FormatMetadata.php
                 // should add a hook a there.
                 // For reference, if encrypted this fields value looks like:
                 // "yes (print:yes copy:no change:no addNotes:no)"
                 $items['pdf-Encrypted'] = $val;
                 break;
             // Note 'pages' and 'Pages' are different keys (!)
             case 'pages':
                 // A pdf document can have multiple sized pages in it.
                 // (However 95% of the time, all pages are the same size)
                 // get a list of all the unique page sizes in document.
                 // This doesn't do anything with rotation as of yet,
                 // mostly because I am unsure of what a good way to
                 // present that information to the user would be.
                 $pageSizes = array();
                 foreach ($val as $page) {
                     if (isset($page['Page size'])) {
                         // Keyed by size so duplicates collapse automatically.
                         $pageSizes[$page['Page size']] = true;
                     }
                 }
                 $pageSizeArray = array_keys($pageSizes);
                 if (count($pageSizeArray) > 0) {
                     $items['pdf-PageSize'] = $pageSizeArray;
                 }
                 break;
         }
     }
     $meta->addMetadata($items, 'native');
     if (isset($data['xmp']) && function_exists('xml_parser_create_ns')) {
         // func exists verifies that the xml extension required for XMPReader
         // is present (Almost always is present)
         // @todo: This only handles generic xmp properties. Would be improved
         // by handling pdf xmp properties (pdf and pdfx) via XMPInfo hook.
         $xmp = new XMPReader(LoggerFactory::getInstance('XMP'));
         $xmp->parse($data['xmp']);
         $xmpRes = $xmp->getResults();
         foreach ($xmpRes as $type => $xmpSection) {
             $meta->addMetadata($xmpSection, $type);
         }
     }
     // The raw XMP entry is dropped from the returned array.
     unset($data['xmp']);
     $data['mergedMetadata'] = $meta->getMetadataArray();
     return $data;
 }
	/**
	 * Reads metadata of the tiff file via shell command and returns an associative array.
	 * layout:
	 * meta['page_count'] = number of pages
	 * meta['first_page'] = number of first page
	 * meta['last_page'] = number of last page
	 * meta['page_data'] = metadata per page
	 * meta['exif']  = Exif, XMP and IPTC
	 * meta['errors'] = identify-errors
	 * meta['warnings'] = identify-warnings
	 *
	 * The result is kept in $this->_meta, so the extraction runs only while that
	 * property is still null. If the tiffinfo/identify command fails, an array
	 * holding only an 'errors' entry is returned and $this->_meta is left untouched.
	 *
	 * @return array
	 */
	public function retrieveMetaData() {
		global $wgImageMagickIdentifyCommand, $wgExiv2Command, $wgTiffUseExiv;
		global $wgTiffUseTiffinfo, $wgTiffTiffinfoCommand;
		global $wgShowEXIF;

		if ( $this->_meta === null ) {
			wfProfileIn( 'PagedTiffImage::retrieveMetaData' );

			//fetch base info: number of pages, size and alpha for each page.
			//run hooks first, then optionally tiffinfo or, per default, ImageMagic's identify command
			if ( !wfRunHooks( 'PagedTiffHandlerTiffData', array( $this->mFilename, &$this->_meta ) ) ) {
				wfDebug( __METHOD__ . ": hook PagedTiffHandlerTiffData overrides TIFF data extraction\n" );
			} elseif ( $wgTiffUseTiffinfo ) {
				// read TIFF directories using libtiff's tiffinfo, see
				// http://www.libtiff.org/man/tiffinfo.1.html
				$cmd = wfEscapeShellArg( $wgTiffTiffinfoCommand ) .
					' ' . wfEscapeShellArg( $this->mFilename ) . ' 2>&1';

				wfProfileIn( 'tiffinfo' );
				wfDebug( __METHOD__ . ": $cmd\n" );
				$retval = '';
				$dump = wfShellExec( $cmd, $retval );
				wfProfileOut( 'tiffinfo' );

				if ( $retval ) {
					wfDebug( __METHOD__ . ": tiffinfo command failed: $cmd\n" );
					// Close the section opened at the top before bailing out,
					// otherwise the wfProfileIn/wfProfileOut calls are unbalanced.
					wfProfileOut( 'PagedTiffImage::retrieveMetaData' );
					// fail. we *need* that info
					return array( 'errors' => array( "tiffinfo command failed: $cmd" ) );
				}

				$this->_meta = $this->parseTiffinfoOutput( $dump );
			} else {
				$cmd = wfEscapeShellArg( $wgImageMagickIdentifyCommand ) .
					' -format "[BEGIN]page=%p\nalpha=%A\nalpha2=%r\nheight=%h\nwidth=%w\ndepth=%z[END]" ' .
					wfEscapeShellArg( $this->mFilename ) . ' 2>&1';

				wfProfileIn( 'identify' );
				wfDebug( __METHOD__ . ": $cmd\n" );
				$retval = '';
				$dump = wfShellExec( $cmd, $retval );
				wfProfileOut( 'identify' );

				if ( $retval ) {
					wfDebug( __METHOD__ . ": identify command failed: $cmd\n" );
					// Close the section opened at the top before bailing out,
					// otherwise the wfProfileIn/wfProfileOut calls are unbalanced.
					wfProfileOut( 'PagedTiffImage::retrieveMetaData' );
					// fail. we *need* that info
					return array( 'errors' => array( "identify command failed: $cmd" ) );
				}

				$this->_meta = $this->parseIdentifyOutput( $dump );
			}

			$this->_meta['exif'] = array();

			//fetch extended info: EXIF/IPTC/XMP
			//run hooks first, then optionally Exiv2 or, per default, the internal EXIF class
			if ( !empty( $this->_meta['errors'] ) ) {
				wfDebug( __METHOD__ . ": found errors, skipping EXIF extraction\n" );
			} elseif ( !wfRunHooks( 'PagedTiffHandlerExifData', array( $this->mFilename, &$this->_meta['exif'] ) ) ) {
				wfDebug( __METHOD__ . ": hook PagedTiffHandlerExifData overrides EXIF extraction\n" );
			} elseif ( $wgTiffUseExiv ) {
				// read EXIF, XMP, IPTC as name-tag => interpreted data
				// -ignore unknown fields
				// see exiv2-doc @link http://www.exiv2.org/sample.html
				// NOTE: the linux version of exiv2 has a bug: it can only
				// read one type of meta-data at a time, not all at once.
				$cmd = wfEscapeShellArg( $wgExiv2Command ) .
					' -u -psix -Pnt ' . wfEscapeShellArg( $this->mFilename ) . ' 2>&1';

				wfProfileIn( 'exiv2' );
				wfDebug( __METHOD__ . ": $cmd\n" );
				$retval = '';
				$dump = wfShellExec( $cmd, $retval );
				wfProfileOut( 'exiv2' );

				if ( $retval ) {
					// don't fail - we are missing info, just report.
					// The failure is only written to the debug log: the error entry
					// that used to be stored in a local array here was overwritten
					// by the parse result below and never reached the caller.
					wfDebug( __METHOD__ . ": exiv command failed: $cmd\n" );
				}

				$this->_meta['exif'] = $this->parseExiv2Output( $dump );
			} elseif ( $wgShowEXIF ) {
				wfDebug( __METHOD__ . ": using internal Exif( {$this->mFilename} )\n" );
				if ( method_exists( 'BitmapMetadataHandler', 'Tiff' ) ) {
					$data = BitmapMetadataHandler::Tiff( $this->mFilename );
				} else {
					// old method for back compat.
					$exif = new Exif( $this->mFilename );
					$data = $exif->getFilteredData();
				}

				if ( $data ) {
					$data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
					$this->_meta['exif'] = $data;
				}
			}

			// Strip entries that are not kept in the stored exif data.
			unset( $this->_meta['exif']['Image'] );
			unset( $this->_meta['exif']['filename'] );
			unset( $this->_meta['exif']['Base filename'] );
			unset( $this->_meta['exif']['XMLPacket'] );
			unset( $this->_meta['exif']['ImageResources'] );

			$this->_meta['TIFF_METADATA_VERSION'] = TIFF_METADATA_VERSION;

			wfProfileOut( 'PagedTiffImage::retrieveMetaData' );
		}

		return $this->_meta;
	}