/**
 * Read the metadata of a JPEG file and return it in serialized form.
 *
 * @param mixed $image Not used by this implementation
 * @param string $filename Path handed to BitmapMetadataHandler::Jpeg()
 * @return mixed Serialized metadata array (always carrying a
 *   MEDIAWIKI_EXIF_VERSION entry), or ExifBitmapHandler::BROKEN_FILE
 *   when an MWException is raised during extraction
 */
function getMetadata($image, $filename) {
    try {
        $jpegMeta = BitmapMetadataHandler::Jpeg($filename);
        if (is_array($jpegMeta)) {
            $jpegMeta['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
            return serialize($jpegMeta);
        }
        // Not expected to be reachable; kept as a paranoia check.
        throw new MWException('Metadata array is not an array');
    } catch (MWException $failure) {
        // BitmapMetadataHandler throws in certain exceptional cases,
        // for instance when the file does not exist.
        $debugLine = __METHOD__ . ': ' . $failure->getMessage() . "\n";
        wfDebug($debugLine);

        /* Historically 0 (ExifBitmapHandler::OLD_BROKEN_FILE) was used both for
         *  * files containing no metadata, and
         *  * files that are broken in some way.
         * Once metadata support is expanded, a 0 can no longer tell those two
         * apart: a broken file is likely to stay broken, whereas a file that
         * had no props may gain some when extraction improves. Hence -1 now
         * denotes only a broken file, and an array holding nothing but
         * MEDIAWIKI_EXIF_VERSION denotes "no props found".
         */
        return ExifBitmapHandler::BROKEN_FILE;
    }
}
/**
 * Read the metadata of a GIF file and return it in serialized form.
 *
 * @param mixed $image Not used by this implementation
 * @param string $filename Path handed to BitmapMetadataHandler::GIF()
 * @return mixed Serialized result of BitmapMetadataHandler::GIF(), or
 *   self::BROKEN_FILE when extraction raises an Exception
 */
function getMetadata($image, $filename) {
    try {
        $gifInfo = BitmapMetadataHandler::GIF($filename);
    } catch (Exception $error) {
        // Most likely a broken file: log the reason and flag it as such.
        $debugLine = __METHOD__ . ': ' . $error->getMessage() . "\n";
        wfDebug($debugLine);

        return self::BROKEN_FILE;
    }

    // Serialization deliberately happens outside the try block.
    return serialize($gifInfo);
}
/**
 * Read the metadata of a TIFF file and return it in serialized form.
 *
 * Extraction only takes place while the global $wgShowEXIF is truthy.
 *
 * @param File $image Not used by this implementation
 * @param string $filename Path handed to BitmapMetadataHandler::Tiff()
 * @return string Serialized metadata array (with a MEDIAWIKI_EXIF_VERSION
 *   entry); an empty string when $wgShowEXIF is off; or
 *   ExifBitmapHandler::BROKEN_FILE when extraction raises an Exception
 */
function getMetadata($image, $filename) {
    global $wgShowEXIF;

    // Guard clause: nothing is read when EXIF display is switched off.
    if (!$wgShowEXIF) {
        return '';
    }

    try {
        $tiffMeta = BitmapMetadataHandler::Tiff($filename);
        if (is_array($tiffMeta)) {
            $tiffMeta['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
            return serialize($tiffMeta);
        }
        // Not expected to be reachable; kept as a paranoia check.
        throw new MWException('Metadata array is not an array');
    } catch (Exception $failure) {
        // BitmapMetadataHandler throws in certain exceptional cases,
        // for instance when the file does not exist.
        $debugLine = __METHOD__ . ': ' . $failure->getMessage() . "\n";
        wfDebug($debugLine);

        return ExifBitmapHandler::BROKEN_FILE;
    }
}
/**
 * The byte order reported for the test.tiff fixture must be 'LE'.
 */
public function testTiffByteOrder() {
    $metadataHandler = new BitmapMetadataHandler();
    $fixturePath = $this->filePath . 'test.tiff';

    $byteOrder = $metadataHandler->getTiffByteOrder($fixturePath);

    $this->assertEquals('LE', $byteOrder);
}
/**
 * Postprocess the metadata (convert xmp into useful form, etc)
 *
 * This is used to generate the metadata table at the bottom
 * of the image description page.
 *
 * Recognised keys of $data are mapped onto metadata item names and
 * registered with a BitmapMetadataHandler as 'native' metadata. When
 * $data carries an 'xmp' entry and the xml extension is available, the
 * XMP packet is parsed and each resulting section is registered as well.
 *
 * @param array $data Raw metadata, keyed by property name
 * @return array $data with the 'xmp' entry removed and a 'mergedMetadata'
 *   entry added, holding the handler's merged metadata array
 */
protected function postProcessDump(array $data) {
    $meta = new BitmapMetadataHandler();
    $items = array();
    foreach ($data as $key => $val) {
        switch ($key) {
            case 'Title':
                $items['ObjectName'] = $val;
                break;
            case 'Subject':
                $items['ImageDescription'] = $val;
                break;
            case 'Keywords':
                // Sometimes we have empty keywords. This seems
                // to be a product of how pdfinfo deals with keywords
                // with spaces in them. Filter such empty keywords.
                // Filtering on string length (rather than plain truthiness)
                // keeps a legitimate keyword such as "0", which a
                // callback-less array_filter() would silently drop.
                $keyList = array_filter(explode(' ', $val), 'strlen');
                if (count($keyList) > 0) {
                    $items['Keywords'] = $keyList;
                }
                break;
            case 'Author':
                $items['Artist'] = $val;
                break;
            case 'Creator':
                // Program used to create file.
                // Different from program used to convert to pdf.
                $items['Software'] = $val;
                break;
            case 'Producer':
                // Conversion program
                $items['pdf-Producer'] = $val;
                break;
            case 'ModTime':
                $timestamp = wfTimestamp(TS_EXIF, $val);
                if ($timestamp) {
                    // 'if' is just paranoia
                    $items['DateTime'] = $timestamp;
                }
                break;
            case 'CreationTime':
                $timestamp = wfTimestamp(TS_EXIF, $val);
                if ($timestamp) {
                    $items['DateTimeDigitized'] = $timestamp;
                }
                break;
            // These last two (version and encryption) I was unsure
            // if we should include in the table, since they aren't
            // all that useful to editors. I leaned on the side
            // of including. However not including if file
            // is optimized/linearized since that is really useless
            // to an editor.
            case 'PDF version':
                $items['pdf-Version'] = $val;
                break;
            case 'Encrypted':
                // @todo: The value isn't i18n-ised. The appropriate
                // place to do that is in FormatMetadata.php;
                // should add a hook there.
                // For reference, if encrypted this field's value looks like:
                // "yes (print:yes copy:no change:no addNotes:no)"
                $items['pdf-Encrypted'] = $val;
                break;
            // Note 'pages' and 'Pages' are different keys (!)
            case 'pages':
                // A pdf document can have multiple sized pages in it.
                // (However 95% of the time, all pages are the same size)
                // Get a list of all the unique page sizes in document.
                // This doesn't do anything with rotation as of yet,
                // mostly because I am unsure of what a good way to
                // present that information to the user would be.
                if (!is_array($val) && !($val instanceof Traversable)) {
                    // Nothing iterable to inspect; skip instead of letting
                    // foreach emit a warning on a malformed value.
                    break;
                }
                $pageSizes = array();
                foreach ($val as $page) {
                    if (isset($page['Page size'])) {
                        // Used as a set: the array keys de-duplicate sizes.
                        $pageSizes[$page['Page size']] = true;
                    }
                }
                $pageSizeArray = array_keys($pageSizes);
                if (count($pageSizeArray) > 0) {
                    $items['pdf-PageSize'] = $pageSizeArray;
                }
                break;
        }
    }
    $meta->addMetadata($items, 'native');

    if (isset($data['xmp']) && function_exists('xml_parser_create_ns')) {
        // func exists verifies that the xml extension required for XMPReader
        // is present (Almost always is present)
        // @todo: This only handles generic xmp properties. Would be improved
        // by handling pdf xmp properties (pdf and pdfx) via XMPInfo hook.
        $xmp = new XMPReader(LoggerFactory::getInstance('XMP'));
        $xmp->parse($data['xmp']);
        $xmpRes = $xmp->getResults();
        foreach ($xmpRes as $type => $xmpSection) {
            $meta->addMetadata($xmpSection, $type);
        }
    }

    // The raw XMP packet is dropped from the result whether or not it
    // could be parsed above.
    unset($data['xmp']);
    $data['mergedMetadata'] = $meta->getMetadataArray();

    return $data;
}
/**
 * Reads metadata of the tiff file via shell command and returns an associative array.
 * layout:
 * meta['page_count'] = number of pages
 * meta['first_page'] = number of first page
 * meta['last_page'] = number of last page
 * meta['page_data'] = metadata per page
 * meta['exif'] = Exif, XMP and IPTC
 * meta['errors'] = identify-errors
 * meta['warnings'] = identify-warnings
 *
 * The result is computed once and kept in $this->_meta; later calls return
 * that cached value. If the tiffinfo/identify command exits with a non-zero
 * status, an array holding only an 'errors' entry is returned and nothing
 * is cached.
 *
 * @return array Metadata in the layout described above
 */
public function retrieveMetaData() {
    global $wgImageMagickIdentifyCommand, $wgExiv2Command, $wgTiffUseExiv;
    global $wgTiffUseTiffinfo, $wgTiffTiffinfoCommand;
    global $wgShowEXIF;

    if ( $this->_meta === null ) {
        wfProfileIn( 'PagedTiffImage::retrieveMetaData' );

        // fetch base info: number of pages, size and alpha for each page.
        // run hooks first, then optionally tiffinfo or, per default,
        // ImageMagick's identify command
        if ( !wfRunHooks( 'PagedTiffHandlerTiffData', array( $this->mFilename, &$this->_meta ) ) ) {
            wfDebug( __METHOD__ . ": hook PagedTiffHandlerTiffData overrides TIFF data extraction\n" );
        } elseif ( $wgTiffUseTiffinfo ) {
            // read TIFF directories using libtiff's tiffinfo, see
            // http://www.libtiff.org/man/tiffinfo.1.html
            $cmd = wfEscapeShellArg( $wgTiffTiffinfoCommand ) .
                ' ' . wfEscapeShellArg( $this->mFilename ) . ' 2>&1';

            wfProfileIn( 'tiffinfo' );
            wfDebug( __METHOD__ . ": $cmd\n" );
            $retval = '';
            $dump = wfShellExec( $cmd, $retval );
            wfProfileOut( 'tiffinfo' );

            if ( $retval ) {
                wfDebug( __METHOD__ . ": tiffinfo command failed: $cmd\n" );
                // Close the outer profiling section before bailing out so
                // that profiling calls stay balanced.
                wfProfileOut( 'PagedTiffImage::retrieveMetaData' );
                // fail. we *need* that info
                return array( 'errors' => array( "tiffinfo command failed: $cmd" ) );
            }

            $this->_meta = $this->parseTiffinfoOutput( $dump );
        } else {
            $cmd = wfEscapeShellArg( $wgImageMagickIdentifyCommand ) .
                ' -format "[BEGIN]page=%p\nalpha=%A\nalpha2=%r\nheight=%h\nwidth=%w\ndepth=%z[END]" ' .
                wfEscapeShellArg( $this->mFilename ) . ' 2>&1';

            wfProfileIn( 'identify' );
            wfDebug( __METHOD__ . ": $cmd\n" );
            $retval = '';
            $dump = wfShellExec( $cmd, $retval );
            wfProfileOut( 'identify' );

            if ( $retval ) {
                wfDebug( __METHOD__ . ": identify command failed: $cmd\n" );
                // Close the outer profiling section before bailing out so
                // that profiling calls stay balanced.
                wfProfileOut( 'PagedTiffImage::retrieveMetaData' );
                // fail. we *need* that info
                return array( 'errors' => array( "identify command failed: $cmd" ) );
            }

            $this->_meta = $this->parseIdentifyOutput( $dump );
        }

        $this->_meta['exif'] = array();

        // fetch extended info: EXIF/IPTC/XMP
        // run hooks first, then optionally Exiv2 or, per default, the internal EXIF class
        if ( !empty( $this->_meta['errors'] ) ) {
            wfDebug( __METHOD__ . ": found errors, skipping EXIF extraction\n" );
        } elseif ( !wfRunHooks( 'PagedTiffHandlerExifData', array( $this->mFilename, &$this->_meta['exif'] ) ) ) {
            wfDebug( __METHOD__ . ": hook PagedTiffHandlerExifData overrides EXIF extraction\n" );
        } elseif ( $wgTiffUseExiv ) {
            // read EXIF, XMP, IPTC as name-tag => interpreted data
            // -ignore unknown fields
            // see exiv2-doc @link http://www.exiv2.org/sample.html
            // NOTE: the linux version of exiv2 has a bug: it can only
            // read one type of meta-data at a time, not all at once.
            $cmd = wfEscapeShellArg( $wgExiv2Command ) .
                ' -u -psix -Pnt ' . wfEscapeShellArg( $this->mFilename ) . ' 2>&1';

            wfProfileIn( 'exiv2' );
            wfDebug( __METHOD__ . ": $cmd\n" );
            $retval = '';
            $dump = wfShellExec( $cmd, $retval );
            wfProfileOut( 'exiv2' );

            if ( $retval ) {
                // don't fail - we are missing info, just report.
                // Only the debug log records this failure; the output is
                // still parsed below.
                wfDebug( __METHOD__ . ": exiv command failed: $cmd\n" );
            }

            $data = $this->parseExiv2Output( $dump );
            $this->_meta['exif'] = $data;
        } elseif ( $wgShowEXIF ) {
            wfDebug( __METHOD__ . ": using internal Exif( {$this->mFilename} )\n" );
            if ( method_exists( 'BitmapMetadataHandler', 'Tiff' ) ) {
                // NOTE(review): exceptions raised by this call are not caught
                // here and would leave the profiling section open - confirm
                // whether callers rely on them propagating.
                $data = BitmapMetadataHandler::Tiff( $this->mFilename );
            } else {
                // old method for back compat.
                $exif = new Exif( $this->mFilename );
                $data = $exif->getFilteredData();
            }

            if ( $data ) {
                $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version();
                $this->_meta['exif'] = $data;
            }
        }

        // Strip entries that are not wanted in the stored metadata.
        unset( $this->_meta['exif']['Image'] );
        unset( $this->_meta['exif']['filename'] );
        unset( $this->_meta['exif']['Base filename'] );
        unset( $this->_meta['exif']['XMLPacket'] );
        unset( $this->_meta['exif']['ImageResources'] );

        $this->_meta['TIFF_METADATA_VERSION'] = TIFF_METADATA_VERSION;
        wfProfileOut( 'PagedTiffImage::retrieveMetaData' );
    }

    return $this->_meta;
}