/**
 * Checks which source wins when the same key is supplied by several
 * metadata sources.
 *
 * In this fixture XMP beats IPTC when the IPTC hash is good or absent,
 * IPTC beats XMP when the IPTC hash is bad, and EXIF beats both.
 *
 * @covers BitmapMetadataHandler::addMetadata
 * @covers BitmapMetadataHandler::getMetadataArray
 */
public function testMerging() {
    // Each entry is array( metadata, source type ); entries are fed to
    // the handler in exactly this order.
    $inputs = array(
        array(array('foo' => 'xmp'), 'xmp-general'),
        array(array('bar' => 'xmp'), 'xmp-general'),
        array(array('baz' => 'xmp'), 'xmp-general'),
        array(array('fred' => 'xmp'), 'xmp-general'),
        array(array('foo' => 'iptc (hash)'), 'iptc-good-hash'),
        array(array('bar' => 'iptc (bad hash)'), 'iptc-bad-hash'),
        array(array('baz' => 'iptc (bad hash)'), 'iptc-bad-hash'),
        array(array('fred' => 'iptc (no hash)'), 'iptc-no-hash'),
        array(array('baz' => 'exif'), 'exif'),
    );

    $handler = new BitmapMetadataHandler();
    foreach ($inputs as $input) {
        list($metadata, $sourceType) = $input;
        $handler->addMetadata($metadata, $sourceType);
    }

    $this->assertEquals(
        array(
            'foo' => 'xmp',
            'bar' => 'iptc (bad hash)',
            'baz' => 'exif',
            'fred' => 'xmp',
        ),
        $handler->getMetadataArray()
    );
}
/**
 * Postprocess the metadata (convert xmp into useful form, etc)
 *
 * This is used to generate the metadata table at the bottom
 * of the image description page.
 *
 * Recognised keys of $data are copied into a BitmapMetadataHandler under
 * the 'native' type, any 'xmp' entry is parsed and added as well, and the
 * merged result is stored under 'mergedMetadata'. The 'xmp' entry is
 * removed from the returned array; all other entries are left untouched.
 *
 * @param array $data Metadata
 * @return array Post-processed metadata
 */
protected function postProcessDump(array $data) {
    $meta = new BitmapMetadataHandler();
    $items = array();
    foreach ($data as $key => $val) {
        switch ($key) {
            case 'Title':
                $items['ObjectName'] = $val;
                break;
            case 'Subject':
                $items['ImageDescription'] = $val;
                break;
            case 'Keywords':
                // Sometimes we have empty keywords. This seems
                // to be a product of how pdfinfo deals with keywords
                // with spaces in them. Filter such empty keywords.
                // Only genuinely empty strings are dropped: a bare
                // array_filter() would also discard the valid keyword "0".
                // array_values() re-indexes so the list has no key gaps.
                $keyList = array_values(array_filter(
                    explode(' ', $val),
                    static function ($keyword) {
                        return $keyword !== '';
                    }
                ));
                if (count($keyList) > 0) {
                    $items['Keywords'] = $keyList;
                }
                break;
            case 'Author':
                $items['Artist'] = $val;
                break;
            case 'Creator':
                // Program used to create file.
                // Different from program used to convert to pdf.
                $items['Software'] = $val;
                break;
            case 'Producer':
                // Conversion program
                $items['pdf-Producer'] = $val;
                break;
            case 'ModTime':
                $timestamp = wfTimestamp(TS_EXIF, $val);
                if ($timestamp) {
                    // 'if' is just paranoia
                    $items['DateTime'] = $timestamp;
                }
                break;
            case 'CreationTime':
                $timestamp = wfTimestamp(TS_EXIF, $val);
                if ($timestamp) {
                    $items['DateTimeDigitized'] = $timestamp;
                }
                break;
            // These last two (version and encryption) I was unsure
            // if we should include in the table, since they aren't
            // all that useful to editors. I leaned on the side
            // of including. However not including if file
            // is optimized/linearized since that is really useless
            // to an editor.
            case 'PDF version':
                $items['pdf-Version'] = $val;
                break;
            case 'Encrypted':
                // @todo: The value isn't i18n-ised. The appropriate
                // place to do that is in FormatMetadata.php;
                // should add a hook there.
                // For reference, if encrypted this field's value looks like:
                // "yes (print:yes copy:no change:no addNotes:no)"
                $items['pdf-Encrypted'] = $val;
                break;
            // Note 'pages' and 'Pages' are different keys (!)
            case 'pages':
                // A pdf document can have multiple sized pages in it.
                // (However 95% of the time, all pages are the same size)
                // get a list of all the unique page sizes in document.
                // This doesn't do anything with rotation as of yet,
                // mostly because I am unsure of what a good way to
                // present that information to the user would be.
                $pageSizes = array();
                foreach ($val as $page) {
                    if (isset($page['Page size'])) {
                        // Use the size as a key so duplicates collapse.
                        $pageSizes[$page['Page size']] = true;
                    }
                }
                $pageSizeArray = array_keys($pageSizes);
                if (count($pageSizeArray) > 0) {
                    $items['pdf-PageSize'] = $pageSizeArray;
                }
                break;
        }
    }
    $meta->addMetadata($items, 'native');

    if (isset($data['xmp']) && function_exists('xml_parser_create_ns')) {
        // func exists verifies that the xml extension required for XMPReader
        // is present (Almost always is present)
        // @todo: This only handles generic xmp properties. Would be improved
        // by handling pdf xmp properties (pdf and pdfx) via XMPInfo hook.
        $xmp = new XMPReader(LoggerFactory::getInstance('XMP'));
        $xmp->parse($data['xmp']);
        $xmpRes = $xmp->getResults();
        foreach ($xmpRes as $type => $xmpSection) {
            $meta->addMetadata($xmpSection, $type);
        }
    }

    // The raw xmp entry is dropped whether or not it was parsed.
    unset($data['xmp']);
    $data['mergedMetadata'] = $meta->getMetadataArray();

    return $data;
}