/**
 * Extract Metadata from a PDF file
 *
 * Reads the PDF version from the file header, finds the last "startxref"
 * marker in the final 16 KB of the file and follows the trailer dictionaries
 * to the Info dictionary (document properties) and the Root dictionary
 * (reference to the XMP metadata).
 *
 * @since 2.10
 *
 * @param string full path to the desired file
 *
 * @return array ( 'xmp' => array( key => value ), 'pdf' => array( key => value ) ) for each metadata field, in string format
 */
public static function mla_extract_pdf_metadata($file_name) {
	$xmp = array();
	$metadata = array();
	self::$pdf_indirect_objects = NULL;
	$chunksize = 16384;

	if (!file_exists($file_name)) {
		return array('xmp' => $xmp, 'pdf' => $metadata);
	}

	// Only the last $chunksize bytes are needed to locate startxref
	$filesize = filesize($file_name);
	$file_offset = $chunksize < $filesize ? $filesize - $chunksize : 0;
	$tail = file_get_contents($file_name, false, NULL, $file_offset);
	if (false === $tail) {
		// The file exists but cannot be read; nothing can be extracted
		return array('xmp' => $xmp, 'pdf' => $metadata);
	}

	// Re-use the tail for small files, otherwise read the first 128 bytes
	if (0 == $file_offset) {
		$header = substr($tail, 0, 128);
	} else {
		$header = file_get_contents($file_name, false, NULL, 0, 128);
	}

	if ('%PDF-' == substr($header, 0, 5)) {
		$metadata['PDF_Version'] = substr($header, 1, 7);
		$metadata['PDF_VersionNumber'] = substr($header, 5, 3);
	}

	/*
	 * Find the xref and (optional) trailer
	 */
	$match_count = preg_match_all('/startxref[\\x00-\\x20]+(\\d+)[\\x00-\\x20]+\\%\\%EOF/', $tail, $matches, PREG_OFFSET_CAPTURE);
	if (0 == $match_count) {
		/* translators: 1: ERROR tag 2: path and file */
		error_log(sprintf(_x('%1$s: File "%2$s", startxref not found.', 'error_log', 'media-library-assistant'), __('ERROR', 'media-library-assistant'), $file_name), 0);
		return array('xmp' => $xmp, 'pdf' => $metadata);
	}

	// The last match in the tail is the one that applies
	$startxref = (int) $matches[1][$match_count - 1][0];
	$trailer_dictionaries = self::_extract_pdf_trailer($file_name, $startxref);

	if (is_array($trailer_dictionaries)) {
		// Use the first trailer dictionary that has an Info entry
		$info_reference = NULL;
		foreach ($trailer_dictionaries as $trailer_dictionary) {
			if (isset($trailer_dictionary['Info'])) {
				$info_reference = $trailer_dictionary['Info'];
				break;
			}
		}

		if (isset($info_reference)) {
			$info_object = self::_find_pdf_indirect_dictionary($file_name, $info_reference['object'], $info_reference['generation']);

			/*
			 * Handle single or multiple Info instances
			 */
			$info_objects = array();
			if ($info_object) {
				if (1 == $info_object['count']) {
					$info_objects[] = $info_object;
				} else {
					for ($index = 0; $index < $info_object['count']; $index++) {
						$info_objects[] = self::_find_pdf_indirect_dictionary($file_name, $info_reference['object'], $info_reference['generation'], $index);
					}
				}
			}

			foreach ($info_objects as $info_object) {
				// An indexed lookup may fail; skip it rather than index a non-array
				if (!$info_object) {
					continue;
				}

				$info_dictionary = self::_parse_pdf_dictionary($info_object['content'], 0);
				if (!is_array($info_dictionary)) {
					continue;
				}

				unset($info_dictionary['/length']);
				foreach ($info_dictionary as $name => $value) {
					if ('string' == $value['type']) {
						$prefix = substr($value['value'], 0, 2);
						if ('D:' == $prefix) {
							// "D:" prefix marks a PDF date string
							$metadata[$name] = MLAData::mla_parse_pdf_date($value['value']);
						} elseif (chr(0xfe) . chr(0xff) == $prefix) {
							// FE FF byte order mark: UTF-16BE text
							$metadata[$name] = self::_parse_pdf_UTF16BE($value['value']);
						} else {
							$metadata[$name] = $value['value'];
						}
					} else {
						$metadata[$name] = $value['value'];
					}
				} // each info entry
			} // foreach Info object

			/*
			 * Remove spurious "Filter" dictionaries
			 */
			unset($metadata['Filter']);
			unset($metadata['Length']);
			unset($metadata['Length1']);
		} // found Info reference

		/*
		 * Look for XMP Metadata
		 */
		$root_reference = NULL;
		foreach ($trailer_dictionaries as $trailer_dictionary) {
			if (isset($trailer_dictionary['Root'])) {
				$root_reference = $trailer_dictionary['Root'];
				break;
			}
		}

		if (isset($root_reference)) {
			$root_object = self::_find_pdf_indirect_dictionary($file_name, $root_reference['object'], $root_reference['generation']);

			if ($root_object) {
				$root_dictionary = self::_parse_pdf_dictionary($root_object['content'], 0);
				unset($root_dictionary['/length']);

				if (isset($root_dictionary['Metadata'])) {
					$xmp_object = self::_find_pdf_indirect_dictionary($file_name, $root_dictionary['Metadata']['object'], $root_dictionary['Metadata']['generation']);

					// Start just past the Metadata dictionary; use offset zero
					// when the object could not be located
					$xmp_offset = 0;
					if (is_array($xmp_object) && isset($xmp_object['start'], $xmp_object['length'])) {
						$xmp_offset = $xmp_object['start'] + $xmp_object['length'];
					}

					$xmp = MLAData::mla_parse_xmp_metadata($file_name, $xmp_offset);
					if (is_array($xmp)) {
						$metadata = array_merge($metadata, $xmp);
					} else {
						// Recovery attempt: retry from the start of the file
						$xmp = MLAData::mla_parse_xmp_metadata($file_name, 0);
						if (!is_array($xmp)) {
							// Honor the documented return shape
							$xmp = array();
						}
					}
				} // found Metadata reference
			} // found Root object
		} // found Root reference
	} // found trailer_dictionaries

	return array('xmp' => $xmp, 'pdf' => $metadata);
}
/**
 * Fetch and filter IPTC and EXIF, XMP or PDF metadata for an image attachment
 *
 * Files with a "pdf" extension are handed to MLAPDF::mla_extract_pdf_metadata().
 * For other files, IPTC values are parsed from the APP13 segment, EXIF values
 * are read with exif_read_data() (JPEG and TIFF only) and XMP values come from
 * mla_parse_xmp_metadata(). Selected EXIF camera and GPS values are expanded
 * into additional formats under the 'CAMERA' and 'GPS' keys of
 * 'mla_exif_metadata'.
 *
 * @since 0.90
 *
 * @param int post ID of attachment
 * @param string optional; if $post_id is zero, path to the image file.
 *
 * @return array Meta data variables, IPTC and EXIF or PDF
 */
public static function mla_fetch_attachment_image_metadata($post_id, $path = '') {
	$results = array(
		'post_id' => $post_id,
		'mla_iptc_metadata' => array(),
		'mla_exif_metadata' => array(),
		'mla_xmp_metadata' => array(),
		'mla_pdf_metadata' => array()
	);

	// Stays empty unless exif_read_data() succeeds below
	$exif_data = array();

	if (0 != $post_id) {
		$path = get_attached_file($post_id);
	}

	if (!empty($path)) {
		if ('pdf' == strtolower(pathinfo($path, PATHINFO_EXTENSION))) {
			$pdf_metadata = MLAPDF::mla_extract_pdf_metadata($path);
			$results['mla_xmp_metadata'] = $pdf_metadata['xmp'];
			$results['mla_pdf_metadata'] = $pdf_metadata['pdf'];
			return $results;
		}

		// $size is false when the file is not a recognized image
		$size = getimagesize($path, $info);

		if (is_callable('iptcparse')) {
			if (!empty($info['APP13'])) {
				$iptc_values = iptcparse($info['APP13']);

				if (!empty(MLAData::$mla_IPTC_EXIF_errors)) {
					$results['mla_iptc_errors'] = MLAData::$mla_IPTC_EXIF_errors;
					MLAData::$mla_IPTC_EXIF_errors = array();
					error_log(__('ERROR', 'media-library-assistant') . ': ' . '$results[mla_iptc_errors] = ' . var_export($results['mla_iptc_errors'], true), 0);
				}

				if (!is_array($iptc_values)) {
					$iptc_values = array();
				}

				foreach ($iptc_values as $key => $value) {
					if (in_array($key, array('1#000', '1#020', '1#022', '1#120', '1#122', '2#000', '2#200', '2#201'))) {
						// These records hold a 16-bit big-endian binary number
						$value = unpack('nbinary', $value[0]);
						$results['mla_iptc_metadata'][$key] = (string) $value['binary'];
					} elseif (1 == count($value)) {
						$results['mla_iptc_metadata'][$key] = $value[0];
					} else {
						$results['mla_iptc_metadata'][$key] = $value;
					}
				} // foreach $value
			} // ! empty
		} // iptcparse

		if (is_callable('exif_read_data') && is_array($size) && in_array($size[2], array(IMAGETYPE_JPEG, IMAGETYPE_TIFF_II, IMAGETYPE_TIFF_MM))) {
			$exif_data = @exif_read_data($path);
			if (!is_array($exif_data)) {
				// exif_read_data() returns false on failure; keep the array contract
				$exif_data = array();
			}

			$results['mla_exif_metadata'] = $exif_data;

			if (!empty(MLAData::$mla_IPTC_EXIF_errors)) {
				$results['mla_exif_errors'] = MLAData::$mla_IPTC_EXIF_errors;
				MLAData::$mla_IPTC_EXIF_errors = array();
				error_log(__('ERROR', 'media-library-assistant') . ': ' . '$results[mla_exif_errors] = ' . var_export($results['mla_exif_errors'], true), 0);
			}
		} // exif_read_data

		$results['mla_xmp_metadata'] = self::mla_parse_xmp_metadata($path, 0);
		if (NULL == $results['mla_xmp_metadata']) {
			$results['mla_xmp_metadata'] = array();
		}
	}

	/*
	 * Expand EXIF Camera-related values:
	 *
	 * ExposureBiasValue
	 * ExposureTime
	 * Flash
	 * FNumber
	 * FocalLength
	 * ShutterSpeed from ExposureTime
	 */
	$new_data = array();

	if (isset($exif_data['FNumber'])) {
		if (false !== ($value = self::_rational_to_string($exif_data['FNumber'], '%1$d', '%1$d/%2$d', '%1$.1f'))) {
			$new_data['FNumber'] = $value;
		}
	} // FNumber

	if (isset($exif_data['ExposureBiasValue'])) {
		$fragments = array_map('intval', explode('/', $exif_data['ExposureBiasValue']));
		if (isset($fragments[1])) {
			$numerator = $fragments[0];
			$denominator = $fragments[1];

			// Clean up some common format issues, e.g. 4/6, 2/4.
			// "0/0" is excluded because halving it never reaches an odd value.
			while ((0 != $numerator || 0 != $denominator) && 0 == ($numerator & 0x1) && 0 == ($denominator & 0x1)) {
				$numerator = $numerator >> 1;
				$denominator = $denominator >> 1;
			}

			// Remove excess precision
			if ($denominator > $numerator && 1000 < $numerator && 1000 < $denominator) {
				$exif_data['ExposureBiasValue'] = sprintf('%1$+.3f', $numerator / $denominator);
			} else {
				$exif_data['ExposureBiasValue'] = $numerator . '/' . $denominator;
			}
		}

		if (false !== ($value = self::_rational_to_string($exif_data['ExposureBiasValue'], '%1$+d', '%1$+d/%2$d', '%1$+.2f'))) {
			$new_data['ExposureBiasValue'] = $value;
		}
	} // ExposureBiasValue

	if (isset($exif_data['Flash'])) {
		// Bit zero of the Flash value is tested to decide Yes/No
		$value = absint($exif_data['Flash']);
		if ($value & 0x1) {
			$new_data['Flash'] = __('Yes', 'media-library-assistant');
		} else {
			$new_data['Flash'] = __('No', 'media-library-assistant');
		}
	} // Flash

	if (isset($exif_data['FocalLength'])) {
		if (false !== ($value = self::_rational_to_string($exif_data['FocalLength'], '%1$d', '%1$d/%2$d', '%1$.2f'))) {
			$new_data['FocalLength'] = $value;
		}
	} // FocalLength

	if (isset($exif_data['ExposureTime'])) {
		if (false !== ($value = self::_rational_to_string($exif_data['ExposureTime'], '%1$d', '%1$d/%2$d', '%1$.2f'))) {
			$new_data['ExposureTime'] = $value;
		}
	} // ExposureTime

	/*
	 * ShutterSpeed in "1/" format, from ExposureTime
	 * Special logic for "fractional shutter speed" values 1.3, 1.5, 1.6, 2.5
	 */
	if (isset($exif_data['ExposureTime'])) {
		$fragments = array_map('intval', explode('/', $exif_data['ExposureTime']));
		if (isset($fragments[1]) && 0 != $fragments[0]) {
			if (1 == $fragments[1]) {
				$new_data['ShutterSpeed'] = $new_data['ExposureTime'] = sprintf('%1$d', $fragments[0]);
			} elseif (0 != $fragments[1]) {
				$value = $fragments[0] / $fragments[1];
				if (0 < $value && 1 > $value) {
					// Convert to "1/" value for shutter speed
					if (1 == $fragments[0]) {
						// Already in "1/n" form; fall back to the raw fragments
						// when ExposureTime could not be formatted above
						$new_data['ShutterSpeed'] = isset($new_data['ExposureTime']) ? $new_data['ExposureTime'] : '1/' . $fragments[1];
					} else {
						$test = (float) number_format(1.0 / $value, 1, '.', '');
						if (in_array($test, array(1.3, 1.5, 1.6, 2.5))) {
							$new_data['ShutterSpeed'] = '1/' . number_format(1.0 / $value, 1, '.', '');
						} else {
							$new_data['ShutterSpeed'] = '1/' . number_format(1.0 / $value, 0, '.', '');
						}
					}
				} else {
					$new_data['ShutterSpeed'] = $new_data['ExposureTime'] = sprintf('%1$.2f', $value);
				}
			} // fractional value
		} // valid denominator and non-zero numerator
	} // ShutterSpeed

	if (isset($exif_data['UndefinedTag:0xA420'])) {
		$new_data['ImageUniqueID'] = $exif_data['UndefinedTag:0xA420'];
	}

	if (isset($exif_data['UndefinedTag:0xA430'])) {
		$new_data['CameraOwnerName'] = $exif_data['UndefinedTag:0xA430'];
	}

	if (isset($exif_data['UndefinedTag:0xA431'])) {
		$new_data['BodySerialNumber'] = $exif_data['UndefinedTag:0xA431'];
	}

	if (isset($exif_data['UndefinedTag:0xA432']) && is_array($exif_data['UndefinedTag:0xA432'])) {
		$array = $new_data['LensSpecification'] = $exif_data['UndefinedTag:0xA432'];

		if (isset($array[0])) {
			if (false !== ($value = self::_rational_to_string($array[0], '%1$d', '%1$d/%2$d', '%1$.2f'))) {
				$new_data['LensMinFocalLength'] = $value;
			}
		}

		if (isset($array[1])) {
			if (false !== ($value = self::_rational_to_string($array[1], '%1$d', '%1$d/%2$d', '%1$.2f'))) {
				$new_data['LensMaxFocalLength'] = $value;
			}
		}

		if (isset($array[2])) {
			if (false !== ($value = self::_rational_to_string($array[2], '%1$d', '%1$d/%2$d', '%1$.1f'))) {
				$new_data['LensMinFocalLengthFN'] = $value;
			}
		}

		if (isset($array[3])) {
			if (false !== ($value = self::_rational_to_string($array[3], '%1$d', '%1$d/%2$d', '%1$.1f'))) {
				$new_data['LensMaxFocalLengthFN'] = $value;
			}
		}
	}

	if (isset($exif_data['UndefinedTag:0xA433'])) {
		$new_data['LensMake'] = $exif_data['UndefinedTag:0xA433'];
	}

	if (isset($exif_data['UndefinedTag:0xA434'])) {
		$new_data['LensModel'] = $exif_data['UndefinedTag:0xA434'];
	}

	if (isset($exif_data['UndefinedTag:0xA435'])) {
		$new_data['LensSerialNumber'] = $exif_data['UndefinedTag:0xA435'];
	}

	if (!empty($new_data)) {
		$results['mla_exif_metadata']['CAMERA'] = $new_data;
	}

	/*
	 * Expand EXIF GPS values
	 */
	$new_data = array();

	if (isset($exif_data['GPSVersion'])) {
		$new_data['Version'] = sprintf('%1$d.%2$d.%3$d.%4$d', ord($exif_data['GPSVersion'][0]), ord($exif_data['GPSVersion'][1]), ord($exif_data['GPSVersion'][2]), ord($exif_data['GPSVersion'][3]));
	}

	// Latitude and Longitude share one expansion; the mapped value is the
	// reference letter that yields an empty sign prefix (any other gives '-')
	$unsigned_refs = array('Latitude' => 'N', 'Longitude' => 'E');
	foreach ($unsigned_refs as $axis => $unsigned_ref) {
		$ref_tag = 'GPS' . $axis . 'Ref';
		if (isset($exif_data[$ref_tag])) {
			$new_data[$axis . 'Ref'] = $exif_data[$ref_tag];
			$new_data[$axis . 'RefS'] = $unsigned_ref == $exif_data[$ref_tag] ? '' : '-';
			$ref = $new_data[$axis . 'Ref'];
			$refs = $new_data[$axis . 'RefS'];
		} else {
			$ref = '';
			$refs = '';
		}

		$rational = isset($exif_data['GPS' . $axis]) ? $exif_data['GPS' . $axis] : NULL;
		if (is_array($rational) && isset($rational[0], $rational[1], $rational[2])) {
			$new_data[$axis . 'D'] = $degrees = self::_rational_to_decimal($rational[0]);
			$new_data[$axis . 'M'] = $minutes = self::_rational_to_decimal($rational[1]);
			$new_data[$axis . 'S'] = sprintf('%1$01.4f', $seconds = self::_rational_to_decimal($rational[2]));
			$decimal_minutes = $minutes + $seconds / 60;
			$decimal_degrees = $decimal_minutes / 60;

			$new_data[$axis] = sprintf('%1$dd %2$d\' %3$01.4f" %4$s', $degrees, $minutes, $seconds, $ref);
			$degrees_minutes = sprintf('%1$d %2$01.4f', $degrees, $decimal_minutes);
			$degrees_decimal = sprintf('%1$01f', $degrees + $decimal_degrees);

			// DM and DD are assigned first, without the suffix, to keep their
			// position in the array; the reference letter is appended last
			$new_data[$axis . 'DM'] = $degrees_minutes;
			$new_data[$axis . 'DD'] = $degrees_decimal;
			$new_data[$axis . 'MinDec'] = substr($degrees_minutes, strpos($degrees_minutes, ' ') + 1);
			$new_data[$axis . 'DegDec'] = substr($degrees_decimal, strpos($degrees_decimal, '.'));
			$new_data[$axis . 'SDM'] = $refs . $degrees_minutes;
			$new_data[$axis . 'SDD'] = $refs . $degrees_decimal;
			$new_data[$axis . 'DM'] = $degrees_minutes . $ref;
			$new_data[$axis . 'DD'] = $degrees_decimal . $ref;
		}
	}

	if (isset($exif_data['GPSAltitudeRef'])) {
		$new_data['AltitudeRef'] = sprintf('%1$d', ord($exif_data['GPSAltitudeRef'][0]));
		$new_data['AltitudeRefS'] = '0' == $new_data['AltitudeRef'] ? '' : '-';
		$refs = $new_data['AltitudeRefS'];
	} else {
		$refs = '';
	}

	if (isset($exif_data['GPSAltitude'])) {
		$new_data['Altitude'] = sprintf('%1$s%2$01.4f', $refs, $meters = self::_rational_to_decimal($exif_data['GPSAltitude']));
		$new_data['AltitudeFeet'] = sprintf('%1$s%2$01.2f', $refs, $meters * 3.280839895013);
	}

	if (isset($exif_data['GPSTimeStamp'])) {
		$rational = $exif_data['GPSTimeStamp'];
		if (is_array($rational) && isset($rational[0], $rational[1], $rational[2])) {
			$new_data['TimeStampH'] = sprintf('%1$02d', $hours = self::_rational_to_decimal($rational[0]));
			$new_data['TimeStampM'] = sprintf('%1$02d', $minutes = self::_rational_to_decimal($rational[1]));
			$new_data['TimeStampS'] = sprintf('%1$02d', $seconds = self::_rational_to_decimal($rational[2]));
			$new_data['TimeStamp'] = sprintf('%1$02d:%2$02d:%3$02d', $hours, $minutes, $seconds);
		}
	}

	if (isset($exif_data['GPSDateStamp'])) {
		// Pad so a malformed stamp yields empty parts instead of undefined offsets
		$parts = array_pad(explode(':', $exif_data['GPSDateStamp']), 3, '');
		$new_data['DateStampY'] = $parts[0];
		$new_data['DateStampM'] = $parts[1];
		$new_data['DateStampD'] = $parts[2];
		$new_data['DateStamp'] = $exif_data['GPSDateStamp'];
	}

	if (isset($exif_data['GPSMapDatum'])) {
		$new_data['MapDatum'] = $exif_data['GPSMapDatum'];
	}

	if (!empty($new_data)) {
		$results['mla_exif_metadata']['GPS'] = $new_data;
	}

	// Expansion of EXIF array values was replaced by mla_find_array_element in MLA v2.13

	return $results;
}