Пример #1
0
 /**
  * Extract Metadata from a PDF file
  *
  * Reads the tail of the file to locate the last "startxref" pointer, follows
  * it to the trailer dictionaries, and then collects:
  *  - the PDF version from the "%PDF-" file header,
  *  - every entry of the document Info dictionary (dates and UTF-16BE strings
  *    are converted by the corresponding helper methods),
  *  - XMP metadata referenced from the document Root dictionary.
  *
  * An empty or partial array is returned when the file is missing, cannot be
  * read, or has no startxref marker in its tail.
  *
  * @since 1.50
  *
  * @param	string	$file_name	full path to the desired file
  *
  * @return	array	( key => value ) for each metadata field, in string format
  */
 private static function _extract_pdf_metadata($file_name)
 {
     $metadata = array();
     // Reset the class-level indirect-object state before working on this file.
     self::$pdf_indirect_objects = NULL;
     // Only the last $chunksize bytes are searched for the startxref marker.
     $chunksize = 16384;
     if (!file_exists($file_name)) {
         return $metadata;
     }
     $filesize = filesize($file_name);
     $file_offset = $chunksize < $filesize ? $filesize - $chunksize : 0;
     $tail = file_get_contents($file_name, false, NULL, $file_offset);
     if (false === $tail) {
         // Unreadable file (permissions, directory, I/O error): nothing to parse.
         error_log('ERROR: unable to read PDF file ' . var_export($file_name, true), 0);
         return $metadata;
     }
     if (0 == $file_offset) {
         // The whole file is already in $tail; reuse it for the header.
         $header = substr($tail, 0, 128);
     } else {
         $header = file_get_contents($file_name, false, NULL, 0, 128);
     }
     if (is_string($header) && '%PDF-' == substr($header, 0, 5)) {
         // "%PDF-1.4" yields PDF_Version "PDF-1.4" and PDF_VersionNumber "1.4".
         $metadata['PDF_Version'] = substr($header, 1, 7);
         $metadata['PDF_VersionNumber'] = substr($header, 5, 3);
     }
     /*
      * Find the xref and (optional) trailer
      */
     $match_count = preg_match_all('/startxref[\\x00-\\x20]+(\\d+)[\\x00-\\x20]+\\%\\%EOF/', $tail, $matches, PREG_OFFSET_CAPTURE);
     if (0 == $match_count) {
         error_log('ERROR: startxref not found ' . var_export($file_name, true), 0);
         return $metadata;
     }
     // Use the last startxref in the tail.
     $startxref = (int) $matches[1][$match_count - 1][0];
     $trailer_dictionaries = self::_extract_pdf_trailer($file_name, $startxref);
     if (is_array($trailer_dictionaries)) {
         /*
          * Look for the document Info dictionary; the first trailer that has one wins
          */
         $info_reference = NULL;
         foreach ($trailer_dictionaries as $trailer_dictionary) {
             if (isset($trailer_dictionary['Info'])) {
                 $info_reference = $trailer_dictionary['Info'];
                 break;
             }
         }
         if (isset($info_reference)) {
             $info_object = self::_find_pdf_indirect_dictionary($file_name, $info_reference['object'], $info_reference['generation']);
             if ($info_object) {
                 $info_dictionary = self::_parse_pdf_dictionary($info_object['content'], 0);
                 // '/length' is dropped so it is not treated as a metadata entry.
                 unset($info_dictionary['/length']);
                 foreach ($info_dictionary as $name => $value) {
                     if ('string' == $value['type']) {
                         $prefix = substr($value['value'], 0, 2);
                         if ('D:' == $prefix) {
                             // "D:" prefix marks a PDF date string.
                             $metadata[$name] = self::_parse_pdf_date($value['value']);
                         } elseif (chr(0xfe) . chr(0xff) == $prefix) {
                             // FE FF byte-order mark: UTF-16BE encoded text.
                             $metadata[$name] = self::_parse_pdf_UTF16BE($value['value']);
                         } else {
                             $metadata[$name] = $value['value'];
                         }
                     } else {
                         $metadata[$name] = $value['value'];
                     }
                 }
                 // each info entry
             }
             // found Info object
         }
         // found Info reference
         /*
          * Look for XMP Metadata
          */
         $root_reference = NULL;
         foreach ($trailer_dictionaries as $trailer_dictionary) {
             if (isset($trailer_dictionary['Root'])) {
                 $root_reference = $trailer_dictionary['Root'];
                 break;
             }
         }
         if (isset($root_reference)) {
             $root_object = self::_find_pdf_indirect_dictionary($file_name, $root_reference['object'], $root_reference['generation']);
             if ($root_object) {
                 $root_dictionary = self::_parse_pdf_dictionary($root_object['content'], 0);
                 unset($root_dictionary['/length']);
                 if (isset($root_dictionary['Metadata'])) {
                     $xmp_object = self::_find_pdf_indirect_dictionary($file_name, $root_dictionary['Metadata']['object'], $root_dictionary['Metadata']['generation']);
                     /*
                      * When the Metadata object cannot be located, fall back to
                      * offset 0. That is the value the unchecked arithmetic used
                      * to produce, but without the "array offset on bool" warnings.
                      * NOTE(review): presumably the offset is where the XMP parser
                      * starts looking - confirm against _parse_xmp_metadata().
                      */
                     if (is_array($xmp_object) && isset($xmp_object['start'], $xmp_object['length'])) {
                         $xmp_offset = $xmp_object['start'] + $xmp_object['length'];
                     } else {
                         $xmp_offset = 0;
                     }
                     $xmp = self::_parse_xmp_metadata($file_name, $xmp_offset);
                     if (is_array($xmp)) {
                         // XMP values override Info entries that share a key.
                         $metadata = array_merge($metadata, $xmp);
                     }
                 }
                 // found Metadata reference
             }
             // found Root object
         }
         // found Root reference
     }
     // found trailer_dictionaries
     return $metadata;
 }