/**
 * Parse a PDF dictionary object
 *
 * Returns an array of dictionary contents keyed by entry name (without the leading '/'),
 * each classified by type: boolean, numeric, string, hex (string), indirect (object),
 * name, array, dictionary, stream, null, unknown (unrecognized value) or nomatch
 * (the value could not be matched at all).
 *
 * The array also has a '/length' element: the number of bytes from $offset through the
 * closing '>>' inclusive. When $offset points at the opening '<<' this is the whole
 * dictionary including both delimiters. Entry names never start with '/', so '/length'
 * cannot collide with a parsed entry.
 *
 * If no closing '>>' can be found, two messages are written to the error log and
 * array( '/length' => 0 ) is returned.
 *
 * Known limitations: '<<', '>>' and ']' are located with plain strpos() calls, so
 * delimiters inside strings are not recognized as such, and an array value ends at the
 * first ']' (nested arrays are not handled).
 *
 * @since 2.10
 *
 * @param string  $source_string data within which the dictionary occurs (passed by reference, not modified here)
 * @param integer $offset        offset within the source string of the opening '<<' characters or the first content character.
 *
 * @return array ( '/length' => length, key => array( 'type' => type, 'value' => value ) ) for each dictionary field
 */
private static function _parse_pdf_dictionary( &$source_string, $offset ) {
	/*
	 * Find the end of the dictionary, tracking nested '<<' ... '>>' pairs
	 */
	if ( '<<' === substr( $source_string, $offset, 2 ) ) {
		$nest = $offset + 2;
	} else {
		$nest = $offset;
	}

	$level = 1;
	do {
		$dictionary_end = strpos( $source_string, '>>', $nest );
		if ( false === $dictionary_end ) {
			/* translators: 1: ERROR tag 2: source offset 3: nest level */
			error_log( sprintf( _x( '%1$s: _parse_pdf_dictionary offset = %2$d, nest = %3$d.', 'error_log', 'media-library-assistant' ), __( 'ERROR', 'media-library-assistant' ), $offset, $nest ), 0 );
			/* translators: 1: ERROR tag 2: dictionary excerpt */
			error_log( sprintf( _x( '%1$s: _parse_pdf_dictionary no end delimiter dump = %2$s.', 'error_log', 'media-library-assistant' ), __( 'ERROR', 'media-library-assistant' ), MLAData::mla_hex_dump( substr( $source_string, $offset, 128 ), 128, 16 ) ), 0 );
			return array( '/length' => 0 );
		}

		$next_open = strpos( $source_string, '<<', $nest );
		if ( ( false !== $next_open ) && ( $next_open < $dictionary_end ) ) {
			// Another dictionary opens before this '>>': go one level deeper
			$nest = $next_open + 2;
			$level++;
		} else {
			// This '>>' closes the current level
			$nest = $dictionary_end + 2;
			$level--;
		}
	} while ( $level );

	$dictionary_length = $dictionary_end + 2 - $offset;
	$dictionary = array();

	// \x00-\x20 for whitespace
	// \(|\)|\<|\>|\[|\]|\{|\}|\/|\% for delimiters
	// Group 1 is the entry name (no leading '/'), group 2 is the whitespace that follows it
	$match_count = preg_match_all( '!/([^\\x00-\\x20|\\(|\\)|\\<|\\>|\\[|\\]|\\{|\\}|\\/|\\%]*)([\\x00-\\x20]*)!', substr( $source_string, $offset, $dictionary_length ), $matches, PREG_OFFSET_CAPTURE );

	// Absolute offset of the first byte after the most recent string/dictionary/array value
	$end_data = -1;
	for ( $match_index = 0; $match_index < $match_count; $match_index++ ) {
		$name = $matches[1][ $match_index ][0];

		// Captured offsets are relative to the dictionary substring; convert to a source offset
		$value_start = $offset + $matches[2][ $match_index ][1] + strlen( $matches[2][ $match_index ][0] );

		/*
		 * Skip over false matches within a string or nested dictionary
		 */
		if ( $value_start < $end_data ) {
			continue;
		}

		$end_data = -1;

		// The raw value runs up to the next '/', CR or LF, bounded by the closing '>>'
		$value_count = preg_match( '!(\\/?[^\\/\\x0D\\x0A]*)!', substr( $source_string, $value_start, $dictionary_end - $value_start ), $value_matches, PREG_OFFSET_CAPTURE );

		if ( 1 !== $value_count ) {
			$dictionary[ $name ] = array( 'value' => '' );
			$dictionary[ $name ]['type'] = 'nomatch';
			continue;
		}

		$value = trim( $value_matches[0][0] );
		$dictionary[ $name ]['value'] = $value;

		if ( ! isset( $value[0] ) ) {
			/* translators: 1: ERROR tag 2: entry name 3: value excerpt */
			error_log( sprintf( _x( '%1$s: _parse_pdf_dictionary bad value [ %2$s ] dump = %3$s', 'error_log', 'media-library-assistant' ), __( 'ERROR', 'media-library-assistant' ), $name, MLAData::mla_hex_dump( $value, 32, 16 ) ), 0 );
			continue;
		}

		if ( in_array( $value, array( 'true', 'false' ), true ) ) {
			$dictionary[ $name ]['type'] = 'boolean';
		} elseif ( is_numeric( $value ) ) {
			$dictionary[ $name ]['type'] = 'numeric';
		} elseif ( '(' === $value[0] ) {
			// The string parser supplies the whole entry; its '/length' element is only
			// used here to skip false key matches inside the string.
			$dictionary[ $name ] = self::_parse_pdf_string( $source_string, $value_start );
			$end_data = $value_start + $dictionary[ $name ]['/length'];
			unset( $dictionary[ $name ]['/length'] );
		} elseif ( '<' === $value[0] ) {
			// isset() guards against a value consisting of a single '<'
			if ( isset( $value[1] ) && ( '<' === $value[1] ) ) {
				$nested = self::_parse_pdf_dictionary( $source_string, $value_start );

				// $value_start points at the nested '<<', so the nested '/length' already
				// spans both delimiters; adding anything more would skip the next outer
				// key when it follows the nested '>>' directly.
				$end_data = $value_start + $nested['/length'];
				unset( $nested['/length'] );

				$dictionary[ $name ]['value'] = $nested;
				$dictionary[ $name ]['type'] = 'dictionary';
			} else {
				$dictionary[ $name ]['type'] = 'hex';
			}
		} elseif ( '/' === $value[0] ) {
			$dictionary[ $name ]['value'] = substr( $value, 1 );
			$dictionary[ $name ]['type'] = 'name';

			// The name value itself matched the key pattern as the next match; skip it
			$match_index++;
		} elseif ( '[' === $value[0] ) {
			$dictionary[ $name ]['type'] = 'array';

			$array_end = strpos( $source_string, ']', $value_start );
			if ( false === $array_end ) {
				// Unterminated array: bound it by the end of this dictionary
				$array_end = $dictionary_end;
			}

			$array_length = $array_end - ( $value_start + 1 );
			$dictionary[ $name ]['value'] = substr( $source_string, $value_start + 1, $array_length );

			// First byte after the closing ']'
			$end_data = 2 + $value_start + $array_length;
		} elseif ( 'null' === $value ) {
			$dictionary[ $name ]['type'] = 'null';
		} elseif ( 'stream' === substr( $value, 0, 6 ) ) {
			$dictionary[ $name ]['type'] = 'stream';
		} else {
			// Indirect reference: "<object number> <generation number> R"
			$object_count = preg_match( '!(\\d+)\\h+(\\d+)\\h+R!', $value, $object_matches );

			if ( 1 === $object_count ) {
				$dictionary[ $name ]['type'] = 'indirect';
				$dictionary[ $name ]['object'] = $object_matches[1];
				$dictionary[ $name ]['generation'] = $object_matches[2];
			} else {
				$dictionary[ $name ]['type'] = 'unknown';
			}
		}
	} // for each match

	$dictionary['/length'] = $dictionary_length;
	return $dictionary;
}