/**
 * Extracts the data of one entry.
 *
 * The parse function splits the content into its entries; every entry is
 * then handed to this function, which parses it backwards:
 *
 *  - The last '=' that really separates a field from its value is located
 *    (escaped '=' and those rejected by _checkEqualSign() are skipped) and
 *    the value is cut off the end of the entry.
 *  - The last ',' of the shortened entry is located and the field name is
 *    cut off as well.
 *  - This repeats until no usable '=' is left; what remains is split at
 *    '{' into the entry type and the cite key.
 *  - Finally accents are processed, 'pages' is converted to a BibtexPages
 *    object when it looks like "N" or "N-M"/"N--M", and authors/editors
 *    are extracted when the 'extractAuthors' option is set.
 *
 * When the 'validate' option is set a copy of the entry is kept so that
 * _validateValue() can be given the complete entry. This costs memory but
 * is only done when validation is requested.
 *
 * '@string' and '@preamble' entries are not supported (a warning is
 * generated when validating) and '@comment' entries are ignored; for all
 * three only the 'bibtex' key is returned.
 *
 * @access private
 * @param string $entry The entry, without its closing '}'
 * @return array The representation of the entry. It always contains the
 *               key 'bibtex'; regular entries additionally contain
 *               'cite', 'entrytype', one key per parsed field and,
 *               when the key matches /[\w\d]+/, 'entrykey'.
 */
function _parseEntry($entry)
{
    $entrycopy = '';
    if ($this->_options['validate']) {
        $entrycopy = $entry; // Untouched copy, needed for printing the warnings
    }
    $ret = array('bibtex' => $entry.'}');

    if ('@string' === strtolower(substr($entry, 0, 7))) {
        // Strings are not yet supported!
        if ($this->_options['validate']) {
            $this->_generateWarning('STRING_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
        }
        return $ret;
    }
    if ('@preamble' === strtolower(substr($entry, 0, 9))) {
        // Preamble is not yet supported!
        if ($this->_options['validate']) {
            $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
        }
        return $ret;
    }
    if ('@comment' === strtolower(substr($entry, 0, 8))) {
        // Comments are just ignored
        return $ret;
    }

    // Look for the key
    $matches = array();
    preg_match('/^@\w+\{([\w\d]+),/', $entry, $matches);
    if (count($matches) > 0) {
        $ret['entrykey'] = $matches[1];
    }

    // Parsing all fields, last one first
    while (false !== ($position = strrpos($entry, '='))) {
        // Walk backwards until an '=' is found that is neither escaped nor
        // rejected by _checkEqualSign() (e.g. an equation in an abstract).
        while (false !== $position) {
            // At position 0 there is no preceding character to inspect.
            $escaped = ($position > 0 && '\\' === substr($entry, $position - 1, 1));
            if (!$escaped && $this->_checkEqualSign($entry, $position)) {
                break;
            }
            $position = strrpos(substr($entry, 0, $position), '=');
        }
        if (false === $position) {
            // Every remaining '=' belongs to a value: no more fields to
            // parse. Without this guard the search would never terminate
            // cleanly.
            break;
        }

        $value = trim(substr($entry, $position + 1));
        $entry = substr($entry, 0, $position);
        // Drop the comma that separated this field from the following one
        if (',' === substr($value, -1)) {
            $value = substr($value, 0, -1);
        }
        if ($this->_options['validate']) {
            $this->_validateValue($value, $entrycopy);
        }
        if ($this->_options['stripDelimiter']) {
            $value = $this->_stripDelimiter($value);
        }
        if ($this->_options['unwrap']) {
            $value = $this->_unwrap($value);
        }
        if ($this->_options['removeCurlyBraces']) {
            $value = $this->_removeCurlyBraces($value);
        }

        // The field name is everything after the last remaining comma.
        // NOTE(review): when no comma is left strrpos() yields false, which
        // is used as 0 below (original behaviour, kept as is).
        $position = strrpos($entry, ',');
        $field = strtolower(trim(substr($entry, $position + 1)));
        $ret[$field] = $value;
        $entry = substr($entry, 0, $position);
    }

    // Parsing cite and entry type
    $arr = explode('{', $entry);
    // A malformed entry may not contain a '{' any more
    $ret['cite'] = isset($arr[1]) ? trim($arr[1]) : '';
    $ret['entrytype'] = strtolower(trim($arr[0]));
    if ('' !== $ret['entrytype'] && '@' === $ret['entrytype'][0]) {
        $ret['entrytype'] = substr($ret['entrytype'], 1);
    }
    if ($this->_options['validate']) {
        if (!$this->_checkAllowedEntryType($ret['entrytype'])) {
            $this->_generateWarning('WARNING_NOT_ALLOWED_ENTRY_TYPE', $ret['entrytype'], $entry.'}');
        }
    }

    // Process accents in everything but the raw BibTeX source.
    // NOTE(review): process_accents() presumably modifies its argument by
    // reference - confirm against its definition.
    foreach ($ret as $key => &$fieldValue) {
        if ('bibtex' !== $key) {
            Structures_BibTex::process_accents($fieldValue);
        }
    }
    unset($fieldValue); // Break the reference to the last element of $ret

    // Handling pages
    if (array_key_exists('pages', $ret)) {
        $matches = array();
        if (preg_match("/^\s*(\d+)(?:\s*--?\s*(\d+))?\s*$/", $ret['pages'], $matches)) {
            // The end page group is absent from $matches for a single page
            $ret['pages'] = new BibtexPages($matches[1], isset($matches[2]) ? $matches[2] : null);
        }
    }

    // Handling the authors and the editors
    if ($this->_options['extractAuthors']) {
        foreach (array('author', 'editor') as $role) {
            if (array_key_exists($role, $ret)) {
                $ret[$role] = $this->_extractAuthors($ret[$role]);
            }
        }
    }

    return $ret;
}