예제 #1
0
    /**
     * Extracting the data of one content
     *
     * The parse function splits the content into its entries.
     * Then every entry is parsed by this function.
     * It parses the entry backwards.
     * First the last '=' is searched and the value extracted from that.
     * A copy is made of the entry if warnings should be generated. This takes quite
     * some memory but it is needed to get good warnings. If nor warnings are generated
     * then you don have to worry about memory.
     * Then the last ',' is searched and the field extracted from that.
     * Again the entry is shortened.
     * Finally after all field=>value pairs the cite and type is extraced and the
     * authors are splitted.
     * If there is a problem false is returned.
     *
     * @access private
     * @param string $entry The entry
     * @return array The representation of the entry or false if there is a problem
     */
    function _parseEntry($entry)
    {
        $entrycopy = '';
        if ($this->_options['validate']) {
            $entrycopy = $entry; //We need a copy for printing the warnings
        }
        $ret = array('bibtex' => $entry.'}');
        if ('@string' ==  strtolower(substr($entry, 0, 7))) {
            //String are not yet supported!
            if ($this->_options['validate']) {
                $this->_generateWarning('STRING_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
            }
        } elseif ('@preamble' ==  strtolower(substr($entry, 0, 9))) {
            //Preamble not yet supported!
            if ($this->_options['validate']) {
                $this->_generateWarning('PREAMBLE_ENTRY_NOT_YET_SUPPORTED', '', $entry.'}');
            }
        }  elseif ('@comment' ==  strtolower(substr($entry, 0, 8))) {
	  // Just ignores
	}  else {
            // Look for key
            $matches = array();
            preg_match('/^@\w+\{([\w\d]+),/' ,$entry, $matches);
            if ( count($matches) > 0 )
            {
              $ret['entrykey'] = $matches[1];
            }

            //Parsing all fields
            while (strrpos($entry,'=') !== false) {
                $position = strrpos($entry, '=');
                //Checking that the equal sign is not quoted or is not inside a equation (For example in an abstract)
                $proceed  = true;
                if (substr($entry, $position-1, 1) == '\\') {
                    $proceed = false;
                }
                if ($proceed) {
                    $proceed = $this->_checkEqualSign($entry, $position);
                }
                while (!$proceed) {
                    $substring = substr($entry, 0, $position);
                    $position  = strrpos($substring,'=');
                    $proceed   = true;
                    if (substr($entry, $position-1, 1) == '\\') {
                        $proceed = false;
                    }
                    if ($proceed) {
                        $proceed = $this->_checkEqualSign($entry, $position);
                    }
                }

                $value = trim(substr($entry, $position+1));
                $entry = substr($entry, 0, $position);

                if (',' == substr($value, strlen($value)-1, 1)) {
                    $value = substr($value, 0, -1);
                }
                if ($this->_options['validate']) {
                    $this->_validateValue($value, $entrycopy);
                }
                if ($this->_options['stripDelimiter']) {
                    $value = $this->_stripDelimiter($value);
                }
                if ($this->_options['unwrap']) {
                    $value = $this->_unwrap($value);
                }
                if ($this->_options['removeCurlyBraces']) {
                    $value = $this->_removeCurlyBraces($value);
                }
                $position    = strrpos($entry, ',');
                $field       = strtolower(trim(substr($entry, $position+1)));
                $ret[$field] = $value;
                $entry       = substr($entry, 0, $position);
            }
            //Parsing cite and entry type
            $arr = explode('{', $entry);
            $ret['cite'] = trim($arr[1]);
            $ret['entrytype'] = strtolower(trim($arr[0]));
            if ('@' == $ret['entrytype']{0}) {
                $ret['entrytype'] = substr($ret['entrytype'], 1);
            }
            if ($this->_options['validate']) {
                if (!$this->_checkAllowedEntryType($ret['entrytype'])) {
                    $this->_generateWarning('WARNING_NOT_ALLOWED_ENTRY_TYPE', $ret['entrytype'], $entry.'}');
                }
            }

	    // Process accents
	    foreach($ret as $key => &$value) 
	      if ($key != "bibtex")
	      Structures_BibTex::process_accents($value);
	    
	    // Handling pages
            if (in_array('pages', array_keys($ret))) {
	      $matches = array();
	      if (preg_match("/^\s*(\d+)(?:\s*--?\s*(\d+))?\s*$/", $ret['pages'], $matches)) {
		$ret['pages'] = new BibtexPages($matches[1], $matches[2]);
	      }
            }

            //Handling the authors
            if (in_array('author', array_keys($ret)) && $this->_options['extractAuthors']) {
	      $ret['author'] = $this->_extractAuthors($ret['author']);
            }
            //Handling the editors
            if (in_array('editor', array_keys($ret)) && $this->_options['extractAuthors']) {
                $ret['editor'] = $this->_extractAuthors($ret['editor']);
            }
        }
        return $ret;
    }