/**
 * Optimizes the lexer definition.
 *
 * Gives every state name a numeric ID and re-indexes the state definitions
 * (CSS class, flags, data and transition diagram) by that ID. The return and
 * quit pseudo-states always receive the two highest IDs.
 *
 * @return \FSHL\Generator
 * @throws \RuntimeException If the lexer definition is wrong.
 */
private function optimize()
{
    // Number the states declared by the lexer; the quit state is numbered last.
    $nextId = 0;
    foreach (array_keys($this->lexer->getStates()) as $name) {
        if (self::STATE_QUIT !== $name) {
            $this->states[$name] = $nextId++;
        }
    }
    $this->states[self::STATE_RETURN] = $nextId++;
    $this->states[self::STATE_QUIT] = $nextId;

    foreach ($this->lexer->getStates() as $name => $definition) {
        $id = $this->states[$name];

        $this->classes[$id] = $definition[self::STATE_INDEX_CLASS];
        $this->flags[$id] = $definition[self::STATE_INDEX_FLAGS];
        $this->data[$id] = $definition[self::STATE_INDEX_DATA];

        $diagram = $definition[self::STATE_INDEX_DIAGRAM];
        if (!is_array($diagram)) {
            // A state without a transition diagram has no delimiters and no transitions.
            $this->delimiters[$id] = null;
            $this->trans[$id] = null;
            continue;
        }

        $position = 0;
        foreach ($diagram as $delimiter => $transition) {
            $target = $transition[self::STATE_DIAGRAM_INDEX_STATE];
            if (self::STATE_SELF === $target) {
                // A self-reference resolves to the name of the state being processed.
                $target = array_search($id, $this->states);
            }

            if (!isset($this->states[$target])) {
                throw new \RuntimeException(sprintf('Unknown state in transition %s [%s] => %s', $name, $delimiter, $target));
            }

            // Store the target as a numeric ID instead of a state name.
            $transition[self::STATE_DIAGRAM_INDEX_STATE] = $this->states[$target];

            $this->delimiters[$id][$position] = $delimiter;
            $this->trans[$id][$position] = $transition;
            $position++;
        }
    }

    $initialState = $this->lexer->getInitialState();
    if (!isset($this->states[$initialState])) {
        throw new \RuntimeException(sprintf('Unknown initial state "%s"', $initialState));
    }

    return $this;
}
/**
 * Generates a state code.
 *
 * Builds the PHP source of a public method named "findDelimiter" followed by the
 * state ID. The generated method walks the text from $textPos, tests every
 * delimiter registered in $this->delimiters[$state] in order and returns
 * array(delimiter index, matched delimiter text, buffer of skipped characters);
 * when the text ends without a match it returns array(-1, -1, buffer).
 *
 * Delimiter names are resolved against the built-in table merged with
 * $this->lexer->getDelimiters() (lexer entries override built-ins of the same name):
 *  - 'ALL' produces an unconditional return of the current letter,
 *  - a definition starting with 'preg_match' is emitted verbatim as the condition
 *    and the generated code returns $matches[0],
 *  - anything else is a literal string stored in a static $delimiters array and
 *    compared with the current letter (length 1) or with strpos() on $part.
 *
 * The $part and $letter assignments are emitted only when the generated
 * conditions reference those variables.
 *
 * NOTE(review): $part is substr($text, $textPos, 10), so a literal delimiter longer
 * than 10 bytes can never satisfy the strpos() condition and a regex delimiter
 * matches at most 10 bytes per call - confirm this limit is intended.
 *
 * @param integer $state Numeric state ID (index into $this->delimiters)
 *
 * @return string PHP source of the generated method
 */ private function generateState($state) { // Delimiter => Condition static $commonDelimiters = array('ALL' => true, 'LINE' => "\n", 'TAB' => "\t", 'SPACE' => 'preg_match(\'~^\\s+~\', $part, $matches)', '!SPACE' => 'preg_match(\'~^\\\\S+~\', $part, $matches)', 'ALPHA' => 'preg_match(\'~^[a-z]+~i\', $part, $matches)', '!ALPHA' => 'preg_match(\'~^[^a-z]+~i\', $part, $matches)', 'ALNUM' => 'preg_match(\'~^[a-z\\\\d]+~i\', $part, $matches)', '!ALNUM' => 'preg_match(\'~^[^a-z\\\\d]+~i\', $part, $matches)', 'ALNUM_' => 'preg_match(\'~^\\\\w+~\', $part, $matches)', '!ALNUM_' => 'preg_match(\'~^\\\\W+~\', $part, $matches)', 'NUM' => 'preg_match(\'~^\\\\d+~\', $part, $matches)', '!NUM' => 'preg_match(\'~^\\\\D+~\', $part, $matches)', 'HEXNUM' => 'preg_match(\'~^[a-f\\\\d]+~i\', $part, $matches)', '!HEXNUM' => 'preg_match(\'~^[^a-f\\\\d]+~i\', $part, $matches)', 'DOTNUM' => 'preg_match(\'~^\\.\\\\d+~\', $part, $matches)', '!DOTNUM' => 'preg_match(\'~^(?:[^\\.]|\\.\\\\D)~\', $part, $matches)'); $allDelimiters = array_merge($commonDelimiters, $this->lexer->getDelimiters()); $conditionsSource = ''; $delimiters = array(); foreach ($this->delimiters[$state] as $no => $delimiter) { if ('ALL' === $delimiter) { $conditionSource = <<<CONDITION \t\t\treturn array({$no}, \$letter, \$buffer); CONDITION; } else { if (isset($allDelimiters[$delimiter]) && 0 === strpos($allDelimiters[$delimiter], 'preg_match')) { $delimiterSource = '$matches[0]'; $condition = $allDelimiters[$delimiter]; } else { if (isset($allDelimiters[$delimiter])) { $delimiter = $allDelimiters[$delimiter]; } $delimiters[$no] = $delimiter; $delimiterSource = sprintf('$delimiters[%d]', $no); if (1 === strlen($delimiter)) { $condition = sprintf('$delimiters[%d] === $letter', $no); } else { $condition = sprintf('0 === strpos($part, $delimiters[%d])', $no); } } $conditionSource = <<<CONDITION \t\t\tif ({$condition}) { \t\t\t\treturn array({$no}, 
{$delimiterSource}, \$buffer); \t\t\t} CONDITION; } $conditionsSource .= $conditionSource; } $partSource = preg_match('~\\$part~', $conditionsSource) ? 'substr($text, $textPos, 10)' : ''; if (preg_match('~\\$letter~', $conditionsSource)) { $letterSource = '$text[$textPos]'; $bufferSource = '$letter'; } else { $letterSource = ''; $bufferSource = '$text[$textPos]'; } $source = ' /** * Finds a delimiter for state ' . array_search($state, $this->states) . '. * * @param string $text * @param string $textLength * @param string $textPos * @return array */ public function findDelimiter' . $state . '($text, $textLength, $textPos) { ' . (!empty($delimiters) ? sprintf('static $delimiters = %s;', $this->getVarValueSource($delimiters)) : '') . ' $buffer = false; while ($textPos < $textLength) { ' . (!empty($partSource) ? sprintf('$part = %s;', $partSource) : '') . ' ' . (!empty($letterSource) ? sprintf('$letter = %s;', $letterSource) : '') . ' ' . $conditionsSource . ' $buffer .= ' . $bufferSource . '; $textPos++; } return array(-1, -1, $buffer); } '; // Removes trailing whitespaces and unnecessary empty lines $source = preg_replace('~\\n{3,}~', "\n\n", preg_replace('~\\t+\\n~', "\n", $source)); return $source; }