Author: André Rothe (andre.rothe@phosco.info)
 /**
  * Processes the INSERT / REPLACE section of a statement.
  *
  * Comment tokens found in every section except 'VALUES' are handed to
  * parent::processComment() and blanked in place; the resulting comment
  * entries are appended to the parsed output. The 'OPTIONS' and 'INTO'
  * sections are consumed and removed from the token list, and the parsed
  * result is stored under the $token_category key.
  *
  * @param array  $tokenList      token lists keyed by section name
  * @param string $token_category key the result is stored under; when it is 'INSERT' or
  *                               'REPLACE' it is also the fallback section for the table name
  *
  * @return array the token list without 'OPTIONS' / 'INTO' and with $token_category replaced
  */
 public function process($tokenList, $token_category = 'INSERT')
 {
     $comments = array();
     foreach ($tokenList as $key => &$token) {
         // strict comparison: an integer key must never be mistaken for 'VALUES'
         if ($key === 'VALUES') {
             continue;
         }
         foreach ($token as &$value) {
             if ($this->isCommentToken($value)) {
                 $comments[] = parent::processComment($value);
                 $value = '';
             }
         }
         // break the reference, otherwise $value stays bound to the last element
         unset($value);
     }
     // same for the outer loop: without this, $tokenList keeps a reference slot
     // that is shared with every by-value copy handed to the helpers below
     unset($token);

     $parsed = $this->processOptions($tokenList);
     unset($tokenList['OPTIONS']);

     list($table, $cols, $keywordTokens) = $this->processKeyword('INTO', $tokenList);
     $parsed = array_merge($parsed, $keywordTokens);
     unset($tokenList['INTO']);

     // no table after INTO: look for it directly behind the INSERT / REPLACE keyword.
     // Only table and columns are taken from the fallback; its third element is
     // ignored, exactly as before.
     if ($table === '' && in_array($token_category, array('INSERT', 'REPLACE'), true)) {
         list($table, $cols) = $this->processKeyword($token_category, $tokenList);
     }

     $parsed[] = array(
         'expr_type' => ExpressionType::TABLE,
         'table' => $table,
         'no_quotes' => $this->revokeQuotation($table),
         'alias' => false,
         'base_expr' => $table,
     );

     $cols = $this->processColumns($cols);
     if ($cols !== false) {
         $parsed[] = $cols;
     }

     $parsed = array_merge($parsed, $comments);
     $tokenList[$token_category] = $parsed;
     return $tokenList;
 }
 /**
  * Parses the tokens of a FROM clause into a list of table expressions.
  *
  * The tokens are scanned once. Join keywords, aliases, index hints and
  * ON / USING references are collected in $parseInfo; every ',', JOIN or
  * STRAIGHT_JOIN closes the current table reference, which is converted by
  * processFromExpression() and appended to the result. Comment tokens are
  * appended to the result via parent::processComment() as they are met.
  *
  * @param array $tokens token strings of the FROM clause; the alias look-ahead reads
  *                      $tokens[position + n], so a zero-based list is expected
  *
  * @return array parsed table expressions and comments
  */
 public function process($tokens)
 {
     $parseInfo = $this->initParseInfo();
     $expr = array();
     $token_category = '';
     $prevToken = '';
     $skip_next = false;

     // Position of the current token within $tokens. It is advanced exactly once
     // per iteration, so it stays correct on every "continue" path (comments,
     // skipped tokens, index hints) and the AS look-ahead reads the right tokens.
     $i = -1;

     foreach ($tokens as $token) {
         $i++;
         $upper = strtoupper(trim($token));

         if ($skip_next) {
             // drop the token following FOR; empty tokens do not count as that token
             if ($token !== "") {
                 $parseInfo['token_count']++;
                 $skip_next = false;
             }
             continue;
         }

         if ($this->isCommentToken($token)) {
             $expr[] = parent::processComment($token);
             continue;
         }

         // first pass: build the textual expression and detect join types
         switch ($upper) {
             case 'CROSS':
             case ',':
             case 'INNER':
             case 'STRAIGHT_JOIN':
                 break;
             case 'OUTER':
             case 'JOIN':
                 if ($token_category === 'LEFT' || $token_category === 'RIGHT' || $token_category === 'NATURAL') {
                     $token_category = '';
                     // it seems to be a join
                     $parseInfo['next_join_type'] = strtoupper(trim($prevToken));
                 }
                 break;
             case 'LEFT':
             case 'RIGHT':
             case 'NATURAL':
                 // could be a join type or a function name; decide on the next token
                 $token_category = $upper;
                 $prevToken = $token;
                 continue 2;
             default:
                 if ($token_category === 'LEFT' || $token_category === 'RIGHT') {
                     if ($upper === '') {
                         $prevToken .= $token;
                         break;
                     } else {
                         $token_category = '';
                         // it seems to be a function
                         $parseInfo['expression'] .= $prevToken;
                         if ($parseInfo['ref_type'] !== false) {
                             // all after ON / USING
                             $parseInfo['ref_expr'] .= $prevToken;
                         }
                         $prevToken = '';
                     }
                 }
                 $parseInfo['expression'] .= $token;
                 if ($parseInfo['ref_type'] !== false) {
                     // all after ON / USING
                     $parseInfo['ref_expr'] .= $token;
                 }
                 break;
         }

         if ($upper === '') {
             continue;
         }

         // second pass: interpret the token.
         // "break" is used where the code formerly said "continue": inside a switch
         // both do the same, but "continue" raises a warning since PHP 7.3.
         switch ($upper) {
             case 'AS':
                 $parseInfo['alias'] = array('as' => true, 'name' => "", 'base_expr' => $token);
                 $parseInfo['token_count']++;
                 // look ahead to the first non-blank token, it is the alias name
                 $n = 1;
                 $str = "";
                 while ($str === "" && isset($tokens[$i + $n])) {
                     $parseInfo['alias']['base_expr'] .= ($tokens[$i + $n] === "" ? " " : $tokens[$i + $n]);
                     $str = trim($tokens[$i + $n]);
                     ++$n;
                 }
                 $parseInfo['alias']['name'] = $str;
                 $parseInfo['alias']['no_quotes'] = $this->revokeQuotation($str);
                 $parseInfo['alias']['base_expr'] = trim($parseInfo['alias']['base_expr']);
                 break;
             case 'IGNORE':
             case 'USE':
             case 'FORCE':
                 $token_category = 'IDX_HINT';
                 $parseInfo['hints'][]['hint_type'] = $upper;
                 continue 2;
             case 'KEY':
             case 'INDEX':
                 if ($token_category === 'CREATE') {
                     $token_category = $upper;
                     // TODO: what is it for a statement?
                     continue 2;
                 }
                 if ($token_category === 'IDX_HINT') {
                     $cur_hint = count($parseInfo['hints']) - 1;
                     $parseInfo['hints'][$cur_hint]['hint_type'] .= " " . $upper;
                     continue 2;
                 }
                 break;
             case 'USING':
             case 'ON':
                 $parseInfo['ref_type'] = $upper;
                 $parseInfo['ref_expr'] = "";
                 // intentional fall-through: the keyword is counted like the ones below
             case 'CROSS':
             case 'INNER':
             case 'OUTER':
             case 'NATURAL':
                 $parseInfo['token_count']++;
                 break;
             case 'FOR':
                 $parseInfo['token_count']++;
                 $skip_next = true;
                 break;
             case 'STRAIGHT_JOIN':
             case ',':
                 $parseInfo['next_join_type'] = ($upper === ',') ? 'CROSS' : 'STRAIGHT_JOIN';
                 // intentional fall-through: close the current table reference
             case 'JOIN':
                 if ($parseInfo['subquery']) {
                     $parseInfo['sub_tree'] = $this->parse($this->removeParenthesisFromStart($parseInfo['subquery']));
                     $parseInfo['expression'] = $parseInfo['subquery'];
                 }
                 $expr[] = $this->processFromExpression($parseInfo);
                 $parseInfo = $this->initParseInfo($parseInfo);
                 break;
             default:
                 // TODO: enhance it, so we can have base_expr to calculate the position of the keywords
                 // build a subtree under "hints"
                 if ($token_category === 'IDX_HINT') {
                     $token_category = '';
                     $cur_hint = count($parseInfo['hints']) - 1;
                     $parseInfo['hints'][$cur_hint]['hint_list'] = $token;
                     // the hint list is not counted as a token of the table reference
                     break;
                 }
                 if ($parseInfo['token_count'] === 0) {
                     // first counted token: the table name
                     if ($parseInfo['table'] === "") {
                         $parseInfo['table'] = $token;
                         $parseInfo['no_quotes'] = $this->revokeQuotation($token);
                     }
                 } elseif ($parseInfo['token_count'] === 1) {
                     // second counted token without a preceding AS: an implicit alias
                     $parseInfo['alias'] = array('as' => false, 'name' => trim($token), 'no_quotes' => $this->revokeQuotation($token), 'base_expr' => trim($token));
                 }
                 $parseInfo['token_count']++;
                 break;
         }
     }

     // the last table reference is not followed by a join keyword
     $expr[] = $this->processFromExpression($parseInfo);
     return $expr;
 }
 /**
  * Turns the raw tokens of the 'VALUES' section into records and, after an
  * ON keyword, into the reserved words and expressions that follow it.
  *
  * Whitespace tokens are dropped, comment tokens are converted with
  * parent::processComment(). All other tokens are accumulated until a
  * delimiter (',' or ON) or the end of the list closes the current entry.
  *
  * @param array $tokens token lists keyed by section name; only 'VALUES' is read
  *
  * @return array $tokens with the 'VALUES' entry replaced by the parsed list
  */
 public function process($tokens)
 {
     $section = '';
     $result = array();
     $pending = '';

     foreach ($tokens['VALUES'] as $piece) {
         if ($this->isWhitespaceToken($piece)) {
             continue;
         }
         if ($this->isCommentToken($piece)) {
             $result[] = parent::processComment($piece);
             continue;
         }

         $pending .= $piece;
         $word = trim($piece);
         $keyword = strtoupper($word);

         if ($keyword === 'ON') {
             if ($section === '') {
                 // everything collected before ON is the last record
                 $pending = trim(substr($pending, 0, -strlen($piece)));
                 $result[] = array(
                     'expr_type' => ExpressionType::RECORD,
                     'base_expr' => $pending,
                     'data' => $this->processRecord($pending),
                     'delim' => false,
                 );
                 $pending = '';
                 $section = 'DUPLICATE';
                 $result[] = array('expr_type' => ExpressionType::RESERVED, 'base_expr' => $word);
             }
         } elseif (in_array($keyword, array('DUPLICATE', 'KEY', 'UPDATE'), true)) {
             if ($section === 'DUPLICATE') {
                 $result[] = array('expr_type' => ExpressionType::RESERVED, 'base_expr' => $word);
                 $pending = '';
             }
         } elseif ($keyword === ',') {
             if ($section === 'DUPLICATE') {
                 // the comma separates two expressions of the update list
                 $pending = trim(substr($pending, 0, -strlen($piece)));
                 $subTree = $this->processExpressionList($this->splitSQLIntoTokens($pending));
                 $result[] = array(
                     'expr_type' => ExpressionType::EXPRESSION,
                     'base_expr' => $pending,
                     'sub_tree' => empty($subTree) ? false : $subTree,
                     'delim' => $word,
                 );
             } else {
                 // the comma separates two records; it stays part of base_expr
                 $record = trim($pending);
                 $result[] = array(
                     'expr_type' => ExpressionType::RECORD,
                     'base_expr' => $record,
                     'data' => $this->processRecord($record),
                     'delim' => $word,
                 );
             }
             $pending = '';
         }
     }

     // whatever is left forms the final record or the final expression
     $rest = trim($pending);
     if ($rest !== '') {
         if ($section === '') {
             $result[] = array(
                 'expr_type' => ExpressionType::RECORD,
                 'base_expr' => $rest,
                 'data' => $this->processRecord($rest),
                 'delim' => false,
             );
         } elseif ($section === 'DUPLICATE') {
             $subTree = $this->processExpressionList($this->splitSQLIntoTokens($pending));
             $result[] = array(
                 'expr_type' => ExpressionType::EXPRESSION,
                 'base_expr' => $rest,
                 'sub_tree' => empty($subTree) ? false : $subTree,
                 'delim' => false,
             );
         }
     }

     $tokens['VALUES'] = $result;
     return $tokens;
 }
 /**
  * Classifies the tokens of an expression list.
  *
  * Every non-whitespace token is wrapped in an ExpressionToken and gets a
  * type: subquery, in-list, MATCH ... AGAINST arguments, function call,
  * bracket expression, variable, operator, sign, constant, column reference
  * or reserved word. The content of a parenthesised token is split into
  * tokens again and processed recursively. Comment tokens are converted
  * with parent::processComment().
  *
  * The type of a token often depends on its predecessor ($prev), and a
  * token may be merged into its predecessor (e.g. "t." + "*", sign + number,
  * function name + argument list).
  *
  * @param array $tokens token strings of the expression list
  *
  * @return mixed the collected entries converted by $this->toArray()
  */
 public function process($tokens)
 {
     $resultList = array();
     $skip_next = false;
     $prev = new ExpressionToken();
     foreach ($tokens as $k => $v) {
         if ($this->isCommentToken($v)) {
             $resultList[] = parent::processComment($v);
             continue;
         }
         $curr = new ExpressionToken($k, $v);
         if ($curr->isWhitespaceToken()) {
             continue;
         }
         if ($skip_next) {
             // skip the next non-whitespace token
             $skip_next = false;
             continue;
         }
         /* is it a subquery? */
         if ($curr->isSubQueryToken()) {
             $processor = new DefaultProcessor($this->options);
             $curr->setSubTree($processor->process($this->removeParenthesisFromStart($curr->getTrim())));
             $curr->setTokenType(ExpressionType::SUBQUERY);
         } elseif ($curr->isEnclosedWithinParenthesis()) {
             /* is it an in-list? */
             // NOTE: the inner loops below reuse $k and $v; the outer foreach
             // re-assigns both at the start of its next iteration.
             $localTokenList = $this->splitSQLIntoTokens($this->removeParenthesisFromStart($curr->getTrim()));
             if ($prev->getUpper() === 'IN') {
                 // drop the commas, every remaining token is an element of the list
                 foreach ($localTokenList as $k => $v) {
                     $tmpToken = new ExpressionToken($k, $v);
                     if ($tmpToken->isCommaToken()) {
                         unset($localTokenList[$k]);
                     }
                 }
                 $localTokenList = array_values($localTokenList);
                 $curr->setSubTree($this->process($localTokenList));
                 $curr->setTokenType(ExpressionType::IN_LIST);
             } elseif ($prev->getUpper() === 'AGAINST') {
                 // arguments of MATCH ... AGAINST (...), optionally with a search modifier
                 $match_mode = false;
                 foreach ($localTokenList as $k => $v) {
                     $tmpToken = new ExpressionToken($k, $v);
                     switch ($tmpToken->getUpper()) {
                         case 'WITH':
                             $match_mode = 'WITH QUERY EXPANSION';
                             break;
                         case 'IN':
                             $match_mode = 'IN BOOLEAN MODE';
                             break;
                         default:
                     }
                     // once a modifier keyword was seen, this token and all
                     // following ones are removed from the argument list
                     if ($match_mode !== false) {
                         unset($localTokenList[$k]);
                     }
                 }
                 $tmpToken = $this->process($localTokenList);
                 if ($match_mode !== false) {
                     // re-add the modifier as one single MATCH_MODE entry
                     $match_mode = new ExpressionToken(0, $match_mode);
                     $match_mode->setTokenType(ExpressionType::MATCH_MODE);
                     $tmpToken[] = $match_mode->toArray();
                 }
                 $curr->setSubTree($tmpToken);
                 $curr->setTokenType(ExpressionType::MATCH_ARGUMENTS);
                 $prev->setTokenType(ExpressionType::SIMPLE_FUNCTION);
             } elseif ($prev->isColumnReference() || $prev->isFunction() || $prev->isAggregateFunction() || $prev->isCustomFunction()) {
                 // if we have a colref followed by a parenthesis pair,
                 // it isn't a colref, it is a user-function
                 // TODO: this should be a method, because we need the same code
                 // below for unspecified tokens (expressions).
                 $localExpr = new ExpressionToken();
                 $tmpExprList = array();
                 foreach ($localTokenList as $k => $v) {
                     $tmpToken = new ExpressionToken($k, $v);
                     if (!$tmpToken->isCommaToken()) {
                         $localExpr->addToken($v);
                         $tmpExprList[] = $v;
                     } else {
                         // an expression could have multiple parts split by operands
                         // if we have a comma, it is a split-point for expressions
                         $tmpExprList = array_values($tmpExprList);
                         $localExprList = $this->process($tmpExprList);
                         // more than one entry: wrap them into a single EXPRESSION argument
                         if (count($localExprList) > 1) {
                             $localExpr->setSubTree($localExprList);
                             $localExpr->setTokenType(ExpressionType::EXPRESSION);
                             $localExprList = $localExpr->toArray();
                             $localExprList['alias'] = false;
                             $localExprList = array($localExprList);
                         }
                         // append the argument to the sub-tree collected so far
                         if (!$curr->getSubTree()) {
                             if (!empty($localExprList)) {
                                 $curr->setSubTree($localExprList);
                             }
                         } else {
                             $tmpExprList = $curr->getSubTree();
                             $curr->setSubTree(array_merge($tmpExprList, $localExprList));
                         }
                         $tmpExprList = array();
                         $localExpr = new ExpressionToken();
                     }
                 }
                 // handle the last argument (the one after the final comma)
                 $tmpExprList = array_values($tmpExprList);
                 $localExprList = $this->process($tmpExprList);
                 if (count($localExprList) > 1) {
                     $localExpr->setSubTree($localExprList);
                     $localExpr->setTokenType(ExpressionType::EXPRESSION);
                     $localExprList = $localExpr->toArray();
                     $localExprList['alias'] = false;
                     $localExprList = array($localExprList);
                 }
                 if (!$curr->getSubTree()) {
                     if (!empty($localExprList)) {
                         $curr->setSubTree($localExprList);
                     }
                 } else {
                     $tmpExprList = $curr->getSubTree();
                     $curr->setSubTree(array_merge($tmpExprList, $localExprList));
                 }
                 // the argument list belongs to the preceding token, which becomes the function
                 $prev->setSubTree($curr->getSubTree());
                 if ($prev->isColumnReference()) {
                     if (PHPSQLParserConstants::getInstance()->isCustomFunction($prev->getUpper())) {
                         $prev->setTokenType(ExpressionType::CUSTOM_FUNCTION);
                     } else {
                         $prev->setTokenType(ExpressionType::SIMPLE_FUNCTION);
                     }
                     $prev->setNoQuotes(null, null, $this->options);
                 }
                 // $prev is re-appended as $curr at the end of this iteration
                 array_pop($resultList);
                 $curr = $prev;
             }
             // we have parenthesis, but it seems to be an expression
             if ($curr->isUnspecified()) {
                 $localExpr = new ExpressionToken();
                 $tmpExprList = array();
                 foreach ($localTokenList as $k => $v) {
                     $tmpToken = new ExpressionToken($k, $v);
                     if (!$tmpToken->isCommaToken()) {
                         $localExpr->addToken($v);
                         $tmpExprList[] = $v;
                     } else {
                         // an expression could have multiple parts split by operands
                         // if we have a comma, it is a split-point for expressions
                         $tmpExprList = array_values($tmpExprList);
                         $localExprList = $this->process($tmpExprList);
                         if (count($localExprList) > 1) {
                             $localExpr->setSubTree($localExprList);
                             $localExpr->setTokenType(ExpressionType::EXPRESSION);
                             $localExprList = $localExpr->toArray();
                             $localExprList['alias'] = false;
                             $localExprList = array($localExprList);
                         }
                         if (!$curr->getSubTree()) {
                             if (!empty($localExprList)) {
                                 $curr->setSubTree($localExprList);
                             }
                         } else {
                             $tmpExprList = $curr->getSubTree();
                             $curr->setSubTree(array_merge($tmpExprList, $localExprList));
                         }
                         $tmpExprList = array();
                         $localExpr = new ExpressionToken();
                     }
                 }
                 // last part; unlike the function branch above it is not wrapped
                 // into an EXPRESSION entry
                 $tmpExprList = array_values($tmpExprList);
                 $localExprList = $this->process($tmpExprList);
                 $curr->setTokenType(ExpressionType::BRACKET_EXPRESSION);
                 if (!$curr->getSubTree()) {
                     if (!empty($localExprList)) {
                         $curr->setSubTree($localExprList);
                     }
                 } else {
                     $tmpExprList = $curr->getSubTree();
                     $curr->setSubTree(array_merge($tmpExprList, $localExprList));
                 }
             }
         } elseif ($curr->isVariableToken()) {
             // a variable, it can be quoted
             $curr->setTokenType($this->getVariableType($curr->getUpper()));
             $curr->setSubTree(false);
             $curr->setNoQuotes(trim(trim($curr->getToken()), '@'), "`'\"", $this->options);
         } else {
             /* it is either an operator, a colref or a constant */
             switch ($curr->getUpper()) {
                 case '*':
                     $curr->setSubTree(false);
                     // no subtree
                     // single or first element of expression list -> all-column-alias
                     if (empty($resultList)) {
                         $curr->setTokenType(ExpressionType::COLREF);
                         break;
                     }
                     // if the last token is colref, const or expression
                     // then * is an operator
                     // but if the previous colref ends with a dot, the * is the all-columns-alias
                     if (!$prev->isColumnReference() && !$prev->isConstant() && !$prev->isExpression() && !$prev->isBracketExpression() && !$prev->isAggregateFunction() && !$prev->isVariable()) {
                         $curr->setTokenType(ExpressionType::COLREF);
                         break;
                     }
                     if ($prev->isColumnReference() && $prev->endsWith(".")) {
                         $prev->addToken('*');
                         // tablealias dot *
                         continue 2;
                         // skip the current token
                     }
                     $curr->setTokenType(ExpressionType::OPERATOR);
                     break;
                 case ':=':
                 case 'AND':
                 case '&&':
                 case 'BETWEEN':
                 // NOTE(review): 'AND' is already listed above, this duplicate label is never reached
                 case 'AND':
                 case 'BINARY':
                 case '&':
                 case '~':
                 case '|':
                 case '^':
                 case 'DIV':
                 case '/':
                 case '<=>':
                 case '=':
                 case '>=':
                 case '>':
                 case 'IS':
                 case 'NOT':
                 case '<<':
                 case '<=':
                 case '<':
                 case 'LIKE':
                 case '%':
                 case '!=':
                 case '<>':
                 case 'REGEXP':
                 case '!':
                 case '||':
                 case 'OR':
                 case '>>':
                 case 'RLIKE':
                 case 'SOUNDS':
                 case 'XOR':
                 case 'IN':
                     $curr->setSubTree(false);
                     $curr->setTokenType(ExpressionType::OPERATOR);
                     break;
                 case 'NULL':
                     $curr->setSubTree(false);
                     $curr->setTokenType(ExpressionType::CONSTANT);
                     break;
                 case '-':
                 case '+':
                     // differ between preceding sign and operator
                     $curr->setSubTree(false);
                     if ($prev->isColumnReference() || $prev->isFunction() || $prev->isAggregateFunction() || $prev->isConstant() || $prev->isSubQuery() || $prev->isExpression() || $prev->isBracketExpression() || $prev->isVariable() || $prev->isCustomFunction()) {
                         $curr->setTokenType(ExpressionType::OPERATOR);
                     } else {
                         $curr->setTokenType(ExpressionType::SIGN);
                     }
                     break;
                 default:
                     $curr->setSubTree(false);
                     // decide by the first character of the token
                     switch ($curr->getToken(0)) {
                         case "'":
                         case '"':
                             // it is a string literal
                             $curr->setTokenType(ExpressionType::CONSTANT);
                             break;
                         case '`':
                             // it is an escaped column name
                             $curr->setTokenType(ExpressionType::COLREF);
                             $curr->setNoQuotes($curr->getToken(), null, $this->options);
                             break;
                         default:
                             if (is_numeric($curr->getToken())) {
                                 if ($prev->isSign()) {
                                     $prev->addToken($curr->getToken());
                                     // it is a negative numeric constant
                                     $prev->setTokenType(ExpressionType::CONSTANT);
                                     // leaves both switch statements and continues the foreach
                                     continue 3;
                                     // skip current token
                                 } else {
                                     $curr->setTokenType(ExpressionType::CONSTANT);
                                 }
                             } else {
                                 $curr->setTokenType(ExpressionType::COLREF);
                                 $curr->setNoQuotes($curr->getToken(), null, $this->options);
                             }
                             break;
                     }
             }
         }
         /* is a reserved word? */
         if (!$curr->isOperator() && !$curr->isInList() && !$curr->isFunction() && !$curr->isAggregateFunction() && !$curr->isCustomFunction() && PHPSQLParserConstants::getInstance()->isReserved($curr->getUpper())) {
             if (PHPSQLParserConstants::getInstance()->isCustomFunction($curr->getUpper())) {
                 $curr->setTokenType(ExpressionType::CUSTOM_FUNCTION);
                 $curr->setNoQuotes(null, null, $this->options);
             } elseif (PHPSQLParserConstants::getInstance()->isAggregateFunction($curr->getUpper())) {
                 $curr->setTokenType(ExpressionType::AGGREGATE_FUNCTION);
                 $curr->setNoQuotes(null, null, $this->options);
             } elseif ($curr->getUpper() === 'NULL') {
                 // it is a reserved word, but we would like to set it as constant
                 $curr->setTokenType(ExpressionType::CONSTANT);
             } else {
                 if (PHPSQLParserConstants::getInstance()->isParameterizedFunction($curr->getUpper())) {
                     // issue 60: check functions with parameters
                     // -> colref (we check parameters later)
                     // -> if there is no parameter, we leave the colref
                     $curr->setTokenType(ExpressionType::COLREF);
                 } elseif (PHPSQLParserConstants::getInstance()->isFunction($curr->getUpper())) {
                     $curr->setTokenType(ExpressionType::SIMPLE_FUNCTION);
                     $curr->setNoQuotes(null, null, $this->options);
                 } else {
                     $curr->setTokenType(ExpressionType::RESERVED);
                     $curr->setNoQuotes(null, null, $this->options);
                 }
             }
         }
         // issue 94, INTERVAL 1 MONTH
         // a parameterized-function keyword next to a constant is a reserved word
         if ($curr->isConstant() && PHPSQLParserConstants::getInstance()->isParameterizedFunction($prev->getUpper())) {
             $prev->setTokenType(ExpressionType::RESERVED);
             $prev->setNoQuotes(null, null, $this->options);
         }
         if ($prev->isConstant() && PHPSQLParserConstants::getInstance()->isParameterizedFunction($curr->getUpper())) {
             $curr->setTokenType(ExpressionType::RESERVED);
             $curr->setNoQuotes(null, null, $this->options);
         }
         // still no type: treat the token as an expression and parse it recursively
         if ($curr->isUnspecified()) {
             $curr->setTokenType(ExpressionType::EXPRESSION);
             $curr->setNoQuotes(null, null, $this->options);
             $curr->setSubTree($this->process($this->splitSQLIntoTokens($curr->getTrim())));
         }
         $resultList[] = $curr;
         $prev = $curr;
     }
     // end of for-loop
     return $this->toArray($resultList);
 }
 /**
  * Initialises the processor and creates the LimitProcessor it keeps
  * in $this->limitProcessor.
  *
  * @param Options $options passed on to the parent constructor and to the LimitProcessor
  */
 public function __construct(Options $options)
 {
     parent::__construct($options);
     $this->limitProcessor = new LimitProcessor($options);
 }