/**
 * Some sections are just lists of expressions, like the WHERE and HAVING clauses.
 * This function processes these sections. Recursive.
 *
 * Every non-whitespace token is wrapped in an ExpressionToken and classified
 * (subquery, in-list, MATCH arguments, function call, bracket expression,
 * variable, operator, sign, constant, colref or reserved word). Parenthesised
 * tokens are split again with splitSQLIntoTokens() and processed recursively,
 * subqueries are handed to parse().
 *
 * The classification of a token frequently depends on the previously accepted
 * token ($prev); in some cases the previous token is modified in place
 * (function arguments, "alias.*", signed numeric constants) instead of
 * appending a new entry to the result.
 *
 * @param array $tokens raw token strings of one expression list
 *
 * @return array whatever $this->toArray() produces for the classified tokens
 */
private function process_expr_list($tokens) {

    $resultList = array();
    $prev = new ExpressionToken();

    foreach ($tokens as $k => $v) {

        $curr = new ExpressionToken($k, $v);

        if ($curr->isWhitespaceToken()) {
            continue;
        }

        if ($curr->isSubQueryToken()) {
            // is it a subquery? -> parse the statement between the parentheses
            $curr->setSubTree($this->parse($this->removeParenthesisFromStart($curr->getTrim())));
            $curr->setTokenType(ExpressionType::SUBQUERY);

        } elseif ($curr->isEnclosedWithinParenthesis()) {
            // the meaning of a parenthesis pair depends on the token in front of it
            $localTokenList = $this->splitSQLIntoTokens($this->removeParenthesisFromStart($curr->getTrim()));

            if ($prev->getUpper() === 'IN') {
                // in-list: the commas are only separators, drop them
                foreach ($localTokenList as $listKey => $listValue) {
                    $listToken = new ExpressionToken($listKey, $listValue);
                    if ($listToken->isCommaToken()) {
                        unset($localTokenList[$listKey]);
                    }
                }

                $localTokenList = array_values($localTokenList);
                $curr->setSubTree($this->process_expr_list($localTokenList));
                $curr->setTokenType(ExpressionType::IN_LIST);

            } elseif ($prev->getUpper() === 'AGAINST') {
                // MATCH ... AGAINST (expr [search modifier])
                // the modifier keyword and everything behind it is removed from
                // the argument list and re-added as a single MATCH_MODE token
                $match_mode = false;
                foreach ($localTokenList as $matchKey => $matchValue) {
                    $matchToken = new ExpressionToken($matchKey, $matchValue);
                    switch ($matchToken->getUpper()) {
                    case 'WITH':
                        $match_mode = 'WITH QUERY EXPANSION';
                        break;
                    case 'IN':
                        $match_mode = 'IN BOOLEAN MODE';
                        break;
                    default:
                        break;
                    }

                    if ($match_mode !== false) {
                        unset($localTokenList[$matchKey]);
                    }
                }

                $matchArguments = $this->process_expr_list($localTokenList);

                if ($match_mode !== false) {
                    $modeToken = new ExpressionToken(0, $match_mode);
                    $modeToken->setTokenType(ExpressionType::MATCH_MODE);
                    $matchArguments[] = $modeToken->toArray();
                }

                $curr->setSubTree($matchArguments);
                $curr->setTokenType(ExpressionType::MATCH_ARGUMENTS);
                $prev->setTokenType(ExpressionType::SIMPLE_FUNCTION);

            } elseif ($prev->isColumnReference() || $prev->isFunction() || $prev->isAggregateFunction()) {
                // if we have a colref followed by a parenthesis pair,
                // it isn't a colref, it is a user-function

                // a comma is the split-point between two arguments; the last
                // (possibly empty) argument has no trailing comma
                $argumentGroups = array();
                $argumentTokens = array();
                foreach ($localTokenList as $argKey => $argValue) {
                    $argToken = new ExpressionToken($argKey, $argValue);
                    if ($argToken->isCommaToken()) {
                        $argumentGroups[] = $argumentTokens;
                        $argumentTokens = array();
                    } else {
                        $argumentTokens[] = $argValue;
                    }
                }
                $argumentGroups[] = $argumentTokens;

                foreach ($argumentGroups as $argumentTokens) {
                    $localExpr = new ExpressionToken();
                    foreach ($argumentTokens as $argValue) {
                        $localExpr->addToken($argValue);
                    }

                    $localExprList = $this->process_expr_list($argumentTokens);

                    // an argument could have multiple parts split by operands,
                    // such an argument is wrapped into one expression node
                    if (count($localExprList) > 1) {
                        $localExpr->setSubTree($localExprList);
                        $localExpr->setTokenType(ExpressionType::EXPRESSION);
                        $localExprList = $localExpr->toArray();
                        $localExprList['alias'] = false;
                        $localExprList = array($localExprList);
                    }

                    if (!$curr->getSubTree()) {
                        $curr->setSubTree($localExprList);
                    } else {
                        $curr->setSubTree(array_merge($curr->getSubTree(), $localExprList));
                    }
                }

                // the arguments belong to the function token in front of the
                // parenthesis; that token replaces its own entry in the result
                $prev->setSubTree($curr->getSubTree());
                if ($prev->isColumnReference()) {
                    $prev->setTokenType(ExpressionType::SIMPLE_FUNCTION);
                }

                array_pop($resultList);
                $curr = $prev;
            }

            // we have parenthesis, but it seems to be an expression
            if ($curr->isUnspecified()) {
                $curr->setSubTree($this->process_expr_list($localTokenList));
                $curr->setTokenType(ExpressionType::BRACKET_EXPRESSION);
            }

        } elseif ($curr->isVariableToken()) {
            // a variable
            $curr->setTokenType($this->getVariableType($curr->getUpper()));
            $curr->setSubTree(false);

        } else {
            // it is either an operator, a colref or a constant
            // NOTE: "continue 2" / "continue 3" below target the outer foreach,
            // PHP counts every enclosing switch as one level
            switch ($curr->getUpper()) {

            case '*':
                $curr->setSubTree(false); // no subtree

                // single or first element of expression list -> all-column-alias
                if (empty($resultList)) {
                    $curr->setTokenType(ExpressionType::COLREF);
                    break;
                }

                // if the last token can be an operand (colref, const, expression,
                // function call, subquery) then * is an operator, otherwise it
                // is the all-columns-alias; functions and subqueries are checked
                // here the same way the +/- handling below does it
                if (!$prev->isColumnReference() && !$prev->isConstant() && !$prev->isExpression()
                        && !$prev->isBracketExpression() && !$prev->isAggregateFunction()
                        && !$prev->isFunction() && !$prev->isSubQuery()) {
                    $curr->setTokenType(ExpressionType::COLREF);
                    break;
                }

                // but if the previous colref ends with a dot, the * is the all-columns-alias
                if ($prev->isColumnReference() && $prev->endsWith(".")) {
                    $prev->addToken('*'); // tablealias dot *
                    continue 2; // skip the current token
                }

                $curr->setTokenType(ExpressionType::OPERATOR);
                break;

            case 'AND':
            case '&&':
            case 'BETWEEN':
            case 'BINARY':
            case '&':
            case '~':
            case '|':
            case '^':
            case 'DIV':
            case '/':
            case '<=>':
            case '=':
            case '>=':
            case '>':
            case 'IS':
            case 'NOT':
            case '<<':
            case '<=':
            case '<':
            case 'LIKE':
            case '%':
            case '!=':
            case '<>':
            case 'REGEXP':
            case '!':
            case '||':
            case 'OR':
            case '>>':
            case 'RLIKE':
            case 'SOUNDS':
            case 'XOR':
            case 'IN':
                $curr->setSubTree(false);
                $curr->setTokenType(ExpressionType::OPERATOR);
                break;

            case 'NULL':
                $curr->setSubTree(false);
                $curr->setTokenType(ExpressionType::CONSTANT);
                break;

            case '-':
            case '+':
                // differ between preceding sign and operator
                $curr->setSubTree(false);

                if ($prev->isColumnReference() || $prev->isFunction() || $prev->isAggregateFunction()
                        || $prev->isConstant() || $prev->isSubQuery() || $prev->isExpression()
                        || $prev->isBracketExpression()) {
                    $curr->setTokenType(ExpressionType::OPERATOR);
                } else {
                    $curr->setTokenType(ExpressionType::SIGN);
                }
                break;

            default:
                $curr->setSubTree(false);

                // the first character decides between literal, quoted name and the rest
                switch ($curr->getToken(0)) {
                case "'":
                case '"':
                    // it is a string literal
                    $curr->setTokenType(ExpressionType::CONSTANT);
                    break;

                case '`':
                    // it is an escaped column name
                    $curr->setTokenType(ExpressionType::COLREF);
                    break;

                default:
                    if (is_numeric($curr->getToken())) {
                        if ($prev->isSign()) {
                            // merge sign and number into one signed numeric constant
                            $prev->addToken($curr->getToken());
                            $prev->setTokenType(ExpressionType::CONSTANT);
                            continue 3; // skip current token
                        }
                        $curr->setTokenType(ExpressionType::CONSTANT);
                    } else {
                        $curr->setTokenType(ExpressionType::COLREF);
                    }
                    break;
                }
            }
        }

        // is a reserved word?
        if (!$curr->isOperator() && !$curr->isInList() && !$curr->isFunction() && !$curr->isAggregateFunction()
                && in_array($curr->getUpper(), parent::$reserved)) {

            if (in_array($curr->getUpper(), parent::$aggregateFunctions)) {
                $curr->setTokenType(ExpressionType::AGGREGATE_FUNCTION);

            } elseif ($curr->getUpper() === 'NULL') {
                // it is a reserved word, but we would like to set it as constant
                $curr->setTokenType(ExpressionType::CONSTANT);

            } elseif (in_array($curr->getUpper(), parent::$parameterizedFunctions)) {
                // issue 60: check functions with parameters
                // -> colref (we check parameters later)
                // -> if there is no parameter, we leave the colref
                $curr->setTokenType(ExpressionType::COLREF);

            } elseif (in_array($curr->getUpper(), parent::$functions)) {
                $curr->setTokenType(ExpressionType::SIMPLE_FUNCTION);

            } else {
                $curr->setTokenType(ExpressionType::RESERVED);
            }
        }

        // still no type: treat the token text as an expression of its own
        if ($curr->isUnspecified()) {
            $curr->setTokenType(ExpressionType::EXPRESSION);
            $curr->setSubTree($this->process_expr_list($this->splitSQLIntoTokens($curr->getTrim())));
        }

        $resultList[] = $curr;
        $prev = $curr;
    } // end of for-loop

    return $this->toArray($resultList);
}