/**
 * Handles MySQL's parenthesised form of UNION:
 *
 *   (select ...)
 *   union
 *   (select ...)
 *
 * For every non-empty 'UNION' / 'UNION ALL' entry, the first token that is not
 * blank decides how that entry is parsed. Only one parenthesised subquery is
 * supported per UNION block: "(select)(select)union(select)" is not legal and
 * the additional queries are silently ignored.
 *
 * @param array $queries Token lists, grouped under the union type they belong to.
 *
 * @return array The same structure; each handled entry is replaced by its parsed form.
 */
protected function processMySQLUnion($queries)
{
    foreach (array('UNION', 'UNION ALL') as $unionType) {
        if (empty($queries[$unionType])) {
            continue;
        }

        foreach ($queries[$unionType] as $key => $tokenList) {
            // find the first token that still has content after trimming
            $firstToken = null;
            foreach ($tokenList as $candidate) {
                $candidate = trim($candidate);
                if ($candidate !== "") {
                    $firstToken = $candidate;
                    break;
                }
            }

            // nothing but blank tokens: the entry is left unparsed
            if ($firstToken === null) {
                continue;
            }

            if (preg_match("/^\\(\\s*select\\s*/i", $firstToken)) {
                // starts with "(select": parse the content of the parenthesis as a full statement
                $processor = new DefaultProcessor();
                $queries[$unionType][$key] = $processor->process($this->removeParenthesisFromStart($firstToken));
            } else {
                // plain statement: hand the untouched token list to the SQL processor
                $processor = new SQLProcessor();
                $queries[$unionType][$key] = $processor->process($tokenList);
            }
        }
    }

    // the entries may or may not have been parsed at this point
    return $queries;
}
/**
 * Parses an SQL statement into a detailed array describing each of its parts.
 *
 * Optionally, the output is enriched with [position] fields that hold the
 * character position of every statement part. Computing the positions takes
 * extra time, so leave $calcPositions at false if you do not need them.
 *
 * @param String  $sql           The SQL statement.
 * @param boolean $calcPositions True to add [position] fields to the output, false otherwise.
 *
 * @return array An associative array with all meta information about the SQL statement.
 */
public function parse($sql, $calcPositions = false)
{
    $defaultProcessor = new DefaultProcessor();
    $parsedQueries = $defaultProcessor->process($sql);

    if ($calcPositions) {
        // add the positions of the important tokens
        $positionCalculator = new PositionCalculator();
        $parsedQueries = $positionCalculator->setPositionsWithinSQL($sql, $parsedQueries);
    }

    // keep the result on the instance and hand it back
    $this->parsed = $parsedQueries;

    return $this->parsed;
}
/**
 * Builds the result entry for one FROM item out of the collected parse state.
 *
 * The parse state is updated in place: the join types are rotated, a pending
 * reference expression is replaced by its parsed form, and for a parenthesised
 * table the 'expression' and 'sub_tree' entries are filled.
 *
 * @param array $parseInfo Parse state of the current FROM item (passed by reference).
 *
 * @return array The entry with expr_type, alias, join/ref information, base_expr and sub_tree.
 */
protected function processFromExpression(&$parseInfo)
{
    // rotate the join types: the saved one becomes current, the upcoming one
    // (or the default JOIN) is saved for the next item
    $parseInfo['join_type'] = $parseInfo['saved_join_type'];
    if ($parseInfo['next_join_type']) {
        $parseInfo['saved_join_type'] = $parseInfo['next_join_type'];
    } else {
        $parseInfo['saved_join_type'] = 'JOIN';
    }

    // a reference expression is present and has to be parsed
    if ($parseInfo['ref_expr'] !== false) {
        $refTokens = $this->splitSQLIntoTokens($this->removeParenthesisFromStart($parseInfo['ref_expr']));

        // it may be a comma separated list; blank out the commas
        foreach ($refTokens as $index => $refToken) {
            if ($this->isCommaToken($refToken)) {
                $refTokens[$index] = "";
            }
        }

        $listProcessor = new ExpressionListProcessor();
        $parseInfo['ref_expr'] = $listProcessor->process($refTokens);
    }

    $entry = array();
    $trimmedTable = trim($parseInfo['table']);

    if (substr($trimmedTable, 0, 1) === '(') {
        // the "table" is a parenthesised expression that has to be parsed
        $parseInfo['expression'] = $this->removeParenthesisFromStart($parseInfo['table']);

        if (preg_match("/^\\s*select/i", $parseInfo['expression'])) {
            $subQueryProcessor = new DefaultProcessor();
            $parseInfo['sub_tree'] = $subQueryProcessor->process($parseInfo['expression']);
            $entry['expr_type'] = ExpressionType::SUBQUERY;
        } else {
            $expressionTokens = $this->splitSQLIntoTokens($parseInfo['expression']);
            $parseInfo['sub_tree'] = $this->process($expressionTokens);
            $entry['expr_type'] = ExpressionType::TABLE_EXPRESSION;
        }
    } else {
        // a plain table name
        $entry['expr_type'] = ExpressionType::TABLE;
        $entry['table'] = $parseInfo['table'];
        $entry['no_quotes'] = $this->revokeQuotation($parseInfo['table']);
    }

    // append the common fields in their fixed order
    return array_merge(
        $entry,
        array(
            'alias' => $parseInfo['alias'],
            'join_type' => $parseInfo['join_type'],
            'ref_type' => $parseInfo['ref_type'],
            'ref_clause' => $parseInfo['ref_expr'],
            'base_expr' => trim($parseInfo['expression']),
            'sub_tree' => $parseInfo['sub_tree'],
        )
    );
}
/**
 * Runs the given token through a DefaultProcessor configured with this object's options.
 *
 * @param mixed $token The input handed to the processor unchanged.
 *
 * @return mixed Whatever DefaultProcessor::process() returns.
 */
protected function processDefault($token)
{
    $defaultProcessor = new DefaultProcessor($this->options);
    $result = $defaultProcessor->process($token);

    return $result;
}
/**
 * Runs the given SQL through a freshly created DefaultProcessor.
 *
 * @param mixed $sql The input handed to the processor unchanged.
 *
 * @return mixed Whatever DefaultProcessor::process() returns.
 */
protected function processTopLevel($sql)
{
    $defaultProcessor = new DefaultProcessor();
    $result = $defaultProcessor->process($sql);

    return $result;
}
/**
 * Delegates to DefaultProcessor::revokeQuotation() for the given token.
 *
 * @param mixed $token The token handed to the processor unchanged.
 *
 * @return mixed Whatever DefaultProcessor::revokeQuotation() returns.
 */
private function revokeQuotation($token)
{
    $defaultProcessor = new DefaultProcessor();
    $unquoted = $defaultProcessor->revokeQuotation($token);

    return $unquoted;
}
/**
 * Runs the still unparsed input through a freshly created DefaultProcessor.
 *
 * @param mixed $unparsed The input handed to the processor unchanged.
 *
 * @return mixed Whatever DefaultProcessor::process() returns.
 */
protected function processSQLDefault($unparsed)
{
    $defaultProcessor = new DefaultProcessor();
    $result = $defaultProcessor->process($unparsed);

    return $result;
}
/**
 * Parses the column part of a statement into a column list or an embedded query.
 *
 * When the text is wrapped in parentheses, the result starts out as a bracket
 * expression (expr_type, base_expr, sub_tree). Content starting with SELECT is
 * stored as a single QUERY node in sub_tree; anything else is parsed as a
 * column list and expr_type is set to COLUMN_LIST. Text that is not wrapped in
 * parentheses yields only the keys written by the branch that handled it.
 *
 * @param string|false $cols The raw column text, or false if there is none.
 *
 * @return array|false The parsed structure, or false when $cols was false.
 */
protected function processColumns($cols)
{
    if ($cols === false) {
        return $cols;
    }

    // Start from an explicit array so the non-parenthesised path never writes
    // into an undefined variable.
    $parsed = array();

    // Check for the empty string first: reading offset 0 of an empty string
    // raises an "Uninitialized string offset" notice/warning.
    if ($cols !== '' && $cols[0] === '(' && substr($cols, -1) === ')') {
        $parsed = array(
            'expr_type' => ExpressionType::BRACKET_EXPRESSION,
            'base_expr' => $cols,
            'sub_tree' => false,
        );
    }

    $cols = $this->removeParenthesisFromStart($cols);

    if (stripos($cols, 'SELECT') === 0) {
        // the "columns" are in fact a query
        $processor = new DefaultProcessor();
        $parsed['sub_tree'] = array(
            array(
                'expr_type' => ExpressionType::QUERY,
                'base_expr' => $cols,
                'sub_tree' => $processor->process($cols),
            ),
        );
    } else {
        $processor = new ColumnListProcessor();
        $parsed['sub_tree'] = $processor->process($cols);
        $parsed['expr_type'] = ExpressionType::COLUMN_LIST;
    }

    return $parsed;
}
/**
 * Classifies a list of expression tokens and returns the result of
 * $this->toArray() for the classified tokens.
 *
 * Whitespace tokens are dropped. Every other token is wrapped in an
 * ExpressionToken and given a token type depending on its own content and on
 * the previously kept token ($prev). Parenthesised tokens are split again and
 * processed recursively (sub-queries via DefaultProcessor, everything else via
 * this method). Some tokens are merged into $prev instead of being added on
 * their own: "alias." followed by "*", a sign followed by a number, and a
 * parenthesis that follows a column reference or function (the function
 * parameters).
 *
 * @param array $tokens The tokens to classify; keys and values are handed to
 *                      the ExpressionToken constructor.
 *
 * @return mixed The value returned by $this->toArray() for the classified tokens.
 */
public function process($tokens) {
    $resultList = array();
    // NOTE(review): $skip_next is never set to true within this method, so the
    // branch that checks it below is currently unreachable.
    $skip_next = false;
    $prev = new ExpressionToken();

    foreach ($tokens as $k => $v) {

        $curr = new ExpressionToken($k, $v);

        if ($curr->isWhitespaceToken()) {
            continue;
        }

        if ($skip_next) {
            // skip the next non-whitespace token
            $skip_next = false;
            continue;
        }

        /* is it a subquery? */
        if ($curr->isSubQueryToken()) {

            $processor = new DefaultProcessor();
            $curr->setSubTree($processor->process($this->removeParenthesisFromStart($curr->getTrim())));
            $curr->setTokenType(ExpressionType::SUBQUERY);

        } elseif ($curr->isEnclosedWithinParenthesis()) {
            /* is it an in-list? */

            // The inner loops below reuse $k and $v. This is harmless because
            // $curr has already been built from the outer values and the
            // outer foreach reassigns both on its next iteration.
            $localTokenList = $this->splitSQLIntoTokens($this->removeParenthesisFromStart($curr->getTrim()));

            if ($prev->getUpper() === 'IN') {

                // drop the commas, then classify the remaining list elements
                foreach ($localTokenList as $k => $v) {
                    $tmpToken = new ExpressionToken($k, $v);
                    if ($tmpToken->isCommaToken()) {
                        unset($localTokenList[$k]);
                    }
                }

                $localTokenList = array_values($localTokenList);
                $curr->setSubTree($this->process($localTokenList));
                $curr->setTokenType(ExpressionType::IN_LIST);

            } elseif ($prev->getUpper() === 'AGAINST') {

                // As soon as a WITH or IN token is seen, the match mode is
                // fixed; that token and every token after it are removed from
                // the list, because the check below runs for each token.
                $match_mode = false;
                foreach ($localTokenList as $k => $v) {

                    $tmpToken = new ExpressionToken($k, $v);
                    switch ($tmpToken->getUpper()) {
                        case 'WITH':
                            $match_mode = 'WITH QUERY EXPANSION';
                            break;
                        case 'IN':
                            $match_mode = 'IN BOOLEAN MODE';
                            break;

                        default:
                    }

                    if ($match_mode !== false) {
                        unset($localTokenList[$k]);
                    }
                }

                // from here on $tmpToken holds the processed argument list
                $tmpToken = $this->process($localTokenList);

                if ($match_mode !== false) {
                    // append the match mode as an additional element
                    $match_mode = new ExpressionToken(0, $match_mode);
                    $match_mode->setTokenType(ExpressionType::MATCH_MODE);
                    $tmpToken[] = $match_mode->toArray();
                }

                $curr->setSubTree($tmpToken);
                $curr->setTokenType(ExpressionType::MATCH_ARGUMENTS);
                $prev->setTokenType(ExpressionType::SIMPLE_FUNCTION);

            } elseif ($prev->isColumnReference() || $prev->isFunction() || $prev->isAggregateFunction()) {

                // if we have a colref followed by a parenthesis pair,
                // it isn't a colref, it is a user-function

                // TODO: this should be a method, because we need the same code
                // below for unspecified tokens (expressions).

                $localExpr = new ExpressionToken();
                $tmpExprList = array();

                foreach ($localTokenList as $k => $v) {
                    $tmpToken = new ExpressionToken($k, $v);
                    if (!$tmpToken->isCommaToken()) {
                        // collect the tokens of the current parameter
                        $localExpr->addToken($v);
                        $tmpExprList[] = $v;
                    } else {
                        // an expression could have multiple parts split by operands
                        // if we have a comma, it is a split-point for expressions
                        $tmpExprList = array_values($tmpExprList);
                        $localExprList = $this->process($tmpExprList);

                        // more than one element: wrap them into a single expression
                        if (count($localExprList) > 1) {
                            $localExpr->setSubTree($localExprList);
                            $localExpr->setTokenType(ExpressionType::EXPRESSION);
                            $localExprList = $localExpr->toArray();
                            $localExprList['alias'] = false;
                            $localExprList = array($localExprList);
                        }

                        // append the parameter to the sub-tree collected so far
                        if (!$curr->getSubTree()) {
                            $curr->setSubTree($localExprList);
                        } else {
                            $tmpExprList = $curr->getSubTree();
                            $curr->setSubTree(array_merge($tmpExprList, $localExprList));
                        }

                        // start collecting the next parameter
                        $tmpExprList = array();
                        $localExpr = new ExpressionToken();
                    }
                }

                // handle the last (or only) parameter the same way as above
                $tmpExprList = array_values($tmpExprList);
                $localExprList = $this->process($tmpExprList);

                if (count($localExprList) > 1) {
                    $localExpr->setSubTree($localExprList);
                    $localExpr->setTokenType(ExpressionType::EXPRESSION);
                    $localExprList = $localExpr->toArray();
                    $localExprList['alias'] = false;
                    $localExprList = array($localExprList);
                }

                if (!$curr->getSubTree()) {
                    $curr->setSubTree($localExprList);
                } else {
                    $tmpExprList = $curr->getSubTree();
                    $curr->setSubTree(array_merge($tmpExprList, $localExprList));
                }

                // the parameters belong to the previous token (the function name)
                $prev->setSubTree($curr->getSubTree());
                if ($prev->isColumnReference()) {
                    $prev->setTokenType(ExpressionType::SIMPLE_FUNCTION);
                    $prev->setNoQuotes(null);
                }

                // Remove the last element of the result list and continue with
                // $prev as the current token; it is appended again at the end
                // of this iteration.
                array_pop($resultList);
                $curr = $prev;
            }

            // we have parenthesis, but it seems to be an expression
            if ($curr->isUnspecified()) {
                // TODO: the localTokenList could contain commas and further expressions,
                // we must handle that like function parameters (see above)!
                // this should solve issue 51
                $curr->setSubTree($this->process($localTokenList));
                $curr->setTokenType(ExpressionType::BRACKET_EXPRESSION);
            }

        } elseif ($curr->isVariableToken()) {

            // a variable
            // it can be quoted

            $curr->setTokenType($this->getVariableType($curr->getUpper()));
            $curr->setSubTree(false);
            $curr->setNoQuotes(trim(trim($curr->getToken()), '@'), "`'\"");

        } else {
            /* it is either an operator, a colref or a constant */
            switch ($curr->getUpper()) {

                case '*':
                    $curr->setSubTree(false); // no subtree

                    // single or first element of expression list -> all-column-alias
                    if (empty($resultList)) {
                        $curr->setTokenType(ExpressionType::COLREF);
                        break;
                    }

                    // if the last token is colref, const or expression
                    // then * is an operator
                    // but if the previous colref ends with a dot, the * is the all-columns-alias
                    if (!$prev->isColumnReference() && !$prev->isConstant() && !$prev->isExpression()
                        && !$prev->isBracketExpression() && !$prev->isAggregateFunction() && !$prev->isVariable()) {
                        $curr->setTokenType(ExpressionType::COLREF);
                        break;
                    }

                    if ($prev->isColumnReference() && $prev->endsWith(".")) {
                        $prev->addToken('*'); // tablealias dot *
                        // "continue 2" leaves the switch and jumps to the next
                        // foreach iteration; $prev stays unchanged
                        continue 2; // skip the current token
                    }

                    $curr->setTokenType(ExpressionType::OPERATOR);
                    break;

                // NOTE(review): 'AND' is listed twice below; the second label is redundant.
                case ':=':
                case 'AND':
                case '&&':
                case 'BETWEEN':
                case 'AND':
                case 'BINARY':
                case '&':
                case '~':
                case '|':
                case '^':
                case 'DIV':
                case '/':
                case '<=>':
                case '=':
                case '>=':
                case '>':
                case 'IS':
                case 'NOT':
                case '<<':
                case '<=':
                case '<':
                case 'LIKE':
                case '%':
                case '!=':
                case '<>':
                case 'REGEXP':
                case '!':
                case '||':
                case 'OR':
                case '>>':
                case 'RLIKE':
                case 'SOUNDS':
                case 'XOR':
                case 'IN':
                    $curr->setSubTree(false);
                    $curr->setTokenType(ExpressionType::OPERATOR);
                    break;

                case 'NULL':
                    $curr->setSubTree(false);
                    $curr->setTokenType(ExpressionType::CONSTANT);
                    break;

                case '-':
                case '+':
                    // distinguish between a preceding sign and an operator
                    $curr->setSubTree(false);

                    if ($prev->isColumnReference() || $prev->isFunction() || $prev->isAggregateFunction()
                        || $prev->isConstant() || $prev->isSubQuery() || $prev->isExpression()
                        || $prev->isBracketExpression() || $prev->isVariable()) {
                        $curr->setTokenType(ExpressionType::OPERATOR);
                    } else {
                        $curr->setTokenType(ExpressionType::SIGN);
                    }
                    break;

                default:
                    $curr->setSubTree(false);

                    // decide by the first character of the token
                    switch ($curr->getToken(0)) {
                        case "'":
                        case '"':
                            // it is a string literal
                            $curr->setTokenType(ExpressionType::CONSTANT);
                            break;
                        case '`':
                            // it is an escaped column name
                            $curr->setTokenType(ExpressionType::COLREF);
                            $curr->setNoQuotes($curr->getToken());
                            break;

                        default:
                            if (is_numeric($curr->getToken())) {

                                if ($prev->isSign()) {
                                    $prev->addToken($curr->getToken()); // it is a negative numeric constant
                                    $prev->setTokenType(ExpressionType::CONSTANT);
                                    // "continue 3" leaves both switches and jumps
                                    // to the next foreach iteration
                                    continue 3; // skip current token
                                } else {
                                    $curr->setTokenType(ExpressionType::CONSTANT);
                                }

                            } else {
                                $curr->setTokenType(ExpressionType::COLREF);
                                $curr->setNoQuotes($curr->getToken());
                            }
                            break;
                    }
            }
        }

        /* is a reserved word? */
        if (!$curr->isOperator() && !$curr->isInList() && !$curr->isFunction() && !$curr->isAggregateFunction()
            && PHPSQLParserConstants::isReserved($curr->getUpper())) {

            if (PHPSQLParserConstants::isAggregateFunction($curr->getUpper())) {
                $curr->setTokenType(ExpressionType::AGGREGATE_FUNCTION);
                $curr->setNoQuotes(null);

            } elseif ($curr->getUpper() === 'NULL') {
                // it is a reserved word, but we would like to set it as constant
                $curr->setTokenType(ExpressionType::CONSTANT);

            } else {
                if (PHPSQLParserConstants::isParameterizedFunction($curr->getUpper())) {
                    // issue 60: check functions with parameters
                    // -> colref (we check parameters later)
                    // -> if there is no parameter, we leave the colref
                    $curr->setTokenType(ExpressionType::COLREF);

                } elseif (PHPSQLParserConstants::isFunction($curr->getUpper())) {
                    $curr->setTokenType(ExpressionType::SIMPLE_FUNCTION);
                    $curr->setNoQuotes(null);

                } else {
                    $curr->setTokenType(ExpressionType::RESERVED);
                    $curr->setNoQuotes(null);
                }
            }
        }

        // issue 94, INTERVAL 1 MONTH
        // a parameterized-function name next to a constant (on either side)
        // is marked as a reserved word
        if ($curr->isConstant() && PHPSQLParserConstants::isParameterizedFunction($prev->getUpper())) {
            $prev->setTokenType(ExpressionType::RESERVED);
            $prev->setNoQuotes(null);
        }

        if ($prev->isConstant() && PHPSQLParserConstants::isParameterizedFunction($curr->getUpper())) {
            $curr->setTokenType(ExpressionType::RESERVED);
            $curr->setNoQuotes(null);
        }

        // still no type: treat the token as an expression and process its parts
        if ($curr->isUnspecified()) {
            $curr->setTokenType(ExpressionType::EXPRESSION);
            $curr->setNoQuotes(null);
            $curr->setSubTree($this->process($this->splitSQLIntoTokens($curr->getTrim())));
        }

        $resultList[] = $curr;
        $prev = $curr;

    } // end of for-loop

    return $this->toArray($resultList);
}