Skip to content

Commit

Permalink
refactor: move skip methods to abstract parser (#2948)
Browse files Browse the repository at this point in the history
Move the PostgreSQL skip methods from the PostgreSQL parser to the
abstract parser. This is step 1 in refactoring the GoogleSQL and
PostgreSQL parsers so that they can share more code. The eventual goal
is to allow the GoogleSQL parser to handle SQL strings without having
to remove the comments from the string first.
  • Loading branch information
olavloite committed Mar 19, 2024
1 parent 1e45237 commit 3247a05
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 147 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.concurrent.Callable;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.Nullable;

/**
* Internal class for the Spanner Connection API.
Expand Down Expand Up @@ -696,4 +697,169 @@ static int countOccurrencesOf(char c, String string) {
public boolean checkReturningClause(String sql) {
  // Pure delegation: the actual check is done by checkReturningClauseInternal.
  final boolean hasReturningClause = checkReturningClauseInternal(sql);
  return hasReturningClause;
}

/**
 * Tells whether {@code c} may be the first character of an unquoted identifier, which is the case
 * for the UNDERSCORE constant and for every character that {@link Character#isLetter(char)}
 * accepts.
 */
boolean isValidIdentifierFirstChar(char c) {
  if (c == UNDERSCORE) {
    return true;
  }
  return Character.isLetter(c);
}

/**
 * Tells whether {@code c} may appear in an unquoted identifier: every valid first character, plus
 * digits and the DOLLAR constant.
 */
boolean isValidIdentifierChar(char c) {
  if (isValidIdentifierFirstChar(c)) {
    return true;
  }
  return Character.isDigit(c) || c == DOLLAR;
}

/**
 * Reads the tag of a dollar-quoted string, starting at {@code index} in {@code sql}.
 *
 * <p>The tag is everything between {@code index} and the next DOLLAR character. Every character
 * of the tag, including the first one, must satisfy {@link #isValidIdentifierChar(char)}.
 *
 * @param sql the SQL string to read from
 * @param index the position directly after the opening dollar sign
 * @return the (possibly empty) tag, or {@code null} if an invalid character or the end of the
 *     string is reached before a closing dollar sign is found
 */
String parseDollarQuotedString(String sql, int index) {
  final int tagStart = index;
  for (int pos = tagStart; pos < sql.length(); pos++) {
    char candidate = sql.charAt(pos);
    if (candidate == DOLLAR) {
      // The dollar sign terminates the tag; it is not part of the tag itself.
      return sql.substring(tagStart, pos);
    }
    if (!isValidIdentifierChar(candidate)) {
      return null;
    }
  }
  // Ran off the end of the string without finding the terminating dollar sign.
  return null;
}

/**
 * Skips one element of {@code sql} that starts at {@code currentIndex}. The element is one of:
 *
 * <ul>
 *   <li>a single- or double-quoted literal or identifier,
 *   <li>a dollar-quoted string,
 *   <li>a single-line ({@code --}) or multi-line comment,
 *   <li>otherwise just the single character at {@code currentIndex}.
 * </ul>
 *
 * @param sql the SQL string that is being scanned
 * @param currentIndex the position of the first character of the element to skip
 * @param result if not null, receives the characters that were skipped
 * @return the index of the first character after the skipped element
 */
int skip(String sql, int currentIndex, @Nullable StringBuilder result) {
  final char first = sql.charAt(currentIndex);

  if (first == SINGLE_QUOTE || first == DOUBLE_QUOTE) {
    appendIfNotNull(result, first);
    return skipQuoted(sql, currentIndex, first, result);
  }

  if (first == DOLLAR) {
    String tag = parseDollarQuotedString(sql, currentIndex + 1);
    if (tag != null) {
      appendIfNotNull(result, first, tag, first);
      // Continue from the dollar sign that closes the opening tag.
      return skipQuoted(sql, currentIndex + tag.length() + 1, first, tag, result);
    }
    // Not the start of a dollar-quoted string: fall through and treat it as a plain character.
  } else if (currentIndex + 1 < sql.length()) {
    // Both comment styles start with a two-character sequence, so look one character ahead.
    char second = sql.charAt(currentIndex + 1);
    if (first == HYPHEN && second == HYPHEN) {
      return skipSingleLineComment(sql, currentIndex, result);
    }
    if (first == SLASH && second == ASTERISK) {
      return skipMultiLineComment(sql, currentIndex, result);
    }
  }

  appendIfNotNull(result, first);
  return currentIndex + 1;
}

/**
 * Skips a single-line comment that starts at {@code startIndex}. The comment runs up to and
 * including the next newline character, or to the end of {@code sql} if there is no newline.
 *
 * @param sql the SQL string that is being scanned
 * @param startIndex the position of the first character of the comment marker
 * @param result if not null, receives the text of the comment (including the newline, if any)
 * @return the index of the first character after the comment
 */
static int skipSingleLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
  // Start searching after the two-character comment marker.
  final int newline = sql.indexOf('\n', startIndex + 2);
  final int endIndex = newline < 0 ? sql.length() : newline + 1;
  appendIfNotNull(result, sql.substring(startIndex, endIndex));
  return endIndex;
}

/**
 * Skips a multi-line comment that starts at {@code startIndex} and adds it to result if result is
 * not null.
 *
 * <p>PostgreSQL allows comments to be nested. That is, the following is a single comment:
 * {@code /* test /* inner comment *}{@code / still a comment *}{@code /}. Each two-character
 * delimiter is consumed as a whole, so that overlapping sequences such as slash-asterisk-slash
 * are counted as a single delimiter and not as both an opening and a closing delimiter.
 *
 * <p>If the comment is not terminated, everything up to the end of {@code sql} is treated as
 * part of the comment.
 *
 * @param sql the SQL string that is being scanned
 * @param startIndex the position of the slash of the opening delimiter
 * @param result if not null, receives the text of the comment, including its delimiters
 * @return the index of the first character after the comment
 */
static int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
  final int length = sql.length();
  // Current position is start + '/*'.length().
  int pos = startIndex + 2;
  int level = 1;
  // A delimiter needs two characters, so the last character can never start one.
  while (pos + 1 < length) {
    char current = sql.charAt(pos);
    char next = sql.charAt(pos + 1);
    if (current == SLASH && next == ASTERISK) {
      level++;
      // Consume both characters so the asterisk cannot also be read as the start of '*/'.
      pos += 2;
    } else if (current == ASTERISK && next == SLASH) {
      level--;
      // Consume both characters so the slash cannot also be read as the start of '/*'.
      pos += 2;
      if (level == 0) {
        appendIfNotNull(result, sql.substring(startIndex, pos));
        return pos;
      }
    } else {
      pos++;
    }
  }
  // Unterminated comment: the remainder of the string belongs to the comment.
  appendIfNotNull(result, sql.substring(startIndex));
  return length;
}

/**
 * Skips a quoted string that is not dollar-quoted. This is a shorthand for the overload that
 * takes a dollar tag, called with a {@code null} tag.
 */
private int skipQuoted(
    String sql, int startIndex, char startQuote, @Nullable StringBuilder result) {
  final String noDollarTag = null;
  return skipQuoted(sql, startIndex, startQuote, noDollarTag, result);
}

/**
 * Skips the remainder of a quoted string. Scanning starts at the character after {@code
 * startIndex}; the opening quote itself is expected to have been handled by the caller.
 *
 * <p>If {@code startQuote} is the DOLLAR constant, the string ends at the next occurrence of
 * {@code $dollarTag$}. For any other quote character, the string ends at the next quote
 * character that is not immediately followed by a second one (a doubled quote is an escaped
 * quote, e.g. 'foo''bar').
 *
 * @param sql the SQL string that is being scanned
 * @param startIndex the position of the last character of the opening quote
 * @param startQuote the quote character that started the string
 * @param dollarTag the tag of the dollar-quoted string, or null for other quote characters
 * @param result if not null, receives the skipped characters, including the closing quote
 * @return the index of the first character after the closing quote
 * @throws SpannerException with code INVALID_ARGUMENT if the string is not closed
 */
private int skipQuoted(
    String sql,
    int startIndex,
    char startQuote,
    String dollarTag,
    @Nullable StringBuilder result) {
  final int length = sql.length();
  int pos = startIndex + 1;
  while (pos < length) {
    final char c = sql.charAt(pos);

    if (c != startQuote) {
      // Ordinary content of the quoted string.
      appendIfNotNull(result, c);
      pos++;
      continue;
    }

    if (c == DOLLAR) {
      // Check if this is the end of the current dollar quoted string.
      String candidate = parseDollarQuotedString(sql, pos + 1);
      if (candidate != null && candidate.equals(dollarTag)) {
        appendIfNotNull(result, c, dollarTag, c);
        return pos + candidate.length() + 2;
      }
      // A dollar sign that does not start the closing tag is ordinary content.
      appendIfNotNull(result, c);
      pos++;
      continue;
    }

    boolean doubledQuote = length > pos + 1 && sql.charAt(pos + 1) == startQuote;
    if (!doubledQuote) {
      // This is the closing quote.
      appendIfNotNull(result, c);
      return pos + 1;
    }
    // This is an escaped quote (e.g. 'foo''bar'): copy both quotes and keep scanning.
    appendIfNotNull(result, c);
    appendIfNotNull(result, c);
    pos += 2;
  }
  throw SpannerExceptionFactory.newSpannerException(
      ErrorCode.INVALID_ARGUMENT, "SQL statement contains an unclosed literal: " + sql);
}

/**
 * Appends the given character to result if result is not null.
 *
 * <p>This helper does not use any instance state and is therefore static, like the other {@code
 * appendIfNotNull} overloads.
 *
 * @param result the builder to append to, or null to do nothing
 * @param currentChar the character to append
 */
private static void appendIfNotNull(@Nullable StringBuilder result, char currentChar) {
  if (result != null) {
    result.append(currentChar);
  }
}

/**
 * Adds {@code suffix} to the end of {@code result}. Does nothing when {@code result} is null.
 */
private static void appendIfNotNull(@Nullable StringBuilder result, String suffix) {
  if (result == null) {
    return;
  }
  result.append(suffix);
}

/**
 * Adds {@code prefix}, {@code tag} and {@code suffix}, in that order, to the end of {@code
 * result}. Does nothing when {@code result} is null.
 */
private static void appendIfNotNull(
    @Nullable StringBuilder result, char prefix, String tag, char suffix) {
  if (result == null) {
    return;
  }
  result.append(prefix);
  result.append(tag);
  result.append(suffix);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

@InternalApi
public class PostgreSQLStatementParser extends AbstractStatementParser {
Expand Down Expand Up @@ -136,23 +135,6 @@ String removeCommentsAndTrimInternal(String sql) {
return res.toString().trim();
}

/**
 * Reads the tag of a dollar-quoted string, starting at {@code index} in {@code sql}. The tag is
 * everything between {@code index} and the next DOLLAR character.
 *
 * @return the (possibly empty) tag, or {@code null} if a character that is not accepted by
 *     isValidIdentifierChar, or the end of the string, is reached before a dollar sign is found
 */
String parseDollarQuotedString(String sql, int index) {
  final int tagStart = index;
  for (int pos = tagStart; pos < sql.length(); pos++) {
    char candidate = sql.charAt(pos);
    if (candidate == DOLLAR) {
      // The dollar sign terminates the tag; it is not part of the tag itself.
      return sql.substring(tagStart, pos);
    }
    if (!isValidIdentifierChar(candidate)) {
      return null;
    }
  }
  // Ran off the end of the string without finding the terminating dollar sign.
  return null;
}

/** PostgreSQL does not support statement hints. */
@Override
String removeStatementHint(String sql) {
Expand Down Expand Up @@ -220,135 +202,6 @@ public Set<String> getQueryParameters(String sql) {
return parameters;
}

/**
 * Skips one element of {@code sql} that starts at {@code currentIndex}: a quoted literal or
 * identifier, a dollar-quoted string, a single-line or multi-line comment, or otherwise just the
 * single character at {@code currentIndex}. The skipped characters are added to {@code result}
 * if it is not null.
 *
 * @return the index of the first character after the skipped element
 */
private int skip(String sql, int currentIndex, @Nullable StringBuilder result) {
  final char first = sql.charAt(currentIndex);

  if (first == SINGLE_QUOTE || first == DOUBLE_QUOTE) {
    appendIfNotNull(result, first);
    return skipQuoted(sql, currentIndex, first, result);
  }

  if (first == DOLLAR) {
    String tag = parseDollarQuotedString(sql, currentIndex + 1);
    if (tag != null) {
      appendIfNotNull(result, first, tag, first);
      // Continue from the dollar sign that closes the opening tag.
      return skipQuoted(sql, currentIndex + tag.length() + 1, first, tag, result);
    }
    // Not the start of a dollar-quoted string: fall through and treat it as a plain character.
  } else if (currentIndex + 1 < sql.length()) {
    // Both comment styles start with a two-character sequence, so look one character ahead.
    char second = sql.charAt(currentIndex + 1);
    if (first == HYPHEN && second == HYPHEN) {
      return skipSingleLineComment(sql, currentIndex, result);
    }
    if (first == SLASH && second == ASTERISK) {
      return skipMultiLineComment(sql, currentIndex, result);
    }
  }

  appendIfNotNull(result, first);
  return currentIndex + 1;
}

/**
 * Skips a single-line comment that starts at {@code currentIndex}. The comment runs up to and
 * including the next newline character, or to the end of {@code sql} if there is no newline. The
 * text of the comment is added to {@code result} if it is not null.
 *
 * @return the index of the first character after the comment
 */
static int skipSingleLineComment(String sql, int currentIndex, @Nullable StringBuilder result) {
  // Start searching after the two-character comment marker.
  final int newline = sql.indexOf('\n', currentIndex + 2);
  final int endIndex = newline < 0 ? sql.length() : newline + 1;
  appendIfNotNull(result, sql.substring(currentIndex, endIndex));
  return endIndex;
}

/**
 * Skips a multi-line comment that starts at {@code startIndex} and adds it to result if result is
 * not null.
 *
 * <p>PostgreSQL allows comments to be nested. That is, the following is a single comment:
 * {@code /* test /* inner comment *}{@code / still a comment *}{@code /}. Each two-character
 * delimiter is consumed as a whole, so that overlapping sequences such as slash-asterisk-slash
 * are counted as a single delimiter and not as both an opening and a closing delimiter.
 *
 * <p>If the comment is not terminated, everything up to the end of {@code sql} is treated as
 * part of the comment.
 *
 * @param sql the SQL string that is being scanned
 * @param startIndex the position of the slash of the opening delimiter
 * @param result if not null, receives the text of the comment, including its delimiters
 * @return the index of the first character after the comment
 */
static int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
  final int length = sql.length();
  // Current position is start + '/*'.length().
  int pos = startIndex + 2;
  int level = 1;
  // A delimiter needs two characters, so the last character can never start one.
  while (pos + 1 < length) {
    char current = sql.charAt(pos);
    char next = sql.charAt(pos + 1);
    if (current == SLASH && next == ASTERISK) {
      level++;
      // Consume both characters so the asterisk cannot also be read as the start of '*/'.
      pos += 2;
    } else if (current == ASTERISK && next == SLASH) {
      level--;
      // Consume both characters so the slash cannot also be read as the start of '/*'.
      pos += 2;
      if (level == 0) {
        appendIfNotNull(result, sql.substring(startIndex, pos));
        return pos;
      }
    } else {
      pos++;
    }
  }
  // Unterminated comment: the remainder of the string belongs to the comment.
  appendIfNotNull(result, sql.substring(startIndex));
  return length;
}

/**
 * Skips a quoted string that is not dollar-quoted. This is a shorthand for the overload that
 * takes a dollar tag, called with a {@code null} tag.
 */
private int skipQuoted(
    String sql, int startIndex, char startQuote, @Nullable StringBuilder result) {
  final String noDollarTag = null;
  return skipQuoted(sql, startIndex, startQuote, noDollarTag, result);
}

/**
 * Skips the remainder of a quoted string. Scanning starts at the character after {@code
 * startIndex}. If {@code startQuote} is the DOLLAR constant, the string ends at the next
 * occurrence of {@code $dollarTag$}; otherwise it ends at the next quote character that is not
 * immediately followed by a second one (a doubled quote is an escaped quote, e.g. 'foo''bar').
 * The skipped characters, including the closing quote, are added to {@code result} if it is not
 * null.
 *
 * @return the index of the first character after the closing quote
 * @throws SpannerException with code INVALID_ARGUMENT if the string is not closed
 */
private int skipQuoted(
    String sql,
    int startIndex,
    char startQuote,
    String dollarTag,
    @Nullable StringBuilder result) {
  final int length = sql.length();
  int pos = startIndex + 1;
  while (pos < length) {
    final char c = sql.charAt(pos);

    if (c != startQuote) {
      // Ordinary content of the quoted string.
      appendIfNotNull(result, c);
      pos++;
      continue;
    }

    if (c == DOLLAR) {
      // Check if this is the end of the current dollar quoted string.
      String candidate = parseDollarQuotedString(sql, pos + 1);
      if (candidate != null && candidate.equals(dollarTag)) {
        appendIfNotNull(result, c, dollarTag, c);
        return pos + candidate.length() + 2;
      }
      // A dollar sign that does not start the closing tag is ordinary content.
      appendIfNotNull(result, c);
      pos++;
      continue;
    }

    boolean doubledQuote = length > pos + 1 && sql.charAt(pos + 1) == startQuote;
    if (!doubledQuote) {
      // This is the closing quote.
      appendIfNotNull(result, c);
      return pos + 1;
    }
    // This is an escaped quote (e.g. 'foo''bar'): copy both quotes and keep scanning.
    appendIfNotNull(result, c);
    appendIfNotNull(result, c);
    pos += 2;
  }
  throw SpannerExceptionFactory.newSpannerException(
      ErrorCode.INVALID_ARGUMENT, "SQL statement contains an unclosed literal: " + sql);
}

/**
 * Appends the given character to result if result is not null.
 *
 * <p>This helper does not use any instance state and is therefore static.
 *
 * @param result the builder to append to, or null to do nothing
 * @param currentChar the character to append
 */
private static void appendIfNotNull(@Nullable StringBuilder result, char currentChar) {
  if (result != null) {
    result.append(currentChar);
  }
}

/**
 * Adds {@code suffix} to the end of {@code result}. Does nothing when {@code result} is null.
 */
private static void appendIfNotNull(@Nullable StringBuilder result, String suffix) {
  if (result == null) {
    return;
  }
  result.append(suffix);
}

/**
 * Appends the given prefix, tag, and suffix to result if result is not null.
 *
 * <p>This helper does not use any instance state and is therefore static.
 *
 * @param result the builder to append to, or null to do nothing
 * @param prefix the character to append before the tag
 * @param tag the tag to append
 * @param suffix the character to append after the tag
 */
private static void appendIfNotNull(
    @Nullable StringBuilder result, char prefix, String tag, char suffix) {
  if (result != null) {
    result.append(prefix).append(tag).append(suffix);
  }
}

/**
 * Tells whether {@code c} may be the first character of an unquoted identifier, which is the case
 * for the UNDERSCORE constant and for every character that {@link Character#isLetter(char)}
 * accepts.
 */
private boolean isValidIdentifierFirstChar(char c) {
  if (c == UNDERSCORE) {
    return true;
  }
  return Character.isLetter(c);
}

/**
 * Tells whether {@code c} may appear in an unquoted identifier: every valid first character, plus
 * digits and the DOLLAR constant.
 */
private boolean isValidIdentifierChar(char c) {
  if (isValidIdentifierFirstChar(c)) {
    return true;
  }
  return Character.isDigit(c) || c == DOLLAR;
}

private boolean checkCharPrecedingReturning(char ch) {
return (ch == SPACE)
|| (ch == SINGLE_QUOTE)
Expand Down

0 comments on commit 3247a05

Please sign in to comment.