用Java解析SQL查询

Jag*_*war 5 java sql parsing

我知道通过使用准备好的语句我们可以设置列值。我想要的是,我已经编写了一个查询列表,用于在同一个表上执行但具有不同的列值。例如

select * from tableName as t1 where t1.tableColumnId=4 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId

select * from tableName as t1 where t1.tableColumnId=6 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
Run Code Online (Sandbox Code Playgroud)

正如您所看到的,除了 tableColumnId 的值之外,两个查询几乎相同。我想将其保存在集合中

select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
Run Code Online (Sandbox Code Playgroud)

这样我就不会出现重复的查询(不考虑值)。

我怎样才能做到这一点?

Mau*_*rry 4

  1. 一种方法是定义足以解析查询的 SQL 语法子集,然后为该语法编写一个解析器,
  2. 比较查询并找到相同的部分和不同的部分,
  3. 在查询中找到 4、6、'test' 等字面值,构建(平面)语法树,并将这些树相互比较,以识别一个查询与另一个查询之间可能不同的那些字面值。

更新

要解析 SQL,您可以使用解析器生成器,例如 ANTLR 或 JavaCC。SQL 已有现成的 ANTLR 和 JavaCC 语法,您可以从其中之一开始。

也就是说,我认为这种方法在这种情况下有点矫枉过正。我宁愿使用第三个。

更新2:(第三种方法)

要定位文字字符串和数字,您可以使用正则表达式:

/**
 * Finds one SQL literal together with the single character in front of it.
 *
 * <p>Group 1 is that leading character; it must not be a letter or digit, so
 * digits inside identifiers such as {@code t1} or {@code tableName2} are not
 * treated as literals. Group 2 is the literal itself: either a number
 * ({@code 12}, {@code 1.5}, {@code .5}, with an optional exponent such as
 * {@code e+3} or {@code E10}) or one or more adjacent single-quoted strings.
 *
 * <p>The exponent part used to read {@code (?:[Ee][+-][0-9]+])?}; the stray
 * {@code ]} made it require a literal closing bracket, so exponents were never
 * recognised. The sign is optional as well now.
 */
private static final Pattern CONST_PATTERN
        = Pattern.compile("([^0-9a-zA-Z])((?:[0-9]+(?:\\.[0-9]*)?|[0-9]*\\.[0-9]+)"
                + "(?:[Ee][+-]?[0-9]+)?"
                + "|(?:\\'[^']*\\')+)", Pattern.CASE_INSENSITIVE);
Run Code Online (Sandbox Code Playgroud)

您可以在生成以下结构时解析查询:

/**
 * A query text in which literals have been replaced by question marks,
 * paired with the literals that were taken out.
 */
private static class ParameterizedQuery {
    /** Literals removed from the query text. */
    final Parameter[] params;
    /** Query text containing the question-mark placeholders. */
    final String sql;

    /**
     * @param sql    query text with placeholders
     * @param params removed literals; the array is copied, so later changes
     *               to the caller's array are not seen by this object
     */
    ParameterizedQuery(String sql, Parameter[] params) {
        Parameter[] ownCopy = params.clone();
        this.sql = sql;
        this.params = ownCopy;
    }
}

/** One literal taken out of a query: where its placeholder sits and what it was. */
private static class Parameter {
    /** Literal text exactly as it appeared in the query. */
    final String value;
    /** Index of the placeholder character inside the parameterized SQL text. */
    final int position;

    /**
     * @param position index of the placeholder in the SQL text
     * @param value    literal text that the placeholder stands for
     */
    Parameter(int position, String value) {
        this.value = value;
        this.position = position;
    }
}
Run Code Online (Sandbox Code Playgroud)

生成的 sql 查询是输入查询,其中所有文字都替换为问号。解析过程如下:

/**
 * Replaces every literal that {@code CONST_PATTERN} finds in {@code query}
 * with a question mark and remembers the literal that was removed.
 *
 * @param query SQL text to scan
 * @return the rewritten SQL together with one {@code Parameter} per replaced
 *         literal, in order of appearance
 */
private static ParameterizedQuery parse(String query) {
    StringBuilder sql = new StringBuilder();
    List<Parameter> found = new ArrayList<>();
    int copiedUpTo = 0;
    for (Matcher m = CONST_PATTERN.matcher(query); m.find(); copiedUpTo = m.end()) {
        // Group 2 is the literal. Everything in front of it, including the
        // prefix character captured by group 1, is copied through unchanged.
        sql.append(query, copiedUpTo, m.start(2));
        // The placeholder is about to be written at the current end of sql.
        found.add(new Parameter(sql.length(), m.group(2)));
        sql.append('?');
    }
    sql.append(query, copiedUpTo, query.length());
    Parameter[] params = found.toArray(new Parameter[found.size()]);
    return new ParameterizedQuery(sql.toString(), params);
}
Run Code Online (Sandbox Code Playgroud)

现在,如果您有一个查询列表,并且只想把在各个输入查询中取值不相同的字面值保留为参数,则可以解析所有查询,生成 ParameterizedQuery 数组,并简化该数组:

/**
 * Keeps as parameters only those literals whose value changes somewhere in
 * the list; literals that are the same in every query are written back into
 * the SQL text.
 *
 * @param queries parsed queries, all expected to share the same SQL text
 * @return {@code queries} itself when it is empty or when every parameter
 *         varies; otherwise a new array of reduced queries sharing one SQL text
 * @throws RuntimeException if two neighbouring queries differ in SQL text or
 *         in number of parameters
 */
private static ParameterizedQuery[] simplify(ParameterizedQuery[] queries) {
    if (queries.length == 0) {
        return queries;
    }

    // diff[i] becomes true once parameter i differs between two neighbours.
    boolean[] diff = new boolean[queries[0].params.length];
    for (int q = 1; q < queries.length; ++q) {
        ParameterizedQuery prev = queries[q - 1];
        ParameterizedQuery cur = queries[q];
        if (!cur.sql.equals(prev.sql)) {
            throw new RuntimeException(
                    "Queries are too different: [" + prev.sql
                    + "] and [" + cur.sql + "]");
        }
        if (cur.params.length != prev.params.length) {
            throw new RuntimeException(
                    "Different number of parameters: ["
                    + prev.params.length
                    + "] and [" + cur.params.length + "]");
        }
        for (int i = 0; i < diff.length; ++i) {
            diff[i] |= !cur.params[i].value.equals(prev.params[i].value);
        }
    }

    // Nothing to fold back when every parameter varies.
    if (and(diff)) {
        return queries;
    }

    ParameterizedQuery first = expandQuery(queries[0].sql, queries[0].params, diff);
    ParameterizedQuery[] result = new ParameterizedQuery[queries.length];
    result[0] = first;
    for (int q = 1; q < queries.length; ++q) {
        Parameter[] varying = keep(queries[q].params, first.params, diff);
        result[q] = new ParameterizedQuery(first.sql, varying);
    }
    return result;
}

/**
 * Tells whether every element of {@code arr} is {@code true}.
 *
 * @param arr flags to inspect
 * @return {@code true} for an empty array or an array holding only
 *         {@code true}; {@code false} as soon as one element is {@code false}
 */
private static boolean and(boolean[] arr) {
    int idx = 0;
    // Walk forward while the flags stay true; stopping early means a false was hit.
    while (idx < arr.length && arr[idx]) {
        idx++;
    }
    return idx == arr.length;
}

/**
 * Writes the literals of the non-varying parameters back into the SQL text
 * and keeps placeholders only for the varying ones.
 *
 * @param query  SQL text with one placeholder per entry of {@code params}
 * @param params parameters of {@code query}, in placeholder order
 * @param diff   {@code diff[i]} is {@code true} when parameter {@code i} must
 *               stay a placeholder
 * @return the rewritten SQL with the remaining parameters, whose positions
 *         refer to the rewritten text
 */
private static ParameterizedQuery expandQuery(String query,
        Parameter[] params, boolean[] diff) {
    int keptCount = 0;
    for (int i = 0; i < diff.length; ++i) {
        keptCount += diff[i] ? 1 : 0;
    }

    Parameter[] remaining = new Parameter[keptCount];
    StringBuilder out = new StringBuilder();
    int next = 0;
    int copiedUpTo = 0;
    for (int i = 0; i < diff.length; ++i) {
        Parameter p = params[i];
        // Copy the text in front of this placeholder in either case.
        out.append(query, copiedUpTo, p.position);
        if (diff[i]) {
            // Placeholder stays: it will be copied with the next chunk, so
            // its new index is the current end of the output.
            remaining[next++] = new Parameter(out.length(), p.value);
            copiedUpTo = p.position;
        } else {
            // Placeholder goes: emit the literal and skip the placeholder.
            out.append(p.value);
            copiedUpTo = p.position + 1;
        }
    }
    out.append(query, copiedUpTo, query.length());
    return new ParameterizedQuery(out.toString(), remaining);
}

/**
 * Picks the varying parameters out of {@code params} and gives each the
 * position of the corresponding entry in {@code ref}.
 *
 * @param params all parameters of one query
 * @param ref    reduced parameters of the reference query; supplies positions
 * @param diff   {@code diff[i]} is {@code true} when parameter {@code i} is kept
 * @return array of length {@code ref.length} holding the kept parameters
 */
private static Parameter[] keep(Parameter[] params, Parameter[] ref,
        boolean[] diff) {
    Parameter[] kept = new Parameter[ref.length];
    int slot = 0;
    int i = 0;
    while (i < params.length) {
        if (diff[i]) {
            Parameter template = ref[slot];
            kept[slot++] = new Parameter(template.position, params[i].value);
        }
        i++;
    }
    return kept;
}
Run Code Online (Sandbox Code Playgroud)

这是解决您的示例的程序:

/**
 * Demo program: turns SQL queries that differ only in some literal values
 * into one shared parameterized statement plus per-query parameter values.
 *
 * <p>Each query is first parsed so that every literal becomes a {@code ?}
 * placeholder; the list is then simplified so that literals which are the
 * same in all queries are written back into the SQL text.
 */
public class Main {
    private static final String[] QUERIES = {
        "select * from tableName as t1 where t1.tableColumnId=4 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId",
        "select * from tableName as t1 where t1.tableColumnId=6 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId",
    };

    /**
     * Finds one SQL literal together with the single character in front of it.
     *
     * <p>Group 1 is that leading character; it must not be a letter or digit,
     * so digits inside identifiers such as {@code t1} are not treated as
     * literals. Group 2 is the literal: a number ({@code 12}, {@code 1.5},
     * {@code .5}, optionally followed by an exponent such as {@code e+3} or
     * {@code E10}) or one or more adjacent single-quoted strings.
     *
     * <p>The exponent part used to contain a stray {@code ]} and a mandatory
     * sign, so exponents were never recognised; both are fixed here.
     */
    private static final Pattern CONST_PATTERN
            = Pattern.compile("([^0-9a-zA-Z])((?:[0-9]+(?:\\.[0-9]*)?|[0-9]*\\.[0-9]+)"
                    + "(?:[Ee][+-]?[0-9]+)?"
                    + "|(?:\\'[^']*\\')+)", Pattern.CASE_INSENSITIVE);

    /** SQL text with {@code ?} placeholders plus the literals they replace. */
    private static class ParameterizedQuery {
        final String sql;
        final Parameter[] params;

        /**
         * @param sql    query text with placeholders
         * @param params removed literals; the array is copied
         */
        ParameterizedQuery(String sql, Parameter[] params) {
            this.sql = sql;
            this.params = params.clone();
        }
    }

    /** One removed literal: index of its placeholder in the SQL and its text. */
    private static class Parameter {
        final int position;
        final String value;

        Parameter(int position, String value) {
            this.position = position;
            this.value = value;
        }
    }

    /**
     * Parses the sample queries, prints them, simplifies them and prints
     * the simplified form.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ParameterizedQuery[] queries = new ParameterizedQuery[QUERIES.length];
        for (int i = 0; i < QUERIES.length; ++i) {
            queries[i] = parse(QUERIES[i]);
        }
        print(queries);
        print(simplify(queries));
    }

    /** Prints each query's SQL followed by its numbered parameter values. */
    private static void print(ParameterizedQuery[] queries) {
        for (ParameterizedQuery cur : queries) {
            System.out.println(cur.sql);
            int i = 0;
            for (Parameter parm : cur.params) {
                System.out.println("    " + (++i) + ": " + parm.value);
            }
        }
    }

    /**
     * Replaces every literal in {@code query} with {@code ?}.
     *
     * @param query SQL text to scan
     * @return rewritten SQL and one parameter per replaced literal, in order
     */
    private static ParameterizedQuery parse(String query) {
        List<Parameter> parms = new ArrayList<>();
        Matcher matcher = CONST_PATTERN.matcher(query);
        int start = 0;
        StringBuilder buf = new StringBuilder();
        while (matcher.find()) {
            // Copy the text before the match plus the prefix character
            // (group 1), then put a placeholder where the literal was.
            buf.append(query, start, matcher.start())
                    .append(matcher.group(1))
                    .append('?');
            parms.add(new Parameter(buf.length() - 1, matcher.group(2)));
            start = matcher.end();
        }
        buf.append(query, start, query.length());
        return new ParameterizedQuery(
                buf.toString(), parms.toArray(new Parameter[0]));
    }

    /**
     * Keeps as parameters only the literals whose value changes somewhere in
     * the list; the others are written back into the SQL text.
     *
     * @param queries parsed queries that are expected to share one SQL text
     * @return {@code queries} itself when it is empty or every parameter
     *         varies; otherwise a new array of reduced queries
     * @throws IllegalArgumentException if two neighbouring queries differ in
     *         SQL text or in number of parameters
     */
    private static ParameterizedQuery[] simplify(ParameterizedQuery[] queries) {
        if (queries.length == 0) {
            return queries;
        }
        // diff[i] becomes true once parameter i differs between two neighbours.
        boolean[] diff = new boolean[queries[0].params.length];
        for (int q = 1; q < queries.length; ++q) {
            ParameterizedQuery prev = queries[q - 1];
            ParameterizedQuery cur = queries[q];
            if (!cur.sql.equals(prev.sql)) {
                throw new IllegalArgumentException(
                        "Queries are too different: [" + prev.sql
                        + "] and [" + cur.sql + "]");
            }
            if (cur.params.length != prev.params.length) {
                throw new IllegalArgumentException(
                        "Different number of parameters: ["
                        + prev.params.length
                        + "] and [" + cur.params.length + "]");
            }
            for (int i = 0; i < diff.length; ++i) {
                if (!cur.params[i].value.equals(prev.params[i].value)) {
                    diff[i] = true;
                }
            }
        }
        if (and(diff)) {
            return queries;
        }
        ParameterizedQuery[] result = new ParameterizedQuery[queries.length];
        result[0] = expandQuery(queries[0].sql, queries[0].params, diff);
        for (int i = 1; i < queries.length; ++i) {
            result[i] = new ParameterizedQuery(result[0].sql,
                    keep(queries[i].params, result[0].params, diff));
        }
        return result;
    }

    /** Returns {@code true} when every element of {@code arr} is {@code true}. */
    private static boolean and(boolean[] arr) {
        for (boolean b : arr) {
            if (!b) {
                return false;
            }
        }
        return true;
    }

    /**
     * Writes the literals of the non-varying parameters back into the SQL and
     * keeps placeholders only for the varying ones.
     *
     * @param query  SQL text with one placeholder per entry of {@code params}
     * @param params parameters of {@code query}, in placeholder order
     * @param diff   {@code diff[i]} is {@code true} when parameter {@code i}
     *               stays a placeholder
     * @return rewritten SQL with the remaining parameters; their positions
     *         refer to the rewritten text
     */
    private static ParameterizedQuery expandQuery(String query,
            Parameter[] params, boolean[] diff) {
        int count = 0;
        for (boolean b : diff) {
            if (b) {
                ++count;
            }
        }
        Parameter[] result = new Parameter[count];
        int r = 0;
        int start = 0;
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < diff.length; ++i) {
            Parameter parm = params[i];
            buf.append(query, start, parm.position);
            if (diff[i]) {
                // Placeholder stays; it is copied with the next chunk, so its
                // new index is the current end of the buffer.
                result[r++] = new Parameter(buf.length(), parm.value);
                start = parm.position;
            } else {
                // Put the literal back and skip over the placeholder.
                buf.append(parm.value);
                start = parm.position + 1;
            }
        }
        buf.append(query, start, query.length());
        return new ParameterizedQuery(buf.toString(), result);
    }

    /**
     * Picks the varying parameters out of {@code params}, giving each the
     * position of the corresponding entry in {@code ref}.
     *
     * @param params all parameters of one query
     * @param ref    reduced parameters of the reference query
     * @param diff   {@code diff[i]} is {@code true} when parameter {@code i} is kept
     * @return array of length {@code ref.length} holding the kept parameters
     */
    private static Parameter[] keep(Parameter[] params, Parameter[] ref,
            boolean[] diff) {
        Parameter[] result = new Parameter[ref.length];
        int j = 0;
        for (int i = 0; i < params.length; ++i) {
            if (diff[i]) {
                result[j] = new Parameter(ref[j].position, params[i].value);
                ++j;
            }
        }
        return result;
    }
}
Run Code Online (Sandbox Code Playgroud)

输出是:

select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName=? inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
    1: 4
    2: 'test'
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName=? inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
    1: 6
    2: 'test'
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
    1: 4
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
    1: 6
Run Code Online (Sandbox Code Playgroud)