Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-51303] [SQL] [TESTS] Extend ORDER BY testing coverage #50069

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,14 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"fragment" : "all"
} ]
}


-- !query
SELECT all, (SELECT col2 FROM VALUES (2, 2) ORDER BY ALL) FROM VALUES (1) AS t1 (all)
-- !query analysis
Project [all#x, scalar-subquery#x [] AS scalarsubquery()#x]
: +- Sort [col2#x ASC NULLS FIRST], true
: +- Project [col2#x]
: +- LocalRelation [col1#x, col2#x]
+- SubqueryAlias t1
+- LocalRelation [all#x]
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
-- Automatically generated by SQLQueryTestSuite
-- !query
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
AS testData(a, b)
-- !query analysis
CreateViewCommand `testData`, SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT a FROM testData ORDER BY a
-- !query analysis
Sort [a#x ASC NULLS FIRST], true
+- Project [a#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT a FROM testData GROUP BY a, b ORDER BY a
-- !query analysis
Sort [a#x ASC NULLS FIRST], true
+- Aggregate [a#x, b#x], [a#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT b FROM testData WHERE a > 1 ORDER BY a
-- !query analysis
Project [b#x]
+- Sort [a#x ASC NULLS FIRST], true
+- Project [b#x, a#x]
+- Filter (a#x > 1)
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT b FROM testData GROUP BY a, b ORDER BY a
-- !query analysis
Project [b#x]
+- Sort [a#x ASC NULLS FIRST], true
+- Aggregate [a#x, b#x], [b#x, a#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT col1 FROM VALUES (1, named_struct('f1', 1)) ORDER BY col2.f1
-- !query analysis
Project [col1#x]
+- Sort [col2#x.f1 ASC NULLS FIRST], true
+- Project [col1#x, col2#x]
+- LocalRelation [col1#x, col2#x]


-- !query
SELECT col1 FROM VALUES (1, named_struct('f1', named_struct('f2', 1))) ORDER BY col2.f1.f2
-- !query analysis
Project [col1#x]
+- Sort [col2#x.f1.f2 ASC NULLS FIRST], true
+- Project [col1#x, col2#x]
+- LocalRelation [col1#x, col2#x]


-- !query
SELECT a, (SELECT b FROM testData GROUP BY b HAVING b > 1 ORDER BY a) FROM testData
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE",
"sqlState" : "0A000",
"messageParameters" : {
"sqlExprs" : "\"a ASC NULLS FIRST\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 59,
"stopIndex" : 68,
"fragment" : "ORDER BY a"
} ]
}


-- !query
SELECT a, (SELECT b FROM VALUES (1, 2) AS innerTestData (all, b) ORDER BY ALL) FROM testData
-- !query analysis
Project [a#x, scalar-subquery#x [] AS scalarsubquery()#x]
: +- Project [b#x]
: +- Sort [ALL#x ASC NULLS FIRST], true
: +- Project [b#x, ALL#x]
: +- SubqueryAlias innerTestData
: +- LocalRelation [all#x, b#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT * FROM testData ORDER BY (SELECT a FROM testData ORDER BY b)
-- !query analysis
Sort [scalar-subquery#x [] ASC NULLS FIRST], true
: +- Project [a#x]
: +- Sort [b#x ASC NULLS FIRST], true
: +- Project [a#x, b#x]
: +- SubqueryAlias testdata
: +- View (`testData`, [a#x, b#x])
: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
: +- Project [a#x, b#x]
: +- SubqueryAlias testData
: +- LocalRelation [a#x, b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT * FROM testData ORDER BY (SELECT * FROM testData ORDER BY (SELECT a FROM testData ORDER BY b))
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "INVALID_SUBQUERY_EXPRESSION.SCALAR_SUBQUERY_RETURN_MORE_THAN_ONE_OUTPUT_COLUMN",
"sqlState" : "42823",
"messageParameters" : {
"number" : "2"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 33,
"stopIndex" : 101,
"fragment" : "(SELECT * FROM testData ORDER BY (SELECT a FROM testData ORDER BY b))"
} ]
}


-- !query
SELECT * FROM testData ORDER BY (SELECT a FROM VALUES (1))
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_SCALAR_SUBQUERY",
"sqlState" : "0A000",
"messageParameters" : {
"treeNode" : "Sort [scalar-subquery#x [a#x] ASC NULLS FIRST], true\n: +- Project [outer(a#x)]\n: +- LocalRelation [col1#x]\n+- Project [a#x, b#x]\n +- SubqueryAlias testdata\n +- View (`testData`, [a#x, b#x])\n +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n +- Project [a#x, b#x]\n +- SubqueryAlias testData\n +- LocalRelation [a#x, b#x]\n"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 24,
"stopIndex" : 58,
"fragment" : "ORDER BY (SELECT a FROM VALUES (1))"
} ]
}


-- !query
DROP VIEW IF EXISTS testData
-- !query analysis
DropTempViewCommand testData
3 changes: 3 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/order-by-all.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ select * from values("z", 1), ("y", 2), ("x", 3) AS T(col1, all) order by all;
-- shouldn't work in window functions
select name, dept, rank() over (partition by dept order by all) as rank
from values('Lisa', 'Sales', 10000, 35) as T(name, dept, salary, age);

-- ORDER BY ALL inside a subquery is not resolved to the outer-scope column named `all`.
-- The subquery has no `all` column of its own, so ALL keeps its keyword meaning and the
-- subquery is sorted by its select list (col2). The query is expected to succeed.
SELECT all, (SELECT col2 FROM VALUES (2, 2) ORDER BY ALL) FROM VALUES (1) AS t1 (all);
34 changes: 34 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/order-by.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Test data: a two-column temporary view whose rows include NULLs in `a`, in `b`, and in both.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
AS testData(a, b);

-- ORDER BY a column that is part of the child's output
SELECT a FROM testData ORDER BY a;
SELECT a FROM testData GROUP BY a, b ORDER BY a;

-- ORDER BY a column that is missing from the SELECT list and is only available
-- below the child's output
SELECT b FROM testData WHERE a > 1 ORDER BY a;

-- ORDER BY a column that is missing from the SELECT list but is present in the
-- grouping expressions list
SELECT b FROM testData GROUP BY a, b ORDER BY a;

-- ORDER BY a nested struct field of a column that is missing from the SELECT list
SELECT col1 FROM VALUES (1, named_struct('f1', 1)) ORDER BY col2.f1;
SELECT col1 FROM VALUES (1, named_struct('f1', named_struct('f2', 1))) ORDER BY col2.f1.f2;

-- ORDER BY column can't reference an outer scope. The subquery groups by `b` only, so `a`
-- could only come from the outer testData. Expected to fail analysis.
SELECT a, (SELECT b FROM testData GROUP BY b HAVING b > 1 ORDER BY a) FROM testData;

-- Column resolution from the child's output takes precedence over `ORDER BY ALL`.
-- innerTestData has a column named `all`, so ALL is resolved as that column.
SELECT a, (SELECT b FROM VALUES (1, 2) AS innerTestData (all, b) ORDER BY ALL) FROM testData;

-- ORDER BY with scalar subqueries
SELECT * FROM testData ORDER BY (SELECT a FROM testData ORDER BY b);
-- Expected to fail analysis: the subquery used as the sort key selects `*`, which is two
-- columns, while a scalar subquery must return exactly one.
SELECT * FROM testData ORDER BY (SELECT * FROM testData ORDER BY (SELECT a FROM testData ORDER BY b));

-- Fails because correlation is not allowed in ORDER BY. The VALUES relation has no column
-- `a`, so `a` is resolved as a reference to the outer testData.
SELECT * FROM testData ORDER BY (SELECT a FROM VALUES (1));

-- Clean up
DROP VIEW IF EXISTS testData;
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,11 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"fragment" : "all"
} ]
}


-- !query
SELECT all, (SELECT col2 FROM VALUES (2, 2) ORDER BY ALL) FROM VALUES (1) AS t1 (all)
-- !query schema
struct<all:int,scalarsubquery():int>
-- !query output
1 2
Loading