From 649e2ccfba863442561430c4ad83a147227b9bc5 Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gera@apache.org>
Date: Fri, 20 Oct 2023 13:02:31 -0700
Subject: [PATCH 1/6] Add a spark-shell smoke test to premerge and nightly

Contributes to #5704

Signed-off-by: Gera Shegalov <gera@apache.org>
---
 integration_tests/run_pyspark_from_build.sh | 13 ++++++++++++-
 jenkins/spark-premerge-build.sh             |  1 +
 jenkins/spark-tests.sh                      |  2 ++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 853fae66316..fa4dffc8073 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -309,7 +309,18 @@ EOF
     fi
     export PYSP_TEST_spark_rapids_memory_gpu_allocSize=${PYSP_TEST_spark_rapids_memory_gpu_allocSize:-'1536m'}
 
-    if ((${#TEST_PARALLEL_OPTS[@]} > 0));
+    SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}"
+    if [[ "$SPARK_SHELL_SMOKE_TEST" != "0" ]]; then
+        echo "Running spark-shell smoke test..."
+        <<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
+            "$SPARK_HOME"/bin/spark-shell \
+                --master local-cluster[1,1,1024] \
+                --jars "${PYSP_TEST_spark_jars}" \
+                --conf spark.plugins=com.nvidia.spark.SQLPlugin \
+                --conf spark.deploy.maxExecutorRetries=0 2>/dev/null \
+            | grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
+        echo "SUCCESS spark-shell smoke test..."
+    elif ((${#TEST_PARALLEL_OPTS[@]} > 0));
     then
         exec python "${RUN_TESTS_COMMAND[@]}" "${TEST_PARALLEL_OPTS[@]}" "${TEST_COMMON_OPTS[@]}"
     else
diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 9b509208986..56997695f39 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -88,6 +88,7 @@ mvn_verify() {
 
     # Triggering here until we change the jenkins file
     rapids_shuffle_smoke_test
+    SPARK_SHELL_SMOKE_TEST=1 ./integration_tests/run_pyspark_from_build.sh
 }
 
 rapids_shuffle_smoke_test() {
diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh
index 4a062f63871..c3896d05342 100755
--- a/jenkins/spark-tests.sh
+++ b/jenkins/spark-tests.sh
@@ -270,6 +270,8 @@ TEST_MODE=${TEST_MODE:-'DEFAULT'}
 if [[ $TEST_MODE == "DEFAULT" ]]; then
   ./run_pyspark_from_build.sh
 
+  SPARK_SHELL_SMOKE_TEST=1 ./integration_tests/run_pyspark_from_build.sh
+
   # ParquetCachedBatchSerializer cache_test
   PYSP_TEST_spark_sql_cache_serializer=com.nvidia.spark.ParquetCachedBatchSerializer \
     ./run_pyspark_from_build.sh -k cache_test

From e42518bad2c681588d63d13ec2dc9fe49dd91d39 Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gera@apache.org>
Date: Fri, 20 Oct 2023 13:32:53 -0700
Subject: [PATCH 2/6] Remove trailing ellipsis from smoke test success message

---
 integration_tests/run_pyspark_from_build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index fa4dffc8073..7fef5fc945e 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -319,7 +319,7 @@ EOF
                 --conf spark.plugins=com.nvidia.spark.SQLPlugin \
                 --conf spark.deploy.maxExecutorRetries=0 2>/dev/null \
             | grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
-        echo "SUCCESS spark-shell smoke test..."
+        echo "SUCCESS spark-shell smoke test"
     elif ((${#TEST_PARALLEL_OPTS[@]} > 0));
     then
         exec python "${RUN_TESTS_COMMAND[@]}" "${TEST_PARALLEL_OPTS[@]}" "${TEST_COMMON_OPTS[@]}"

From dfc7d33744239fca5f035013487d3cccd92e138c Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gera@apache.org>
Date: Fri, 20 Oct 2023 13:39:42 -0700
Subject: [PATCH 3/6] Explain why the smoke test greps spark-shell output

---
 integration_tests/run_pyspark_from_build.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 7fef5fc945e..fbe8b4fa3f7 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -312,6 +312,12 @@ EOF
     SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}"
     if [[ "$SPARK_SHELL_SMOKE_TEST" != "0" ]]; then
         echo "Running spark-shell smoke test..."
+        # NOTE grep is used not only for checking the output but also
+        # to workaround the fact that spark-shell catches all failures.
+        # In this test it exits not because of the failure but because it encounters
+        # an EOF on stdin and injects a ":quit" command. Without a grep check
+        # the exit code would be success 0 regardless of the exceptions.
+        #
         <<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
             "$SPARK_HOME"/bin/spark-shell \
                 --master local-cluster[1,1,1024] \

From 0d06117ce917e93c52a4dd81460824633c097d84 Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gera@apache.org>
Date: Mon, 23 Oct 2023 10:22:46 -0700
Subject: [PATCH 4/6] Support an optional shuffle manager in spark-shell smoke test

Signed-off-by: Gera Shegalov <gera@apache.org>
---
 integration_tests/run_pyspark_from_build.sh | 23 +++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index fbe8b4fa3f7..e31e462e1a8 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -310,8 +310,23 @@ EOF
     export PYSP_TEST_spark_rapids_memory_gpu_allocSize=${PYSP_TEST_spark_rapids_memory_gpu_allocSize:-'1536m'}
 
     SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}"
-    if [[ "$SPARK_SHELL_SMOKE_TEST" != "0" ]]; then
+    if [[ "${SPARK_SHELL_SMOKE_TEST}" != "0" ]]; then
         echo "Running spark-shell smoke test..."
+        SPARK_SHELL_ARGS_ARR=(
+            --master local-cluster[1,2,1024]
+            --conf spark.plugins=com.nvidia.spark.SQLPlugin
+            --conf spark.deploy.maxExecutorRetries=0
+        )
+        if [[ "${PYSP_TEST_spark_shuffle_manager}" != "" ]]; then
+            SPARK_SHELL_ARGS_ARR+=(
+                --conf spark.shuffle.manager="${PYSP_TEST_spark_shuffle_manager}"
+                --driver-class-path "${PYSP_TEST_spark_driver_extraClassPath}"
+                --conf spark.executor.extraClassPath="${PYSP_TEST_spark_driver_extraClassPath}"
+            )
+        else
+            SPARK_SHELL_ARGS_ARR+=(--jars "${PYSP_TEST_spark_jars}")
+        fi
+
         # NOTE grep is used not only for checking the output but also
         # to workaround the fact that spark-shell catches all failures.
         # In this test it exits not because of the failure but because it encounters
@@ -319,11 +334,7 @@ EOF
         # the exit code would be success 0 regardless of the exceptions.
         #
         <<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
-            "$SPARK_HOME"/bin/spark-shell \
-                --master local-cluster[1,1,1024] \
-                --jars "${PYSP_TEST_spark_jars}" \
-                --conf spark.plugins=com.nvidia.spark.SQLPlugin \
-                --conf spark.deploy.maxExecutorRetries=0 2>/dev/null \
+            "${SPARK_HOME}"/bin/spark-shell "${SPARK_SHELL_ARGS_ARR[@]}" 2>/dev/null \
             | grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
         echo "SUCCESS spark-shell smoke test"
     elif ((${#TEST_PARALLEL_OPTS[@]} > 0));

From 95c9d1059e8cbba7a01cf8e13d980b09c87cdd0c Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gera@apache.org>
Date: Mon, 23 Oct 2023 10:29:23 -0700
Subject: [PATCH 5/6] Pass RapidsShuffleManager to smoke test in Jenkins scripts

Signed-off-by: Gera Shegalov <gera@apache.org>
---
 jenkins/spark-premerge-build.sh | 4 +++-
 jenkins/spark-tests.sh          | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 56997695f39..194fa4f15f5 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -88,7 +88,9 @@ mvn_verify() {
 
     # Triggering here until we change the jenkins file
     rapids_shuffle_smoke_test
-    SPARK_SHELL_SMOKE_TEST=1 ./integration_tests/run_pyspark_from_build.sh
+    SPARK_SHELL_SMOKE_TEST=1 \
+    PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.$SHUFFLE_SPARK_SHIM.RapidsShuffleManager \
+        ./integration_tests/run_pyspark_from_build.sh
 }
 
 rapids_shuffle_smoke_test() {
diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh
index c3896d05342..9ef4d64e2d9 100755
--- a/jenkins/spark-tests.sh
+++ b/jenkins/spark-tests.sh
@@ -270,7 +270,9 @@ TEST_MODE=${TEST_MODE:-'DEFAULT'}
 if [[ $TEST_MODE == "DEFAULT" ]]; then
   ./run_pyspark_from_build.sh
 
-  SPARK_SHELL_SMOKE_TEST=1 ./integration_tests/run_pyspark_from_build.sh
+  SPARK_SHELL_SMOKE_TEST=1 \
+  PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.$SHUFFLE_SPARK_SHIM.RapidsShuffleManager \
+    ./integration_tests/run_pyspark_from_build.sh
 
   # ParquetCachedBatchSerializer cache_test
   PYSP_TEST_spark_sql_cache_serializer=com.nvidia.spark.ParquetCachedBatchSerializer \

From 4f8d7a97dc36a3181f541d82a59ed0442af70479 Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gera@apache.org>
Date: Mon, 23 Oct 2023 10:33:34 -0700
Subject: [PATCH 6/6] Wrap SHUFFLE_SPARK_SHIM in curly braces

Signed-off-by: Gera Shegalov <gera@apache.org>
---
 jenkins/spark-premerge-build.sh | 2 +-
 jenkins/spark-tests.sh          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 194fa4f15f5..15c5166001b 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -89,7 +89,7 @@ mvn_verify() {
     # Triggering here until we change the jenkins file
     rapids_shuffle_smoke_test
     SPARK_SHELL_SMOKE_TEST=1 \
-    PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.$SHUFFLE_SPARK_SHIM.RapidsShuffleManager \
+    PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
         ./integration_tests/run_pyspark_from_build.sh
 }
 
diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh
index 9ef4d64e2d9..e28799c28d4 100755
--- a/jenkins/spark-tests.sh
+++ b/jenkins/spark-tests.sh
@@ -271,7 +271,7 @@ if [[ $TEST_MODE == "DEFAULT" ]]; then
   ./run_pyspark_from_build.sh
 
   SPARK_SHELL_SMOKE_TEST=1 \
-  PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.$SHUFFLE_SPARK_SHIM.RapidsShuffleManager \
+  PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
     ./integration_tests/run_pyspark_from_build.sh
 
   # ParquetCachedBatchSerializer cache_test