Commit 52dfa3d

wengh authored and allisonwang-db committed
[SPARK-51566][PYTHON] Python UDF traceback improvement
### Motivation

Currently, when a Python UDF raises an error, the traceback only includes the file name and the line number, but not the content of that specific line. This differs from local code tracebacks, which do show the line content. See the following examples.

Error inside a UDF:

![image](https://github.com/user-attachments/assets/03d6dc6a-c821-4601-9074-95377d03f80c)

Local error. Notice that IPython additionally includes more lines around the line where the error happens, with links to the notebook cell, making it even easier to understand.

![image](https://github.com/user-attachments/assets/54886061-b90d-4449-a500-a81ee30c19db)

### What changes were proposed in this pull request?

This PR changes `convert_exception` to detect Python tracebacks in the JVM error message, parse them back into a traceback object, and include them in the converted exception. This way, these frames are included in the exception traceback as if they were part of the call stack. If we fail to parse the traceback, we silently ignore the failure, keeping the original behavior. In either case, the original error message, with the traceback in string form, is always included in the exception so that no information is lost.

This PR also introduces [`tblib`](https://github.com/ionelmc/python-tblib), a lightweight library that can parse a traceback from a string. The library is included as a source file, with modifications so that it preserves the original line content when the file is no longer available.

Example of the improved traceback as displayed in IPython. Notice that the Python worker frames are appended to the exception so that IPython's ultratb shows the code around the error line.

```py
---------------------------------------------------------------------------
PythonException                           Traceback (most recent call last)
Cell In[3], line 8
      4 udf(returnType=StringType())
      5 def foo(value):
      6     1 / 0
----> 8 spark.range(1).toDF("value").select(foo("value")).show()

File ~/personal/test-spark/.venv/lib/python3.9/site-packages/pyspark/sql/classic/dataframe.py:285, in DataFrame.show(self, n, truncate, vertical)
    284 def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False) -> None:
--> 285     print(self._show_string(n, truncate, vertical))

...

File ~/personal/test-spark/.venv/lib/python3.9/site-packages/pyspark/errors/exceptions/captured.py:271, in capture_sql_exception.<locals>.deco(*a, **kw)
    267 converted = convert_exception(e.java_exception)
    268 if not isinstance(converted, UnknownException):
    269     # Hide where the exception came from that shows a non-Pythonic
    270     # JVM exception message.
--> 271     raise converted from None
    272 else:
    273     raise

Cell In[3], line 6, in foo()
      4 udf(returnType=StringType())
      5 def foo(value):
----> 6     1 / 0

PythonException: An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
  File "/var/folders/jm/2m7c_fjj75v7gqlr5px0j8780000gp/T/ipykernel_6171/1200677507.py", line 6, in foo
ZeroDivisionError: division by zero
```

### Why are the changes needed?

To improve debuggability of Python UDFs (and UDTFs, Data Sources, etc.) by recovering the traceback when calling the UDF using PySpark.

### Does this PR introduce _any_ user-facing change?

Yes. Exceptions converted from the JVM will include additional traceback frames if the JVM error message includes a Python traceback.

### How was this patch tested?

Unit tests and end-to-end tests in `python/pyspark/errors/tests/test_traceback.py`.

### Was this patch authored or co-authored using generative AI tooling?

No.

### What are the risks?

1. This change will break anything that assumes the frames of a converted exception don't include UDF frames. This seems unlikely, though.
2. If a Python source file is present on both the worker and the client, but with different content, the traceback may show the wrong line content for frames in that file.
3. If the traceback string format changes in a future Python version, the parsing will break. A recent example is [PEP 657 in Python 3.11](https://docs.python.org/3/whatsnew/3.11.html#pep-657-fine-grained-error-locations-in-tracebacks), which introduced fine-grained error locations in tracebacks. If similar changes happen in the future, the unit tests should catch them. In the worst case, parsing will fail and we will fall back to the original traceback string.
4. If a Python source file name contains a line break, parsing will return an incorrect traceback.

### Why do the worker tracebacks not include the line content?

When Python turns a traceback into a string, it looks up the line content by file name and line number using the `linecache` module. This module keeps an in-memory cache from file name to lines, and on a cache miss it reads the file from module globals or from the file system. IPython adds the cell content to the cache when the cell is executed.

When we run a UDF, the function is pickled and sent to the driver JVM. The pickle includes neither the source code nor the line cache, so `linecache` generally cannot find the source code when the traceback is generated on the Python worker, unless the client code lives in a `.py` file on the same host as the worker.
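As an aside (not part of the PR), here is a minimal, self-contained sketch of the `linecache` behavior described above. The file name `<hypothetical-cell>` and the function `foo` are made up for illustration; populating `linecache.cache` plays the role that IPython plays for notebook cells, and that the vendored `tblib` modification plays for recovered frames.

```py
import linecache
import traceback

# Code compiled from a string under a file name that does not exist on disk,
# similar to a UDF defined in a notebook cell and unpickled on a worker.
fake_filename = "<hypothetical-cell>"  # made-up name for illustration
source = "def foo(value):\n    return 1 / value\n"
namespace = {}
exec(compile(source, fake_filename, "exec"), namespace)

try:
    namespace["foo"](0)
except ZeroDivisionError:
    # Without a linecache entry, the formatted traceback has the file name and
    # line number for foo, but no source line -- just like on the Python worker.
    print(traceback.format_exc())

    # Pre-populating linecache restores the line content for the same traceback.
    lines = source.splitlines(keepends=True)
    linecache.cache[fake_filename] = (len(source), None, lines, fake_filename)
    print(traceback.format_exc())
```

In this PR, calling `populate_linecache()` on the parsed traceback serves the same purpose on the client side.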
### What alternative solutions did we consider?

This solution may be brittle since it depends on the specific traceback string format, and it introduces a new third-party dependency. Here are the alternatives and why we didn't choose them.

#### Pass the linecache to the Python worker

If we could pass the linecache content to the worker, the traceback generated on the worker would include the line content. To do this, we would need to find which files are part of the pickle and pass their content to the worker. This is not feasible because it could add significant overhead when calling UDFs, and the user would still not benefit from rich tracebacks like the one provided by IPython.

#### Send a serialized traceback from the Python worker to the JVM

To avoid the brittle string parsing, we could send the traceback object from the worker to the JVM and then deserialize it in the Python client. Unfortunately, Python tracebacks are not pickleable, so we would still need a third-party library (`tblib` again) to serialize the traceback. This would also require many more changes across the codebase.

Closes #50313 from wengh/python-udf-traceback.

Authored-by: Haoyu Weng <[email protected]>
Signed-off-by: Allison Wang <[email protected]>
1 parent c8f5020 commit 52dfa3d

File tree

11 files changed: +731 −1


LICENSE-binary (+2)

@@ -445,6 +445,8 @@ jline:jline
 org.jodd:jodd-core
 pl.edu.icm:JLargeArrays

+python/pyspark/errors/exceptions/tblib.py
+

 BSD 3-Clause
 ------------

dev/.rat-excludes (+1)

@@ -48,6 +48,7 @@ jquery.mustache.js
 pyspark-coverage-site/*
 cloudpickle/*
 join.py
+tblib.py
 SparkILoop.scala
 sbt
 sbt-launch-lib.bash

dev/sparktestsupport/modules.py (+2)

@@ -1466,6 +1466,8 @@ def __hash__(self):
     python_test_goals=[
         # unittests
         "pyspark.errors.tests.test_errors",
+        "pyspark.errors.tests.test_traceback",
+        "pyspark.errors.tests.connect.test_parity_traceback",
     ],
 )

python/packaging/client/setup.py (+1)

@@ -68,6 +68,7 @@
 test_packages = []
 if "SPARK_TESTING" in os.environ:
     test_packages = [
+        "pyspark.errors.tests.connect",
         "pyspark.tests",  # for Memory profiler parity tests
         "pyspark.resource.tests",
         "pyspark.sql.tests",

python/pyspark/errors/exceptions/base.py (+32 −1)

@@ -17,8 +17,9 @@
 import warnings
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Dict, Optional, cast, Iterable, TYPE_CHECKING, List
+from typing import Dict, Optional, TypeVar, cast, Iterable, TYPE_CHECKING, List

+from pyspark.errors.exceptions.tblib import Traceback
 from pyspark.errors.utils import ErrorClassesReader
 from pyspark.logger import PySparkLogger
 from pickle import PicklingError
@@ -27,6 +28,9 @@
     from pyspark.sql.types import Row


+T = TypeVar("T", bound="PySparkException")
+
+
 class PySparkException(Exception):
     """
     Base Exception for handling errors generated from PySpark.
@@ -449,3 +453,30 @@ def summary(self) -> str:
         Summary of the exception cause.
         """
         ...
+
+
+def recover_python_exception(e: T) -> T:
+    """
+    Recover Python exception stack trace.
+
+    Many JVM exception types may wrap Python exceptions. For example:
+    - UDFs can cause PythonException
+    - UDTFs and Data Sources can cause AnalysisException
+    """
+    python_exception_header = "Traceback (most recent call last):"
+    try:
+        message = str(e)
+        start = message.find(python_exception_header)
+        if start == -1:
+            # No Python exception found
+            return e
+
+        # The message contains a Python exception. Parse it to use it as the exception's traceback.
+        # This allows richer error messages, for example showing line content in Python UDF.
+        python_exception_string = message[start:]
+        tb = Traceback.from_string(python_exception_string)
+        tb.populate_linecache()
+        return e.with_traceback(tb.as_traceback())
+    except BaseException:
+        # Parsing the stacktrace is best effort.
+        return e
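
For context, the parse-and-reattach idea used by `recover_python_exception` above can be sketched standalone with the upstream `tblib` package (`pip install tblib`). This is an illustrative sketch, not code from the PR, and the error text is made up; the vendored copy adds `populate_linecache()` on top of the `from_string`/`as_traceback` calls shown here.

```py
import traceback

from tblib import Traceback  # upstream package; the PR vendors a modified copy

# A made-up worker error message that embeds a Python traceback as text.
remote_message = (
    "Traceback (most recent call last):\n"
    '  File "udf.py", line 2, in foo\n'
    "ZeroDivisionError: division by zero\n"
)

tb = Traceback.from_string(remote_message)  # parse the text back into a traceback
exc = RuntimeError("An exception was thrown from the Python worker.")
try:
    # Attach the parsed frames so they appear as part of the local call stack.
    raise exc.with_traceback(tb.as_traceback())
except RuntimeError:
    traceback.print_exc()  # now includes the 'File "udf.py", line 2, in foo' frame
```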

python/pyspark/errors/exceptions/captured.py (+6)

@@ -37,6 +37,7 @@
     UnknownException as BaseUnknownException,
     QueryContext as BaseQueryContext,
     QueryContextType,
+    recover_python_exception,
 )

 if TYPE_CHECKING:
@@ -185,6 +186,11 @@ def getQueryContext(self) -> List[BaseQueryContext]:


 def convert_exception(e: "Py4JJavaError") -> CapturedException:
+    converted = _convert_exception(e)
+    return recover_python_exception(converted)
+
+
+def _convert_exception(e: "Py4JJavaError") -> CapturedException:
     from pyspark import SparkContext
     from py4j.java_gateway import is_instance_of

python/pyspark/errors/exceptions/connect.py (+11)

@@ -39,6 +39,7 @@
     StreamingPythonRunnerInitializationException as BaseStreamingPythonRunnerInitException,
     PickleException as BasePickleException,
     UnknownException as BaseUnknownException,
+    recover_python_exception,
 )

 if TYPE_CHECKING:
@@ -56,6 +57,16 @@ def convert_exception(
     truncated_message: str,
     resp: Optional[pb2.FetchErrorDetailsResponse],
     display_server_stacktrace: bool = False,
+) -> SparkConnectException:
+    converted = _convert_exception(info, truncated_message, resp, display_server_stacktrace)
+    return recover_python_exception(converted)
+
+
+def _convert_exception(
+    info: "ErrorInfo",
+    truncated_message: str,
+    resp: Optional[pb2.FetchErrorDetailsResponse],
+    display_server_stacktrace: bool = False,
 ) -> SparkConnectException:
     raw_classes = info.metadata.get("classes")
     classes: List[str] = json.loads(raw_classes) if raw_classes else []
