|
17 | 17 | import warnings
|
18 | 18 | from abc import ABC, abstractmethod
|
19 | 19 | from enum import Enum
|
20 |
| -from typing import Dict, Optional, cast, Iterable, TYPE_CHECKING, List |
| 20 | +from typing import Dict, Optional, TypeVar, cast, Iterable, TYPE_CHECKING, List |
21 | 21 |
|
| 22 | +from pyspark.errors.exceptions.tblib import Traceback |
22 | 23 | from pyspark.errors.utils import ErrorClassesReader
|
23 | 24 | from pyspark.logger import PySparkLogger
|
24 | 25 | from pickle import PicklingError
|
|
27 | 28 | from pyspark.sql.types import Row
|
28 | 29 |
|
29 | 30 |
|
# Type variable bound to PySparkException; recover_python_exception annotates
# both its parameter and its return value with it, so the declared return type
# matches the exception type passed in.
| 31 | +T = TypeVar("T", bound="PySparkException") |
| 32 | + |
| 33 | + |
30 | 34 | class PySparkException(Exception):
|
31 | 35 | """
|
32 | 36 | Base Exception for handling errors generated from PySpark.
|
@@ -449,3 +453,30 @@ def summary(self) -> str:
|
449 | 453 | Summary of the exception cause.
|
450 | 454 | """
|
451 | 455 | ...
|
| 456 | + |
| 457 | + |
def recover_python_exception(e: T) -> T:
    """
    Recover Python exception stack trace.

    Many JVM exception types may wrap Python exceptions. For example:
    - UDFs can cause PythonException
    - UDTFs and Data Sources can cause AnalysisException

    The string form of ``e`` is searched for the standard Python traceback
    header. When the header is present, the text from the header onwards is
    parsed and attached to ``e`` as its traceback.

    Parameters
    ----------
    e : PySparkException
        The exception whose message may embed a Python stack trace.

    Returns
    -------
    PySparkException
        The same exception object that was passed in. Its traceback is
        replaced only when a Python stack trace was found and parsed
        successfully; otherwise it is returned unchanged.

    Notes
    -----
    Recovery is best effort: any ordinary error raised while rendering or
    parsing the message is suppressed. Interpreter-level signals such as
    ``KeyboardInterrupt`` and ``SystemExit`` are not suppressed.
    """
    python_exception_header = "Traceback (most recent call last):"
    try:
        message = str(e)
        start = message.find(python_exception_header)
        if start == -1:
            # No Python exception found
            return e

        # The message contains a Python exception. Parse it to use it as the exception's
        # traceback. This allows richer error messages, for example showing line content
        # in Python UDF.
        tb = Traceback.from_string(message[start:])
        tb.populate_linecache()
        return e.with_traceback(tb.as_traceback())
    except Exception:
        # Parsing the stacktrace is best effort. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        return e
0 commit comments