From 65ff67c0ba5b3ba3438d54de1399fe7ad1b16717 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:23:35 -0300 Subject: [PATCH 1/7] refactor: deprecate usage of `cursor.execute` statements in favor of the in class implementation of `execute`. --- pandas/io/sql.py | 61 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5652d7fab0c7c..f4c40ac1b235d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1651,10 +1651,18 @@ def run_transaction(self): def execute(self, sql: str | Select | TextClause, params=None): """Simple passthrough to SQLAlchemy connectable""" + from sqlalchemy.exc import DBAPIError as SQLAlchemyDatabaseError + args = [] if params is None else [params] if isinstance(sql, str): - return self.con.exec_driver_sql(sql, *args) - return self.con.execute(sql, *args) + execute_function = self.con.exec_driver_sql + else: + execute_function = self.con.execute + + try: + return execute_function(sql, *args) + except SQLAlchemyDatabaseError as exc: + raise DatabaseError(f"Execution failed on sql '{sql}': {exc}") from exc def read_table( self, @@ -2108,6 +2116,8 @@ def run_transaction(self): self.con.commit() def execute(self, sql: str | Select | TextClause, params=None): + from adbc_driver_manager import DatabaseError as ADBCDatabaseError + if not isinstance(sql, str): raise TypeError("Query must be a string unless using sqlalchemy.") args = [] if params is None else [params] @@ -2115,10 +2125,10 @@ def execute(self, sql: str | Select | TextClause, params=None): try: cur.execute(sql, *args) return cur - except Exception as exc: + except ADBCDatabaseError as exc: try: self.con.rollback() - except Exception as inner_exc: # pragma: no cover + except ADBCDatabaseError as inner_exc: # pragma: no cover ex = DatabaseError( f"Execution failed on sql: {sql}\n{exc}\nunable to rollback" ) @@ -2207,8 +2217,7 @@ def read_table( else: stmt = f"SELECT {select_list} FROM {table_name}" - with self.con.cursor() as cur: - cur.execute(stmt) + with self.execute(stmt) as cur: pa_table = cur.fetch_arrow_table() df = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend) @@ -2278,8 +2287,7 @@ def read_query( if chunksize: raise NotImplementedError("'chunksize' is not implemented for ADBC drivers") - with self.con.cursor() as cur: - cur.execute(sql) + with self.execute(sql) as cur: pa_table = cur.fetch_arrow_table() df = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend) @@ -2335,6 +2343,9 @@ def to_sql( engine : {'auto', 'sqlalchemy'}, default 'auto' Raises NotImplementedError if not set to 'auto' """ + from adbc_driver_manager import DatabaseError as ADBCDatabaseError + import pyarrow as pa + if index_label: raise NotImplementedError( "'index_label' is not implemented for ADBC drivers" @@ -2364,22 +2375,25 @@ def to_sql( if if_exists == "fail": raise ValueError(f"Table '{table_name}' already exists.") elif if_exists == "replace": - with self.con.cursor() as cur: - cur.execute(f"DROP TABLE {table_name}") + sql_statement = f"DROP TABLE {table_name}" + self.execute(sql_statement).close() elif if_exists == "append": mode = "append" - import pyarrow as pa - try: tbl = pa.Table.from_pandas(frame, preserve_index=index) except pa.ArrowNotImplementedError as exc: raise ValueError("datatypes not supported") from exc with self.con.cursor() as cur: - total_inserted = cur.adbc_ingest( - table_name=name, data=tbl, mode=mode, db_schema_name=schema - ) + try: + total_inserted = cur.adbc_ingest( + table_name=name, data=tbl, mode=mode, db_schema_name=schema + ) + except ADBCDatabaseError as exc: + raise DatabaseError( + f"Failed to insert records on table={name} with {mode=}" + ) from exc self.con.commit() return total_inserted @@ -2496,9 +2510,9 @@ def sql_schema(self) -> str: return str(";\n".join(self.table)) def _execute_create(self) -> None: - with self.pd_sql.run_transaction() as conn: + with self.pd_sql.run_transaction(): for stmt in self.table: - conn.execute(stmt) + self.pd_sql.execute(stmt).close() def insert_statement(self, *, num_rows: int) -> str: names = list(map(str, self.frame.columns)) @@ -2520,8 +2534,13 @@ def insert_statement(self, *, num_rows: int) -> str: return insert_statement def _execute_insert(self, conn, keys, data_iter) -> int: + from sqlite3 import DatabaseError as SQLiteDatabaseError + data_list = list(data_iter) - conn.executemany(self.insert_statement(num_rows=1), data_list) + try: + conn.executemany(self.insert_statement(num_rows=1), data_list) + except SQLiteDatabaseError as exc: + raise DatabaseError("Execution failed") from exc return conn.rowcount def _execute_insert_multi(self, conn, keys, data_iter) -> int: @@ -2643,6 +2662,8 @@ def run_transaction(self): cur.close() def execute(self, sql: str | Select | TextClause, params=None): + from sqlite3 import DatabaseError as SQLiteDatabaseError + if not isinstance(sql, str): raise TypeError("Query must be a string unless using sqlalchemy.") args = [] if params is None else [params] @@ -2650,10 +2671,10 @@ def execute(self, sql: str | Select | TextClause, params=None): try: cur.execute(sql, *args) return cur - except Exception as exc: + except SQLiteDatabaseError as exc: try: self.con.rollback() - except Exception as inner_exc: # pragma: no cover + except SQLiteDatabaseError as inner_exc: # pragma: no cover ex = DatabaseError( f"Execution failed on sql: {sql}\n{exc}\nunable to rollback" ) From 28199e56d326ffd7b968fbaaf3ccf1ab6050d0b4 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:09:34 -0300 Subject: [PATCH 2/7] chore: using cursor from transaction --- pandas/io/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f4c40ac1b235d..8919aa48f1385 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2510,9 +2510,9 @@ def sql_schema(self) -> str: return str(";\n".join(self.table)) def _execute_create(self) -> None: - with self.pd_sql.run_transaction(): + with self.pd_sql.run_transaction() as cur: for stmt in self.table: - self.pd_sql.execute(stmt).close() + cur.execute(stmt) def insert_statement(self, *, num_rows: int) -> str: names = list(map(str, self.frame.columns)) From b44a5d837ba174507d1ceeb7a1bb97d617bbed7c Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Mon, 10 Feb 2025 15:21:31 -0300 Subject: [PATCH 3/7] using base exceptions --- pandas/io/sql.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 8919aa48f1385..f27a59c9a826a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1651,7 +1651,7 @@ def run_transaction(self): def execute(self, sql: str | Select | TextClause, params=None): """Simple passthrough to SQLAlchemy connectable""" - from sqlalchemy.exc import DBAPIError as SQLAlchemyDatabaseError + from sqlalchemy.exc import SQLAlchemyError args = [] if params is None else [params] if isinstance(sql, str): @@ -1661,7 +1661,7 @@ def execute(self, sql: str | Select | TextClause, params=None): try: return execute_function(sql, *args) - except SQLAlchemyDatabaseError as exc: + except SQLAlchemyError as exc: raise DatabaseError(f"Execution failed on sql '{sql}': {exc}") from exc def read_table( @@ -2116,7 +2116,7 @@ def run_transaction(self): self.con.commit() def execute(self, sql: str | Select | TextClause, params=None): - from adbc_driver_manager import DatabaseError as ADBCDatabaseError + from adbc_driver_manager import Error if not isinstance(sql, str): raise TypeError("Query must be a string unless using sqlalchemy.") @@ -2125,10 +2125,10 @@ def execute(self, sql: str | Select | TextClause, params=None): try: cur.execute(sql, *args) return cur - except ADBCDatabaseError as exc: + except Error as exc: try: self.con.rollback() - except ADBCDatabaseError as inner_exc: # pragma: no cover + except Error as inner_exc: # pragma: no cover ex = DatabaseError( f"Execution failed on sql: {sql}\n{exc}\nunable to rollback" ) @@ -2343,7 +2343,7 @@ def to_sql( engine : {'auto', 'sqlalchemy'}, default 'auto' Raises NotImplementedError if not set to 'auto' """ - from adbc_driver_manager import DatabaseError as ADBCDatabaseError + from adbc_driver_manager import Error import pyarrow as pa if index_label: @@ -2390,7 +2390,7 @@ def to_sql( total_inserted = cur.adbc_ingest( table_name=name, data=tbl, mode=mode, db_schema_name=schema ) - except ADBCDatabaseError as exc: + except Error as exc: raise DatabaseError( f"Failed to insert records on table={name} with {mode=}" ) from exc @@ -2534,12 +2534,12 @@ def insert_statement(self, *, num_rows: int) -> str: return insert_statement def _execute_insert(self, conn, keys, data_iter) -> int: - from sqlite3 import DatabaseError as SQLiteDatabaseError + from sqlite3 import Error data_list = list(data_iter) try: conn.executemany(self.insert_statement(num_rows=1), data_list) - except SQLiteDatabaseError as exc: + except Error as exc: raise DatabaseError("Execution failed") from exc return conn.rowcount @@ -2662,7 +2662,7 @@ def run_transaction(self): cur.close() def execute(self, sql: str | Select | TextClause, params=None): - from sqlite3 import DatabaseError as SQLiteDatabaseError + from sqlite3 import Error if not isinstance(sql, str): raise TypeError("Query must be a string unless using sqlalchemy.") @@ -2671,10 +2671,10 @@ def execute(self, sql: str | Select | TextClause, params=None): try: cur.execute(sql, *args) return cur - except SQLiteDatabaseError as exc: + except Error as exc: try: self.con.rollback() - except SQLiteDatabaseError as inner_exc: # pragma: no cover + except Error as inner_exc: # pragma: no cover ex = DatabaseError( f"Execution failed on sql: {sql}\n{exc}\nunable to rollback" ) From 7b6dd972e16de3c04a6821babce0788fd48b08a7 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Tue, 11 Feb 2025 20:44:20 -0300 Subject: [PATCH 4/7] chore: update docs --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 570faa00e97a8..ee07570cdd131 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -76,6 +76,8 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) +- Refactor classes in ``pandas.io.sql`` to favor their own implementation of ``execute`` instead of relying on driver's ``execute``. +- Refactor classes in ``pandas.io.sql`` to standardize database related exceptions. :class:`.DatabaseError` is raised when an error is encountered at the database driver - context of the original exception is preserved. .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: From 2d07c53de507a8f3ad332479caef1284886ef4f5 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Wed, 12 Feb 2025 08:25:25 -0300 Subject: [PATCH 5/7] chore: update whatsnew --- doc/source/whatsnew/v3.0.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ee07570cdd131..be226e8b2e428 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -76,8 +76,7 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) -- Refactor classes in ``pandas.io.sql`` to favor their own implementation of ``execute`` instead of relying on driver's ``execute``. -- Refactor classes in ``pandas.io.sql`` to standardize database related exceptions. :class:`.DatabaseError` is raised when an error is encountered at the database driver - context of the original exception is preserved. +- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: From 6f4ce034dd30f5ecf0ff8715880b34e199633f5d Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Wed, 12 Feb 2025 15:31:41 -0300 Subject: [PATCH 6/7] chore: using 'import_optional_dependency' to import pyarrow and changing import order --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f27a59c9a826a..8e75c61e1744d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2343,8 +2343,8 @@ def to_sql( engine : {'auto', 'sqlalchemy'}, default 'auto' Raises NotImplementedError if not set to 'auto' """ + pa = import_optional_dependency("pyarrow") from adbc_driver_manager import Error - import pyarrow as pa if index_label: raise NotImplementedError( From 8b7241a72455a7a2452e46971789f1f86753a0c7 Mon Sep 17 00:00:00 2001 From: Guilherme Martins Crocetti <24530683+gmcrocetti@users.noreply.github.com> Date: Wed, 12 Feb 2025 16:03:00 -0300 Subject: [PATCH 7/7] docs: add issue number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index be226e8b2e428..9fa83e6a10813 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -70,13 +70,13 @@ Other enhancements - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) +- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) -- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: