From f3f17cb01a809e5f6299f4fa387c97adeac9126d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Feb 2025 18:38:28 +0100 Subject: [PATCH] [backport 2.3.x] TST (string): from_dummies, dropna (#60818) (#60856) BUG(string): from_dummies, dropna (#60818) (cherry picked from commit ea7ff0ea4606f47a672f75793f4ea2b3eb0b87f5) Co-authored-by: jbrockmendel --- pandas/tests/frame/methods/test_dropna.py | 8 ++++---- pandas/tests/frame/test_arithmetic.py | 14 ++++++++++---- pandas/tests/reshape/test_from_dummies.py | 7 +++---- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 87a43b4e67c3f..0d4a6a065111f 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -184,10 +182,12 @@ def test_dropna_multiple_axes(self): with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") - def test_dropna_tz_aware_datetime(self): + def test_dropna_tz_aware_datetime(self, using_infer_string): # GH13407 + df = DataFrame() + if using_infer_string: + df.columns = df.columns.astype("str") dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) df["Time"] = [dt1] diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index eb85c108ca238..195126f1c5382 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -11,8 +11,7 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td import pandas as pd @@ -2128,12 +2127,19 @@ def test_enum_column_equality(): tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_mixed_col_index_dtype(): +def test_mixed_col_index_dtype(using_infer_string): # GH 47382 df1 = DataFrame(columns=list("abc"), data=1.0, index=[0]) df2 = DataFrame(columns=list("abc"), data=0.0, index=[0]) df1.columns = df2.columns.astype("string") result = df1 + df2 expected = DataFrame(columns=list("abc"), data=1.0, index=[0]) + if using_infer_string: + # df2.columns.dtype will be "str" instead of object, + # so the aligned result will be "string", not object + if HAS_PYARROW: + dtype = "string[pyarrow]" + else: + dtype = "string" + expected.columns = expected.columns.astype(dtype) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py index 6009b263a83c5..59c81c545697a 100644 --- a/pandas/tests/reshape/test_from_dummies.py +++ b/pandas/tests/reshape/test_from_dummies.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, Series, @@ -363,7 +361,6 @@ def test_with_prefix_contains_get_dummies_NaN_column(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "default_category, expected", [ @@ -400,11 +397,13 @@ def test_with_prefix_contains_get_dummies_NaN_column(): ], ) def test_with_prefix_default_category( - dummies_with_unassigned, default_category, expected + dummies_with_unassigned, default_category, expected, using_infer_string ): result = from_dummies( dummies_with_unassigned, sep="_", default_category=default_category ) + if using_infer_string: + expected = expected.astype("str") tm.assert_frame_equal(result, expected)