diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b4aa6447c0a1b..565cd1c54d42c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -703,6 +703,7 @@ Indexing - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) - Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) +- Bug in :meth:`Series.mask` unexpectedly filling ``pd.NA`` (:issue:`60729`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c3f535df9ce2..f0fc8fa4d695f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -28,6 +28,7 @@ from pandas._libs import lib from pandas._libs.lib import is_range_indexer +from pandas._libs.missing import NA from pandas._libs.tslibs import ( Period, Timestamp, @@ -9701,6 +9702,7 @@ def _where( # align the cond to same shape as myself cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): + cond = cond.fillna(True) # CoW: Make sure reference is not kept alive if cond.ndim == 1 and self.ndim == 2: cond = cond._constructor_expanddim( @@ -9715,6 +9717,7 @@ def _where( if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) + cond = cond.fillna(True) # make sure we are boolean fill_value = bool(inplace) @@ -10097,6 +10100,16 @@ def mask( if not hasattr(cond, "__invert__"): cond = np.array(cond) + if isinstance(cond, np.ndarray): + if all( + x is NA or isinstance(x, (np.bool_, bool)) or x is np.nan + for x in cond.flatten() + ): + if not cond.flags.writeable: + cond.setflags(write=True) + cond[isna(cond)] = False + cond = cond.astype(bool) + return self._where( ~cond, other=other, diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 3c21cd0d5ca64..3bfade287bd05 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import Series import pandas._testing as tm @@ -67,3 +69,26 @@ def test_mask_inplace(): rs = s.copy() rs.mask(cond, -s, inplace=True) tm.assert_series_equal(rs, s.mask(cond, -s)) + + +@pytest.mark.parametrize( + "dtype", + [ + "Int64", + pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")), + ], +) +def test_mask_na(dtype): + # We should not be filling pd.NA. See GH#60729 + series = Series([None, 1, 2, None, 3, 4, None], dtype=dtype) + cond = series <= 2 + expected = Series([None, -99, -99, None, 3, 4, None], dtype=dtype) + + result = series.mask(cond, -99) + tm.assert_series_equal(result, expected) + + result = series.mask(cond.to_list(), -99) + tm.assert_series_equal(result, expected) + + result = series.mask(cond.to_numpy(), -99) + tm.assert_series_equal(result, expected)