Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DEPR] deprecate pivot_wider #1263

Merged
merged 6 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
- [INF] Update some `mkdocs` compatibility code. PR #1231 @thatlittleboy
- [INF] Migrated docstring style from Sphinx to Google for better compatibility with `mkdocstrings`. PR #1235 @thatlittleboy
- [INF] Prevent selection of chevrons (`>>>`) and outputs in Example code blocks. PR #1237 @thatlittleboy
- [DEPR] Add deprecation warnings for `process_text`, `rename_column`, `rename_columns`, `filter_on`, `remove_columns`, `fill_direction`. #1045 @samukweku
- [DEPR] Add deprecation warnings for `process_text`, `rename_column`, `rename_columns`, `filter_on`, `remove_columns`, `fill_direction`. Issue #1045 @samukweku
- [ENH] `pivot_longer` now supports named groups where `names_pattern` is a regular expression. A dictionary can now be passed to `names_pattern`, and is internally evaluated as a list/tuple of regular expressions. Issue #1209 @samukweku
- [ENH] Improve selection in `conditional_join`. Issue #1223 @samukweku
- [ENH] Performance improvement for range joins in `conditional_join`, when `use_numba = False`. PR #1256 @samukweku
- [ENH] Add `col` class for selecting columns within an expression. Currently limited to use within `conditional_join`. PR #1260 @samukweku
- [DEPR] Add deprecation warning for `pivot_wider`. Issue #1045 @samukweku

## [v0.24.0] - 2022-11-12

Expand Down
38 changes: 19 additions & 19 deletions janitor/functions/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
from pandas.api.types import (
is_list_like,
is_categorical_dtype,
is_extension_array_dtype,
)
from pandas.core.dtypes.concat import concat_compat

from janitor.functions.utils import (
get_index_labels,
_computations_expand_grid,
)
from janitor.utils import check
from janitor.utils import check, refactored_function


@pf.register_dataframe_method
Expand Down Expand Up @@ -1123,13 +1124,8 @@ def _base_melt(
reps = len(columns)
outcome = {name: columns.get_level_values(name) for name in columns.names}

# offers a fast route
# while still returning the underlying array
# which could be an extension array
# thus helping in preserving dtypes where possible
if df._mgr.any_extension_types:
values = df._mgr
values = [values.iget_values(i) for i in range(df.columns.size)]
if df.dtypes.map(is_extension_array_dtype).any(axis=None):
values = [arr._values for _, arr in df.items()]
values = concat_compat(values)
else:
values = df._values.ravel(order="F")
Expand Down Expand Up @@ -1168,15 +1164,8 @@ def _pivot_longer_dot_value(
"""
if np.count_nonzero(mapping.columns == ".value") > 1:
outcome = mapping.pop(".value")
out = outcome.iloc[:, 0]
# for loop preferred over agg
# primarily for speed
# if the column is a large array
# direct addition is surprisingly faster than
# the convenient agg(','.join, axis = 1) option
for _, val in outcome.iloc[:, 1:].items():
out += val
mapping[".value"] = out
outcome = outcome.sum(axis=1, numeric_only=False)
mapping.insert(loc=0, column=".value", value=outcome)

exclude = {
word
Expand Down Expand Up @@ -1234,7 +1223,7 @@ def _pivot_longer_dot_value(
indexer = pd.DataFrame(indexer, copy=False)

indexer.columns = columns
df = df.reindex(columns=indexer)
df = df.reindex(columns=indexer, copy=False)
df.columns = df.columns.get_level_values(".value")
values = _dict_from_grouped_names(df=df)
outcome = indexer.loc[indexer[".value"] == outcome[0], other]
Expand Down Expand Up @@ -1286,7 +1275,7 @@ def _headers_single_series(df: pd.DataFrame, mapping: pd.Series) -> tuple:
df.columns = [mapping, positions]
indexer = group_size.index, np.arange(group_max)
indexer = pd.MultiIndex.from_product(indexer)
df = df.reindex(columns=indexer)
df = df.reindex(columns=indexer, copy=False)
df.columns = df.columns.get_level_values(0)
else:
df.columns = mapping
Expand Down Expand Up @@ -1394,6 +1383,12 @@ def _final_frame_longer(


@pf.register_dataframe_method
@refactored_function(
message=(
"This function will be deprecated in a 1.x release. "
"Please use `pd.DataFrame.pivot` instead."
)
)
def pivot_wider(
df: pd.DataFrame,
index: Optional[Union[list, str]] = None,
Expand All @@ -1408,6 +1403,11 @@ def pivot_wider(
) -> pd.DataFrame:
"""Reshapes data from *long* to *wide* form.

!!!note

This function will be deprecated in a 1.x release.
Please use `pd.DataFrame.pivot` instead.

The number of columns is increased, while the number
of rows is decreased. It is the inverse of the
[`pivot_longer`][janitor.functions.pivot.pivot_longer]
Expand Down
1 change: 0 additions & 1 deletion janitor/functions/then.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
message="This function will be deprecated in a 1.x release. "
"Kindly use `pd.DataFrame.pipe` instead."
)
@pf.register_dataframe_method
def then(df: pd.DataFrame, func: Callable) -> pd.DataFrame:
"""Add an arbitrary function to run in the `pyjanitor` method chain.

Expand Down