From 0b7e275de273ff9e04b0e046f44590b97ae1b99d Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 13 Mar 2022 09:31:04 +0000 Subject: [PATCH 1/5] use bfill only for coalesce --- janitor/functions/coalesce.py | 39 ++++++++++++++++++++++------------- midpoint.csv | 3 +++ 2 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 midpoint.csv diff --git a/janitor/functions/coalesce.py b/janitor/functions/coalesce.py index 928c814b9..37a2c3299 100644 --- a/janitor/functions/coalesce.py +++ b/janitor/functions/coalesce.py @@ -17,14 +17,18 @@ def coalesce( ) -> pd.DataFrame: """Coalesce two or more columns of data in order of column names provided. - Given the list of column names, `coalesce` finds and returns the first - non-missing value from these columns, for every row in the input dataframe. - If all the column values are null for a particular row, then the - `default_value` will be filled in. + Given the variable arguments of column names, + `coalesce` finds and returns the first non-missing value + from these columns, for every row in the input dataframe. + If all the column values are null for a particular row, + then the `default_value` will be filled in. + + If `target_column_name` is not provided, + then the first column is coalesced. This method does not mutate the original DataFrame. - Example: Using `coalesce` with 3 columns, "a", "b" and "c". + Example: Use `coalesce` with 3 columns, "a", "b" and "c". >>> import pandas as pd >>> import numpy as np @@ -34,13 +38,21 @@ def coalesce( ... "b": [2, 3, np.nan], ... "c": [4, np.nan, np.nan], ... }) + >>> df.coalesce("a", "b", "c") + a b c + 0 2.0 2.0 4.0 + 1 1.0 3.0 NaN + 2 NaN NaN NaN + + Example: Provide a target_column_name. + >>> df.coalesce("a", "b", "c", target_column_name="new_col") a b c new_col 0 NaN 2.0 4.0 2.0 1 1.0 3.0 NaN 1.0 2 NaN NaN NaN NaN - Example: Providing a default value. + Example: Provide a default value. >>> import pandas as pd >>> import numpy as np @@ -93,14 +105,13 @@ def coalesce( if target_column_name is None: target_column_name = column_names[0] - # bfill/ffill combo is faster than combine_first - outcome = ( - df.filter(column_names) - .bfill(axis="columns") - .ffill(axis="columns") - .iloc[:, 0] - ) + + outcome = df.filter(column_names).bfill(axis="columns").iloc[:, 0] if outcome.hasnans and (default_value is not None): outcome = outcome.fillna(default_value) - return df.assign(**{target_column_name: outcome}) + # to allow for non-strings ... GH #1016 + df = df.copy() + df[target_column_name] = outcome + + return df diff --git a/midpoint.csv b/midpoint.csv new file mode 100644 index 000000000..82efc3cf9 --- /dev/null +++ b/midpoint.csv @@ -0,0 +1,3 @@ +,a_new,b +1,2,b +2,3,c From dd11ced06e5c5594b7a1fddec858c1ea4577de22 Mon Sep 17 00:00:00 2001 From: samukweku Date: Sun, 13 Mar 2022 09:31:16 +0000 Subject: [PATCH 2/5] use bfill only for coalesce --- midpoint.csv | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 midpoint.csv diff --git a/midpoint.csv b/midpoint.csv deleted file mode 100644 index 82efc3cf9..000000000 --- a/midpoint.csv +++ /dev/null @@ -1,3 +0,0 @@ -,a_new,b -1,2,b -2,3,c From 302653d97383b67658f0cc27cfb42009cde7ca79 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sun, 13 Mar 2022 23:04:58 +1100 Subject: [PATCH 3/5] Update coalesce.py --- janitor/functions/coalesce.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/janitor/functions/coalesce.py b/janitor/functions/coalesce.py index 37a2c3299..a9568bac7 100644 --- a/janitor/functions/coalesce.py +++ b/janitor/functions/coalesce.py @@ -110,8 +110,6 @@ def coalesce( if outcome.hasnans and (default_value is not None): outcome = outcome.fillna(default_value) - # to allow for non-strings ... GH #1016 - df = df.copy() - df[target_column_name] = outcome + return df.assign(**{target_column_name: outcome}) + - return df From 6734963efc68edf0b884580ed5887eb87f7c0219 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 13 Mar 2022 12:05:32 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- janitor/functions/coalesce.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/janitor/functions/coalesce.py b/janitor/functions/coalesce.py index a9568bac7..d0ac070cf 100644 --- a/janitor/functions/coalesce.py +++ b/janitor/functions/coalesce.py @@ -111,5 +111,3 @@ def coalesce( outcome = outcome.fillna(default_value) return df.assign(**{target_column_name: outcome}) - - From e819beda7ed29c91a232e3615ef5c18ded05e96e Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sun, 13 Mar 2022 23:06:54 +1100 Subject: [PATCH 5/5] Update coalesce.py