Skip to content

Commit

Permalink
tuple length check
Browse files: browse the repository at this point in the history
  • Loading branch information
samukweku committed Feb 1, 2023
1 parent 0f119ad commit d3b09ad
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 110 deletions.
45 changes: 17 additions & 28 deletions janitor/functions/mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,40 +103,29 @@ def mutate(

args_to_process = []
for num, arg in enumerate(args):
check(f"Argument {num} in the mutate function", arg, [tuple])
if len(arg) < 2:
raise ValueError(
f"Argument {num} should have a minimum length of 2, "
f"instead got {len(arg)}"
)
if len(arg) > 3:
raise ValueError(
f"Argument {num} should have a maximum length of 3, "
f"instead got {len(arg)}"
)
entry = SD(*arg)
func = entry.func
names = entry.names_glue
check(
f"The function (position 1 in the tuple) for argument {num} ",
func,
[str, callable, list, tuple],
)
if isinstance(func, (list, tuple)):
for number, funcn in enumerate(func):
check(f"Argument {num} in the mutate function", arg, [dict, tuple])
if isinstance(arg, dict):
for col, func in arg.items():
check(
f"Entry {number} in the function sequence "
f"for argument {num}",
funcn,
[str, callable],
)
if names:
check(
f"The names (position 2 in the tuple) for argument {num} ",
names,
[str],
)
args_to_process.append(entry)
if isinstance(func, dict):
for _, funcn in func.items():
check(
f"func in nested dictionary for "
f"{col} in argument {num}",
funcn,
[str, callable],
)
else:
if len(arg) != 3:
raise ValueError(
f"The tuple length of Argument {num} should be 3, "
f"instead got {len(arg)}"
)

by_is_true = by is not None
grp = None
Expand Down
43 changes: 2 additions & 41 deletions janitor/functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,47 +670,8 @@ class SD(NamedTuple):
Subset of Data.
Used in `mutate` and `summarize`
for computation on multiple columns
!!! info "New in version 0.25.0"
"""

columns: Any
func: Optional[Union[str, Callable, list, tuple]]
names_glue: Optional[str] = None


def _process_SD(df, arg):
"""
process SD for use in `mutate` or `summarize`
"""
columns = arg.columns
func = arg.func
names = arg.names_glue
columns = _select_index([columns], df, axis="columns")
columns = df.columns[columns]
if not isinstance(func, (list, tuple)):
func = [func]
func_names = [
funcn.__name__ if callable(funcn) else funcn for funcn in func
]
counts = None
dupes = set()
if len(func) > 1:
counts = Counter(func_names)
counts = {key: 0 for key, value in counts.items() if value > 1}
# deal with duplicate function names
if counts:
func_list = []
for funcn in func_names:
if funcn in counts:
if names:
name = f"{funcn}{counts[funcn]}"
else:
name = f"{counts[funcn]}"
dupes.add(name)
func_list.append(name)
counts[funcn] += 1
else:
func_list.append(funcn)
func_names = func_list
counts = None
return columns, names, zip(func_names, func), dupes
func: Union[str, Callable, list]
names: Optional[str] = None
49 changes: 8 additions & 41 deletions tests/functions/test_mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ def test_empty_args(dataframe):


@pytest.mark.functions
def test_type_args(dataframe):
"""Raise if arg is not a tuple"""
def test_dict_args_error(dataframe):
"""Raise if arg is not a dict/tuple"""
with pytest.raises(TypeError, match="Argument 0 in the mutate function.+"):
dataframe.mutate({"a": "sum"})

Expand All @@ -28,50 +28,17 @@ def test_tuple_length_error_max(dataframe):


@pytest.mark.functions
def test_tuple_length_error_min(dataframe):
"""Raise if length of tuple is < 2"""
def test_tuple_length_error(dataframe):
"""Raise if length of tuple is not 3"""
with pytest.raises(
ValueError, match=r"Argument 0 should have a minimum length of 2.+"
ValueError, match="The tuple length of Argument 0 should be 3,.+"
):
dataframe.mutate(("a",))
dataframe.mutate(("a", "sum"))


@pytest.mark.functions
def test_tuple_name_error(dataframe):
"""Raise if name is provided, and is not a string"""
with pytest.raises(
TypeError,
match=r"The names \(position 2 in the tuple\) for argument 0.+",
):
dataframe.mutate(("a", "sum", 1))


@pytest.mark.functions
def test_tuple_func_error(dataframe):
"""Raise if func is not a string/callable/list/tuple"""
with pytest.raises(
TypeError,
match=r"The function \(position 1 in the tuple\) for argument 0.+",
):
dataframe.mutate(("a", 1, "name"))


@pytest.mark.functions
def test_tuple_func_seq_error(dataframe):
"""Raise if func is a list/tuple, and its content is not str/callable"""
with pytest.raises(
TypeError, match=r"Entry 1 in the function sequence for argument 0.+"
):
dataframe.mutate(("a", [np.sum, 1], "name"))


args = [("a", lambda f: np.sqrt(f)), ("a", "sqrt"), ("a", np.sqrt)]


@pytest.mark.parametrize("test_input", args)
@pytest.mark.functions
def test_args_various(dataframe, test_input):
"""Test output for various arguments"""
def test_dict_str(dataframe):
"""Test output for dict"""
expected = dataframe.assign(a=dataframe.a.transform("sqrt"))
actual = dataframe.mutate(test_input)
assert_frame_equal(expected, actual)
Expand Down

0 comments on commit d3b09ad

Please sign in to comment.