Skip to content

Commit

Permalink
[ENH] Add .also() inspired by Kotlin (pyjanitor-devs#735)
Browse files Browse the repository at this point in the history
* add basic implementation with wip docs

* add also to general_functions page

* add tests for also

* add tests for args and kwargs

* workshop docstring

* workshop func explanation

* add contribution artifacts

* fix docstring codeblock rendering

* fix typo

* silence accepted flake8 errors

* Add docstrings for tests

* Update tests/functions/test_also.py

* make tests pass in python 3.6

* remove tuple unpacking

Co-authored-by: Eric Ma <[email protected]>
  • Loading branch information
sauln and ericmjl authored Sep 10, 2020
1 parent fb56c1d commit 2bd3a20
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 1 deletion.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,4 @@ Contributors
- `@VPerrollaz <https://github.com/VPerrollaz>`_ | `contributions <https://github.com/ericmjl/pyjanitor/issues?q=is%3Aclosed+mentions%3AVPerrollaz>`_
- `@UGuntupalli <https://github.com/UGuntupalli>`_ | `contributions < https://github.com/ericmjl/pyjanitor/issues?q=is%3Aclosed+mentions%3AUGuntupalli >`_
- `@mphirke <https://github.com/mphirke>`_ | `contributions <https://github.com/ericmjl/pyjanitor/issues?q=is%3Aclosed+mentions%3Amphirke>`_
- `@sauln <https://github.com/sauln>`_ | `contributions <https://github.com/ericmjl/pyjanitor/issues?q=is%3Aclosed+mentions%3Asauln>`_
3 changes: 2 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
new version (on deck)
=====================
- [ENH] Added function ``sort_timestamps_monotonically`` to timeseries functions @UGuntupalli
- [ENH] Added the complete function for converting implicit missing values
- [ENH] Added the complete function for converting implicit missing values
to explicit ones. @samukweku
- [ENH] Further simplification of expand_grid. @samukweku
- [BUGFIX] Added copy() method to original dataframe, to avoid mutation. Issue #729. @samukweku
- [ENH] Added ``also`` method for running functions in chain with no return values. @sauln

v0.20.9
=======
Expand Down
1 change: 1 addition & 0 deletions docs/reference/general_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ Other
:toctree: janitor.functions/

then
also
shuffle
count_cumulative_unique
sort_naturally
Expand Down
37 changes: 37 additions & 0 deletions janitor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2963,6 +2963,43 @@ def then(df: pd.DataFrame, func: Callable) -> pd.DataFrame:
return df


@pf.register_dataframe_method
def also(df: pd.DataFrame, func: Callable, *args, **kwargs) -> pd.DataFrame:
    """Run a side-effect function inside a ``pyjanitor`` method chain.

    ``func`` is called with a copy of the dataframe, and whatever it
    returns is discarded. The chain continues with the dataframe that
    ``.also()`` was called on, not with the output of ``func``.

    This method does not mutate the original DataFrame.

    Example usage:

    .. code-block:: python

        df = (
            pd.DataFrame(...)
            .query(...)
            .also(lambda df: print(f"DataFrame shape is: {df.shape}"))
            .transform_column(...)
            .also(lambda df: df.to_csv("midpoint.csv"))
            .also(
                lambda df: print(
                    f"Column col_name has these values: {set(df['col_name'].unique())}"
                )
            )
            .group_add(...)
        )

    :param df: A pandas dataframe.
    :param func: The function to run in the method chain. It is called
        with a copy of ``df`` as its first argument, followed by ``args``
        and ``kwargs``. Any value it returns is ignored.
    :param args: Optional positional arguments forwarded to ``func``.
    :param kwargs: Optional keyword arguments forwarded to ``func``.
    :returns: The input pandas DataFrame.
    """  # noqa: E501
    # Hand ``func`` a copy so that in-place edits made inside it do not
    # leak into the dataframe that continues down the chain.
    snapshot = df.copy()
    func(snapshot, *args, **kwargs)
    return df


@pf.register_dataframe_method
@deprecated_alias(column="column_name")
def dropnotnull(df: pd.DataFrame, column_name: Hashable) -> pd.DataFrame:
Expand Down
83 changes: 83 additions & 0 deletions tests/functions/test_also.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Unit tests for `.also()`."""
from unittest.mock import Mock
import pytest


def remove_first_two_letters_from_col_names(df):
    """Strip the first two characters from every column name, in place.

    The dataframe passed in is mutated (its ``columns`` attribute is
    reassigned) and the same object is returned.
    """
    df.columns = [label[2:] for label in df.columns]
    return df


def remove_rows_3_and_4(df):
    """Return a new dataframe without the rows labelled 3 and 4.

    ``drop`` is used without ``inplace``, so the dataframe passed in
    is left as it was.
    """
    return df.drop(3, axis=0).drop(4, axis=0)


def drop_inplace(df):
    """
    Drop the first column of the dataframe in place.

    ``inplace=True`` is normally avoided, but it is deliberate here:
    this helper exists to check that an in-place modification made
    inside the function given to `.also()` does not pass through to
    the dataframe that `.also()` returns.
    Flake8's `PD002` check is therefore silenced on that line.
    """
    first_column = df.columns[0]
    df.drop(columns=[first_column], inplace=True)  # noqa: PD002


@pytest.mark.functions
def test_also_column_manipulation_no_change(dataframe):
    """Column renames made inside `.also()` must not reach the result."""
    columns_before = tuple(dataframe.columns)
    result = dataframe.also(remove_first_two_letters_from_col_names)
    # `.also()` hands back the very same object it was called on...
    assert result is dataframe
    # ...and that object's column names are unchanged.
    assert tuple(result.columns) == columns_before


@pytest.mark.functions
def test_also_remove_rows_no_change(dataframe):
    """Rows dropped inside `.also()` must still be present in the result."""
    result = dataframe.also(remove_rows_3_and_4)
    # All nine index labels, including 3 and 4, are expected to remain.
    assert tuple(result.index) == (0, 1, 2, 3, 4, 5, 6, 7, 8)


@pytest.mark.functions
def test_also_runs_function(dataframe):
    """`.also()` must call the function exactly once and return its input."""
    spy = Mock(return_value=None)
    result = dataframe.also(spy)
    assert result is dataframe
    assert spy.call_count == 1


@pytest.mark.functions
def test_also_args(dataframe):
    """Positional arguments given to `.also()` are forwarded to the function."""
    spy = Mock(return_value=None)
    dataframe.also(spy, 5)

    # call_args[0] holds the positional arguments; index 0 is the
    # dataframe copy, so the forwarded argument sits at index 1.
    positional = spy.call_args[0]
    assert positional[1] == 5


@pytest.mark.functions
def test_also_kwargs(dataframe):
    """Keyword arguments given to `.also()` are forwarded to the function."""
    spy = Mock(return_value=None)
    dataframe.also(spy, n=5)

    # call_args[1] holds the keyword arguments of the recorded call.
    keywords = spy.call_args[1]
    assert keywords == {"n": 5}


@pytest.mark.functions
def test_also_drop_inplace(dataframe):
    """An in-place column drop inside `.also()` must not reach the result."""
    columns_before = tuple(dataframe.columns)
    result = dataframe.also(drop_inplace)
    assert columns_before == tuple(result.columns)

0 comments on commit 2bd3a20

Please sign in to comment.