diff --git a/src/data_morph/data/stats.py b/src/data_morph/data/stats.py index d3c52669..28cbaf6f 100644 --- a/src/data_morph/data/stats.py +++ b/src/data_morph/data/stats.py @@ -5,7 +5,8 @@ import pandas as pd SummaryStatistics = namedtuple( - 'SummaryStatistics', ['x_mean', 'y_mean', 'x_stdev', 'y_stdev', 'correlation'] + 'SummaryStatistics', + ['x_mean', 'y_mean', 'x_med', 'y_med', 'x_stdev', 'y_stdev', 'correlation'], ) SummaryStatistics.__doc__ = ( 'Named tuple containing the summary statistics for plotting/analysis.' @@ -24,12 +25,14 @@ def get_values(df: pd.DataFrame) -> SummaryStatistics: Returns ------- SummaryStatistics - Named tuple consisting of mean and standard deviations of x and y, + Named tuple consisting of mean, median and standard deviations of x and y, along with the Pearson correlation coefficient between the two. """ return SummaryStatistics( df.x.mean(), df.y.mean(), + df.x.median(), + df.y.median(), df.x.std(), df.y.std(), df.corr().x.y, diff --git a/src/data_morph/plotting/static.py b/src/data_morph/plotting/static.py index e91047ec..ffd9c138 100644 --- a/src/data_morph/plotting/static.py +++ b/src/data_morph/plotting/static.py @@ -60,7 +60,7 @@ def plot( res = get_values(df) - labels = ('X Mean', 'Y Mean', 'X SD', 'Y SD', 'Corr.') + labels = ('X Mean', 'Y Mean', 'X Med.', 'Y Med.', 'X SD', 'Y SD', 'Corr.') locs = np.linspace(0.8, 0.2, num=len(labels)) max_label_length = max([len(label) for label in labels]) max_stat = int(np.log10(np.max(np.abs(res)))) + 1 diff --git a/tests/data/test_stats.py b/tests/data/test_stats.py index c99134ed..6b1df53b 100644 --- a/tests/data/test_stats.py +++ b/tests/data/test_stats.py @@ -13,6 +13,8 @@ def test_stats(): assert stats.x_mean == data.x.mean() assert stats.y_mean == data.y.mean() + assert stats.x_med == data.x.median() + assert stats.y_med == data.y.median() assert stats.x_stdev == data.x.std() assert stats.y_stdev == data.y.std() assert stats.correlation == data.corr().x.y