Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rmspe test stat #22

Merged
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ in your terminal.
1. Follow steps 1-3 from `For Users`
2. Create a hatch environment `hatch env create`
3. Open a hatch shell `hatch shell`
4. Validate your installation by running `hatch run tests:test`
4. Validate your installation by running `hatch run dev:test`
2 changes: 1 addition & 1 deletion src/causal_validation/__about__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = "0.0.7"
__version__ = "0.0.8"

__all__ = ["__version__"]
13 changes: 11 additions & 2 deletions src/causal_validation/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Dataset:
yte: Float[np.ndarray, "M 1"]
_start_date: dt.date
counterfactual: tp.Optional[Float[np.ndarray, "M 1"]] = None
synthetic: tp.Optional[Float[np.ndarray, "M 1"]] = None
_name: str = None

def to_df(
Expand Down Expand Up @@ -151,7 +152,13 @@ def drop_unit(self, idx: int) -> Dataset:
Xtr = np.delete(self.Xtr, [idx], axis=1)
Xte = np.delete(self.Xte, [idx], axis=1)
return Dataset(
Xtr, Xte, self.ytr, self.yte, self._start_date, self.counterfactual
Xtr,
Xte,
self.ytr,
self.yte,
self._start_date,
self.counterfactual,
self.synthetic,
)

def to_placebo_data(self, to_treat_idx: int) -> Dataset:
Expand Down Expand Up @@ -204,4 +211,6 @@ def reassign_treatment(
) -> Dataset:
Xtr = data.Xtr
Xte = data.Xte
return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
return Dataset(
Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
)
10 changes: 9 additions & 1 deletion src/causal_validation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
class Result:
effect: Effect
counterfactual: Float[NPArray, "N 1"]
synthetic: Float[NPArray, "N 1"]
observed: Float[NPArray, "N 1"]


@dataclass
class AZCausalWrapper:
model: Estimator
error_estimator: tp.Optional[Error] = None
_az_result: Result = None
_az_result: _Result = None

def __post_init__(self):
self._model_name = self.model.__class__.__name__
Expand All @@ -37,6 +38,7 @@ def __call__(self, data: Dataset, **kwargs) -> Result:
res = Result(
effect=result.effect,
counterfactual=self.counterfactual,
synthetic=self.synthetic,
observed=self.observed,
)
return res
Expand All @@ -47,6 +49,12 @@ def counterfactual(self) -> Float[NPArray, "N 1"]:
c_factual = df.loc[:, "CF"].values.reshape(-1, 1)
return c_factual

@property
def synthetic(self) -> Float[NPArray, "N 1"]:
df = self._az_result.effect.by_time
synth_control = df.loc[:, "C"].values.reshape(-1, 1)
return synth_control

@property
def observed(self) -> Float[NPArray, "N 1"]:
df = self._az_result.effect.by_time
Expand Down
3 changes: 2 additions & 1 deletion src/causal_validation/transforms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from causal_validation.transforms.noise import Noise
from causal_validation.transforms.periodic import Periodic
from causal_validation.transforms.trends import Trend

__all__ = ["Trend", "Periodic"]
__all__ = ["Trend", "Periodic", "Noise"]
8 changes: 6 additions & 2 deletions src/causal_validation/transforms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def apply_values(
ytr = ytr + pre_intervention_vals[:, :1]
Xte = Xte + post_intervention_vals[:, 1:]
yte = yte + post_intervention_vals[:, :1]
return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
return Dataset(
Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
)


@dataclass(kw_only=True)
Expand All @@ -91,4 +93,6 @@ def apply_values(
ytr = ytr * pre_intervention_vals
Xte = Xte * post_intervention_vals
yte = yte * post_intervention_vals
return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
return Dataset(
Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
)
30 changes: 30 additions & 0 deletions src/causal_validation/transforms/noise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataclasses import (
    dataclass,
    field,
)
from typing import Tuple

from jaxtyping import Float
import numpy as np
from scipy.stats import norm

from causal_validation.data import Dataset
from causal_validation.transforms.base import AdditiveTransform
from causal_validation.transforms.parameter import TimeVaryingParameter


@dataclass(kw_only=True)
class Noise(AdditiveTransform):
    """
    Transform the treatment by adding TimeVaryingParameter noise terms sampled from
    a specified sampling distribution. By default, the sampling distribution is
    Normal with 0 loc and 0.1 scale.
    """

    # Use default_factory so each Noise instance owns its own
    # TimeVaryingParameter. A plain class-level default instance would be
    # shared (aliased) by every Noise object, so mutating one would
    # silently affect all of them.
    noise_dist: TimeVaryingParameter = field(
        default_factory=lambda: TimeVaryingParameter(sampling_dist=norm(0, 0.1))
    )
    _slots: Tuple[str] = ("noise_dist",)

    def get_values(self, data: Dataset) -> Float[np.ndarray, "N D"]:
        """Return an ``(n_timepoints, n_units + 1)`` array of additive noise.

        Only column 0 receives sampled noise; the remaining columns are left
        at zero. (Per the additive transform's slicing convention, column 0
        is added to the treated series and columns 1: to the controls, so
        only the treatment is perturbed — presumably by design; confirm.)
        """
        noise = np.zeros((data.n_timepoints, data.n_units + 1))
        noise_treatment = self.noise_dist.get_value(
            n_units=1, n_timepoints=data.n_timepoints
        ).reshape(-1)
        noise[:, 0] = noise_treatment
        return noise
21 changes: 2 additions & 19 deletions src/causal_validation/validation/placebo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@
Column,
DataFrameSchema,
)
from rich import box
from rich.progress import (
Progress,
ProgressBar,
track,
)
from rich.table import Table
from scipy.stats import ttest_1samp
from tqdm import (
tqdm,
Expand All @@ -30,6 +28,7 @@
AZCausalWrapper,
Result,
)
from causal_validation.validation.testing import TestResultFrame

PlaceboSchema = DataFrameSchema(
{
Expand All @@ -46,13 +45,12 @@


@dataclass
class PlaceboTestResult:
class PlaceboTestResult(TestResultFrame):
effects: tp.Dict[tp.Tuple[str, str], tp.List[Result]]

def _model_to_df(
self, model_name: str, dataset_name: str, effects: tp.List[Result]
) -> pd.DataFrame:
breakpoint()
_effects = [e.effect.percentage().value for e in effects]
_n_effects = len(_effects)
expected_effect = np.mean(_effects)
Expand All @@ -79,21 +77,6 @@ def to_df(self) -> pd.DataFrame:
PlaceboSchema.validate(df)
return df

def summary(self, precision: int = 4) -> Table:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

placeholder: I assume this method is migrated elsewhere

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, a new parent class TestResultFrame is created and it owns the summary method. PlaceboTestResult and RMSPETestResult both utilize this method as subclasses.

table = Table(show_header=True, box=box.MARKDOWN)
df = self.to_df()
numeric_cols = df.select_dtypes(include=[np.number])
df.loc[:, numeric_cols.columns] = np.round(numeric_cols, decimals=precision)

for column in df.columns:
table.add_column(str(column), style="magenta")

for _, value_list in enumerate(df.values.tolist()):
row = [str(x) for x in value_list]
table.add_row(*row)

return table


@dataclass
class PlaceboTest:
Expand Down
133 changes: 133 additions & 0 deletions src/causal_validation/validation/rmspe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from dataclasses import dataclass
import typing as tp

from jaxtyping import Float
import numpy as np
import pandas as pd
from pandera import (
Check,
Column,
DataFrameSchema,
)
from rich import box
from rich.progress import (
Progress,
ProgressBar,
track,
)

from causal_validation.validation.placebo import PlaceboTest
from causal_validation.validation.testing import (
RMSPETestStatistic,
TestResult,
TestResultFrame,
)

# Output contract for the RMSPE results table: one row per (model, dataset)
# pair. ``coerce=True`` casts numeric columns to float before validation,
# and the p-value is additionally constrained to the closed interval [0, 1].
RMSPESchema = DataFrameSchema(
    {
        "Model": Column(str),
        "Dataset": Column(str),
        "Test statistic": Column(float, coerce=True),
        "p-value": Column(
            float,
            checks=[
                Check.greater_than_or_equal_to(0.0),
                Check.less_than_or_equal_to(1.0),
            ],
            coerce=True,
        ),
    }
)


@dataclass
class RMSPETestResult(TestResultFrame):
    """Container for RMSPE placebo-test output.

    Stores, per (model, dataset) pair, the treated unit's RMSPE test
    statistic and p-value, alongside the test statistics obtained for every
    pseudo-treated (placebo) control unit. Inherits tabular summary
    behaviour from TestResultFrame.
    """

    treatment_test_results: tp.Dict[tp.Tuple[str, str], TestResult]
    pseudo_treatment_test_statistics: tp.Dict[tp.Tuple[str, str], tp.List[Float]]

    def to_df(self) -> pd.DataFrame:
        """Tabulate the treated-unit results, one row per (model, dataset).

        The frame is validated against ``RMSPESchema`` before being returned.
        """
        # Build one single-row frame per result and concatenate, so every
        # row keeps index 0 — matching the schema's expectations downstream.
        frames = [
            pd.DataFrame(
                [
                    {
                        "Model": model_name,
                        "Dataset": dataset_name,
                        "Test statistic": outcome.test_statistic,
                        "p-value": outcome.p_value,
                    }
                ]
            )
            for (model_name, dataset_name), outcome in self.treatment_test_results.items()
        ]
        combined = pd.concat(frames)
        RMSPESchema.validate(combined)
        return combined


@dataclass
class RMSPETest(PlaceboTest):
    """
    A subclass of PlaceboTest calculates RMSPE as test statistic for all units.
    Given the RMSPE test stats, p-value for actual treatment is calculated.
    """

    def execute(self, verbose: bool = True) -> RMSPETestResult:
        """Run the RMSPE-based placebo test for each (model, dataset) pair.

        For every dataset, each model is fit once on the true treated unit
        and once per control unit (with that control pseudo-treated). A
        rank-based p-value is then computed from where the treated unit's
        RMSPE statistic falls among the placebo statistics.

        Args:
            verbose: When True, render rich progress bars while running.

        Returns:
            RMSPETestResult holding, per (model, dataset) pair, the treated
            unit's test statistic and p-value plus all placebo statistics.
        """
        treatment_results, pseudo_treatment_results = {}, {}
        datasets = self.dataset_dict
        n_datasets = len(datasets)
        # Total number of control units across *all* datasets.
        # NOTE(review): this total is reused below as the p-value
        # denominator, yet the placebo statistics are collected per dataset
        # (dataset.n_units of them) — confirm this is intended when more
        # than one dataset is supplied.
        n_control = sum([d.n_units for d in datasets.values()])
        rmspe = RMSPETestStatistic()
        with Progress(disable=not verbose) as progress:
            model_task = progress.add_task(
                "[red]Models", total=len(self.models), visible=verbose
            )
            data_task = progress.add_task(
                "[blue]Datasets", total=n_datasets, visible=verbose
            )
            unit_task = progress.add_task(
                f"[green]Treatment and Control Units",
                total=n_control + 1,
                visible=verbose,
            )
            for data_name, dataset in datasets.items():
                progress.update(data_task, advance=1)
                for model in self.models:
                    progress.update(unit_task, advance=1)
                    # Fit on the genuinely treated unit.
                    treatment_result = model(dataset)
                    # Number of pre-intervention observations; used as the
                    # split index when computing the RMSPE statistic.
                    treatment_idx = dataset.ytr.shape[0]
                    treatment_test_stat = rmspe(
                        dataset,
                        treatment_result.counterfactual,
                        treatment_result.synthetic,
                        treatment_idx,
                    )
                    progress.update(model_task, advance=1)
                    # Re-fit with each control unit pseudo-treated in turn.
                    placebo_test_stats = []
                    for i in range(dataset.n_units):
                        progress.update(unit_task, advance=1)
                        placebo_data = dataset.to_placebo_data(i)
                        result = model(placebo_data)
                        placebo_test_stats.append(
                            rmspe(
                                placebo_data,
                                result.counterfactual,
                                result.synthetic,
                                treatment_idx,
                            )
                        )
                    # Rank-based p-value: 1 (for the treated unit itself)
                    # plus the count of placebo statistics strictly greater
                    # than the treated statistic, over the total unit count.
                    pval_idx = 1
                    for p_stat in placebo_test_stats:
                        pval_idx += 1 if treatment_test_stat < p_stat else 0
                    pval = pval_idx / (n_control + 1)
                    treatment_results[(model._model_name, data_name)] = TestResult(
                        p_value=pval, test_statistic=treatment_test_stat
                    )
                    pseudo_treatment_results[(model._model_name, data_name)] = (
                        placebo_test_stats
                    )
        return RMSPETestResult(
            treatment_test_results=treatment_results,
            pseudo_treatment_test_statistics=pseudo_treatment_results,
        )
Loading
Loading