Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rmspe test stat #24

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ in your terminal.
1. Follow steps 1-3 from `For Users`
2. Create a hatch environment `hatch env create`
3. Open a hatch shell `hatch shell`
4. Validate your installation by running `hatch run tests:test`
4. Validate your installation by running `hatch run dev:test`
16 changes: 16 additions & 0 deletions docs/examples/placebo_test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,22 @@
"datasets = DatasetContainer([data, complex_data], names=[\"Simple\", \"Complex\"])\n",
"PlaceboTest([model, did_model], datasets).execute().summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "15",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion src/causal_validation/__about__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = "0.0.7"
__version__ = "0.0.8"

__all__ = ["__version__"]
13 changes: 11 additions & 2 deletions src/causal_validation/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Dataset:
yte: Float[np.ndarray, "M 1"]
_start_date: dt.date
counterfactual: tp.Optional[Float[np.ndarray, "M 1"]] = None
synthetic: tp.Optional[Float[np.ndarray, "M 1"]] = None
_name: str = None

def to_df(
Expand Down Expand Up @@ -151,7 +152,13 @@ def drop_unit(self, idx: int) -> Dataset:
Xtr = np.delete(self.Xtr, [idx], axis=1)
Xte = np.delete(self.Xte, [idx], axis=1)
return Dataset(
Xtr, Xte, self.ytr, self.yte, self._start_date, self.counterfactual
Xtr,
Xte,
self.ytr,
self.yte,
self._start_date,
self.counterfactual,
self.synthetic,
)

def to_placebo_data(self, to_treat_idx: int) -> Dataset:
Expand Down Expand Up @@ -204,4 +211,6 @@ def reassign_treatment(
) -> Dataset:
Xtr = data.Xtr
Xte = data.Xte
return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
return Dataset(
Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
)
42 changes: 40 additions & 2 deletions src/causal_validation/models.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
from dataclasses import dataclass
import typing as tp

from azcausal.core.effect import Effect
from azcausal.core.error import Error
from azcausal.core.estimator import Estimator
from azcausal.core.result import Result
from azcausal.core.result import Result as _Result
from jaxtyping import Float

from causal_validation.data import Dataset
from causal_validation.types import NPArray


@dataclass
class Result:
    """Container for the output of a fitted causal model.

    Bundles the azcausal effect estimate with the time-indexed series
    produced by the fit; each series is stored as an (N, 1) column vector.
    """

    # Treatment-effect estimate produced by the azcausal estimator.
    effect: Effect
    # Counterfactual outcome series for the treated unit (the "CF" column
    # of the fitted result's by-time frame).
    counterfactual: Float[NPArray, "N 1"]
    # Synthetic-control outcome series (the "C" column).
    synthetic: Float[NPArray, "N 1"]
    # Observed treated outcome series (the "T" column).
    observed: Float[NPArray, "N 1"]


@dataclass
class AZCausalWrapper:
model: Estimator
error_estimator: tp.Optional[Error] = None
_az_result: _Result = None

def __post_init__(self):
self._model_name = self.model.__class__.__name__
Expand All @@ -21,4 +33,30 @@ def __call__(self, data: Dataset, **kwargs) -> Result:
result = self.model.fit(panel, **kwargs)
if self.error_estimator:
self.model.error(result, self.error_estimator)
return result
self._az_result = result

res = Result(
effect=result.effect,
counterfactual=self.counterfactual,
synthetic=self.synthetic,
observed=self.observed,
)
return res

@property
def counterfactual(self) -> Float[NPArray, "N 1"]:
    """Counterfactual ("CF") series of the fitted result as an (N, 1) array."""
    by_time = self._az_result.effect.by_time
    return by_time["CF"].values.reshape(-1, 1)

@property
def synthetic(self) -> Float[NPArray, "N 1"]:
    """Synthetic-control ("C") series of the fitted result as an (N, 1) array."""
    by_time = self._az_result.effect.by_time
    return by_time["C"].values.reshape(-1, 1)

@property
def observed(self) -> Float[NPArray, "N 1"]:
    """Observed treated ("T") series of the fitted result as an (N, 1) array."""
    by_time = self._az_result.effect.by_time
    return by_time["T"].values.reshape(-1, 1)
3 changes: 2 additions & 1 deletion src/causal_validation/transforms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from causal_validation.transforms.noise import Noise
from causal_validation.transforms.periodic import Periodic
from causal_validation.transforms.trends import Trend

__all__ = ["Trend", "Periodic"]
__all__ = ["Trend", "Periodic", "Noise"]
8 changes: 6 additions & 2 deletions src/causal_validation/transforms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def apply_values(
ytr = ytr + pre_intervention_vals[:, :1]
Xte = Xte + post_intervention_vals[:, 1:]
yte = yte + post_intervention_vals[:, :1]
return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
return Dataset(
Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
)


@dataclass(kw_only=True)
Expand All @@ -91,4 +93,6 @@ def apply_values(
ytr = ytr * pre_intervention_vals
Xte = Xte * post_intervention_vals
yte = yte * post_intervention_vals
return Dataset(Xtr, Xte, ytr, yte, data._start_date, data.counterfactual)
return Dataset(
Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
)
30 changes: 30 additions & 0 deletions src/causal_validation/transforms/noise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataclasses import (
    dataclass,
    field,
)
from typing import Tuple

from jaxtyping import Float
import numpy as np
from scipy.stats import norm

from causal_validation.data import Dataset
from causal_validation.transforms.base import AdditiveTransform
from causal_validation.transforms.parameter import TimeVaryingParameter


@dataclass(kw_only=True)
class Noise(AdditiveTransform):
    """
    Transform the treatment by adding TimeVaryingParameter noise terms sampled from
    a specified sampling distribution. By default, the sampling distribution is
    Normal with 0 loc and 0.1 scale.
    """

    # default_factory gives every Noise instance its own TimeVaryingParameter.
    # A plain instance default would be evaluated once at class-definition time
    # and shared (including any internal sampler state) across all instances.
    noise_dist: TimeVaryingParameter = field(
        default_factory=lambda: TimeVaryingParameter(sampling_dist=norm(0, 0.1))
    )
    _slots: Tuple[str] = ("noise_dist",)

    def get_values(self, data: Dataset) -> Float[np.ndarray, "N D"]:
        """Return an (n_timepoints, n_units + 1) array of additive noise.

        Only column 0 (the treated unit) receives sampled noise; the control
        columns stay at zero so control units are unchanged by the transform.

        Args:
            data: Dataset whose dimensions size the noise array.

        Returns:
            Noise matrix to be added to the dataset by the additive base
            transform.
        """
        noise = np.zeros((data.n_timepoints, data.n_units + 1))
        noise_treatment = self.noise_dist.get_value(
            n_units=1, n_timepoints=data.n_timepoints
        ).reshape(-1)
        noise[:, 0] = noise_treatment
        return noise
2 changes: 2 additions & 0 deletions src/causal_validation/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import typing as tp

import numpy as np
from scipy.stats._distn_infrastructure import (
rv_continuous,
rv_discrete,
Expand All @@ -10,3 +11,4 @@
InterventionTypes = tp.Literal["pre-intervention", "post-intervention", "both"]
RandomVariable = tp.Union[rv_continuous, rv_discrete]
Number = tp.Union[float, int]
NPArray = np.ndarray
38 changes: 14 additions & 24 deletions src/causal_validation/validation/placebo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,26 @@
Column,
DataFrameSchema,
)
from rich import box
from rich.progress import (
Progress,
ProgressBar,
track,
)
from rich.table import Table
from scipy.stats import ttest_1samp
from tqdm import (
tqdm,
trange,
)

from causal_validation.data import (
Dataset,
DatasetContainer,
)
from causal_validation.models import AZCausalWrapper
from causal_validation.models import (
AZCausalWrapper,
Result,
)
from causal_validation.validation.testing import TestResultFrame

PlaceboSchema = DataFrameSchema(
{
Expand All @@ -39,13 +45,13 @@


@dataclass
class PlaceboTestResult:
effects: tp.Dict[tp.Tuple[str, str], tp.List[Effect]]
class PlaceboTestResult(TestResultFrame):
effects: tp.Dict[tp.Tuple[str, str], tp.List[Result]]

def _model_to_df(
self, model_name: str, dataset_name: str, effects: tp.List[Effect]
self, model_name: str, dataset_name: str, effects: tp.List[Result]
) -> pd.DataFrame:
_effects = [effect.value for effect in effects]
_effects = [e.effect.percentage().value for e in effects]
_n_effects = len(_effects)
expected_effect = np.mean(_effects)
stddev_effect = np.std(_effects)
Expand All @@ -71,21 +77,6 @@ def to_df(self) -> pd.DataFrame:
PlaceboSchema.validate(df)
return df

def summary(self, precision: int = 4) -> Table:
    """Render the result dataframe as a markdown-style rich table.

    Numeric columns are rounded to ``precision`` decimal places before
    display, and every cell is stringified for rendering.

    Args:
        precision: Number of decimal places for numeric columns.

    Returns:
        A rich ``Table`` ready for console display.
    """
    table = Table(show_header=True, box=box.MARKDOWN)
    frame = self.to_df()

    # Round numeric columns in place; non-numeric columns pass through.
    for numeric_column in frame.select_dtypes(include=[np.number]).columns:
        frame[numeric_column] = np.round(frame[numeric_column], decimals=precision)

    for column_name in frame.columns:
        table.add_column(str(column_name), style="magenta")

    for record in frame.values.tolist():
        table.add_row(*map(str, record))

    return table


@dataclass
class PlaceboTest:
Expand All @@ -109,7 +100,7 @@ def execute(self, verbose: bool = True) -> PlaceboTestResult:
datasets = self.dataset_dict
n_datasets = len(datasets)
n_control = sum([d.n_units for d in datasets.values()])
with Progress() as progress:
with Progress(disable=not verbose) as progress:
model_task = progress.add_task(
"[red]Models", total=len(self.models), visible=verbose
)
Expand All @@ -130,7 +121,6 @@ def execute(self, verbose: bool = True) -> PlaceboTestResult:
progress.update(unit_task, advance=1)
placebo_data = dataset.to_placebo_data(i)
result = model(placebo_data)
result = result.effect.percentage()
model_result.append(result)
results[(model._model_name, data_name)] = model_result
return PlaceboTestResult(effects=results)
Loading
Loading