From 9e3857730fd92b0b044e12cd6d7c14335a2956ed Mon Sep 17 00:00:00 2001 From: Matt Gilson Date: Wed, 30 Aug 2023 11:22:59 -0400 Subject: [PATCH] Use generic types for config objects. This is made to be consistent with typeshed best practices: > avoid invariant collection types (list, dict) in argument positions, in favor of covariant types like Mapping or Sequence Signed-off-by: Matt Gilson --- pandera/api/pandas/container.py | 8 ++++---- pandera/api/pandas/model_config.py | 8 ++++---- pandera/api/pyspark/container.py | 8 ++++---- pandera/api/pyspark/model_config.py | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pandera/api/pandas/container.py b/pandera/api/pandas/container.py index fd1fe2fe2..6f332928f 100644 --- a/pandera/api/pandas/container.py +++ b/pandera/api/pandas/container.py @@ -6,7 +6,7 @@ import os import warnings from pathlib import Path -from typing import Any, Dict, List, Optional, Union, cast, overload +from typing import Any, Dict, List, Optional, Sequence, Union, cast, overload import pandas as pd @@ -47,7 +47,7 @@ def __init__( strict: StrictType = False, name: Optional[str] = None, ordered: bool = False, - unique: Optional[Union[str, List[str]]] = None, + unique: Optional[Union[str, Sequence[str]]] = None, report_duplicates: UniqueSettings = "all", unique_column_names: bool = False, add_missing_columns: bool = False, @@ -188,12 +188,12 @@ def coerce(self, value: bool) -> None: self._coerce = value @property - def unique(self): + def unique(self) -> Optional[Sequence[str]]: """List of columns that should be jointly unique.""" return self._unique @unique.setter - def unique(self, value: Optional[Union[str, List[str]]]) -> None: + def unique(self, value: Optional[Union[str, Sequence[str]]]) -> None: """Set unique attribute.""" self._unique = [value] if isinstance(value, str) else value diff --git a/pandera/api/pandas/model_config.py b/pandera/api/pandas/model_config.py index d6eb64ca1..c43455f56 100644 --- a/pandera/api/pandas/model_config.py +++ b/pandera/api/pandas/model_config.py @@ -1,6 +1,6 @@ """Class-based dataframe model API configuration for pandas.""" -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Mapping, Optional, Sequence, Union from pandera.api.base.model_config import BaseModelConfig from pandera.api.pandas.types import PandasDtypeInputTypes, StrictType @@ -24,7 +24,7 @@ class BaseConfig(BaseModelConfig): # pylint:disable=R0903 drop_invalid_rows: bool = False #: drop invalid rows on validation #: make sure certain column combinations are unique - unique: Optional[Union[str, List[str]]] = None + unique: Optional[Union[str, Sequence[str]]] = None #: make sure all specified columns are in the validated dataframe - #: if ``"filter"``, removes columns not specified in the schema @@ -61,7 +61,7 @@ class BaseConfig(BaseModelConfig): # pylint:disable=R0903 #: converts the object of type ``from_format`` to a pandera-validate-able #: data structure. The reader function is implemented in the pandera.typing #: generic types via the ``from_format`` and ``to_format`` methods. - from_format_kwargs: Optional[Dict[str, Any]] = None + from_format_kwargs: Optional[Mapping[str, Any]] = None #: data format to serialize into after validation. This option only applies #: to schemas used in the context of the pandera type constructor @@ -76,7 +76,7 @@ class BaseConfig(BaseModelConfig): # pylint:disable=R0903 #: converts the pandera-validate-able object to type ``to_format``. #: The writer function is implemented in the pandera.typing #: generic types via the ``from_format`` and ``to_format`` methods. - to_format_kwargs: Optional[Dict[str, Any]] = None + to_format_kwargs: Optional[Mapping[str, Any]] = None #: a dictionary object to store key-value data at schema level metadata: Optional[dict] = None diff --git a/pandera/api/pyspark/container.py b/pandera/api/pyspark/container.py index 6a272f7f1..61037f1cd 100644 --- a/pandera/api/pyspark/container.py +++ b/pandera/api/pyspark/container.py @@ -6,7 +6,7 @@ import os import warnings from pathlib import Path -from typing import Any, Dict, List, Optional, Union, cast, overload +from typing import Any, Dict, List, Optional, Sequence, Union, cast, overload from pyspark.sql import DataFrame @@ -40,7 +40,7 @@ def __init__( strict: StrictType = False, name: Optional[str] = None, ordered: bool = False, - unique: Optional[Union[str, List[str]]] = None, + unique: Optional[Union[str, Sequence[str]]] = None, report_duplicates: UniqueSettings = "all", unique_column_names: bool = False, title: Optional[str] = None, @@ -169,12 +169,12 @@ def coerce(self, value: bool) -> None: self._coerce = value @property - def unique(self): + def unique(self) -> Optional[Sequence[str]]: """List of columns that should be jointly unique.""" return self._unique @unique.setter - def unique(self, value: Optional[Union[str, List[str]]]) -> None: + def unique(self, value: Optional[Union[str, Sequence[str]]]) -> None: """Set unique attribute.""" self._unique = [value] if isinstance(value, str) else value diff --git a/pandera/api/pyspark/model_config.py b/pandera/api/pyspark/model_config.py index 8f566d4ff..9f6c2a752 100644 --- a/pandera/api/pyspark/model_config.py +++ b/pandera/api/pyspark/model_config.py @@ -1,6 +1,6 @@ """Class-based dataframe model API configuration for pyspark.""" -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Mapping, Optional, Sequence, Union from pandera.api.base.model_config import BaseModelConfig from pandera.api.pyspark.types import PySparkDtypeInputTypes, StrictType @@ -23,7 +23,7 @@ class BaseConfig(BaseModelConfig): # pylint:disable=R0903 coerce: bool = False #: coerce types of all schema components #: make sure certain column combinations are unique - unique: Optional[Union[str, List[str]]] = None + unique: Optional[Union[str, Sequence[str]]] = None #: make sure all specified columns are in the validated dataframe - #: if ``"filter"``, removes columns not specified in the schema @@ -44,7 +44,7 @@ class BaseConfig(BaseModelConfig): # pylint:disable=R0903 #: converts the object of type ``from_format`` to a pandera-validate-able #: data structure. The reader function is implemented in the pandera.typing #: generic types via the ``from_format`` and ``to_format`` methods. - from_format_kwargs: Optional[Dict[str, Any]] = None + from_format_kwargs: Optional[Mapping[str, Any]] = None #: data format to serialize into after validation. This option only applies #: to schemas used in the context of the pandera type constructor @@ -59,7 +59,7 @@ class BaseConfig(BaseModelConfig): # pylint:disable=R0903 #: converts the pandera-validate-able object to type ``to_format``. #: The writer function is implemented in the pandera.typing #: generic types via the ``from_format`` and ``to_format`` methods. - to_format_kwargs: Optional[Dict[str, Any]] = None + to_format_kwargs: Optional[Mapping[str, Any]] = None #: a dictionary object to store key-value data at schema level metadata: Optional[dict] = None