Skip to content

Commit

Permalink
Import working; syntax issues fixed; real unit tests broken.
Browse files Browse the repository at this point in the history
  • Loading branch information
zaneselvans committed Nov 22, 2023
1 parent 4ad49b5 commit 46a4379
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 92 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
docs/data_dictionaries/pudl_db.rst
.ipynb_checkpoints/
.cache/
.ruff_cache/
.mypy_cache/
.pytest_cache/*
.DS_Store
build/
Expand Down
28 changes: 14 additions & 14 deletions src/pudl/metadata/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import pydantic
import sqlalchemy as sa
from pandas._libs.missing import NAType
from pydantic import ConfigDict, StringConstraints
from pydantic import ConfigDict, StringConstraints, ValidationInfo
from pydantic.types import DirectoryPath

import pudl.logging_helpers
Expand Down Expand Up @@ -236,7 +236,7 @@ def StrictList(item_type: type = Any) -> type: # noqa: N802
Allows :class:`list`, :class:`tuple`, :class:`set`, :class:`frozenset`,
:class:`collections.deque`, or generators and casts to a :class:`list`.
"""
return Annotated[list[item_type], pydantic.Field(min_items=1)]
return Annotated[list[item_type], pydantic.Field(min_length=1)]


# ---- Class attribute validators ---- #
Expand Down Expand Up @@ -294,7 +294,7 @@ class FieldConstraints(Base):

@pydantic.field_validator("max_length")
@classmethod
def _check_max_length(cls, value, info): # noqa: N805
def _check_max_length(cls, value, info: ValidationInfo):
minimum, maximum = info.data.get("min_length"), value
if minimum is not None and maximum is not None:
if type(minimum) is not type(maximum):
Expand All @@ -305,7 +305,7 @@ def _check_max_length(cls, value, info): # noqa: N805

@pydantic.field_validator("maximum")
@classmethod
def _check_max(cls, value, info): # noqa: N805
def _check_max(cls, value, info: ValidationInfo):
minimum, maximum = info.data.get("minimum"), value
if minimum is not None and maximum is not None:
if type(minimum) is not type(maximum):
Expand Down Expand Up @@ -409,7 +409,7 @@ def _df_is_encoding_table(cls, df): # noqa: N805

@pydantic.field_validator("ignored_codes")
@classmethod
def _good_and_ignored_codes_are_disjoint(cls, ignored_codes, info): # noqa: N805
def _good_and_ignored_codes_are_disjoint(cls, ignored_codes, info: ValidationInfo):
"""Check that there's no overlap between good and ignored codes."""
if "df" not in info.data:
return ignored_codes
Expand All @@ -423,7 +423,7 @@ def _good_and_ignored_codes_are_disjoint(cls, ignored_codes, info): # noqa: N80

@pydantic.field_validator("code_fixes")
@classmethod
def _good_and_fixable_codes_are_disjoint(cls, code_fixes, info): # noqa: N805
def _good_and_fixable_codes_are_disjoint(cls, code_fixes, info: ValidationInfo):
"""Check that there's no overlap between the good and fixable codes."""
if "df" not in info.data:
return code_fixes
Expand All @@ -437,7 +437,7 @@ def _good_and_fixable_codes_are_disjoint(cls, code_fixes, info): # noqa: N805

@pydantic.field_validator("code_fixes")
@classmethod
def _fixable_and_ignored_codes_are_disjoint(cls, code_fixes, info): # noqa: N805
def _fixable_and_ignored_codes_are_disjoint(cls, code_fixes, info: ValidationInfo):
"""Check that there's no overlap between the ignored and fixable codes."""
if "ignored_codes" not in info.data:
return code_fixes
Expand All @@ -451,7 +451,7 @@ def _fixable_and_ignored_codes_are_disjoint(cls, code_fixes, info): # noqa: N80

@pydantic.field_validator("code_fixes")
@classmethod
def _check_fixed_codes_are_good_codes(cls, code_fixes, info): # noqa: N805
def _check_fixed_codes_are_good_codes(cls, code_fixes, info: ValidationInfo):
"""Check that every fixed code is also one of the good codes."""
if "df" not in info.data:
return code_fixes
Expand Down Expand Up @@ -562,7 +562,7 @@ class Field(Base):

@pydantic.field_validator("constraints")
@classmethod
def _check_constraints(cls, value, info): # noqa: N805, C901
def _check_constraints(cls, value, info: ValidationInfo): # noqa: C901
if "type" not in info.data:
return value
dtype = info.data["type"]
Expand All @@ -587,7 +587,7 @@ def _check_constraints(cls, value, info): # noqa: N805, C901

@pydantic.field_validator("encoder")
@classmethod
def _check_encoder(cls, value, info): # noqa: N805
def _check_encoder(cls, value, info: ValidationInfo):
if "type" not in info.data or value is None:
return value
errors = []
Expand Down Expand Up @@ -738,7 +738,7 @@ class ForeignKey(Base):

@pydantic.field_validator("reference")
@classmethod
def _check_fields_equal_length(cls, value, info): # noqa: N805
def _check_fields_equal_length(cls, value, info: ValidationInfo):
if "fields_" in info.data and len(value.fields) != len(info.data["fields_"]):
raise ValueError("fields and reference.fields are not equal length")
return value
Expand Down Expand Up @@ -778,7 +778,7 @@ def _check_field_names_unique(cls, value): # noqa: N805

@pydantic.field_validator("primary_key")
@classmethod
def _check_primary_key_in_fields(cls, value, info): # noqa: N805
def _check_primary_key_in_fields(cls, value, info: ValidationInfo):
if value is not None and "fields_" in info.data:
missing = []
names = [f.name for f in info.data["fields_"]]
Expand All @@ -795,7 +795,7 @@ def _check_primary_key_in_fields(cls, value, info): # noqa: N805

# TODO[pydantic] Refactor...
# @pydantic.validator("foreign_keys", each_item=True)
# def _check_foreign_key_in_fields(cls, value, info): # noqa: N805
# def _check_foreign_key_in_fields(cls, value, info: ValidationInfo):
# if value and "fields_" in info.data:
# names = [f.name for f in info.data["fields_"]]
# missing = [x for x in value.fields if x not in names]
Expand Down Expand Up @@ -1192,7 +1192,7 @@ class Resource(Base):

@pydantic.field_validator("schema_")
@classmethod
def _check_harvest_primary_key(cls, value, info): # noqa: N805
def _check_harvest_primary_key(cls, value, info: ValidationInfo):
if info.data["harvest"].harvest and not value.primary_key:
raise ValueError("Harvesting requires a primary key")
return value
Expand Down
87 changes: 40 additions & 47 deletions src/pudl/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,19 @@
import itertools
import json
from enum import Enum, unique
from typing import Any, ClassVar, Self
from typing import Any, ClassVar

import fsspec
import pandas as pd
import yaml
from dagster import DagsterInvalidDefinitionError
from dagster import Field as DagsterField
from pydantic import BaseModel as PydanticBaseModel
from pydantic import (
AnyHttpUrl,
AnyUrl,
ConfigDict,
field_validator,
model_validator,
root_validator,
)
from pydantic import BaseModel as PydanticBaseModel
from pydantic_settings import BaseSettings

import pudl
Expand Down Expand Up @@ -259,9 +256,9 @@ class EiaSettings(BaseModel):
eia923: Immutable pydantic model to validate eia923 settings.
"""

eia860: Eia860Settings = None
eia861: Eia861Settings = None
eia923: Eia923Settings = None
eia860: Eia860Settings | None = None
eia861: Eia861Settings | None = None
eia923: Eia923Settings | None = None

@model_validator(mode="before")
@classmethod
Expand All @@ -281,8 +278,9 @@ def default_load_all(cls, values): # noqa: N805

return values

@model_validator(mode="after")
def check_eia_dependencies(self: Self): # noqa: N805
@model_validator(mode="before")
@classmethod
def check_eia_dependencies(cls, values): # noqa: N805
"""Make sure the dependencies between the eia datasets are satisfied.
Dependencies:
Expand All @@ -294,15 +292,17 @@ def check_eia_dependencies(self: Self): # noqa: N805
Returns:
values (Dict[str, BaseModel]): dataset settings.
"""
if not self.eia923 and self.eia860:
self.eia923 = Eia923Settings(years=self.eia860.years)

if self.eia923 and not self.eia860:
available_years = Eia860Settings()
self.eia860 = Eia860Settings(
years=[year for year in self.eia923.years if year in available_years]
if not values.get("eia923") and values.get("eia860"):
values["eia923"] = Eia923Settings(years=values["eia860"].years)

if values.get("eia923") and not values.get("eia860"):
available_years = Eia860Settings().years
values["eia860"] = Eia860Settings(
years=[
year for year in values["eia923"].years if year in available_years
]
)
return self
return values


class DatasetsSettings(BaseModel):
Expand Down Expand Up @@ -466,7 +466,7 @@ class FercGenericXbrlToSqliteSettings(BaseSettings):
disabled: if True, skip processing this dataset.
"""

taxonomy: AnyHttpUrl
taxonomy: str
years: list[int]
disabled: bool = False

Expand All @@ -483,7 +483,7 @@ class Ferc1XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):
years: list[int] = [
year for year in data_source.working_partitions["years"] if year >= 2021
]
taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form1/2022-01-01/form/form1/form-1_2022-01-01.xsd"
taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form1/2022-01-01/form/form1/form-1_2022-01-01.xsd"


class Ferc2XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):
Expand All @@ -497,7 +497,7 @@ class Ferc2XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):
years: list[int] = [
year for year in data_source.working_partitions["years"] if year >= 2021
]
taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form2/2022-01-01/form/form2/form-2_2022-01-01.xsd"
taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form2/2022-01-01/form/form2/form-2_2022-01-01.xsd"


class Ferc2DbfToSqliteSettings(GenericDatasetSettings):
Expand Down Expand Up @@ -544,7 +544,7 @@ class Ferc6XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):
years: list[int] = [
year for year in data_source.working_partitions["years"] if year >= 2021
]
taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form6/2022-01-01/form/form6/form-6_2022-01-01.xsd"
taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form6/2022-01-01/form/form6/form-6_2022-01-01.xsd"


class Ferc60DbfToSqliteSettings(GenericDatasetSettings):
Expand Down Expand Up @@ -575,7 +575,7 @@ class Ferc60XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):
years: list[int] = [
year for year in data_source.working_partitions["years"] if year >= 2021
]
taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form60/2022-01-01/form/form60/form-60_2022-01-01.xsd"
taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form60/2022-01-01/form/form60/form-60_2022-01-01.xsd"


class Ferc714XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):
Expand All @@ -587,7 +587,7 @@ class Ferc714XbrlToSqliteSettings(FercGenericXbrlToSqliteSettings):

data_source: ClassVar[DataSource] = DataSource.from_id("ferc714")
years: list[int] = [2021, 2022]
taxonomy: AnyHttpUrl = "https://eCollection.ferc.gov/taxonomy/form714/2022-01-01/form/form714/form-714_2022-01-01.xsd"
taxonomy: str = "https://eCollection.ferc.gov/taxonomy/form714/2022-01-01/form/form714/form-714_2022-01-01.xsd"


class FercToSqliteSettings(BaseSettings):
Expand All @@ -600,15 +600,15 @@ class FercToSqliteSettings(BaseSettings):
other_xbrl_forms: List of non-FERC1 forms to convert from XBRL to SQLite.
"""

ferc1_dbf_to_sqlite_settings: Ferc1DbfToSqliteSettings = None
ferc1_xbrl_to_sqlite_settings: Ferc1XbrlToSqliteSettings = None
ferc2_dbf_to_sqlite_settings: Ferc2DbfToSqliteSettings = None
ferc2_xbrl_to_sqlite_settings: Ferc2XbrlToSqliteSettings = None
ferc6_dbf_to_sqlite_settings: Ferc6DbfToSqliteSettings = None
ferc6_xbrl_to_sqlite_settings: Ferc6XbrlToSqliteSettings = None
ferc60_dbf_to_sqlite_settings: Ferc60DbfToSqliteSettings = None
ferc60_xbrl_to_sqlite_settings: Ferc60XbrlToSqliteSettings = None
ferc714_xbrl_to_sqlite_settings: Ferc714XbrlToSqliteSettings = None
ferc1_dbf_to_sqlite_settings: Ferc1DbfToSqliteSettings | None = None
ferc1_xbrl_to_sqlite_settings: Ferc1XbrlToSqliteSettings | None = None
ferc2_dbf_to_sqlite_settings: Ferc2DbfToSqliteSettings | None = None
ferc2_xbrl_to_sqlite_settings: Ferc2XbrlToSqliteSettings | None = None
ferc6_dbf_to_sqlite_settings: Ferc6DbfToSqliteSettings | None = None
ferc6_xbrl_to_sqlite_settings: Ferc6XbrlToSqliteSettings | None = None
ferc60_dbf_to_sqlite_settings: Ferc60DbfToSqliteSettings | None = None
ferc60_xbrl_to_sqlite_settings: Ferc60XbrlToSqliteSettings | None = None
ferc714_xbrl_to_sqlite_settings: Ferc714XbrlToSqliteSettings | None = None

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -661,13 +661,13 @@ def get_xbrl_dataset_settings(
class EtlSettings(BaseSettings):
"""Main settings validation class."""

ferc_to_sqlite_settings: FercToSqliteSettings = None
datasets: DatasetsSettings = None
ferc_to_sqlite_settings: FercToSqliteSettings | None = None
datasets: DatasetsSettings | None = None

name: str = None
title: str = None
description: str = None
version: str = None
name: str | None = None
title: str | None = None
description: str | None = None
version: str | None = None

# This is a list of fsspec-compatible paths to publish the output datasets to.
publish_destinations: list[str] = []
Expand Down Expand Up @@ -702,14 +702,7 @@ def _convert_settings_to_dagster_config(d: dict) -> None:
if isinstance(v, dict):
_convert_settings_to_dagster_config(v)
else:
try:
d[k] = DagsterField(type(v), default_value=v)
except DagsterInvalidDefinitionError:
dagster_type = str if isinstance(v, AnyUrl) else Any
# Dagster config only accepts valid Dagster types.
# Most of our settings object properties are valid types,
# except for fields like taxonomy, which are of the AnyHttpUrl type.
d[k] = DagsterField(dagster_type, default_value=str(v))
d[k] = DagsterField(type(v), default_value=v)


def create_dagster_config(settings: BaseModel) -> dict:
Expand Down
Loading

0 comments on commit 46a4379

Please sign in to comment.