diff --git a/docs/catalyst_cites.bib b/docs/catalyst_cites.bib index 22b0b14c9..992111546 100644 --- a/docs/catalyst_cites.bib +++ b/docs/catalyst_cites.bib @@ -1,219 +1,75 @@ -@techreport{CarbonStranding2021, - author = {Tyler Fitch}, - editor = {Tyler H. Norris}, - title = {Carbon Stranding: Climate Risk and Stranded Assets in Duke's Integrated Resource Plan}, - institution = {Energy Transition Institute}, - year = {2021}, - month = {January}, - url = {https://votesolar.org/wp-content/uploads/2021/02/ETI_CarbonStrandingReport_2021.pdf}, - urldate = {2021-10-14} +@article{doi:10.1021/acs.est.4c03719, +author = {Jordan, Katherine H. and Dennin, Luke R. and Adams, Peter J. and Jaramillo, Paulina and Muller, Nicholas Z.}, +title = {Climate Policy Reduces Racial Disparities in Air Pollution from Transportation and Power Generation}, +journal = {Environmental Science \& Technology}, +volume = {58}, +number = {49}, +pages = {21510-21522}, +year = {2024}, +doi = {10.1021/acs.est.4c03719}, + note ={PMID: 39593208}, +URL = {https://doi.org/10.1021/acs.est.4c03719}, +eprint = {https://doi.org/10.1021/acs.est.4c03719} } -@techreport{CoalCostCrossover2019, - author = {Eric Gimon and Mike O'{B}oyle and Christopher T M Clack and Sarah A McKee}, - title = {The Coal Cost Crossover: Economic Viability of Existing Coal Compared to New Local Wind and Solar Resources}, - institution = {Energy Innovation and Vibrant Clean Energy}, - year = {2019}, - url = {https://energyinnovation.org/publication/the-coal-cost-crossover/}, - urldate = {2021-10-14} +@misc{Tehranchi_2024, + title={Pypsa-Usa: A Flexible Open-Source Energy System Model and Optimization Tool for the United States}, + author={Kamran Tehranchi and Trevor Barnes and Martha Frysztacki and Ines Azevedo}, + year={2024}, + month={nov}, + doi={10.2139/ssrn.5029120}, + publisher={SSRN}, + url={https://dx.doi.org/10.2139/ssrn.5029120}, } -@techreport{CoalCostCrossover2021, - author = {Eric Gimon and Amanda Meyers and Mike 
O'{B}oyle}, - title = {Coal Cost Crossover 2.0}, - institution = {Energy Innovation}, - year = {2021}, - url = {https://energyinnovation.org/publication/the-coal-cost-crossover-2021/}, - urldate = {2021-10-14} +@techreport{McKinley_2024, + title={Short Circuited: Costly Transitions under The Clean Air Act}, + author={Andrew McKinley}, + year={2024}, + month={dec}, + doi={10.2139/ssrn.4972091}, + publisher={SSRN}, + institution={Booth School of Business, University of Chicago}, + url={https://dx.doi.org/10.2139/ssrn.4972091}, } -@article{OpenEnergyOutlook, - author = {Joseph F. DeCarolis and - Paulina Jaramillo and - Jeremiah X. Johnson and - David L. McCollum and - Evelina Trutnevyte and - David C. Daniels and - Gökçe Akın-Olçum and - Joule Bergerson and - Soolyeon Cho and - Joon-Ho Choi and - Michael T. Craig and - Anderson R. de Queiroz and - Hadi Eshraghi and - Christopher S. Galik and - Timothy G. Gutowski and - Karl R. Haapala and - Bri-Mathias Hodge and - Simi Hoque and - Jesse D. Jenkins and - Alan Jenn and - Daniel J.A. Johansson and - Noah Kaufman and - Juha Kiviluoma and - Zhenhong Lin and - Heather L. MacLean and - Eric Masanet and - Mohammad S. Masnadi and - Colin A. McMillan and - Destenie S. Nock and - Neha Patankar and - Dalia Patino-Echeverri and - Greg Schivley and - Sauleh Siddiqui and - Amanda D. 
Smith and - Aranya Venkatesh and - Gernot Wagner and - Sonia Yeh and - Yuyu Zhou}, - title = {Leveraging Open-Source Tools for Collaborative Macro-energy System Modeling Efforts}, - journal = {Joule}, - volume = {4}, - issue = {12}, - pages = {2523-2526}, - year = {2020}, - month = {12}, - url = {https://doi.org/10.1016/j.joule.2020.11.002}, - doi = {10.1016/j.joule.2020.11.002} +@techreport{Singh2024-wl, + title={Trends and drivers of utility costs in California}, + author={Madalsa Singh and Allison Ong and Rayan Sud}, + year={2024}, + month={oct}, + doi={10.2139/ssrn.4987198}, + publisher={SSRN}, + institution={Stanford University}, + url={https://dx.doi.org/10.2139/ssrn.4987198}, } -@misc{PowerGenome, - author = {Greg Schivley and - Ethan Welty and - Neha Patankar}, - title = {PowerGenome/PowerGenome: v0.4.2}, - month = {aug}, - year = 2021, - publisher = {Zenodo}, - howpublished = {Git{H}ub repository archived on {Z}enodo}, - version = {v0.4.2}, - doi = {10.5281/zenodo.5347792}, - url = {https://doi.org/10.5281/zenodo.5347792} +@article{Jacobson_2024, + doi = {10.1088/2753-3751/ad6d6f}, + url = {https://dx.doi.org/10.1088/2753-3751/ad6d6f}, + year = {2024}, + month = {sep}, + publisher = {IOP Publishing}, + volume = {1}, + number = {3}, + pages = {035009}, + author = {Jacobson, Anna F and Mauzerall, Denise L and Jenkins, Jesse D}, + title = {Quantifying the impact of energy system model resolution on siting, cost, reliability, and emissions for electricity generation}, + journal = {Environmental Research: Energy}, + abstract = {Runtime and memory requirements for typical formulations of energy system models increase non-linearly with resolution, computationally constraining large-scale models despite state-of-the-art solvers and hardware. This scaling paradigm requires omission of detail which can affect key outputs to an unknown degree. 
Recent algorithmic innovations employing decomposition have enabled linear increases in runtime and memory use as temporal resolution increases. Newly tractable, higher resolution systems can be compared with lower resolution configurations commonly employed today in academic research and industry practice, providing a better understanding of the potential biases or inaccuracies introduced by these abstractions. We employ a state-of-the art electricity system planning model and new high-resolution systems to quantify the impact of varying degrees of spatial, temporal, and operational resolution on results salient to policymakers and planners. We find models with high spatial and temporal resolution result in more realistic siting decisions and improved emissions, reliability, and price outcomes. Errors are generally larger in systems with low spatial resolution, which omit key transmission constraints. We demonstrate that high temporal resolution cannot overcome biases introduced by low spatial resolution, and vice versa. While we see asymptotic improvements to total system cost and reliability with increased resolution, other salient outcomes such as siting accuracy and emissions exhibit continued improvement across the range of model resolutions considered. 
We conclude that modelers should carefully balance resolution on spatial, temporal, and operational dimensions and that novel computational methods enabling higher resolution modeling are valuable and can further improve the decision support provided by this class of models.} } -@misc{OSClimate, - author = {Open Source Climate}, - title = {Open Source Climate Initiative}, - howpublished = {Website}, - url = {https://www.os-climate.org/}, - urldate = {2021-10-15} -} +@INPROCEEDINGS{10780659, + author={Hanna, Bavly and Xu, Guandong and Wang, Xianzhi and Hossain, Jahangir}, + booktitle={2024 11th International Conference on Behavioural and Social Computing (BESC)}, + title={Leveraging Artificial Intelligence for Affordable and Clean Energy: Advancing UN Sustainable Development Goal 7}, + year={2024}, + volume={}, + number={}, + pages={1-9}, + keywords={Green energy;Technological innovation;Sociotechnical systems;Social computing;Reviews;Collaboration;Stability analysis;Stakeholders;Artificial intelligence;Sustainable development;Artificial Intelligence;Sustainable Development Goals;Energy Efficiency;Clean Energy;Renewable Energy}, + doi={10.1109/BESC64747.2024.10780659}} -@techreport{PutGasOnStandby, - author = {Sims, Jonathan and - Hillenbrand von der Neyen, Catharina and - D‘souza, Durand and - Chau, Lily and - González-Jiménez, Nicolás and - Sani, Lorenzo}, - abstract = {Unabated gas plants’ future role in the power system should be predominantly limited - to backup reserve to allow for flexible low carbon forms of supply to fully emerge. - Events in 2021 have brought the extreme levels of price and supply volatility - present in the global gas market to the fore of discussions over future energy - system dynamics. 
Wholesale gas prices have risen to record highs across key supply - hubs, highlighting the levels of market risk that gas-fired power stations are - exposed to, and demonstrating the urgent requirement for increased investment - in alternative forms of flexible supply which are steadily emerging. Even before - this year’s crisis however, gas-fired power stations across Europe and the US were - already confronting declining operating profitability and rapidly growing - competition from low carbon sources. A steady shift towards the primary use of - such technologies is vital if net zero emissions goals are to be achieved. While - small amounts of unabated gas-fired capacity may well be required to remain - available in these regions and to sit predominantly idle as back up peaking capacity - to ensure long- term system supply security, we believe that this should be the - limit to such units’ future role. This report aims to demonstrate that stakeholders - committing to the long-term funding of such assets are already risking the loss of - billions of dollars, while the risks of continued investment will only grow - further.}, - institution = {Carbon Tracker Initiative}, - title = {Put Gas On Standby}, - year = {2021}, - url = {https://carbontracker.org/reports/put-gas-on-standby/}, - urldate = {2021-10-30} -} - -@techreport{EconFlexSolar, - author = {Steven Dahlke and - Mahesh Morjaria and - Vahan Gevorgian and - Barry Mather}, - institution = {First Solar}, - title = {The Economics of Flexible Solar for Electricity Markets in Transition}, - year = {2020}, - url = {https://www.firstsolar.com/es-CSA/-/media/First-Solar/Documents/Grid-Evolution/The_Economics_of_Flexible_Solar_for_Electricity_Markets_in_Transition.ashx}, - urldate = {2021-11-01} -} - -@techreport{EIFossilToClean, - author = {Ron Lehr}, - title = {Utility Transition Financial Impacts: From Fossil to Clean}, - institution = {Energy Innovation}, - year = {2018}, - url = 
{https://energyinnovation.org/wp-content/uploads/2018/12/From-Fossil-to-Clean-Brief_12.3.18.pdf}, - urldate = {2021-10-15} -} - -@article{TransmissionSyndicate, - author = {Ari Peskoe}, - journal = {Energy Law Journal}, - number = {1}, - title = {Is the Utility Transmission Syndicate Forever?}, - volume = {42}, - year = {2021}, - url = {https://www.eba-net.org/assets/1/6/5_-_%5BPeskoe%5D%5B1-66%5D.pdf}, - urldate = {2021-11-01}, - doi = {10.2139/ssrn.3770740} -} - -@article{BISTLINE2020114941, - title = {Parameterizing open-source energy models: Statistical learning to estimate unknown power plant attributes}, - journal = {Applied Energy}, - volume = {269}, - pages = {114941}, - year = {2020}, - issn = {0306-2619}, - doi = {https://doi.org/10.1016/j.apenergy.2020.114941}, - url = {https://www.sciencedirect.com/science/article/pii/S0306261920304530}, - urldate = {2021-11-01}, - author = {Bistline, John E.T. and Merrick, James H.}, - abstract = {Energy systems models are used to perform energy and environmental policy - analysis, inform company strategy, and understand implications of technological - change. Although open-source models can promote transparency and reproducibility, data - availability and cost can be prohibitive barriers for researchers and other - stakeholders. This paper presents a novel application of a statistical approach to - predict unknown power plant parameters in Canada using available data from the United - States, which can be applied in other settings where critical model inputs are - missing. We apply two statistical learning methods, linear regression and - k-nearest-neighbors, and compare their performance on unseen portions of the United - States data before applying the learned functions to unknown Canadian data. Results - indicate that reasonable predictions of heatrates and, to a lesser extent, operation - and maintenance costs are possible even with limited data about age, capacity, and - power plant types. 
The nearest-neighbor approach generally outperforms linear - regressions for the datasets and applications to power plant parameters investigated - here.} -} - -@article{doi:10.1146/annurev-environ-020220-061831, -author = {Donti, Priya L. and Kolter, J. Zico}, -title = {Machine Learning for Sustainable Energy Systems}, -journal = {Annual Review of Environment and Resources}, -volume = {46}, -number = {1}, -pages = {719-747}, -year = {2021}, -doi = {10.1146/annurev-environ-020220-061831}, -URL = {https://doi.org/10.1146/annurev-environ-020220-061831}, -eprint = {https://doi.org/10.1146/annurev-environ-020220-061831}, -abstract = {In recent years, machine learning has proven to be a powerful tool for - deriving insights from data. In this review, we describe ways in which machine - learning has been leveraged to facilitate the development and operation of - sustainable energy systems. We first provide a taxonomy of machine learning - paradigms and techniques, along with a discussion of their strengths and - limitations. We then provide an overview of existing research using machine - learning for sustainable energy production, delivery, and storage. Finally, - we identify gaps in this literature, propose future research directions, and - discuss important considerations for deployment.} -} @misc{zheng2024optimal, title={Optimal transmission expansion minimally reduces decarbonization costs of {U}.{S}. 
electricity}, @@ -240,6 +96,37 @@ @misc{zheng2024optimal to build a clean power system at a reasonable cost.} } +@misc{DVN/7QRME4_2024, + author = {Dotson, Samuel and Shaver, Lee and Gignac, James}, + publisher = {Harvard Dataverse}, + title = {{Storing the Future: A modeling analysis of Illinois storage needs}}, + year = {2024}, + version = {V1}, + doi = {10.7910/DVN/7QRME4}, + url = {https://doi.org/10.7910/DVN/7QRME4} +} + +@misc{PowerGenome, + author = {Greg Schivley and + Ethan Welty and + Neha Patankar and + Anna Jacobson and + Qingyu Xu and + Aneesha Manocha and + Braden Pecora and + Riti Bhandarkar and + Jesse D. Jenkins and + Matthias Fripp}, + title = {PowerGenome/PowerGenome: v0.6.3}, + month = {may}, + year = {2024}, + publisher = {Zenodo}, + howpublished = {Git{H}ub repository archived on {Z}enodo}, + version = {v0.6.3}, + doi = {10.5281/zenodo.4426096}, + url = {https://doi.org/10.5281/zenodo.4426096} +} + @article{10.1257/jep.37.4.155, Author = {Davis, Lucas W. and Hausman, Catherine and Rose, Nancy L.}, Title = {Transmission Impossible? Prospects for Decarbonizing the {U}.{S}. Grid}, @@ -264,6 +151,15 @@ @article{10.1257/jep.37.4.155 transmission capacity.} } +@techreport{FlatPowerDemandOver2023, + title={The era of flat power demand is over}, + author={Wilson, John D. 
and Zimmerman, Zach}, + institution ={Grid Strategies LLC}, + year={2023}, + month={December}, + url={https://gridstrategiesllc.com/wp-content/uploads/2023/12/National-Load-Growth-Report-2023.pdf}, +} + @article{Miller_2023, doi = {10.1088/1748-9326/acc119}, url = {https://dx.doi.org/10.1088/1748-9326/acc119}, @@ -302,6 +198,113 @@ @article{Miller_2023 research on grid emissions.} } +@techreport{NBERw30297, + title = {Policy Uncertainty in the Market for Coal Electricity: The Case of Air Toxics Standards}, + author = {Gowrisankaran, Gautam and Langer, Ashley and Zhang, Wendan}, + institution = {National Bureau of Economic Research}, + type = {Working Paper}, + series = {Working Paper Series}, + number = {30297}, + year = {2022}, + month = {July}, + doi = {10.3386/w30297}, + URL = {http://www.nber.org/papers/w30297}, + abstract = {Government policy uncertainty affects irreversible decisions including technology adoption and exit. This paper quantifies uncertainty surrounding the Mercury and Air Toxics Standard (MATS). We estimate a dynamic oligopoly model for coal-fired electricity generators that recovers generators' beliefs regarding future MATS enforcement. We develop the Approximate Belief Oligopoly Equilibrium concept where players understand that their decisions impact aggregate market states. MATS enforcement created substantial uncertainty: the perceived enforcement probability dropped to 43%. Resolving uncertainty early would increase profits by $1.39 billion but also pollution costs by $0.652–1.776 billion. 
Had exit been unlikely, resolving uncertainty early would have decreased pollution.}, +} + +@article{ZHANG2022112215, + title = {A review of publicly available data sources for models to study renewables integration in China's power system}, + journal = {Renewable and Sustainable Energy Reviews}, + volume = {159}, + pages = {112215}, + year = {2022}, + issn = {1364-0321}, + doi = {https://doi.org/10.1016/j.rser.2022.112215}, + url = {https://www.sciencedirect.com/science/article/pii/S1364032122001381}, + author = {Xiaodong Zhang and Dalia Patino-Echeverri and Mingquan Li and Libo Wu}, + keywords = {Power system operation, Renewable energy, Model, Simulation, Data quality, China}, + abstract = {The ongoing transformation of the world's energy system requires detailed power-system models that help plan a cost-effective and reliable integration of variable renewables and demand-side resources. The quality and depth of the results of these models depend on the existence of trustworthy, complete, and high-resolution data on extant electric power assets and the demand they serve, wind and solar resources, and projections on costs and performance of technologies that could be developed during the next three decades. This paper assesses the quality of China's power system's publicly available data compared to the U.S. It concludes that despite growing use of power system models to inform and analyze Chinese energy policy, the availability of necessary data is still a significant barrier that severely limits the transparency, replicability, relevance, and usefulness of their results.} +} + +@techreport{CarbonStranding2021, + author = {Tyler Fitch}, + editor = {Tyler H. 
Norris}, + title = {Carbon Stranding: Climate Risk and Stranded Assets in Duke's Integrated Resource Plan}, + institution = {Energy Transition Institute}, + year = {2021}, + month = {January}, + url = {https://votesolar.org/wp-content/uploads/2021/02/ETI_CarbonStrandingReport_2021.pdf}, + urldate = {2021-10-14} +} + + +@techreport{PutGasOnStandby, + author = {Sims, Jonathan and + Hillenbrand von der Neyen, Catharina and + D‘souza, Durand and + Chau, Lily and + González-Jiménez, Nicolás and + Sani, Lorenzo}, + abstract = {Unabated gas plants’ future role in the power system should be predominantly limited + to backup reserve to allow for flexible low carbon forms of supply to fully emerge. + Events in 2021 have brought the extreme levels of price and supply volatility + present in the global gas market to the fore of discussions over future energy + system dynamics. Wholesale gas prices have risen to record highs across key supply + hubs, highlighting the levels of market risk that gas-fired power stations are + exposed to, and demonstrating the urgent requirement for increased investment + in alternative forms of flexible supply which are steadily emerging. Even before + this year’s crisis however, gas-fired power stations across Europe and the US were + already confronting declining operating profitability and rapidly growing + competition from low carbon sources. A steady shift towards the primary use of + such technologies is vital if net zero emissions goals are to be achieved. While + small amounts of unabated gas-fired capacity may well be required to remain + available in these regions and to sit predominantly idle as back up peaking capacity + to ensure long- term system supply security, we believe that this should be the + limit to such units’ future role. 
This report aims to demonstrate that stakeholders + committing to the long-term funding of such assets are already risking the loss of + billions of dollars, while the risks of continued investment will only grow + further.}, + institution = {Carbon Tracker Initiative}, + title = {Put Gas On Standby}, + year = {2021}, + url = {https://carbontracker.org/reports/put-gas-on-standby/}, + urldate = {2021-10-30} +} + +@article{TransmissionSyndicate, + author = {Ari Peskoe}, + journal = {Energy Law Journal}, + number = {1}, + title = {Is the Utility Transmission Syndicate Forever?}, + volume = {42}, + year = {2021}, + url = {https://www.eba-net.org/assets/1/6/5_-_%5BPeskoe%5D%5B1-66%5D.pdf}, + urldate = {2021-11-01}, + doi = {10.2139/ssrn.3770740} +} + +@article{doi:10.1146/annurev-environ-020220-061831, +author = {Donti, Priya L. and Kolter, J. Zico}, +title = {Machine Learning for Sustainable Energy Systems}, +journal = {Annual Review of Environment and Resources}, +volume = {46}, +number = {1}, +pages = {719-747}, +year = {2021}, +doi = {10.1146/annurev-environ-020220-061831}, +URL = {https://doi.org/10.1146/annurev-environ-020220-061831}, +eprint = {https://doi.org/10.1146/annurev-environ-020220-061831}, +abstract = {In recent years, machine learning has proven to be a powerful tool for + deriving insights from data. In this review, we describe ways in which machine + learning has been leveraged to facilitate the development and operation of + sustainable energy systems. We first provide a taxonomy of machine learning + paradigms and techniques, along with a discussion of their strengths and + limitations. We then provide an overview of existing research using machine + learning for sustainable energy production, delivery, and storage. 
Finally, + we identify gaps in this literature, propose future research directions, and + discuss important considerations for deployment.} +} + @book{o2021quantifying, title={Quantifying Operational Resilience Benefits of the Smart Grid}, author={O'Fallon, Cheyney and Gopstein, Avi}, @@ -325,11 +328,137 @@ @book{o2021quantifying Irma.} } -@techreport{FlatPowerDemandOver2023, - title={The era of flat power demand is over}, - author={Wilson, John D. and Zimmerman, Zach}, - institution ={Grid Strategies LLC}, - year={2023}, - month={December}, - url={https://gridstrategiesllc.com/wp-content/uploads/2023/12/National-Load-Growth-Report-2023.pdf}, +@article{huppmann2021pyam, + title={pyam: Analysis and visualisation of integrated assessment and macro-energy scenarios}, + author={Huppmann, Daniel and Gidden, Matthew J and Nicholls, Zebedee and H{\"o}rsch, Jonas and Lamboll, Robin and Kishimoto, Paul N and Burandt, Thorsten and Fricko, Oliver and Byers, Edward and Kikstra, Jarmo and others}, + journal={Open Research Europe}, + volume={1}, + year={2021}, + publisher={European Commission, Directorate General for Research and Innovation} +} + + +@techreport{CoalCostCrossover2021, + author = {Eric Gimon and Amanda Meyers and Mike O'{B}oyle}, + title = {Coal Cost Crossover 2.0}, + institution = {Energy Innovation}, + year = {2021}, + url = {https://energyinnovation.org/publication/the-coal-cost-crossover-2021/}, + urldate = {2021-10-14} +} + +@misc{OSClimate, + author = {Open Source Climate}, + title = {Open Source Climate Initiative}, + howpublished = {Website}, + url = {https://www.os-climate.org/}, + urldate = {2021-10-15} +} + +@article{OpenEnergyOutlook, + author = {Joseph F. DeCarolis and + Paulina Jaramillo and + Jeremiah X. Johnson and + David L. McCollum and + Evelina Trutnevyte and + David C. Daniels and + Gökçe Akın-Olçum and + Joule Bergerson and + Soolyeon Cho and + Joon-Ho Choi and + Michael T. Craig and + Anderson R. 
de Queiroz and + Hadi Eshraghi and + Christopher S. Galik and + Timothy G. Gutowski and + Karl R. Haapala and + Bri-Mathias Hodge and + Simi Hoque and + Jesse D. Jenkins and + Alan Jenn and + Daniel J.A. Johansson and + Noah Kaufman and + Juha Kiviluoma and + Zhenhong Lin and + Heather L. MacLean and + Eric Masanet and + Mohammad S. Masnadi and + Colin A. McMillan and + Destenie S. Nock and + Neha Patankar and + Dalia Patino-Echeverri and + Greg Schivley and + Sauleh Siddiqui and + Amanda D. Smith and + Aranya Venkatesh and + Gernot Wagner and + Sonia Yeh and + Yuyu Zhou}, + title = {Leveraging Open-Source Tools for Collaborative Macro-energy System Modeling Efforts}, + journal = {Joule}, + volume = {4}, + issue = {12}, + pages = {2523-2526}, + year = {2020}, + month = {12}, + url = {https://doi.org/10.1016/j.joule.2020.11.002}, + doi = {10.1016/j.joule.2020.11.002} +} + +@techreport{EconFlexSolar, + author = {Steven Dahlke and + Mahesh Morjaria and + Vahan Gevorgian and + Barry Mather}, + institution = {First Solar}, + title = {The Economics of Flexible Solar for Electricity Markets in Transition}, + year = {2020}, + url = {https://www.firstsolar.com/es-CSA/-/media/First-Solar/Documents/Grid-Evolution/The_Economics_of_Flexible_Solar_for_Electricity_Markets_in_Transition.ashx}, + urldate = {2021-11-01} +} + +@article{BISTLINE2020114941, + title = {Parameterizing open-source energy models: Statistical learning to estimate unknown power plant attributes}, + journal = {Applied Energy}, + volume = {269}, + pages = {114941}, + year = {2020}, + issn = {0306-2619}, + doi = {https://doi.org/10.1016/j.apenergy.2020.114941}, + url = {https://www.sciencedirect.com/science/article/pii/S0306261920304530}, + urldate = {2021-11-01}, + author = {Bistline, John E.T. and Merrick, James H.}, + abstract = {Energy systems models are used to perform energy and environmental policy + analysis, inform company strategy, and understand implications of technological + change. 
Although open-source models can promote transparency and reproducibility, data + availability and cost can be prohibitive barriers for researchers and other + stakeholders. This paper presents a novel application of a statistical approach to + predict unknown power plant parameters in Canada using available data from the United + States, which can be applied in other settings where critical model inputs are + missing. We apply two statistical learning methods, linear regression and + k-nearest-neighbors, and compare their performance on unseen portions of the United + States data before applying the learned functions to unknown Canadian data. Results + indicate that reasonable predictions of heatrates and, to a lesser extent, operation + and maintenance costs are possible even with limited data about age, capacity, and + power plant types. The nearest-neighbor approach generally outperforms linear + regressions for the datasets and applications to power plant parameters investigated + here.} +} + +@techreport{CoalCostCrossover2019, + author = {Eric Gimon and Mike O'{B}oyle and Christopher T M Clack and Sarah A McKee}, + title = {The Coal Cost Crossover: Economic Viability of Existing Coal Compared to New Local Wind and Solar Resources}, + institution = {Energy Innovation and Vibrant Clean Energy}, + year = {2019}, + url = {https://energyinnovation.org/publication/the-coal-cost-crossover/}, + urldate = {2021-10-14} +} + +@techreport{EIFossilToClean, + author = {Ron Lehr}, + title = {Utility Transition Financial Impacts: From Fossil to Clean}, + institution = {Energy Innovation}, + year = {2018}, + url = {https://energyinnovation.org/wp-content/uploads/2018/12/From-Fossil-to-Clean-Brief_12.3.18.pdf}, + urldate = {2021-10-15} } diff --git a/docs/catalyst_pubs.bib b/docs/catalyst_pubs.bib index c1976b265..b80fe3053 100644 --- a/docs/catalyst_pubs.bib +++ b/docs/catalyst_pubs.bib @@ -1,3 +1,15 @@ +@INPROCEEDINGS{10741747, + author={Lamb, Katherine and Belfer, Ella and 
Selvans, Zane and Norman, Bennett and Gosnell, Christina and Xia, Dazhong and Sharpe, Austen and Schira, Zach}, + booktitle={2024 56th North American Power Symposium (NAPS)}, + title={The Public Utility Data Liberation Project: Providing Open Data For a Clean Energy Transition}, + year={2024}, + volume={}, + number={}, + pages={1-6}, + keywords={Green energy;Publishing;Catalysts;Ecosystems;Buildings;Electricity supply industry;Sustainable development;North America;Open data;open source;open data;EIA;FERC;US energy system;data pipeline;outreach}, + doi={10.1109/NAPS61145.2024.10741747}} + + @misc{PudlSoftware, author = {Selvans, Zane and Gosnell, Christina and diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 24ee16b6a..def37b4fc 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -3,9 +3,38 @@ PUDL Release Notes ======================================================================================= --------------------------------------------------------------------------------------- -v2024.XX.x (2024-MM-DD) +v2025.XX.x (2025-MM-DD) --------------------------------------------------------------------------------------- +New Data +^^^^^^^^ + +SEC Form 10-K Parent-Subsidiary Ownership +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* We have added some new tables describing the parent-subsidiary company ownership + relationships reported in the + `SEC's Form 10-K `__, Exhibit 21 + "Subsidiaries of the Registrant". Where possible these tables link the SEC filers or + their subsidiary companies to the corresponding EIA utilities. This work was funded + by + `a grant from the Mozilla Foundation `__. + Most of the ML models and data preparation took place in the `mozilla-sec-eia + repository `__ separate from + the main PUDL ETL, as it requires processing hundreds of thousands of PDFs and the + deployment of some ML experiment tracking infrastructure. The new tables are handed + off as nearly finished products to the PUDL ETL pipeline. 
**Note that these are + preliminary, experimental data products and are known to be incomplete and to contain + errors.** Extracting data tables from unstructured PDFs and the SEC to EIA record + linkage are necessarily probabilistic processes. +* See PRs :pr:`4026,4031,4035,4046,4048,4050` and check out the table descriptions in + the PUDL data dictionary: + + * :ref:`out_sec10k__parents_and_subsidiaries` + * :ref:`core_sec10k__quarterly_filings` + * :ref:`core_sec10k__quarterly_exhibit_21_company_ownership` + * :ref:`core_sec10k__quarterly_company_information` + New Data Coverage ^^^^^^^^^^^^^^^^^ diff --git a/migrations/versions/1e2ec7bf2b64_rename_sec_10k_tables_to_reflect_.py b/migrations/versions/1e2ec7bf2b64_rename_sec_10k_tables_to_reflect_.py new file mode 100644 index 000000000..0cd81deda --- /dev/null +++ b/migrations/versions/1e2ec7bf2b64_rename_sec_10k_tables_to_reflect_.py @@ -0,0 +1,94 @@ +"""Rename SEC 10K tables to reflect temporal granularity. + +Revision ID: 1e2ec7bf2b64 +Revises: ac67e04d1383 +Create Date: 2025-02-05 10:52:45.161681 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '1e2ec7bf2b64' +down_revision = 'ac67e04d1383' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('core_sec10k__quarterly_company_information', + sa.Column('filename_sec10k', sa.Text(), nullable=False, comment='Name of filing as provided by SEC data portal.'), + sa.Column('filer_count', sa.Integer(), nullable=False, comment='Index company information as some filings contain information for multiple companies.'), + sa.Column('company_information_block', sa.Text(), nullable=False, comment='Title of block of data.'), + sa.Column('company_information_block_count', sa.Integer(), nullable=False, comment='Some blocks are repeated, this defines the index of the data block.'), + sa.Column('company_information_fact_name', sa.Text(), nullable=False, comment='Name of fact within a ``company_information_block``.'), + sa.Column('company_information_fact_value', sa.Text(), nullable=False, comment='Value corresponding with ``company_information_fact_name``.'), + sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), + sa.PrimaryKeyConstraint('filename_sec10k', 'filer_count', 'company_information_block', 'company_information_block_count', 'company_information_fact_name', 'company_information_fact_value', name=op.f('pk_core_sec10k__quarterly_company_information')) + ) + op.create_table('core_sec10k__quarterly_exhibit_21_company_ownership', + sa.Column('filename_sec10k', sa.Text(), nullable=True, comment='Name of filing as provided by SEC data portal.'), + sa.Column('subsidiary_company_name', sa.Text(), nullable=True, comment='Name of subsidiary company.'), + sa.Column('subsidiary_company_location', sa.Text(), nullable=True, comment='Location of subsidiary company.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), + sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.') + ) + op.create_table('core_sec10k__quarterly_filings', + sa.Column('filename_sec10k', sa.Text(), nullable=False, comment='Name of filing as provided by SEC data 
portal.'), + sa.Column('central_index_key', sa.Text(), nullable=True, comment='Identifier of the company in SEC database.'), + sa.Column('company_name', sa.Text(), nullable=True, comment='Name of company submitting SEC 10k filing.'), + sa.Column('sec10k_version', sa.Text(), nullable=True, comment='Specific version of SEC 10k filed.'), + sa.Column('filing_date', sa.Date(), nullable=True, comment='Date filing was submitted.'), + sa.Column('exhibit_21_version', sa.Text(), nullable=True, comment='Version of exhibit 21 submitted (if applicable).'), + sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), + sa.PrimaryKeyConstraint('filename_sec10k', name=op.f('pk_core_sec10k__quarterly_filings')) + ) + op.drop_table('core_sec10k__company_information') + op.drop_table('core_sec10k__filings') + op.drop_table('core_sec10k__exhibit_21_company_ownership') + with op.batch_alter_table('out_sec10k__parents_and_subsidiaries', schema=None) as batch_op: + batch_op.drop_constraint('fk_out_sec10k__parents_and_subsidiaries_utility_id_eia_core_eia860__scd_utilities', type_='foreignkey') + batch_op.create_foreign_key(batch_op.f('fk_out_sec10k__parents_and_subsidiaries_utility_id_eia_core_eia__entity_utilities'), 'core_eia__entity_utilities', ['utility_id_eia'], ['utility_id_eia']) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('out_sec10k__parents_and_subsidiaries', schema=None) as batch_op: + batch_op.drop_constraint(batch_op.f('fk_out_sec10k__parents_and_subsidiaries_utility_id_eia_core_eia__entity_utilities'), type_='foreignkey') + batch_op.create_foreign_key('fk_out_sec10k__parents_and_subsidiaries_utility_id_eia_core_eia860__scd_utilities', 'core_eia860__scd_utilities', ['utility_id_eia', 'report_date'], ['utility_id_eia', 'report_date']) + + op.create_table('core_sec10k__exhibit_21_company_ownership', + sa.Column('filename_sec10k', sa.TEXT(), nullable=True), + sa.Column('subsidiary_company_name', sa.TEXT(), nullable=True), + sa.Column('fraction_owned', sa.FLOAT(), nullable=True), + sa.Column('report_date', sa.DATE(), nullable=True), + sa.Column('subsidiary_company_location', sa.TEXT(), nullable=True) + ) + op.create_table('core_sec10k__filings', + sa.Column('filename_sec10k', sa.TEXT(), nullable=False), + sa.Column('central_index_key', sa.TEXT(), nullable=True), + sa.Column('company_name', sa.TEXT(), nullable=True), + sa.Column('sec10k_version', sa.TEXT(), nullable=True), + sa.Column('exhibit_21_version', sa.TEXT(), nullable=True), + sa.Column('report_date', sa.DATE(), nullable=True), + sa.Column('filing_date', sa.DATE(), nullable=True), + sa.PrimaryKeyConstraint('filename_sec10k', name='pk_core_sec10k__filings') + ) + op.create_table('core_sec10k__company_information', + sa.Column('filename_sec10k', sa.TEXT(), nullable=False), + sa.Column('filer_count', sa.INTEGER(), nullable=False), + sa.Column('company_information_block', sa.TEXT(), nullable=False), + sa.Column('company_information_block_count', sa.INTEGER(), nullable=False), + sa.Column('company_information_fact_name', sa.TEXT(), nullable=False), + sa.Column('company_information_fact_value', sa.TEXT(), nullable=False), + sa.Column('report_date', sa.DATE(), nullable=True), + sa.PrimaryKeyConstraint('filename_sec10k', 'filer_count', 'company_information_block', 'company_information_block_count', 
'company_information_fact_name', 'company_information_fact_value', name='pk_core_sec10k__company_information') + ) + op.drop_table('core_sec10k__quarterly_filings') + op.drop_table('core_sec10k__quarterly_exhibit_21_company_ownership') + op.drop_table('core_sec10k__quarterly_company_information') + # ### end Alembic commands ### diff --git a/src/pudl/analysis/pudl_models.py b/src/pudl/analysis/pudl_models.py index 314af1124..0c87414c0 100644 --- a/src/pudl/analysis/pudl_models.py +++ b/src/pudl/analysis/pudl_models.py @@ -27,7 +27,7 @@ def _year_quarter_to_date(year_quarter: pd.Series) -> pd.Series: io_manager_key="pudl_io_manager", group_name="pudl_models", ) -def core_sec10k__company_information() -> pd.DataFrame: +def core_sec10k__quarterly_company_information() -> pd.DataFrame: """Basic company information extracted from SEC10k filings.""" df = _load_table_from_gcs("core_sec10k__company_information") df = df.rename( @@ -50,7 +50,7 @@ def core_sec10k__company_information() -> pd.DataFrame: io_manager_key="pudl_io_manager", group_name="pudl_models", ) -def core_sec10k__exhibit_21_company_ownership() -> pd.DataFrame: +def core_sec10k__quarterly_exhibit_21_company_ownership() -> pd.DataFrame: """Company ownership information extracted from sec10k exhibit 21 attachments.""" df = _load_table_from_gcs("core_sec10k__exhibit_21_company_ownership") df = df.rename( @@ -74,7 +74,7 @@ def core_sec10k__exhibit_21_company_ownership() -> pd.DataFrame: io_manager_key="pudl_io_manager", group_name="pudl_models", ) -def core_sec10k__filings() -> pd.DataFrame: +def core_sec10k__quarterly_filings() -> pd.DataFrame: """Metadata on all 10k filings submitted to SEC.""" df = _load_table_from_gcs("core_sec10k__filings") df = df.rename( @@ -120,5 +120,13 @@ def out_sec10k__parents_and_subsidiaries() -> pd.DataFrame: "standard_industrial_classification" ].str.extract(r"(.+)\[(\d{4})\]") df["industry_id_sic"] = df["industry_id_sic"].astype("string") + # Some utilities harvested from EIA 
861 data that don't show up in our entity + # tables. These didn't end up improving coverage, and so will be removed upstream. + # Hack for now is to just drop them so the FK constraint is respected. + # See https://github.com/catalyst-cooperative/pudl/issues/4050 + bad_utility_ids = [ + 3579, # Cirro Group, Inc. in Texas + ] + df = df[~df.utility_id_eia.isin(bad_utility_ids)] return df diff --git a/src/pudl/metadata/resources/eia860.py b/src/pudl/metadata/resources/eia860.py index 4c5585bf9..617bad6d7 100644 --- a/src/pudl/metadata/resources/eia860.py +++ b/src/pudl/metadata/resources/eia860.py @@ -500,6 +500,10 @@ "core_eia861__yearly_utility_data_misc", "core_eia861__yearly_utility_data_nerc", "core_eia861__yearly_utility_data_rto", + "core_sec10k__quarterly_filings", + "core_sec10k__quarterly_exhibit_21_company_ownership", + "core_sec10k__quarterly_company_information", + "out_sec10k__parents_and_subsidiaries", ], }, }, diff --git a/src/pudl/metadata/resources/sec10k.py b/src/pudl/metadata/resources/sec10k.py index 277b930d0..7fa05f3e6 100644 --- a/src/pudl/metadata/resources/sec10k.py +++ b/src/pudl/metadata/resources/sec10k.py @@ -3,7 +3,7 @@ from typing import Any RESOURCE_METADATA: dict[str, dict[str, Any]] = { - "core_sec10k__filings": { + "core_sec10k__quarterly_filings": { "description": "Metadata describing all submitted SEC 10k filings.", "schema": { "fields": [ @@ -23,7 +23,7 @@ "etl_group": "pudl_models", "field_namespace": "sec10k", }, - "core_sec10k__exhibit_21_company_ownership": { + "core_sec10k__quarterly_exhibit_21_company_ownership": { "description": "Company ownership data extracted from Exhibit 21 attachments to SEC 10k filings.", "schema": { "fields": [ @@ -38,7 +38,7 @@ "etl_group": "pudl_models", "field_namespace": "sec10k", }, - "core_sec10k__company_information": { + "core_sec10k__quarterly_company_information": { "description": "Company information extracted from SEC 10k filings.", "schema": { "fields": [ @@ -64,7 +64,11 @@ 
"field_namespace": "sec10k", }, "out_sec10k__parents_and_subsidiaries": { - "description": "Denormalized table containing SEC 10k company information with mapping between subsidiary and parent companies, as well as a linkage to EIA companies.", + "description": ( + "Denormalized table containing SEC 10-K company information with mapping " + "between subsidiary and parent companies, as well as a linkage to EIA " + "utilities." + ), "schema": { "fields": [ "company_id_sec10k",