Skip to content

Commit

Permalink
Add JRC-PPDB-OPEN for some EU generation.
Browse files Browse the repository at this point in the history
Add a new data source: JRC-PPDB-OPEN
(https://ec.europa.eu/jrc/en/publication/joint-research-centre-power-plant-database-jrc-ppdb)

This source from the Joint Research Centre of the European Union links a
few datasets together, including the Global Power Plant Database with
ENTSO-E.

Since JRC-PPDB-OPEN includes a measurement of the coverage of reported
data (data gaps may exist in the ENTSO-E reporting for any generating
unit), a decision was made such that all generating units for a
particular GPPD plant would need to have 95% time coverage for a year to
have that information aggregated and used as the 'reported' generation
value for a GPPD plant. This means there are far fewer plants with
reported generation than plants that are matched in JRC-PPDB-OPEN. This
should be remedied in the future and is a patch for an underlying data
representation issue.

Since some of the WRI-collected plants (formerly the 'Fusion Tables')
have existing generation values, but the source of those generation
values is not well identified, a new column is added to the 'WRI
country databases' which seeks to track the source of the singular
generation field in the WRI databases. This field has been partially
filled out for the accessible sources and should be given attention in
the future.

In total, this data source adds reported generation values as follows:
2015: +257 more plants with generation
2016: +400 more plants with generation
2017: +457 more plants with generation

This update does not affect the number of plants or total capacity of
the database.

Database version 1.2.2
  • Loading branch information
loganbyers committed Feb 5, 2020
1 parent df7b0be commit 13b6a3b
Show file tree
Hide file tree
Showing 145 changed files with 425,186 additions and 399,376 deletions.
48 changes: 22 additions & 26 deletions build_databases/build_database_WRI.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,6 @@
# extract powerplant information from file(s)
print(u"Reading in plants...")

# specify column names used in raw file
COLNAMES = ["Power Plant ID", "Name", "Fuel", "Secondary Fuel", "Capacity (MW)",
"Location", "Operational Status", "Commissioning Date",
"Units", "Owner", "Annual Generation (GWh)", "Source", "URL", "Country",
"Latitude", "Longitude", "Geolocation Source", "Year of Data"]

# track IDs that are assigned to plants in two different countries (likely an error)
overlapping_ids = {}
countries_with_zero_plants = []
Expand All @@ -56,26 +50,26 @@
plant_fuel_capacities = {}

with open(os.path.join(RAW_FILE_DIRECTORY, afile), 'rU') as f:
datareader = csv.reader(f)
headers = datareader.next()
datareader = csv.DictReader(f)
try:
id_col = headers.index(COLNAMES[0])
name_col = headers.index(COLNAMES[1])
primary_fuel_col = headers.index(COLNAMES[2])
other_fuel_col = headers.index(COLNAMES[3])
capacity_col = headers.index(COLNAMES[4])
location_col = headers.index(COLNAMES[5])
status_col = headers.index(COLNAMES[6])
commissioning_year_col = headers.index(COLNAMES[7])
owner_col = headers.index(COLNAMES[9])
generation_col = headers.index(COLNAMES[10])
source_col = headers.index(COLNAMES[11])
url_col = headers.index(COLNAMES[12])
country_col = headers.index(COLNAMES[13])
latitude_col = headers.index(COLNAMES[14])
longitude_col = headers.index(COLNAMES[15])
geolocation_source_col = headers.index(COLNAMES[16])
year_of_data_col = headers.index(COLNAMES[17])
id_col = "Power Plant ID"
name_col = "Name"
primary_fuel_col = "Fuel"
other_fuel_col = "Secondary Fuel"
capacity_col = "Capacity (MW)"
location_col = "Location"
status_col = "Operational Status"
commissioning_year_col = "Commissioning Date"
owner_col = "Owner"
generation_col = "Annual Generation (GWh)"
generation_source_col = "Generation Data Source"
source_col = "Source"
url_col = "URL"
country_col = "Country"
latitude_col = "Latitude"
longitude_col = "Longitude"
geolocation_source_col = "Geolocation Source"
year_of_data_col = "Year of Data"
except:
print(u"- ERROR: One or more columns missing in {0}, skipping...".format(afile))
continue
Expand Down Expand Up @@ -133,9 +127,11 @@
try:
gen_gwh = float(pw.format_string(row[generation_col].replace(",", "")))
gen_year = int(pw.format_string(row[year_of_data_col]))
generation = pw.PlantGenerationObject.create(gen_gwh, year=gen_year)
gen_source = pw.format_string(row[generation_source_col])
except:
generation = pw.NO_DATA_OTHER
else:
generation = pw.PlantGenerationObject.create(gen_gwh, year=gen_year, source=gen_source)
try:
owner = pw.format_string(row[owner_col])
except:
Expand Down
73 changes: 73 additions & 0 deletions build_databases/build_global_power_plant_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,79 @@
print("...skipped {0} plants ({1} MW) for {2}.".format(_vals[0], _vals[1], _country))


# STEP 3.9: Add in multinational generation datasets
#
# JRC-PPDB-OPEN links generating units (keyed by `eic_g`) to GPPD plants (keyed
# by `WRI_id`) and reports, per unit and year, a generation value together with
# the share of the year covered by reported data (`time_coverage`).
# A plant only receives an aggregated generation value for a year when *every*
# linked unit meets the coverage threshold and none of them is blacklisted.
COUNTRY_DATABASE_FILE = pw.make_file_path(fileType="src_bin", filename="COUNTRY-Database.bin")
JRC_OPEN_PERFORMANCE = pw.make_file_path('raw', 'JRC-PPDB-OPEN', 'JRC_OPEN_PERFORMANCE.csv')
JRC_OPEN_UNITS = pw.make_file_path('raw', 'JRC-PPDB-OPEN', 'JRC_OPEN_UNITS.csv')
JRC_OPEN_LINKAGES = pw.make_file_path('raw', 'JRC-PPDB-OPEN', 'JRC_OPEN_LINKAGES.csv')
JRC_OPEN_TEMPORAL = pw.make_file_path('raw', 'JRC-PPDB-OPEN', 'JRC_OPEN_TEMPORAL.csv')
JRC_BLACKLIST = set([
    # blacklist created looking at obviously-wrong matches based on country designation
    # eic_g,            # bad_wri_id
    '50WG00000001097W',  # 'BRA0030768'
    '48W000000SUTB-1P',  # 'USA0060878'
    '26WUCNTRLDSCND24',  # 'CAN0008429'
    '26WUCNTRLDSCND16',  # 'CAN0008429'
    '50WG000000019861',  # 'BRA0029858'
    '50WG000000019853',  # 'BRA0029858'
    '50WGI00000019875',  # 'BRA0029858'
    '48W000000ROOS-1P',  # 'USA0006202'
])

# {wri_id: [eic_g_1, eic_g_2, ...], ...}
gppd_ppdb_link = {}
with open(JRC_OPEN_LINKAGES) as fin:
    for row in csv.DictReader(fin):
        wri_id = row['WRI_id']
        gen_id = row['eic_g']
        if gen_id:  # some blank gen_ids, which currently don't have wri_id matches
            # append in place instead of rebuilding the list for every row
            gppd_ppdb_link.setdefault(wri_id, []).append(gen_id)

# {yr: {eic_g: (gen, time_coverage), ...}, ...}
# keys and values are kept as the raw strings read from the CSV
ppdb_generation = {str(yr): {} for yr in [2015, 2016, 2017, 2018]}
with open(JRC_OPEN_TEMPORAL) as fin:
    for row in csv.DictReader(fin):
        # setdefault so a year outside the pre-seeded range is stored rather
        # than aborting the whole build with a KeyError
        year_data = ppdb_generation.setdefault(row['cyear'], {})
        # value is in MWh according to `datapackage.json` in JRC-PPDB-OPEN
        year_data[row['eic_g']] = (row['Generation'], row['time_coverage'])

# desired lookup structure: {plant1: {year1: val, year2: val2, ...}, ...}
agg_gen_by_gppd = {}
# minimum per-unit time availability; compared numerically, because comparing
# the raw strings would reject e.g. '0.95' (it sorts before '0.950')
time_threshold = 0.95
# WRI plants that aren't having the estimation applied [(plant1, yearA), ...]
jrc_skipped_plants = []
for wri_id, gen_ids in gppd_ppdb_link.items():
    plant_totals = {}
    for year in [2015, 2016, 2017]:
        year_data = ppdb_generation[str(year)]
        year_gen_mwh = 0.0
        for gen_id in gen_ids:
            # a unit without a record for the year counts as zero coverage
            gen, time_coverage = year_data.get(gen_id, (0, '0.000'))
            try:
                gen_mwh = float(gen)
                coverage = float(time_coverage)
            except (TypeError, ValueError):
                # blank or malformed value: treat like insufficient coverage
                gen_mwh = 0.0
                coverage = 0.0
            # `not >=` (rather than `<`) so that a NaN coverage is rejected too
            if gen_id in JRC_BLACKLIST or not coverage >= time_threshold:
                jrc_skipped_plants.append((wri_id, year))
                break
            year_gen_mwh += gen_mwh
        else:
            # every linked unit was accepted:
            # convert MWh to GWh and assign value for the year
            plant_totals[year] = year_gen_mwh / 1000.0
    agg_gen_by_gppd[wri_id] = plant_totals

for pid, pp in core_database.items():
    plant_totals = agg_gen_by_gppd.get(pid)
    if not plant_totals:
        continue
    # NOTE(review): this replaces whatever `pp.generation` held before rather
    # than merging with it -- confirm that is the intended precedence.
    pp.generation = [
        pw.PlantGenerationObject.create(val, year=yr, source='JRC-PPDB-OPEN')
        for yr, val in sorted(plant_totals.items())
    ]

# STEP 4: Estimate generation for plants without reported generation for target year
count_plants_with_generation = 0
#for plant_id,plant in core_database.iteritems():
Expand Down
2 changes: 1 addition & 1 deletion output_database/DATABASE_VERSION
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
1.2.1
1.2.2

Loading

0 comments on commit 13b6a3b

Please sign in to comment.