diff --git a/.gitignore b/.gitignore index 74db7c23..5bcd0593 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,6 @@ electricitylci/output/* !electricitylci/data/*.yml # Whitelist everything except this! -electricitylci/data/EFs/.DS_Store \ No newline at end of file +electricitylci/data/EFs/.DS_Store + +Notes.txt \ No newline at end of file diff --git a/README.md b/README.md index 6c883a10..c1f220c1 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,9 @@ The `main()` method has four steps: * ELCI_2020 * ELCI_2021 * ELCI_2022 + - Version 2.1 baselines include: + * ELCI_2023 + * ELCI_2024 - These configurations statically change the module, model_config.py, which is an object read by other modules. - To change configuration values, edit the YAML before running the code. 2. `run_generation()` diff --git a/electricitylci/coal_upstream.py b/electricitylci/coal_upstream.py index c9417476..1c5f7e26 100644 --- a/electricitylci/coal_upstream.py +++ b/electricitylci/coal_upstream.py @@ -58,7 +58,7 @@ transportation, but still mainly represents 2016. Last updated: - 2025-06-09 + 2025-09-05 """ __all__ = [ "basin_codes", # Globals @@ -436,7 +436,8 @@ def eia_7a_download(year, save_path): see https://github.com/USEPA/ElectricityLCI/issues/230 for a solution. """ eia7a_base_url = 'http://www.eia.gov/coal/data/public/xls/' - name = 'coalpublic{}.xls'.format(year) + name = ('coalpublic{}.xls'.format(year) if year <= 2022 else + 'coalpublic{}.xlsx'.format(year)) url = eia7a_base_url + name try: os.makedirs(save_path) @@ -1675,11 +1676,18 @@ def read_eia7a_public_coal(year): return_name=False) # If you're here, then see the following for hotfix: # https://github.com/USEPA/ElectricityLCI/issues/230 - eia7a_df = pd.read_excel( - eia7a_path, - sheet_name='Hist_Coal_Prod', - skiprows=3 - ) + try: + eia7a_df = pd.read_excel( + eia7a_path, + sheet_name='Hist_Coal_Prod', + skiprows=3 + ) + except ValueError: + raise ValueError( + f'Error reading {eia7a_path}. 
Please see ' + 'https://github.com/USEPA/ElectricityLCI/issues/230 ' + 'for a solution' + ) eia7a_df = _clean_columns(eia7a_df) return eia7a_df diff --git a/electricitylci/combinator.py b/electricitylci/combinator.py index a7029b3d..40f285b0 100644 --- a/electricitylci/combinator.py +++ b/electricitylci/combinator.py @@ -254,7 +254,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs): # Hoping to reduce memory usage or at least make more of it available # for the later groupby. del(arg) - # See https://github.com/USEPA/fedelemflowlist + # See https://github.com/FLCAC-admin/fedelemflowlist # The mapping data includes a conversion factor to convert everything into # standard units (e.g., kg, MJ, m2*a). Note that 'SourceFlowContext' is # already in lowercase letters, which is why no change happens below. @@ -415,7 +415,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs): # columns to be converted to objects, so undoing that here for col in actual_quant_columns: upstream_mapped_df[col]=upstream_mapped_df[col].astype(float) - + # Preserve unmapped resource flows; # copy over the flow name, compartment and units and # set conversion factor equal to 1.0. @@ -492,7 +492,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs): "Source", "Year", ] + actual_quant_columns - + if "input" in upstream_columns: final_columns = final_columns + ["input"] @@ -587,9 +587,9 @@ def concat_clean_upstream_and_plant(pl_df, up_df): ) # 3/19/2025 MBJ: more memory management. When this process is called from # __init__.combine_upstream_and_gen_df the up_df is 12GB big. Previously - # we used a merge to add all the regional columns, but that requires a - # tremendous amount of memory. Invidually assigning columns will be a bit - # slower but will greatly reduce memory usage...and ultimately end up + # we used a merge to add all the regional columns, but that requires a + # tremendous amount of memory. 
Individually assigning columns will be a bit + # slower but will greatly reduce memory usage...and ultimately end up # faster if your computer tends to run out of memory using the previous # merge. for col in existing_region_cols: @@ -598,10 +598,10 @@ def concat_clean_upstream_and_plant(pl_df, up_df): # HOTFIX: during the merge, a lot eGRID_IDs are unmatched, so fill them in! # NOTE: triggers a pandas futurewarning on downcasting object datatypes. # 3/19/2025 - these would be instances where there is a plant_id in up_df - # but not a matching eGRID_ID. With the new, by-column mapping performed above - # eGRID_ID does not exist so no Nans to fill. In previous versions, I believe - # the use of fillnans with plant_id being the source would result in the - # same thing as below. + # but not a matching eGRID_ID. With the new, by-column mapping performed + # above eGRID_ID does not exist so no Nans to fill. In previous versions, + # I believe the use of fillnans with plant_id being the source would result + # in the same thing as below. up_df['eGRID_ID'] = up_df['plant_id'].astype("int") # NOTE: the only columns in up_df not in pl_df should be: diff --git a/electricitylci/data/process_metadata.yml b/electricitylci/data/process_metadata.yml index 270d265a..9cc8f040 100644 --- a/electricitylci/data/process_metadata.yml +++ b/electricitylci/data/process_metadata.yml @@ -857,15 +857,16 @@ coal_upstream: gas_upstream: techno_intro: &gas_upstream_techno_intro - - 'The cradle-to-gate inventory for production of gas aggregated to basin. ' + - 'The cradle-to-gate inventory for production of gas aggregated to basin or region, depending on the year selected in the model configuration. ' techno_process: &gas_upstream_techno_process - - 'The NETL natural gas life cycle model includes parameters to generate inventories for natural gas extraction based on basin and geology which determines the gas extraction type (e.g., Appalachian Shale using hydraulic fracturing). 
- 2016 natural gas production then informs the amount of each type of technology/region that form the mix in the regions. + - 'The NETL natural gas life cycle model includes parameters to generate inventories for natural gas extraction based on region or basin and geology which determines the gas extraction type (e.g., Appalachian Shale using hydraulic fracturing). + 2016 or 2020 natural gas production then informs the amount of each type of technology/region that form the mix in the regions, depending on the year selected in the model configuration. These can be further aggregated to a US average. - More details are in the natural gas upstream report at the link below - - https://www.netl.doe.gov/energy-analysis/details?id=3198' + More details are in the natural gas upstream report at the following links. + Link for 2016: https://www.netl.doe.gov/energy-analysis/details?id=4f43cb3f-c0d7-482e-bf01-39995a7c7497 + Link for 2020: https://www.netl.doe.gov/energy-analysis/details?id=546d4009-c43b-43f5-bcc9-64d5e63fc8d5 + ' Description: - *gas_upstream_techno_intro diff --git a/electricitylci/egrid_emissions_and_waste_by_facility.py b/electricitylci/egrid_emissions_and_waste_by_facility.py index 337824ec..f656e71a 100644 --- a/electricitylci/egrid_emissions_and_waste_by_facility.py +++ b/electricitylci/egrid_emissions_and_waste_by_facility.py @@ -112,6 +112,7 @@ def get_combined_stewicombo_file(model_specs): emissions_and_wastes_by_facility = get_combined_stewicombo_file( model_config) len(emissions_and_wastes_by_facility) + # for 'ELCI_2023': 90238 [251125; TWD] # for 'ELCI_2020': 88005 [250416; TWD] # for 'ELCI_1': 106284 (recorded as 88310 [250416;TWD]) diff --git a/electricitylci/egrid_facilities.py b/electricitylci/egrid_facilities.py index db0844e9..f1035906 100644 --- a/electricitylci/egrid_facilities.py +++ b/electricitylci/egrid_facilities.py @@ -156,6 +156,7 @@ def make_egrid_subregion_ref(year): # Rename columns. 
NOTE: missing names resolved # (https://github.com/USEPA/standardizedinventories/issues/153) egrid_facilities.rename(columns={ + 'Plant primary fuel category': 'FuelCategory', # added for 2023 STEWI data 'Plant primary coal/oil/gas/ other fossil fuel category': 'FuelCategory', 'Plant primary fuel': 'PrimaryFuel', 'eGRID subregion acronym': 'Subregion', diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py index 7262a604..35c2e4e0 100644 --- a/electricitylci/eia_trans_dist_grid_loss.py +++ b/electricitylci/eia_trans_dist_grid_loss.py @@ -74,13 +74,16 @@ def eia_trans_dist_download_extract(year): Parameters ---------- - year : str + year : str, int Analysis year Returns ------- pandas.DataFrame """ + # check in case year is passed as an int + if isinstance(year,int): + year = str(year) eia_trans_dist_loss = pd.DataFrame() old_path = os.getcwd() if os.path.exists(f"{paths.local_path}/t_and_d_{year}"): @@ -96,38 +99,22 @@ def eia_trans_dist_download_extract(year): filename = f"{STATE_ABBREV[key]}.xlsx" if not os.path.exists(filename): logging.info(f"Downloading archive data for {STATE_ABBREV[key]}") - # HOTFIX: URLs for two-word states have space omitted. 
- url_a = ( - "https://www.eia.gov/electricity/state/archive/" - + year - + "/" - + key.replace(" ", "") - + "/xls/" - + filename - ) - url_b = ( - "https://www.eia.gov/electricity/state/" - + key.replace(" ", "") - + "/xls/" - + filename - ) + + url_key = key.replace(" ", "") + url = f"https://www.eia.gov/electricity/state/archive/{year}/{url_key}/xls/" + if int(year) > 2023: + url = url.replace(f"/archive/{year}/{url_key}/", "/") + url += "SEP%20Tables%20for%20" + f"{STATE_ABBREV[key].upper()}.xlsx" + elif int(year) == 2023: + url += "SEP%20Tables%20for%20" + f"{STATE_ABBREV[key].upper()}.xlsx" + else: + url += f"{STATE_ABBREV[key]}.xlsx" + + r = requests.get(url, timeout=20) # HOTFIX: https://github.com/USEPA/ElectricityLCI/issues/235 #adding 20s timeout to avoid long delays due to server issues. - r = requests.get(url_a, timeout=20) - r_head = r.headers.get("Content-Type", "") - if not r.ok or r_head.startswith("text"): - logging.info(f"Trying alternative site {STATE_ABBREV[key]}") - #adding 20s timeout to avoid long delays due to server issues. - r = requests.get(url_b, timeout=20) - r_head = r.headers.get("Content-Type", "") - - if r.ok and not r_head.startswith("text"): - with open(filename, 'wb') as f: + with open (filename, "wb") as f: f.write(r.content) - else: - logging.error( - f"No TD loss data for {STATE_ABBREV[key]} {year}") - try: df = pd.read_excel( filename, diff --git a/electricitylci/generation.py b/electricitylci/generation.py index c8c9a302..6af3bcc4 100644 --- a/electricitylci/generation.py +++ b/electricitylci/generation.py @@ -929,7 +929,9 @@ def create_generation_process_df(): # numbers and maps them to eGRID facility numbers. # NOTE: there are unmatched facilities that are found in FRS_bridge, # but not in EIA (e.g., EGRID, RCRA). 
- emissions_and_wastes_by_facility = get_combined_stewicombo_file(model_specs) + emissions_and_wastes_by_facility = get_combined_stewicombo_file( + model_specs + ) ewf_df = pd.merge( left=emissions_and_wastes_by_facility, right=eia860_FRS, diff --git a/electricitylci/globals.py b/electricitylci/globals.py index 0c9916d0..acf84a24 100644 --- a/electricitylci/globals.py +++ b/electricitylci/globals.py @@ -233,6 +233,8 @@ RENEWABLE_VINTAGES = [2016, 2020] '''list : The valid years for renewable inventories (i.e., 2016 and 2020).''' +NG_MODEL_YEARS = [2016, 2020] +'''list : The valid years for natural gas model (i.e., 2016 and 2020).''' ############################################################################## # FUNCTIONS diff --git a/electricitylci/model_config.py b/electricitylci/model_config.py index 9a052654..cb229187 100644 --- a/electricitylci/model_config.py +++ b/electricitylci/model_config.py @@ -17,6 +17,7 @@ from electricitylci.globals import output_dir from electricitylci.globals import COAL_MODEL_YEARS from electricitylci.globals import RENEWABLE_VINTAGES +from electricitylci.globals import NG_MODEL_YEARS ############################################################################## @@ -141,6 +142,8 @@ class ModelSpecs: Absolute path to JSON-LD zip output file. File name includes the model name and current time stamp and is located by default in the output directory (see globals.py). + ng_model_year : int + The natural gas model year (e.g., 2016 or 2020). """ def __init__(self, model_specs, model_name): """Class initialization. @@ -201,6 +204,7 @@ def __init__(self, model_specs, model_name): f"{output_dir}/{model_name}_jsonld_" f"{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.zip" ) + self.ng_model_year = model_specs["ng_model_year"] ############################################################################## @@ -330,9 +334,18 @@ def check_model_specs(model_specs): err_str += " or ".join([str(x) for x in COAL_MODEL_YEARS]) err_str += " not %s!" 
% model_specs['coal_model_year'] raise ConfigurationError(err_str) + if not model_specs['renewable_vintage'] in RENEWABLE_VINTAGES: err_str = "The renewable inventory vintage must be one of " err_str += " or ".join([str(x) for x in RENEWABLE_VINTAGES]) err_str += " not %s!" % model_specs['renewable_vintage'] raise ConfigurationError(err_str) + + if not model_specs['ng_model_year'] in NG_MODEL_YEARS: + err_str = "The natural gas model year must be one of " + err_str += " or ".join([str(x) for x in NG_MODEL_YEARS]) + err_str += " not %s!" % model_specs['ng_model_year'] + raise ConfigurationError(err_str) + logging.info("Checks passed!") + diff --git a/electricitylci/modelconfig/ELCI_1_config.yml b/electricitylci/modelconfig/ELCI_1_config.yml index f5699bbb..ff30a42b 100644 --- a/electricitylci/modelconfig/ELCI_1_config.yml +++ b/electricitylci/modelconfig/ELCI_1_config.yml @@ -31,6 +31,14 @@ replace_egrid: true # construction LCI. coal_model_year: 2020 +# The NG baseline model year is used to determine which NG inventory to use. +# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. +# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2016 + # NETL developed profiles for renewable generation to capture construction # and O&M impacts (e.g., solar PV manufacturing and power plant operations). # There are two vintages for renewable inventories: 2016 and 2020. The 2016 diff --git a/electricitylci/modelconfig/ELCI_2020_config.yml b/electricitylci/modelconfig/ELCI_2020_config.yml index 36bf05f3..2effc7a9 100644 --- a/electricitylci/modelconfig/ELCI_2020_config.yml +++ b/electricitylci/modelconfig/ELCI_2020_config.yml @@ -31,6 +31,14 @@ replace_egrid: true # construction LCI. coal_model_year: 2023 +# The NG baseline model year is used to determine which NG inventory to use. 
+# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. +# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2020 + # NETL developed profiles for renewable generation to capture construction # and O&M impacts (e.g., solar PV manufacturing and power plant operations). # There are two vintages for renewable inventories: 2016 and 2020. The 2016 diff --git a/electricitylci/modelconfig/ELCI_2021_config.yml b/electricitylci/modelconfig/ELCI_2021_config.yml index ea680a96..4fc8d5a9 100644 --- a/electricitylci/modelconfig/ELCI_2021_config.yml +++ b/electricitylci/modelconfig/ELCI_2021_config.yml @@ -31,6 +31,14 @@ replace_egrid: true # construction LCI. coal_model_year: 2023 +# The NG baseline model year is used to determine which NG inventory to use. +# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. +# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2020 + # NETL developed profiles for renewable generation to capture construction # and O&M impacts (e.g., solar PV manufacturing and power plant operations). # There are two vintages for renewable inventories: 2016 and 2020. The 2016 diff --git a/electricitylci/modelconfig/ELCI_2022_config.yml b/electricitylci/modelconfig/ELCI_2022_config.yml index 64e75bc5..76046f1a 100644 --- a/electricitylci/modelconfig/ELCI_2022_config.yml +++ b/electricitylci/modelconfig/ELCI_2022_config.yml @@ -31,6 +31,14 @@ replace_egrid: true # construction LCI. coal_model_year: 2023 +# The NG baseline model year is used to determine which NG inventory to use. 
+# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. +# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2020 + # NETL developed profiles for renewable generation to capture construction # and O&M impacts (e.g., solar PV manufacturing and power plant operations). # There are two vintages for renewable inventories: 2016 and 2020. The 2016 diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml new file mode 100644 index 00000000..17d3eca2 --- /dev/null +++ b/electricitylci/modelconfig/ELCI_2023_config.yml @@ -0,0 +1,158 @@ +# MODEL CONFIG FILE +# This file contains parameters that can be changed by a user. + +# MODEL AND DATA YEARS +# These parameters determine what year the outputs represent and which years +# of data are used from each source. + +# The target year is used to determine the temporal correlation of data with +# the electricity generation processes, which can be used in uncertainty +# calculations. +electricity_lci_target_year: 2025 + +# Select a regional aggregation from "eGRID", "NERC", "BA", "US", "FERC", +# and "EIA". The EPA_eGRID trading method can only be used with "eGRID". +# The NETL_BA_trading method can only be used with "BA" and "FERC". +regional_aggregation: 'BA' + +# If egrid is not used as a primary data source then set "replace_egrid" +# to True. This will trigger the use of EPA CEMS and EIA data as substitutes +# for eGRID. Replacing eGRID will also use EIA generation data to determine +# the generation mix. +egrid_year: 2023 +eia_gen_year: 2023 +replace_egrid: true + +# In 2023, NETL published the Cradle-to-Gate Life Cycle Analysis Baseline +# for U.S. 
Coal Mining and Delivery (https://www.osti.gov/biblio/2370100), +# which supersedes the 2020 model inventory used in the original baseline. +# Select between the 2020 and 2023 model inventories. This impacts both +# the mining and transportation LCIs, but does not impact the coal power plant +# construction LCI. +coal_model_year: 2023 + +# The NG baseline model year is used to determine which NG inventory to use. +# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. +# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2020 + +# NETL developed profiles for renewable generation to capture construction +# and O&M impacts (e.g., solar PV manufacturing and power plant operations). +# There are two vintages for renewable inventories: 2016 and 2020. The 2016 +# inventories combine construction and operations (recommended for use when +# re-creating the 2016 baseline). In the 2020 inventory, these were separated +# into individual inventories (recommended for future baselines). +include_renewable_generation: true +renewable_vintage: 2020 + +# NETL has developed profiles for power plant water use. +# Set to true to include these impacts. +include_netl_water: true + +# Upstream data from NETL can be included in the outputs. +include_upstream_processes: true + +# For the inventories of interest, see vintages available from StEWI +# (https://github.com/USEPA/standardizedinventories). +# If you plan to run a baseline, use the prescribed stewicombo file for +# consistent results. Stewicombo files are listed in their corresponding folder +# on the AWS site (https://dmap-data-commons-ord.s3.amazonaws.com/index.html). +inventories_of_interest: + eGRID: 2023 + TRI: 2023 + NEI: 2020 + RCRAInfo: 2023 +stewicombo_file: 'ELCI_2023_v1.2.1_3687292' + +# Provide uncertainty estimates for emissions. 
+calculate_uncertainty: true + + +# API DATA SOURCES +# The API key to access NETL EDX data resources (https://edx.netl.doe.gov). +edx_api: "" + +# If replacing eGRID, then EPA CEMS from CAMPD is used. This data is available +# from EPA's API (https://www.epa.gov/power-sector/cam-api-portal). Request +# a free API key and copy-and-paste it here: +epa_cam_api: "" + +# If using NETL's trading method, then EIA bulk U.S. Electric System +# Operating Data are used. The data are available in two formats: +# bulk zip (EBA.zip) and Opendata API. Choose the path (note that since +# March 2024, both routes include data from 2019). To get 2016-2018 data, +# use the archived bulk zip file (`use_eia_bulk_zip` = true) and bypass the +# vintage check (`bypass_bulk_vintage` = true). The vintage check triggers +# a new download of EBA.zip every 30 days, as defined by VINTAGE_THRESH +# global parameter in `bulk_eia_data` and the EBA manifest text file. +# To register for a free API key, go to: https://www.eia.gov/opendata/, and +# copy-and-paste it for `eia_api` below. +use_eia_bulk_zip: false +bypass_bulk_vintage: true +eia_api: "" + + +# GENERATOR FILTERS +# These parameters determine if any power plants are filtered out +include_only_egrid_facilities_with_positive_generation: true +filter_on_efficiency: true +egrid_facility_efficiency_filters: + lower_efficiency: 10 + upper_efficiency: 100 + +# ELCI creates life cycle processes for each fuel type. If you only want to +# include power plants with a minimum amount of generation from a single fuel, +# make sure this parameter is set to true. +filter_on_min_plant_percent_generation_from_primary_fuel: true +min_plant_percent_generation_from_primary_fuel_category: 90 + +# If the parameter above is set to true, one may still want to keep the plants +# that do not meet the threshold. This setting will mark those power plants as +# MIXED and generate life cycle processes for them.
Note that the definition +# of mixed is purposefully ambiguous and will mean different things depending +# on the region of aggregation. +keep_mixed_plant_category: true + +# To ensure consistency with eGRID, the model applies a filter to remove +# industrial cogeneration facilities from the EIA Form 923 data based on +# their NAICS code (i.e., code 22: Electricity Utilities and Independent +# Power Producers) and by EIA sector numbers 1 and 2. This filter is +# designed to reproduce the same power plants that are listed in eGRID. +filter_non_egrid_emission_on_NAICS: true + + +# TRANSMISSION AND ELECTRICITY TRADING +# These parameters are used to determine which method of electricity trading is +# used and what losses from electricity distribution are included in the +# results. + +# Used to compute the loss factor for transmission and distribution +# (i.e., 1/efficiency). +efficiency_of_distribution_grid: 0.95 + +# Electricity trading can be calculated using a method from EPA (with or +# without a surplus trading pool) that is based on eGRID regions or a method +# from NETL that uses an input-output approach using exchange data between +# balancing authorities from EIA. If false, net trading is used. +EPA_eGRID_trading: false + +# For net trading, select between two options for consumption mix +# calculations: net trading (true) or gross trading (false) between NERC +# and eGRID regions. +net_trading: false + +# Trading year for EIA data used for net trading. +NETL_IO_trading_year: 2023 + + +# POST PROCESSING +# This triggers a cleaning of the JSON-LD (e.g., removing untracked flows, +# zero value product flows, and renumbering the exchange table internal IDs). +# These end processes help remove common errors that crash openLCA v2. +# Product systems for the at-user consumption mixes are also generated.
+run_post_processes: true + +# OTHER PARAMETERS diff --git a/electricitylci/modelconfig/ELCI_2024_config.yml b/electricitylci/modelconfig/ELCI_2024_config.yml new file mode 100644 index 00000000..4af82864 --- /dev/null +++ b/electricitylci/modelconfig/ELCI_2024_config.yml @@ -0,0 +1,159 @@ +# MODEL CONFIG FILE +# This file contains parameters that can be changed by a user. + +# MODEL AND DATA YEARS +# These parameters determine what year the outputs represent and which years +# of data are used from each source. + +# The target year is used to determine the temporal correlation of data with +# the electricity generation processes, which can be used in uncertainty +# calculations. +electricity_lci_target_year: 2025 + +# Select a regional aggregation from "eGRID", "NERC", "BA", "US", "FERC", +# and "EIA". The EPA_eGRID trading method can only be used with "eGRID". +# The NETL_BA_trading method can only be used with "BA" and "FERC". +regional_aggregation: 'BA' + +# If egrid is not used as a primary data source then set "replace_egrid" +# to True. This will trigger the use of EPA CEMS and EIA data as substitutes +# for eGRID. Replacing eGRID will also use EIA generation data to determine +# the generation mix. +egrid_year: 2023 +eia_gen_year: 2024 +replace_egrid: true + +# In 2023, NETL published the Cradle-to-Gate Life Cycle Analysis Baseline +# for U.S. Coal Mining and Delivery (https://www.osti.gov/biblio/2370100), +# which supersedes the 2020 model inventory used in the original baseline. +# Select between the 2020 and 2023 model inventories. This impacts both +# the mining and transportation LCIs, but does not impact the coal power plant +# construction LCI. +coal_model_year: 2023 + +# The NG baseline model year is used to determine which NG inventory to use. +# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. 
+# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2020 + +# NETL developed profiles for renewable generation to capture construction +# and O&M impacts (e.g., solar PV manufacturing and power plant operations). +# There are two vintages for renewable inventories: 2016 and 2020. The 2016 +# inventories combine construction and operations (recommended for use when +# re-creating the 2016 baseline). In the 2020 inventory, these were separated +# into individual inventories (recommended for future baselines). +include_renewable_generation: true +renewable_vintage: 2020 + +# NETL has developed profiles for power plant water use. +# Set to true to include these impacts. +include_netl_water: true + +# Upstream data from NETL can be included in the outputs. +include_upstream_processes: true + +# For the inventories of interest, see vintages available from StEWI +# (https://github.com/USEPA/standardizedinventories). +# If you plan to run a baseline, use the prescribed stewicombo file for +# consistent results. Stewicombo files are listed in their corresponding folder +# on the AWS site (https://dmap-data-commons-ord.s3.amazonaws.com/index.html). +inventories_of_interest: + eGRID: 2023 + TRI: 2023 + NEI: 2020 + RCRAInfo: 2023 +stewicombo_file: 'ELCI_2020_v1.1.4' + +# Provide uncertainty estimates for emissions. +calculate_uncertainty: true + + +# API DATA SOURCES +# The API key to access NETL EDX data resources (https://edx.netl.doe.gov). +edx_api: "" + +# If replacing eGRID, then EPA CEMS from CAMPD is used. This data is available +# from EPA's API (https://www.epa.gov/power-sector/cam-api-portal). Request +# a free API key and copy-and-paste it here: +epa_cam_api: "" + +# If using NETL's trading method, then EIA bulk U.S. Electric System +# Operating Data are used. The data are available in two formats: +# bulk zip (EBA.zip) and Opendata API.
Choose the path (note that since +# March 2024, both routes include data from 2019). To get 2016-2018 data, +# use the archived bulk zip file (`use_eia_bulk_zip` = true) and bypass the +# vintage check (`bypass_bulk_vintage` = true). The vintage check triggers +# a new download of EBA.zip every 30 days, as defined by VINTAGE_THRESH +# global parameter in `bulk_eia_data` and the EBA manifest text file. +# To register for a free API key, go to: https://www.eia.gov/opendata/, and +# copy-and-paste it for `eia_api` below. +use_eia_bulk_zip: false +bypass_bulk_vintage: true +eia_api: "" + + +# GENERATOR FILTERS +# These parameters determine if any power plants are filtered out +include_only_egrid_facilities_with_positive_generation: true +filter_on_efficiency: true +egrid_facility_efficiency_filters: + lower_efficiency: 10 + upper_efficiency: 100 + +# ELCI creates life cycle processes for each fuel type. If you only want to +# include power plants with a minimum amount of generation from a single fuel, +# make sure this parameter is set to true. +filter_on_min_plant_percent_generation_from_primary_fuel: true +min_plant_percent_generation_from_primary_fuel_category: 90 + +# If the parameter above is set to true, one may still want to keep the plants +# that do not meet the threshold. This setting will mark those power plants as +# MIXED and generate life cycle processes for them. Note that the definition +# of mixed is purposefully ambiguous and will mean different things depending +# on the region of aggregation. +keep_mixed_plant_category: true + +# To ensure consistency with eGRID, the model applies a filter to remove +# industrial cogeneration facilities from the EIA Form 923 data based on +# their NAICS code (i.e., code 22: Electricity Utilities and Independent +# Power Producers) and by EIA sector numbers 1 and 2. This filter is +# designed to reproduce the same power plants that are listed in eGRID.
+filter_non_egrid_emission_on_NAICS: true + + +# TRANSMISSION AND ELECTRICITY TRADING +# These parameters are used to determine which method of electricity trading is +# used and what losses from electricity distribution are included in the +# results. + +# Used to compute the loss factor for transmission and distribution +# (i.e., 1/efficiency). +efficiency_of_distribution_grid: 0.95 + +# Electricity trading can be calculated using a method from EPA (with or +# without a surplus trading pool) that is based on eGRID regions or a method +# from NETL that uses an input-output approach using exchange data between +# balancing authorities from EIA. If false, net trading is used. +EPA_eGRID_trading: false + +# For net trading, select between two options for consumption mix +# calculations: net trading (true) or gross trading (false) between NERC +# and eGRID regions. +net_trading: false + +# Trading year for EIA data used for net trading. +NETL_IO_trading_year: 2024 + + +# POST PROCESSING +# This triggers a cleaning of the JSON-LD (e.g., removing untracked flows, +# zero value product flows, and renumbering the exchange table internal IDs). +# These end processes help remove common errors that crash openLCA v2. +# Product systems for the at-user consumption mixes are also generated. +run_post_processes: true + + +# OTHER PARAMETERS diff --git a/electricitylci/modelconfig/ELCI_2_config.yml b/electricitylci/modelconfig/ELCI_2_config.yml index f3704a90..0f4aba4e 100644 --- a/electricitylci/modelconfig/ELCI_2_config.yml +++ b/electricitylci/modelconfig/ELCI_2_config.yml @@ -31,6 +31,14 @@ replace_egrid: true # construction LCI. coal_model_year: 2020 +# The NG baseline model year is used to determine which NG inventory to use. +# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2016 + # NETL developed profiles for renewable generation to capture construction # and O&M impacts (e.g., solar PV manufacturing and power plant operations). # There are two vintages for renewable inventories: 2016 and 2020. The 2016 diff --git a/electricitylci/modelconfig/ELCI_3_config.yml b/electricitylci/modelconfig/ELCI_3_config.yml index 572f2c61..d5e32c86 100644 --- a/electricitylci/modelconfig/ELCI_3_config.yml +++ b/electricitylci/modelconfig/ELCI_3_config.yml @@ -31,6 +31,14 @@ replace_egrid: false # construction LCI. coal_model_year: 2020 +# The NG baseline model year is used to determine which NG inventory to use. +# This impacts the upstream NG emissions for production, gathering and boosting, +# processing, transmission, storage, and distribution. +# Select between 2016 and 2020 model inventories: +# 2016 model: https://doi.org/10.2172/1529553 +# 2020 model: https://doi.org/10.18141/2568690 +ng_model_year: 2016 + # NETL developed profiles for renewable generation to capture construction # and O&M impacts (e.g., solar PV manufacturing and power plant operations). # There are two vintages for renewable inventories: 2016 and 2020. 
The 2016 diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py index d05303b9..1545fcdd 100644 --- a/electricitylci/natural_gas_upstream.py +++ b/electricitylci/natural_gas_upstream.py @@ -8,6 +8,7 @@ ############################################################################## import logging import os +import sys import pandas as pd @@ -16,6 +17,11 @@ import electricitylci.PhysicalQuantities as pq from electricitylci.generation import add_temporal_correlation_score from electricitylci.model_config import model_specs +from electricitylci.utils import download_edx +from electricitylci.globals import paths +from electricitylci.utils import check_output_dir + + ############################################################################## # MODULE DOCUMENTATION ############################################################################## @@ -23,19 +29,313 @@ component of natural gas power plant operation (extraction, processing, and transportation) for every plant in EIA-923. +TODO: +- Make eLCI.csv resource a public submission. 
+ Created: 2019-02-18 Last updated: - 2024-01-10 + 2025-12-04 """ __all__ = [ "generate_upstream_ng", ] +############################################################################## +# GLOBALS +############################################################################## +region_sheets_dict = { + 'Pacific': 'FI - Pacific Delivery', + 'Rocky Mountain': 'FI - Rocky Mountain Delivery', + 'Southwest': 'FI - Southwest Delivery', + 'Midwest': 'FI - Midwest Delivery', + 'Southeast': 'FI - Southeast Delivery', + 'Northeast': 'FI - Northeast Delivery' + } +'''dict : Region names mapped to Excel workbook sheet names.''' + +r_ids_2020 = { + 'Appendix_F_2020_Full_Inventory_Results_Midwest_ProdThruTrans.xlsx':'5665de40-fc2b-4643-b647-ceec226af2bb', + 'Appendix_F_2020_Full_Inventory_Results_Northeast_ProdThruTrans.xlsx' :'b396eb50-72ac-45f0-8231-9b613457c6d8', + 'Appendix_F_2020_Full_Inventory_Results_Pacific_ProdThruTrans.xlsx' :'347a0cd8-5ff2-4cb3-be0a-f31a56bac9c6', + 'Appendix_F_2020_Full_Inventory_Results_Rocky_Mountain_ProdThruTrans.xlsx' :'d08f4da2-543a-40b2-9ffd-c7138ed4f8c6', + 'Appendix_F_2020_Full_Inventory_Results_Southeast_ProdThruTrans.xlsx' :'4590712b-db21-4428-b488-6ded3b65d18b', + 'Appendix_F_2020_Full_Inventory_Results_Southwest_ProdThruTrans.xlsx':'9dd7a6e5-df1a-461e-87e7-0b9d8d600f26' +} +'''dict : Excel workbook file names mapped to EDX resource IDs.''' + +region_state_mapping = { + 'WA':'Pacific','CA':'Pacific','OR':'Pacific','MT':'Rocky Mountain','ID':'Rocky Mountain','CO':'Rocky Mountain','NV':'Rocky Mountain','UT':'Rocky Mountain','WY':'Rocky Mountain', + 'AZ':'Southwest','NM':'Southwest','OK':'Southwest','TX':'Southwest','MN':'Midwest','ND':'Midwest','IA':'Midwest','KS':'Midwest', + 'MO':'Midwest','NE':'Midwest','SD':'Midwest','IL':'Midwest','IN':'Midwest','OH':'Midwest','WI':'Midwest','MI':'Midwest', + 'AR':'Southeast','LA':'Southeast','AL':'Southeast','FL':'Southeast','GA':'Southeast','MS':'Southeast','SC':'Southeast','KY':'Southeast', + 
'NC':'Southeast','TN':'Southeast','VA':'Southeast','WV':'Southeast','DE':'Southeast','MD':'Southeast','CT':'Northeast','MA':'Northeast', + 'NH':'Northeast','RI':'Northeast','VT':'Northeast','NJ':'Northeast','NY':'Northeast','PA':'Northeast','ME':'Northeast', 'DC':'Northeast', +} +'''dict : U.S. state abbreviations mapped to region. Excludes AK and HI.''' + + ############################################################################## # FUNCTIONS ############################################################################## +def correct_netl_flow_names(df, flow_mapping_path, amount_col="FlowAmount"): + """A helper method that replaces NETL air, water, and ground emissions + with Federal Elementary Flow List equivalents based on a subset of + flows defined in USEPA's eLCI mapping using the Python package + `fedelemflowlist `_ + + Parameters + ---------- + df : pandas.DataFrame + A life cycle inventory data frame with columns, 'FlowName', + 'Compartment', 'Unit', and ``amount_col``. + amount_col : str, optional + The column title representing the flow amount, by default "FlowAmount" + + Returns + ------- + pandas.DataFrame + A new data frame with the same number of rows and columns as the + sent data frame. Flow names, compartments, units, and flow amounts + are updated based on emissions matches with the FEDEFL. All unmatched + flows are returned 'as is'. If FlowUUID was not in the column list, + it is created; otherwise, the matched UUIDs are updated. + """ + # This data frame has about 4k source flow names and contexts associated + # with NETL unit process models (e.g., petro, nuclear, coal). + flow_mapping = pd.read_csv(flow_mapping_path, encoding='ISO-8859-1') + + # Matching occurs on name, compartment and units; help this along by + # lowering the case (improves coal UP matches from 10% to 42%). 
+ df["FlowName_orig"] = df["FlowName"] + df["Compartment_orig"] = df["Compartment"] + df["FlowName"] = df["FlowName"].str.lower().str.rstrip() + df["Compartment"] = df["Compartment"].str.lower().str.rstrip() + + # In the map, also lower-case names and compartments and remove trailing + # space; note this introduces duplicate entries in the map, so remove them. + # The duplicates are from later entries, so ignore mapper, verifier and + # last updated cols when searching for duplicates. [250917; TWD] + flow_mapping['SourceFlowName'] = flow_mapping[ + 'SourceFlowName'].str.lower().str.rstrip() + flow_mapping['SourceFlowContext'] = flow_mapping[ + "SourceFlowContext"].str.lower().str.rstrip() + ignore_cols = ['Mapper', 'Verifier', 'LastUpdated'] + flow_mapping = flow_mapping.drop_duplicates( + subset=[x for x in flow_mapping.columns if x not in ignore_cols] + ) + + # Some compartments in NETL UPs are complex (e.g., 'Emission to water/fresh + # water'), but are listed simply in the FEDEFL eLCI mapper (e.g., 'emission/ + # water'). Improves coal mining UP matches from 42% to 62%. + is_emission = df['input'] == False + is_water = df['Compartment'].str.contains('water') + is_air = df['Compartment'].str.contains('air') + is_ground = df['Compartment'].str.contains('ground') + + df.loc[is_emission * is_water, 'Compartment'] = 'emission/water' + df.loc[is_emission * is_air, 'Compartment'] = 'emission/air' + df.loc[is_emission * is_ground, 'Compartment'] = 'emission/ground' + + # HOTFIX: Map against source units [250205; TWD] + # For coal mining, reduces matches from >62% to <62% (about 2k less rows) + logging.info("Mapping emissions to FEDEFL") + mapped_df = pd.merge( + df, + flow_mapping, + left_on=["FlowName", "Compartment", "Unit"], + right_on=["SourceFlowName", "SourceFlowContext", "SourceUnit"], + how="left", + ) + + # If TargetFlowName is present, there was a match. 
+ is_match = mapped_df["TargetFlowName"].notnull() + logging.info("Correcting %d NETL flows" % is_match.sum()) + + # Quality Check (coal_df) + # Check that target unit matches source unit. + # No! Hydrogen, Uranium, and Lead-210/kg have mis-matched units. + # Therefore, unit conversions are necessary. + + # Return flow names and compartments back to their original values. + df["FlowName"] = df["FlowName_orig"] + df["Compartment"] = df["Compartment_orig"] + del df['FlowName_orig'] # use this syntax since you're editing + del df['Compartment_orig'] # a reference object that isn't returned + mapped_df['FlowName'] = mapped_df['FlowName_orig'] + mapped_df["Compartment"] = mapped_df["Compartment_orig"] + mapped_df = mapped_df.drop(columns=['FlowName_orig', 'Compartment_orig']) + + # Replace FlowName, Unit, and Compartment with new names (where matched) + mapped_df.loc[is_match, "FlowName"] = mapped_df.loc[ + is_match, "TargetFlowName"] + mapped_df.loc[is_match, "Compartment"] = mapped_df.loc[ + is_match, "TargetFlowContext"] + mapped_df.loc[is_match, "Unit"] = mapped_df.loc[is_match, "TargetUnit"] + + # Correct values using the conversion factor + mapped_df.loc[is_match, amount_col] *= mapped_df.loc[ + is_match, 'ConversionFactor'] + + if 'FlowUUID' in mapped_df.columns: + # Update existing values with new UUIDs + mapped_df.loc[is_match, 'FlowUUID'] = mapped_df.loc[ + is_match, 'TargetFlowUUID'] + else: + # Set UUIDs to target values + mapped_df = mapped_df.rename(columns={"TargetFlowUUID": "FlowUUID"}) + + # Drop all unneeded cols + drop_cols = [x for x in flow_mapping.columns if x in mapped_df.columns] + mapped_df = mapped_df.drop(columns=drop_cols) + + return mapped_df + + +def generate_lci(excel_folder_path, + flow_mapping_path, + destination_path, + final_table_name): + """ + Read Excel file, extract data, and generate NG LCI in the correct format. 
+ + Parameters + ---------- + excel_folder_path : str + The path to the folder containing the excel files (i.e., NG models and + inventories). + flow_mapping_path: str + The path to the flow mapping file. + destination_path : str, optional + The path to the destination folder. If not provided, the function + will save the file in the current working directory. + final_table_name : str, optional + The name of the final table to be saved. If not provided, the function + will save the file with the name 'final_table.xlsx'. + + Returns + ------- + pandas.DataFrame + A dataframe with the LCI for NG with the same format as the currently + used file. + + Notes + ----- + The function is sensitive to the naming convention of the regions in the + Excel file. + """ + final_table = pd.DataFrame() + + # 1. Read excel files in the folder path containing the model + for filename in os.listdir(excel_folder_path): + if filename.endswith('.xlsx'): + file_path = os.path.join(excel_folder_path, filename) + logging.info(f"Reading file: {file_path}") + input_data = pd.ExcelFile(file_path) + sheet_names = input_data.sheet_names + sheet_name = [ + name for name in sheet_names if name in region_sheets_dict.values() + ][0] + + # Extract air, water, and ground emissions data for the selected sheet + # (i.e., technobasin). + air_emissions_data, water_emissions_data, ground_emissions_data = read_region_data(file_path, sheet_name) + + # Air emissions + # - Get the correct flow names, compartment, and uuid for each flow + full_air_emissions_data = correct_netl_flow_names( + air_emissions_data, + flow_mapping_path + ) + # Drop rows with FlowUUID NaN. + full_air_emissions_data = full_air_emissions_data[ + full_air_emissions_data['FlowUUID'].notna() + ] + + # Water emissions + # - get the correct flow names, compartment, and uuid for each flow. + full_water_emissions_data = correct_netl_flow_names( + water_emissions_data, + flow_mapping_path + ) + # Drop rows with FlowUUID NaN. 
+ full_water_emissions_data = full_water_emissions_data[ + full_water_emissions_data['FlowUUID'].notna() + ] + + # Ground emissions + # - get the correct flow names, compartment, and uuid for each flow + full_ground_emissions_data = correct_netl_flow_names( + ground_emissions_data, + flow_mapping_path + ) + full_ground_emissions_data = full_ground_emissions_data[ + full_ground_emissions_data['FlowUUID'].notna() + ] + + # Combine dataframes. + df1 = pd.concat([ + full_air_emissions_data, + full_water_emissions_data, + full_ground_emissions_data + ]) + df1 = df1.sort_values(by='FlowUUID') + region = [ + key for key, v in region_sheets_dict.items() if v == sheet_name + ][0] + df1['FlowAmount'] = df1['FlowAmount'].astype(float) + df1['FlowAmount'] = df1['FlowAmount'].fillna(0) + + # Create final_table structure in 1st iteration. + if final_table.empty: + final_table = df1[[ + 'FlowName', 'Compartment', 'Unit', 'input', 'FlowUUID' + ]] + final_table = final_table.sort_values(by='FlowUUID') + final_table ['flow_type'] = 'ELEMENTARY_FLOW' + # Reorder and rename columns. + final_table = final_table[[ + 'Compartment', + 'FlowName', + 'FlowUUID', + 'Unit', + 'flow_type', + 'input' + ]] + final_table.columns = [ + 'compartment', + 'flow_name', + 'uuid', + 'unit', + 'flow_type', + 'is_input' + ] + # Add a column for each basin + region_columns = list(region_sheets_dict.keys()) + for r in region_columns: + final_table[r] = 0 + + # Add region emissions to final table + try: + logging.info(f"Adding emissions for {region}") + logging.info(f"df1: {df1['FlowAmount'].head(5)}") + final_table[region] = df1['FlowAmount'] + except Exception as e: + sys.exit( + "Error reading sheet. " + "Make sure your excel file follows the correct naming " + "convention. For reference, refer to the source code, " + f"lines 70-78. Error: {e}" + ) + + # 2. 
Save final table to excel + save_ng_lci(final_table, final_table_name ,destination_path) + print(f"Final table saved to {destination_path}/{final_table_name}.xlsx") + + return final_table + + def generate_upstream_ng(year): """ Generate the annual gas extraction, processing and transportation @@ -43,6 +343,8 @@ def generate_upstream_ng(year): Notes ----- + This is the main method called outside this module. + Depends on the data file, gas_supply_basin_mapping.csv, which includes the identification information for every natural gas plant in the U.S. Once imported, this data frame is simplified to contain just the plant @@ -64,12 +366,255 @@ def generate_upstream_ng(year): """ logging.info("Generating natural gas inventory") + # Get plant data and map each plant to its ng source: basin or region. + # The 2016 ng emissions inventory is only available by basin. + # As such, plants can only be connected to upstream emissions via basin + # assignment newer data (2020) is available by region plants are connected + # to upstream ng emissions via region assignment + + # 'year' refers to eia_gen_year + if model_specs.ng_model_year == 2016: + ng_generation_data_mapped = map_ng_by_basin(year) + else: + ng_generation_data_mapped = map_ng_by_region(year) + + # Read the NG LCI file + # If year = 2016 + # - this step will directly ready NG_LCI.csv from the data_dir + # - returns lci (by basin) + # If year = 2020 + # - this step will require edx api, download ng model and mapping + # - returns lci (by region) + # Document from edx, and generate lci + ng_lci = get_ng_lci(model_specs.ng_model_year) + + # merge ng lci and plants based on the common parameter: region or basin + if model_specs.ng_model_year == 2016: + ng_lci_mapped = map_ng_lci_to_plants_by_basin( + ng_lci, ng_generation_data_mapped + ) + else: + ng_lci_mapped = map_ng_lci_to_plants_by_region( + ng_lci, ng_generation_data_mapped + ) + + # Multiplying with the EIA 923 fuel consumption; conversion factor is + # for MMBtu 
to MJ + btu_to_MJ = pq.convert(10**6,'Btu','MJ') + ng_lci_mapped["FlowAmount"]=( + ng_lci_mapped["FlowAmount"] + * ng_lci_mapped['Total Fuel Consumption MMBtu'] + * btu_to_MJ + ) + + ng_lci_mapped = ng_lci_mapped.rename( + columns={'Total Fuel Consumption MMBtu':'quantity'}) + ng_lci_mapped["quantity"] = ng_lci_mapped["quantity"]*btu_to_MJ + + # Output is kg emission for the specified year by facility Id, + # not normalized to electricity output + + ng_lci_mapped['FuelCategory'] = 'GAS' + ng_lci_mapped.rename( + columns={ + 'Plant Id':'plant_id', + 'NG_LCI_Region': 'stage_code', + 'NG_LCI_Name':'stage_code', + 'Stage':'stage'}, + inplace=True + ) + ng_lci_mapped["Year"] = year + ng_lci_mapped["Source"] = "netlgaseiafuel" + ng_lci_mapped["ElementaryFlowPrimeContext"] = "emission" + ng_lci_mapped.loc[ + ng_lci_mapped["Compartment"].str.contains("resource/"), + "ElementaryFlowPrimeContext"] = "resource" + ng_lci_mapped.loc[ + ng_lci_mapped["Compartment"].str.contains("Technosphere/"), + "ElementaryFlowPrimeContext"] = "technosphere" + # Issue #296 - adding DQI information for upstream processes + ng_lci_mapped["Year"] = model_specs.ng_model_year + ng_lci_mapped["DataReliability"] = 3 + ng_lci_mapped["TemporalCorrelation"] = add_temporal_correlation_score( + ng_lci_mapped["Year"], model_specs.electricity_lci_target_year + ) + ng_lci_mapped["GeographicalCorrelation"] = 1 + ng_lci_mapped["TechnologicalCorrelation"] = 1 + ng_lci_mapped["DataCollection"] = 1 + + # 3/20/2025 MBJ - replacing renewable vintage here so that temporal + # correlation is based on the year the inventory is based on, but when + # electricity generation is combined, it needs to be based on the target + # year for the inventory. + ng_lci_mapped["Year"] = year + + return ng_lci_mapped + + +def get_ng_lci(year): + """ + Get the natural gas life cycle inventory for a given year. 
+ Depending on the year, the natural gas life cycle inventory is either: + + - retrieved from existing data + - calculated using the natural gas life cycle inventory model + + Parameters + ---------- + year : str, int + The year for which to get the natural gas life cycle inventory. + This should reflect the model configuration, ``ng_model_year``. + + Returns + ------- + pandas.DataFrame + A dataframe containing the emissions associated with the natural gas + production through transportation for each basin during the given year. + + Notes + ----- + This method depends on: + + - the NG_LCI CSV file (if the old model is selected in the configuration) + - the EDX API (if the new model is selected in the configuration) + - the elci flow mapping CSV file (if the new model is selected in the + configuration) + """ + if isinstance(year, int): + year = str(year) + if year == "2016": + logging.info( + f"Retrieving the 2016 natural gas life cycle inventory by basin." + ) + ng_lci = pd.read_csv( + os.path.join(data_dir, "NG_LCI.csv"), + index_col=[0,1,2,3,4,5] + ) + else: + data_folder = os.path.join(paths.local_path, 'netl') + # Create new directory for ng if non existing. + check_output_dir(os.path.join(data_folder,"2020_ng")) + data_folder = os.path.join(data_folder,"2020_ng") + # Check if the ng_lci_2020rev1.csv already exists + # - if it does then we can skip all the below + if os.path.exists(os.path.join(data_folder, "ng_lci_2020rev1.csv")): + logging.info(f"NG LCI already exists in your data directory.") + ng_lci = pd.read_csv( + os.path.join(data_folder, "ng_lci_2020rev1.csv"), + index_col=[0,1,2,3,4,5] + ) + else: + # If it does not exist, then generate it. + logging.info( + f"Retrieving the {year} natural gas life cycle inventory " + "by region." + ) + # This step will require downloading files from EDX. 
+ # - retrieve ng model + # - check if model is data_dir + check_output_dir(os.path.join(data_folder, "2020_ng_model")) + model_folder = os.path.join(data_folder, "2020_ng_model") + for ngmodel in r_ids_2020.keys(): + if os.path.exists(os.path.join(model_folder, ngmodel)): + logging.info( + f"{ngmodel} already exists in your data directory." + ) + else: + logging.info(f"Downloading {ngmodel} from EDX.") + try: + download_edx( + resource_id=r_ids_2020[ngmodel], + api_key=model_specs.edx_api_key, + output_dir=model_folder + ) + except Exception as e: + logging.error( + f"Error downloading {ngmodel} from EDX. Error: {e}" + ) + sys.exit(1) + # Retrieve flow mapping document from EDX, eLCI.csv, and check if + # flow mapping CSV exists in data_dir. + if os.path.exists(os.path.join(data_folder, "eLCI.csv")): + logging.info( + "ELCI flow mapping document already exists in your " + "data directory." + ) + flow_mapping_path = os.path.join(data_folder, "eLCI.csv") + else: + # Download flow mapping document from EDX. + logging.info( + "Downloading ELCI flow mapping document from EDX." + ) + # Resource id of eLCI flow mapping document on EDX + # NOTE: Currently in Life Cycle Collaborations Workspace + # ---not public!!! + r_id_elci = 'e2c8f934-e95e-470a-879b-17ebe4afd39e' + try: + download_edx( + resource_id=r_id_elci, + api_key=model_specs.edx_api_key, + output_dir=data_folder + ) + flow_mapping_path = os.path.join(data_folder, "eLCI.csv") + except Exception as e: + logging.error( + "Error downloading ELCI flow mapping document from " + f"EDX. Error: {e}" + ) + sys.exit(1) + + # Run the generate_ng_lci function and save it in data_dir. 
+ try: + generate_lci( + excel_folder_path=model_folder, + flow_mapping_path=flow_mapping_path, + destination_path=data_folder, + final_table_name="ng_lci_2020rev1" + ) + ng_lci = pd.read_csv( + os.path.join(data_folder, "ng_lci_2020rev1.csv"), + index_col=[0,1,2,3,4,5] + ) + except Exception as e: + logging.error( + "Error generating natural gas life cycle inventory. " + f"Error: {e}" + ) + sys.exit(1) + return ng_lci + + +def map_ng_by_basin(year): + """ + Map the natural gas generation data by basin. + + Notes + ----- + - Downloads EIA plant data for the specified year. + - Filters the data to only include NG facilities and on positive fuel + consumption. + - Maps each plant to a basin using the gas_supply_basin_mapping.csv file. + + Parameters + ---------- + year: int, str + The year of the eia923 plant data to use. + + Returns + ---------- + pandas.DataFrame + A dataframe with the natural gas generation data by region. + """ + if isinstance(year, str): + year = int(year) + # Get the EIA generation data for the specified year, this dataset includes # the fuel consumption for generating electricity for each facility # and fuel type. Filter the data to only include NG facilities and on # positive fuel consumption. Group that data by Plant Id as it is possible # to have multiple rows for the same facility and fuel based on different # prime movers (e.g., gas turbine and combined cycle). + eia_generation_data = eia923_download_extract(year) column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') & @@ -81,8 +626,8 @@ def generate_upstream_ng(year): ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int) # Import the mapping file which has the source gas basin for each Plant Id. - # NOTE: - # This is a 2 MB file that provides about 100 kB of info! + # NOTE: This is a 2 MB file that provides about 100 kB of info! + # TODO: Move this resource to EDX. 
ng_basin_mapping = pd.read_csv( os.path.join(data_dir, 'gas_supply_basin_mapping.csv') ) @@ -99,12 +644,59 @@ def generate_upstream_ng(year): ng_generation_data_basin = ng_generation_data_basin.drop( columns=['Plant Code'] ) + return ng_generation_data_basin - # Read the NG LCI excel file - ng_lci = pd.read_csv( - os.path.join(data_dir, "NG_LCI.csv"), - index_col=[0,1,2,3,4,5] - ) + +def map_ng_by_region(year): + """ + Map the natural gas generation data by region. + This includes 6 regions: Pacific, Rocky Mountain, Southwest, Midwest, + Southeast, and Northeast. + + Notes + ----- + - Downloads EIA plant data for the specified year. + - Filters the data to only include NG facilities and on positive fuel + consumption. + - Groups the data by Plant Id and aggregates the fuel consumption by + summing the total fuel consumption. + - Maps each plant to a region using the region_state_mapping dictionary. + + Parameters + ---------- + year: int, str + The year of the eia923 plant data to use. + + Returns + ---------- + pandas.DataFrame + A dataframe with the natural gas generation data by region. + """ + if isinstance(year, str): + year = int(year) + + eia_generation_data = eia923_download_extract(year) + + column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') & + (eia_generation_data['Total Fuel Consumption MMBtu'] > 0)) + + ng_generation_data = eia_generation_data[column_filt] + + ng_generation_data = ng_generation_data.groupby('Plant Id').agg( + {'Total Fuel Consumption MMBtu':'sum','State':'first'}).reset_index() + ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int) + + ng_generation_data_region = ng_generation_data.copy() + + ng_generation_data_region['NG_LCI_Region'] = ng_generation_data['State'].map(region_state_mapping) + + return ng_generation_data_region + + +def map_ng_lci_to_plants_by_basin(ng_lci, ng_generation_data_mapped): + """ + Map the natural gas generation data by basin. 
+ """ ng_lci_columns=[ "Compartment", "FlowName", @@ -119,62 +711,138 @@ def generate_upstream_ng(year): ng_lci_stack.columns=ng_lci_columns # Merge basin data with LCI dataset - ng_lci_basin = pd.merge( + ng_lci_mapped = pd.merge( ng_lci_stack, - ng_generation_data_basin, + ng_generation_data_mapped, left_on = 'Basin', right_on = 'NG_LCI_Name', how='left' ) + return ng_lci_mapped - # Multiplying with the EIA 923 fuel consumption; conversion factor is - # for MMBtu to MJ - btu_to_MJ = pq.convert(10**6,'Btu','MJ') - ng_lci_basin["FlowAmount"]=( - ng_lci_basin["FlowAmount"] - * ng_lci_basin['Total Fuel Consumption MMBtu'] - * btu_to_MJ + +def map_ng_lci_to_plants_by_region(ng_lci, ng_generation_data_mapped): + """ + Map the natural gas generation data by basin. + """ + ng_lci_columns=[ + "Compartment", + "FlowName", + "FlowUUID", + "Unit", + "FlowType", + "input", + "Region", + "FlowAmount" + ] + ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index() + ng_lci_stack.columns=ng_lci_columns + + # Merge basin data with LCI dataset + ng_lci_mapped = pd.merge( + ng_lci_stack, + ng_generation_data_mapped, + left_on = 'Region', + right_on = 'NG_LCI_Region', + how='left' ) + return ng_lci_mapped - ng_lci_basin = ng_lci_basin.rename( - columns={'Total Fuel Consumption MMBtu':'quantity'}) - ng_lci_basin["quantity"]=ng_lci_basin["quantity"]*btu_to_MJ - # Output is kg emission for the specified year by facility Id, - # not normalized to electricity output +def read_region_data(excel_file_path, sheet_name): + """ + Read Excel file, extract data, and generate a data frame for NG emissions + for air, water, and ground. The data frame includes the flow name and flow + amount (P2.5 and P97.5 values are dropped). - ng_lci_basin['FuelCategory'] = 'GAS' - ng_lci_basin.rename( - columns={ - 'Plant Id':'plant_id', - 'NG_LCI_Name':'stage_code', - 'Stage':'stage'}, - inplace=True + Parameters + ---------- + excel_file_path : str + Path to the Excel file. 
+ sheet_name : str + Name of the sheet to extract the data from. + + Returns + ------- + tuple + A tuple of length three: + + - pandas.DataFrame, the air emissions data + - pandas.DataFrame, the water emissions data + - pandas.DataFrame, the ground emissions data + """ + print(f"Processing sheet: {sheet_name}") + # create empty database + df = pd.DataFrame() + # Extract all the data from the sheet + df = pd.read_excel( + excel_file_path, + sheet_name=sheet_name, + skiprows=0, + header=None ) - ng_lci_basin["Year"] = year - ng_lci_basin["Source"] = "netlgaseiafuel" - ng_lci_basin["ElementaryFlowPrimeContext"] = "emission" - ng_lci_basin.loc[ - ng_lci_basin["Compartment"].str.contains("resource/"), - "ElementaryFlowPrimeContext"] = "resource" - ng_lci_basin.loc[ - ng_lci_basin["Compartment"].str.contains("Technosphere/"), - "ElementaryFlowPrimeContext"] = "technosphere" - # Issue #296 - adding DQI information for upstream processes - ng_lci_basin["Year"] = 2016 - ng_lci_basin["DataReliability"] = 3 - ng_lci_basin["TemporalCorrelation"] = add_temporal_correlation_score( - ng_lci_basin["Year"], model_specs.electricity_lci_target_year + + # Adjustments: 1) changing header, 2) dropping P2.5 and P97.5 columns + df.iloc[0] = df.iloc[0].ffill() + df.iloc[1] = df.iloc[1].ffill() + df.columns = df.iloc[2] + df = df.drop(columns=["P2.5", "P97.5"]) + df.columns = df.iloc[0] + df = df.drop(df.index[0]) + + # Separate water, soil, ground, and air emissions - and map them to + # FEDEFL elementary flows + + # Air emissions + air_emissions_data = df.drop( + columns=[col for col in df.columns if col != df.columns[1]] + ) + # Drop the last two columns (empty columns from excel) + air_emissions_data = air_emissions_data.iloc[:, :-2] + # Sum columns 2:11 for each row + air_emissions_data[f'FlowAmount'] = air_emissions_data.iloc[:, 1:11].sum( + axis=1 ) - ng_lci_basin["GeographicalCorrelation"] = 1 - ng_lci_basin["TechnologicalCorrelation"] = 1 - ng_lci_basin["DataCollection"] = 1 - 
#3/20/2025 MBJ - replacing renewable vintage here so that temporal correlation - #is based on the year the inventory is based on, but when electricity - #generation is combined, it needs to be based on the target year for the - #inventory. - ng_lci_basin["Year"]=year - return ng_lci_basin + air_emissions_data = air_emissions_data.iloc[2:] + air_emissions_data = air_emissions_data.iloc[:, [0,-1]] + air_emissions_data['Compartment'] = 'Air' + air_emissions_data.columns.values[0] = 'FlowName' # change header + air_emissions_data['Unit'] = 'kg' + air_emissions_data ['input'] = False # not an input + + # Water emissions + water_emissions_data = df.iloc[:, [df.shape[1]-3, df.shape[1]-1]] + water_emissions_data.columns.values[0] = "FlowName" + water_emissions_data.columns.values[1] = "FlowAmount" + water_emissions_data = water_emissions_data.iloc[2:] + water_emissions_data = water_emissions_data.dropna() + water_emissions_data['Compartment'] = 'Water' + water_emissions_data['Unit'] = 'kg' + water_emissions_data ['input'] = False + + # Ground emissions + ground_emissions_data = df.iloc[:, [df.shape[1]-3, df.shape[1]-2]] + ground_emissions_data.columns.values[0] = "FlowName" + ground_emissions_data.columns.values[1] = "FlowAmount" + ground_emissions_data = ground_emissions_data.dropna() + ground_emissions_data = ground_emissions_data.iloc[1:] + ground_emissions_data['Compartment'] = 'Ground' + ground_emissions_data['Unit'] = 'kg' + ground_emissions_data ['input'] = False + + return air_emissions_data, water_emissions_data, ground_emissions_data + + +def save_ng_lci(df, filename, destination_path): + """ + Save the final table to CSV file. 
+ """ + if destination_path is None: + destination_path = f"{os.getcwd()}/" + if filename is None: + filename = 'final_table' + full_path = os.path.join(destination_path, f"{filename}.csv") + df.to_csv(full_path, index=False) ############################################################################## @@ -182,6 +850,6 @@ def generate_upstream_ng(year): ############################################################################## if __name__=='__main__': from electricitylci.globals import output_dir - year=2016 + year = 2016 df = generate_upstream_ng(year) df.to_csv(output_dir+'/ng_emissions_{}.csv'.format(year)) diff --git a/setup.py b/setup.py index 87642c23..36103c09 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='electricitylci', - version='2.0.0', + version='2.1.0', packages=['electricitylci'], package_data={ 'electricitylci': ["data/*.*", @@ -19,10 +19,10 @@ url='https://github.com/NETL-RIC/ElectricityLCI', license='CC0', author='Tyler W. Davis, Matthew Jamieson, Wesley W. Ingwersen, Greg Schivley, Ben Young, Tapajyoti Ghosh, Jing Li, Shirley Sam, Daniel Lee Young, Michael Srocka, and Troy A. Hottle', - author_email='matthew.jamieson@netl.doe.gov', - description='Create life cycle inventory data for regionalized electricity generation, mix of generation, mix of consumption, and distribution to the end-user in the United States.', + author_email='Mathew.Jamieson@netl.doe.gov', + description='A Python package to create regionalized life cycle inventory models of U.S. electricity generation, consumption, and distribution using standardized facility and generation data for use with open-source LCA software.', install_requires=[ - 'fedelemflowlist @ git+https://github.com/FLCAC-admin/Federal-LCA-Commons-Elementary-Flow-List#egg=fedelemflowlist', + 'fedelemflowlist @ git+https://github.com/FLCAC-Admin/fedelemflowlist', 'StEWI @ git+https://github.com/USEPA/standardizedinventories#egg=StEWI', 'scipy>=1.10', ],