From a165066cdd3cd99d9b837e5c6edf0a5bb5b7aff6 Mon Sep 17 00:00:00 2001
From: ssamkl <shirley.sam@keylogic.com>
Date: Wed, 6 Aug 2025 15:30:46 -0400
Subject: [PATCH 01/24] adding two new config files to update data. but still
 need stewicombo_file for each. #275

---
 .../modelconfig/ELCI_2023_config.yml          | 151 ++++++++++++++++++
 .../modelconfig/ELCI_2024_config.yml          | 151 ++++++++++++++++++
 2 files changed, 302 insertions(+)
 create mode 100644 electricitylci/modelconfig/ELCI_2023_config.yml
 create mode 100644 electricitylci/modelconfig/ELCI_2024_config.yml

diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml
new file mode 100644
index 00000000..a25cd724
--- /dev/null
+++ b/electricitylci/modelconfig/ELCI_2023_config.yml
@@ -0,0 +1,151 @@
+# MODEL CONFIG FILE
+# This file contains parameters that can be changed by a user.
+
+# MODEL AND DATA YEARS
+# These parameters determine what year the outputs represent and which years
+# of data are used from each source.
+
+# The target year is used to determine the temporal correlation of data with
+# the electricity generation processes, which can be used in uncertainty
+# calculations.
+electricity_lci_target_year: 2025
+
+# Select a regional aggregation from "eGRID", "NERC", "BA", "US", "FERC",
+# and "EIA". The EPA_eGRID trading method can only be used with "eGRID".
+# The NETL_BA_trading method can only be used with "BA" and "FERC".
+regional_aggregation: 'BA'
+
+# If egrid is not used as a primary data source then set "replace_egrid"
+# to True. This will trigger the use of EPA CEMS and EIA data as substitutes
+# for eGRID. Replacing eGRID will also use EIA generation data to determine
+# the generation mix.
+egrid_year: 2023
+eia_gen_year: 2023
+replace_egrid: true
+
+# In 2023, NETL published the Cradle-to-Gate Life Cycle Analysis Baseline
+# for U.S. Coal Mining and Delivery (https://www.osti.gov/biblio/2370100),
+# which supersedes the 2020 model inventory used in the original baseline.
+# Select between the 2020 and 2023 model inventories. This impacts both
+# the mining and transportation LCIs, but does not impact the coal power plant
+# construction LCI.
+coal_model_year: 2023
+
+# NETL developed profiles for renewable generation to capture construction
+# and O&M impacts (e.g., solar PV manufacturing and power plant operations).
+# There are two vintages for renewable inventories: 2016 and 2020. The 2016
+# inventories combine construction and operations (recommended for use when
+# re-creating the 2016 baseline). In the 2020 inventory, these were separated
+# into individual inventories (recommended for future baselines).
+include_renewable_generation: true
+renewable_vintage: 2020
+
+# NETL has developed profiles for power plant water use.
+# Set to true to include these impacts.
+include_netl_water: true
+
+# Upstream data from NETL can be included in the outputs.
+include_upstream_processes: true
+
+# For the inventories of interest, see vintages available from StEWI
+# (https://github.com/USEPA/standardizedinventories).
+# If you plan to run a baseline, use the prescribed stewicombo file for
+# consistent results. Stewicombo files are listed in their corresponding folder
+# on the AWS site (https://dmap-data-commons-ord.s3.amazonaws.com/index.html).
+inventories_of_interest:
+  eGRID: 2023
+  TRI: 2023
+  NEI: 2020
+  RCRAInfo: 2023
+stewicombo_file: 'ELCI_2020_v1.1.4'
+
+# Provide uncertainty estimates for emissions.
+calculate_uncertainty: true
+
+
+# API DATA SOURCES
+# The API to access NETL EDX data resources (https://edx.netl.doe.gov).
+edx_api: ""
+
+# If replacing eGRID, then EPA CEMS from CAMPD is used. This data is available
+# from EPA's API (https://www.epa.gov/power-sector/cam-api-portal). Request
+# a free API key and copy-and-paste it here:
+epa_cam_api: ""
+
+# If using NETL's trading method, then EIA bulk U.S. Electric System
+# Operating Data are used. The data are available in two formats:
+# bulk zip (EBA.zip) and Opendata API. Choose the path (note that since
+# March 2024, both routes include data from 2019). To get 2016-2018 data,
+# use the archived bulk zip file (`use_eia_bulk_zip` = true) and bypass the
+# vintage check (`bypass_bulk_vintage` = true). The vintage check triggers
+# a new download of EBA.zip every 30 days, as defined by VINTAGE_THRESH
+# global parameter in `bulk_eia_data` and the EBA manifest text file.
+# To register for a free API key, go to: https://www.eia.gov/opendata/, and
+# copy-and-paste it for `eia_api` below.
+use_eia_bulk_zip: false
+bypass_bulk_vintage: true
+eia_api: ""
+
+
+# GENERATOR FILTERS
+# These parameters determine if any power plants are filtered out
+include_only_egrid_facilities_with_positive_generation: true
+filter_on_efficiency: true
+egrid_facility_efficiency_filters:
+  lower_efficiency: 10
+  upper_efficiency: 100
+
+# ELCI creates life cycle processes for each fuel type. If you only want to
+# include power plants with a minimum amount of generation from a single fuel,
+# make sure this parameter is set to true.
+filter_on_min_plant_percent_generation_from_primary_fuel: true
+min_plant_percent_generation_from_primary_fuel_category: 90
+
+# If the parameter above is set to true, one may still want to keep the plants
+# that do not meet the threshold. This setting will mark those power plants as
+# MIXED and generate life cycle processes for them. Note that the definition
+# of mixed is purposefully ambiguous and will mean different things depending
+# on the region of aggregation.
+keep_mixed_plant_category: true
+
+# To ensure consistency with eGRID, the model applies a filter to remove
+# industrial cogeneration facilities from the EIA Form 923 data based on
+# their NAICS code (i.e., code 22: Electricity Utilities and Independent
+# Power Producers) and by EIA sector numbers 1 and 2. This filter is
+# designed to reproduce the same power plants that are listed in eGRID.
+filter_non_egrid_emission_on_NAICS: true
+
+
+# TRANSMISSION AND ELECTRICITY TRADING
+# These parameters are used to determine which method of electricity trading is
+# used and what losses from electricity distribution are included in the
+# results.
+
+# Used to compute the loss factor for transmission and distribution
+# (i.e., 1/efficiency).
+efficiency_of_distribution_grid: 0.95
+
+# Electricity trading can be calculated using a method from EPA (with or
+# without a surplus trading pool) that is based on eGRID regions or a method
+# from NETL that uses an input-output approach using exchange data between
+# balancing authorities from EIA. If false, net trading is used.
+EPA_eGRID_trading: false
+
+# For net trading, select between two options for consumption mix
+# calculations: net trading (true) or gross trading (false) between NERC
+# and eGRID regions.
+net_trading: false
+
+# Trading year for EIA data used for net trading.
+NETL_IO_trading_year: 2023
+
+
+# POST PROCESSING
+# This triggers a cleaning of the JSON-LD (e.g., removing untracked flows,
+# zero value product flows, and renumbering the exchange table internal IDs).
+# These end processes help remove common errors that crash openLCA v2.
+# Product systems for the at-user consumption mixes are also generated.
+run_post_processes: true
+
+
+# OTHER PARAMETERS
diff --git a/electricitylci/modelconfig/ELCI_2024_config.yml b/electricitylci/modelconfig/ELCI_2024_config.yml
new file mode 100644
index 00000000..40e72044
--- /dev/null
+++ b/electricitylci/modelconfig/ELCI_2024_config.yml
@@ -0,0 +1,151 @@
+# MODEL CONFIG FILE
+# This file contains parameters that can be changed by a user.
+
+# MODEL AND DATA YEARS
+# These parameters determine what year the outputs represent and which years
+# of data are used from each source.
+
+# The target year is used to determine the temporal correlation of data with
+# the electricity generation processes, which can be used in uncertainty
+# calculations.
+electricity_lci_target_year: 2025
+
+# Select a regional aggregation from "eGRID", "NERC", "BA", "US", "FERC",
+# and "EIA". The EPA_eGRID trading method can only be used with "eGRID".
+# The NETL_BA_trading method can only be used with "BA" and "FERC".
+regional_aggregation: 'BA'
+
+# If egrid is not used as a primary data source then set "replace_egrid"
+# to True. This will trigger the use of EPA CEMS and EIA data as substitutes
+# for eGRID. Replacing eGRID will also use EIA generation data to determine
+# the generation mix.
+egrid_year: 2023
+eia_gen_year: 2024
+replace_egrid: true
+
+# In 2023, NETL published the Cradle-to-Gate Life Cycle Analysis Baseline
+# for U.S. Coal Mining and Delivery (https://www.osti.gov/biblio/2370100),
+# which supersedes the 2020 model inventory used in the original baseline.
+# Select between the 2020 and 2023 model inventories. This impacts both
+# the mining and transportation LCIs, but does not impact the coal power plant
+# construction LCI.
+coal_model_year: 2023
+
+# NETL developed profiles for renewable generation to capture construction
+# and O&M impacts (e.g., solar PV manufacturing and power plant operations).
+# There are two vintages for renewable inventories: 2016 and 2020. The 2016
+# inventories combine construction and operations (recommended for use when
+# re-creating the 2016 baseline). In the 2020 inventory, these were separated
+# into individual inventories (recommended for future baselines).
+include_renewable_generation: true
+renewable_vintage: 2020
+
+# NETL has developed profiles for power plant water use.
+# Set to true to include these impacts.
+include_netl_water: true
+
+# Upstream data from NETL can be included in the outputs.
+include_upstream_processes: true
+
+# For the inventories of interest, see vintages available from StEWI
+# (https://github.com/USEPA/standardizedinventories).
+# If you plan to run a baseline, use the prescribed stewicombo file for
+# consistent results. Stewicombo files are listed in their corresponding folder
+# on the AWS site (https://dmap-data-commons-ord.s3.amazonaws.com/index.html).
+inventories_of_interest:
+  eGRID: 2023
+  TRI: 2023
+  NEI: 2020
+  RCRAInfo: 2023
+stewicombo_file: 'ELCI_2020_v1.1.4'
+
+# Provide uncertainty estimates for emissions.
+calculate_uncertainty: true
+
+
+# API DATA SOURCES
+# The API to access NETL EDX data resources (https://edx.netl.doe.gov).
+edx_api: ""
+
+# If replacing eGRID, then EPA CEMS from CAMPD is used. This data is available
+# from EPA's API (https://www.epa.gov/power-sector/cam-api-portal). Request
+# a free API key and copy-and-paste it here:
+epa_cam_api: ""
+
+# If using NETL's trading method, then EIA bulk U.S. Electric System
+# Operating Data are used. The data are available in two formats:
+# bulk zip (EBA.zip) and Opendata API. Choose the path (note that since
+# March 2024, both routes include data from 2019). To get 2016-2018 data,
+# use the archived bulk zip file (`use_eia_bulk_zip` = true) and bypass the
+# vintage check (`bypass_bulk_vintage` = true). The vintage check triggers
+# a new download of EBA.zip every 30 days, as defined by VINTAGE_THRESH
+# global parameter in `bulk_eia_data` and the EBA manifest text file.
+# To register for a free API key, go to: https://www.eia.gov/opendata/, and
+# copy-and-paste it for `eia_api` below.
+use_eia_bulk_zip: false
+bypass_bulk_vintage: true
+eia_api: ""
+
+
+# GENERATOR FILTERS
+# These parameters determine if any power plants are filtered out
+include_only_egrid_facilities_with_positive_generation: true
+filter_on_efficiency: true
+egrid_facility_efficiency_filters:
+  lower_efficiency: 10
+  upper_efficiency: 100
+
+# ELCI creates life cycle processes for each fuel type. If you only want to
+# include power plants with a minimum amount of generation from a single fuel,
+# make sure this parameter is set to true.
+filter_on_min_plant_percent_generation_from_primary_fuel: true
+min_plant_percent_generation_from_primary_fuel_category: 90
+
+# If the parameter above is set to true, one may still want to keep the plants
+# that do not meet the threshold. This setting will mark those power plants as
+# MIXED and generate life cycle processes for them. Note that the definition
+# of mixed is purposefully ambiguous and will mean different things depending
+# on the region of aggregation.
+keep_mixed_plant_category: true
+
+# To ensure consistency with eGRID, the model applies a filter to remove
+# industrial cogeneration facilities from the EIA Form 923 data based on
+# their NAICS code (i.e., code 22: Electricity Utilities and Independent
+# Power Producers) and by EIA sector numbers 1 and 2. This filter is
+# designed to reproduce the same power plants that are listed in eGRID.
+filter_non_egrid_emission_on_NAICS: true
+
+
+# TRANSMISSION AND ELECTRICITY TRADING
+# These parameters are used to determine which method of electricity trading is
+# used and what losses from electricity distribution are included in the
+# results.
+
+# Used to compute the loss factor for transmission and distribution
+# (i.e., 1/efficiency).
+efficiency_of_distribution_grid: 0.95
+
+# Electricity trading can be calculated using a method from EPA (with or
+# without a surplus trading pool) that is based on eGRID regions or a method
+# from NETL that uses an input-output approach using exchange data between
+# balancing authorities from EIA. If false, net trading is used.
+EPA_eGRID_trading: false
+
+# For net trading, select between two options for consumption mix
+# calculations: net trading (true) or gross trading (false) between NERC
+# and eGRID regions.
+net_trading: false
+
+# Trading year for EIA data used for net trading.
+NETL_IO_trading_year: 2024
+
+
+# POST PROCESSING
+# This triggers a cleaning of the JSON-LD (e.g., removing untracked flows,
+# zero value product flows, and renumbering the exchange table internal IDs).
+# These end processes help remove common errors that crash openLCA v2.
+# Product systems for the at-user consumption mixes are also generated.
+run_post_processes: true
+
+
+# OTHER PARAMETERS

From 506f770009f4a1f1b54f47779552516f8e127205 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Fri, 5 Sep 2025 16:25:36 -0400
Subject: [PATCH 02/24] add the stewicombo testing file to 2023 config

---
 electricitylci/modelconfig/ELCI_2023_config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml
index a25cd724..fc0908d0 100644
--- a/electricitylci/modelconfig/ELCI_2023_config.yml
+++ b/electricitylci/modelconfig/ELCI_2023_config.yml
@@ -57,7 +57,7 @@ inventories_of_interest:
   TRI: 2023
   NEI: 2020
   RCRAInfo: 2023
-stewicombo_file: 'ELCI_2020_v1.1.4'
+stewicombo_file: 'ELCI_2023_v1.2.0_d3bb4db'
 
 # Provide uncertainty estimates for emissions.
 calculate_uncertainty: true

From 8dba85aecdc62f71877093ffef5ab28a8547e142 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Fri, 5 Sep 2025 16:49:26 -0400
Subject: [PATCH 03/24] fix public coal for 2023

See solution provided by Ben, 6b859fae7f4e185816ce6d321e1d90c94259c221
---
 electricitylci/coal_upstream.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/electricitylci/coal_upstream.py b/electricitylci/coal_upstream.py
index c9417476..1c5f7e26 100644
--- a/electricitylci/coal_upstream.py
+++ b/electricitylci/coal_upstream.py
@@ -58,7 +58,7 @@
 transportation, but still mainly represents 2016.
 
 Last updated:
-    2025-06-09
+    2025-09-05
 """
 __all__ = [
     "basin_codes",       # Globals
@@ -436,7 +436,8 @@ def eia_7a_download(year, save_path):
     see https://github.com/USEPA/ElectricityLCI/issues/230 for a solution.
     """
     eia7a_base_url = 'http://www.eia.gov/coal/data/public/xls/'
-    name = 'coalpublic{}.xls'.format(year)
+    name = ('coalpublic{}.xls'.format(year) if year <= 2022 else
+            'coalpublic{}.xlsx'.format(year))
     url = eia7a_base_url + name
     try:
         os.makedirs(save_path)
@@ -1675,11 +1676,18 @@ def read_eia7a_public_coal(year):
         return_name=False)
     # If you're here, then see the following for hotfix:
     # https://github.com/USEPA/ElectricityLCI/issues/230
-    eia7a_df = pd.read_excel(
-        eia7a_path,
-        sheet_name='Hist_Coal_Prod',
-        skiprows=3
-    )
+    try:
+        eia7a_df = pd.read_excel(
+            eia7a_path,
+            sheet_name='Hist_Coal_Prod',
+            skiprows=3
+        )
+    except ValueError:
+        raise ValueError(
+            f'Error reading {eia7a_path}. Please see '
+            'https://github.com/USEPA/ElectricityLCI/issues/230 '
+            'for a solution'
+        )
     eia7a_df = _clean_columns(eia7a_df)
 
     return eia7a_df

From 6746d636c1b3989e09428a0d7c9de481ee84b553 Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Thu, 9 Oct 2025 15:19:37 -0400
Subject: [PATCH 04/24] Updates for 2023 data + other minor bug fixes - updated
 egrid_facilities.py to account for FuelCategory column name in new stewi data
 - updated eia_trans_dist_grid_loss.py to account for new link format for 2023
 data

---
 .gitignore                                 |  4 +-
 electricitylci/egrid_facilities.py         |  1 +
 electricitylci/eia_trans_dist_grid_loss.py | 46 +++++++++++++++-------
 3 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index 74db7c23..5bcd0593 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,4 +42,6 @@ electricitylci/output/*
 !electricitylci/data/*.yml
 
 # Whitelist everything except this!
-electricitylci/data/EFs/.DS_Store
\ No newline at end of file
+electricitylci/data/EFs/.DS_Store
+
+Notes.txt
\ No newline at end of file
diff --git a/electricitylci/egrid_facilities.py b/electricitylci/egrid_facilities.py
index db0844e9..f1035906 100644
--- a/electricitylci/egrid_facilities.py
+++ b/electricitylci/egrid_facilities.py
@@ -156,6 +156,7 @@ def make_egrid_subregion_ref(year):
 # Rename columns. NOTE: missing names resolved
 # (https://github.com/USEPA/standardizedinventories/issues/153)
 egrid_facilities.rename(columns={
+    'Plant primary fuel category': 'FuelCategory', # added for 2023 STEWI data
     'Plant primary coal/oil/gas/ other fossil fuel category': 'FuelCategory',
     'Plant primary fuel': 'PrimaryFuel',
     'eGRID subregion acronym': 'Subregion',
diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py
index 7262a604..b50a22bf 100644
--- a/electricitylci/eia_trans_dist_grid_loss.py
+++ b/electricitylci/eia_trans_dist_grid_loss.py
@@ -99,7 +99,7 @@ def eia_trans_dist_download_extract(year):
             # HOTFIX: URLs for two-word states have space omitted.
             url_a = (
                 "https://www.eia.gov/electricity/state/archive/"
-                + year
+                + f"{year}"
                 + "/"
                 + key.replace(" ", "")
                 + "/xls/"
@@ -111,22 +111,39 @@ def eia_trans_dist_download_extract(year):
                 + "/xls/"
                 + filename
             )
+            # bugfix: url for year 2023 [FH]
+            # this has to be updated later when 2023 data gets archived 
+            # and links should be rechecked for compatibility with 2024 data (when released)
+            url_c = (
+                "https://www.eia.gov/electricity/state/"
+                + key.replace(" ", "")
+                + "/xls/"
+                + "SEP Tables for "
+                + STATE_ABBREV[key].upper()
+                + ".xlsx"
+            )           
             # HOTFIX: https://github.com/USEPA/ElectricityLCI/issues/235
             #adding 20s timeout to avoid long delays due to server issues.
-            r = requests.get(url_a, timeout=20)
-            r_head = r.headers.get("Content-Type", "")
-            if not r.ok or r_head.startswith("text"):
-                logging.info(f"Trying alternative site {STATE_ABBREV[key]}")
-                #adding 20s timeout to avoid long delays due to server issues.
-                r = requests.get(url_b, timeout=20)
-                r_head = r.headers.get("Content-Type", "")
-
-            if r.ok and not r_head.startswith("text"):
-                with open(filename, 'wb') as f:
+            # bugfix: added condition to account for the 2023 data link format [FH]
+            if year == "2023":
+                r = requests.get(url_c, timeout=20)
+                with open (filename, "wb") as f:
                     f.write(r.content)
             else:
-                logging.error(
-                    f"No TD loss data for {STATE_ABBREV[key]} {year}")
+                r = requests.get(url_a, timeout=20)
+                r_head = r.headers.get("Content-Type", "")
+                if not r.ok or r_head.startswith("text"):
+                    logging.info(f"Trying alternative site {STATE_ABBREV[key]}")
+                    #adding 20s timeout to avoid long delays due to server issues.
+                    r = requests.get(url_b, timeout=20)
+                    r_head = r.headers.get("Content-Type", "")
+
+                if r.ok and not r_head.startswith("text"):
+                    with open(filename, 'wb') as f:
+                        f.write(r.content)
+                else:
+                    logging.error(
+                        f"No TD loss data for {STATE_ABBREV[key]} {year}")
 
         try:
             df = pd.read_excel(
@@ -155,7 +172,8 @@ def eia_trans_dist_download_extract(year):
 
     eia_trans_dist_loss.columns = eia_trans_dist_loss.columns.str.upper()
     eia_trans_dist_loss = eia_trans_dist_loss.transpose()
-    eia_trans_dist_loss = eia_trans_dist_loss[[year]]
+    eia_trans_dist_loss = eia_trans_dist_loss[f"{year}"]
+    eia_trans_dist_loss = eia_trans_dist_loss.to_frame()
     eia_trans_dist_loss.columns = ["t_d_losses"]
     os.chdir(old_path)
 

From d1dfccecfd7ac852ca700c4e77cfad82653b3e92 Mon Sep 17 00:00:00 2001
From: Francis Hanna <91334875+frankhanna94@users.noreply.github.com>
Date: Tue, 14 Oct 2025 15:49:16 -0400
Subject: [PATCH 05/24] Modify 'year' parameter type and DataFrame selection

Updated parameter type for 'year' to accept both string and integer. Added type check to convert 'year' to string if passed as an integer. Adjusted DataFrame selection to use double brackets for consistency.
---
 electricitylci/eia_trans_dist_grid_loss.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py
index b50a22bf..d93d5c3f 100644
--- a/electricitylci/eia_trans_dist_grid_loss.py
+++ b/electricitylci/eia_trans_dist_grid_loss.py
@@ -74,13 +74,16 @@ def eia_trans_dist_download_extract(year):
 
     Parameters
     ----------
-    year : str
+    year : str, int
         Analysis year
 
     Returns
     -------
     pandas.DataFrame
     """
+    # check in case year is passed as an int
+    if isinstance(year,str)
+        year = str(year)
     eia_trans_dist_loss = pd.DataFrame()
     old_path = os.getcwd()
     if os.path.exists(f"{paths.local_path}/t_and_d_{year}"):
@@ -99,7 +102,7 @@ def eia_trans_dist_download_extract(year):
             # HOTFIX: URLs for two-word states have space omitted.
             url_a = (
                 "https://www.eia.gov/electricity/state/archive/"
-                + f"{year}"
+                + year
                 + "/"
                 + key.replace(" ", "")
                 + "/xls/"
@@ -172,8 +175,7 @@ def eia_trans_dist_download_extract(year):
 
     eia_trans_dist_loss.columns = eia_trans_dist_loss.columns.str.upper()
     eia_trans_dist_loss = eia_trans_dist_loss.transpose()
-    eia_trans_dist_loss = eia_trans_dist_loss[f"{year}"]
-    eia_trans_dist_loss = eia_trans_dist_loss.to_frame()
+    eia_trans_dist_loss = eia_trans_dist_loss[[year]]
     eia_trans_dist_loss.columns = ["t_d_losses"]
     os.chdir(old_path)
 

From 794c7e88411d8818a5845d71fbb4a19782c97da0 Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Thu, 16 Oct 2025 09:49:54 -0400
Subject: [PATCH 06/24] Natural Gas Upstream update - Addresses issue #260 -
 added ng_model_year parameter to the modelconfig files - added new function
 to generate the ng lci based on the ng_model_year parameter - added
 NG_MODEL_YEARS to globals.py - added the ng_model_year to the config files -
 minor fix in eia_trans_dist_grid_loss.py - important note: modified
 generate_upstream_ng to handle NaN values.     See comments in the code.

---
 electricitylci/eia_trans_dist_grid_loss.py    |   2 +-
 electricitylci/globals.py                     |   2 +
 electricitylci/model_config.py                |  13 +
 electricitylci/modelconfig/ELCI_1_config.yml  |   7 +
 .../modelconfig/ELCI_2020_config.yml          |   7 +
 .../modelconfig/ELCI_2021_config.yml          |   7 +
 .../modelconfig/ELCI_2022_config.yml          |   7 +
 .../modelconfig/ELCI_2023_config.yml          |   8 +-
 .../modelconfig/ELCI_2024_config.yml          |   7 +
 electricitylci/modelconfig/ELCI_2_config.yml  |   7 +
 electricitylci/modelconfig/ELCI_3_config.yml  |   7 +
 electricitylci/natural_gas_upstream.py        | 551 +++++++++++++++++-
 12 files changed, 616 insertions(+), 9 deletions(-)

diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py
index d93d5c3f..3a7ac4f0 100644
--- a/electricitylci/eia_trans_dist_grid_loss.py
+++ b/electricitylci/eia_trans_dist_grid_loss.py
@@ -82,7 +82,7 @@ def eia_trans_dist_download_extract(year):
     pandas.DataFrame
     """
     # check in case year is passed as an int
-    if isinstance(year,str)
+    if isinstance(year,int):
         year = str(year)
     eia_trans_dist_loss = pd.DataFrame()
     old_path = os.getcwd()
diff --git a/electricitylci/globals.py b/electricitylci/globals.py
index 0c9916d0..acf84a24 100644
--- a/electricitylci/globals.py
+++ b/electricitylci/globals.py
@@ -233,6 +233,8 @@
 RENEWABLE_VINTAGES = [2016, 2020]
 '''list : The valid years for renewable inventories (i.e., 2016 and 2020).'''
 
+NG_MODEL_YEARS = [2016, 2020]
+'''list : The valid years for natural gas model (i.e., 2016 and 2020).'''
 
 ##############################################################################
 # FUNCTIONS
diff --git a/electricitylci/model_config.py b/electricitylci/model_config.py
index 9a052654..cb229187 100644
--- a/electricitylci/model_config.py
+++ b/electricitylci/model_config.py
@@ -17,6 +17,7 @@
 from electricitylci.globals import output_dir
 from electricitylci.globals import COAL_MODEL_YEARS
 from electricitylci.globals import RENEWABLE_VINTAGES
+from electricitylci.globals import NG_MODEL_YEARS
 
 
 ##############################################################################
@@ -141,6 +142,8 @@ class ModelSpecs:
         Absolute path to JSON-LD zip output file.
         File name includes the model name and current time stamp and is
         located by default in the output directory (see globals.py).
+    ng_model_year : int
+        The natural gas model year (e.g., 2016 or 2020).
     """
     def __init__(self, model_specs, model_name):
         """Class initialization.
@@ -201,6 +204,7 @@ def __init__(self, model_specs, model_name):
             f"{output_dir}/{model_name}_jsonld_"
             f"{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
         )
+        self.ng_model_year = model_specs["ng_model_year"]
 
 
 ##############################################################################
@@ -330,9 +334,18 @@ def check_model_specs(model_specs):
         err_str += " or ".join([str(x) for x in COAL_MODEL_YEARS])
         err_str += " not %s!" % model_specs['coal_model_year']
         raise ConfigurationError(err_str)
+    
     if not model_specs['renewable_vintage'] in RENEWABLE_VINTAGES:
         err_str = "The renewable inventory vintage must be one of "
         err_str += " or ".join([str(x) for x in RENEWABLE_VINTAGES])
         err_str += " not %s!" % model_specs['renewable_vintage']
         raise ConfigurationError(err_str)
+    
+    if not model_specs['ng_model_year'] in NG_MODEL_YEARS:
+        err_str = "The natural gas model year must be one of "
+        err_str += " or ".join([str(x) for x in NG_MODEL_YEARS])
+        err_str += " not %s!" % model_specs['ng_model_year']
+        raise ConfigurationError(err_str)
+    
     logging.info("Checks passed!")
+
diff --git a/electricitylci/modelconfig/ELCI_1_config.yml b/electricitylci/modelconfig/ELCI_1_config.yml
index f5699bbb..80d7b3d5 100644
--- a/electricitylci/modelconfig/ELCI_1_config.yml
+++ b/electricitylci/modelconfig/ELCI_1_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2020
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2016
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/modelconfig/ELCI_2020_config.yml b/electricitylci/modelconfig/ELCI_2020_config.yml
index 36bf05f3..fbd4875e 100644
--- a/electricitylci/modelconfig/ELCI_2020_config.yml
+++ b/electricitylci/modelconfig/ELCI_2020_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2020
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/modelconfig/ELCI_2021_config.yml b/electricitylci/modelconfig/ELCI_2021_config.yml
index ea680a96..c39bdc8e 100644
--- a/electricitylci/modelconfig/ELCI_2021_config.yml
+++ b/electricitylci/modelconfig/ELCI_2021_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2020
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/modelconfig/ELCI_2022_config.yml b/electricitylci/modelconfig/ELCI_2022_config.yml
index 64e75bc5..94d53947 100644
--- a/electricitylci/modelconfig/ELCI_2022_config.yml
+++ b/electricitylci/modelconfig/ELCI_2022_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2020
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml
index fc0908d0..4182d437 100644
--- a/electricitylci/modelconfig/ELCI_2023_config.yml
+++ b/electricitylci/modelconfig/ELCI_2023_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2020
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
@@ -147,5 +154,4 @@ NETL_IO_trading_year: 2023
 # Product systems for the at-user consumption mixes are also generated.
 run_post_processes: true
 
-
 # OTHER PARAMETERS
diff --git a/electricitylci/modelconfig/ELCI_2024_config.yml b/electricitylci/modelconfig/ELCI_2024_config.yml
index 40e72044..c8caedce 100644
--- a/electricitylci/modelconfig/ELCI_2024_config.yml
+++ b/electricitylci/modelconfig/ELCI_2024_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2020
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/modelconfig/ELCI_2_config.yml b/electricitylci/modelconfig/ELCI_2_config.yml
index f3704a90..d0a56093 100644
--- a/electricitylci/modelconfig/ELCI_2_config.yml
+++ b/electricitylci/modelconfig/ELCI_2_config.yml
@@ -31,6 +31,13 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2020
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2016
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/modelconfig/ELCI_3_config.yml b/electricitylci/modelconfig/ELCI_3_config.yml
index 572f2c61..51332d55 100644
--- a/electricitylci/modelconfig/ELCI_3_config.yml
+++ b/electricitylci/modelconfig/ELCI_3_config.yml
@@ -31,6 +31,13 @@ replace_egrid: false
 # construction LCI.
 coal_model_year: 2020
 
+# NG baseline year
+# this is used to determine which NG inventory to use. 
+# this impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution
+# Select between 2016 and 2020 model inventories.
+ng_model_year: 2016
+
 # NETL developed profiles for renewable generation to capture construction
 # and O&M impacts (e.g., solar PV manufacturing and power plant operations).
 # There are two vintages for renewable inventories: 2016 and 2020. The 2016
diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
index d05303b9..b5a1b5bf 100644
--- a/electricitylci/natural_gas_upstream.py
+++ b/electricitylci/natural_gas_upstream.py
@@ -8,6 +8,7 @@
 ##############################################################################
 import logging
 import os
+import sys
 
 import pandas as pd
 
@@ -16,6 +17,8 @@
 import electricitylci.PhysicalQuantities as pq
 from electricitylci.generation import add_temporal_correlation_score
 from electricitylci.model_config import model_specs
+from electricitylci.utils import download_edx
+from electricitylci.globals import paths
 ##############################################################################
 # MODULE DOCUMENTATION
 ##############################################################################
@@ -26,15 +29,72 @@
 Created:
     2019-02-18
 Last updated:
-    2024-01-10
+    2025-10-15
 """
 __all__ = [
     "generate_upstream_ng",
 ]
 
 
+#############################################################################
+# GLOBALS
 ##############################################################################
-# FUNCTIONS
+technobasins_basins = {
+    'Appalachian': ['FI - App Shale'],
+    'Alaska Offshore': ['FI - Alaska Offshore'],
+    'Anadarko': ['FI - Anadarko Conv','FI - Anadarko Shale', 'FI - Anadarko Tight'],
+    'Arkla': ['FI - Arkla Conv','FI - Arkla Shale','FI - Arkla Tight'],
+    'Arkoma': ['FI - Arkoma Conv','FI - Arkoma Shale'],
+    'East Texas': ['FI - East Texas Conv', 'FI - East Texas Shale', 'FI - East Texas Tight'],
+    'Fort Worth': ['FI - Fort Worth Shale'],
+    'Green River': ['FI - Green River Conv', 'FI - Green River Tight'],
+    'Gulf': ['FI - Gulf Conv', 'FI - Gulf Shale', 'FI - Gulf TIght'], ## This not a typo - the title of the sheet in the excel file is 'FI - Gulf TIght'
+    'Permian': ['FI - Permian Conv', 'FI - Permian Shale'],
+    'Piceance': ['FI - Piceance Tight'],
+    'San Juan': ['FI - San Juan CBM', 'FI - San Juan Shale'],
+    'South Oklahoma': ['FI - South OK Shale'],
+    'Strawn': ['FI - Strawn Shale'],
+    'Uinta': ['FI - Uinta Conv', 'FI - Uinta Tight'],
+    'GoM': ['FI - GoM Offshore']
+}   
+
+# Aliases to account for different naming conventions of technobasins used in the excel file
+# the below dictionary is hardcoded
+
+aliases = {
+    'Appalachian Shale': 'FI - App Shale',
+    'Alaska Offshore': 'FI - Alaska Offshore',
+    'GoM Offshore': 'FI - GoM Offshore',
+    'Arkla Shale': 'FI - Arkla Shale',
+    'Arkla Tight': 'FI - Arkla Tight',
+    'Green River Conv': 'FI - Green River Conv',
+    'Green River Tight': 'FI - Green River Tight',
+    'Permian Conv': 'FI - Permian Conv',
+    'Gulf Tight': 'FI - Gulf TIght', ## This not a typo - the title of the sheet in the excel file is 'FI - Gulf TIght'
+    'Uinta Conv': 'FI - Uinta Conv',
+    'Gulf Conv': 'FI - Gulf Conv',
+    'Gulf Shale': 'FI - Gulf Shale',
+    'Permian Shale': 'FI - Permian Shale',
+    'Anadarko Shale': 'FI - Anadarko Shale',
+    'South Oklahoma Shale': 'FI - South OK Shale',
+    'Uinta Tight': 'FI - Uinta Tight',
+    'East Texas Tight': 'FI - East Texas Tight',
+    'East Texas Shale': 'FI - East Texas Shale',
+    'Strawn Shale': 'FI - Strawn Shale',
+    'Piceance Tight': 'FI - Piceance Tight',
+    'Fort Worth Shale': 'FI - Fort Worth Shale',
+    'Arkla Conv': 'FI - Arkla Conv',
+    'East Texas Conv': 'FI - East Texas Conv',
+    'Arkoma Shale': 'FI - Arkoma Shale',
+    'Anadarko Conv': 'FI - Anadarko Conv',
+    'San Juan CBM': 'FI - San Juan CBM',
+    'Anadarko Tight': 'FI - Anadarko Tight',
+    'Arkoma Conv': 'FI - Arkoma Conv',
+    'San Juan Shale': 'FI - San Juan Shale'
+}
+
+##############################################################################
+# MAN FUNCTION
 ##############################################################################
 def generate_upstream_ng(year):
     """
@@ -100,11 +160,12 @@ def generate_upstream_ng(year):
         columns=['Plant Code']
     )
 
-    # Read the NG LCI excel file
-    ng_lci = pd.read_csv(
-        os.path.join(data_dir, "NG_LCI.csv"),
-        index_col=[0,1,2,3,4,5]
-    )
+    # Read the NG LCI file
+    # if year = 2016 - this step will directly read NG_LCI.csv from the data_dir
+    # if year = 2020 - this step will require edx api, download ng model and mapping 
+    # document from edx, and generate lci
+    ng_lci = get_ng_lci(model_specs.ng_model_year)
+
     ng_lci_columns=[
         "Compartment",
         "FlowName",
@@ -174,8 +235,484 @@ def generate_upstream_ng(year):
     #generation is combined, it needs to be based on the target year for the
     #inventory.
     ng_lci_basin["Year"]=year
+
+    # Issue: the current basin-to-plant mapping document does not include the Alaska Offshore and GoM Offshore basins
+    #        on the other hand, the ng_lci generated above includes emissions for both of these basins
+    #        this causes NaN values in the 'ng_lci_basin' dataframe and then returns errors when converting to int32
+    #        a quick fix involves omitting NaN values from the 'ng_lci_basin' dataframe - but this assumes that Offshore 
+    #        gas production is not used in electricity production
+    #        A fix for the future involves updating the mapping document: 'gas_supply_basin_mapping.csv' to account for 
+    #        offshore gas used in electricity production
+    
+    ng_lci_basin = ng_lci_basin.dropna(subset=['FlowAmount'])
+    
     return ng_lci_basin
 
+##############################################################################
+# HELPER FUNCTIONS
+##############################################################################
+
+def get_ng_lci(year):
+    """
+    Get the natural gas life cycle inventory for a given year.
+    Depending on the year, the natural gas life cycle inventory is either:
+        ** retrieved from existing data
+        ** calculated using the natural gas life cycle inventory model 
+
+    Parameters
+    ----------
+    year : str, int
+        The year for which to get the natural gas life cycle inventory.
+        This is retrieved from the model configuration
+    
+    Returns
+    -------
+    a dataframe containing the emissions associated with the natural gas 
+    production through transportation for each basin during the given year.
+
+    Notes
+    -----
+    This method depends on:
+        ** the configuration parameter: ng_model_year
+        ** the NG_LCI csv file (if the old model is selected in the configuration)
+        ** the EDx API (if the new model is selected in the configuration)
+        ** the elci flow mapping csv file (if the new model is selected in the configuration)
+    """
+    if isinstance(year, int):
+        year = str(year)
+    if year == "2016":
+        logging.info(f"Retrieving the 2016 natural gas life cycle inventory by basin.")
+        ng_lci = pd.read_csv(
+            os.path.join(data_dir, "NG_LCI.csv"),
+            index_col=[0,1,2,3,4,5]
+        )
+    else:
+        data_folder = os.path.join(paths.local_path, 'netl')
+        #check if the ng_lci_2020rev1.csv already exists - if it does then we can skip all the below
+        if os.path.exists(os.path.join(data_folder, "ng_lci_2020rev1.csv")):
+            logging.info(f"NG LCI already exists in your data directory.")
+            ng_lci = pd.read_csv(
+                os.path.join(data_folder, "ng_lci_2020rev1.csv"),
+                index_col=[0,1,2,3,4,5]
+            )
+        else:
+            # if it does not exist, then we need to generate it
+            logging.info(f"Retrieving the {year} natural gas life cycle inventory by basin.")
+            # this step will require downloading files from edx      
+            # retrieve ng model
+            # check if model is data_dir
+            if os.path.exists(os.path.join(data_folder, "ng_model_2020Rev1.xlsx")):
+                logging.info(f"NG model already exists in your data directory.")
+                excel_file_path = os.path.join(data_folder, "ng_model_2020Rev1.xlsx")
+            else:
+                # download model from edx
+                logging.info(f"Downloading natural gas model from EDx.")
+                edx_api = model_specs.edx_api_key
+                r_id_ng_2020rev1 = 'cb8c8cf2-47ce-4ff0-b285-be73ba9294b9' 
+                # resource id of 2020 Rev1 ng model on EDx
+                try:
+                    download_edx(resource_id = r_id_ng_2020rev1, api_key = edx_api, output_dir = data_folder)
+                    excel_file_path = os.path.join(data_folder, "Appendix_F_2020_Full_Inventory_Results_US_Avg_ProdThruTrans.xlsx")
+                except Exception as e:
+                    logging.error(f"Error downloading natural gas model from EDx. Error: {e}")
+                    sys.exit(1)
+            # retrieve flow mapping document from edx [elci.csv]
+            # check if flowmapping csv exists in data_dir
+            if os.path.exists(os.path.join(data_folder, "elci.csv")):
+                logging.info(f"ELCI flow mapping document already exists in your data directory.")
+                flow_mapping_path = os.path.join(data_folder, "elci.csv")
+            else:
+                # download flowmapping document from edx
+                logging.info(f"Downloading ELCI flow mapping document from EDx.")
+                r_id_elci = 'e2c8f934-e95e-470a-879b-17ebe4afd39e' # resource id of elci flow mapping document on EDx
+                try:
+                    download_edx(resource_id = r_id_elci, api_key = edx_api, output_dir = data_folder)
+                    flow_mapping_path = os.path.join(data_folder, "elci.csv")  
+                except Exception as e:
+                    logging.error(f"Error downloading ELCI flow mapping document from EDx. Error: {e}")
+                    sys.exit(1)
+            # production sheet name
+            production_sheet_name = '2020 Production Shares'
+            # run the generate_lci function and save its output in data_folder
+            try:
+                generate_lci (technobasins_basins, excel_file_path, flow_mapping_path, production_sheet_name, destination_path = data_folder, final_table_name = "ng_lci_2020rev1")
+                ng_lci = pd.read_csv(
+                    os.path.join(data_folder, "ng_lci_2020rev1.csv"),
+                    index_col=[0,1,2,3,4,5]
+                )
+            except Exception as e:
+                logging.error(f"Error generating natural gas life cycle inventory. Error: {e}")
+                sys.exit(1)
+    return ng_lci
+
+def generate_lci(technobasins_basins, excel_file_path, flow_mapping_path, production_sheet_name, destination_path, final_table_name):
+    """
+    This function reads an excel file, extracts the data, and generates a LCI for NG with the same format as the currently used file.
+
+    Args:
+        technobasins_basins (dict): A dictionary that maps technobasins to basins
+        excel_file_path (str): The path to the excel file
+        production_sheet_name (str): The name of the sheet that contains the production shares
+        destination_path (str): !!This is an optional input!! 
+                                The path to the destination folder. If not provided, the function 
+                                will save the file in the current working directory.
+        final_table_name (str): The name of the final table to be saved
+                                Optional input. If not provided, the function will save the file with the name 'final_table.csv'.
+
+    Returns:
+        final_table (pd.DataFrame): A dataframe with the LCI for NG with the same format as the currently used file.
+
+    Notes:
+        - The function is sensitive to the naming convention of the technobasins in the excel file.
+        - The current naming convention is: 'FI - <basin> <type>'. 
+        - Specifically, the current script is set up for the following sheet names:
+            - 'FI - App Shale', 'FI - Alaska Offshore', 'FI - Anadarko Conv', 'FI - Anadarko Shale', 'FI - Anadarko Tight', 
+            'FI - Arkla Conv', 'FI - Arkla Shale', 'FI - Arkla Tight', 'FI - Arkoma Conv', 'FI - Arkoma Shale', 'FI - East Texas Conv', 
+            'FI - East Texas Shale', 'FI - East Texas Tight', 'FI - Fort Worth Shale', 'FI - Green River Conv', 'FI - Green River Tight', 
+            'FI - Gulf Conv', 'FI - Gulf Shale', 'FI - Gulf TIght', 'FI - Permian Conv', 'FI - Permian Shale', 'FI - Piceance Tight', 
+            'FI - San Juan CBM', 'FI - San Juan Shale', 'FI - South OK Shale', 'FI - Strawn Shale', 'FI - Uinta Conv', 'FI - Uinta Tight', 
+            'FI - GoM Offshore'
+    """
+    # 0. Develop dictionary for basin, technobasins, and production shares
+    technobasins_basins = final_dictionary (technobasins_basins, excel_file_path, production_sheet_name)
+    print(technobasins_basins)
+
+    final_table = pd.DataFrame()
+
+    # 1. Read excel file
+    input_data = pd.ExcelFile(excel_file_path)
+    sheet_names = input_data.sheet_names
+    sheet_names = [name for name in sheet_names if name.startswith("FI")]
+    sheet_names = sheet_names[1:] # Drop the US Average sheet
+
+    # Get unused ground and water emissions based on average US emissions "FI - US Average"
+    unused_ground_emissions, unused_water_emissions = get_unused_flows(excel_file_path, "FI - US Average")
+
+    for sheet in sheet_names:
+        # Extract air, water, and ground emissions data for the selected sheet (i.e., technobasin)
+        air_emissions_data, water_emissions_data, ground_emissions_data = read_technobasin_data(excel_file_path, sheet)
+        
+        # Air emissions Get the correct flow names, compartment, and uuid for each flow
+        full_air_emissions_data = correct_netl_flow_names(air_emissions_data, flow_mapping_path)
+        full_air_emissions_data = full_air_emissions_data[full_air_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
+        
+        # Water emissions - drop unused flows
+        if unused_water_emissions is not None:
+            for flow in unused_water_emissions['FlowName']:
+                water_emissions_data = water_emissions_data.drop(water_emissions_data[water_emissions_data['FlowName'] == flow].index)        
+        # Water emissions - get the correct flow names, compartment, and uuid for each flow
+        full_water_emissions_data = correct_netl_flow_names(water_emissions_data, flow_mapping_path)
+        full_water_emissions_data = full_water_emissions_data[full_water_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
+        
+        # Ground emissions - drop unused flows
+        if unused_ground_emissions is not None:
+            for flow in unused_ground_emissions['FlowName']:
+                ground_emissions_data = ground_emissions_data.drop(ground_emissions_data[ground_emissions_data['FlowName'] == flow].index)
+        # Ground emissions - get the correct flow names, compartment, and uuid for each flow
+        full_ground_emissions_data = correct_netl_flow_names(ground_emissions_data, flow_mapping_path)
+        full_ground_emissions_data = full_ground_emissions_data[full_ground_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
+
+        # combine dataframes
+        df1 = pd.concat([full_air_emissions_data, full_water_emissions_data, full_ground_emissions_data])
+        df1 = df1.sort_values(by='FlowUUID') # sort by FlowUUID
+        basin_name = find_basin (technobasins_basins, sheet)
+        df1['FlowAmount'] = df1['FlowAmount'].astype(float)
+        df1['FlowAmount'] = df1['FlowAmount'].fillna(0)
+        norm_value = get_normalized_values(technobasins_basins, sheet)
+        df1['norm'] = norm_value
+        df1['norm'] = df1['norm'].astype(float)
+        df1['normalized_emissions'] = df1['FlowAmount'] * df1['norm']
+
+        # create final_table structure in 1st iteration
+        if final_table.empty:
+            final_table = df1[['FlowName', 'Compartment', 'Unit', 'input', 'FlowUUID']]
+            final_table = final_table.sort_values(by='FlowUUID')
+            final_table ['flow_type'] = 'ELEMENTARY_FLOW'
+            #reorder and rename columns
+            final_table = final_table[['Compartment', 'FlowName', 'FlowUUID', 'Unit', 'flow_type', 'input']]
+            final_table.columns = ['compartment', 'flow_name', 'uuid', 'unit', 'flow_type', 'is_input']
+            # add a column for each basin
+            basins_columns = list (technobasins_basins.keys())
+            for basin in basins_columns:
+                final_table[basin] = 0
+        final_table.head()
+        final_table.shape
+        
+        # Compute normalized emissions and add to final table   
+        try:
+            final_table['normalized_emissions'] = df1['normalized_emissions'].values
+            final_table[basin_name] += final_table['normalized_emissions']
+            final_table = final_table.drop(columns=['normalized_emissions'])
+        except Exception as e:
+            sys.exit(f"Error reading sheet. Make sure your excel file follows the correct naming convention.For reference, refer to the source code, lines 70-78. Error: {e}")
+
+
+    # 2. Save final table to CSV
+    save_ng_lci(final_table, final_table_name ,destination_path)
+    print(f"Final table saved to {destination_path}/{final_table_name}.csv")
+    
+    return final_table
+
+def get_unused_flows(excel_file_path, sheet_name):
+    """
+    This function extracts the unused ground and water emissions from a given natural gas results dataset
+
+    Inputs:
+    - excel_file_path: path to the excel file
+    - sheet_name: name of the sheet to extract the data from
+
+    Outputs:
+    - unused_ground_emissions: dataframe containing the unused ground emissions
+    - unused_water_emissions: dataframe containing the unused water emissions
+    """
+    us_average_data = pd.read_excel(excel_file_path, sheet_name=sheet_name,skiprows=0,header=None) 
+    us_average_data.iloc[0] = us_average_data.iloc[0].ffill()
+    us_average_data.iloc[1] = us_average_data.iloc[1].ffill()
+    us_average_data.columns = us_average_data.iloc[2]
+    us_average_data = us_average_data.drop(columns=["P2.5", "P97.5"])
+    us_average_data.columns = us_average_data.iloc[0]
+    us_average_data = us_average_data.drop(us_average_data.index[0])
+    #extract ground data from us_average sheet
+    ground_emissions_data = us_average_data.iloc[:, [us_average_data.shape[1]-3, us_average_data.shape[1]-2]]
+    ground_emissions_data.columns.values[0] = "FlowName"
+    ground_emissions_data.columns.values[1] = "FlowAmount"
+    ground_emissions_data = ground_emissions_data.dropna()
+    ground_emissions_data = ground_emissions_data.iloc[1:]
+    #extract water data from us_average sheet
+    water_emissions_data = us_average_data.iloc[:, [us_average_data.shape[1]-3, us_average_data.shape[1]-1]]
+    water_emissions_data.columns.values[0] = "FlowName"
+    water_emissions_data.columns.values[1] = "FlowAmount"
+    water_emissions_data = water_emissions_data.iloc[2:]
+    water_emissions_data = water_emissions_data.dropna()
+    #unused ground emissions
+    unused_ground_emissions = ground_emissions_data[ground_emissions_data['FlowAmount'] == 0.00e+00]
+    #unused water emissions
+    unused_water_emissions = water_emissions_data[water_emissions_data['FlowAmount'] == 0.00e+00]
+
+    return unused_ground_emissions, unused_water_emissions
+
+def read_technobasin_data(excel_file_path, sheet_name):
+    """
+    This function reads an excel file, extracts the data, and generates a df for NG emissions for air, water, and ground.
+    The df includes the flow name and flow amount (P2.5 and P97.5 values are dropped).
+
+    Inputs:
+    - excel_file_path: path to the excel file
+    - sheet_name: name of the sheet to extract the data from
+
+    Outputs:
+    - air_emissions_data: dataframe containing the air emissions data
+    - water_emissions_data: dataframe containing the water emissions data
+    - ground_emissions_data: dataframe containing the ground emissions data
+    """
+    print(f"Processing sheet: {sheet_name}")
+    # create empty database
+    df = pd.DataFrame()
+    # Extract all the data from the sheet
+    df = pd.read_excel(excel_file_path, sheet_name=sheet_name, skiprows=0, header=None)
+    # Adjustments: 1) changing header, 2) dropping P2.5 and P97.5 columns
+    df.iloc[0] = df.iloc[0].ffill()
+    df.iloc[1] = df.iloc[1].ffill()
+    df.columns = df.iloc[2]
+    df = df.drop(columns=["P2.5", "P97.5"])
+    df.columns = df.iloc[0]
+    df = df.drop(df.index[0])
+    # separate water, soil, ground, and air emissions - and map them to FEDEFL elementary flows
+    # Air emissions
+    air_emissions_data = df.drop(columns=[col for col in df.columns if col != df.columns[1]])  
+    air_emissions_data = air_emissions_data.iloc[:, :-2]    # drop the last two columns (empty columns from excel)
+    air_emissions_data[f'FlowAmount'] = air_emissions_data.iloc[:, 1:11].sum(axis=1)  # sum columns 2:11 for each row
+    air_emissions_data = air_emissions_data.iloc[2:]
+    air_emissions_data = air_emissions_data.iloc[:, [0,-1]]
+    air_emissions_data['Compartment'] = 'Air' # add compartment
+    air_emissions_data.columns.values[0] = 'FlowName' # change header
+    air_emissions_data['Unit'] = 'kg' # add unit
+    air_emissions_data ['input'] = False # add input
+    # Water emissions
+    water_emissions_data = df.iloc[:, [df.shape[1]-3, df.shape[1]-1]]
+    water_emissions_data.columns.values[0] = "FlowName"
+    water_emissions_data.columns.values[1] = "FlowAmount"
+    water_emissions_data = water_emissions_data.iloc[2:]
+    water_emissions_data = water_emissions_data.dropna()
+    water_emissions_data['Compartment'] = 'Water'
+    water_emissions_data['Unit'] = 'kg'
+    water_emissions_data ['input'] = False
+    # Ground emissions
+    ground_emissions_data = df.iloc[:, [df.shape[1]-3, df.shape[1]-2]]
+    ground_emissions_data.columns.values[0] = "FlowName"
+    ground_emissions_data.columns.values[1] = "FlowAmount"
+    ground_emissions_data = ground_emissions_data.dropna()
+    ground_emissions_data = ground_emissions_data.iloc[1:]
+    ground_emissions_data['Compartment'] = 'Ground'
+    ground_emissions_data['Unit'] = 'kg'
+    ground_emissions_data ['input'] = False
+
+    return air_emissions_data, water_emissions_data, ground_emissions_data
+
+
+# Helper function to calculate normalized values for each technobasin
+def get_normalized_values(technobasins_basins, technobasin):
+    for outer, inner in technobasins_basins.items():
+        if technobasin in inner:
+            total = sum(inner.values())
+            return float(inner[technobasin] / total)
+    return None
+
+# helper function to find basins for a given technobasin
+def find_basin(technobasins_basins, technobasin_name):
+    for outer, inner in technobasins_basins.items():
+        if technobasin_name in inner:
+            return outer
+    return None
+
+# Helper function to use aliases to normalize technobasin naming
+def _normalize_technobasin_naming(name):
+    name_lower = name.lower().strip()
+    
+    # Check exact or partial match
+    for alias, canonical in aliases.items():
+        alias_clean = alias.lower()
+        if name_lower in alias_clean or alias_clean in name_lower:
+            return canonical
+
+# Helper function to create the final dictionary including basin, technobasin, and production share
+def final_dictionary(technobasins_basins, excel_file_path, production_sheet_name):
+    production_shares_2020 = pd.read_excel(excel_file_path, sheet_name=production_sheet_name)
+    production_shares_2020 = production_shares_2020.iloc[1:]
+    production_shares_2020['Scenario Normalized'] = production_shares_2020['Scenario'].apply(lambda x: _normalize_technobasin_naming(x))
+    production_shares_2020 = production_shares_2020.drop(columns=production_shares_2020.columns[0])
+    production_shares_2020.columns.values[1] = 'Scenario'
+    production_shares_2020 = production_shares_2020[['Scenario', 'Production Shares (%)']]
+    # final dictionary including basin, technobasin, and production share
+    technobasins_basins = {
+        key: {num: production_shares_2020.set_index('Scenario').loc[num, 'Production Shares (%)'] for num in nums}
+        for key, nums in technobasins_basins.items()
+    }
+    return technobasins_basins
+
+def save_ng_lci(df, filename, destination_path):
+    """
+    This function saves the final table to a CSV file.
+    """
+    if destination_path is None:
+        destination_path = f"{os.getcwd()}/"
+    if filename is None:
+        filename = 'final_table'
+    full_path = os.path.join(destination_path, f"{filename}.csv")
+    df.to_csv(full_path, index=False)
+
+def correct_netl_flow_names(df, flow_mapping_path, amount_col="FlowAmount"):
+    """A helper method that replaces NETL air, water, and ground emissions
+    with Federal Elementary Flow List equivalents based on a subset of
+    flows defined in USEPA's eLCI mapping using the Python package
+    `fedelemflowlist <https://github.com/USEPA/fedelemflowlist>`_
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        A life cycle inventory data frame with columns, 'FlowName',
+        'Compartment', 'Unit', and ``amount_col``.
+    amount_col : str, optional
+        The column title representing the flow amount, by default "FlowAmount"
+
+    Returns
+    -------
+    pandas.DataFrame
+        A new data frame with the same number of rows and columns as the
+        sent data frame. Flow names, compartments, units, and flow amounts
+        are updated based on emissions matches with the FEDEFL. All unmatched
+        flows are returned 'as is'. If FlowUUID was not in the column list,
+        it is created; otherwise, the matched UUIDs are updated.
+    """
+    # This data frame has about 4k source flow names and contexts associated
+    # with NETL unit process models (e.g., petro, nuclear, coal).
+    flow_mapping = pd.read_csv(flow_mapping_path, encoding='ISO-8859-1')
+
+    # Matching occurs on name, compartment and units; help this along by
+    # lowering the case (improves coal UP matches from 10% to 42%).
+    df["FlowName_orig"] = df["FlowName"]
+    df["Compartment_orig"] = df["Compartment"]
+    df["FlowName"] = df["FlowName"].str.lower().str.rstrip()
+    df["Compartment"] = df["Compartment"].str.lower().str.rstrip()
+
+    # In the map, also lower-case names and compartments and remove trailing
+    # space; note this introduces duplicate entries in the map, so remove them.
+    # The duplicates are from later entries, so ignore mapper, verifier and
+    # last updated cols when searching for duplicates. [250917; TWD]
+    flow_mapping['SourceFlowName'] = flow_mapping[
+        'SourceFlowName'].str.lower().str.rstrip()
+    flow_mapping['SourceFlowContext'] = flow_mapping[
+        "SourceFlowContext"].str.lower().str.rstrip()
+    ignore_cols = ['Mapper', 'Verifier', 'LastUpdated']
+    flow_mapping = flow_mapping.drop_duplicates(
+        subset=[x for x in flow_mapping.columns if x not in ignore_cols]
+    )
+
+    # Some compartments in NETL UPs are complex (e.g., 'Emission to water/fresh
+    # water'), but are listed simply in the FEDEFL eLCI mapper (e.g., 'emission/
+    # water'). Improves coal mining UP matches from 42% to 62%.
+    is_emission = df['input'] == False
+    is_water = df['Compartment'].str.contains('water')
+    is_air = df['Compartment'].str.contains('air')
+    is_ground = df['Compartment'].str.contains('ground')
+
+    df.loc[is_emission * is_water, 'Compartment'] = 'emission/water'
+    df.loc[is_emission * is_air, 'Compartment'] = 'emission/air'
+    df.loc[is_emission * is_ground, 'Compartment'] = 'emission/ground'
+
+    # HOTFIX: Map against source units [250205; TWD]
+    # For coal mining, reduces matches from >62% to <62% (about 2k less rows)
+    logging.info("Mapping emissions to FEDEFL")
+    mapped_df = pd.merge(
+        df,
+        flow_mapping,
+        left_on=["FlowName", "Compartment", "Unit"],
+        right_on=["SourceFlowName", "SourceFlowContext", "SourceUnit"],
+        how="left",
+    )
+
+    # If TargetFlowName is present, there was a match.
+    is_match = mapped_df["TargetFlowName"].notnull()
+    logging.info("Correcting %d NETL flows" % is_match.sum())
+
+    # Quality Check (coal_df)
+    #   Check that target unit matches source unit.
+    #   No! Hydrogen, Uranium, and Lead-210/kg have mis-matched units.
+    #   Therefore, unit conversions are necessary.
+
+    # Return flow names and compartments back to their original values.
+    df["FlowName"] = df["FlowName_orig"]
+    df["Compartment"] = df["Compartment_orig"]
+    del df['FlowName_orig']      # use this syntax since you're editing
+    del df['Compartment_orig']   # a reference object that isn't returned
+    mapped_df['FlowName'] = mapped_df['FlowName_orig']
+    mapped_df["Compartment"] = mapped_df["Compartment_orig"]
+    mapped_df = mapped_df.drop(columns=['FlowName_orig', 'Compartment_orig'])
+
+    # Replace FlowName, Unit, and Compartment with new names (where matched)
+    mapped_df.loc[is_match, "FlowName"] = mapped_df.loc[
+        is_match, "TargetFlowName"]
+    mapped_df.loc[is_match, "Compartment"] = mapped_df.loc[
+        is_match, "TargetFlowContext"]
+    mapped_df.loc[is_match, "Unit"] = mapped_df.loc[is_match, "TargetUnit"]
+
+    # Correct values using the conversion factor
+    mapped_df.loc[is_match, amount_col] *= mapped_df.loc[
+        is_match, 'ConversionFactor']
+
+    if 'FlowUUID' in mapped_df.columns:
+        # Update existing values with new UUIDs
+        mapped_df.loc[is_match, 'FlowUUID'] = mapped_df.loc[
+            is_match, 'TargetFlowUUID']
+    else:
+        # Set UUIDs to target values
+        mapped_df = mapped_df.rename(columns={"TargetFlowUUID": "FlowUUID"})
+
+    # Drop all unneeded cols
+    drop_cols = [x for x in flow_mapping.columns if x in mapped_df.columns]
+    mapped_df = mapped_df.drop(columns=drop_cols)
+
+    return mapped_df
 
 ##############################################################################
 # MAIN

From 5f1b99a38032620c36e6d2a95e85865e31e81edf Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Mon, 20 Oct 2025 12:44:01 -0400
Subject: [PATCH 07/24] Updated ng model from basin to region basis - Updated
 the 2020 ng model to use the region basis - Implemented additional changes to
 map eia plants to regions - Updated the code to use basin mapping for 2016
 and region basis for 2020 ng model

---
 electricitylci/natural_gas_upstream.py | 571 ++++++++++++-------------
 1 file changed, 274 insertions(+), 297 deletions(-)

diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
index b5a1b5bf..92dd9a95 100644
--- a/electricitylci/natural_gas_upstream.py
+++ b/electricitylci/natural_gas_upstream.py
@@ -35,64 +35,35 @@
     "generate_upstream_ng",
 ]
 
-
-#############################################################################
-# GLOBALS
-##############################################################################
-technobasins_basins = {
-    'Appalachian': ['FI - App Shale'],
-    'Alaska Offshore': ['FI - Alaska Offshore'],
-    'Anadarko': ['FI - Anadarko Conv','FI - Anadarko Shale', 'FI - Anadarko Tight'],
-    'Arkla': ['FI - Arkla Conv','FI - Arkla Shale','FI - Arkla Tight'],
-    'Arkoma': ['FI - Arkoma Conv','FI - Arkoma Shale'],
-    'East Texas': ['FI - East Texas Conv', 'FI - East Texas Shale', 'FI - East Texas Tight'],
-    'Fort Worth': ['FI - Fort Worth Shale'],
-    'Green River': ['FI - Green River Conv', 'FI - Green River Tight'],
-    'Gulf': ['FI - Gulf Conv', 'FI - Gulf Shale', 'FI - Gulf TIght'], ## This not a typo - the title of the sheet in the excel file is 'FI - Gulf TIght'
-    'Permian': ['FI - Permian Conv', 'FI - Permian Shale'],
-    'Piceance': ['FI - Piceance Tight'],
-    'San Juan': ['FI - San Juan CBM', 'FI - San Juan Shale'],
-    'South Oklahoma': ['FI - South OK Shale'],
-    'Strawn': ['FI - Strawn Shale'],
-    'Uinta': ['FI - Uinta Conv', 'FI - Uinta Tight'],
-    'GoM': ['FI - GoM Offshore']
-}   
-
-# Aliases to account for different naming conventions of technobasins used in the excel file
-# the below dictionary is hardcoded
-
-aliases = {
-    'Appalachian Shale': 'FI - App Shale',
-    'Alaska Offshore': 'FI - Alaska Offshore',
-    'GoM Offshore': 'FI - GoM Offshore',
-    'Arkla Shale': 'FI - Arkla Shale',
-    'Arkla Tight': 'FI - Arkla Tight',
-    'Green River Conv': 'FI - Green River Conv',
-    'Green River Tight': 'FI - Green River Tight',
-    'Permian Conv': 'FI - Permian Conv',
-    'Gulf Tight': 'FI - Gulf TIght', ## This not a typo - the title of the sheet in the excel file is 'FI - Gulf TIght'
-    'Uinta Conv': 'FI - Uinta Conv',
-    'Gulf Conv': 'FI - Gulf Conv',
-    'Gulf Shale': 'FI - Gulf Shale',
-    'Permian Shale': 'FI - Permian Shale',
-    'Anadarko Shale': 'FI - Anadarko Shale',
-    'South Oklahoma Shale': 'FI - South OK Shale',
-    'Uinta Tight': 'FI - Uinta Tight',
-    'East Texas Tight': 'FI - East Texas Tight',
-    'East Texas Shale': 'FI - East Texas Shale',
-    'Strawn Shale': 'FI - Strawn Shale',
-    'Piceance Tight': 'FI - Piceance Tight',
-    'Fort Worth Shale': 'FI - Fort Worth Shale',
-    'Arkla Conv': 'FI - Arkla Conv',
-    'East Texas Conv': 'FI - East Texas Conv',
-    'Arkoma Shale': 'FI - Arkoma Shale',
-    'Anadarko Conv': 'FI - Anadarko Conv',
-    'San Juan CBM': 'FI - San Juan CBM',
-    'Anadarko Tight': 'FI - Anadarko Tight',
-    'Arkoma Conv': 'FI - Arkoma Conv',
-    'San Juan Shale': 'FI - San Juan Shale'
+# Supporting Dicts
+# #######################################################################################################
+region_sheets_dict = {
+    'Pacific': 'FI - Pacific Delivery',
+    'Rocky Mountain': 'FI - Rocky Mountain Delivery',
+    'Southwest': 'FI - Southwest Delivery',
+    'Midwest': 'FI - Midwest Delivery',
+    'Southeast': 'FI - Southeast Delivery',
+    'Northeast': 'FI - Northeast Delivery'
+ }
+
+r_ids_2020 = {
+    'Appendix_F_2020_Full_Inventory_Results_Midwest_ProdThruTrans.xlsx':'5665de40-fc2b-4643-b647-ceec226af2bb', 
+    'Appendix_F_2020_Full_Inventory_Results_Northeast_ProdThruTrans.xlsx' :'b396eb50-72ac-45f0-8231-9b613457c6d8', 
+    'Appendix_F_2020_Full_Inventory_Results_Pacific_ProdThruTrans.xlsx' :'347a0cd8-5ff2-4cb3-be0a-f31a56bac9c6', 
+    'Appendix_F_2020_Full_Inventory_Results_Rocky_Mountain_ProdThruTrans.xlsx' :'d08f4da2-543a-40b2-9ffd-c7138ed4f8c6', 
+    'Appendix_F_2020_Full_Inventory_Results_Southeast_ProdThruTrans.xlsx' :'4590712b-db21-4428-b488-6ded3b65d18b', 
+    'Appendix_F_2020_Full_Inventory_Results_Southwest_ProdThruTrans.xlsx':'9dd7a6e5-df1a-461e-87e7-0b9d8d600f26'
 }
 
+region_state_mapping = {
+    'WA':'Pacific','CA':'Pacific','OR':'Pacific','MT':'Rocky Mountain','ID':'Rocky Mountain','CO':'Rocky Mountain','NV':'Rocky Mountain','UT':'Rocky Mountain','WY':'Rocky Mountain',
+    'AZ':'Southwest','NM':'Southwest','OK':'Southwest','TX':'Southwest','MN':'Midwest','ND':'Midwest','IA':'Midwest','KS':'Midwest',
+    'MO':'Midwest','NE':'Midwest','SD':'Midwest','IL':'Midwest','IN':'Midwest','OH':'Midwest','WI':'Midwest','MI':'Midwest',
+    'AR':'Southeast','LA':'Southeast','AL':'Southeast','FL':'Southeast','GA':'Southeast','MS':'Southeast','SC':'Southeast','KY':'Southeast',
+    'NC':'Southeast','TN':'Southeast','VA':'Southeast','WV':'Southeast','DE':'Southeast','MD':'Southeast','CT':'Northeast','MA':'Northeast',
+    'NH':'Northeast','RI':'Northeast','VT':'Northeast','NJ':'Northeast','NY':'Northeast','PA':'Northeast','ME':'Northeast',
+} # TOTAL 48 states -- excluding AK, HI, and DC
+
 ##############################################################################
 # MAN FUNCTION
 ##############################################################################
@@ -124,134 +95,243 @@ def generate_upstream_ng(year):
     """
     logging.info("Generating natural gas inventory")
 
-    # Get the EIA generation data for the specified year, this dataset includes
-    # the fuel consumption for generating electricity for each facility
-    # and fuel type. Filter the data to only include NG facilities and on
-    # positive fuel consumption. Group that data by Plant Id as it is possible
-    # to have multiple rows for the same facility and fuel based on different
-    # prime movers (e.g., gas turbine and combined cycle).
-    eia_generation_data = eia923_download_extract(year)
-
-    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
-                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
-    ng_generation_data = eia_generation_data[column_filt]
-
-    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
-        {'Total Fuel Consumption MMBtu':'sum'}).reset_index()
-    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
-
-    # Import the mapping file which has the source gas basin for each Plant Id.
-    # NOTE:
-    #   This is a 2 MB file that provides about 100 kB of info!
-    ng_basin_mapping = pd.read_csv(
-        os.path.join(data_dir, 'gas_supply_basin_mapping.csv')
-    )
-    subset_cols = ['Plant Code', 'NG_LCI_Name']
-    ng_basin_mapping = ng_basin_mapping[subset_cols]
+    # get plant data and map each plant to its ng source: basin or region
+    # the 2016 ng emissions inventory is only available by basin
+    #   as such, plants can only be connected to upstream emissions via basin assignment
+    # newer data (2020) is available by region 
+    #   plants are connected to upstream ng emissions via region assignment
 
-    # Merge with ng_generation dataframe.
-    ng_generation_data_basin = pd.merge(
-        left = ng_generation_data,
-        right = ng_basin_mapping,
-        left_on = 'Plant Id',
-        right_on = 'Plant Code'
-    )
-    ng_generation_data_basin = ng_generation_data_basin.drop(
-        columns=['Plant Code']
-    )
+    if model_specs.ng_model_year == 2016:
+        ng_generation_data_mapped = map_ng_by_basin(year) # 'year' refers to eia_generation_year
+    else:
+        ng_generation_data_mapped = map_ng_by_region(year) # 'year' refers to eia_generation_year
 
     # Read the NG LCI file
-    # if year = 2016 - this step will directly ready NG_LCI.csv from the data_dir
-    # if year = 2020 - this step will require edx api, download ng model and mapping 
+    # if year = 2016 - this step will directly read NG_LCI.csv from the data_dir and return the lci (by basin)
+    # if year = 2020 - this step returns the lci (by region); it will require edx api, download ng model and mapping
     # document from edx, and generate lci
     ng_lci = get_ng_lci(model_specs.ng_model_year)
 
-    ng_lci_columns=[
-        "Compartment",
-        "FlowName",
-        "FlowUUID",
-        "Unit",
-        "FlowType",
-        "input",
-        "Basin",
-        "FlowAmount"
-    ]
-    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
-    ng_lci_stack.columns=ng_lci_columns
-
-    # Merge basin data with LCI dataset
-    ng_lci_basin = pd.merge(
-        ng_lci_stack,
-        ng_generation_data_basin,
-        left_on = 'Basin',
-        right_on = 'NG_LCI_Name',
-        how='left'
-    )
+    # merge ng lci and plants based on the common parameter: region or basin
+    if model_specs.ng_model_year == 2016:
+        ng_lci_mapped = map_ng_lci_to_plants_by_basin(ng_lci, ng_generation_data_mapped)
+    else:
+        ng_lci_mapped = map_ng_lci_to_plants_by_region(ng_lci, ng_generation_data_mapped)
 
     # Multiplying with the EIA 923 fuel consumption; conversion factor is
     # for MMBtu to MJ
     btu_to_MJ = pq.convert(10**6,'Btu','MJ')
-    ng_lci_basin["FlowAmount"]=(
-        ng_lci_basin["FlowAmount"]
-        * ng_lci_basin['Total Fuel Consumption MMBtu']
+    ng_lci_mapped["FlowAmount"]=(
+        ng_lci_mapped["FlowAmount"]
+        * ng_lci_mapped['Total Fuel Consumption MMBtu']
         * btu_to_MJ
     )
 
-    ng_lci_basin = ng_lci_basin.rename(
+    ng_lci_mapped = ng_lci_mapped.rename(
         columns={'Total Fuel Consumption MMBtu':'quantity'})
-    ng_lci_basin["quantity"]=ng_lci_basin["quantity"]*btu_to_MJ
+    ng_lci_mapped["quantity"]=ng_lci_mapped["quantity"]*btu_to_MJ
 
     # Output is kg emission for the specified year by facility Id,
     # not normalized to electricity output
 
-    ng_lci_basin['FuelCategory'] = 'GAS'
-    ng_lci_basin.rename(
+    ng_lci_mapped['FuelCategory'] = 'GAS'
+    ng_lci_mapped.rename(
         columns={
             'Plant Id':'plant_id',
+            'NG_LCI_Region': 'stage_code',
             'NG_LCI_Name':'stage_code',
             'Stage':'stage'},
         inplace=True
     )
-    ng_lci_basin["Year"] = year
-    ng_lci_basin["Source"] = "netlgaseiafuel"
-    ng_lci_basin["ElementaryFlowPrimeContext"] = "emission"
-    ng_lci_basin.loc[
-        ng_lci_basin["Compartment"].str.contains("resource/"),
+    ng_lci_mapped["Year"] = year
+    ng_lci_mapped["Source"] = "netlgaseiafuel"
+    ng_lci_mapped["ElementaryFlowPrimeContext"] = "emission"
+    ng_lci_mapped.loc[
+        ng_lci_mapped["Compartment"].str.contains("resource/"),
         "ElementaryFlowPrimeContext"] = "resource"
-    ng_lci_basin.loc[
-        ng_lci_basin["Compartment"].str.contains("Technosphere/"),
+    ng_lci_mapped.loc[
+        ng_lci_mapped["Compartment"].str.contains("Technosphere/"),
         "ElementaryFlowPrimeContext"] = "technosphere"
     # Issue #296 - adding DQI information for upstream processes
-    ng_lci_basin["Year"] = 2016
-    ng_lci_basin["DataReliability"] = 3
-    ng_lci_basin["TemporalCorrelation"] = add_temporal_correlation_score(
-        ng_lci_basin["Year"], model_specs.electricity_lci_target_year
+    ng_lci_mapped["Year"] = 2016
+    ng_lci_mapped["DataReliability"] = 3
+    ng_lci_mapped["TemporalCorrelation"] = add_temporal_correlation_score(
+        ng_lci_mapped["Year"], model_specs.electricity_lci_target_year
     )
-    ng_lci_basin["GeographicalCorrelation"] = 1
-    ng_lci_basin["TechnologicalCorrelation"] = 1
-    ng_lci_basin["DataCollection"] = 1
+    ng_lci_mapped["GeographicalCorrelation"] = 1
+    ng_lci_mapped["TechnologicalCorrelation"] = 1
+    ng_lci_mapped["DataCollection"] = 1
     #3/20/2025 MBJ - replacing renewable vintage here so that temporal correlation
     #is based on the year the inventory is based on, but when electricity
     #generation is combined, it needs to be based on the target year for the
     #inventory.
-    ng_lci_basin["Year"]=year
-
-    # Issue: the current basin-to-plant mapping document does not include the Alaska Offshore and GoM Offshore basins
-    #        on the other hand, the ng_lci generated above includes emissions for both of there basins
-    #        this causes NaN values in the 'ng_lci_basin' dataframe and then returns errors when converting to int32
-    #        a quick fix involves omitting NaN values from the 'ng_lci_basin' dataframe - but this assumes that Offshore 
-    #        gas production is not used in electricity production
-    #        A fix for the future involves updating the mapping document: 'gas_supply_basin_mapping.csv' to account for 
-    #        offshore gas used in electricity production
-    
-    ng_lci_basin = ng_lci_basin.dropna(subset=['FlowAmount'])
+    ng_lci_mapped["Year"]=year
     
-    return ng_lci_basin
+    return ng_lci_mapped
 
 ##############################################################################
 # HELPER FUNCTIONS
 ##############################################################################
 
+def map_ng_lci_to_plants_by_basin (ng_lci, ng_generation_data_mapped):
+    """
+    Merge the stacked natural gas LCI with the plant generation data by basin.
+    """
+    ng_lci_columns=[
+        "Compartment",
+        "FlowName",
+        "FlowUUID",
+        "Unit",
+        "FlowType",
+        "input",
+        "Basin",
+        "FlowAmount"
+    ]
+    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
+    ng_lci_stack.columns=ng_lci_columns
+
+    # Merge basin data with LCI dataset
+    ng_lci_mapped = pd.merge(
+        ng_lci_stack,
+        ng_generation_data_mapped,
+        left_on = 'Basin',
+        right_on = 'NG_LCI_Name',
+        how='left'
+    )   
+    return ng_lci_mapped
+
+def map_ng_lci_to_plants_by_region (ng_lci, ng_generation_data_mapped):
+    """
+    Merge the stacked natural gas LCI with the plant generation data by region.
+    """
+    ng_lci_columns=[
+        "Compartment",
+        "FlowName",
+        "FlowUUID",
+        "Unit",
+        "FlowType",
+        "input",
+        "Region",
+        "FlowAmount"
+    ]
+    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
+    ng_lci_stack.columns=ng_lci_columns
+
+    # Merge region data with LCI dataset
+    ng_lci_mapped = pd.merge(
+        ng_lci_stack,
+        ng_generation_data_mapped,
+        left_on = 'Region',
+        right_on = 'NG_LCI_Region',
+        how='left'
+    )   
+    return ng_lci_mapped
+
+
+def map_ng_by_region (year):
+    """
+    Map the natural gas generation data by region.
+    This includes 6 regions: Pacific, Rocky Mountain, Southwest, Midwest, Southeast, and Northeast.
+
+    Notes
+    -----
+    * Downloads eia plant data for the specified year
+    * Filters the data to only include NG facilities and on positive fuel consumption
+    * Groups the data by Plant Id and aggregates the fuel consumption by summing the total fuel consumption
+    * Maps each plant to a region using the region_state_mapping dictionary
+
+    Parameters
+    ----------
+    year: int, str
+        The year of the eia923 plant data to use.
+
+    Returns
+    ----------
+    pandas.DataFrame
+        A dataframe with the natural gas generation data by region.
+    """
+    if isinstance(year, str):
+        year = int(year)
+    
+    eia_generation_data = eia923_download_extract(year)
+
+    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
+                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
+
+    ng_generation_data = eia_generation_data[column_filt]
+
+    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
+        {'Total Fuel Consumption MMBtu':'sum','State':'first'}).reset_index()
+    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
+
+    ng_generation_data_region = ng_generation_data.copy()
+
+    ng_generation_data_region['NG_LCI_Region'] = ng_generation_data['State'].map(region_state_mapping)
+    
+    return ng_generation_data_region
+
+
+def map_ng_by_basin (year):
+    """
+    Map the natural gas generation data by basin.
+
+    Notes
+    -----
+    * Downloads eia plant data for the specified year
+    * Filters the data to only include NG facilities and on positive fuel consumption
+    * maps each plant to a basin using the gas_supply_basin_mapping.csv file
+    
+    Parameters
+    ----------
+    year: int, str
+        The year of the eia923 plant data to use.
+
+    Returns
+    ----------
+    pandas.DataFrame
+        A dataframe with the natural gas generation data by basin.
+    """
+    if isinstance(year, str):
+        year = int(year)
+    
+    # Get the EIA generation data for the specified year, this dataset includes
+    # the fuel consumption for generating electricity for each facility
+    # and fuel type. Filter the data to only include NG facilities and on
+    # positive fuel consumption. Group that data by Plant Id as it is possible
+    # to have multiple rows for the same facility and fuel based on different
+    # prime movers (e.g., gas turbine and combined cycle).
+
+    eia_generation_data = eia923_download_extract(year)
+
+    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
+                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
+    ng_generation_data = eia_generation_data[column_filt]
+
+    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
+        {'Total Fuel Consumption MMBtu':'sum'}).reset_index()
+    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
+
+    # Import the mapping file which has the source gas basin for each Plant Id.
+    # NOTE:
+    #   This is a 2 MB file that provides about 100 kB of info!
+    ng_basin_mapping = pd.read_csv(
+        os.path.join(data_dir, 'gas_supply_basin_mapping.csv')
+    )
+    subset_cols = ['Plant Code', 'NG_LCI_Name']
+    ng_basin_mapping = ng_basin_mapping[subset_cols]
+
+    # Merge with ng_generation dataframe.
+    ng_generation_data_basin = pd.merge(
+        left = ng_generation_data,
+        right = ng_basin_mapping,
+        left_on = 'Plant Id',
+        right_on = 'Plant Code'
+    )
+    ng_generation_data_basin = ng_generation_data_basin.drop(
+        columns=['Plant Code']
+    )
+    return ng_generation_data_basin
+
 def get_ng_lci(year):
     """
     Get the natural gas life cycle inventory for a given year.
@@ -288,7 +368,11 @@ def get_ng_lci(year):
         )
     else:
         data_folder = os.path.join(paths.local_path, 'netl')
-        #check if the ng_lci_2020rev1.csv already exists - if it does then we can skip all the below
+        # create a new directory for the ng data if it does not already exist
+        if not os.path.exists(os.path.join(data_folder,"2020_ng")):
+            os.makedirs(os.path.join(data_folder,"2020_ng"))
+        data_folder = os.path.join(data_folder,"2020_ng")
+        # check if the ng_lci_2020rev1.csv already exists - if it does then we can skip all the below
         if os.path.exists(os.path.join(data_folder, "ng_lci_2020rev1.csv")):
             logging.info(f"NG LCI already exists in your data directory.")
             ng_lci = pd.read_csv(
@@ -297,25 +381,25 @@ def get_ng_lci(year):
             )
         else:
             # if it does not exist, then we need to generate it
-            logging.info(f"Retrieving the {year} natural gas life cycle inventory by basin.")
+            logging.info(f"Retrieving the {year} natural gas life cycle inventory by region.")
             # this step will require downloading files from edx      
             # retrieve ng model
             # check if model is data_dir
-            if os.path.exists(os.path.join(data_folder, "ng_model_2020Rev1.xlsx")):
-                logging.info(f"NG model already exists in your data directory.")
-                excel_file_path = os.path.join(data_folder, "ng_model_2020Rev1.xlsx")
+            if not os.path.exists(os.path.join(data_folder,"2020_ng_model")):
+                os.makedirs(os.path.join(data_folder,"2020_ng_model"))
+                model_folder = os.path.join(data_folder,"2020_ng_model")
             else:
-                # download model from edx
-                logging.info(f"Downloading natural gas model from EDx.")
-                edx_api = model_specs.edx_api_key
-                r_id_ng_2020rev1 = 'cb8c8cf2-47ce-4ff0-b285-be73ba9294b9' 
-                # resource id of 2020 Rev1 ng model on EDx
-                try:
-                    download_edx(resource_id = r_id_ng_2020rev1, api_key = edx_api, output_dir = data_folder)
-                    excel_file_path = os.path.join(data_folder, "Appendix_F_2020_Full_Inventory_Results_US_Avg_ProdThruTrans.xlsx")
-                except Exception as e:
-                    logging.error(f"Error downloading natural gas model from EDx. Error: {e}")
-                    sys.exit(1)
+                model_folder = os.path.join(data_folder,"2020_ng_model")
+                for ngmodel in r_ids_2020.keys():
+                    if os.path.exists(os.path.join(model_folder, ngmodel)):
+                        logging.info(f"{ngmodel} already exists in your data directory.")
+                    else:
+                        logging.info(f"Downloading {ngmodel} from EDx.")
+                        try:
+                            download_edx(resource_id = r_ids_2020[ngmodel], api_key = model_specs.edx_api_key, output_dir = model_folder)
+                        except Exception as e:
+                            logging.error(f"Error downloading {ngmodel} from EDx. Error: {e}")
+                            sys.exit(1)
             # retrieve flow mapping document from edx [elci.csv]
             # check if flowmapping csv exists in data_dir
             if os.path.exists(os.path.join(data_folder, "elci.csv")):
@@ -326,16 +410,15 @@ def get_ng_lci(year):
                 logging.info(f"Downloading ELCI flow mapping document from EDx.")
                 r_id_elci = 'e2c8f934-e95e-470a-879b-17ebe4afd39e' # resource id of elci flow mapping document on EDx
                 try:
-                    download_edx(resource_id = r_id_elci, api_key = edx_api, output_dir = data_folder)
+                    download_edx(resource_id = r_id_elci, api_key = model_specs.edx_api_key, output_dir = data_folder)
                     flow_mapping_path = os.path.join(data_folder, "elci.csv")  
                 except Exception as e:
                     logging.error(f"Error downloading ELCI flow mapping document from EDx. Error: {e}")
                     sys.exit(1)
-            # production sheet name
-            production_sheet_name = '2020 Production Shares'
+
             # run the generate_ng_lci function and save it in data_dir
             try:
-                generate_lci (technobasins_basins, excel_file_path, flow_mapping_path, production_sheet_name, destination_path = data_folder, final_table_name = "ng_lci_2020rev1")
+                generate_lci (excel_folder_path = model_folder, flow_mapping_path = flow_mapping_path, destination_path = data_folder, final_table_name = "ng_lci_2020rev1")
                 ng_lci = pd.read_csv(
                     os.path.join(data_folder, "ng_lci_2020rev1.csv"),
                     index_col=[0,1,2,3,4,5]
@@ -345,14 +428,13 @@ def get_ng_lci(year):
                 sys.exit(1)
     return ng_lci
 
-def generate_lci(technobasins_basins, excel_file_path, flow_mapping_path, production_sheet_name, destination_path, final_table_name):
+def generate_lci(excel_folder_path, flow_mapping_path, destination_path, final_table_name):
     """
     This function reads an excel file, extracts the data, and generates a LCI for NG with the same format as the currently used file.
 
     Args:
-        technobasins_basins (dict): A dictionary that maps technobasins to basins
-        excel_file_path (str): The path to the excel file
-        production_sheet_name (str): The name of the sheet that contains the production shares
+        excel_folder_path (str): The path to the folder containing the excel files (ng models/inventories)
+        flow_mapping_path (str): The path to the flow mapping file
         destination_path (str): !!This is an optional input!! 
                                 The path to the destination folder. If not provided, the function 
                                 will save the file in the current working directory.
@@ -363,51 +445,32 @@ def generate_lci(technobasins_basins, excel_file_path, flow_mapping_path, produc
         final_table (pd.DataFrame): A dataframe with the LCI for NG with the same format as the currently used file.
 
     Notes:
-        - The function is senstive to the naming convention of the technobasins in the excel file.
-        - The current naming convention is: 'FI - <basin> <type>'. 
-        - Specifically, the current script is set up for the following sheet names:
-            - 'FI - App Shale', 'FI - Alaska Offshore', 'FI - Anadarko Conv', 'FI - Anadarko Shale', 'FI - Anadarko Tight', 
-            'FI - Arkla Conv', 'FI - Arkla Shale', 'FI - Arkla Tight', 'FI - Arkoma Conv', 'FI - Arkoma Shale', 'FI - East Texas Conv', 
-            'FI - East Texas Shale', 'FI - East Texas Tight', 'FI - Fort Worth Shale', 'FI - Green River Conv', 'FI - Green River Tight', 
-            'FI - Gulf Conv', 'FI - Gulf Shale', 'FI - Gulf TIght', 'FI - Permian Conv', 'FI - Permian Shale', 'FI - Piceance Tight', 
-            'FI - San Juan CBM', 'FI - San Juan Shale', 'FI - South OK Shale', 'FI - Strawn Shale', 'FI - Uinta Conv', 'FI - Uinta Tight', 
-            'FI - GoM Offshore'
+        - The function is sensitive to the naming convention of the regions in the excel file.
     """
-    # 0. Develop dictionary for basin, technobasins, and production shares
-    technobasins_basins = final_dictionary (technobasins_basins, excel_file_path, production_sheet_name)
-    print(technobasins_basins)
-
     final_table = pd.DataFrame()
 
-    # 1. Read excel file
-    input_data = pd.ExcelFile(excel_file_path)
-    sheet_names = input_data.sheet_names
-    sheet_names = [name for name in sheet_names if name.startswith("FI")]
-    sheet_names = sheet_names[1:] # Drop the US Average sheet
-
-    # Get unused ground and water emissions based on average US emissions "FI - US Average"
-    unused_ground_emissions, unused_water_emissions = get_unused_flows(excel_file_path, "FI - US Average")
+    # determine folder path containing the excel files
+    
+    # 1. Read excel files in the folder path containing the model
+    for filename in os.listdir(excel_folder_path):
+        if filename.endswith('.xlsx'):
+            file_path = os.path.join(excel_folder_path, filename)
+            logging.info(f"Reading file: {file_path}")
+            input_data = pd.ExcelFile(file_path)
+            sheet_names = input_data.sheet_names
+            sheet_name = [name for name in sheet_names if name in region_sheets_dict.values()][0]
 
-    for sheet in sheet_names:
         # Extract air, water, and ground emissions data for the selected sheet (i.e., technobasin)
-        air_emissions_data, water_emissions_data, ground_emissions_data = read_technobasin_data(excel_file_path, sheet)
+        air_emissions_data, water_emissions_data, ground_emissions_data = read_region_data(file_path, sheet_name)
         
         # Air emissions Get the correct flow names, compartment, and uuid for each flow
         full_air_emissions_data = correct_netl_flow_names(air_emissions_data, flow_mapping_path)
         full_air_emissions_data = full_air_emissions_data[full_air_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
         
-        # Water emissions - drop unused flows
-        if unused_water_emissions is not None:
-            for flow in unused_water_emissions['FlowName']:
-                water_emissions_data = water_emissions_data.drop(water_emissions_data[water_emissions_data['FlowName'] == flow].index)        
         # Water emissions - get the correct flow names, compartment, and uuid for each flow
         full_water_emissions_data = correct_netl_flow_names(water_emissions_data, flow_mapping_path)
         full_water_emissions_data = full_water_emissions_data[full_water_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
         
-        # Ground emissions - drop unused flows
-        if unused_ground_emissions is not None:
-            for flow in unused_ground_emissions['FlowName']:
-                ground_emissions_data = ground_emissions_data.drop(ground_emissions_data[ground_emissions_data['FlowName'] == flow].index)
         # Ground emissions - get the correct flow names, compartment, and uuid for each flow
         full_ground_emissions_data = correct_netl_flow_names(ground_emissions_data, flow_mapping_path)
         full_ground_emissions_data = full_ground_emissions_data[full_ground_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
@@ -415,13 +478,9 @@ def generate_lci(technobasins_basins, excel_file_path, flow_mapping_path, produc
         # combine dataframes
         df1 = pd.concat([full_air_emissions_data, full_water_emissions_data, full_ground_emissions_data])
         df1 = df1.sort_values(by='FlowUUID') # sort by FlowUUID
-        basin_name = find_basin (technobasins_basins, sheet)
+        region = [key for key, v in region_sheets_dict.items() if v == sheet_name][0]
         df1['FlowAmount'] = df1['FlowAmount'].astype(float)
         df1['FlowAmount'] = df1['FlowAmount'].fillna(0)
-        norm_value = get_normalized_values(technobasins_basins, sheet)
-        df1['norm'] = norm_value
-        df1['norm'] = df1['norm'].astype(float)
-        df1['normalized_emissions'] = df1['FlowAmount'] * df1['norm']
 
         # create final_table structure in 1st iteration
         if final_table.empty:
@@ -432,66 +491,25 @@ def generate_lci(technobasins_basins, excel_file_path, flow_mapping_path, produc
             final_table = final_table[['Compartment', 'FlowName', 'FlowUUID', 'Unit', 'flow_type', 'input']]
             final_table.columns = ['compartment', 'flow_name', 'uuid', 'unit', 'flow_type', 'is_input']
             # add a column for each basin
-            basins_columns = list (technobasins_basins.keys())
-            for basin in basins_columns:
-                final_table[basin] = 0
-        final_table.head()
-        final_table.shape
+            region_columns = list (region_sheets_dict.keys())
+            for r in region_columns:
+                final_table[r] = 0
         
-        # Compute normalized emissions and add to final table   
+        # add region emissions to final table   
         try:
-            final_table['normalized_emissions'] = df1['normalized_emissions'].values
-            final_table[basin_name] += final_table['normalized_emissions']
-            final_table = final_table.drop(columns=['normalized_emissions'])
+            logging.info(f"Adding emissions for {region}")
+            logging.info(f"df1: {df1['FlowAmount'].head(5)}")
+            final_table[region] = df1['FlowAmount']
         except Exception as e:
             sys.exit(f"Error reading sheet. Make sure your excel file follows the correct naming convention.For reference, refer to the source code, lines 70-78. Error: {e}")
 
-
     # 2. Save final table to excel
     save_ng_lci(final_table, final_table_name ,destination_path)
-    print(f"Final table saved to {destination_path}/{final_table_name}.csv")
+    print(f"Final table saved to {destination_path}/{final_table_name}.xlsx")
     
     return final_table
 
-def get_unused_flows(excel_file_path, sheet_name):
-    """
-    This function extracts the unused ground and water emissions from a given natural gas results dataset
-
-    Inputs:
-    - excel_file_path: path to the excel file
-    - sheet_name: name of the sheet to extract the data from
-
-    Outputs:
-    - unused_ground_emissions: dataframe containing the unused ground emissions
-    - unused_water_emissions: dataframe containing the unused water emissions
-    """
-    us_average_data = pd.read_excel(excel_file_path, sheet_name=sheet_name,skiprows=0,header=None) 
-    us_average_data.iloc[0] = us_average_data.iloc[0].ffill()
-    us_average_data.iloc[1] = us_average_data.iloc[1].ffill()
-    us_average_data.columns = us_average_data.iloc[2]
-    us_average_data = us_average_data.drop(columns=["P2.5", "P97.5"])
-    us_average_data.columns = us_average_data.iloc[0]
-    us_average_data = us_average_data.drop(us_average_data.index[0])
-    #extract ground data from us_average sheet
-    ground_emissions_data = us_average_data.iloc[:, [us_average_data.shape[1]-3, us_average_data.shape[1]-2]]
-    ground_emissions_data.columns.values[0] = "FlowName"
-    ground_emissions_data.columns.values[1] = "FlowAmount"
-    ground_emissions_data = ground_emissions_data.dropna()
-    ground_emissions_data = ground_emissions_data.iloc[1:]
-    #extract water data from us_average sheet
-    water_emissions_data = us_average_data.iloc[:, [us_average_data.shape[1]-3, us_average_data.shape[1]-1]]
-    water_emissions_data.columns.values[0] = "FlowName"
-    water_emissions_data.columns.values[1] = "FlowAmount"
-    water_emissions_data = water_emissions_data.iloc[2:]
-    water_emissions_data = water_emissions_data.dropna()
-    #unused ground emissions
-    unused_ground_emissions = ground_emissions_data[ground_emissions_data['FlowAmount'] == 0.00e+00]
-    #unused water emissions
-    unused_water_emissions = water_emissions_data[water_emissions_data['FlowAmount'] == 0.00e+00]
-
-    return unused_ground_emissions, unused_water_emissions
-
-def read_technobasin_data(excel_file_path, sheet_name):
+def read_region_data(excel_file_path, sheet_name):
     """
     This function reads an excel file, extracts the data, and generates a df for NG emissions for air, water, and ground.
     The df includes the flow name and flow amount (P2.5 and P97.5 values are dropped).
@@ -549,47 +567,6 @@ def read_technobasin_data(excel_file_path, sheet_name):
 
     return air_emissions_data, water_emissions_data, ground_emissions_data
 
-
-# Helper function to calculate normalized values for each technobasin
-def get_normalized_values(technobasins_basins, technobasin):
-    for outer, inner in technobasins_basins.items():
-        if technobasin in inner:
-            total = sum(inner.values())
-            return float(inner[technobasin] / total)
-    return None
-
-# helper function to find basins for a given technobasin
-def find_basin(technobasins_basins, technobasin_name):
-    for outer, inner in technobasins_basins.items():
-        if technobasin_name in inner:
-            return outer
-    return None
-
-# Helper function to use aliases to normalize technobasin naming
-def _normalize_technobasin_naming(name):
-    name_lower = name.lower().strip()
-    
-    # Check exact or partial match
-    for alias, canonical in aliases.items():
-        alias_clean = alias.lower()
-        if name_lower in alias_clean or alias_clean in name_lower:
-            return canonical
-
-# Helper function to create the final dictionary including basin, technobasin, and production share
-def final_dictionary(technobasins_basins, excel_file_path, production_sheet_name):
-    production_shares_2020 = pd.read_excel(excel_file_path, sheet_name=production_sheet_name)
-    production_shares_2020 = production_shares_2020.iloc[1:]
-    production_shares_2020['Scenario Normalized'] = production_shares_2020['Scenario'].apply(lambda x: _normalize_technobasin_naming(x))
-    production_shares_2020 = production_shares_2020.drop(columns=production_shares_2020.columns[0])
-    production_shares_2020.columns.values[1] = 'Scenario'
-    production_shares_2020 = production_shares_2020[['Scenario', 'Production Shares (%)']]
-    # final dictionary including basin, technobasin, and production share
-    technobasins_basins = {
-        key: {num: production_shares_2020.set_index('Scenario').loc[num, 'Production Shares (%)'] for num in nums}
-        for key, nums in technobasins_basins.items()
-    }
-    return technobasins_basins
-
 def save_ng_lci(df, filename, destination_path):
     """
     This function saves the final table to an excel file.

From 2726c60d16c2b802c76fe37f8c3815028cac9742 Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Mon, 20 Oct 2025 14:14:40 -0400
Subject: [PATCH 08/24] Minor edits to natural_gas_upstream.py

---
 electricitylci/natural_gas_upstream.py | 37 ++++++++++++--------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
index 92dd9a95..3c825ded 100644
--- a/electricitylci/natural_gas_upstream.py
+++ b/electricitylci/natural_gas_upstream.py
@@ -19,6 +19,7 @@
 from electricitylci.model_config import model_specs
 from electricitylci.utils import download_edx
 from electricitylci.globals import paths
+from electricitylci.utils import check_output_dir
 ##############################################################################
 # MODULE DOCUMENTATION
 ##############################################################################
@@ -61,8 +62,8 @@
     'MO':'Midwest','NE':'Midwest','SD':'Midwest','IL':'Midwest','IN':'Midwest','OH':'Midwest','WI':'Midwest','MI':'Midwest',
     'AR':'Southeast','LA':'Southeast','AL':'Southeast','FL':'Southeast','GA':'Southeast','MS':'Southeast','SC':'Southeast','KY':'Southeast',
     'NC':'Southeast','TN':'Southeast','VA':'Southeast','WV':'Southeast','DE':'Southeast','MD':'Southeast','CT':'Northeast','MA':'Northeast',
-    'NH':'Northeast','RI':'Northeast','VT':'Northeast','NJ':'Northeast','NY':'Northeast','PA':'Northeast','ME':'Northeast',
-} #TOTAL 48 -- EXCLUDING AL, HI, AND DC
+    'NH':'Northeast','RI':'Northeast','VT':'Northeast','NJ':'Northeast','NY':'Northeast','PA':'Northeast','ME':'Northeast', 'DC':'Northeast',
+} #TOTAL 48 -- EXCLUDING AK and HI
 
 ##############################################################################
 # MAN FUNCTION
@@ -153,7 +154,7 @@ def generate_upstream_ng(year):
         ng_lci_mapped["Compartment"].str.contains("Technosphere/"),
         "ElementaryFlowPrimeContext"] = "technosphere"
     # Issue #296 - adding DQI information for upstream processes
-    ng_lci_mapped["Year"] = 2016
+    ng_lci_mapped["Year"] = model_specs.ng_model_year
     ng_lci_mapped["DataReliability"] = 3
     ng_lci_mapped["TemporalCorrelation"] = add_temporal_correlation_score(
         ng_lci_mapped["Year"], model_specs.electricity_lci_target_year
@@ -369,8 +370,7 @@ def get_ng_lci(year):
     else:
         data_folder = os.path.join(paths.local_path, 'netl')
         # create new directory for ng if non existing
-        if not os.path.exists(os.path.join(data_folder,"2020_ng")):
-            os.makedirs(os.path.join(data_folder,"2020_ng"))
+        check_output_dir(os.path.join(data_folder,"2020_ng"))
         data_folder = os.path.join(data_folder,"2020_ng")
         # check if the ng_lci_2020rev1.csv already exists - if it does then we can skip all the below
         if os.path.exists(os.path.join(data_folder, "ng_lci_2020rev1.csv")):
@@ -385,21 +385,18 @@ def get_ng_lci(year):
             # this step will require downloading files from edx      
             # retrieve ng model
             # check if model is data_dir
-            if not os.path.exists(os.path.join(data_folder,"2020_ng_model")):
-                os.makedirs(os.path.join(data_folder,"2020_ng_model"))
-                model_folder = os.path.join(data_folder,"2020_ng_model")
-            else:
-                model_folder = os.path.join(data_folder,"2020_ng_model")
-                for ngmodel in r_ids_2020.keys():
-                    if os.path.exists(os.path.join(model_folder, ngmodel)):
-                        logging.info(f"{ngmodel} already exists in your data directory.")
-                    else:
-                        logging.info(f"Downloading {ngmodel} from EDx.")
-                        try:
-                            download_edx(resource_id = r_ids_2020[ngmodel], api_key = model_specs.edx_api_key, output_dir = model_folder)
-                        except Exception as e:
-                            logging.error(f"Error downloading {ngmodel} from EDx. Error: {e}")
-                            sys.exit(1)
+            check_output_dir(os.path.join(data_folder,"2020_ng_model"))
+            model_folder = os.path.join(data_folder,"2020_ng_model")
+            for ngmodel in r_ids_2020.keys():
+                if os.path.exists(os.path.join(model_folder, ngmodel)):
+                    logging.info(f"{ngmodel} already exists in your data directory.")
+                else:
+                    logging.info(f"Downloading {ngmodel} from EDx.")
+                    try:
+                        download_edx(resource_id = r_ids_2020[ngmodel], api_key = model_specs.edx_api_key, output_dir = model_folder)
+                    except Exception as e:
+                        logging.error(f"Error downloading {ngmodel} from EDx. Error: {e}")
+                        sys.exit(1)
             # retrieve flow mapping document from edx [elci.csv]
             # check if flowmapping csv exists in data_dir
             if os.path.exists(os.path.join(data_folder, "elci.csv")):

From e739bde68d09b98a1d5224e86ffc418007de617e Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Fri, 7 Nov 2025 11:33:02 -0500
Subject: [PATCH 09/24] updated yaml documentation

---
 electricitylci/data/process_metadata.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/electricitylci/data/process_metadata.yml b/electricitylci/data/process_metadata.yml
index 270d265a..27b3bf37 100644
--- a/electricitylci/data/process_metadata.yml
+++ b/electricitylci/data/process_metadata.yml
@@ -860,12 +860,12 @@ gas_upstream:
     - 'The cradle-to-gate inventory for production of gas aggregated to basin. '
 
   techno_process: &gas_upstream_techno_process
-    - 'The NETL natural gas life cycle model includes parameters to generate inventories for natural gas extraction based on basin and geology which determines the gas extraction type (e.g., Appalachian Shale using hydraulic fracturing).
-      2016 natural gas production then informs the amount of each type of technology/region that form the mix in the regions.
+    - 'The NETL natural gas life cycle model includes parameters to generate inventories for natural gas extraction based on region or basin and geology which determines the gas extraction type (e.g., Appalachian Shale using hydraulic fracturing).
+      2016 or 2020 natural gas production then informs the amount of each type of technology/region that form the mix in the regions, depending on the year selected in the model configuration.
       These can be further aggregated to a US average.
       More details are in the natural gas upstream report at the link below
-
-      https://www.netl.doe.gov/energy-analysis/details?id=3198'
+      2016: https://www.netl.doe.gov/energy-analysis/details?id=4f43cb3f-c0d7-482e-bf01-39995a7c7497
+      2020: https://www.netl.doe.gov/energy-analysis/details?id=546d4009-c43b-43f5-bcc9-64d5e63fc8d5
 
   Description:
   - *gas_upstream_techno_intro

From 93ac54971357059c92c8b9c5e0735ea9b53bcc35 Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Fri, 7 Nov 2025 11:49:02 -0500
Subject: [PATCH 10/24] yaml documentation quickfix

---
 electricitylci/data/process_metadata.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/electricitylci/data/process_metadata.yml b/electricitylci/data/process_metadata.yml
index 27b3bf37..f409add7 100644
--- a/electricitylci/data/process_metadata.yml
+++ b/electricitylci/data/process_metadata.yml
@@ -866,6 +866,7 @@ gas_upstream:
       More details are in the natural gas upstream report at the link below
       2016: https://www.netl.doe.gov/energy-analysis/details?id=4f43cb3f-c0d7-482e-bf01-39995a7c7497
       2020: https://www.netl.doe.gov/energy-analysis/details?id=546d4009-c43b-43f5-bcc9-64d5e63fc8d5
+      '
 
   Description:
   - *gas_upstream_techno_intro

From e7f5997f37cf91caa5a4f591bf421d1a864f8413 Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Fri, 7 Nov 2025 14:40:05 -0500
Subject: [PATCH 11/24] process metadata - Yaml file minor edit

---
 electricitylci/data/process_metadata.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/electricitylci/data/process_metadata.yml b/electricitylci/data/process_metadata.yml
index f409add7..9cc8f040 100644
--- a/electricitylci/data/process_metadata.yml
+++ b/electricitylci/data/process_metadata.yml
@@ -857,15 +857,15 @@ coal_upstream:
 gas_upstream:
 
   techno_intro: &gas_upstream_techno_intro
-    - 'The cradle-to-gate inventory for production of gas aggregated to basin. '
+    - 'The cradle-to-gate inventory for production of gas aggregated to basin or region, depending on the year selected in the model configuration. '
 
   techno_process: &gas_upstream_techno_process
     - 'The NETL natural gas life cycle model includes parameters to generate inventories for natural gas extraction based on region or basin and geology which determines the gas extraction type (e.g., Appalachian Shale using hydraulic fracturing).
       2016 or 2020 natural gas production then informs the amount of each type of technology/region that form the mix in the regions, depending on the year selected in the model configuration.
       These can be further aggregated to a US average.
-      More details are in the natural gas upstream report at the link below
-      2016: https://www.netl.doe.gov/energy-analysis/details?id=4f43cb3f-c0d7-482e-bf01-39995a7c7497
-      2020: https://www.netl.doe.gov/energy-analysis/details?id=546d4009-c43b-43f5-bcc9-64d5e63fc8d5
+      More details are in the natural gas upstream report at the following links.
+      Link for 2016: https://www.netl.doe.gov/energy-analysis/details?id=4f43cb3f-c0d7-482e-bf01-39995a7c7497
+      Link for 2020: https://www.netl.doe.gov/energy-analysis/details?id=546d4009-c43b-43f5-bcc9-64d5e63fc8d5
       '
 
   Description:

From d2496dcaddfefd41df9f04d617e6c5ed06811014 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Fri, 21 Nov 2025 16:28:06 -0500
Subject: [PATCH 12/24] fix spacing

---
 electricitylci/natural_gas_upstream.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
index d05303b9..67f14db2 100644
--- a/electricitylci/natural_gas_upstream.py
+++ b/electricitylci/natural_gas_upstream.py
@@ -16,6 +16,8 @@
 import electricitylci.PhysicalQuantities as pq
 from electricitylci.generation import add_temporal_correlation_score
 from electricitylci.model_config import model_specs
+
+
 ##############################################################################
 # MODULE DOCUMENTATION
 ##############################################################################

From 85c7fa3117caacbdb2cfebb6eb16e843697c92b4 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Mon, 24 Nov 2025 14:09:16 -0500
Subject: [PATCH 13/24] update stewicombo file for latest data; addresses #275

decisions on where the stewicombo file will live is in discussion; stewi files should be available on EPA data commons
---
 electricitylci/modelconfig/ELCI_2023_config.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml
index 4182d437..c84b2337 100644
--- a/electricitylci/modelconfig/ELCI_2023_config.yml
+++ b/electricitylci/modelconfig/ELCI_2023_config.yml
@@ -32,9 +32,9 @@ replace_egrid: true
 coal_model_year: 2023
 
 # NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
+# This is used to determine which NG inventory to use.
+# This impacts the upstream ng emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
 # Select between 2016 and 2020 model inventories.
 ng_model_year: 2020
 
@@ -64,7 +64,7 @@ inventories_of_interest:
   TRI: 2023
   NEI: 2020
   RCRAInfo: 2023
-stewicombo_file: 'ELCI_2023_v1.2.0_d3bb4db'
+stewicombo_file: 'ELCI_2023_v1.2.1_3687292'
 
 # Provide uncertainty estimates for emissions.
 calculate_uncertainty: true

From e0ff84b4782c01b4ae985cd723d7a5b10808e3df Mon Sep 17 00:00:00 2001
From: Francis Hanna <hannafra@msu.edu>
Date: Mon, 24 Nov 2025 17:45:12 -0500
Subject: [PATCH 14/24] HOTFIX - t&d download links

---
 electricitylci/eia_trans_dist_grid_loss.py    | 23 +++++++++++++++----
 .../modelconfig/ELCI_2023_config.yml          |  2 +-
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py
index 3a7ac4f0..2b99d62d 100644
--- a/electricitylci/eia_trans_dist_grid_loss.py
+++ b/electricitylci/eia_trans_dist_grid_loss.py
@@ -114,11 +114,19 @@ def eia_trans_dist_download_extract(year):
                 + "/xls/"
                 + filename
             )
-            # bugfix: url for year 2023 [FH]
-            # this has to be updated later when 2023 data gets archived 
-            # and links should be rechecked for compatibility with 2024 data (when released)
+            # bugfix: url for year 2024 [FH]
             url_c = (
                 "https://www.eia.gov/electricity/state/"
+                + "/xls/"
+                + "SEP Tables for "
+                + STATE_ABBREV[key].upper()
+                + ".xlsx"
+            )
+            # HOTFIX: url for newly archived year 2023 [FH] 
+            url_d = (
+                "https://www.eia.gov/electricity/state/archive/"
+                + year
+                + "/"
                 + key.replace(" ", "")
                 + "/xls/"
                 + "SEP Tables for "
@@ -127,11 +135,16 @@ def eia_trans_dist_download_extract(year):
             )           
             # HOTFIX: https://github.com/USEPA/ElectricityLCI/issues/235
             #adding 20s timeout to avoid long delays due to server issues.
-            # bugfix: added condition to account for the 2023 data link format [FH]
-            if year == "2023":
+            # HOTFIX: url to be used for year 2024 (for future reference) [FH]
+            if year == "2024":
                 r = requests.get(url_c, timeout=20)
                 with open (filename, "wb") as f:
                     f.write(r.content)
+            # HOTFIX: use url for newly archived year 2023 [FH] 
+            elif year == "2023":
+                r = requests.get(url_d, timeout=20)
+                with open (filename, "wb") as f:
+                    f.write(r.content)
             else:
                 r = requests.get(url_a, timeout=20)
                 r_head = r.headers.get("Content-Type", "")
diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml
index 4182d437..d1bc67e0 100644
--- a/electricitylci/modelconfig/ELCI_2023_config.yml
+++ b/electricitylci/modelconfig/ELCI_2023_config.yml
@@ -64,7 +64,7 @@ inventories_of_interest:
   TRI: 2023
   NEI: 2020
   RCRAInfo: 2023
-stewicombo_file: 'ELCI_2023_v1.2.0_d3bb4db'
+stewicombo_file: 'ELCI_2023_v1.2.1_3687292'
 
 # Provide uncertainty estimates for emissions.
 calculate_uncertainty: true

From ecbea96118bcd39ac19688bf9a39c6a3edf628be Mon Sep 17 00:00:00 2001
From: Francis Hanna <91334875+frankhanna94@users.noreply.github.com>
Date: Tue, 25 Nov 2025 10:02:49 -0500
Subject: [PATCH 15/24] Fix URL construction for year 2024

---
 electricitylci/eia_trans_dist_grid_loss.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py
index 2b99d62d..b9b06938 100644
--- a/electricitylci/eia_trans_dist_grid_loss.py
+++ b/electricitylci/eia_trans_dist_grid_loss.py
@@ -117,7 +117,7 @@ def eia_trans_dist_download_extract(year):
             # bugfix: url for year 2024 [FH]
             url_c = (
                 "https://www.eia.gov/electricity/state/"
-                + "/xls/"
+                + "xls/"
                 + "SEP Tables for "
                 + STATE_ABBREV[key].upper()
                 + ".xlsx"

From 8d0e1e02d60ea3320a804a3efc2a2b9c90df9dcd Mon Sep 17 00:00:00 2001
From: Francis Hanna <91334875+frankhanna94@users.noreply.github.com>
Date: Tue, 25 Nov 2025 10:52:08 -0500
Subject: [PATCH 16/24] Update t&d links setup following TWD comment

---
 electricitylci/eia_trans_dist_grid_loss.py | 72 ++++------------------
 1 file changed, 13 insertions(+), 59 deletions(-)

diff --git a/electricitylci/eia_trans_dist_grid_loss.py b/electricitylci/eia_trans_dist_grid_loss.py
index b9b06938..35c2e4e0 100644
--- a/electricitylci/eia_trans_dist_grid_loss.py
+++ b/electricitylci/eia_trans_dist_grid_loss.py
@@ -99,68 +99,22 @@ def eia_trans_dist_download_extract(year):
         filename = f"{STATE_ABBREV[key]}.xlsx"
         if not os.path.exists(filename):
             logging.info(f"Downloading archive data for {STATE_ABBREV[key]}")
-            # HOTFIX: URLs for two-word states have space omitted.
-            url_a = (
-                "https://www.eia.gov/electricity/state/archive/"
-                + year
-                + "/"
-                + key.replace(" ", "")
-                + "/xls/"
-                + filename
-            )
-            url_b = (
-                "https://www.eia.gov/electricity/state/"
-                + key.replace(" ", "")
-                + "/xls/"
-                + filename
-            )
-            # bugfix: url for year 2024 [FH]
-            url_c = (
-                "https://www.eia.gov/electricity/state/"
-                + "xls/"
-                + "SEP Tables for "
-                + STATE_ABBREV[key].upper()
-                + ".xlsx"
-            )
-            # HOTFIX: url for newly archived year 2023 [FH] 
-            url_d = (
-                "https://www.eia.gov/electricity/state/archive/"
-                + year
-                + "/"
-                + key.replace(" ", "")
-                + "/xls/"
-                + "SEP Tables for "
-                + STATE_ABBREV[key].upper()
-                + ".xlsx"
-            )           
+
+            url_key = key.replace(" ", "")
+            url = f"https://www.eia.gov/electricity/state/archive/{year}/{url_key}/xls/"
+            if int(year) > 2023:
+                url = url.replace(f"/archive/{year}/{url_key}/", "/")
+                url += "SEP%20Tables%20for%20" + f"{STATE_ABBREV[key].upper()}.xlsx"
+            elif int(year) == 2023:
+                url += "SEP%20Tables%20for%20" + f"{STATE_ABBREV[key].upper()}.xlsx"
+            else:
+                url += f"{STATE_ABBREV[key]}.xlsx"
+            
+            r = requests.get(url, timeout=20)    
             # HOTFIX: https://github.com/USEPA/ElectricityLCI/issues/235
             #adding 20s timeout to avoid long delays due to server issues.
-            # HOTFIX: url to be used for year 2024 (for future reference) [FH]
-            if year == "2024":
-                r = requests.get(url_c, timeout=20)
-                with open (filename, "wb") as f:
-                    f.write(r.content)
-            # HOTFIX: use url for newly archived year 2023 [FH] 
-            elif year == "2023":
-                r = requests.get(url_d, timeout=20)
-                with open (filename, "wb") as f:
+            with open (filename, "wb") as f:
                     f.write(r.content)
-            else:
-                r = requests.get(url_a, timeout=20)
-                r_head = r.headers.get("Content-Type", "")
-                if not r.ok or r_head.startswith("text"):
-                    logging.info(f"Trying alternative site {STATE_ABBREV[key]}")
-                    #adding 20s timeout to avoid long delays due to server issues.
-                    r = requests.get(url_b, timeout=20)
-                    r_head = r.headers.get("Content-Type", "")
-
-                if r.ok and not r_head.startswith("text"):
-                    with open(filename, 'wb') as f:
-                        f.write(r.content)
-                else:
-                    logging.error(
-                        f"No TD loss data for {STATE_ABBREV[key]} {year}")
-
         try:
             df = pd.read_excel(
                 filename,

From 5bbbcdd20065546cb6aa1944711b28d1c6b70964 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:19:17 -0500
Subject: [PATCH 17/24] add version 2.1 config options

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 6c883a10..c1f220c1 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,9 @@ The `main()` method has four steps:
         * ELCI_2020
         * ELCI_2021
         * ELCI_2022
+    - Version 2.1 baselines include:
+        * ELCI_2023
+        * ELCI_2024
     - These configurations statically change the module, model_config.py, which is an object read by other modules.
     - To change configuration values, edit the YAML before running the code.
 2. `run_generation()`

From 7cbabd4f3e547aa216f50f5a7bfcd197d48bbba3 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:21:52 -0500
Subject: [PATCH 18/24] add ELCI_2023 run to comments

---
 electricitylci/egrid_emissions_and_waste_by_facility.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/electricitylci/egrid_emissions_and_waste_by_facility.py b/electricitylci/egrid_emissions_and_waste_by_facility.py
index 337824ec..f656e71a 100644
--- a/electricitylci/egrid_emissions_and_waste_by_facility.py
+++ b/electricitylci/egrid_emissions_and_waste_by_facility.py
@@ -112,6 +112,7 @@ def get_combined_stewicombo_file(model_specs):
     emissions_and_wastes_by_facility = get_combined_stewicombo_file(
         model_config)
     len(emissions_and_wastes_by_facility)
+    # for 'ELCI_2023': 90238 [251125; TWD]
     # for 'ELCI_2020': 88005 [250416; TWD]
     # for 'ELCI_1': 106284 (recorded as 88310 [250416;TWD])
 

From f3135658a6488d885949446820b8708c2c9f232c Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:22:40 -0500
Subject: [PATCH 19/24] fixes to inline comments and formatting

---
 electricitylci/combinator.py | 12 ++++++------
 electricitylci/generation.py |  4 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/electricitylci/combinator.py b/electricitylci/combinator.py
index a7029b3d..6942eb02 100644
--- a/electricitylci/combinator.py
+++ b/electricitylci/combinator.py
@@ -254,7 +254,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
     # Hoping to reduce memory usage or at least make more of it available
     # for the later groupby.
     del(arg)
-    # See https://github.com/USEPA/fedelemflowlist
+    # See https://github.com/FLCAC-admin/fedelemflowlist
     # The mapping data includes a conversion factor to convert everything into
     # standard units (e.g., kg, MJ, m2*a). Note that 'SourceFlowContext' is
     # already in lowercase letters, which is why no change happens below.
@@ -415,7 +415,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
     # columns to be converted to objects, so undoing that here
     for col in actual_quant_columns:
         upstream_mapped_df[col]=upstream_mapped_df[col].astype(float)
-    
+
     # Preserve unmapped resource flows;
     #   copy over the flow name, compartment and units and
     #   set conversion factor equal to 1.0.
@@ -492,7 +492,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
         "Source",
         "Year",
     ] + actual_quant_columns
-    
+
     if "input" in upstream_columns:
         final_columns = final_columns + ["input"]
 
@@ -587,9 +587,9 @@ def concat_clean_upstream_and_plant(pl_df, up_df):
     )
     # 3/19/2025 MBJ: more memory management. When this process is called from
     # __init__.combine_upstream_and_gen_df the up_df is 12GB big. Previously
-    # we used a merge to add all the regional columns, but that requires a 
+    # we used a merge to add all the regional columns, but that requires a
     # tremendous amount of memory. Invidually assigning columns will be a bit
-    # slower but will greatly reduce memory usage...and ultimately end up 
+    # slower but will greatly reduce memory usage...and ultimately end up
     # faster if your computer tends to run out of memory using the previous
     # merge.
     for col in existing_region_cols:
@@ -600,7 +600,7 @@ def concat_clean_upstream_and_plant(pl_df, up_df):
     # 3/19/2025 - these would be instances where there is a plant_id in up_df
     # but not a matching eGRID_ID. With the new, by-column mapping performed above
     # eGRID_ID does not exist so no Nans to fill. In previous versions, I believe
-    # the use of fillnans with plant_id being the source would result in the 
+    # the use of fillnans with plant_id being the source would result in the
     # same thing as below.
     up_df['eGRID_ID'] = up_df['plant_id'].astype("int")
 
diff --git a/electricitylci/generation.py b/electricitylci/generation.py
index c8c9a302..6af3bcc4 100644
--- a/electricitylci/generation.py
+++ b/electricitylci/generation.py
@@ -929,7 +929,9 @@ def create_generation_process_df():
         # numbers and maps them to eGRID facility numbers.
         # NOTE: there are unmatched facilities that are found in FRS_bridge,
         # but not in EIA (e.g., EGRID, RCRA).
-        emissions_and_wastes_by_facility = get_combined_stewicombo_file(model_specs)
+        emissions_and_wastes_by_facility = get_combined_stewicombo_file(
+            model_specs
+        )
         ewf_df = pd.merge(
             left=emissions_and_wastes_by_facility,
             right=eia860_FRS,

From aa2cdb76cc45f4ed06799d4da5d70ff9b213c972 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:23:43 -0500
Subject: [PATCH 20/24] formatting and style fixes

---
 electricitylci/natural_gas_upstream.py | 435 +++++++++++++++++--------
 1 file changed, 294 insertions(+), 141 deletions(-)

diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
index 695ed4b7..07cd9848 100644
--- a/electricitylci/natural_gas_upstream.py
+++ b/electricitylci/natural_gas_upstream.py
@@ -29,17 +29,22 @@
 component of natural gas power plant operation (extraction, processing, and
 transportation) for every plant in EIA-923.
 
+TODO:
+- Make eLCI.csv resource a public submission.
+
 Created:
     2019-02-18
 Last updated:
-    2025-10-15
+    2025-11-25
 """
 __all__ = [
     "generate_upstream_ng",
 ]
 
-# Supporting Dicts
-# #######################################################################################################
+
+##############################################################################
+# GLOBALS
+##############################################################################
 region_sheets_dict = {
     'Pacific': 'FI - Pacific Delivery',
     'Rocky Mountain': 'FI - Rocky Mountain Delivery',
@@ -48,6 +53,7 @@
     'Southeast': 'FI - Southeast Delivery',
     'Northeast': 'FI - Northeast Delivery'
  }
+'''dict : Region names mapped to Excel workbook sheet names.'''
 
 r_ids_2020 = {
     'Appendix_F_2020_Full_Inventory_Results_Midwest_ProdThruTrans.xlsx':'5665de40-fc2b-4643-b647-ceec226af2bb',
@@ -57,6 +63,7 @@
     'Appendix_F_2020_Full_Inventory_Results_Southeast_ProdThruTrans.xlsx' :'4590712b-db21-4428-b488-6ded3b65d18b',
     'Appendix_F_2020_Full_Inventory_Results_Southwest_ProdThruTrans.xlsx':'9dd7a6e5-df1a-461e-87e7-0b9d8d600f26'
 }
+'''dict : Excel workbook file names mapped to EDX resource IDs.'''
 
 region_state_mapping = {
     'WA':'Pacific','CA':'Pacific','OR':'Pacific','MT':'Rocky Mountain','ID':'Rocky Mountain','CO':'Rocky Mountain','NV':'Rocky Mountain','UT':'Rocky Mountain','WY':'Rocky Mountain',
@@ -65,10 +72,12 @@
     'AR':'Southeast','LA':'Southeast','AL':'Southeast','FL':'Southeast','GA':'Southeast','MS':'Southeast','SC':'Southeast','KY':'Southeast',
     'NC':'Southeast','TN':'Southeast','VA':'Southeast','WV':'Southeast','DE':'Southeast','MD':'Southeast','CT':'Northeast','MA':'Northeast',
     'NH':'Northeast','RI':'Northeast','VT':'Northeast','NJ':'Northeast','NY':'Northeast','PA':'Northeast','ME':'Northeast', 'DC':'Northeast',
-} #TOTAL 48 -- EXCLUDING AK and HI
+}
+'''dict : U.S. state abbreviations mapped to region. Excludes AK and HI.'''
+
 
 ##############################################################################
-# MAN FUNCTION
+# FUNCTIONS
 ##############################################################################
 def generate_upstream_ng(year):
     """
@@ -98,28 +107,37 @@ def generate_upstream_ng(year):
     """
     logging.info("Generating natural gas inventory")
 
-    # get plant data and map each plant to its ng source: basin or region
-    # the 2016 ng emissions inventory is only available by basin
-    #   as such, plants can only be connected to upstream emissions via basin assignment
-    # newer data (2020) is available by region
-    #   plants are connected to upstream ng emissions via region assignment
+    # Get plant data and map each plant to its ng source: basin or region.
+    # The 2016 ng emissions inventory is only available by basin.
+    # As such, plants can only be connected to upstream emissions via basin
+    # assignment newer data (2020) is available by region plants are connected
+    # to upstream ng emissions via region assignment
 
+    # 'year' refers to eia_gen_year
     if model_specs.ng_model_year == 2016:
-        ng_generation_data_mapped = map_ng_by_basin(year) # 'year' refers to eia_generation_year
+        ng_generation_data_mapped = map_ng_by_basin(year)
     else:
-        ng_generation_data_mapped = map_ng_by_region(year) # 'year' refers to eia_generation_year
+        ng_generation_data_mapped = map_ng_by_region(year)
 
     # Read the NG LCI file
-    # if year = 2016 - this step will directly ready NG_LCI.csv from the data_dir - returns lci (by basin)
-    # if year = 2020 - this step will require edx api, download ng model and mapping - returns lci (by region)
-    # document from edx, and generate lci
+    # If year = 2016
+    # - this step will directly ready NG_LCI.csv from the data_dir
+    # - returns lci (by basin)
+    # If year = 2020
+    # - this step will require edx api, download ng model and mapping
+    # - returns lci (by region)
+    # Document from edx, and generate lci
     ng_lci = get_ng_lci(model_specs.ng_model_year)
 
     # merge ng lci and plants based on the common parameter: region or basin
     if model_specs.ng_model_year == 2016:
-        ng_lci_mapped = map_ng_lci_to_plants_by_basin(ng_lci, ng_generation_data_mapped)
+        ng_lci_mapped = map_ng_lci_to_plants_by_basin(
+            ng_lci, ng_generation_data_mapped
+        )
     else:
-        ng_lci_mapped = map_ng_lci_to_plants_by_region(ng_lci, ng_generation_data_mapped)
+        ng_lci_mapped = map_ng_lci_to_plants_by_region(
+            ng_lci, ng_generation_data_mapped
+        )
 
     # Multiplying with the EIA 923 fuel consumption; conversion factor is
     # for MMBtu to MJ
@@ -132,7 +150,7 @@ def generate_upstream_ng(year):
 
     ng_lci_mapped = ng_lci_mapped.rename(
         columns={'Total Fuel Consumption MMBtu':'quantity'})
-    ng_lci_mapped["quantity"]=ng_lci_mapped["quantity"]*btu_to_MJ
+    ng_lci_mapped["quantity"] = ng_lci_mapped["quantity"]*btu_to_MJ
 
     # Output is kg emission for the specified year by facility Id,
     # not normalized to electricity output
@@ -164,19 +182,17 @@ def generate_upstream_ng(year):
     ng_lci_mapped["GeographicalCorrelation"] = 1
     ng_lci_mapped["TechnologicalCorrelation"] = 1
     ng_lci_mapped["DataCollection"] = 1
-    #3/20/2025 MBJ - replacing renewable vintage here so that temporal correlation
-    #is based on the year the inventory is based on, but when electricity
-    #generation is combined, it needs to be based on the target year for the
-    #inventory.
-    ng_lci_mapped["Year"]=year
+
+    # 3/20/2025 MBJ - replacing renewable vintage here so that temporal
+    # correlation is based on the year the inventory is based on, but when
+    # electricity generation is combined, it needs to be based on the target
+    # year for the inventory.
+    ng_lci_mapped["Year"] = year
 
     return ng_lci_mapped
 
-##############################################################################
-# HELPER FUNCTIONS
-##############################################################################
 
-def map_ng_lci_to_plants_by_basin (ng_lci, ng_generation_data_mapped):
+def map_ng_lci_to_plants_by_basin(ng_lci, ng_generation_data_mapped):
     """
     Map the natural gas generation data by basin.
     """
@@ -203,7 +219,8 @@ def map_ng_lci_to_plants_by_basin (ng_lci, ng_generation_data_mapped):
     )
     return ng_lci_mapped
 
-def map_ng_lci_to_plants_by_region (ng_lci, ng_generation_data_mapped):
+
+def map_ng_lci_to_plants_by_region(ng_lci, ng_generation_data_mapped):
     """
     Map the natural gas generation data by basin.
     """
@@ -231,17 +248,20 @@ def map_ng_lci_to_plants_by_region (ng_lci, ng_generation_data_mapped):
     return ng_lci_mapped
 
 
-def map_ng_by_region (year):
+def map_ng_by_region(year):
     """
     Map the natural gas generation data by region.
-    This includes 6 regions: Pacific, Rocky Mountain, Southwest, Midwest, Southeast, and Northeast.
+    This includes 6 regions: Pacific, Rocky Mountain, Southwest, Midwest,
+    Southeast, and Northeast.
 
     Notes
     -----
-    * Downloads eia plant data for the specified year
-    * Filters the data to only include NG facilities and on positive fuel consumption
-    * Groups the data by Plant Id and aggregates the fuel consumption by summing the total fuel consumption
-    * Maps each plant to a region using the region_state_mapping dictionary
+    -   Downloads EIA plant data for the specified year.
+    -   Filters the data to only include NG facilities and on positive fuel
+        consumption.
+    -   Groups the data by Plant Id and aggregates the fuel consumption by
+        summing the total fuel consumption.
+    -   Maps each plant to a region using the region_state_mapping dictionary.
 
     Parameters
     ----------
@@ -274,15 +294,16 @@ def map_ng_by_region (year):
     return ng_generation_data_region
 
 
-def map_ng_by_basin (year):
+def map_ng_by_basin(year):
     """
     Map the natural gas generation data by basin.
 
     Notes
     -----
-    * Downloads eia plant data for the specified year
-    * Filters the data to only include NG facilities and on positive fuel consumption
-    * maps each plant to a basin using the gas_supply_basin_mapping.csv file
+    -   Downloads EIA plant data for the specified year.
+    -   Filters the data to only include NG facilities and on positive fuel
+        consumption.
+    -   Maps each plant to a basin using the gas_supply_basin_mapping.csv file.
 
     Parameters
     ----------
@@ -315,8 +336,8 @@ def map_ng_by_basin (year):
     ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
 
     # Import the mapping file which has the source gas basin for each Plant Id.
-    # NOTE:
-    #   This is a 2 MB file that provides about 100 kB of info!
+    # NOTE: This is a 2 MB file that provides about 100 kB of info!
+    # TODO: Move this resource to EDX.
     ng_basin_mapping = pd.read_csv(
         os.path.join(data_dir, 'gas_supply_basin_mapping.csv')
     )
@@ -335,46 +356,53 @@ def map_ng_by_basin (year):
     )
     return ng_generation_data_basin
 
+
 def get_ng_lci(year):
     """
     Get the natural gas life cycle inventory for a given year.
     Depending on the year, the natural gas life cycle inventory is either:
-        ** retrieved from existing data
-        ** calculated using the natural gas life cycle inventory model
+
+    - retrieved from existing data
+    - calculated using the natural gas life cycle inventory model
 
     Parameters
     ----------
     year : str, int
         The year for which to get the natural gas life cycle inventory.
-        This is retrieved from the model configuration
+        This should reflect the model configuration, ``ng_model_year``.
 
     Returns
     -------
-    a dataframe containing the emissions associated with the natural gas
-    production through transportation for each basin during the given year.
+    pandas.DataFrame
+        A dataframe containing the emissions associated with the natural gas
+        production through transportation for each basin during the given year.
 
     Notes
     -----
     This method depends on:
-        ** the configuration parameter: ------------
-        ** the NG_LCI csv file (if the old model is selected in the configuration)
-        ** the EDx API (if the new model is selected in the configuration)
-        ** the elci flow mapping csv file (if the new model is selected in the configuration)
+
+    -   the NG_LCI CSV file (if the old model is selected in the configuration)
+    -   the EDX API (if the new model is selected in the configuration)
+    -   the elci flow mapping CSV file (if the new model is selected in the
+        configuration)
     """
     if isinstance(year, int):
         year = str(year)
     if year == "2016":
-        logging.info(f"Retrieving the 2016 natural gas life cycle inventory by basin.")
+        logging.info(
+            f"Retrieving the 2016 natural gas life cycle inventory by basin."
+        )
         ng_lci = pd.read_csv(
             os.path.join(data_dir, "NG_LCI.csv"),
             index_col=[0,1,2,3,4,5]
         )
     else:
         data_folder = os.path.join(paths.local_path, 'netl')
-        # create new directory for ng if non existing
+        # Create new directory for ng if non existing.
         check_output_dir(os.path.join(data_folder,"2020_ng"))
         data_folder = os.path.join(data_folder,"2020_ng")
-        # check if the ng_lci_2020rev1.csv already exists - if it does then we can skip all the below
+        # Check if the ng_lci_2020rev1.csv already exists
+        # - if it does then we can skip all the below
         if os.path.exists(os.path.join(data_folder, "ng_lci_2020rev1.csv")):
             logging.info(f"NG LCI already exists in your data directory.")
             ng_lci = pd.read_csv(
@@ -382,74 +410,120 @@ def get_ng_lci(year):
                 index_col=[0,1,2,3,4,5]
             )
         else:
-            # if it does not exist, then we need to generate it
-            logging.info(f"Retrieving the {year} natural gas life cycle inventory by region.")
-            # this step will require downloading files from edx
-            # retrieve ng model
-            # check if model is data_dir
-            check_output_dir(os.path.join(data_folder,"2020_ng_model"))
-            model_folder = os.path.join(data_folder,"2020_ng_model")
+            # If it does not exist, then generate it.
+            logging.info(
+                f"Retrieving the {year} natural gas life cycle inventory "
+                "by region."
+            )
+            # This step will require downloading files from EDX.
+            # - retrieve ng model
+            # - check if model is data_dir
+            check_output_dir(os.path.join(data_folder, "2020_ng_model"))
+            model_folder = os.path.join(data_folder, "2020_ng_model")
             for ngmodel in r_ids_2020.keys():
                 if os.path.exists(os.path.join(model_folder, ngmodel)):
-                    logging.info(f"{ngmodel} already exists in your data directory.")
+                    logging.info(
+                        f"{ngmodel} already exists in your data directory."
+                    )
                 else:
-                    logging.info(f"Downloading {ngmodel} from EDx.")
+                    logging.info(f"Downloading {ngmodel} from EDX.")
                     try:
-                        download_edx(resource_id = r_ids_2020[ngmodel], api_key = model_specs.edx_api_key, output_dir = model_folder)
+                        download_edx(
+                            resource_id=r_ids_2020[ngmodel],
+                            api_key=model_specs.edx_api_key,
+                            output_dir=model_folder
+                        )
                     except Exception as e:
-                        logging.error(f"Error downloading {ngmodel} from EDx. Error: {e}")
+                        logging.error(
+                            f"Error downloading {ngmodel} from EDX. Error: {e}"
+                        )
                         sys.exit(1)
-            # retrieve flow mapping document from edx [elci.csv]
-            # check if flowmapping csv exists in data_dir
-            if os.path.exists(os.path.join(data_folder, "elci.csv")):
-                logging.info(f"ELCI flow mapping document already exists in your data directory.")
-                flow_mapping_path = os.path.join(data_folder, "elci.csv")
+            # Retrieve flow mapping document from EDX, eLCI.csv, and check if
+            # flow mapping CSV exists in data_dir.
+            if os.path.exists(os.path.join(data_folder, "eLCI.csv")):
+                logging.info(
+                    "ELCI flow mapping document already exists in your "
+                    "data directory."
+                )
+                flow_mapping_path = os.path.join(data_folder, "eLCI.csv")
             else:
-                # download flowmapping document from edx
-                logging.info(f"Downloading ELCI flow mapping document from EDx.")
-                r_id_elci = 'e2c8f934-e95e-470a-879b-17ebe4afd39e' # resource id of elci flow mapping document on EDx
+                # Download flow mapping document from EDX.
+                logging.info(
+                    "Downloading ELCI flow mapping document from EDX."
+                )
+                # Resource id of eLCI flow mapping document on EDX
+                # NOTE: Currently in Life Cycle Collaborations Workspace
+                #       ---not public!!!
+                r_id_elci = 'e2c8f934-e95e-470a-879b-17ebe4afd39e'
                 try:
-                    download_edx(resource_id = r_id_elci, api_key = model_specs.edx_api_key, output_dir = data_folder)
-                    flow_mapping_path = os.path.join(data_folder, "elci.csv")
+                    download_edx(
+                        resource_id=r_id_elci,
+                        api_key=model_specs.edx_api_key,
+                        output_dir=data_folder
+                    )
+                    flow_mapping_path = os.path.join(data_folder, "eLCI.csv")
                 except Exception as e:
-                    logging.error(f"Error downloading ELCI flow mapping document from EDx. Error: {e}")
+                    logging.error(
+                        "Error downloading ELCI flow mapping document from "
+                        f"EDX. Error: {e}"
+                    )
                     sys.exit(1)
 
-            # run the generate_ng_lci function and save it in data_dir
+            # Run the generate_ng_lci function and save it in data_dir.
             try:
-                generate_lci (excel_folder_path = model_folder, flow_mapping_path = flow_mapping_path, destination_path = data_folder, final_table_name = "ng_lci_2020rev1")
+                generate_lci(
+                    excel_folder_path=model_folder,
+                    flow_mapping_path=flow_mapping_path,
+                    destination_path=data_folder,
+                    final_table_name="ng_lci_2020rev1"
+                )
                 ng_lci = pd.read_csv(
                     os.path.join(data_folder, "ng_lci_2020rev1.csv"),
                     index_col=[0,1,2,3,4,5]
                 )
             except Exception as e:
-                logging.error(f"Error generating natural gas life cycle inventory. Error: {e}")
+                logging.error(
+                    "Error generating natural gas life cycle inventory. "
+                    f"Error: {e}"
+                )
                 sys.exit(1)
     return ng_lci
 
-def generate_lci(excel_folder_path, flow_mapping_path, destination_path, final_table_name):
+
+def generate_lci(excel_folder_path,
+                 flow_mapping_path,
+                 destination_path,
+                 final_table_name):
     """
-    This function reads an excel file, extracts the data, and generates a LCI for NG with the same format as the currently used file.
-
-    Args:
-        excel_folder_path (str): The path to the folder containing the excel files (ng models/inventories)
-        flow_mapping_path (str): The path to the flow mapping file
-        destination_path (str): !!This is an optional input!!
-                                The path to the destination folder. If not provided, the function
-                                will save the file in the current working directory.
-        final_table_name (str): The name of the final table to be saved
-                                Optional input. If not provided, the function will save the file with the name 'final_table.xlsx'.
-
-    Returns:
-        final_table (pd.DataFrame): A dataframe with the LCI for NG with the same format as the currently used file.
-
-    Notes:
-        - The function is senstive to the naming convention of the regions in the excel file.
+    Read Excel file, extract data, and generate NG LCI in the correct format.
+
+    Parameters
+    ----------
+    excel_folder_path : str
+        The path to the folder containing the excel files (i.e., NG models and
+        inventories).
+    flow_mapping_path: str
+        The path to the flow mapping file.
+    destination_path : str, optional
+        The path to the destination folder. If not provided, the function
+        will save the file in the current working directory.
+    final_table_name : str, optional
+        The name of the final table to be saved. If not provided, the function
+        will save the file with the name 'final_table.xlsx'.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A dataframe with the LCI for NG with the same format as the currently
+        used file.
+
+    Notes
+    -----
+    The function is sensitive to the naming convention of the regions in the
+    Excel file.
     """
     final_table = pd.DataFrame()
 
-    # determine folder path containing the excel files
-
     # 1. Read excel files in the folder path containing the model
     for filename in os.listdir(excel_folder_path):
         if filename.endswith('.xlsx'):
@@ -457,50 +531,100 @@ def generate_lci(excel_folder_path, flow_mapping_path, destination_path, final_t
             logging.info(f"Reading file: {file_path}")
             input_data = pd.ExcelFile(file_path)
             sheet_names = input_data.sheet_names
-            sheet_name = [name for name in sheet_names if name in region_sheets_dict.values()][0]
+            sheet_name = [
+                name for name in sheet_names if name in region_sheets_dict.values()
+            ][0]
 
-        # Extract air, water, and ground emissions data for the selected sheet (i.e., technobasin)
+        # Extract air, water, and ground emissions data for the selected sheet
+        # (i.e., technobasin).
         air_emissions_data, water_emissions_data, ground_emissions_data = read_region_data(file_path, sheet_name)
 
-        # Air emissions Get the correct flow names, compartment, and uuid for each flow
-        full_air_emissions_data = correct_netl_flow_names(air_emissions_data, flow_mapping_path)
-        full_air_emissions_data = full_air_emissions_data[full_air_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
-
-        # Water emissions - get the correct flow names, compartment, and uuid for each flow
-        full_water_emissions_data = correct_netl_flow_names(water_emissions_data, flow_mapping_path)
-        full_water_emissions_data = full_water_emissions_data[full_water_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
-
-        # Ground emissions - get the correct flow names, compartment, and uuid for each flow
-        full_ground_emissions_data = correct_netl_flow_names(ground_emissions_data, flow_mapping_path)
-        full_ground_emissions_data = full_ground_emissions_data[full_ground_emissions_data['FlowUUID'].notna()] # drop rows with FlowUUID NaN
-
-        # combine dataframes
-        df1 = pd.concat([full_air_emissions_data, full_water_emissions_data, full_ground_emissions_data])
-        df1 = df1.sort_values(by='FlowUUID') # sort by FlowUUID
-        region = [key for key, v in region_sheets_dict.items() if v == sheet_name][0]
+        # Air emissions
+        # - Get the correct flow names, compartment, and uuid for each flow
+        full_air_emissions_data = correct_netl_flow_names(
+            air_emissions_data,
+            flow_mapping_path
+        )
+        # Drop rows with FlowUUID NaN.
+        full_air_emissions_data = full_air_emissions_data[
+            full_air_emissions_data['FlowUUID'].notna()
+        ]
+
+        # Water emissions
+        # - get the correct flow names, compartment, and uuid for each flow.
+        full_water_emissions_data = correct_netl_flow_names(
+            water_emissions_data,
+            flow_mapping_path
+        )
+        # Drop rows with FlowUUID NaN.
+        full_water_emissions_data = full_water_emissions_data[
+            full_water_emissions_data['FlowUUID'].notna()
+        ]
+
+        # Ground emissions
+        # - get the correct flow names, compartment, and uuid for each flow
+        full_ground_emissions_data = correct_netl_flow_names(
+            ground_emissions_data,
+            flow_mapping_path
+        )
+        full_ground_emissions_data = full_ground_emissions_data[
+            full_ground_emissions_data['FlowUUID'].notna()
+        ]
+
+        # Combine dataframes.
+        df1 = pd.concat([
+            full_air_emissions_data,
+            full_water_emissions_data,
+            full_ground_emissions_data
+        ])
+        df1 = df1.sort_values(by='FlowUUID')
+        region = [
+            key for key, v in region_sheets_dict.items() if v == sheet_name
+        ][0]
         df1['FlowAmount'] = df1['FlowAmount'].astype(float)
         df1['FlowAmount'] = df1['FlowAmount'].fillna(0)
 
-        # create final_table structure in 1st iteration
+        # Create final_table structure in 1st iteration.
         if final_table.empty:
-            final_table = df1[['FlowName', 'Compartment', 'Unit', 'input', 'FlowUUID']]
+            final_table = df1[[
+                'FlowName', 'Compartment', 'Unit', 'input', 'FlowUUID'
+            ]]
             final_table = final_table.sort_values(by='FlowUUID')
             final_table ['flow_type'] = 'ELEMENTARY_FLOW'
-            #reorder and rename columns
-            final_table = final_table[['Compartment', 'FlowName', 'FlowUUID', 'Unit', 'flow_type', 'input']]
-            final_table.columns = ['compartment', 'flow_name', 'uuid', 'unit', 'flow_type', 'is_input']
-            # add a column for each basin
-            region_columns = list (region_sheets_dict.keys())
+            # Reorder and rename columns.
+            final_table = final_table[[
+                'Compartment',
+                'FlowName',
+                'FlowUUID',
+                'Unit',
+                'flow_type',
+                'input'
+            ]]
+            final_table.columns = [
+                'compartment',
+                'flow_name',
+                'uuid',
+                'unit',
+                'flow_type',
+                'is_input'
+            ]
+            # Add a column for each basin
+            region_columns = list(region_sheets_dict.keys())
             for r in region_columns:
                 final_table[r] = 0
 
-        # add region emissions to final table
+        # Add region emissions to final table
         try:
             logging.info(f"Adding emissions for {region}")
             logging.info(f"df1: {df1['FlowAmount'].head(5)}")
             final_table[region] = df1['FlowAmount']
         except Exception as e:
-            sys.exit(f"Error reading sheet. Make sure your excel file follows the correct naming convention.For reference, refer to the source code, lines 70-78. Error: {e}")
+            sys.exit(
+                "Error reading sheet. "
+                "Make sure your excel file follows the correct naming "
+                "convention. For reference, refer to the source code, "
+                f"lines 70-78. Error: {e}"
+            )
 
     # 2. Save final table to excel
     save_ng_lci(final_table, final_table_name ,destination_path)
@@ -508,25 +632,40 @@ def generate_lci(excel_folder_path, flow_mapping_path, destination_path, final_t
 
     return final_table
 
+
 def read_region_data(excel_file_path, sheet_name):
     """
-    This function reads an excel file, extracts the data, and generates a df for NG emissions for air, water, and ground.
-    The df includes the flow name and flow amount (P2.5 and P97.5 values are dropped).
+    Read Excel file, extract data, and generate a data frame for NG emissions
+    for air, water, and ground. The data frame includes the flow name and flow
+    amount (P2.5 and P97.5 values are dropped).
 
-    Inputs:
-    - excel_file_path: path to the excel file
-    - sheet_name: name of the sheet to extract the data from
+    Parameters
+    ----------
+    excel_file_path : str
+        Path to the Excel file.
+    sheet_name : str
+        Name of the sheet to extract the data from.
 
-    Outputs:
-    - air_emissions_data: dataframe containing the air emissions data
-    - water_emissions_data: dataframe containing the water emissions data
-    - ground_emissions_data: dataframe containing the ground emissions data
+    Returns
+    -------
+    tuple
+        A tuple of length three:
+
+        - pandas.DataFrame, the air emissions data
+        - pandas.DataFrame, the water emissions data
+        - pandas.DataFrame, the ground emissions data
     """
     print(f"Processing sheet: {sheet_name}")
     # create empty database
     df = pd.DataFrame()
     # Extract all the data from the sheet
-    df = pd.read_excel(excel_file_path, sheet_name=sheet_name, skiprows=0, header=None)
+    df = pd.read_excel(
+        excel_file_path,
+        sheet_name=sheet_name,
+        skiprows=0,
+        header=None
+    )
+
     # Adjustments: 1) changing header, 2) dropping P2.5 and P97.5 columns
     df.iloc[0] = df.iloc[0].ffill()
     df.iloc[1] = df.iloc[1].ffill()
@@ -534,17 +673,27 @@ def read_region_data(excel_file_path, sheet_name):
     df = df.drop(columns=["P2.5", "P97.5"])
     df.columns = df.iloc[0]
     df = df.drop(df.index[0])
-    # separate water, soil, ground, and air emissions - and map them to FEDEFL elementary flows
+
+    # Separate water, soil, ground, and air emissions - and map them to
+    # FEDEFL elementary flows
+
     # Air emissions
-    air_emissions_data = df.drop(columns=[col for col in df.columns if col != df.columns[1]])
-    air_emissions_data = air_emissions_data.iloc[:, :-2]    # drop the last two columns (empty columns from excel)
-    air_emissions_data[f'FlowAmount'] = air_emissions_data.iloc[:, 1:11].sum(axis=1)  # sum columns 2:11 for each row
+    air_emissions_data = df.drop(
+        columns=[col for col in df.columns if col != df.columns[1]]
+    )
+    # Drop the last two columns (empty columns from excel)
+    air_emissions_data = air_emissions_data.iloc[:, :-2]
+    # Sum columns 2:11 for each row
+    air_emissions_data[f'FlowAmount'] = air_emissions_data.iloc[:, 1:11].sum(
+        axis=1
+    )
     air_emissions_data = air_emissions_data.iloc[2:]
     air_emissions_data = air_emissions_data.iloc[:, [0,-1]]
-    air_emissions_data['Compartment'] = 'Air' # add compartment
+    air_emissions_data['Compartment'] = 'Air'
     air_emissions_data.columns.values[0] = 'FlowName' # change header
-    air_emissions_data['Unit'] = 'kg' # add unit
-    air_emissions_data ['input'] = False # add input
+    air_emissions_data['Unit'] = 'kg'
+    air_emissions_data ['input'] = False # not an input
+
     # Water emissions
     water_emissions_data = df.iloc[:, [df.shape[1]-3, df.shape[1]-1]]
     water_emissions_data.columns.values[0] = "FlowName"
@@ -554,6 +703,7 @@ def read_region_data(excel_file_path, sheet_name):
     water_emissions_data['Compartment'] = 'Water'
     water_emissions_data['Unit'] = 'kg'
     water_emissions_data ['input'] = False
+
     # Ground emissions
     ground_emissions_data = df.iloc[:, [df.shape[1]-3, df.shape[1]-2]]
     ground_emissions_data.columns.values[0] = "FlowName"
@@ -566,9 +716,10 @@ def read_region_data(excel_file_path, sheet_name):
 
     return air_emissions_data, water_emissions_data, ground_emissions_data
 
+
 def save_ng_lci(df, filename, destination_path):
     """
-    This function saves the final table to an excel file.
+    Save the final table to CSV file.
     """
     if destination_path is None:
         destination_path = f"{os.getcwd()}/"
@@ -577,6 +728,7 @@ def save_ng_lci(df, filename, destination_path):
     full_path = os.path.join(destination_path, f"{filename}.csv")
     df.to_csv(full_path, index=False)
 
+
 def correct_netl_flow_names(df, flow_mapping_path, amount_col="FlowAmount"):
     """A helper method that replaces NETL air, water, and ground emissions
     with Federal Elementary Flow List equivalents based on a subset of
@@ -690,6 +842,7 @@ def correct_netl_flow_names(df, flow_mapping_path, amount_col="FlowAmount"):
 
     return mapped_df
 
+
 ##############################################################################
 # MAIN
 ##############################################################################

From e2ab751b9e4b77127068137be8bd751bc94f3bef Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:28:28 -0500
Subject: [PATCH 21/24] alphabetize methods

---
 electricitylci/natural_gas_upstream.py | 902 +++++++++++++------------
 1 file changed, 452 insertions(+), 450 deletions(-)

diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
index 07cd9848..1545fcdd 100644
--- a/electricitylci/natural_gas_upstream.py
+++ b/electricitylci/natural_gas_upstream.py
@@ -35,7 +35,7 @@
 Created:
     2019-02-18
 Last updated:
-    2025-11-25
+    2025-12-04
 """
 __all__ = [
     "generate_upstream_ng",
@@ -79,282 +79,376 @@
 ##############################################################################
 # FUNCTIONS
 ##############################################################################
-def generate_upstream_ng(year):
-    """
-    Generate the annual gas extraction, processing and transportation
-    emissions (in kg) for each plant in EIA923.
-
-    Notes
-    -----
-    Depends on the data file, gas_supply_basin_mapping.csv, which includes the
-    identification information for every natural gas plant in the U.S.
-    Once imported, this data frame is simplified to contain just the plant
-    code and its NG_LCI_Name.
-
-    Also depends on the data file, NG_LCI.csv, which includes the LCA impact
-    species determined for every natural gas basin in the U.S.
-    Flows are separated by specific upstream process: production, gathering
-    & boosting, processing, transmission, storage, and pipeline.
+def correct_netl_flow_names(df, flow_mapping_path, amount_col="FlowAmount"):
+    """A helper method that replaces NETL air, water, and ground emissions
+    with Federal Elementary Flow List equivalents based on a subset of
+    flows defined in USEPA's eLCI mapping using the Python package
+    `fedelemflowlist <https://github.com/USEPA/fedelemflowlist>`_
 
     Parameters
     ----------
-    year: int
-        Year of EIA-923 fuel data to use.
+    df : pandas.DataFrame
+        A life cycle inventory data frame with columns 'FlowName',
+        'Compartment', 'Unit', 'input', and ``amount_col``.
+    amount_col : str, optional
+        The column title representing the flow amount, by default "FlowAmount"
 
     Returns
-    ----------
+    -------
     pandas.DataFrame
+        A new data frame with the same number of rows and columns as the
+        sent data frame. Flow names, compartments, units, and flow amounts
+        are updated based on emissions matches with the FEDEFL. All unmatched
+        flows are returned 'as is'. If FlowUUID was not in the column list,
+        it is created; otherwise, the matched UUIDs are updated.
     """
-    logging.info("Generating natural gas inventory")
-
-    # Get plant data and map each plant to its ng source: basin or region.
-    # The 2016 ng emissions inventory is only available by basin.
-    # As such, plants can only be connected to upstream emissions via basin
-    # assignment newer data (2020) is available by region plants are connected
-    # to upstream ng emissions via region assignment
-
-    # 'year' refers to eia_gen_year
-    if model_specs.ng_model_year == 2016:
-        ng_generation_data_mapped = map_ng_by_basin(year)
-    else:
-        ng_generation_data_mapped = map_ng_by_region(year)
-
-    # Read the NG LCI file
-    # If year = 2016
-    # - this step will directly ready NG_LCI.csv from the data_dir
-    # - returns lci (by basin)
-    # If year = 2020
-    # - this step will require edx api, download ng model and mapping
-    # - returns lci (by region)
-    # Document from edx, and generate lci
-    ng_lci = get_ng_lci(model_specs.ng_model_year)
+    # This data frame has about 4k source flow names and contexts associated
+    # with NETL unit process models (e.g., petro, nuclear, coal).
+    flow_mapping = pd.read_csv(flow_mapping_path, encoding='ISO-8859-1')
 
-    # merge ng lci and plants based on the common parameter: region or basin
-    if model_specs.ng_model_year == 2016:
-        ng_lci_mapped = map_ng_lci_to_plants_by_basin(
-            ng_lci, ng_generation_data_mapped
-        )
-    else:
-        ng_lci_mapped = map_ng_lci_to_plants_by_region(
-            ng_lci, ng_generation_data_mapped
-        )
+    # Matching occurs on name, compartment and units; help this along by
+    # lowering the case (improves coal UP matches from 10% to 42%).
+    df["FlowName_orig"] = df["FlowName"]
+    df["Compartment_orig"] = df["Compartment"]
+    df["FlowName"] = df["FlowName"].str.lower().str.rstrip()
+    df["Compartment"] = df["Compartment"].str.lower().str.rstrip()
 
-    # Multiplying with the EIA 923 fuel consumption; conversion factor is
-    # for MMBtu to MJ
-    btu_to_MJ = pq.convert(10**6,'Btu','MJ')
-    ng_lci_mapped["FlowAmount"]=(
-        ng_lci_mapped["FlowAmount"]
-        * ng_lci_mapped['Total Fuel Consumption MMBtu']
-        * btu_to_MJ
+    # In the map, also lower-case names and compartments and remove trailing
+    # space; note this introduces duplicate entries in the map, so remove them.
+    # The duplicates are from later entries, so ignore mapper, verifier and
+    # last updated cols when searching for duplicates. [250917; TWD]
+    flow_mapping['SourceFlowName'] = flow_mapping[
+        'SourceFlowName'].str.lower().str.rstrip()
+    flow_mapping['SourceFlowContext'] = flow_mapping[
+        "SourceFlowContext"].str.lower().str.rstrip()
+    ignore_cols = ['Mapper', 'Verifier', 'LastUpdated']
+    flow_mapping = flow_mapping.drop_duplicates(
+        subset=[x for x in flow_mapping.columns if x not in ignore_cols]
     )
 
-    ng_lci_mapped = ng_lci_mapped.rename(
-        columns={'Total Fuel Consumption MMBtu':'quantity'})
-    ng_lci_mapped["quantity"] = ng_lci_mapped["quantity"]*btu_to_MJ
+    # Some compartments in NETL UPs are complex (e.g., 'Emission to water/fresh
+    # water'), but are listed simply in the FEDEFL eLCI mapper (e.g., 'emission/
+    # water'). Improves coal mining UP matches from 42% to 62%.
+    is_emission = df['input'] == False
+    is_water = df['Compartment'].str.contains('water')
+    is_air = df['Compartment'].str.contains('air')
+    is_ground = df['Compartment'].str.contains('ground')
 
-    # Output is kg emission for the specified year by facility Id,
-    # not normalized to electricity output
+    df.loc[is_emission * is_water, 'Compartment'] = 'emission/water'
+    df.loc[is_emission * is_air, 'Compartment'] = 'emission/air'
+    df.loc[is_emission * is_ground, 'Compartment'] = 'emission/ground'
 
-    ng_lci_mapped['FuelCategory'] = 'GAS'
-    ng_lci_mapped.rename(
-        columns={
-            'Plant Id':'plant_id',
-            'NG_LCI_Region': 'stage_code',
-            'NG_LCI_Name':'stage_code',
-            'Stage':'stage'},
-        inplace=True
-    )
-    ng_lci_mapped["Year"] = year
-    ng_lci_mapped["Source"] = "netlgaseiafuel"
-    ng_lci_mapped["ElementaryFlowPrimeContext"] = "emission"
-    ng_lci_mapped.loc[
-        ng_lci_mapped["Compartment"].str.contains("resource/"),
-        "ElementaryFlowPrimeContext"] = "resource"
-    ng_lci_mapped.loc[
-        ng_lci_mapped["Compartment"].str.contains("Technosphere/"),
-        "ElementaryFlowPrimeContext"] = "technosphere"
-    # Issue #296 - adding DQI information for upstream processes
-    ng_lci_mapped["Year"] = model_specs.ng_model_year
-    ng_lci_mapped["DataReliability"] = 3
-    ng_lci_mapped["TemporalCorrelation"] = add_temporal_correlation_score(
-        ng_lci_mapped["Year"], model_specs.electricity_lci_target_year
+    # HOTFIX: Map against source units [250205; TWD]
+    # For coal mining, reduces matches from >62% to <62% (about 2k less rows)
+    logging.info("Mapping emissions to FEDEFL")
+    mapped_df = pd.merge(
+        df,
+        flow_mapping,
+        left_on=["FlowName", "Compartment", "Unit"],
+        right_on=["SourceFlowName", "SourceFlowContext", "SourceUnit"],
+        how="left",
     )
-    ng_lci_mapped["GeographicalCorrelation"] = 1
-    ng_lci_mapped["TechnologicalCorrelation"] = 1
-    ng_lci_mapped["DataCollection"] = 1
 
-    # 3/20/2025 MBJ - replacing renewable vintage here so that temporal
-    # correlation is based on the year the inventory is based on, but when
-    # electricity generation is combined, it needs to be based on the target
-    # year for the inventory.
-    ng_lci_mapped["Year"] = year
+    # If TargetFlowName is present, there was a match.
+    is_match = mapped_df["TargetFlowName"].notnull()
+    logging.info("Correcting %d NETL flows" % is_match.sum())
 
-    return ng_lci_mapped
+    # Quality Check (coal_df)
+    #   Check that target unit matches source unit.
+    #   No! Hydrogen, Uranium, and Lead-210/kg have mis-matched units.
+    #   Therefore, unit conversions are necessary.
 
+    # Return flow names and compartments back to their original values.
+    df["FlowName"] = df["FlowName_orig"]
+    df["Compartment"] = df["Compartment_orig"]
+    del df['FlowName_orig']      # use this syntax since you're editing
+    del df['Compartment_orig']   # a reference object that isn't returned
+    mapped_df['FlowName'] = mapped_df['FlowName_orig']
+    mapped_df["Compartment"] = mapped_df["Compartment_orig"]
+    mapped_df = mapped_df.drop(columns=['FlowName_orig', 'Compartment_orig'])
 
-def map_ng_lci_to_plants_by_basin(ng_lci, ng_generation_data_mapped):
-    """
-    Map the natural gas generation data by basin.
-    """
-    ng_lci_columns=[
-        "Compartment",
-        "FlowName",
-        "FlowUUID",
-        "Unit",
-        "FlowType",
-        "input",
-        "Basin",
-        "FlowAmount"
-    ]
-    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
-    ng_lci_stack.columns=ng_lci_columns
+    # Replace FlowName, Unit, and Compartment with new names (where matched)
+    mapped_df.loc[is_match, "FlowName"] = mapped_df.loc[
+        is_match, "TargetFlowName"]
+    mapped_df.loc[is_match, "Compartment"] = mapped_df.loc[
+        is_match, "TargetFlowContext"]
+    mapped_df.loc[is_match, "Unit"] = mapped_df.loc[is_match, "TargetUnit"]
 
-    # Merge basin data with LCI dataset
-    ng_lci_mapped = pd.merge(
-        ng_lci_stack,
-        ng_generation_data_mapped,
-        left_on = 'Basin',
-        right_on = 'NG_LCI_Name',
-        how='left'
-    )
-    return ng_lci_mapped
+    # Correct values using the conversion factor
+    mapped_df.loc[is_match, amount_col] *= mapped_df.loc[
+        is_match, 'ConversionFactor']
 
+    if 'FlowUUID' in mapped_df.columns:
+        # Update existing values with new UUIDs
+        mapped_df.loc[is_match, 'FlowUUID'] = mapped_df.loc[
+            is_match, 'TargetFlowUUID']
+    else:
+        # Set UUIDs to target values
+        mapped_df = mapped_df.rename(columns={"TargetFlowUUID": "FlowUUID"})
 
-def map_ng_lci_to_plants_by_region(ng_lci, ng_generation_data_mapped):
-    """
-    Map the natural gas generation data by basin.
-    """
-    ng_lci_columns=[
-        "Compartment",
-        "FlowName",
-        "FlowUUID",
-        "Unit",
-        "FlowType",
-        "input",
-        "Region",
-        "FlowAmount"
-    ]
-    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
-    ng_lci_stack.columns=ng_lci_columns
+    # Drop all unneeded cols
+    drop_cols = [x for x in flow_mapping.columns if x in mapped_df.columns]
+    mapped_df = mapped_df.drop(columns=drop_cols)
 
-    # Merge basin data with LCI dataset
-    ng_lci_mapped = pd.merge(
-        ng_lci_stack,
-        ng_generation_data_mapped,
-        left_on = 'Region',
-        right_on = 'NG_LCI_Region',
-        how='left'
-    )
-    return ng_lci_mapped
+    return mapped_df
 
 
-def map_ng_by_region(year):
+def generate_lci(excel_folder_path,
+                 flow_mapping_path,
+                 destination_path,
+                 final_table_name):
     """
-    Map the natural gas generation data by region.
-    This includes 6 regions: Pacific, Rocky Mountain, Southwest, Midwest,
-    Southeast, and Northeast.
-
-    Notes
-    -----
-    -   Downloads EIA plant data for the specified year.
-    -   Filters the data to only include NG facilities and on positive fuel
-        consumption.
-    -   Groups the data by Plant Id and aggregates the fuel consumption by
-        summing the total fuel consumption.
-    -   Maps each plant to a region using the region_state_mapping dictionary.
+    Read Excel file, extract data, and generate NG LCI in the correct format.
 
     Parameters
     ----------
-    year: int, str
-        The year of the eia923 plant data to use.
+    excel_folder_path : str
+        The path to the folder containing the excel files (i.e., NG models and
+        inventories).
+    flow_mapping_path: str
+        The path to the flow mapping file.
+    destination_path : str, optional
+        The path to the destination folder. If not provided, the function
+        will save the file in the current working directory.
+    final_table_name : str
+        The base name (without extension) of the final table; it is saved
+        as '<final_table_name>.csv' in the destination folder.
 
     Returns
-    ----------
+    -------
     pandas.DataFrame
-        A dataframe with the natural gas generation data by region.
+        A dataframe with the LCI for NG with the same format as the currently
+        used file.
+
+    Notes
+    -----
+    The function is sensitive to the naming convention of the regions in the
+    Excel file.
     """
-    if isinstance(year, str):
-        year = int(year)
+    final_table = pd.DataFrame()
 
-    eia_generation_data = eia923_download_extract(year)
+    # 1. Read excel files in the folder path containing the model
+    for filename in os.listdir(excel_folder_path):
+        if filename.endswith('.xlsx'):
+            file_path = os.path.join(excel_folder_path, filename)
+            logging.info(f"Reading file: {file_path}")
+            input_data = pd.ExcelFile(file_path)
+            sheet_names = input_data.sheet_names
+            sheet_name = [
+                name for name in sheet_names if name in region_sheets_dict.values()
+            ][0]
 
-    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
-                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
+        # Extract air, water, and ground emissions data for the selected sheet
+        # (i.e., technobasin).
+        air_emissions_data, water_emissions_data, ground_emissions_data = read_region_data(file_path, sheet_name)
 
-    ng_generation_data = eia_generation_data[column_filt]
+        # Air emissions
+        # - Get the correct flow names, compartment, and uuid for each flow
+        full_air_emissions_data = correct_netl_flow_names(
+            air_emissions_data,
+            flow_mapping_path
+        )
+        # Drop rows with FlowUUID NaN.
+        full_air_emissions_data = full_air_emissions_data[
+            full_air_emissions_data['FlowUUID'].notna()
+        ]
 
-    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
-        {'Total Fuel Consumption MMBtu':'sum','State':'first'}).reset_index()
-    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
+        # Water emissions
+        # - get the correct flow names, compartment, and uuid for each flow.
+        full_water_emissions_data = correct_netl_flow_names(
+            water_emissions_data,
+            flow_mapping_path
+        )
+        # Drop rows with FlowUUID NaN.
+        full_water_emissions_data = full_water_emissions_data[
+            full_water_emissions_data['FlowUUID'].notna()
+        ]
 
-    ng_generation_data_region = ng_generation_data.copy()
+        # Ground emissions
+        # - get the correct flow names, compartment, and uuid for each flow
+        full_ground_emissions_data = correct_netl_flow_names(
+            ground_emissions_data,
+            flow_mapping_path
+        )
+        full_ground_emissions_data = full_ground_emissions_data[
+            full_ground_emissions_data['FlowUUID'].notna()
+        ]
 
-    ng_generation_data_region['NG_LCI_Region'] = ng_generation_data['State'].map(region_state_mapping)
+        # Combine dataframes.
+        df1 = pd.concat([
+            full_air_emissions_data,
+            full_water_emissions_data,
+            full_ground_emissions_data
+        ])
+        df1 = df1.sort_values(by='FlowUUID')
+        region = [
+            key for key, v in region_sheets_dict.items() if v == sheet_name
+        ][0]
+        df1['FlowAmount'] = df1['FlowAmount'].astype(float)
+        df1['FlowAmount'] = df1['FlowAmount'].fillna(0)
+
+        # Create final_table structure in 1st iteration.
+        if final_table.empty:
+            final_table = df1[[
+                'FlowName', 'Compartment', 'Unit', 'input', 'FlowUUID'
+            ]]
+            final_table = final_table.sort_values(by='FlowUUID')
+            final_table ['flow_type'] = 'ELEMENTARY_FLOW'
+            # Reorder and rename columns.
+            final_table = final_table[[
+                'Compartment',
+                'FlowName',
+                'FlowUUID',
+                'Unit',
+                'flow_type',
+                'input'
+            ]]
+            final_table.columns = [
+                'compartment',
+                'flow_name',
+                'uuid',
+                'unit',
+                'flow_type',
+                'is_input'
+            ]
+            # Add a column for each region
+            region_columns = list(region_sheets_dict.keys())
+            for r in region_columns:
+                final_table[r] = 0
+
+        # Add region emissions to final table
+        try:
+            logging.info(f"Adding emissions for {region}")
+            logging.info(f"df1: {df1['FlowAmount'].head(5)}")
+            final_table[region] = df1['FlowAmount']
+        except Exception as e:
+            sys.exit(
+                "Error reading sheet. "
+                "Make sure your excel file follows the correct naming "
+                "convention. For reference, refer to the source code, "
+                f"lines 70-78. Error: {e}"
+            )
+
+    # 2. Save final table to CSV
+    save_ng_lci(final_table, final_table_name ,destination_path)
+    print(f"Final table saved to {destination_path}/{final_table_name}.xlsx")
 
-    return ng_generation_data_region
+    return final_table
 
 
-def map_ng_by_basin(year):
+def generate_upstream_ng(year):
     """
-    Map the natural gas generation data by basin.
+    Generate the annual gas extraction, processing and transportation
+    emissions (in kg) for each plant in EIA923.
 
     Notes
     -----
-    -   Downloads EIA plant data for the specified year.
-    -   Filters the data to only include NG facilities and on positive fuel
-        consumption.
-    -   Maps each plant to a basin using the gas_supply_basin_mapping.csv file.
+    This is the main method called outside this module.
+
+    Depends on the data file, gas_supply_basin_mapping.csv, which includes the
+    identification information for every natural gas plant in the U.S.
+    Once imported, this data frame is simplified to contain just the plant
+    code and its NG_LCI_Name.
+
+    Also depends on the data file, NG_LCI.csv, which includes the LCA impact
+    species determined for every natural gas basin in the U.S.
+    Flows are separated by specific upstream process: production, gathering
+    & boosting, processing, transmission, storage, and pipeline.
 
     Parameters
     ----------
-    year: int, str
-        The year of the eia923 plant data to use.
+    year: int
+        Year of EIA-923 fuel data to use.
 
     Returns
     ----------
     pandas.DataFrame
-        A dataframe with the natural gas generation data by region.
     """
-    if isinstance(year, str):
-        year = int(year)
+    logging.info("Generating natural gas inventory")
 
-    # Get the EIA generation data for the specified year, this dataset includes
-    # the fuel consumption for generating electricity for each facility
-    # and fuel type. Filter the data to only include NG facilities and on
-    # positive fuel consumption. Group that data by Plant Id as it is possible
-    # to have multiple rows for the same facility and fuel based on different
-    # prime movers (e.g., gas turbine and combined cycle).
+    # Get plant data and map each plant to its ng source: basin or region.
+    # The 2016 ng emissions inventory is only available by basin.
+    # As such, plants can only be connected to upstream emissions via basin
+    # assignment. Newer data (2020) is available by region, so plants are
+    # connected to upstream ng emissions via region assignment.
 
-    eia_generation_data = eia923_download_extract(year)
+    # 'year' refers to eia_gen_year
+    if model_specs.ng_model_year == 2016:
+        ng_generation_data_mapped = map_ng_by_basin(year)
+    else:
+        ng_generation_data_mapped = map_ng_by_region(year)
 
-    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
-                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
-    ng_generation_data = eia_generation_data[column_filt]
+    # Read the NG LCI file
+    # If year = 2016
+    # - this step will directly read NG_LCI.csv from the data_dir
+    # - returns lci (by basin)
+    # If year = 2020
+    # - this step will require edx api, download ng model and mapping
+    # - returns lci (by region)
+    # Document from edx, and generate lci
+    ng_lci = get_ng_lci(model_specs.ng_model_year)
 
-    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
-        {'Total Fuel Consumption MMBtu':'sum'}).reset_index()
-    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
+    # merge ng lci and plants based on the common parameter: region or basin
+    if model_specs.ng_model_year == 2016:
+        ng_lci_mapped = map_ng_lci_to_plants_by_basin(
+            ng_lci, ng_generation_data_mapped
+        )
+    else:
+        ng_lci_mapped = map_ng_lci_to_plants_by_region(
+            ng_lci, ng_generation_data_mapped
+        )
 
-    # Import the mapping file which has the source gas basin for each Plant Id.
-    # NOTE: This is a 2 MB file that provides about 100 kB of info!
-    # TODO: Move this resource to EDX.
-    ng_basin_mapping = pd.read_csv(
-        os.path.join(data_dir, 'gas_supply_basin_mapping.csv')
+    # Multiplying with the EIA 923 fuel consumption; conversion factor is
+    # for MMBtu to MJ
+    btu_to_MJ = pq.convert(10**6,'Btu','MJ')
+    ng_lci_mapped["FlowAmount"]=(
+        ng_lci_mapped["FlowAmount"]
+        * ng_lci_mapped['Total Fuel Consumption MMBtu']
+        * btu_to_MJ
     )
-    subset_cols = ['Plant Code', 'NG_LCI_Name']
-    ng_basin_mapping = ng_basin_mapping[subset_cols]
 
-    # Merge with ng_generation dataframe.
-    ng_generation_data_basin = pd.merge(
-        left = ng_generation_data,
-        right = ng_basin_mapping,
-        left_on = 'Plant Id',
-        right_on = 'Plant Code'
+    ng_lci_mapped = ng_lci_mapped.rename(
+        columns={'Total Fuel Consumption MMBtu':'quantity'})
+    ng_lci_mapped["quantity"] = ng_lci_mapped["quantity"]*btu_to_MJ
+
+    # Output is kg emission for the specified year by facility Id,
+    # not normalized to electricity output
+
+    ng_lci_mapped['FuelCategory'] = 'GAS'
+    ng_lci_mapped.rename(
+        columns={
+            'Plant Id':'plant_id',
+            'NG_LCI_Region': 'stage_code',
+            'NG_LCI_Name':'stage_code',
+            'Stage':'stage'},
+        inplace=True
     )
-    ng_generation_data_basin = ng_generation_data_basin.drop(
-        columns=['Plant Code']
+    ng_lci_mapped["Year"] = year
+    ng_lci_mapped["Source"] = "netlgaseiafuel"
+    ng_lci_mapped["ElementaryFlowPrimeContext"] = "emission"
+    ng_lci_mapped.loc[
+        ng_lci_mapped["Compartment"].str.contains("resource/"),
+        "ElementaryFlowPrimeContext"] = "resource"
+    ng_lci_mapped.loc[
+        ng_lci_mapped["Compartment"].str.contains("Technosphere/"),
+        "ElementaryFlowPrimeContext"] = "technosphere"
+    # Issue #296 - adding DQI information for upstream processes
+    ng_lci_mapped["Year"] = model_specs.ng_model_year
+    ng_lci_mapped["DataReliability"] = 3
+    ng_lci_mapped["TemporalCorrelation"] = add_temporal_correlation_score(
+        ng_lci_mapped["Year"], model_specs.electricity_lci_target_year
     )
-    return ng_generation_data_basin
+    ng_lci_mapped["GeographicalCorrelation"] = 1
+    ng_lci_mapped["TechnologicalCorrelation"] = 1
+    ng_lci_mapped["DataCollection"] = 1
+
+    # 3/20/2025 MBJ - replacing renewable vintage here so that temporal
+    # correlation is based on the year the inventory is based on, but when
+    # electricity generation is combined, it needs to be based on the target
+    # year for the inventory.
+    ng_lci_mapped["Year"] = year
+
+    return ng_lci_mapped
 
 
 def get_ng_lci(year):
@@ -490,147 +584,169 @@ def get_ng_lci(year):
     return ng_lci
 
 
-def generate_lci(excel_folder_path,
-                 flow_mapping_path,
-                 destination_path,
-                 final_table_name):
+def map_ng_by_basin(year):
     """
-    Read Excel file, extract data, and generate NG LCI in the correct format.
+    Map the natural gas generation data by basin.
+
+    Notes
+    -----
+    -   Downloads EIA plant data for the specified year.
+    -   Filters the data to only include NG facilities and on positive fuel
+        consumption.
+    -   Maps each plant to a basin using the gas_supply_basin_mapping.csv file.
 
     Parameters
     ----------
-    excel_folder_path : str
-        The path to the folder containing the excel files (i.e., NG models and
-        inventories).
-    flow_mapping_path: str
-        The path to the flow mapping file.
-    destination_path : str, optional
-        The path to the destination folder. If not provided, the function
-        will save the file in the current working directory.
-    final_table_name : str, optional
-        The name of the final table to be saved. If not provided, the function
-        will save the file with the name 'final_table.xlsx'.
+    year: int, str
+        The year of the eia923 plant data to use.
 
     Returns
-    -------
+    ----------
     pandas.DataFrame
-        A dataframe with the LCI for NG with the same format as the currently
-        used file.
+        A dataframe with the natural gas generation data by basin.
+    """
+    if isinstance(year, str):
+        year = int(year)
+
+    # Get the EIA generation data for the specified year, this dataset includes
+    # the fuel consumption for generating electricity for each facility
+    # and fuel type. Filter the data to only include NG facilities and on
+    # positive fuel consumption. Group that data by Plant Id as it is possible
+    # to have multiple rows for the same facility and fuel based on different
+    # prime movers (e.g., gas turbine and combined cycle).
+
+    eia_generation_data = eia923_download_extract(year)
+
+    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
+                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
+    ng_generation_data = eia_generation_data[column_filt]
+
+    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
+        {'Total Fuel Consumption MMBtu':'sum'}).reset_index()
+    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
+
+    # Import the mapping file which has the source gas basin for each Plant Id.
+    # NOTE: This is a 2 MB file that provides about 100 kB of info!
+    # TODO: Move this resource to EDX.
+    ng_basin_mapping = pd.read_csv(
+        os.path.join(data_dir, 'gas_supply_basin_mapping.csv')
+    )
+    subset_cols = ['Plant Code', 'NG_LCI_Name']
+    ng_basin_mapping = ng_basin_mapping[subset_cols]
+
+    # Merge with ng_generation dataframe.
+    ng_generation_data_basin = pd.merge(
+        left = ng_generation_data,
+        right = ng_basin_mapping,
+        left_on = 'Plant Id',
+        right_on = 'Plant Code'
+    )
+    ng_generation_data_basin = ng_generation_data_basin.drop(
+        columns=['Plant Code']
+    )
+    return ng_generation_data_basin
+
+
+def map_ng_by_region(year):
+    """
+    Map the natural gas generation data by region.
+    This includes 6 regions: Pacific, Rocky Mountain, Southwest, Midwest,
+    Southeast, and Northeast.
 
     Notes
     -----
-    The function is sensitive to the naming convention of the regions in the
-    Excel file.
+    -   Downloads EIA plant data for the specified year.
+    -   Filters the data to only include NG facilities and on positive fuel
+        consumption.
+    -   Groups the data by Plant Id and aggregates the fuel consumption by
+        summing the total fuel consumption.
+    -   Maps each plant to a region using the region_state_mapping dictionary.
+
+    Parameters
+    ----------
+    year: int, str
+        The year of the eia923 plant data to use.
+
+    Returns
+    ----------
+    pandas.DataFrame
+        A dataframe with the natural gas generation data by region.
     """
-    final_table = pd.DataFrame()
+    if isinstance(year, str):
+        year = int(year)
 
-    # 1. Read excel files in the folder path containing the model
-    for filename in os.listdir(excel_folder_path):
-        if filename.endswith('.xlsx'):
-            file_path = os.path.join(excel_folder_path, filename)
-            logging.info(f"Reading file: {file_path}")
-            input_data = pd.ExcelFile(file_path)
-            sheet_names = input_data.sheet_names
-            sheet_name = [
-                name for name in sheet_names if name in region_sheets_dict.values()
-            ][0]
+    eia_generation_data = eia923_download_extract(year)
+
+    column_filt = ((eia_generation_data['Reported Fuel Type Code'] == 'NG') &
+                   (eia_generation_data['Total Fuel Consumption MMBtu'] > 0))
+
+    ng_generation_data = eia_generation_data[column_filt]
+
+    ng_generation_data = ng_generation_data.groupby('Plant Id').agg(
+        {'Total Fuel Consumption MMBtu':'sum','State':'first'}).reset_index()
+    ng_generation_data['Plant Id'] = ng_generation_data['Plant Id'].astype(int)
 
-        # Extract air, water, and ground emissions data for the selected sheet
-        # (i.e., technobasin).
-        air_emissions_data, water_emissions_data, ground_emissions_data = read_region_data(file_path, sheet_name)
+    ng_generation_data_region = ng_generation_data.copy()
 
-        # Air emissions
-        # - Get the correct flow names, compartment, and uuid for each flow
-        full_air_emissions_data = correct_netl_flow_names(
-            air_emissions_data,
-            flow_mapping_path
-        )
-        # Drop rows with FlowUUID NaN.
-        full_air_emissions_data = full_air_emissions_data[
-            full_air_emissions_data['FlowUUID'].notna()
-        ]
+    ng_generation_data_region['NG_LCI_Region'] = ng_generation_data['State'].map(region_state_mapping)
 
-        # Water emissions
-        # - get the correct flow names, compartment, and uuid for each flow.
-        full_water_emissions_data = correct_netl_flow_names(
-            water_emissions_data,
-            flow_mapping_path
-        )
-        # Drop rows with FlowUUID NaN.
-        full_water_emissions_data = full_water_emissions_data[
-            full_water_emissions_data['FlowUUID'].notna()
-        ]
+    return ng_generation_data_region
 
-        # Ground emissions
-        # - get the correct flow names, compartment, and uuid for each flow
-        full_ground_emissions_data = correct_netl_flow_names(
-            ground_emissions_data,
-            flow_mapping_path
-        )
-        full_ground_emissions_data = full_ground_emissions_data[
-            full_ground_emissions_data['FlowUUID'].notna()
-        ]
 
-        # Combine dataframes.
-        df1 = pd.concat([
-            full_air_emissions_data,
-            full_water_emissions_data,
-            full_ground_emissions_data
-        ])
-        df1 = df1.sort_values(by='FlowUUID')
-        region = [
-            key for key, v in region_sheets_dict.items() if v == sheet_name
-        ][0]
-        df1['FlowAmount'] = df1['FlowAmount'].astype(float)
-        df1['FlowAmount'] = df1['FlowAmount'].fillna(0)
+def map_ng_lci_to_plants_by_basin(ng_lci, ng_generation_data_mapped):
+    """
+    Map the natural gas generation data by basin.
+    """
+    ng_lci_columns=[
+        "Compartment",
+        "FlowName",
+        "FlowUUID",
+        "Unit",
+        "FlowType",
+        "input",
+        "Basin",
+        "FlowAmount"
+    ]
+    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
+    ng_lci_stack.columns=ng_lci_columns
 
-        # Create final_table structure in 1st iteration.
-        if final_table.empty:
-            final_table = df1[[
-                'FlowName', 'Compartment', 'Unit', 'input', 'FlowUUID'
-            ]]
-            final_table = final_table.sort_values(by='FlowUUID')
-            final_table ['flow_type'] = 'ELEMENTARY_FLOW'
-            # Reorder and rename columns.
-            final_table = final_table[[
-                'Compartment',
-                'FlowName',
-                'FlowUUID',
-                'Unit',
-                'flow_type',
-                'input'
-            ]]
-            final_table.columns = [
-                'compartment',
-                'flow_name',
-                'uuid',
-                'unit',
-                'flow_type',
-                'is_input'
-            ]
-            # Add a column for each basin
-            region_columns = list(region_sheets_dict.keys())
-            for r in region_columns:
-                final_table[r] = 0
+    # Merge basin data with LCI dataset
+    ng_lci_mapped = pd.merge(
+        ng_lci_stack,
+        ng_generation_data_mapped,
+        left_on = 'Basin',
+        right_on = 'NG_LCI_Name',
+        how='left'
+    )
+    return ng_lci_mapped
 
-        # Add region emissions to final table
-        try:
-            logging.info(f"Adding emissions for {region}")
-            logging.info(f"df1: {df1['FlowAmount'].head(5)}")
-            final_table[region] = df1['FlowAmount']
-        except Exception as e:
-            sys.exit(
-                "Error reading sheet. "
-                "Make sure your excel file follows the correct naming "
-                "convention. For reference, refer to the source code, "
-                f"lines 70-78. Error: {e}"
-            )
 
-    # 2. Save final table to excel
-    save_ng_lci(final_table, final_table_name ,destination_path)
-    print(f"Final table saved to {destination_path}/{final_table_name}.xlsx")
+def map_ng_lci_to_plants_by_region(ng_lci, ng_generation_data_mapped):
+    """
+    Map the natural gas generation data by region.
+    """
+    ng_lci_columns=[
+        "Compartment",
+        "FlowName",
+        "FlowUUID",
+        "Unit",
+        "FlowType",
+        "input",
+        "Region",
+        "FlowAmount"
+    ]
+    ng_lci_stack = pd.DataFrame(ng_lci.stack()).reset_index()
+    ng_lci_stack.columns=ng_lci_columns
 
-    return final_table
+    # Merge region data with LCI dataset
+    ng_lci_mapped = pd.merge(
+        ng_lci_stack,
+        ng_generation_data_mapped,
+        left_on = 'Region',
+        right_on = 'NG_LCI_Region',
+        how='left'
+    )
+    return ng_lci_mapped
 
 
 def read_region_data(excel_file_path, sheet_name):
@@ -729,125 +845,11 @@ def save_ng_lci(df, filename, destination_path):
     df.to_csv(full_path, index=False)
 
 
-def correct_netl_flow_names(df, flow_mapping_path, amount_col="FlowAmount"):
-    """A helper method that replaces NETL air, water, and ground emissions
-    with Federal Elementary Flow List equivalents based on a subset of
-    flows defined in USEPA's eLCI mapping using the Python package
-    `fedelemflowlist <https://github.com/USEPA/fedelemflowlist>`_
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-        A life cycle inventory data frame with columns, 'FlowName',
-        'Compartment', 'Unit', and ``amount_col``.
-    amount_col : str, optional
-        The column title representing the flow amount, by default "FlowAmount"
-
-    Returns
-    -------
-    pandas.DataFrame
-        A new data frame with the same number of rows and columns as the
-        sent data frame. Flow names, compartments, units, and flow amounts
-        are updated based on emissions matches with the FEDEFL. All unmatched
-        flows are returned 'as is'. If FlowUUID was not in the column list,
-        it is created; otherwise, the matched UUIDs are updated.
-    """
-    # This data frame has about 4k source flow names and contexts associated
-    # with NETL unit process models (e.g., petro, nuclear, coal).
-    flow_mapping = pd.read_csv(flow_mapping_path, encoding='ISO-8859-1')
-
-    # Matching occurs on name, compartment and units; help this along by
-    # lowering the case (improves coal UP matches from 10% to 42%).
-    df["FlowName_orig"] = df["FlowName"]
-    df["Compartment_orig"] = df["Compartment"]
-    df["FlowName"] = df["FlowName"].str.lower().str.rstrip()
-    df["Compartment"] = df["Compartment"].str.lower().str.rstrip()
-
-    # In the map, also lower-case names and compartments and remove trailing
-    # space; note this introduces duplicate entries in the map, so remove them.
-    # The duplicates are from later entries, so ignore mapper, verifier and
-    # last updated cols when searching for duplicates. [250917; TWD]
-    flow_mapping['SourceFlowName'] = flow_mapping[
-        'SourceFlowName'].str.lower().str.rstrip()
-    flow_mapping['SourceFlowContext'] = flow_mapping[
-        "SourceFlowContext"].str.lower().str.rstrip()
-    ignore_cols = ['Mapper', 'Verifier', 'LastUpdated']
-    flow_mapping = flow_mapping.drop_duplicates(
-        subset=[x for x in flow_mapping.columns if x not in ignore_cols]
-    )
-
-    # Some compartments in NETL UPs are complex (e.g., 'Emission to water/fresh
-    # water'), but are listed simply in the FEDEFL eLCI mapper (e.g., 'emission/
-    # water'). Improves coal mining UP matches from 42% to 62%.
-    is_emission = df['input'] == False
-    is_water = df['Compartment'].str.contains('water')
-    is_air = df['Compartment'].str.contains('air')
-    is_ground = df['Compartment'].str.contains('ground')
-
-    df.loc[is_emission * is_water, 'Compartment'] = 'emission/water'
-    df.loc[is_emission * is_air, 'Compartment'] = 'emission/air'
-    df.loc[is_emission * is_ground, 'Compartment'] = 'emission/ground'
-
-    # HOTFIX: Map against source units [250205; TWD]
-    # For coal mining, reduces matches from >62% to <62% (about 2k less rows)
-    logging.info("Mapping emissions to FEDEFL")
-    mapped_df = pd.merge(
-        df,
-        flow_mapping,
-        left_on=["FlowName", "Compartment", "Unit"],
-        right_on=["SourceFlowName", "SourceFlowContext", "SourceUnit"],
-        how="left",
-    )
-
-    # If TargetFlowName is present, there was a match.
-    is_match = mapped_df["TargetFlowName"].notnull()
-    logging.info("Correcting %d NETL flows" % is_match.sum())
-
-    # Quality Check (coal_df)
-    #   Check that target unit matches source unit.
-    #   No! Hydrogen, Uranium, and Lead-210/kg have mis-matched units.
-    #   Therefore, unit conversions are necessary.
-
-    # Return flow names and compartments back to their original values.
-    df["FlowName"] = df["FlowName_orig"]
-    df["Compartment"] = df["Compartment_orig"]
-    del df['FlowName_orig']      # use this syntax since you're editing
-    del df['Compartment_orig']   # a reference object that isn't returned
-    mapped_df['FlowName'] = mapped_df['FlowName_orig']
-    mapped_df["Compartment"] = mapped_df["Compartment_orig"]
-    mapped_df = mapped_df.drop(columns=['FlowName_orig', 'Compartment_orig'])
-
-    # Replace FlowName, Unit, and Compartment with new names (where matched)
-    mapped_df.loc[is_match, "FlowName"] = mapped_df.loc[
-        is_match, "TargetFlowName"]
-    mapped_df.loc[is_match, "Compartment"] = mapped_df.loc[
-        is_match, "TargetFlowContext"]
-    mapped_df.loc[is_match, "Unit"] = mapped_df.loc[is_match, "TargetUnit"]
-
-    # Correct values using the conversion factor
-    mapped_df.loc[is_match, amount_col] *= mapped_df.loc[
-        is_match, 'ConversionFactor']
-
-    if 'FlowUUID' in mapped_df.columns:
-        # Update existing values with new UUIDs
-        mapped_df.loc[is_match, 'FlowUUID'] = mapped_df.loc[
-            is_match, 'TargetFlowUUID']
-    else:
-        # Set UUIDs to target values
-        mapped_df = mapped_df.rename(columns={"TargetFlowUUID": "FlowUUID"})
-
-    # Drop all unneeded cols
-    drop_cols = [x for x in flow_mapping.columns if x in mapped_df.columns]
-    mapped_df = mapped_df.drop(columns=drop_cols)
-
-    return mapped_df
-
-
 ##############################################################################
 # MAIN
 ##############################################################################
 if __name__=='__main__':
     from electricitylci.globals import output_dir
-    year=2016
+    year = 2016
     df = generate_upstream_ng(year)
     df.to_csv(output_dir+'/ng_emissions_{}.csv'.format(year))

From 99128dbd2641d28a96a4b8382b6113a7907d53bf Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:43:12 -0500
Subject: [PATCH 22/24] fix comments

---
 electricitylci/combinator.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/electricitylci/combinator.py b/electricitylci/combinator.py
index 6942eb02..40f285b0 100644
--- a/electricitylci/combinator.py
+++ b/electricitylci/combinator.py
@@ -588,7 +588,7 @@ def concat_clean_upstream_and_plant(pl_df, up_df):
     # 3/19/2025 MBJ: more memory management. When this process is called from
     # __init__.combine_upstream_and_gen_df the up_df is 12GB big. Previously
     # we used a merge to add all the regional columns, but that requires a
-    # tremendous amount of memory. Invidually assigning columns will be a bit
+    # tremendous amount of memory. Individually assigning columns will be a bit
     # slower but will greatly reduce memory usage...and ultimately end up
     # faster if your computer tends to run out of memory using the previous
     # merge.
@@ -598,10 +598,10 @@ def concat_clean_upstream_and_plant(pl_df, up_df):
     # HOTFIX: during the merge, a lot eGRID_IDs are unmatched, so fill them in!
     # NOTE: triggers a pandas futurewarning on downcasting object datatypes.
     # 3/19/2025 - these would be instances where there is a plant_id in up_df
-    # but not a matching eGRID_ID. With the new, by-column mapping performed above
-    # eGRID_ID does not exist so no Nans to fill. In previous versions, I believe
-    # the use of fillnans with plant_id being the source would result in the
-    # same thing as below.
+    # but not a matching eGRID_ID. With the new, by-column mapping performed
+    # above, eGRID_ID does not exist, so there are no NaNs to fill. In previous
+    # versions, I believe the use of fillna with plant_id as the source would
+    # result in the same thing as below.
     up_df['eGRID_ID'] = up_df['plant_id'].astype("int")
 
     # NOTE: the only columns in up_df not in pl_df should be:

From 15c4c972cb2bd4224396a68245bdffbd6c758c8b Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Thu, 4 Dec 2025 16:43:49 -0500
Subject: [PATCH 23/24] update language for NG model parameter

---
 electricitylci/modelconfig/ELCI_1_config.yml    | 11 ++++++-----
 electricitylci/modelconfig/ELCI_2020_config.yml | 11 ++++++-----
 electricitylci/modelconfig/ELCI_2021_config.yml | 11 ++++++-----
 electricitylci/modelconfig/ELCI_2022_config.yml | 11 ++++++-----
 electricitylci/modelconfig/ELCI_2023_config.yml |  9 +++++----
 electricitylci/modelconfig/ELCI_2024_config.yml | 11 ++++++-----
 electricitylci/modelconfig/ELCI_2_config.yml    | 11 ++++++-----
 electricitylci/modelconfig/ELCI_3_config.yml    | 11 ++++++-----
 8 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/electricitylci/modelconfig/ELCI_1_config.yml b/electricitylci/modelconfig/ELCI_1_config.yml
index 80d7b3d5..ff30a42b 100644
--- a/electricitylci/modelconfig/ELCI_1_config.yml
+++ b/electricitylci/modelconfig/ELCI_1_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2020
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2016
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_2020_config.yml b/electricitylci/modelconfig/ELCI_2020_config.yml
index fbd4875e..2effc7a9 100644
--- a/electricitylci/modelconfig/ELCI_2020_config.yml
+++ b/electricitylci/modelconfig/ELCI_2020_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2020
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_2021_config.yml b/electricitylci/modelconfig/ELCI_2021_config.yml
index c39bdc8e..4fc8d5a9 100644
--- a/electricitylci/modelconfig/ELCI_2021_config.yml
+++ b/electricitylci/modelconfig/ELCI_2021_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2020
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_2022_config.yml b/electricitylci/modelconfig/ELCI_2022_config.yml
index 94d53947..76046f1a 100644
--- a/electricitylci/modelconfig/ELCI_2022_config.yml
+++ b/electricitylci/modelconfig/ELCI_2022_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2020
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_2023_config.yml b/electricitylci/modelconfig/ELCI_2023_config.yml
index c84b2337..17d3eca2 100644
--- a/electricitylci/modelconfig/ELCI_2023_config.yml
+++ b/electricitylci/modelconfig/ELCI_2023_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
-# NG baseline year
-# This is used to determine which NG inventory to use.
-# This impacts the upstream ng emissions for production, gathering and boosting,
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
 # processing, transmission, storage, and distribution.
-# Select between 2016 and 2020 model inventories.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2020
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_2024_config.yml b/electricitylci/modelconfig/ELCI_2024_config.yml
index c8caedce..4af82864 100644
--- a/electricitylci/modelconfig/ELCI_2024_config.yml
+++ b/electricitylci/modelconfig/ELCI_2024_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2023
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2020
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_2_config.yml b/electricitylci/modelconfig/ELCI_2_config.yml
index d0a56093..0f4aba4e 100644
--- a/electricitylci/modelconfig/ELCI_2_config.yml
+++ b/electricitylci/modelconfig/ELCI_2_config.yml
@@ -31,11 +31,12 @@ replace_egrid: true
 # construction LCI.
 coal_model_year: 2020
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2016
 
 # NETL developed profiles for renewable generation to capture construction
diff --git a/electricitylci/modelconfig/ELCI_3_config.yml b/electricitylci/modelconfig/ELCI_3_config.yml
index 51332d55..d5e32c86 100644
--- a/electricitylci/modelconfig/ELCI_3_config.yml
+++ b/electricitylci/modelconfig/ELCI_3_config.yml
@@ -31,11 +31,12 @@ replace_egrid: false
 # construction LCI.
 coal_model_year: 2020
 
-# NG baseline year
-# this is used to determine which NG inventory to use. 
-# this impacts the upstream ng emissions for production, gathering and boosting,
-# processing, transmission, storage, and distribution
-# Select between 2016 and 2020 model inventories.
+# The NG baseline model year is used to determine which NG inventory to use.
+# This impacts the upstream NG emissions for production, gathering and boosting,
+# processing, transmission, storage, and distribution.
+# Select between 2016 and 2020 model inventories:
+#   2016 model: https://doi.org/10.2172/1529553
+#   2020 model: https://doi.org/10.18141/2568690
 ng_model_year: 2016
 
 # NETL developed profiles for renewable generation to capture construction

From e49c2caa7b081a28c18ed6f1ceb027e1e7672b76 Mon Sep 17 00:00:00 2001
From: dt-woods <davisler@gmail.com>
Date: Wed, 10 Dec 2025 11:46:03 -0500
Subject: [PATCH 24/24] hotfix setup for v2.1

---
 setup.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index d5af78f0..bd6d51e0 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='electricitylci',
-    version='2.0.0',
+    version='2.1.0',
     packages=['electricitylci'],
     package_data={
         'electricitylci': ["data/*.*",
@@ -16,13 +16,13 @@
                            "output/.gitignore",
                            ]
     },
-    url='https://github.com/USEPA/ElectricityLCI',
+    url='https://github.com/NETL-RIC/ElectricityLCI',
     license='CC0',
-    author='Matt Jamieson, Wesley Ingwersen, Greg Schively, TJ Ghosh, Ben Young, Troy Hottle',
-    author_email='ingwersen.wesley@epa.gov',
-    description='Create life cycle inventory data for regionalized electricity generation, mix of generation, mix of consumption, and distribution to the end-user in the United States.',
+    author='Tyler W. Davis, Matthew Jamieson, Wesley W. Ingwersen, Greg Schivley, Ben Young, Tapajyoti Ghosh, Jing Li, Shirley Sam, Daniel Lee Young, Michael Srocka, and Troy A. Hottle',
+    author_email='Mathew.Jamieson@netl.doe.gov',
+    description='A Python package to create regionalized life cycle inventory models of U.S. electricity generation, consumption, and distribution using standardized facility and generation data for use with open-source LCA software.',
     install_requires=[
-        'fedelemflowlist @ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List#egg=fedelemflowlist',
+        'fedelemflowlist @ git+https://github.com/FLCAC-Admin/fedelemflowlist',
         'StEWI @ git+https://github.com/USEPA/standardizedinventories#egg=StEWI',
         'scipy>=1.10',
         ],
@@ -32,7 +32,7 @@
         "Environment :: IDE",
         "Intended Audience :: Science/Research",
         "License :: CC0",
-        "Programming Language :: Python :: 3.x",
+        "Programming Language :: Python :: 3.12",
         "Topic :: Utilities",
     ]
 )