From 7f93665256504b9a344ef9e438e7a2403a8e5792 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 14:26:54 +0200 Subject: [PATCH 1/8] Add a quick start to the docs This merge also includes a small fix to an example in mesh conversion. --- conda_package/docs/index.rst | 13 ++-- conda_package/docs/mesh_conversion.rst | 2 +- conda_package/docs/quick_start.rst | 90 ++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 conda_package/docs/quick_start.rst diff --git a/conda_package/docs/index.rst b/conda_package/docs/index.rst index fa692db9c..a741d4520 100644 --- a/conda_package/docs/index.rst +++ b/conda_package/docs/index.rst @@ -8,17 +8,18 @@ MPAS-Tools MPAS-Tools includes a python package, compiled Fortran, C and C++ tools, and scripts for supporting initialization, visualization and analysis of Model for Prediction Across Scales (MPAS) components. These tools are used by the -`COMPASS `_ -(Configuring of MPAS Setups) framework within -`MPAS-Model `_ used to create -ocean and land-ice test cases, -the `MPAS-Analysis `_ package for -analyzing simulations, and in other MPAS-related workflows. +`Polaris `_ and +`Compass `_ packages used to create ocean, +sea-ice, and land-ice test cases; the +`MPAS-Analysis `_ package for +analyzing simulations; and in other MPAS-related workflows. .. toctree:: :caption: User's Guide :maxdepth: 2 + quick_start + mesh_creation mesh_conversion interpolation diff --git a/conda_package/docs/mesh_conversion.rst b/conda_package/docs/mesh_conversion.rst index 414495be9..02dc915ca 100644 --- a/conda_package/docs/mesh_conversion.rst +++ b/conda_package/docs/mesh_conversion.rst @@ -141,7 +141,7 @@ The same example in a python script can be accomplished with: dsBaseMesh = xarray.open_dataset('base_mesh.nc') dsLandMask = mask(dsBaseMesh, fcMask=fcLandCoverage) - dsCulledMesh = conversion.cull(dsBaseMesh, dsMask=dsLandMask) + dsCulledMesh = cull(dsBaseMesh, dsMask=dsLandMask) write_netcdf(dsCulledMesh, 'culled_mesh.nc') Here is the full usage of ``MpasCellCuller.x``: diff --git a/conda_package/docs/quick_start.rst b/conda_package/docs/quick_start.rst new file mode 100644 index 000000000..45f2b031f --- /dev/null +++ b/conda_package/docs/quick_start.rst @@ -0,0 +1,90 @@ +.. _quick_start: + +*********** +Quick Start +*********** + +This guide will help you get started with `mpas_tools` as quickly as possible. + +Installing Conda (Miniforge) +============================ + +If you do not already have conda installed, we recommend installing +`Miniforge `_. +This is a lightweight conda installer that default to the conda-forge channel. + +To install Miniforge: + +1. Download the installer for your platform from the + `Miniforge releases page `_. + +2. Run the installer following the instructions for your operating system. + +3. After installation, initialize conda in your shell (if prompted and if + desired). + +Installing mpas_tools from conda-forge +====================================== + +Once you have conda available, you can install `mpas_tools` into a new +environment: + +.. code-block:: bash + + conda create -n mpas_tools_env -c conda-forge mpas_tools + conda activate mpas_tools_env + +This will create and activate a new environment called `mpas_tools_env` with +the latest version of `mpas_tools` and its dependencies. + +Verifying the Installation +========================== + +To verify that `mpas_tools` is installed correctly, try importing it in Python: + +.. code-block:: python + + import mpas_tools + print(mpas_tools.__version__) + +You can also check that command-line tools are available, for example: + +.. code-block:: bash + + planar_hex --help + +Basic Usage +=========== + +A simple example of using `mpas_tools` to create a planar hexagonal mesh +that is periodic in one dimension, then cull out the boundary cells (to +remove periodicity in x) and finally to ensure that the mesh has been +converted to the proper MPAS format: + +.. code-block:: python + + from mpas_tools.planar_hex import make_planar_hex_mesh + from mpas_tools.mesh.conversion import convert, cull + from mpas_tools.io import write_netcdf + + ds = make_planar_hex_mesh(nx=4, ny=4, dc=10e3, nonperiodic_x=True, + nonperiodic_y=False) + + ds = cull(ds) + ds = convert(ds) + write_netcdf(ds, 'mesh.nc') + +The same can be accomplished with the command-line tool: +.. code-block:: bash + + planar_hex --nx 4 --ny 4 --dc 10000 --nonperiodic_x --output base_mesh.nc + MpasCellCuller.x base_mesh.nc culled_mesh.nc + MpasMeshConverter.x culled_mesh.nc final_mesh.nc + +Further Reading +=============== + +- :ref:`api_reference` +- :ref:`mesh_creation` +- :ref:`mesh_interpolation` +- :ref:`mesh_interpolation` From 74ff9246fb7ad1022be5ebd4f7bb92ce91cc75ec Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 14:57:45 +0200 Subject: [PATCH 2/8] Add documentation on the release process --- conda_package/docs/index.rst | 1 + conda_package/docs/releasing.rst | 85 ++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 conda_package/docs/releasing.rst diff --git a/conda_package/docs/index.rst b/conda_package/docs/index.rst index a741d4520..6d1d4a9ee 100644 --- a/conda_package/docs/index.rst +++ b/conda_package/docs/index.rst @@ -79,6 +79,7 @@ analyzing simulations; and in other MPAS-related workflows. making_changes testing_changes building_docs + releasing api diff --git a/conda_package/docs/releasing.rst b/conda_package/docs/releasing.rst new file mode 100644 index 000000000..8def86481 --- /dev/null +++ b/conda_package/docs/releasing.rst @@ -0,0 +1,85 @@ +.. _dev_releasing: + +*********************** +Releasing a New Version +*********************** + +This document describes the steps for maintainers to tag and release a new +version of ``mpas_tools``, and to update the conda-forge feedstock. + +Version Bump and Dependency Updates +=================================== + +1. **Update the Version Number** + + - Open a pull request (PR) to update the version number in the following + two files: + - ``conda_package/mpas_tools/__init__.py`` + - ``conda_package/recipe/meta.yaml`` + + - Make sure the version follows [semantic versioning](https://semver.org/). + +2. **Check and Update Dependencies** + + - Ensure that dependencies and their constraints are up-to-date and + consistent in: + - ``conda_package/recipe/meta.yaml`` (dependencies for the conda-forge + release) + - ``conda_package/pyproject.toml`` (dependencies for PyPI; used as a + sanity check) + - ``conda_package/dev-spec.txt`` (development dependencies; should be a + superset of those for the conda-forge release) + + - The dependencies in ``meta.yaml`` are the ones that will be used for the + released package on conda-forge. The dependencies in ``pyproject.toml`` + are for PyPI and should be kept in sync as much as possible but are only + there as a sanity check when we run ```pip check``. The ``dev-spec.txt`` + file should include all dependencies needed for development and testing. + + - Review and update dependency versions and constraints as needed. + +3. **Make a PR and merge it** + +Tagging and Publishing a Release +================================ + +3. **Tag the Release on GitHub** + + - Go to https://github.com/MPAS-Dev/MPAS-Tools/releases and click on + "Draft a new release". + - Enter the appropriate tag for the release, following semantic versioning + (e.g., ``1.2.3``; **do not** include a ``v`` in front). + - Enter a release title (typically the release version **with** a ``v`` in + front, e.g., ``v1.2.3``). + - Write a description and/or use the "Generate release notes" button to + auto-generate release notes. + - If the release is ready, click "Publish release". Otherwise, save it as a + draft. + +Updating the conda-forge Feedstock +================================== + +4. **Update the conda-forge Feedstock** + + - After the release is published, update and merge a PR for the new release + at the conda-forge feedstock: + https://github.com/conda-forge/mpas_tools-feedstock + + - The conda-forge bot should automatically create a pull request for the + new version within a few hours to a day after the release. + + - Compare the dependencies in the new release to those in the previous + release and update the recipe as needed. To do this: + - Find the most recent release at + https://github.com/MPAS-Dev/MPAS-Tools/releases + - Use the "Compare" feature to select the previous release. + - Under "changed files", locate ``conda_package/recipe/meta.yaml`` to see + any dependency changes. + + - Review and update the feedstock PR as needed, then merge it. + + - If you are not already a maintainer of the feedstock, you can request to + be added by creating a new issue at + https://github.com/conda-forge/mpas_tools-feedstock/issues, choosing + "Bot command", and putting + ``@conda-forge-admin, please add user @username`` as the subject. From 1e8169929cfe373fc3a436cfd515d1c0c42cbb85 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 14:58:04 +0200 Subject: [PATCH 3/8] Add note about me leaving --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index be5a67832..ead39ba5b 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,11 @@ should live in it's own directory. ## Documentation -The latest documentation for the conda package `mpas-tools` can be found here: +The latest documentation for the conda package `mpas_tools` can be found here: [http://mpas-dev.github.io/MPAS-Tools/master/](http://mpas-dev.github.io/MPAS-Tools/master/) Many tools are not in the conda package, and documentation (sometimes fairly limited) is available at the beginning of each script. + +**Note**: as of Sep 2025, [@xylar](https://github.com/xylar/) who has been the main maintainer of the conda package will no longer be working on the project. As a result updates to the `mpas_tools` conda package may be limited after that date. From 80c6825fbcbce976daa1ab6c139aa05e02b7d9f5 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 15:25:49 +0200 Subject: [PATCH 4/8] Reformat and improve docs for `config` module --- conda_package/docs/config.rst | 40 ++++++++++- conda_package/mpas_tools/config.py | 108 ++++++++++++++++++++++------- 2 files changed, 120 insertions(+), 28 deletions(-) diff --git a/conda_package/docs/config.rst b/conda_package/docs/config.rst index 06f1d36e1..f6df9a1b3 100644 --- a/conda_package/docs/config.rst +++ b/conda_package/docs/config.rst @@ -28,11 +28,48 @@ default behavior. In some cases, you would like the code to add the config options if the config file exists and do nothing if it does not (``exception=False``). -Ihe ``MpasConfigParser`` class also includes methods for adding a user +The ``MpasConfigParser`` class also includes methods for adding a user config file, :py:meth:`mpas_tools.config.MpasConfigParser.add_user_config()`, and other config files by file name, :py:meth:`mpas_tools.config.MpasConfigParser.add_from_file()`. +.. note:: + The ``MpasConfigParser`` class also provides methods for combining config + files, listing contributing files, and merging config parsers: + + +----------------------+---------------------------------------------------+ + | Method | Purpose | + +======================+===================================================+ + | add_from_file | Add a config file by filename | + +----------------------+---------------------------------------------------+ + | add_user_config | Add a user config file (highest precedence) | + +----------------------+---------------------------------------------------+ + | add_from_package | Add a config file from a Python package | + +----------------------+---------------------------------------------------+ + | get, getint, ... | Retrieve config values | + +----------------------+---------------------------------------------------+ + | getlist, getexpression | Retrieve values as lists or Python expressions | + +----------------------+---------------------------------------------------+ + | set | Set a config value, with optional comment | + +----------------------+---------------------------------------------------+ + | copy | Deep copy the config parser | + +----------------------+---------------------------------------------------+ + | append, prepend | Merge another config parser (priority control) | + +----------------------+---------------------------------------------------+ + | list_files | List all contributing config files | + +----------------------+---------------------------------------------------+ + +Example usage +------------- + +.. code-block:: python + + from mpas_tools.config import MpasConfigParser + config = MpasConfigParser() + config.add_from_file('defaults.cfg') + config.add_user_config('user.cfg') + value = config.get('section', 'option') + The :py:meth:`mpas_tools.config.MpasConfigParser.copy()` method can be used to make a deep copy of the config parser. This is useful in cases where config options should be added or modified without affecting the original config @@ -90,4 +127,3 @@ The comments can be any number of lines. Inline comments (after a config option on the same line) are not allowed and will be parsed as part of the config option itself. - diff --git a/conda_package/mpas_tools/config.py b/conda_package/mpas_tools/config.py index 26a3cb326..221341e55 100644 --- a/conda_package/mpas_tools/config.py +++ b/conda_package/mpas_tools/config.py @@ -1,11 +1,12 @@ -from configparser import RawConfigParser, ConfigParser, ExtendedInterpolation -import os +import ast import inspect +import os import sys -import numpy as np -import ast -from io import StringIO +from configparser import ConfigParser, ExtendedInterpolation, RawConfigParser from importlib.resources import files as imp_res_files +from io import StringIO + +import numpy as np class MpasConfigParser: @@ -16,6 +17,13 @@ class MpasConfigParser: options to always take precedence over other config options (even if they are added later). + Example + ------- + >>> config = MpasConfigParser() + >>> config.add_from_file('default.cfg') + >>> config.add_user_config('user.cfg') + >>> value = config.get('section', 'option') + Attributes ---------- combined : {None, configparser.ConfigParser} @@ -28,9 +36,17 @@ class MpasConfigParser: The source of each section or option """ - _np_allowed = dict(linspace=np.linspace, xrange=range, - range=range, array=np.array, arange=np.arange, - pi=np.pi, Pi=np.pi, int=int, __builtins__=None) + _np_allowed = dict( + linspace=np.linspace, + xrange=range, + range=range, + array=np.array, + arange=np.arange, + pi=np.pi, + Pi=np.pi, + int=int, + __builtins__=None, + ) def __init__(self): """ @@ -222,15 +238,17 @@ def getexpression(self, section, option, dtype=None, use_numpyfunc=False): If ``True``, the expression is evaluated including functionality from the numpy package (which can be referenced either as ``numpy`` or ``np``). - """ + """ # noqa: E501 expression_string = self.get(section, option) if use_numpyfunc: - assert '__' not in expression_string, \ - f'"__" is not allowed in {expression_string} ' \ + assert '__' not in expression_string, ( + f'"__" is not allowed in {expression_string} ' f'for use_numpyfunc=True' - sanitized_str = expression_string.replace('np.', '') \ - .replace('numpy.', '') + ) + sanitized_str = expression_string.replace('np.', '').replace( + 'numpy.', '' + ) result = eval(sanitized_str, MpasConfigParser._np_allowed) else: result = ast.literal_eval(expression_string) @@ -405,8 +423,9 @@ def copy(self): config_copy._configs[filename] = MpasConfigParser._deepcopy(config) for filename, config in self._user_config.items(): - config_copy._user_config[filename] = \ - MpasConfigParser._deepcopy(config) + config_copy._user_config[filename] = MpasConfigParser._deepcopy( + config + ) config_copy._comments = dict(self._comments) return config_copy @@ -497,15 +516,17 @@ def combine(self, raw=False): for source, config in configs.items(): for section in config.sections(): if section in self._comments[source]: - self.combined_comments[section] = \ - self._comments[source][section] + self.combined_comments[section] = self._comments[ + source + ][section] if not self.combined.has_section(section): self.combined.add_section(section) for option, value in config.items(section): self.sources[(section, option)] = source self.combined.set(section, option, value) - self.combined_comments[(section, option)] = \ + self.combined_comments[(section, option)] = ( self._comments[source][(section, option)] + ) def _add(self, filename, user): filename = os.path.abspath(filename) @@ -527,7 +548,23 @@ def _add(self, filename, user): @staticmethod def _parse_comments(fp, filename, comments_before=True): - """ Parse the comments in a config file into a dictionary """ + """ + Parse the comments in a config file into a dictionary. + + Parameters + ---------- + fp : file-like + Open file pointer to the config file. + filename : str + Name of the config file (for error messages). + comments_before : bool, optional + If True, associate comments before a section/option with that item. + + Returns + ------- + comments : dict + Mapping of (section, option) or section to comment strings. + """ comments = dict() current_comment = '' section_name = None @@ -546,8 +583,11 @@ def _parse_comments(fp, filename, comments_before=True): cur_indent_level = len(line) - len(line.lstrip()) is_continuation = cur_indent_level > indent_level # a section header or option header? - if section_name is None or option_name is None or \ - not is_continuation: + if ( + section_name is None + or option_name is None + or not is_continuation + ): indent_level = cur_indent_level # is it a section header? is_section = value.startswith('[') and value.endswith(']') @@ -556,8 +596,9 @@ def _parse_comments(fp, filename, comments_before=True): if option_name is None: comments[section_name] = current_comment else: - comments[(section_name, option_name)] = \ + comments[(section_name, option_name)] = ( current_comment + ) section_name = value[1:-1].strip() option_name = None @@ -568,15 +609,18 @@ def _parse_comments(fp, filename, comments_before=True): else: delimiter_index = value.find('=') if delimiter_index == -1: - raise ValueError(f'Expected to find "=" on line ' - f'{line_number} of {filename}') + raise ValueError( + f'Expected to find "=" on line ' + f'{line_number} of {filename}' + ) if not comments_before: if option_name is None: comments[section_name] = current_comment else: - comments[(section_name, option_name)] = \ + comments[(section_name, option_name)] = ( current_comment + ) option_name = value[:delimiter_index].strip().lower() @@ -588,7 +632,19 @@ def _parse_comments(fp, filename, comments_before=True): @staticmethod def _deepcopy(config): - """ Make a deep copy of the ConfigParser object """ + """ + Make a deep copy of the ConfigParser object. + + Parameters + ---------- + config : configparser.ConfigParser + The config parser to copy. + + Returns + ------- + new_config : configparser.ConfigParser + A deep copy of the config parser. + """ config_string = StringIO() config.write(config_string) # We must reset the buffer to make it ready for reading. From 99a8d72906cfa5cbad86219898fb39d68f14ddff Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 15:41:37 +0200 Subject: [PATCH 5/8] Fix formatting and update docstrings and docs for mpas_tools.mesh.mask --- conda_package/docs/mesh_conversion.rst | 155 ++++++++++++---------- conda_package/mpas_tools/mesh/mask.py | 176 +++++++++++-------------- 2 files changed, 166 insertions(+), 165 deletions(-) diff --git a/conda_package/docs/mesh_conversion.rst b/conda_package/docs/mesh_conversion.rst index 02dc915ca..95d73a6e6 100644 --- a/conda_package/docs/mesh_conversion.rst +++ b/conda_package/docs/mesh_conversion.rst @@ -210,83 +210,104 @@ Here is the full usage of ``MpasMaskCreator.x``: Mask Creation with Python Multiprocessing ========================================= -The Mask Creator is a serial code, and the algorithms it uses to find points in -a region or cells, edges and vertices along a transect are not particularly -efficient or sophisticated. +The ``mpas_tools.mesh.mask`` module provides a set of Python functions for +creating region and transect masks on MPAS meshes and longitude/latitude grids. +These functions are designed to be more efficient and flexible than the legacy +serial C++ Mask Creator, especially when used with Python's multiprocessing. + +Key Functions +------------- + ++-----------------------------------------------+-------------------------------------------------------------+ +| Function | Purpose | ++===============================================+=============================================================+ +| compute_mpas_region_masks | Create region masks (polygons) on MPAS meshes | ++-----------------------------------------------+-------------------------------------------------------------+ +| compute_mpas_transect_masks | Create transect masks (lines) on MPAS meshes | ++-----------------------------------------------+-------------------------------------------------------------+ +| compute_mpas_flood_fill_mask | Create a mask by flood-filling from seed points | ++-----------------------------------------------+-------------------------------------------------------------+ +| compute_lon_lat_region_masks | Create region masks on a 2D lon/lat grid | ++-----------------------------------------------+-------------------------------------------------------------+ +| compute_projection_grid_region_masks | Create region masks on a projected (e.g., polar) grid | ++-----------------------------------------------+-------------------------------------------------------------+ + +All of these functions accept a ``pool`` argument (a ``multiprocessing.Pool``) +to parallelize the computation, which is highly recommended for large meshes or +grids. If ``pool=None``, the computation will be performed serially, which may +be slow for large datasets. + +General Usage +------------- + +The typical workflow is: + +1. Open your MPAS mesh or grid as an ``xarray.Dataset``. +2. Read a ``geometric_features.FeatureCollection`` (e.g., from a GeoJSON file). +3. Optionally, create a multiprocessing pool using + :py:func:`mpas_tools.parallel.create_pool`. +4. Call the appropriate mask creation function, passing the mesh/grid, feature + collection, and pool. +5. Write the resulting masks to a NetCDF file using + :py:func:`mpas_tools.io.write_netcdf`. + +Example: Creating Region Masks on an MPAS Mesh +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To provide better efficiency and to enable more sophisticated algorithms (now -and in the future), a set of related python functions has been developed to -provide much (but not all) of the functionality of the C++ Mask Creator -described above. +.. code-block:: python -Computing MPAS Region Masks ---------------------------- + import xarray as xr + from geometric_features import read_feature_collection + from mpas_tools.mesh.mask import compute_mpas_region_masks + from mpas_tools.parallel import create_pool + from mpas_tools.io import write_netcdf -The function :py:func:`mpas_tools.mesh.mask.compute_mpas_region_masks()` -or the ``compute_mpas_region_masks`` command-line tool can -be used to create region masks on cells, edges and/or vertices given an MPAS -mesh :py:class:`xarray.Dataset` ``dsMesh`` and a -:py:class:`geometric_features.FeatureCollection` ``fcMask`` containing regions. -The resulting masks, in the variable ``regionCellMasks``, are 1 where the center -of the polygon corresponding to the cell, edge or vertex (see the -`MPAS Mesh Specification `_) -are inside the given region and 0 where they are outside. This function is -far more useful if the user provides a :py:class:`multiprocessing.Pool` in the -``pool`` argument. ``pool`` should be created at the beginning of the calling -code (when memory usage is small), possibly with -:py:func:`mpas_tools.parallel.create_pool()`, and terminated -(:py:meth:`multiprocessing.Pool.terminate`) before the code has finished. -The same pool can be used in multiple calls to this and the other Python-based -masking functions. If ``pool = None`` (the default), the masks are computed in -serial, which will likely be frustratingly slow. - -The ``chunkSize`` argument can be used to control how much work (how many cells, -edges or vertices) each process computes on at one time. A very small -``chunkSize`` will incur a high overhead, while a very large ``chunkSize`` will -lead to poor load balancing and infrequent progress updates (if -``showProgress = True``). The default ``chunkSize`` of 1000 seems to perform -well across a wide variety of mesh sizes and processor counts. - -It is a good idea to provide a ``logger`` (see :ref:`logging`) to get some -output as the mask creation is progressing. - -For efficiency, large shapes (e.g. the global coastline) are divided into -smaller "tiles". This subdivision is controlled with ``subdivisionThreshold``, -which should be set to a minimum size in degrees (latitude or longitude). If -a shape is larger than this, it will be divided into tiles. The underlying -algorithm first check bounding boxes of the resulting shapes against points -before performing the more time-consuming step of determining if the point is -inside the shape. The default value of 30 degrees performs much better than -no subdivision for large shapes (again, such as the global coastline), but -alternative values have not yet been explored. + dsMesh = xr.open_dataset('mesh.nc', decode_cf=False, decode_times=False) + fcMask = read_feature_collection('regions.geojson') + pool = create_pool(process_count=8) + dsMasks = compute_mpas_region_masks( + dsMesh, fcMask, maskTypes=('cell', 'vertex'), pool=pool + ) + write_netcdf(dsMasks, 'region_masks.nc') -The resulting variables are: +Arguments and Options +--------------------- - - ``regionCellMasks(nCells, nRegions)`` - a cell mask (1 for inside and 0 for - outside the region) for each region - - ``regionEdgeMasks(nEdges, nRegions)`` - an edge mask for each region - - ``regionVertexMasks(nVertices, nRegions)`` - a vertex mask for each region - - ``regionNames(nRegions, string64)`` - the names of the regions +All mask creation functions share several common arguments: -NetCDF fill values are used for invalid mask values, so ``nCellsInRegion``, -etc. are not produced. +- ``logger``: Optional logger for progress output. +- ``pool``: Optional multiprocessing pool for parallel computation. +- ``chunkSize``: Number of points to process per chunk (default: 1000). +- ``showProgress``: Whether to display a progress bar. +- ``subdivisionThreshold`` or ``subdivisionResolution``: Controls subdivision + of large polygons or transects for efficiency. -The command-line tool takes the following arguments: +Refer to the Python docstrings or the command-line ``--help`` output for +details on each function's arguments. -.. code-block:: +Performance Note +---------------- - $ compute_mpas_region_masks --help - usage: compute_mpas_region_masks [-h] -m MESH_FILE_NAME -g GEOJSON_FILE_NAME - -o MASK_FILE_NAME - [-t MASK_TYPES [MASK_TYPES ...]] - [-c CHUNK_SIZE] [--show_progress] - [-s SUBDIVISION] - [--process_count PROCESS_COUNT] - [--multiprocessing_method MULTIPROCESSING_METHOD] +For large meshes or grids, using a multiprocessing pool (via the ``pool`` +argument) is strongly recommended for reasonable performance. The pool should +be created early in your script, before large objects are loaded into memory, +and terminated when no longer needed. - optional arguments: - -h, --help show this help message and exit - -m MESH_FILE_NAME, --mesh_file_name MESH_FILE_NAME +Extensibility and Limitations +----------------------------- + +- The masking functions are extensible and can be adapted for new types of + features or grids. +- The algorithms use the ``shapely`` library for geometric operations, which + is designed for 2D Cartesian geometry. Care is taken to handle longitude + periodicity, but there may be limitations near the poles or for very large + polygons. +- For advanced use cases (e.g., custom mask types or additional properties), + see the source code and docstrings for guidance. + +See also the API documentation for :py:mod:`mpas_tools.mesh.mask` for further details. + +See also the API documentation for :py:mod:`mpas_tools.mesh.mask` for further details. An MPAS mesh file -g GEOJSON_FILE_NAME, --geojson_file_name GEOJSON_FILE_NAME An Geojson file containing mask regions diff --git a/conda_package/mpas_tools/mesh/mask.py b/conda_package/mpas_tools/mesh/mask.py index b0506b3bb..bbefe792d 100644 --- a/conda_package/mpas_tools/mesh/mask.py +++ b/conda_package/mpas_tools/mesh/mask.py @@ -1,10 +1,31 @@ +""" +mpas_tools.mesh.mask +==================== + +Efficient creation of region and transect masks on MPAS meshes and lon/lat +grids, with support for multiprocessing. + +This module provides functions to create region masks (from polygons), transect +masks (from lines), and flood-fill masks on MPAS meshes or longitude/latitude +grids. For large datasets, it is highly recommended to use the ``pool`` +argument to parallelize the computation. + +Main functions: +- compute_mpas_region_masks +- compute_mpas_transect_masks +- compute_mpas_flood_fill_mask +- compute_lon_lat_region_masks +- compute_projection_grid_region_masks + +See function docstrings for details. +""" + import argparse from functools import partial import numpy import progressbar import shapely.geometry -import shapely.ops import xarray as xr from geometric_features import read_feature_collection from igraph import Graph @@ -34,47 +55,38 @@ def compute_mpas_region_masks( subdivisionThreshold=30.0, ): """ - Use shapely and processes to create a set of masks from a feature - collection made up of regions (polygons) + Create region masks (polygons) on an MPAS mesh. Parameters ---------- dsMesh : xarray.Dataset - An MPAS mesh on which the masks should be created + MPAS mesh dataset. fcMask : geometric_features.FeatureCollection - A feature collection containing features to use to create the mask + Feature collection containing polygon regions. maskTypes : tuple of {'cell', 'edge', 'vertex'}, optional - Which type(s) of masks to make. Masks are created based on whether - the latitude and longitude associated with each of these locations - (e.g. ``dsMesh.latCell`` and ``dsMesh.lonCell`` for ``'cell'``) are - inside or outside of the regions in ``fcMask``. + Which types of masks to create. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for progress output. pool : multiprocessing.Pool, optional - A pool for performing multiprocessing + Pool for parallel computation. Strongly recommended for large meshes. chunkSize : int, optional - The number of cells, vertices or edges that are processed in one - operation. Experimentation has shown that 1000 is a reasonable - compromise between dividing the work into sufficient subtasks to - distribute the load and having sufficient work for each thread. + Number of points to process per chunk. showProgress : bool, optional - Whether to show a progress bar + Show a progress bar. subdivisionThreshold : float, optional - A threshold in degrees (lon or lat) above which the mask region will - be subdivided into smaller polygons for faster intersection checking + Subdivide large polygons for efficiency (degrees). Returns ------- - dsMask : xarray.Dataset - The masks - + dsMasks : xarray.Dataset + Dataset containing region masks and region names. """ suffixes = {'cell': 'Cell', 'edge': 'Edge', 'vertex': 'Vertex'} @@ -272,55 +284,45 @@ def compute_mpas_transect_masks( addEdgeSign=False, ): """ - Use shapely and processes to create a set of masks from a feature - collection made up of transects (line strings) + Create transect masks (lines) on an MPAS mesh. Parameters ---------- dsMesh : xarray.Dataset - An MPAS mesh on which the masks should be created + MPAS mesh dataset. fcMask : geometric_features.FeatureCollection - A feature collection containing features to use to create the mask + Feature collection containing transects (LineString or + MultiLineString). earthRadius : float - The radius of the earth in meters + Earth radius in meters. maskTypes : tuple of {'cell', 'edge', 'vertex'}, optional - Which type(s) of masks to make. Masks are created based on whether - the latitude and longitude associated with each of these locations - (e.g. ``dsMesh.latCell`` and ``dsMesh.lonCell`` for ``'cell'``) are - inside or outside of the transects in ``fcMask``. + Which types of masks to create. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for progress output. pool : multiprocessing.Pool, optional - A pool for performing multiprocessing + Pool for parallel computation. chunkSize : int, optional - The number of cells, vertices or edges that are processed in one - operation. Experimentation has shown that 1000 is a reasonable - compromise between dividing the work into sufficient subtasks to - distribute the load and having sufficient work for each thread. + Number of points to process per chunk. showProgress : bool, optional - Whether to show a progress bar + Show a progress bar. subdivisionResolution : float, optional - The maximum resolution (in meters) of segments in a transect. If a - transect is too coarse, it will be subdivided. Pass ``None`` for no - subdivision. + Subdivide coarse transects for efficiency (meters). addEdgeSign : bool, optional - Whether to add the ``edgeSign`` variable, which requires significant - extra computation + Whether to add edge sign variable (extra computation). Returns ------- - dsMask : xarray.Dataset - The masks - + dsMasks : xarray.Dataset + Dataset containing transect masks and transect names. """ suffixes = {'cell': 'Cell', 'edge': 'Edge', 'vertex': 'Vertex'} @@ -529,35 +531,29 @@ def compute_mpas_flood_fill_mask( dsMesh, fcSeed, daGrow=None, logger=None, workers=-1 ): """ - Flood fill from the given set of seed points to create a contiguous mask. - The flood fill operates using cellsOnCell, starting from the cells - whose centers are closest to the seed points. + Create a mask by flood-filling from seed points on an MPAS mesh. Parameters ---------- dsMesh : xarray.Dataset - An MPAS mesh on which the masks should be created + MPAS mesh dataset. fcSeed : geometric_features.FeatureCollection - A feature collection containing points at which to start the flood fill + Feature collection containing seed points. daGrow : xarray.DataArray, optional - A data array of size ``nCells`` with a mask that is 1 anywhere the - flood fill is allowed to grow. The default is that the mask is all - ones. + Mask where flood fill is allowed to grow (default: all ones). logger : logging.Logger, optional - A logger for the output if not stdout + Logger for progress output. workers : int, optional - The number of threads used for finding nearest neighbors. The default - is all available threads (``workers=-1``) + Number of threads for nearest neighbor search. Returns ------- - dsMask : xarray.Dataset - The masks - + dsMasks : xarray.Dataset + Dataset containing the flood-fill mask. """ dsMasks = xr.Dataset() @@ -665,44 +661,38 @@ def compute_lon_lat_region_masks( subdivisionThreshold=30.0, ): """ - Use shapely and processes to create a set of masks from a feature - collection made up of regions (polygons) on a tensor lon/lat grid + Create region masks on a 2D longitude/latitude grid. Parameters ---------- lon : numpy.ndarray - A 1D array of longitudes in degrees between -180 and 180 + 1D array of longitudes (degrees, -180 to 180). lat : numpy.ndarray - A 1D array of latitudes in degrees between -90 and 90 + 1D array of latitudes (degrees, -90 to 90). fcMask : geometric_features.FeatureCollection - A feature collection containing features to use to create the mask + Feature collection containing polygon regions. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for progress output. pool : multiprocessing.Pool, optional - A pool for performing multiprocessing + Pool for parallel computation. chunkSize : int, optional - The number of cells, vertices or edges that are processed in one - operation. Experimentation has shown that 1000 is a reasonable - compromise between dividing the work into sufficient subtasks to - distribute the load and having sufficient work for each thread. + Number of points to process per chunk. showProgress : bool, optional - Whether to show a progress bar + Show a progress bar. subdivisionThreshold : float, optional - A threshold in degrees (lon or lat) above which the mask region will - be subdivided into smaller polygons for faster intersection checking + Subdivide large polygons for efficiency (degrees). Returns ------- - dsMask : xarray.Dataset - The masks - + dsMasks : xarray.Dataset + Dataset containing region masks and region names. """ dsMasks = xr.Dataset() @@ -896,51 +886,41 @@ def compute_projection_grid_region_masks( ydim='y', ): """ - Use shapely and processes to create a set of masks from a feature - collection made up of regions (polygons) on a projection grid such as - a polar-stereographic grid. + Create region masks on a projected (e.g., polar) grid. Parameters ---------- lon : numpy.ndarray - A 2D array of longitudes in degrees between -180 and 180 + 2D array of longitudes (degrees, -180 to 180). lat : numpy.ndarray - A 2D array of latitudes in degrees between -90 and 90 + 2D array of latitudes (degrees, -90 to 90). fcMask : geometric_features.FeatureCollection - A feature collection containing features to use to create the mask + Feature collection containing polygon regions. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for progress output. pool : multiprocessing.Pool, optional - A pool for performing multiprocessing + Pool for parallel computation. chunkSize : int, optional - The number of cells, vertices or edges that are processed in one - operation. Experimentation has shown that 1000 is a reasonable - compromise between dividing the work into sufficient subtasks to - distribute the load and having sufficient work for each thread. + Number of points to process per chunk. showProgress : bool, optional - Whether to show a progress bar + Show a progress bar. subdivisionThreshold : float, optional - A threshold in degrees (lon or lat) above which the mask region will - be subdivided into smaller polygons for faster intersection checking - - xdim : str, optional - The name of the x dimension + Subdivide large polygons for efficiency (degrees). - ydim : str, optional - The name of the y dimension + xdim, ydim : str, optional + Names of the x and y dimensions. Returns ------- dsMask : xarray.Dataset - The masks - + Dataset containing region masks and region names. """ dsMasks = xr.Dataset() From 6ee5e02e6d22be1a917a8ed381287c920083bd81 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 15:47:59 +0200 Subject: [PATCH 6/8] Update mpas_to_xdmf docs --- conda_package/docs/mpas_to_xdmf.rst | 32 ++++++ .../viz/mpas_to_xdmf/mpas_to_xdmf.py | 97 ++++++++++++++----- 2 files changed, 106 insertions(+), 23 deletions(-) diff --git a/conda_package/docs/mpas_to_xdmf.rst b/conda_package/docs/mpas_to_xdmf.rst index 08c68f14e..0581b58ca 100644 --- a/conda_package/docs/mpas_to_xdmf.rst +++ b/conda_package/docs/mpas_to_xdmf.rst @@ -4,6 +4,10 @@ MPAS to XDMF Converter ====================== +.. seealso:: + For the Python API, see + :py:class:`mpas_tools.viz.mpas_to_xdmf.mpas_to_xdmf.MpasToXdmf`. + The MPAS to XDMF Converter is a tool designed to convert MPAS output files into XDMF + HDF5 format for visualization in tools like ParaView. This converter simplifies the process of working with MPAS data by providing a format that is @@ -21,6 +25,14 @@ Usage The converter can be used via its command-line interface ``mpas_to_xdmf`` or as a Python library. +.. note:: + Special variable keys: + - ``allOnCells``: all variables with dimension ``nCells`` + - ``allOnEdges``: all variables with dimension ``nEdges`` + - ``allOnVertices``: all variables with dimension ``nVertices`` + Extra dimensions (e.g., ``nVertLevels``) can be sliced using the ``-d`` + CLI option or the ``extra_dims`` argument in Python. + Command-Line Arguments ---------------------- The following arguments are supported: @@ -165,6 +177,26 @@ ParaView for visualization. Follow these steps: By following these steps, you can ensure that your MPAS data is correctly interpreted and visualized in ParaView. +Troubleshooting +=============== +Here are some common issues and solutions: + +- **Problem**: The converted files do not open in ParaView. + **Solution**: Ensure you are opening the ``.xdmf`` file and not the ``.h5`` + file. Also, check that you have selected the correct reader (preferably + **Xdmf3 Reader T**). + +- **Problem**: Time series data does not appear in ParaView. + **Solution**: Make sure you have used the correct variable for time + (``xtime`` by default) and that your time series files are correctly + specified. Also, check that you have selected the correct reader ( + **Xdmf3 Reader T**, not **Xdmf3 Reader S**). + +- **Problem**: Slicing does not seem to work. + **Solution**: Verify the syntax of your dimension slicing in the ``-d`` + option or ``extra_dims`` argument. Ensure the dimensions you are trying to + slice exist in your data. + References ========== - `ParaView Documentation `_ diff --git a/conda_package/mpas_tools/viz/mpas_to_xdmf/mpas_to_xdmf.py b/conda_package/mpas_tools/viz/mpas_to_xdmf/mpas_to_xdmf.py index bde105b41..6f555a841 100644 --- a/conda_package/mpas_tools/viz/mpas_to_xdmf/mpas_to_xdmf.py +++ b/conda_package/mpas_tools/viz/mpas_to_xdmf/mpas_to_xdmf.py @@ -1,3 +1,35 @@ +""" +MPAS to XDMF Converter +====================== + +This module provides the :py:class:`MpasToXdmf` class and a command-line +interface for converting MPAS NetCDF files to XDMF + HDF5 format, suitable +for visualization in ParaView and other tools. + +Features +-------- +- Supports cell-, edge-, and vertex-centered variables. +- Allows slicing along extra dimensions (e.g., nVertLevels). +- Combines mesh and time series files. +- Selects variables by name or by special keys (e.g., 'allOnCells'). + +Example Usage +------------- +Python: + >>> from mpas_tools.viz.mpas_to_xdmf.mpas_to_xdmf import MpasToXdmf + >>> converter = MpasToXdmf() + >>> converter.load(mesh_filename="mesh.nc", time_series_filenames="output.*.nc", + ... variables=["temperature", "salinity"], xtime_var="xtime") + >>> converter.convert_to_xdmf(out_dir="output_dir", extra_dims={"nVertLevels": [0, 1, 2]}) + +Command line: + $ mpas_to_xdmf -m mesh.nc -t output.*.nc -v temperature salinity -o output_dir -d nVertLevels=0:3 + +See Also +-------- +- Documentation: https://mpas-dev.github.io/MPAS-Tools/latest/mpas_to_xdmf.html +""" # noqa: E501 + from mpas_tools.viz.mpas_to_xdmf.io import ( _convert_to_xdmf, _load_dataset, @@ -16,9 +48,16 @@ class MpasToXdmf: Attributes ---------- ds : xarray.Dataset - An xarray Dataset to be converted. + The dataset containing variables to convert. ds_mesh : xarray.Dataset - An xarray Dataset representing the mesh. + The mesh dataset (may be the same as ds if no time series is provided). + + Notes + ----- + - Use the `load()` method to read mesh and time series files. + - Use `convert_to_xdmf()` to write XDMF and HDF5 files. + - Special variable keys: 'allOnCells', 'allOnEdges', 'allOnVertices'. + - Extra dimensions can be sliced using the `extra_dims` argument. """ def __init__(self, ds=None, ds_mesh=None, xtime_var=None): @@ -29,15 +68,13 @@ def __init__(self, ds=None, ds_mesh=None, xtime_var=None): Parameters ---------- ds : xarray.Dataset, optional - An xarray Dataset, all variables from which will be converted. If - ds_mesh is not provided, ds must also contain the mesh variables - if it is provided. + An xarray Dataset containing variables to convert. If ds_mesh is + not provided, ds must also contain mesh variables. ds_mesh : xarray.Dataset, optional - An xarray Dataset representing the mesh. These variables will not - be converted (unless they are also in ds). + An xarray Dataset representing the mesh. If not provided, ds is + used as the mesh. xtime_var : str, optional - Name of the variable containing time information, only used if - ``ds`` is not ``None``. + Name of the variable containing time information (e.g., 'xtime'). """ if ds is not None and ds_mesh is None: ds_mesh = ds @@ -61,13 +98,18 @@ def load( Parameters ---------- mesh_filename : str - Path to the MPAS mesh file. + Path to the MPAS mesh file (NetCDF). time_series_filenames : list of str or str, optional - List of filenames or a wildcard string for time series files. + List of NetCDF filenames or a wildcard string for time series + files. If None, only the mesh file is used. variables : list of str, optional - List of variables to convert. + List of variables to convert. Special keys: + - 'allOnCells': all variables with dimension 'nCells' + - 'allOnEdges': all variables with dimension 'nEdges' + - 'allOnVertices': all variables with dimension 'nVertices' + If None, all variables are included. xtime_var : str, optional - Name of the variable containing time information. + Name of the variable containing time information (e.g., 'xtime'). """ self.ds_mesh, self.ds = _load_dataset( mesh_filename=mesh_filename, @@ -78,7 +120,7 @@ def load( def convert_to_xdmf(self, out_dir, extra_dims=None, quiet=False): """ - Convert an xarray Dataset to XDMF + HDF5 format. + Convert the loaded xarray Dataset to XDMF + HDF5 format. Parameters ---------- @@ -86,16 +128,19 @@ def convert_to_xdmf(self, out_dir, extra_dims=None, quiet=False): Directory where XDMF and HDF5 files will be saved. extra_dims : dict, optional Dictionary mapping extra dimensions to their selected indices. - The keys are the names of the extra dimensions (e.g., - 'nVertLevels'), and the values are lists of indices to keep for - each dimension. For example: - { - 'nVertLevels': [0, 1, 2], - 'nParticles': [0, 2, 4, 6] - } - This allows slicing of fields along these dimensions. + Example: {'nVertLevels': [0, 1, 2]} + If None, all indices are included. quiet : bool, optional If True, suppress progress output. + + Output + ------ + - XDMF files (.xdmf) and HDF5 files (.h5) in the specified directory. + + Raises + ------ + ValueError + If required files or variables are missing. """ # Process extra dimensions self.ds = _process_extra_dims(self.ds, extra_dims) @@ -111,7 +156,13 @@ def convert_to_xdmf(self, out_dir, extra_dims=None, quiet=False): def main(): """ Command-line interface for the MpasToXdmf. - """ + + Usage + ----- + $ mpas_to_xdmf -m mesh.nc -t output.*.nc -v temperature salinity -o output_dir -d nVertLevels=0:3 + + See `mpas_to_xdmf --help` for all options. + """ # noqa: E501 import argparse parser = argparse.ArgumentParser( From b339636ed5ca964b2003712f11c005c7718b2e87 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 16:00:10 +0200 Subject: [PATCH 7/8] Update mpas_tools.mesh.conversion formatting, docstrings and docs --- conda_package/docs/mesh_conversion.rst | 153 +++++++------------ conda_package/mpas_tools/mesh/conversion.py | 159 +++++++++++--------- 2 files changed, 148 insertions(+), 164 deletions(-) diff --git a/conda_package/docs/mesh_conversion.rst b/conda_package/docs/mesh_conversion.rst index 95d73a6e6..0e554e690 100644 --- a/conda_package/docs/mesh_conversion.rst +++ b/conda_package/docs/mesh_conversion.rst @@ -9,26 +9,24 @@ Mesh Conversion Mesh Converter ============== -The command-line tool ``MpasMeshConverter.x`` and the corresponding wrapper -function :py:func:`mpas_tools.mesh.conversion.convert` are used to convert a -dataset describing cell locations, vertex locations, and connectivity between -cells and vertices into a valid MPAS mesh following the `MPAS mesh specification +The ``MpasMeshConverter.x`` command-line tool and the Python wrapper +:py:func:`mpas_tools.mesh.conversion.convert` convert a dataset describing +cell and vertex locations and connectivity into a valid MPAS mesh that +follows the `MPAS mesh specification `_. -Example call to the command-line tool: +Example usage (command line): .. code-block:: $ planar_hex --nx 4 --ny 4 --dc 10e3 -o base_mesh.nc $ MpasMeshConverter.x base_mesh.nc mesh.nc -This example uses the ``planar_hex`` tool to generate a small, doubly periodic -MPAS mesh with 10-km cells, then uses the MPAS mesh converter to make sure the -resulting mesh adheres to the mesh specification. ``MpasMeshConverter.x`` takes -two arguments, the input and output mesh files, and will prompt the user for -file names if these arguments are not supplied. +This generates a small, doubly periodic MPAS mesh and converts it to a +spec-compliant format. ``MpasMeshConverter.x`` takes input and output mesh +filenames as arguments; if omitted, it prompts for them. -The same example in a python script can be accomplished with: +Equivalent Python usage: .. code-block:: python @@ -41,9 +39,8 @@ The same example in a python script can be accomplished with: ds = convert(ds) write_netcdf(ds, 'mesh.nc') -Regardless of which of these methods is used, the input mesh must define the -following dimensions, variables and global attributes (not the dimension sizes -are merely examples from the mesh generated in the previous examples): +**Input requirements:** The mesh must define the following dimensions, +variables, and global attributes (example sizes shown): .. code-block:: @@ -67,12 +64,10 @@ are merely examples from the mesh generated in the previous examples): :sphere_radius = 0. ; :is_periodic = "YES" ; -The variable ``meshDensity`` is required for historical reasons and is passed -unchanged to the resulting mesh. It can contain all ones (or zeros), the -resolution of the mesh in kilometers, or whatever field the user wishes. +The ``meshDensity`` variable is required for historical reasons and is passed +unchanged to the output mesh. -The following global attributes are optional but will be passed on to the -resulting mesh: +Optional global attributes (passed through): .. code-block:: @@ -81,37 +76,27 @@ resulting mesh: :y_period = 34641.0161513775 ; :history = "Tue May 26 20:58:10 2020: /home/xylar/miniconda3/envs/mpas/bin/planar_hex --nx 4 --ny 4 --dc 10e3 -o base_mesh.nc" ; -The ``file_id`` global attribute is also optional and is preserved in the -resulting mesh as ``parent_id``. A new ``file_id`` (a random hash tag) is -generated by the mesh converter. +If present, the ``file_id`` attribute is preserved as ``parent_id`` in the +output mesh, and a new ``file_id`` is generated. -The resulting dataset has all the dimensions and variables required for an MPAS -mesh. - -The mesh converter also generates a file called ``graph.info`` that is used to -create graph partitions from tools like `Metis -`_. This file is not stored by -default when the python ``cull`` function is used but can be specified with -the ``graphInfoFileName`` argument. The python function also takes a ``logger`` -that can be used to capture the output that would otherwise go to the screen -via ``stdout`` and ``stderr``. +The converter also generates a ``graph.info`` file for graph partitioning +tools (e.g., Metis). In Python, this file is only written if the +``graphInfoFileName`` argument is provided. .. _cell_culler: Cell Culler =========== -The command-line tool ``MpasCellCuller.x`` and the corresponding wrapper -function :py:func:`mpas_tools.mesh.conversion.cull` are used to cull cells from -a mesh based on the ``cullCell`` field in the input dataset and/or the provided -masks. The contents of the ``cullCell`` field is merge with the mask(s) from a -masking dataset and the inverse of the mask(s) from an inverse-masking dataset. -Then, a preserve-masking dataset is used to determine where cells should *not* -be culled. +The ``MpasCellCuller.x`` command-line tool and the Python wrapper +:py:func:`mpas_tools.mesh.conversion.cull` remove cells from a mesh based on +the ``cullCell`` field and/or provided mask datasets. The culling logic is: + +- The ``cullCell`` field, mask(s) from a masking dataset, and the inverse of + mask(s) from an inverse-masking dataset are merged (union). +- A preserve-masking dataset indicates cells that must *not* be culled. -Example call to the command-line tool, assuming you start with a spherical mesh -called ``base_mesh.nc`` (not the doubly periodic planar mesh from the examples -above): +Example workflow (command line): .. code-block:: @@ -119,14 +104,10 @@ above): $ MpasMaskCreator.x base_mesh.nc land.nc -f land.geojson $ MpasCellCuller.x base_mesh.nc culled_mesh.nc -m land.nc -This example uses the ``merge_features`` tool from the ``geometric_features`` -conda package to get a geojson file containing land coverage. Then, it uses -the mask creator (see the next section) to create a mask on the MPAS mesh that -is one inside this region and zero outside. Finally, it culls the base mesh -to only those cells where the mask is zero (i.e. the mask indicates which cells -are to be removed). +This merges features to create a land mask, generates a mask on the mesh, +and culls cells where the mask is 1. -The same example in a python script can be accomplished with: +Equivalent Python workflow: .. code-block:: python @@ -135,75 +116,55 @@ The same example in a python script can be accomplished with: from mpas_tools.mesh.conversion import mask, cull gf = GeometricFeatures() - - fcLandCoverage = gf.read(componentName='natural_earth', objectType='region', - featureNames=['Land Coverage']) - + fcLandCoverage = gf.read( + componentName='natural_earth', + objectType='region', + featureNames=['Land Coverage'] + ) dsBaseMesh = xarray.open_dataset('base_mesh.nc') dsLandMask = mask(dsBaseMesh, fcMask=fcLandCoverage) dsCulledMesh = cull(dsBaseMesh, dsMask=dsLandMask) write_netcdf(dsCulledMesh, 'culled_mesh.nc') -Here is the full usage of ``MpasCellCuller.x``: +Full usage of ``MpasCellCuller.x``: .. code-block:: MpasCellCuller.x [input_name] [output_name] [[-m/-i/-p] masks_name] [-c] - input_name: - This argument specifies the input MPAS mesh. - output_name: - This argument specifies the output culled MPAS mesh. - If not specified, it defaults to culled_mesh.nc, but - it is required if additional arguments are specified. - -m/-i/-p: - These arguments control how a set of masks is used when - culling a mesh. - The -m argument applies a mask to cull based on (i.e. - where the mask is 1, the mesh will be culled). - The -i argument applies the inverse mask to cull based - on (i.e. where the mask is 0, the mesh will be - culled). - The -p argument forces any marked cells to not be - culled. - If this argument is specified, the masks_name argument - is required - -c: - Output the mapping from old to new mesh (cellMap) in - cellMapForward.txt, - and output the reverse mapping from new to old mesh in - cellMapBackward.txt. + input_name: Input MPAS mesh. + output_name: Output culled MPAS mesh (default: culled_mesh.nc). + -m/-i/-p: Masking options: + -m: Mask file(s) (1 = cull cell). + -i: Inverse mask file(s) (0 = cull cell). + -p: Preserve mask file(s) (1 = do not cull cell). + -c: Output cell mapping files. .. _mask_creator: Mask Creator ============ -The command-line tool ``MpasMaskCreator.x`` and the corresponding wrapper -function :py:func:`mpas_tools.mesh.conversion.mask` are used to create a set of -region masks either from mask features or from seed points to be used to flood -fill a contiguous block of cells. +The ``MpasMaskCreator.x`` command-line tool and the Python wrapper +:py:func:`mpas_tools.mesh.conversion.mask` create region masks from features +or seed points. -Examples usage of the mask creator can be found above under the Cell Culler. +Example usage is shown above under Cell Culler. -Here is the full usage of ``MpasMaskCreator.x``: +Full usage of ``MpasMaskCreator.x``: .. code-block:: MpasMaskCreator.x in_file out_file [ [-f/-s] file.geojson ] [--positive_lon] - in_file: This argument defines the input file that masks will be created for. - out_file: This argument defines the file that masks will be written to. - -s file.geojson: This argument pair defines a set of points (from the geojson point definition) - that will be used as seed points in a flood fill algorithim. This is useful when trying to remove isolated cells from a mesh. - -f file.geojson: This argument pair defines a set of geojson features (regions, transects, or points) - that will be converted into masks / lists. - --positive_lon: It is unlikely that you want this argument. In rare cases when using a non-standard geojson - file where the logitude ranges from 0 to 360 degrees (with the prime meridian at 0 degrees), use this flag. - If this flag is not set, the logitude range is -180-180 with 0 degrees being the prime meridian, which is the - case for standar geojson files including all features from the geometric_feature repo. - The fact that longitudes in the input MPAS mesh range from 0 to 360 is not relevant to this flag, - as latitude and longitude are recomputed internally from Cartesian coordinates. - Whether this flag is passed in or not, any longitudes written are in the 0-360 range. + in_file: Input mesh file. + out_file: Output mask file. + -s file.geojson: Use points as seed locations for flood fill. + -f file.geojson: Use features (regions, transects, or points) for masks. + --positive_lon: Use 0-360 longitude range for non-standard geojson files. + +.. note:: + Temporary files are created and deleted automatically by the Python wrappers. + Command-line tools require the relevant executables to be available in the path. .. _py_mask_creation: diff --git a/conda_package/mpas_tools/mesh/conversion.py b/conda_package/mpas_tools/mesh/conversion.py index 6d25a4149..bb26ffd71 100644 --- a/conda_package/mpas_tools/mesh/conversion.py +++ b/conda_package/mpas_tools/mesh/conversion.py @@ -1,8 +1,9 @@ import os +import shutil +from tempfile import TemporaryDirectory, mkdtemp + import numpy as np import xarray as xr -from tempfile import TemporaryDirectory, mkdtemp -import shutil import mpas_tools.io from mpas_tools.io import write_netcdf @@ -11,40 +12,46 @@ def convert(dsIn, graphInfoFileName=None, logger=None, dir=None): """ - Use ``MpasMeshConverter.x`` to convert an input mesh to a valid MPAS - mesh that is fully compliant with the MPAS mesh specification. - https://mpas-dev.github.io/files/documents/MPAS-MeshSpec.pdf + Convert an input mesh to a valid MPAS mesh using ``MpasMeshConverter.x``. + + This function ensures the mesh is fully compliant with the + `MPAS mesh specification `_. + It writes the input dataset to a temporary file, runs the converter, and + loads the output as an xarray.Dataset. Parameters ---------- dsIn : xarray.Dataset - A data set to convert + Input dataset describing the mesh to convert. graphInfoFileName : str, optional - A file path (relative or absolute) where the graph file (typically - ``graph.info`` should be written out. By default, ``graph.info`` is - not saved. + Path to save the generated ``graph.info`` file. If not provided, + the file is not saved. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for capturing output from the converter. dir : str, optional - A directory in which a temporary directory will be added with files - produced during conversion and then deleted upon completion. + Directory in which to create a temporary working directory. Returns ------- dsOut : xarray.Dataset - The MPAS mesh - """ + The converted MPAS mesh dataset. + + Notes + ----- + - Requires ``MpasMeshConverter.x`` to be available in the system path. + - Temporary files are created and deleted automatically. + """ # noqa: E501 if dir is not None: dir = os.path.abspath(dir) tempdir = mkdtemp(dir=dir) - inFileName = '{}/mesh_in.nc'.format(tempdir) + inFileName = f'{tempdir}/mesh_in.nc' write_netcdf(dsIn, inFileName) - outFileName = '{}/mesh_out.nc'.format(tempdir) + outFileName = f'{tempdir}/mesh_out.nc' if graphInfoFileName is not None: graphInfoFileName = os.path.abspath(graphInfoFileName) @@ -57,64 +64,68 @@ def convert(dsIn, graphInfoFileName=None, logger=None, dir=None): dsOut.load() if graphInfoFileName is not None: - shutil.copyfile('{}/graph.info'.format(outDir), - graphInfoFileName) + shutil.copyfile(f'{outDir}/graph.info', graphInfoFileName) return dsOut -def cull(dsIn, dsMask=None, dsInverse=None, dsPreserve=None, - graphInfoFileName=None, logger=None, dir=None): +def cull( + dsIn, + dsMask=None, + dsInverse=None, + dsPreserve=None, + graphInfoFileName=None, + logger=None, + dir=None, +): """ - Use ``MpasCellCuller.x`` to cull cells from a mesh based on the - ``cullCell`` field in the input file or DataSet and/or the provided masks. - ``cullCell``, dsMask and dsInverse are merged together so that the final - mask is the union of these 3. The preserve mask is then used to determine - where cells should *not* be culled. + Cull cells from a mesh using ``MpasCellCuller.x``. + + The function removes cells based on the ``cullCell`` field in the input + dataset and/or provided mask datasets. Masks are merged as follows: + - ``cullCell``, ``dsMask``, and ``dsInverse`` are combined (union). + - ``dsPreserve`` indicates cells that must not be culled. Parameters ---------- dsIn : xarray.Dataset - A data set to cull, possibly with a ``cullCell`` field set to one where - cells should be removed + Input mesh dataset, optionally with a ``cullCell`` field. dsMask : xarray.Dataset or list, optional - A data set (or data sets) with region masks that are 1 where cells - should be culled + Dataset(s) with region masks (1 where cells should be culled). dsInverse : xarray.Dataset or list, optional - A data set (or data sets) with region masks that are 0 where cells - should be culled + Dataset(s) with region masks (0 where cells should be culled). dsPreserve : xarray.Dataset or list, optional - A data set (or data sets) with region masks that are 1 where cells - should *not* be culled + Dataset(s) with region masks (1 where cells should NOT be culled). graphInfoFileName : str, optional - A file path (relative or absolute) where the graph file (typically - ``culled_graph.info`` should be written out. By default, - ``culled_graph.info`` is not saved. + Path to save the generated ``culled_graph.info`` file. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for capturing output from the culler. dir : str, optional - A directory in which a temporary directory will be added with files - produced during cell culling and then deleted upon completion. + Directory in which to create a temporary working directory. Returns ------- dsOut : xarray.Dataset - The culled mesh + The culled mesh dataset. + Notes + ----- + - Requires ``MpasCellCuller.x`` to be available in the system path. + - Temporary files are created and deleted automatically. """ if dir is not None: dir = os.path.abspath(dir) tempdir = mkdtemp(dir=dir) - inFileName = '{}/ds_in.nc'.format(tempdir) + inFileName = f'{tempdir}/ds_in.nc' dsIn = _masks_to_int(dsIn) write_netcdf(dsIn, inFileName) - outFileName = '{}/ds_out.nc'.format(tempdir) + outFileName = f'{tempdir}/ds_out.nc' args = ['MpasCellCuller.x', inFileName, outFileName] @@ -123,7 +134,7 @@ def cull(dsIn, dsMask=None, dsInverse=None, dsPreserve=None, dsMask = [dsMask] for index, ds in enumerate(dsMask): ds = _masks_to_int(ds) - fileName = '{}/mask{}.nc'.format(tempdir, index) + fileName = f'{tempdir}/mask{index}.nc' write_netcdf(ds, fileName) args.extend(['-m', fileName]) @@ -132,7 +143,7 @@ def cull(dsIn, dsMask=None, dsInverse=None, dsPreserve=None, dsInverse = [dsInverse] for index, ds in enumerate(dsInverse): ds = _masks_to_int(ds) - fileName = '{}/inverse{}.nc'.format(tempdir, index) + fileName = f'{tempdir}/inverse{index}.nc' write_netcdf(ds, fileName) args.extend(['-i', fileName]) @@ -141,7 +152,7 @@ def cull(dsIn, dsMask=None, dsInverse=None, dsPreserve=None, dsPreserve = [dsPreserve] for index, ds in enumerate(dsPreserve): ds = _masks_to_int(ds) - fileName = '{}/preserve{}.nc'.format(tempdir, index) + fileName = f'{tempdir}/preserve{index}.nc' write_netcdf(ds, fileName) args.extend(['-p', fileName]) @@ -156,40 +167,41 @@ def cull(dsIn, dsMask=None, dsInverse=None, dsPreserve=None, dsOut.load() if graphInfoFileName is not None: - shutil.copyfile('{}/culled_graph.info'.format(outDir), - graphInfoFileName) + shutil.copyfile(f'{outDir}/culled_graph.info', graphInfoFileName) return dsOut def mask(dsMesh, fcMask=None, logger=None, dir=None, cores=1): """ - Use ``compute_mpas_region_masks`` to create a set of region masks either - from mask feature collections + Create region masks on an MPAS mesh using ``compute_mpas_region_masks``. Parameters ---------- - dsMesh : xarray.Dataset, optional - An MPAS mesh on which the masks should be created + dsMesh : xarray.Dataset + MPAS mesh dataset. fcMask : geometric_features.FeatureCollection, optional - A feature collection containing features to use to create the mask + Feature collection with regions to mask. logger : logging.Logger, optional - A logger for the output if not stdout + Logger for capturing output. dir : str, optional - A directory in which a temporary directory will be added with files - produced during mask creation and then deleted upon completion. + Directory in which to create a temporary working directory. cores : int, optional - The number of cores to use for python multiprocessing + Number of processes for Python multiprocessing. Returns ------- dsMask : xarray.Dataset - The masks + Dataset containing the computed masks. + Notes + ----- + - Requires ``compute_mpas_region_masks`` command-line tool. + - Temporary files are created and deleted automatically. """ if dir is not None: dir = os.path.abspath(dir) @@ -200,14 +212,21 @@ def mask(dsMesh, fcMask=None, logger=None, dir=None, cores=1): geojsonFileName = f'{tempdir}/mask.geojson' fcMask.to_geojson(geojsonFileName) - args = ['compute_mpas_region_masks', - '-m', inFileName, - '-o', outFileName, - '-g', geojsonFileName, - '-t', 'cell', - '--process_count', f'{cores}', - '--format', mpas_tools.io.default_format, - ] + args = [ + 'compute_mpas_region_masks', + '-m', + inFileName, + '-o', + outFileName, + '-g', + geojsonFileName, + '-t', + 'cell', + '--process_count', + f'{cores}', + '--format', + mpas_tools.io.default_format, + ] if mpas_tools.io.default_engine is not None: args.extend(['--engine', mpas_tools.io.default_engine]) @@ -220,9 +239,13 @@ def mask(dsMesh, fcMask=None, logger=None, dir=None, cores=1): def _masks_to_int(dsIn): - """ Convert masks to int type required by the cell culler """ - var_list = ['regionCellMasks', 'transectCellMasks', 'cullCell', - 'cellSeedMask'] + """Convert mask variables to int32 type as required by the cell culler.""" + var_list = [ + 'regionCellMasks', + 'transectCellMasks', + 'cullCell', + 'cellSeedMask', + ] dsOut = xr.Dataset(dsIn, attrs=dsIn.attrs) for var in var_list: if var in dsIn: From 79525c6dd715dccb85c676c6eb735deff93513f5 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Wed, 9 Jul 2025 16:06:32 +0200 Subject: [PATCH 8/8] Update mpas_tools.mesh.interpolation formatting, docstrings and docs --- conda_package/docs/interpolation.rst | 18 ++++- .../mpas_tools/mesh/interpolation.py | 80 ++++++++++++++----- 2 files changed, 74 insertions(+), 24 deletions(-) diff --git a/conda_package/docs/interpolation.rst b/conda_package/docs/interpolation.rst index c6fa90529..87b0b6cf8 100644 --- a/conda_package/docs/interpolation.rst +++ b/conda_package/docs/interpolation.rst @@ -9,12 +9,16 @@ Interpolation Previously, various tools in this package used ``scipy`` for interpolation. However, the interpolation routines in ``scipy`` are not well suited to -interpolation from regular grids to MPAS meshes---they are slow and very memory +interpolation from regular grids to MPAS meshes—they are slow and very memory intensive, particularly for large meshes. -For bilinear interpolation from a tensor lon/lat grid to an MPAS mesh, it will -be faster to use the function -:py:func:`mpas_tools.mesh.interpolation.interp_bilin()` +For bilinear interpolation from a tensor (regular) lon/lat grid to an MPAS +mesh, it is much faster and more memory-efficient to use the function +:py:func:`mpas_tools.mesh.interpolation.interp_bilin()`. +This function is specifically designed for interpolating from a regular grid +(e.g., longitude/latitude) to the unstructured cell centers of an MPAS mesh, +and should be preferred over generic `scipy` routines for this use case. + Here is an example where we define cell width for an EC mesh (see :ref:`ec_mesh`), read in longitude and latitude from an MPAS mesh, and interpolate the cell widths to cell centers on the MPAS mesh. @@ -46,3 +50,9 @@ interpolate the cell widths to cell centers on the MPAS mesh. latCell = np.rad2deg(latCell) cellWidthOnMpas = interp_bilin(lon, lat, cellWidth, lonCell, latCell) + +.. note:: + - All cell center coordinates must be within the bounds of the input grid. + - No extrapolation is performed. + - For geographic data, use degrees for longitude/latitude to avoid + round-off issues. diff --git a/conda_package/mpas_tools/mesh/interpolation.py b/conda_package/mpas_tools/mesh/interpolation.py index 4f76ebe27..689bb6961 100644 --- a/conda_package/mpas_tools/mesh/interpolation.py +++ b/conda_package/mpas_tools/mesh/interpolation.py @@ -1,37 +1,76 @@ +""" +Efficient interpolation from regular (tensor) grids to MPAS mesh cell centers. + +This module provides fast, memory-efficient routines for bilinear interpolation +from a regular grid (e.g., longitude/latitude) to the unstructured cell centers +of an MPAS mesh. It is intended as a replacement for `scipy` interpolation +routines, which are slow and memory-intensive for large meshes. + +Main function: +- interp_bilin: Bilinear interpolation from a tensor grid to MPAS mesh cell + centers. + +Note: + - Input coordinates (x, y) and cell center coordinates (xCell, yCell) + should be in the same units (typically degrees for lon/lat). + - No extrapolation is performed: all xCell/yCell must be within the bounds + of x/y. +""" + import numpy as np def interp_bilin(x, y, field, xCell, yCell): """ - Perform bilinear interpolation of ``field`` on a tensor grid to cell centers - on an MPAS mesh. ``xCell`` and ``yCell`` must be bounded by ``x`` and ``y``, - respectively. + Perform bilinear interpolation of ``field`` on a regular (tensor) grid to + cell centers on an MPAS mesh. - If x and y coordinates are longitude and latitude, respectively, it is - recommended that they be passed in degrees to avoid round-off problems at - the north and south poles and at the date line. + This function is designed to be much faster and more memory-efficient than + using `scipy.interpolate` routines for large MPAS meshes. Parameters ---------- x : ndarray - x coordinate of the input field (length n) + 1D array of x coordinates of the input grid (length n). + For geographic data, this is typically longitude in degrees. y : ndarray - y coordinate fo the input field (length m) + 1D array of y coordinates of the input grid (length m). + For geographic data, this is typically latitude in degrees. field : ndarray - a field of size m x n + 2D array of field values of shape (m, n), defined on the (y, x) grid. xCell : ndarray - x coordinate of MPAS cell centers + 1D array of x coordinates of MPAS cell centers (same units as x). yCell : ndarray - y coordinate of MPAS cell centers + 1D array of y coordinates of MPAS cell centers (same units as y). Returns ------- mpasField : ndarray - ``field`` interpoyed to MPAS cell centers + 1D array of interpolated field values at MPAS cell centers. + + Notes + ----- + - All xCell and yCell values must be within the bounds of x and y. + - No extrapolation is performed. + - For longitude/latitude grids, it is recommended to use degrees to avoid + round-off issues at the poles or dateline. + - This function is intended for use cases where the input grid is regular + (tensor product), not scattered points. + + Examples + -------- + >>> import numpy as np + >>> from mpas_tools.mesh.interpolation import interp_bilin + >>> x = np.linspace(-180, 180, 361) + >>> y = np.linspace(-90, 90, 181) + >>> field = np.random.rand(181, 361) + >>> xCell = np.array([0.0, 45.0]) + >>> yCell = np.array([0.0, 45.0]) + >>> values = interp_bilin(x, y, field, xCell, yCell) """ assert np.all(xCell >= x[0]) @@ -55,16 +94,17 @@ def interp_bilin(x, y, field, xCell, yCell): # accordingly mask = xIndices == len(x) - 1 xIndices[mask] -= 1 - xFrac[mask] += 1. + xFrac[mask] += 1.0 mask = yIndices == len(y) - 1 yIndices[mask] -= 1 - yFrac[mask] += 1. - - mpasField = \ - (1. - xFrac) * (1. - yFrac) * field[yIndices, xIndices] + \ - xFrac * (1. - yFrac) * field[yIndices, xIndices + 1] + \ - (1. - xFrac) * yFrac * field[yIndices + 1, xIndices] + \ - xFrac * yFrac * field[yIndices + 1, xIndices + 1] + yFrac[mask] += 1.0 + + mpasField = ( + (1.0 - xFrac) * (1.0 - yFrac) * field[yIndices, xIndices] + + xFrac * (1.0 - yFrac) * field[yIndices, xIndices + 1] + + (1.0 - xFrac) * yFrac * field[yIndices + 1, xIndices] + + xFrac * yFrac * field[yIndices + 1, xIndices + 1] + ) return mpasField