From eee47c265efed10a132f12fc332a460dd7cb8efb Mon Sep 17 00:00:00 2001
From: Ben Evans <ben.richard.evans@gmail.com>
Date: Tue, 4 Nov 2025 16:02:14 +0000
Subject: [PATCH 1/3] Initial ORAS/ORCS grid compatibility implementation

---
 preprocess_toolbox/dataset/orca_grid.py | 169 ++++++++++++++++++++++++
 preprocess_toolbox/dataset/process.py   |   1 +
 2 files changed, 170 insertions(+)
 create mode 100644 preprocess_toolbox/dataset/orca_grid.py

diff --git a/preprocess_toolbox/dataset/orca_grid.py b/preprocess_toolbox/dataset/orca_grid.py
new file mode 100644
index 0000000..07ba4eb
--- /dev/null
+++ b/preprocess_toolbox/dataset/orca_grid.py
@@ -0,0 +1,169 @@
+"""
+Support for NEMO ORCA grid regridding to regular lat/lon grids.
+
+ORCA grids (like ORCA025 used in ORAS5) are tri-polar grids with 2D latitude/longitude
+coordinates that don't map to standard EPSG codes.
+"""
+import logging
+import numpy as np
+import xarray as xr
+from scipy.interpolate import griddata
+
+
+def regrid_orca_to_latlon(
+    orca_data: xr.DataArray,
+    target_lats: np.ndarray,
+    target_lons: np.ndarray,
+    method: str = 'linear'
+) -> xr.DataArray:
+    """
+    Regrid ORCA grid data to a regular lat/lon grid.
+    
+    Args:
+        orca_data: xarray DataArray with ORCA grid data
+        target_lats: 1D array of target latitudes
+        target_lons: 1D array of target longitudes
+        method: Interpolation method ('linear', 'nearest', 'cubic')
+        
+    Returns:
+        Regridded data on regular lat/lon grid
+    """
+    # Get 2D lat/lon from ORCA data
+    if 'nav_lat' in orca_data.coords and 'nav_lon' in orca_data.coords:
+        source_lats = orca_data.coords['nav_lat'].values
+        source_lons = orca_data.coords['nav_lon'].values
+    elif 'latitude' in orca_data.coords and 'longitude' in orca_data.coords:
+        source_lats = orca_data.coords['latitude'].values
+        source_lons = orca_data.coords['longitude'].values
+    else:
+        raise ValueError("Cannot find latitude/longitude coordinates in ORCA data")
+    
+    # Flatten source coordinates and data
+    source_points = np.column_stack([
+        source_lons.ravel(),
+        source_lats.ravel()
+    ])
+    source_values = orca_data.values.ravel()
+    
+    # Create target grid
+    target_lon_grid, target_lat_grid = np.meshgrid(target_lons, target_lats)
+    target_points = np.column_stack([
+        target_lon_grid.ravel(),
+        target_lat_grid.ravel()
+    ])
+    
+    # Remove NaN values from source
+    valid_mask = ~np.isnan(source_values)
+    source_points_valid = source_points[valid_mask]
+    source_values_valid = source_values[valid_mask]
+    
+    # Interpolate
+    logging.info(f"Regridding ORCA data using {method} interpolation")
+    regridded_values = griddata(
+        source_points_valid,
+        source_values_valid,
+        target_points,
+        method=method,
+        fill_value=np.nan
+    )
+    
+    # Reshape to target grid
+    regridded_grid = regridded_values.reshape(len(target_lats), len(target_lons))
+    
+    # Create output DataArray
+    regridded_data = xr.DataArray(
+        regridded_grid,
+        coords={
+            'latitude': target_lats,
+            'longitude': target_lons
+        },
+        dims=['latitude', 'longitude']
+    )
+    
+    return regridded_data
+
+
+def orca_coord_processing(ref_cube, orca_cube):
+    """
+    Coordinate processing function for ORCA grids to use with regrid_dataset.
+    
+    This function prepares ORCA data for regridding by converting it to xarray,
+    regridding to the reference grid, and converting back to iris cube.
+    
+    Args:
+        ref_cube: Reference iris cube with target grid
+        orca_cube: ORCA grid iris cube to regrid
+        
+    Returns:
+        Regridded iris cube on the same grid as ref_cube
+    """
+    import iris
+    from iris.coords import DimCoord
+    
+    logging.info("Processing ORCA grid coordinates")
+    
+    # Get target lat/lon from reference cube
+    target_lats = ref_cube.coord('latitude').points
+    target_lons = ref_cube.coord('longitude').points
+    
+    # Convert ORCA cube to xarray for easier handling
+    # Handle potential time dimension
+    if orca_cube.ndim == 3:  # time, y, x
+        regridded_slices = []
+        for time_idx in range(orca_cube.shape[0]):
+            orca_slice = orca_cube[time_idx]
+            orca_data = xr.DataArray(
+                orca_slice.data,
+                coords={
+                    'nav_lat': (['y', 'x'], orca_slice.coord('latitude').points),
+                    'nav_lon': (['y', 'x'], orca_slice.coord('longitude').points)
+                },
+                dims=['y', 'x']
+            )
+            regridded_slice = regrid_orca_to_latlon(orca_data, target_lats, target_lons)
+            regridded_slices.append(regridded_slice.values)
+        
+        regridded_data = np.stack(regridded_slices)
+        
+        # Create new iris cube with regridded data
+        lat_coord = DimCoord(target_lats, standard_name='latitude', units='degrees')
+        lon_coord = DimCoord(target_lons, standard_name='longitude', units='degrees')
+        time_coord = orca_cube.coord('time')
+        
+        regridded_cube = iris.cube.Cube(
+            regridded_data,
+            dim_coords_and_dims=[
+                (time_coord, 0),
+                (lat_coord, 1),
+                (lon_coord, 2)
+            ]
+        )
+    else:  # 2D: y, x
+        orca_data = xr.DataArray(
+            orca_cube.data,
+            coords={
+                'nav_lat': (['y', 'x'], orca_cube.coord('latitude').points),
+                'nav_lon': (['y', 'x'], orca_cube.coord('longitude').points)
+            },
+            dims=['y', 'x']
+        )
+        regridded_data = regrid_orca_to_latlon(orca_data, target_lats, target_lons)
+        
+        lat_coord = DimCoord(target_lats, standard_name='latitude', units='degrees')
+        lon_coord = DimCoord(target_lons, standard_name='longitude', units='degrees')
+        
+        regridded_cube = iris.cube.Cube(
+            regridded_data.values,
+            dim_coords_and_dims=[
+                (lat_coord, 0),
+                (lon_coord, 1)
+            ]
+        )
+    
+    # Copy metadata from original cube
+    regridded_cube.standard_name = orca_cube.standard_name
+    regridded_cube.long_name = orca_cube.long_name
+    regridded_cube.var_name = orca_cube.var_name
+    regridded_cube.units = orca_cube.units
+    
+    return regridded_cube
diff --git a/preprocess_toolbox/dataset/process.py b/preprocess_toolbox/dataset/process.py
index 2cda6ad..cba2c55 100644
--- a/preprocess_toolbox/dataset/process.py
+++ b/preprocess_toolbox/dataset/process.py
@@ -19,6 +19,7 @@
 from preprocess_toolbox.dataset.spatial import (gridcell_angles_from_dim_coords,
                                                 invert_gridcell_angles,
                                                 rotate_grid_vectors)
+from preprocess_toolbox.dataset.orca_grid import orca_coord_processing
 
 
 def regrid_dataset(ref_file: os.PathLike,

From c63b40badda1eb8802f5394291d645dabaac326b Mon Sep 17 00:00:00 2001
From: Ben Evans <ben.richard.evans@gmail.com>
Date: Wed, 5 Nov 2025 07:56:29 +0000
Subject: [PATCH 2/3] Initial regrid computation - uncached

---
 preprocess_toolbox/dataset/orca_grid.py | 87 ++++++++++++++++---------
 1 file changed, 58 insertions(+), 29 deletions(-)

diff --git a/preprocess_toolbox/dataset/orca_grid.py b/preprocess_toolbox/dataset/orca_grid.py
index 07ba4eb..b863b7a 100644
--- a/preprocess_toolbox/dataset/orca_grid.py
+++ b/preprocess_toolbox/dataset/orca_grid.py
@@ -7,23 +7,26 @@
 import logging
 import numpy as np
 import xarray as xr
-from scipy.interpolate import griddata
+from scipy.spatial import cKDTree
+
+# Cache for grid transformations to avoid recomputing
+_GRID_TRANSFORM_CACHE = {}
 
 
 def regrid_orca_to_latlon(
     orca_data: xr.DataArray,
     target_lats: np.ndarray,
     target_lons: np.ndarray,
-    method: str = 'linear'
+    method: str = 'nearest'
 ) -> xr.DataArray:
     """
-    Regrid ORCA grid data to a regular lat/lon grid.
+    Regrid ORCA grid data to a regular lat/lon grid using memory-efficient chunked processing.
     
     Args:
         orca_data: xarray DataArray with ORCA grid data
         target_lats: 1D array of target latitudes
         target_lons: 1D array of target longitudes
-        method: Interpolation method ('linear', 'nearest', 'cubic')
+        method: Interpolation method ('nearest' recommended for large grids, 'linear' requires huge memory)
         
     Returns:
         Regridded data on regular lat/lon grid
@@ -39,36 +42,58 @@ def regrid_orca_to_latlon(
         raise ValueError("Cannot find latitude/longitude coordinates in ORCA data")
     
     # Flatten source coordinates and data
-    source_points = np.column_stack([
-        source_lons.ravel(),
-        source_lats.ravel()
-    ])
+    source_lons_flat = source_lons.ravel()
+    source_lats_flat = source_lats.ravel()
     source_values = orca_data.values.ravel()
     
-    # Create target grid
-    target_lon_grid, target_lat_grid = np.meshgrid(target_lons, target_lats)
-    target_points = np.column_stack([
-        target_lon_grid.ravel(),
-        target_lat_grid.ravel()
-    ])
-    
-    # Remove NaN values from source
+    # Remove NaN values from source to reduce memory
     valid_mask = ~np.isnan(source_values)
-    source_points_valid = source_points[valid_mask]
+    source_lons_valid = source_lons_flat[valid_mask]
+    source_lats_valid = source_lats_flat[valid_mask]
     source_values_valid = source_values[valid_mask]
     
-    # Interpolate
-    logging.info(f"Regridding ORCA data using {method} interpolation")
-    regridded_values = griddata(
-        source_points_valid,
-        source_values_valid,
-        target_points,
-        method=method,
-        fill_value=np.nan
-    )
+    logging.info(f"Regridding ORCA data: {len(source_values_valid)} valid source points to {len(target_lats)}x{len(target_lons)} target grid")
+    
+    # Process in chunks to reduce memory usage
+    chunk_size = 50  # Process 50 latitudes at a time
+    n_chunks = int(np.ceil(len(target_lats) / chunk_size))
+    
+    regridded_grid = np.full((len(target_lats), len(target_lons)), np.nan, dtype=np.float32)
+    
+    # Pre-build source points array once (memory efficient)
+    source_points = np.column_stack([source_lons_valid, source_lats_valid])
+    
+    for i in range(n_chunks):
+        start_idx = i * chunk_size
+        end_idx = min((i + 1) * chunk_size, len(target_lats))
+        chunk_n_lats = end_idx - start_idx
+        
+        if i % 10 == 0:
+            logging.info(f"Processing chunk {i+1}/{n_chunks} (rows {start_idx}-{end_idx})")
+        
+        # Create meshgrid only for this chunk of latitudes
+        chunk_lats = target_lats[start_idx:end_idx]
+        chunk_lon_grid, chunk_lat_grid = np.meshgrid(target_lons, chunk_lats)
+        
+        # Flatten to get target points for this chunk
+        chunk_target_points = np.column_stack([
+            chunk_lon_grid.ravel(),
+            chunk_lat_grid.ravel()
+        ])
+        
+        # Interpolate for this chunk
+        chunk_values = griddata(
+            source_points,
+            source_values_valid,
+            chunk_target_points,
+            method=method,
+            fill_value=np.nan
+        )
+        
+        # Reshape and assign
+        regridded_grid[start_idx:end_idx, :] = chunk_values.reshape(chunk_n_lats, len(target_lons))
     
-    # Reshape to target grid
-    regridded_grid = regridded_values.reshape(len(target_lats), len(target_lons))
+    logging.info(f"ORCA regridding complete")
     
     # Create output DataArray
     regridded_data = xr.DataArray(
@@ -110,7 +135,11 @@ def orca_coord_processing(ref_cube, orca_cube):
     # Handle potential time dimension
     if orca_cube.ndim == 3:  # time, y, x
         regridded_slices = []
-        for time_idx in range(orca_cube.shape[0]):
+        n_times = orca_cube.shape[0]
+        logging.info(f"Processing {n_times} time steps")
+        
+        for time_idx in range(n_times):
+            logging.info(f"Processing time step {time_idx + 1}/{n_times}")
             orca_slice = orca_cube[time_idx]
             orca_data = xr.DataArray(
                 orca_slice.data,

From b6c6d5be7f4af0d810e950c88b9d0c2c165af986 Mon Sep 17 00:00:00 2001
From: Ben Evans <ben.richard.evans@gmail.com>
Date: Mon, 22 Dec 2025 06:46:40 +0000
Subject: [PATCH 3/3] Update orca_grid with caching and time coordinate
 correction

---
 preprocess_toolbox/dataset/orca_grid.py | 278 +++++++++++++++++++-----
 1 file changed, 219 insertions(+), 59 deletions(-)

diff --git a/preprocess_toolbox/dataset/orca_grid.py b/preprocess_toolbox/dataset/orca_grid.py
index b863b7a..27a8485 100644
--- a/preprocess_toolbox/dataset/orca_grid.py
+++ b/preprocess_toolbox/dataset/orca_grid.py
@@ -8,25 +8,158 @@
 import numpy as np
 import xarray as xr
 from scipy.spatial import cKDTree
+import pickle
+from pathlib import Path
+import gc
 
 # Cache for grid transformations to avoid recomputing
 _GRID_TRANSFORM_CACHE = {}
+_CACHE_DIR = Path.home() / '.cache' / 'preprocess_toolbox' / 'orca_grids'
+
+
+def _get_or_build_transform(source_lats, source_lons, target_lats, target_lons, cache_key):
+    """
+    Get cached transform or build new one using KDTree for nearest neighbor mapping.
+    
+    Checks in-memory cache first, then disk cache, then builds new transform.
+    Saves to both in-memory and disk cache for future use.
+    
+    Returns:
+        tuple: (indices of nearest source points, valid source mask, target shape)
+    """
+    # Check in-memory cache first
+    if cache_key in _GRID_TRANSFORM_CACHE:
+        logging.info(f"Using in-memory cached grid transform")
+        return _GRID_TRANSFORM_CACHE[cache_key]
+    
+    # Check disk cache
+    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    cache_file = _CACHE_DIR / f"{cache_key}.pkl"
+    
+    if cache_file.exists():
+        logging.info(f"Loading grid transform from disk cache: {cache_file}")
+        try:
+            with open(cache_file, 'rb') as f:
+                transform = pickle.load(f)
+            _GRID_TRANSFORM_CACHE[cache_key] = transform
+            logging.info(f"Successfully loaded cached transform")
+            return transform
+        except Exception as e:
+            logging.warning(f"Failed to load cached transform: {e}, will rebuild")
+    
+    logging.info(f"Building new grid transform (will be cached for reuse)")
+    
+    # Flatten source coordinates
+    source_lons_flat = source_lons.ravel()
+    source_lats_flat = source_lats.ravel()
+    
+    # Create source points array (only valid, non-NaN points)
+    valid_mask = ~(np.isnan(source_lons_flat) | np.isnan(source_lats_flat))
+    valid_indices = np.where(valid_mask)[0]
+    
+    source_points = np.column_stack([
+        source_lons_flat[valid_mask],
+        source_lats_flat[valid_mask]
+    ])
+    
+    logging.info(f"Source grid: {len(source_points)} valid points")
+    logging.info(f"Target grid: {len(target_lats)} x {len(target_lons)} = {len(target_lats) * len(target_lons)} points")
+    
+    # Build KDTree for fast nearest neighbor lookup
+    logging.info(f"Building KDTree with {len(source_points)} valid source points...")
+    tree = cKDTree(source_points)
+    logging.info(f"KDTree construction complete")
+    
+    # Query in chunks to avoid creating full meshgrid in memory
+    chunk_size = 50  # Process 50 rows of target grid at a time
+    n_lat_chunks = (len(target_lats) - 1) // chunk_size + 1
+    n_target_total = len(target_lats) * len(target_lons)
+    source_indices = np.zeros(n_target_total, dtype=np.int32)
+    
+    logging.info(f"Querying KDTree in {n_lat_chunks} latitude chunks of {chunk_size} rows")
+    
+    idx = 0
+    for lat_chunk_idx in range(0, len(target_lats), chunk_size):
+        lat_end = min(lat_chunk_idx + chunk_size, len(target_lats))
+        chunk_lats = target_lats[lat_chunk_idx:lat_end]
+        chunk_num = lat_chunk_idx // chunk_size + 1
+        
+        # Create mini-meshgrid just for this chunk
+        chunk_lon_grid, chunk_lat_grid = np.meshgrid(target_lons, chunk_lats)
+        
+        # Debug: check shapes
+        logging.debug(f"chunk_lats shape: {chunk_lats.shape}, target_lons shape: {target_lons.shape}")
+        logging.debug(f"chunk_lon_grid shape: {chunk_lon_grid.shape}, chunk_lat_grid shape: {chunk_lat_grid.shape}")
+        
+        chunk_points = np.column_stack([
+            chunk_lon_grid.ravel(),
+            chunk_lat_grid.ravel()
+        ])
+        
+        logging.debug(f"chunk_points shape: {chunk_points.shape}")
+        logging.info(f"Processing latitude chunk {chunk_num}/{n_lat_chunks} ({len(chunk_points)} points)")
+        
+        # Query this chunk
+        _, nearest_chunk = tree.query(chunk_points, k=1, workers=1)
+        
+        # Ensure nearest_chunk is 1D
+        if nearest_chunk.ndim > 1:
+            nearest_chunk = nearest_chunk.ravel()
+        
+        # Sanity check
+        if len(nearest_chunk) != len(chunk_points):
+            logging.error(f"Size mismatch: nearest_chunk={len(nearest_chunk)}, chunk_points={len(chunk_points)}")
+            raise ValueError(f"KDTree query returned wrong size: {len(nearest_chunk)} != {len(chunk_points)}")
+        
+        # Store indices
+        chunk_size_actual = len(chunk_points)
+        source_indices[idx:idx+chunk_size_actual] = valid_indices[nearest_chunk]
+        idx += chunk_size_actual
+        
+        # Free chunk memory
+        del chunk_lon_grid, chunk_lat_grid, chunk_points, nearest_chunk
+        
+        if chunk_num % 5 == 0:
+            logging.info(f"Progress: {chunk_num}/{n_lat_chunks} chunks complete ({100*chunk_num/n_lat_chunks:.1f}%)")
+    
+    logging.info(f"All {n_lat_chunks} chunks processed successfully")
+    
+    # Clean up to free memory before caching
+    del tree, source_points
+    gc.collect()
+    
+    # Create transform tuple
+    transform = (source_indices, valid_mask, (len(target_lats), len(target_lons)))
+    
+    # Cache in memory
+    _GRID_TRANSFORM_CACHE[cache_key] = transform
+    
+    # Save to disk
+    try:
+        logging.info(f"Saving grid transform to disk cache: {cache_file}")
+        with open(cache_file, 'wb') as f:
+            pickle.dump(transform, f, protocol=pickle.HIGHEST_PROTOCOL)
+        logging.info(f"Grid transform cached successfully")
+    except Exception as e:
+        logging.warning(f"Failed to save transform to disk: {e}")
+    
+    return transform
 
 
 def regrid_orca_to_latlon(
     orca_data: xr.DataArray,
     target_lats: np.ndarray,
     target_lons: np.ndarray,
-    method: str = 'nearest'
+    cache_key: str = None
 ) -> xr.DataArray:
     """
-    Regrid ORCA grid data to a regular lat/lon grid using memory-efficient chunked processing.
+    Regrid ORCA grid data to a regular lat/lon grid using cached nearest-neighbor mapping.
     
     Args:
         orca_data: xarray DataArray with ORCA grid data
         target_lats: 1D array of target latitudes
         target_lons: 1D array of target longitudes
-        method: Interpolation method ('nearest' recommended for large grids, 'linear' requires huge memory)
+        cache_key: Unique key for caching the transform (e.g., "oras5_south")
         
     Returns:
         Regridded data on regular lat/lon grid
@@ -41,59 +174,24 @@ def regrid_orca_to_latlon(
     else:
         raise ValueError("Cannot find latitude/longitude coordinates in ORCA data")
     
-    # Flatten source coordinates and data
-    source_lons_flat = source_lons.ravel()
-    source_lats_flat = source_lats.ravel()
-    source_values = orca_data.values.ravel()
+    # Get or build the transform
+    if cache_key is None:
+        cache_key = f"orca_{source_lats.shape}_{target_lats.shape}"
     
-    # Remove NaN values from source to reduce memory
-    valid_mask = ~np.isnan(source_values)
-    source_lons_valid = source_lons_flat[valid_mask]
-    source_lats_valid = source_lats_flat[valid_mask]
-    source_values_valid = source_values[valid_mask]
-    
-    logging.info(f"Regridding ORCA data: {len(source_values_valid)} valid source points to {len(target_lats)}x{len(target_lons)} target grid")
+    source_indices, valid_mask, target_shape = _get_or_build_transform(
+        source_lats, source_lons, target_lats, target_lons, cache_key
+    )
     
-    # Process in chunks to reduce memory usage
-    chunk_size = 50  # Process 50 latitudes at a time
-    n_chunks = int(np.ceil(len(target_lats) / chunk_size))
+    # Apply transform to data
+    source_values = orca_data.values.ravel()
+    regridded_values = np.full(len(source_indices), np.nan, dtype=np.float32)
     
-    regridded_grid = np.full((len(target_lats), len(target_lons)), np.nan, dtype=np.float32)
+    # Only copy values where source data is valid
+    valid_data_mask = ~np.isnan(source_values[source_indices])
+    regridded_values[valid_data_mask] = source_values[source_indices[valid_data_mask]]
     
-    # Pre-build source points array once (memory efficient)
-    source_points = np.column_stack([source_lons_valid, source_lats_valid])
-    
-    for i in range(n_chunks):
-        start_idx = i * chunk_size
-        end_idx = min((i + 1) * chunk_size, len(target_lats))
-        chunk_n_lats = end_idx - start_idx
-        
-        if i % 10 == 0:
-            logging.info(f"Processing chunk {i+1}/{n_chunks} (rows {start_idx}-{end_idx})")
-        
-        # Create meshgrid only for this chunk of latitudes
-        chunk_lats = target_lats[start_idx:end_idx]
-        chunk_lon_grid, chunk_lat_grid = np.meshgrid(target_lons, chunk_lats)
-        
-        # Flatten to get target points for this chunk
-        chunk_target_points = np.column_stack([
-            chunk_lon_grid.ravel(),
-            chunk_lat_grid.ravel()
-        ])
-        
-        # Interpolate for this chunk
-        chunk_values = griddata(
-            source_points,
-            source_values_valid,
-            chunk_target_points,
-            method=method,
-            fill_value=np.nan
-        )
-        
-        # Reshape and assign
-        regridded_grid[start_idx:end_idx, :] = chunk_values.reshape(chunk_n_lats, len(target_lons))
-    
-    logging.info(f"ORCA regridding complete")
+    # Reshape to target grid
+    regridded_grid = regridded_values.reshape(target_shape)
     
     # Create output DataArray
     regridded_data = xr.DataArray(
@@ -112,8 +210,10 @@ def orca_coord_processing(ref_cube, orca_cube):
     """
     Coordinate processing function for ORCA grids to use with regrid_dataset.
     
-    This function prepares ORCA data for regridding by converting it to xarray,
-    regridding to the reference grid, and converting back to iris cube.
+    This function performs the full regridding of ORCA data and returns a cube
+    that's already on the target grid with matching coordinates. The subsequent
+    cube.regrid() call in process.py will recognize the cube is already on the
+    target grid and pass it through unchanged.
     
     Args:
         ref_cube: Reference iris cube with target grid
@@ -125,12 +225,27 @@ def orca_coord_processing(ref_cube, orca_cube):
     import iris
     from iris.coords import DimCoord
     
-    logging.info("Processing ORCA grid coordinates")
+    logging.info("Processing ORCA grid - performing custom nearest-neighbor regridding")
     
     # Get target lat/lon from reference cube
     target_lats = ref_cube.coord('latitude').points
     target_lons = ref_cube.coord('longitude').points
     
+    # Ensure we have 1D coordinate arrays (not 2D meshgrids)
+    if target_lats.ndim > 1:
+        logging.warning(f"Target latitudes are {target_lats.ndim}D, extracting unique values")
+        target_lats = np.unique(target_lats.ravel())
+        target_lats = np.sort(target_lats)
+    if target_lons.ndim > 1:
+        logging.warning(f"Target longitudes are {target_lons.ndim}D, extracting unique values")
+        target_lons = np.unique(target_lons.ravel())
+        target_lons = np.sort(target_lons)
+    
+    logging.info(f"Target grid: {len(target_lats)} lats x {len(target_lons)} lons")
+    
+    # Create a cache key based on grid dimensions
+    cache_key = f"orca_{orca_cube.shape}_{ref_cube.shape}"
+    
     # Convert ORCA cube to xarray for easier handling
     # Handle potential time dimension
     if orca_cube.ndim == 3:  # time, y, x
@@ -139,7 +254,8 @@ def orca_coord_processing(ref_cube, orca_cube):
         logging.info(f"Processing {n_times} time steps")
         
         for time_idx in range(n_times):
-            logging.info(f"Processing time step {time_idx + 1}/{n_times}")
+            if time_idx % 5 == 0:
+                logging.info(f"Processing time step {time_idx + 1}/{n_times}")
             orca_slice = orca_cube[time_idx]
             orca_data = xr.DataArray(
                 orca_slice.data,
@@ -149,15 +265,51 @@ def orca_coord_processing(ref_cube, orca_cube):
                 },
                 dims=['y', 'x']
             )
-            regridded_slice = regrid_orca_to_latlon(orca_data, target_lats, target_lons)
+            regridded_slice = regrid_orca_to_latlon(orca_data, target_lats, target_lons, cache_key=cache_key)
             regridded_slices.append(regridded_slice.values)
         
         regridded_data = np.stack(regridded_slices)
         
         # Create new iris cube with regridded data
+        # Create proper DimCoords with the 1D coordinate arrays
+        # Apply ellipsoid from ref_cube's coordinate system for compatibility
         lat_coord = DimCoord(target_lats, standard_name='latitude', units='degrees')
         lon_coord = DimCoord(target_lons, standard_name='longitude', units='degrees')
-        time_coord = orca_cube.coord('time')
+        
+        # Apply the ellipsoid (not the full projected coordinate system) for compatibility
+        if ref_cube.coord_system() is not None:
+            cs_ellipsoid = ref_cube.coord_system().ellipsoid
+            lat_coord.coord_system = cs_ellipsoid
+            lon_coord.coord_system = cs_ellipsoid
+        
+        # Get time coordinate and shift to end-of-month
+        time_coord = orca_cube.coord('time').copy()
+        
+        # Shift mid-month dates to end-of-month for consistency with other datasets
+        import pandas as pd
+        import cftime
+        from datetime import datetime
+        
+        time_points = time_coord.units.num2date(time_coord.points)
+        
+        # Handle cftime objects by converting to standard datetime
+        if isinstance(time_points[0], cftime.datetime):
+            # Convert cftime to standard datetime (works for most calendar types)
+            time_points = [dt.replace(tzinfo=None).to_pydatetime() if hasattr(dt, 'to_pydatetime') 
+                          else pd.Timestamp(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second).to_pydatetime()
+                          for dt in time_points]
+        
+        time_points_pd = pd.to_datetime(time_points)
+        end_of_month = time_points_pd + pd.offsets.MonthEnd(0)
+        
+        # Normalize to midnight (00:00:00) to ensure consistency
+        end_of_month_normalized = [datetime(dt.year, dt.month, dt.day, 0, 0, 0) for dt in end_of_month]
+        
+        # Convert back to numeric values using the same units
+        new_time_points = time_coord.units.date2num(end_of_month_normalized)
+        time_coord.points = new_time_points
+        
+        logging.info(f"Shifted ORCA time coordinates from mid-month to end-of-month and normalized to midnight")
         
         regridded_cube = iris.cube.Cube(
             regridded_data,
@@ -176,11 +328,19 @@ def orca_coord_processing(ref_cube, orca_cube):
             },
             dims=['y', 'x']
         )
-        regridded_data = regrid_orca_to_latlon(orca_data, target_lats, target_lons)
+        regridded_data = regrid_orca_to_latlon(orca_data, target_lats, target_lons, cache_key=cache_key)
         
+        # Create proper DimCoords with the 1D coordinate arrays
+        # Apply ellipsoid from ref_cube's coordinate system for compatibility
         lat_coord = DimCoord(target_lats, standard_name='latitude', units='degrees')
         lon_coord = DimCoord(target_lons, standard_name='longitude', units='degrees')
         
+        # Apply the ellipsoid (not the full projected coordinate system) for compatibility
+        if ref_cube.coord_system() is not None:
+            cs_ellipsoid = ref_cube.coord_system().ellipsoid
+            lat_coord.coord_system = cs_ellipsoid
+            lon_coord.coord_system = cs_ellipsoid
+        
         regridded_cube = iris.cube.Cube(
             regridded_data.values,
             dim_coords_and_dims=[