From 4292970d8c7eb67b1aea0d6751e2b064b5223751 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Wed, 21 May 2025 14:29:06 +0800 Subject: [PATCH] Update example notebook & fix errant `open_dataset`'s in tests --- examples/validate_chunkspec.ipynb | 161 +++++++++++++++++++----------- tests/test_chunking.py | 26 ++--- 2 files changed, 114 insertions(+), 73 deletions(-) diff --git a/examples/validate_chunkspec.ipynb b/examples/validate_chunkspec.ipynb index 7e12bdd..f75d281 100644 --- a/examples/validate_chunkspec.ipynb +++ b/examples/validate_chunkspec.ipynb @@ -68,7 +68,7 @@ " A dictionary of dictionaries, each containing the chunk specification for a\n", " single file in the dataset as key value pairs. This is only returned if\n", " files in the provided dataset are found to have inconsistent chunking.\n", - "\u001b[0;31mFile:\u001b[0m ~/access-intake-utils/src/access_intake_utils/chunking/_chunking.py\n", + "\u001b[0;31mFile:\u001b[0m ~/.local/lib/python3.11/site-packages/access_intake_utils/chunking/_chunking.py\n", "\u001b[0;31mType:\u001b[0m function" ] }, @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "75aebe8a-9ae0-487b-a3ae-59f17fba7415", "metadata": {}, "outputs": [ @@ -190,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "080acfe7-f45d-4acc-afad-7393205846ea", "metadata": {}, "outputs": [ @@ -589,8 +589,8 @@ " intake_esm_attrs:variable_units: degrees_E,degrees_N,meters,mete...\n", " intake_esm_attrs:realm: ocean\n", " intake_esm_attrs:_data_format_: netcdf\n", - " intake_esm_dataset_key: ocean_month.1mon
  • filename :
    ocean_month.nc
    title :
    ACCESS-OM2-01
    grid_type :
    mosaic
    grid_tile :
    1
    intake_esm_vars :
    ['temp']
    intake_esm_attrs:filename :
    ocean_month.nc
    intake_esm_attrs:file_id :
    ocean_month
    intake_esm_attrs:frequency :
    1mon
    intake_esm_attrs:variable :
    xt_ocean,yt_ocean,st_ocean,st_edges_ocean,time,nv,xu_ocean,yu_ocean,sw_ocean,sw_edges_ocean,grid_xu_ocean,grid_yt_ocean,potrho,potrho_edges,grid_xt_ocean,grid_yu_ocean,neutral,neutralrho_edges,temp,salt,u,v,wt,dzt,tx_trans,ty_trans,ty_trans_submeso,tx_trans_rho,ty_trans_rho,ty_trans_nrho_submeso,sea_level,sea_levelsq,mld,pme_river,pme_net,river,runoff,evap,melt,net_sfc_heating,frazil_3d_int_z,swflx,lw_heat,sens_heat,evap_heat,fprec_melt_heat,sfc_hflux_from_runoff,sfc_hflux_coupler,tau_x,tau_y,tx_trans_int_z,ty_trans_int_z,average_T1,average_T2,average_DT,time_bnds
    intake_esm_attrs:variable_long_name :
    tcell longitude,tcell latitude,tcell zstar depth,tcell zstar depth edges,time,vertex number,ucell longitude,ucell latitude,ucell zstar depth,ucell zstar depth edges,ucell longitude,tcell latitude,potential density,potential density edges,tcell longitude,ucell latitude,neutral density,neutral density edges,Conservative temperature,Practical Salinity,i-current,j-current,dia-surface velocity T-points,t-cell thickness,T-cell i-mass transport,T-cell j-mass transport,T-cell mass j-transport from submesoscale param,T-cell i-mass transport on pot_rho,T-cell j-mass transport on pot_rho,T-cell j-mass transport from submesoscale param on neutral rho,effective sea level (eta_t + patm/(rho0*g)) on T cells,square of effective sea level (eta_t + patm/(rho0*g)) on T cells,mixed layer depth determined by density criteria,mass flux of precip-evap+river via sbc (liquid, frozen, evaporation),precip-evap into ocean (total w/ restore + normalize),mass flux of river (runoff + calving) entering ocean,mass flux of liquid river runoff entering ocean,mass flux from evaporation/condensation (>0 enters ocean),water flux transferred with sea ice form/melt (>0 enters ocean),surface ocean heat flux coming through coupler and mass transfer,Vertical sum of ocn frazil heat flux over time step,shortwave flux into ocean (>0 heats ocean),longwave flux into ocean (<0 cools ocean),sensible heat into ocean (<0 cools ocean),latent heat flux into ocean (<0 cools ocean),heat flux to melt frozen precip (<0 cools ocean),heat flux (relative to 0C) from liquid river runoff,surface heat flux coming through coupler,i-directed wind stress forcing u-velocity,j-directed wind stress forcing v-velocity,T-cell i-mass transport vertically summed,T-cell j-mass transport vertically summed,Start time for average period,End time for average period,Length of average period,time axis boundaries
    intake_esm_attrs:variable_standard_name :
    ,,,,,,,,,,,,,,,,,,sea_water_conservative_temperature,sea_water_salinity,sea_water_x_velocity,sea_water_y_velocity,,cell_thickness,ocean_mass_x_transport,ocean_mass_y_transport,,,,,sea_surface_height_above_geoid,square_of_sea_surface_height_above_geoid,ocean_mixed_layer_thickness_defined_by_sigma_t,water_flux_into_sea_water,,,water_flux_into_sea_water_from_rivers,water_evaporation_flux,water_flux_into_sea_water_due_to_sea_ice_thermodynamics,,,surface_net_downward_shortwave_flux,surface_net_downward_longwave_flux,surface_downward_sensible_heat_flux,surface_downward_latent_heat_flux,heat_flux_into_sea_water_due_to_snow_thermodynamics,temperature_flux_due_to_runoff_expressed_as_heat_flux_into_sea_water,,surface_downward_x_stress,surface_downward_y_stress,,,,,,
    intake_esm_attrs:variable_cell_methods :
    ,,,,,,,,,,,,,,,,,,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,,,,
    intake_esm_attrs:variable_units :
    degrees_E,degrees_N,meters,meters,days since 1900-01-01 00:00:00,none,degrees_E,degrees_N,meters,meters,degrees_E,degrees_N,kg/m^3,kg/m^3,degrees_E,degrees_N,kg/m^3,kg/m^3,K,psu,m/sec,m/sec,m/sec,m,kg/s,kg/s,kg/s,kg/s,kg/s,kg/s,meter,m^2,m,(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),Watts/m^2,W/m^2,W/m^2,W/m^2,W/m^2,W/m^2,W/m^2,Watts/m^2,Watts/m^2,N/m^2,N/m^2,kg/s,kg/s,days since 1900-01-01 00:00:00,days since 1900-01-01 00:00:00,days,days
    intake_esm_attrs:realm :
    ocean
    intake_esm_attrs:_data_format_ :
    netcdf
    intake_esm_dataset_key :
    ocean_month.1mon
  • " ], "text/plain": [ " Size: 122GB\n", @@ -938,7 +938,7 @@ " intake_esm_dataset_key: ocean_month.1mon" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -958,7 +958,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 7, "id": "163b1fc7-ecc3-483e-8e38-2cb33d50d993", "metadata": {}, "outputs": [], @@ -968,7 +968,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 8, "id": "80dc541a-2486-4eaa-b4f7-c3c07d6893f1", "metadata": {}, "outputs": [ @@ -976,7 +976,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/jobfs/140304826.gadi-pbs/ipykernel_2541108/2116134971.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", + "/jobfs/141408895.gadi-pbs/ipykernel_2642014/2116134971.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", " validate_chunkspec(expt_ds, chunks,varnames = 'temp')\n" ] }, @@ -986,7 +986,7 @@ "{'time': 37, 'st_ocean': 57, 'yt_ocean': 405, 'xt_ocean': 360}" ] }, - "execution_count": 17, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -997,7 +997,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "id": "a9b801a4-3165-4982-9237-f3e5b7fb2d51", "metadata": {}, "outputs": [ @@ -1008,7 +1008,7 @@ "Name: path, dtype: object" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1063,7 +1063,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "id": "329176df-3166-4ca6-8837-a30e3a3a8d2b", "metadata": {}, "outputs": [ @@ -1071,11 +1071,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "/jobfs/140304826.gadi-pbs/ipykernel_2541108/677627901.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", + "/jobfs/141408895.gadi-pbs/ipykernel_2642014/677627901.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", " optimised_chunks = validate_chunkspec(expt_ds, chunks,varnames = 'temp')\n", - "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.02/lib/python3.11/site-packages/intake_esm/source.py:82: UserWarning: The specified chunks separate the stored chunks along dimension \"yt_ocean\" starting at index 405. This could degrade performance. Instead, consider rechunking after loading.\n", + "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.05/lib/python3.11/site-packages/intake_esm/source.py:82: UserWarning: The specified chunks separate the stored chunks along dimension \"yt_ocean\" starting at index 405. This could degrade performance. Instead, consider rechunking after loading.\n", " ds = xr.open_dataset(url, **xarray_open_kwargs)\n", - "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.02/lib/python3.11/site-packages/intake_esm/source.py:82: UserWarning: The specified chunks separate the stored chunks along dimension \"xt_ocean\" starting at index 360. This could degrade performance. Instead, consider rechunking after loading.\n", + "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.05/lib/python3.11/site-packages/intake_esm/source.py:82: UserWarning: The specified chunks separate the stored chunks along dimension \"xt_ocean\" starting at index 360. This could degrade performance. Instead, consider rechunking after loading.\n", " ds = xr.open_dataset(url, **xarray_open_kwargs)\n" ] }, @@ -1474,8 +1474,8 @@ " intake_esm_attrs:variable_units: degrees_E,degrees_N,meters,mete...\n", " intake_esm_attrs:realm: ocean\n", " intake_esm_attrs:_data_format_: netcdf\n", - " intake_esm_dataset_key: ocean_month.1mon
  • filename :
    ocean_month.nc
    title :
    ACCESS-OM2-01
    grid_type :
    mosaic
    grid_tile :
    1
    intake_esm_vars :
    ['temp']
    intake_esm_attrs:filename :
    ocean_month.nc
    intake_esm_attrs:file_id :
    ocean_month
    intake_esm_attrs:frequency :
    1mon
    intake_esm_attrs:variable :
    xt_ocean,yt_ocean,st_ocean,st_edges_ocean,time,nv,xu_ocean,yu_ocean,sw_ocean,sw_edges_ocean,grid_xu_ocean,grid_yt_ocean,potrho,potrho_edges,grid_xt_ocean,grid_yu_ocean,neutral,neutralrho_edges,temp,salt,u,v,wt,dzt,tx_trans,ty_trans,ty_trans_submeso,tx_trans_rho,ty_trans_rho,ty_trans_nrho_submeso,sea_level,sea_levelsq,mld,pme_river,pme_net,river,runoff,evap,melt,net_sfc_heating,frazil_3d_int_z,swflx,lw_heat,sens_heat,evap_heat,fprec_melt_heat,sfc_hflux_from_runoff,sfc_hflux_coupler,tau_x,tau_y,tx_trans_int_z,ty_trans_int_z,average_T1,average_T2,average_DT,time_bnds
    intake_esm_attrs:variable_long_name :
    tcell longitude,tcell latitude,tcell zstar depth,tcell zstar depth edges,time,vertex number,ucell longitude,ucell latitude,ucell zstar depth,ucell zstar depth edges,ucell longitude,tcell latitude,potential density,potential density edges,tcell longitude,ucell latitude,neutral density,neutral density edges,Conservative temperature,Practical Salinity,i-current,j-current,dia-surface velocity T-points,t-cell thickness,T-cell i-mass transport,T-cell j-mass transport,T-cell mass j-transport from submesoscale param,T-cell i-mass transport on pot_rho,T-cell j-mass transport on pot_rho,T-cell j-mass transport from submesoscale param on neutral rho,effective sea level (eta_t + patm/(rho0*g)) on T cells,square of effective sea level (eta_t + patm/(rho0*g)) on T cells,mixed layer depth determined by density criteria,mass flux of precip-evap+river via sbc (liquid, frozen, evaporation),precip-evap into ocean (total w/ restore + normalize),mass flux of river (runoff + calving) entering ocean,mass flux of liquid river runoff entering ocean,mass flux from evaporation/condensation (>0 enters ocean),water flux transferred with sea ice form/melt (>0 enters ocean),surface ocean heat flux coming through coupler and mass transfer,Vertical sum of ocn frazil heat flux over time step,shortwave flux into ocean (>0 heats ocean),longwave flux into ocean (<0 cools ocean),sensible heat into ocean (<0 cools ocean),latent heat flux into ocean (<0 cools ocean),heat flux to melt frozen precip (<0 cools ocean),heat flux (relative to 0C) from liquid river runoff,surface heat flux coming through coupler,i-directed wind stress forcing u-velocity,j-directed wind stress forcing v-velocity,T-cell i-mass transport vertically summed,T-cell j-mass transport vertically summed,Start time for average period,End time for average period,Length of average period,time axis boundaries
    intake_esm_attrs:variable_standard_name :
    ,,,,,,,,,,,,,,,,,,sea_water_conservative_temperature,sea_water_salinity,sea_water_x_velocity,sea_water_y_velocity,,cell_thickness,ocean_mass_x_transport,ocean_mass_y_transport,,,,,sea_surface_height_above_geoid,square_of_sea_surface_height_above_geoid,ocean_mixed_layer_thickness_defined_by_sigma_t,water_flux_into_sea_water,,,water_flux_into_sea_water_from_rivers,water_evaporation_flux,water_flux_into_sea_water_due_to_sea_ice_thermodynamics,,,surface_net_downward_shortwave_flux,surface_net_downward_longwave_flux,surface_downward_sensible_heat_flux,surface_downward_latent_heat_flux,heat_flux_into_sea_water_due_to_snow_thermodynamics,temperature_flux_due_to_runoff_expressed_as_heat_flux_into_sea_water,,surface_downward_x_stress,surface_downward_y_stress,,,,,,
    intake_esm_attrs:variable_cell_methods :
    ,,,,,,,,,,,,,,,,,,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,time: mean,,,,
    intake_esm_attrs:variable_units :
    degrees_E,degrees_N,meters,meters,days since 1900-01-01 00:00:00,none,degrees_E,degrees_N,meters,meters,degrees_E,degrees_N,kg/m^3,kg/m^3,degrees_E,degrees_N,kg/m^3,kg/m^3,K,psu,m/sec,m/sec,m/sec,m,kg/s,kg/s,kg/s,kg/s,kg/s,kg/s,meter,m^2,m,(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),(kg/m^3)*(m/sec),Watts/m^2,W/m^2,W/m^2,W/m^2,W/m^2,W/m^2,W/m^2,Watts/m^2,Watts/m^2,N/m^2,N/m^2,kg/s,kg/s,days since 1900-01-01 00:00:00,days since 1900-01-01 00:00:00,days,days
    intake_esm_attrs:realm :
    ocean
    intake_esm_attrs:_data_format_ :
    netcdf
    intake_esm_dataset_key :
    ocean_month.1mon
  • " ], "text/plain": [ " Size: 122GB\n", @@ -1755,7 +1755,7 @@ " intake_esm_dataset_key: ocean_month.1mon" ] }, - "execution_count": 18, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1777,7 +1777,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "id": "05b156af-efe8-4bca-b975-72492e227345", "metadata": {}, "outputs": [ @@ -1800,7 +1800,7 @@ " '/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55_ryf_Control/output989/ocean/ocean_month.nc']" ] }, - "execution_count": 20, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1812,7 +1812,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "id": "51f5a523-7ed2-4457-9877-525b42ec3221", "metadata": {}, "outputs": [ @@ -1820,7 +1820,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/jobfs/140304826.gadi-pbs/ipykernel_2541108/3620575421.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", + "/jobfs/141408895.gadi-pbs/ipykernel_2642014/3620575421.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", " validate_chunkspec(paths_as_strings, chunks,varnames = 'temp')\n" ] }, @@ -1830,7 +1830,7 @@ "{'time': 37, 'st_ocean': 57, 'yt_ocean': 405, 'xt_ocean': 360}" ] }, - "execution_count": 21, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1841,7 +1841,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "id": "1c987757-feec-477d-ae16-8cc0048c0018", "metadata": {}, "outputs": [ @@ -1864,7 +1864,7 @@ " PosixPath('/g/data/cj50/access-om2/raw-output/access-om2-01/01deg_jra55_ryf_Control/output989/ocean/ocean_month.nc')]" ] }, - "execution_count": 25, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1878,7 +1878,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "id": "39eedeb6-2914-4739-93de-96b70eba4ac3", "metadata": {}, "outputs": [ @@ -1886,7 +1886,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/jobfs/140304826.gadi-pbs/ipykernel_2541108/883754652.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", + "/jobfs/141408895.gadi-pbs/ipykernel_2642014/883754652.py:1: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", " validate_chunkspec(paths, chunks,varnames = 'temp')\n" ] }, @@ -1896,7 +1896,7 @@ "{'time': 37, 'st_ocean': 57, 'yt_ocean': 405, 'xt_ocean': 360}" ] }, - "execution_count": 26, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1910,26 +1910,32 @@ "id": "f1254bf0-5f8b-4e5f-a8bb-c376d1057878", "metadata": {}, "source": [ - "- Or, in some instances, an xarray dataset - but only if the dataset contains the file handles (this isn't guaranteed, so preferably use the other methods). In the example below, this won't work - but it might for others. This example will be updated as suppot for xarray detasets becomes more robust." + "- Or, an xarray Dataset - but only if the dataset contains the file handles (this isn't guaranteed, so preferably use the other methods). " ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 22, "id": "ffab3565-7c9f-44fe-b24d-12e9697ae82d", "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": " Dataset/DataArray does contain source attribute describing file path(s). Please provide a dataset with a source attribute, an esm_datastore, or a list of file paths.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[29], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mxarray\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mxr\u001b[39;00m\n\u001b[1;32m 3\u001b[0m ds \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mopen_mfdataset(paths, decode_timedelta \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m----> 5\u001b[0m \u001b[43mvalidate_chunkspec\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m,\u001b[49m\u001b[43mvarnames\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtemp\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/access-intake-utils/src/access_intake_utils/chunking/_chunking.py:162\u001b[0m, in \u001b[0;36mvalidate_chunkspec\u001b[0;34m(dataset, chunkspec, varnames, validate_mode, sample_size)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mcase\u001b[39;00m Dataset() \u001b[38;5;241m|\u001b[39m DataArray():\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m dataset\u001b[38;5;241m.\u001b[39mencoding\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msource\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m--> 162\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Dataset/DataArray does contain source attribute describing file path(s).\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Please provide a dataset with a source attribute, an esm_datastore,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m or a list of file paths.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 166\u001b[0m )\n\u001b[1;32m 167\u001b[0m path_list \u001b[38;5;241m=\u001b[39m [Path(f) \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m dataset\u001b[38;5;241m.\u001b[39mencoding[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msource\u001b[39m\u001b[38;5;124m\"\u001b[39m]]\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01mcase\u001b[39;00m Iterable() \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(f, (\u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m|\u001b[39m Path)) \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m dataset):\n", - "\u001b[0;31mValueError\u001b[0m: Dataset/DataArray does contain source attribute describing file path(s). Please provide a dataset with a source attribute, an esm_datastore, or a list of file paths." + "name": "stderr", + "output_type": "stream", + "text": [ + "/jobfs/141408895.gadi-pbs/ipykernel_2642014/3638848020.py:5: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", + " validate_chunkspec(ds, chunks,varnames = 'temp')\n" ] + }, + { + "data": { + "text/plain": [ + "{'time': 37, 'st_ocean': 57, 'yt_ocean': 405, 'xt_ocean': 360}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1939,13 +1945,54 @@ "\n", "validate_chunkspec(ds, chunks,varnames = 'temp')" ] + }, + { + "cell_type": "markdown", + "id": "3a5b93e1-e071-4ef8-af26-2c3fb1e1f072", + "metadata": {}, + "source": [ + "- You can also pass an xarray DataArray, but in these instances, only the first file can be checked. In future, this may be updated to be more comprehensive." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ddc0bfd5-5a94-400a-8d43-e6041fae13e9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/jobfs/141408895.gadi-pbs/ipykernel_2642014/3550891165.py:5: ChunkingWarning: Specified chunks are not integer multiples of the disk chunks. Returning suggested chunks as a dictionary.\n", + " validate_chunkspec(da, chunks,varnames = 'temp')\n" + ] + }, + { + "data": { + "text/plain": [ + "{'time': 37, 'st_ocean': 57, 'yt_ocean': 405, 'xt_ocean': 360}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = xr.open_mfdataset(paths, decode_timedelta = False)\n", + "\n", + "da = ds['temp']\n", + "\n", + "validate_chunkspec(da, chunks,varnames = 'temp')" + ] } ], "metadata": { "kernelspec": { - "display_name": "Access-Intake-utils with system-site-packages", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "access-intake-system" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1957,7 +2004,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.11.12" } }, "nbformat": 4, diff --git a/tests/test_chunking.py b/tests/test_chunking.py index 6bea4e7..abeffbd 100644 --- a/tests/test_chunking.py +++ b/tests/test_chunking.py @@ -475,14 +475,11 @@ def test_validate_chunkspec_xr_ds( ], ) def test__get_file_handles(fpath, varname): - if isinstance(fpath, list): - ds = xr.open_mfdataset( - fpath, - decode_timedelta=False, - engine="netcdf4", - ) - elif isinstance(fpath, str): - ds = xr.open_dataset(fpath, decode_timedelta=False, engine="netcdf4") + ds = xr.open_mfdataset( + fpath, + decode_timedelta=False, + engine="netcdf4", + ) if varname is not None: ds = ds[varname] @@ -509,14 +506,11 @@ def test__get_file_handles(fpath, varname): ], ) def test__get_file_handles_failing(fpath, varname): - if isinstance(fpath, list): - ds = xr.open_mfdataset( - fpath, - decode_timedelta=False, - engine="netcdf4", - ) - elif isinstance(fpath, str): - ds = xr.open_dataset(fpath, decode_timedelta=False, engine="netcdf4") + ds = xr.open_mfdataset( + fpath, + decode_timedelta=False, + engine="netcdf4", + ) if varname is not None: ds = ds[varname]