From af4ce1b73b30581d53b475deae6dc7490467f085 Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Sun, 24 May 2026 20:32:23 -0500 Subject: [PATCH 1/7] Adding test for freedecomp before close Adding test that writes and immediately frees (before file close/sync) I/O decompositions. The decomposition used for read is freed after the file is closed/deleted --- tests/general/pio_decomp_tests.F90.in | 211 ++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) diff --git a/tests/general/pio_decomp_tests.F90.in b/tests/general/pio_decomp_tests.F90.in index 17b78f5215..af6a4fa0b4 100644 --- a/tests/general/pio_decomp_tests.F90.in +++ b/tests/general/pio_decomp_tests.F90.in @@ -59,6 +59,217 @@ PIO_TF_AUTO_TEST_SUB_BEGIN nc_write_1d_darray call PIO_freedecomp(pio_tf_iosystem_, iodesc) PIO_TF_AUTO_TEST_SUB_END nc_write_1d_darray +! Test freeing I/O decomposition before closing (writing data to) file +PIO_TF_AUTO_TEST_SUB_BEGIN free_before_close + implicit none + + ! pio_file1 uses iodesc1, iodesc2 + ! pio_file2 uses iodesc1, iodesc3 + ! pio_file3 uses iodesc2 + type(file_desc_t) :: pio_file1, pio_file2, pio_file3 + character(len=PIO_TF_MAX_STR_LEN) :: filename1, filename2, filename3 + type(io_desc_t) :: iodesc1, iodesc2, iodesc3 + ! rd_iodesc is used by all files to read data + type(io_desc_t) :: rd_iodesc + + integer, parameter :: VEC_LOCAL_SZ = 7 + integer, dimension(VEC_LOCAL_SZ) :: compdof, compdof_rel_disps + integer, dimension(VEC_LOCAL_SZ) :: buf, rbuf + character(len=*), parameter :: PIO_VAR1_NAME = 'PIO_TF_test_var1' + character(len=*), parameter :: PIO_VAR2_NAME = 'PIO_TF_test_var2' + + ! pio_file1 & pio_file2 have 2 vars, pio_file3 has a single var : All vars have the same size + type(var_desc_t) :: pio_var1_file1, pio_var2_file1, pio_var1_file2, pio_var2_file2, pio_var1_file3 + integer, dimension(1) :: dims + integer :: pio_dim1, pio_dim2, pio_dim3 + + ! iotypes = valid io types + integer, dimension(:), allocatable :: iotypes + character(len=PIO_TF_MAX_STR_LEN), dimension(:), allocatable :: iotype_descs + integer :: num_iotypes + + integer :: i, ierr + + do i=1,VEC_LOCAL_SZ + compdof_rel_disps(i) = i + end do + dims(1) = VEC_LOCAL_SZ * pio_tf_world_sz_ + compdof = VEC_LOCAL_SZ * pio_tf_world_rank_ + compdof_rel_disps + buf = pio_tf_world_rank_; + + filename1 = "test_pio_decomp_simple_tests_free_before_close_file1.testfile" + filename2 = "test_pio_decomp_simple_tests_free_before_close_file2.testfile" + filename3 = "test_pio_decomp_simple_tests_free_before_close_file3.testfile" + + num_iotypes = 0 + call PIO_TF_Get_nc_iotypes(iotypes, iotype_descs, num_iotypes) + do i=1,num_iotypes + PIO_TF_LOG(0,*) "Testing : PIO_TF_DATA_TYPE : ", iotype_descs(i) + + ! Create the 3 files + ierr = PIO_createfile(pio_tf_iosystem_, pio_file1, iotypes(i), filename1, PIO_CLOBBER) + PIO_TF_CHECK_ERR(ierr, "Could not create (file 1) " // trim(filename1)) + + ierr = PIO_createfile(pio_tf_iosystem_, pio_file2, iotypes(i), filename2, PIO_CLOBBER) + PIO_TF_CHECK_ERR(ierr, "Could not create (file 2) " // trim(filename2)) + + ierr = PIO_createfile(pio_tf_iosystem_, pio_file3, iotypes(i), filename3, PIO_CLOBBER) + PIO_TF_CHECK_ERR(ierr, "Could not create (file 3) " // trim(filename3)) + + ierr = PIO_def_dim(pio_file1, 'PIO_TF_test_dim', dims(1), pio_dim1) + PIO_TF_CHECK_ERR(ierr, "Failed to define a dim (file 1) : " // trim(filename1)) + + ierr = PIO_def_dim(pio_file2, 'PIO_TF_test_dim', dims(1), pio_dim2) + PIO_TF_CHECK_ERR(ierr, "Failed to define a dim (file 2) : " // trim(filename2)) + + ierr = PIO_def_dim(pio_file3, 'PIO_TF_test_dim', dims(1), pio_dim3) + PIO_TF_CHECK_ERR(ierr, "Failed to define a dim (file 3) : " // trim(filename3)) + + ierr = PIO_def_var(pio_file1, PIO_VAR1_NAME, PIO_INT, (/pio_dim1/), pio_var1_file1) + PIO_TF_CHECK_ERR(ierr, "Failed to define a var (var 1 + file 1): " // trim(filename1)) + + ierr = PIO_def_var(pio_file1, PIO_VAR2_NAME, PIO_INT, (/pio_dim1/), pio_var2_file1) + PIO_TF_CHECK_ERR(ierr, "Failed to define a var (var 2 + file 1): " // trim(filename1)) + + ierr = PIO_def_var(pio_file2, PIO_VAR1_NAME, PIO_INT, (/pio_dim2/), pio_var1_file2) + PIO_TF_CHECK_ERR(ierr, "Failed to define a var (var 1 + file 2): " // trim(filename2)) + + ierr = PIO_def_var(pio_file2, PIO_VAR2_NAME, PIO_INT, (/pio_dim2/), pio_var2_file2) + PIO_TF_CHECK_ERR(ierr, "Failed to define a var (var 2 + file 2): " // trim(filename2)) + + ierr = PIO_def_var(pio_file3, PIO_VAR1_NAME, PIO_INT, (/pio_dim3/), pio_var1_file3) + PIO_TF_CHECK_ERR(ierr, "Failed to define a var (var 1 + file 3): " // trim(filename3)) + + ierr = PIO_enddef(pio_file1) + PIO_TF_CHECK_ERR(ierr, "Failed to end define mode (file 1): " // trim(filename1)) + + ierr = PIO_enddef(pio_file2) + PIO_TF_CHECK_ERR(ierr, "Failed to end define mode (file 2): " // trim(filename2)) + + ierr = PIO_enddef(pio_file3) + PIO_TF_CHECK_ERR(ierr, "Failed to end define mode (file 3): " // trim(filename3)) + + ! Create I/O decomps : Same decomposition for all variables in all files + call PIO_initdecomp(pio_tf_iosystem_, PIO_INT, dims, compdof, iodesc1) + call PIO_initdecomp(pio_tf_iosystem_, PIO_INT, dims, compdof, iodesc2) + call PIO_initdecomp(pio_tf_iosystem_, PIO_INT, dims, compdof, iodesc3) + + ! Write the variable out into the three files + ! file 1 : writes using iodesc1 and iodesc2 + ! file 2 : writes using iodesc1 and iodesc3 + ! file 3 : writes using iodesc2 + + call PIO_write_darray(pio_file1, pio_var1_file1, iodesc1, buf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to write darray (file 1 + var 1 + iodesc 1): " // trim(filename1)) + + call PIO_write_darray(pio_file1, pio_var2_file1, iodesc2, buf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to write darray (file 1 + var 2 + iodesc 2): " // trim(filename1)) + + call PIO_write_darray(pio_file2, pio_var1_file2, iodesc1, buf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to write darray (file 2 + var 1 + iodesc 1): " // trim(filename2)) + + call PIO_write_darray(pio_file2, pio_var2_file2, iodesc3, buf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to write darray (file 2 + var 2 + iodesc 3): " // trim(filename2)) + + call PIO_write_darray(pio_file3, pio_var1_file3, iodesc2, buf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to write darray (file 3 + var 1 + iodesc 2): " // trim(filename3)) + + ! Free I/O decompisitions (write decomps) before sync (and before closing the files) + call PIO_freedecomp(pio_tf_iosystem_, iodesc1) + call PIO_freedecomp(pio_tf_iosystem_, iodesc2) + call PIO_freedecomp(pio_tf_iosystem_, iodesc3) + +#ifdef PIO_TEST_CLOSE_OPEN_FOR_SYNC + call PIO_closefile(pio_file1) + call PIO_closefile(pio_file2) + call PIO_closefile(pio_file3) + + ierr = PIO_openfile(pio_tf_iosystem_, pio_file1, iotypes(i), filename1, PIO_nowrite) + PIO_TF_CHECK_ERR(ierr, "Could not reopen file (file 1)" // trim(filename1)) + + ierr = PIO_openfile(pio_tf_iosystem_, pio_file2, iotypes(i), filename2, PIO_nowrite) + PIO_TF_CHECK_ERR(ierr, "Could not reopen file (file 2)" // trim(filename2)) + + ierr = PIO_openfile(pio_tf_iosystem_, pio_file3, iotypes(i), filename3, PIO_nowrite) + PIO_TF_CHECK_ERR(ierr, "Could not reopen file (file 3)" // trim(filename3)) + + ierr = PIO_inq_varid(pio_file1, PIO_VAR1_NAME, pio_var1_file1) + PIO_TF_CHECK_ERR(ierr, "Could not inq var (var 1 + file 1): " // trim(filename1)) + + ierr = PIO_inq_varid(pio_file1, PIO_VAR2_NAME, pio_var2_file1) + PIO_TF_CHECK_ERR(ierr, "Could not inq var (var 2 + file 1): " // trim(filename1)) + + ierr = PIO_inq_varid(pio_file2, PIO_VAR1_NAME, pio_var1_file2) + PIO_TF_CHECK_ERR(ierr, "Could not inq var (var 1 + file 2): " // trim(filename2)) + + ierr = PIO_inq_varid(pio_file2, PIO_VAR2_NAME, pio_var2_file2) + PIO_TF_CHECK_ERR(ierr, "Could not inq var (var 2 + file 2): " // trim(filename2)) + + ierr = PIO_inq_varid(pio_file3, PIO_VAR1_NAME, pio_var1_file3) + PIO_TF_CHECK_ERR(ierr, "Could not inq var (var 1 + file 3): " // trim(filename3)) +#else + call PIO_syncfile(pio_file1) + call PIO_syncfile(pio_file2) + call PIO_syncfile(pio_file3) +#endif + + ! Create read I/O decomp + call PIO_initdecomp(pio_tf_iosystem_, PIO_INT, dims, compdof, rd_iodesc) + + ! Read and verify contents of the first file + rbuf = 0 + call PIO_read_darray(pio_file1, pio_var1_file1, rd_iodesc, rbuf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to read darray (var 1 + file 1): " // trim(filename1)) + + PIO_TF_CHECK_VAL((rbuf, buf), "Got wrong val (var 1 + file 1)") + + rbuf = 0 + call PIO_read_darray(pio_file1, pio_var2_file1, rd_iodesc, rbuf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to read darray (var 2 + file 1): " // trim(filename1)) + + PIO_TF_CHECK_VAL((rbuf, buf), "Got wrong val (var 2 + file 1)") + + ! Close/delete first file, the next two files will be closed/deleted together + call PIO_closefile(pio_file1) + call PIO_deletefile(pio_tf_iosystem_, filename1); + + ! Verify the contents of the remaining files (interleaved reads) + ! Read from second file + rbuf = 0 + call PIO_read_darray(pio_file2, pio_var1_file2, rd_iodesc, rbuf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to read darray (var 1 + file 2): " // trim(filename2)) + + PIO_TF_CHECK_VAL((rbuf, buf), "Got wrong val (var 1 + file 2)") + + ! Read from third file + rbuf = 0 + call PIO_read_darray(pio_file3, pio_var1_file3, rd_iodesc, rbuf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to read darray (var 1 + file 3): " // trim(filename3)) + + PIO_TF_CHECK_VAL((rbuf, buf), "Got wrong val (var 1 + file 3)") + + ! Now read back from second file + rbuf = 0 + call PIO_read_darray(pio_file2, pio_var2_file2, rd_iodesc, rbuf, ierr) + PIO_TF_CHECK_ERR(ierr, "Failed to read darray (var 2 + file 2): " // trim(filename2)) + + PIO_TF_CHECK_VAL((rbuf, buf), "Got wrong val (var 2 + file 2)") + + call PIO_closefile(pio_file2) + call PIO_closefile(pio_file3) + call PIO_deletefile(pio_tf_iosystem_, filename2); + call PIO_deletefile(pio_tf_iosystem_, filename3); + + ! Test freeing (read) I/O decomp after the files are closed/deleted + call PIO_freedecomp(pio_tf_iosystem_, rd_iodesc) + end do + if(allocated(iotypes)) then + deallocate(iotypes) + deallocate(iotype_descs) + end if + +PIO_TF_AUTO_TEST_SUB_END free_before_close + ! Write 1d array, although diff procs have different ! number of elements to write locally they all use ! the same buffer size (compdof size is different for From 248bf77c95a74a4e4c5aae61708a027d8574e408 Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Sun, 24 May 2026 20:34:18 -0500 Subject: [PATCH 2/7] Rm unused linked list remnant in iodesc We no longer use custom linked lists for I/O decomposition lists. Removing unused/stale remnants (ptr to next) of the old linked list in io_desc_t. --- src/clib/pio_types.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/clib/pio_types.hpp b/src/clib/pio_types.hpp index 7d15526516..a9dc77dd0f 100644 --- a/src/clib/pio_types.hpp +++ b/src/clib/pio_types.hpp @@ -345,9 +345,6 @@ typedef struct io_desc_t /* Number of pending async ops using this I/O desc */ std::atomic_int nasync_pend_ops; - - /** Pointer to the next io_desc_t in the list. */ - struct io_desc_t *next; } io_desc_t; /* Forward decl for I/O file summary stats info */ From efc1f7708b7855582df124726622c7cb4951ab2d Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Mon, 25 May 2026 19:13:08 -0500 Subject: [PATCH 3/7] Moving I/O decomp utils to spio_iodesc_utils.cpp Moving some util functions related to I/O decomposition from pioc_support.cpp to a new file (spio_iodesc_utils.cpp) No change in source code --- src/clib/core/CMakeLists.txt | 1 + src/clib/core/pioc_support.cpp | 285 --------------------------- src/clib/core/spio_iodesc_utils.cpp | 294 ++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+), 285 deletions(-) create mode 100644 src/clib/core/spio_iodesc_utils.cpp diff --git a/src/clib/core/CMakeLists.txt b/src/clib/core/CMakeLists.txt index 09fb86ec06..7951aed68d 100644 --- a/src/clib/core/CMakeLists.txt +++ b/src/clib/core/CMakeLists.txt @@ -6,6 +6,7 @@ message(STATUS "===== Configuring SCORPIO C Core... =====") set (spio_core_src pio_spmd.cpp pioc_support.cpp + spio_iodesc_utils.cpp pioc.cpp pio_nc.cpp pio_getput_int.cpp diff --git a/src/clib/core/pioc_support.cpp b/src/clib/core/pioc_support.cpp index c4d7430297..3fcfa17f5a 100644 --- a/src/clib/core/pioc_support.cpp +++ b/src/clib/core/pioc_support.cpp @@ -1528,52 +1528,6 @@ void PIOc_warn(int iosysid, int ncid, } } -/** - * Allocate a region struct, and initialize it. - * - * @param ios pointer to the IO system info, used for error - * handling. Ignored if NULL. - * @param ndims the number of dimensions for the data in this region. - * @param a pointer that gets a pointer to the newly allocated - * io_region struct. - * @returns 0 for success, error code otherwise. - */ -int alloc_region2(iosystem_desc_t *ios, int ndims, io_region **regionp) -{ - io_region *region; - - /* Check inputs. */ - pioassert(ndims >= 0 && regionp, "invalid input", __FILE__, __LINE__); - LOG((1, "alloc_region2 ndims = %d sizeof(io_region) = %d", ndims, - sizeof(io_region))); - - /* Allocate memory for the io_region struct. */ - if (!(region = (io_region *) calloc(1, sizeof(io_region)))) - { - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating region. Out of memory allocating %lld bytes I/O region", (unsigned long long) sizeof(io_region)); - } - - /* Allocate memory for the array of start indicies. */ - if (!(region->start = (PIO_Offset *) calloc(ndims, sizeof(PIO_Offset)))) - { - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating region. Out of memory allocating %lld bytes for start array in the I/O region", (unsigned long long) (ndims * sizeof(PIO_Offset))); - } - - /* Allocate memory for the array of counts. */ - if (!(region->count = (PIO_Offset *) calloc(ndims, sizeof(PIO_Offset)))) - { - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating region. Out of memory allocating %lld bytes for count array in the I/O region", (unsigned long long) (ndims * sizeof(PIO_Offset))); - } - - /* Return pointer to new region to caller. */ - *regionp = region; - - return PIO_NOERR; -} - /** * Given a PIO type, find the MPI type and the type size. * @@ -1657,245 +1611,6 @@ int find_mpi_type(int pio_type, MPI_Datatype *mpi_type, int *type_size) return PIO_NOERR; } -/** - * Allocate space for an IO description struct, and initialize it. - * - * @param ios pointer to the IO system info, used for error - * handling. - * @param piotype the PIO data type (ex. PIO_FLOAT, PIO_INT, etc.). - * @param ndims the number of dimensions. - * @param maplen the length of the local decomposition map - * @param iodesc pointer that gets the newly allocated io_desc_t. - * @returns 0 for success, error code otherwise. - */ -int malloc_iodesc(iosystem_desc_t *ios, int piotype, int ndims, int maplen, - io_desc_t **iodesc) -{ - MPI_Datatype mpi_type; - PIO_Offset type_size; - int mpierr = MPI_SUCCESS; - int ret; - - /* Check input. */ - pioassert(ios && piotype > 0 && ndims >= 0 && iodesc, - "invalid input", __FILE__, __LINE__); - - LOG((1, "malloc_iodesc piotype = %d ndims = %d", piotype, ndims)); - - /* Get the MPI type corresponding with the PIO type. */ - if((ret = find_mpi_type(piotype, &mpi_type, NULL))){ - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Unable to find MPI type corresponding to PIO type (%d)", piotype); - } - - /* What is the size of the pio type? */ - if((ret = spio_pnetcdf_inq_type(0, piotype, NULL, &type_size))){ - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Finding the size of PIO type (%d) failed", piotype); - } - - /* Allocate space for the io_desc_t struct. */ - if(!(*iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))){ - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Out of memory allocating %lld bytes for the I/O descriptor", (unsigned long long) sizeof(io_desc_t)); - } - - /* Remember the pio type and its size. */ - (*iodesc)->piotype = piotype; - (*iodesc)->piotype_size = type_size; - - /* Remember the MPI type. */ - (*iodesc)->mpitype = mpi_type; - - /* Get the size of the type. */ - if((mpierr = MPI_Type_size((*iodesc)->mpitype, &(*iodesc)->mpitype_size))){ - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - /* Initialize some values in the struct. */ - (*iodesc)->maxregions = 1; - (*iodesc)->ioid = -1; - (*iodesc)->maplen = maplen; - (*iodesc)->ndims = ndims; - - /* Allocate space for, and initialize, the first region. */ - if((ret = alloc_region2(ios, ndims, &((*iodesc)->firstregion)))){ - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Allocating memory for 1st region failed. Out of memory allocating memory for I/O region in the I/O descriptor"); - } - - /* Allocate memory for the local decomposition map */ - if(!((*iodesc)->map = (PIO_Offset *) malloc(sizeof(PIO_Offset) * maplen))){ - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating memory (%lld bytes) to store I/O decomposition map", (unsigned long long) (sizeof(PIO_Offset) * maplen)); - } - - /* Allocate memory for storing the dimension lengths of variables that use this decomposition */ - if(!((*iodesc)->dimlen = (int *)malloc(sizeof(int) * ndims))){ - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating memory (%lld bytes) for storing dimension sizes in the I/O decomposition map", (unsigned long long) (sizeof(int) * ndims)); - } - - /* Set the swap memory settings to defaults for this IO system. */ - (*iodesc)->rearr_opts = ios->rearr_opts; - -#if PIO_SAVE_DECOMPS - /* The descriptor is not yet saved to disk */ - (*iodesc)->is_saved = false; -#endif - - return PIO_NOERR; -} - -/** - * Free a region list. - * - * top a pointer to the start of the list to free. - */ -void free_region_list(io_region *top) -{ - io_region *ptr, *tptr; - - ptr = top; - while (ptr) - { - if (ptr->start) - free(ptr->start); - if (ptr->count) - free(ptr->count); - tptr = ptr; - ptr = ptr->next; - free(tptr); - } -} - -/** - * Free a decomposition map. - * - * @param iosysid the IO system ID. - * @param ioid the ID of the decomposition map to free. - * @returns 0 for success, error code otherwise. - */ -int PIOc_freedecomp_impl(int iosysid, int ioid) -{ - iosystem_desc_t *ios; - io_desc_t *iodesc; - int mpierr = MPI_SUCCESS; /* Return code from MPI function calls. */ - int ret = 0; - - if (!(ios = pio_get_iosystem_from_id(iosysid))) - { - return pio_err(NULL, NULL, PIO_EBADID, __FILE__, __LINE__, - "Freeing PIO decomposition failed. Invalid iosystem id (%d) provided", iosysid); - } - assert(ios); - spio_ltimer_start(ios->io_fstats->tot_timer_name); - - if (!(iodesc = pio_get_iodesc_from_id(ioid))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return pio_err(ios, NULL, PIO_EBADID, __FILE__, __LINE__, - "Freeing PIO decomposition failed. Invalid io decomposition id (%d) provided", ioid); - } - - /* If async is in use, and this is not an IO task, bcast the parameters. */ - if (ios->async) - { - int msg = PIO_MSG_FREEDECOMP; /* Message for async notification. */ - - PIO_SEND_ASYNC_MSG(ios, msg, &ret, iosysid, ioid); - if(ret != PIO_NOERR) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Freeing PIO decomposition failed (iosysid = %d, iodesc id=%d). Error sending asynchronous message, PIO_MSG_FREEDECOMP, on iosystem", iosysid, ioid); - } - } - - if(iodesc->nasync_pend_ops > 0){ - /* Let I/O desc be freed during finalize */ - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return PIO_NOERR; - } - - /* Free the map. */ - free(iodesc->map); - - /* Free the dimlens. */ - free(iodesc->dimlen); - - if (iodesc->rfrom) - free(iodesc->rfrom); - - if (iodesc->rtype) - { - for (int i = 0; i < iodesc->nrecvs; i++) - if (iodesc->rtype[i] != MPI_DATATYPE_NULL) - if ((mpierr = MPI_Type_free(&iodesc->rtype[i]))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - free(iodesc->rtype); - } - - if (iodesc->stype) - { - for (int i = 0; i < iodesc->num_stypes; i++) - if (iodesc->stype[i] != MPI_DATATYPE_NULL) - if ((mpierr = MPI_Type_free(iodesc->stype + i))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - iodesc->num_stypes = 0; - free(iodesc->stype); - } - - if (iodesc->scount) - free(iodesc->scount); - - if (iodesc->rcount) - free(iodesc->rcount); - - if (iodesc->sindex) - free(iodesc->sindex); - - if (iodesc->rindex) - free(iodesc->rindex); - - if (iodesc->firstregion) - free_region_list(iodesc->firstregion); - - if (iodesc->fillregion) - free_region_list(iodesc->fillregion); - - if (iodesc->rearranger == PIO_REARR_SUBSET) - if ((mpierr = MPI_Comm_free(&iodesc->subset_comm))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - if(iodesc->rearr){ - iodesc->rearr->finalize(); - delete iodesc->rearr; - } - - ret = pio_delete_iodesc_from_list(ioid); - if (ret != PIO_NOERR) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Freeing PIO decomposition failed (iosysid = %d, ioid=%d). Error while trying to delete I/O descriptor from internal list", iosysid, ioid); - } - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - - return ret; -} - int PIOc_readmap_txt_impl(const char *file, int *ndims, int **gdims, PIO_Offset *fmaplen, PIO_Offset **map, MPI_Comm comm) { diff --git a/src/clib/core/spio_iodesc_utils.cpp b/src/clib/core/spio_iodesc_utils.cpp new file mode 100644 index 0000000000..3cde284950 --- /dev/null +++ b/src/clib/core/spio_iodesc_utils.cpp @@ -0,0 +1,294 @@ +/** @file + * Utils related to I/O decomposition + */ +#include "pio_config.h" +#include "pio.h" +#include "pio_internal.h" +#include "spio_io_summary.h" +#include "pio_rearr_contig.hpp" + +/** + * Allocate a region struct, and initialize it. + * + * @param ios pointer to the IO system info, used for error + * handling. Ignored if NULL. + * @param ndims the number of dimensions for the data in this region. + * @param a pointer that gets a pointer to the newly allocated + * io_region struct. + * @returns 0 for success, error code otherwise. + */ +int alloc_region2(iosystem_desc_t *ios, int ndims, io_region **regionp) +{ + io_region *region; + + /* Check inputs. */ + pioassert(ndims >= 0 && regionp, "invalid input", __FILE__, __LINE__); + LOG((1, "alloc_region2 ndims = %d sizeof(io_region) = %d", ndims, + sizeof(io_region))); + + /* Allocate memory for the io_region struct. */ + if (!(region = (io_region *) calloc(1, sizeof(io_region)))) + { + return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, + "Internal error while allocating region. Out of memory allocating %lld bytes I/O region", (unsigned long long) sizeof(io_region)); + } + + /* Allocate memory for the array of start indicies. */ + if (!(region->start = (PIO_Offset *) calloc(ndims, sizeof(PIO_Offset)))) + { + return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, + "Internal error while allocating region. Out of memory allocating %lld bytes for start array in the I/O region", (unsigned long long) (ndims * sizeof(PIO_Offset))); + } + + /* Allocate memory for the array of counts. */ + if (!(region->count = (PIO_Offset *) calloc(ndims, sizeof(PIO_Offset)))) + { + return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, + "Internal error while allocating region. Out of memory allocating %lld bytes for count array in the I/O region", (unsigned long long) (ndims * sizeof(PIO_Offset))); + } + + /* Return pointer to new region to caller. */ + *regionp = region; + + return PIO_NOERR; +} + +/** + * Allocate space for an IO description struct, and initialize it. + * + * @param ios pointer to the IO system info, used for error + * handling. + * @param piotype the PIO data type (ex. PIO_FLOAT, PIO_INT, etc.). + * @param ndims the number of dimensions. + * @param maplen the length of the local decomposition map + * @param iodesc pointer that gets the newly allocated io_desc_t. + * @returns 0 for success, error code otherwise. + */ +int malloc_iodesc(iosystem_desc_t *ios, int piotype, int ndims, int maplen, + io_desc_t **iodesc) +{ + MPI_Datatype mpi_type; + PIO_Offset type_size; + int mpierr = MPI_SUCCESS; + int ret; + + /* Check input. */ + pioassert(ios && piotype > 0 && ndims >= 0 && iodesc, + "invalid input", __FILE__, __LINE__); + + LOG((1, "malloc_iodesc piotype = %d ndims = %d", piotype, ndims)); + + /* Get the MPI type corresponding with the PIO type. */ + if((ret = find_mpi_type(piotype, &mpi_type, NULL))){ + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Internal error while allocating memory for iodesc. Unable to find MPI type corresponding to PIO type (%d)", piotype); + } + + /* What is the size of the pio type? */ + if((ret = spio_pnetcdf_inq_type(0, piotype, NULL, &type_size))){ + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Internal error while allocating memory for iodesc. Finding the size of PIO type (%d) failed", piotype); + } + + /* Allocate space for the io_desc_t struct. */ + if(!(*iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))){ + return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, + "Internal error while allocating memory for iodesc. Out of memory allocating %lld bytes for the I/O descriptor", (unsigned long long) sizeof(io_desc_t)); + } + + /* Remember the pio type and its size. */ + (*iodesc)->piotype = piotype; + (*iodesc)->piotype_size = type_size; + + /* Remember the MPI type. */ + (*iodesc)->mpitype = mpi_type; + + /* Get the size of the type. */ + if((mpierr = MPI_Type_size((*iodesc)->mpitype, &(*iodesc)->mpitype_size))){ + return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); + } + + /* Initialize some values in the struct. */ + (*iodesc)->maxregions = 1; + (*iodesc)->ioid = -1; + (*iodesc)->maplen = maplen; + (*iodesc)->ndims = ndims; + + /* Allocate space for, and initialize, the first region. */ + if((ret = alloc_region2(ios, ndims, &((*iodesc)->firstregion)))){ + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Internal error while allocating memory for iodesc. Allocating memory for 1st region failed. Out of memory allocating memory for I/O region in the I/O descriptor"); + } + + /* Allocate memory for the local decomposition map */ + if(!((*iodesc)->map = (PIO_Offset *) malloc(sizeof(PIO_Offset) * maplen))){ + return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, + "Internal error while allocating memory (%lld bytes) to store I/O decomposition map", (unsigned long long) (sizeof(PIO_Offset) * maplen)); + } + + /* Allocate memory for storing the dimension lengths of variables that use this decomposition */ + if(!((*iodesc)->dimlen = (int *)malloc(sizeof(int) * ndims))){ + return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, + "Internal error while allocating memory (%lld bytes) for storing dimension sizes in the I/O decomposition map", (unsigned long long) (sizeof(int) * ndims)); + } + + /* Set the swap memory settings to defaults for this IO system. */ + (*iodesc)->rearr_opts = ios->rearr_opts; + +#if PIO_SAVE_DECOMPS + /* The descriptor is not yet saved to disk */ + (*iodesc)->is_saved = false; +#endif + + return PIO_NOERR; +} + +/** + * Free a region list. + * + * top a pointer to the start of the list to free. + */ +void free_region_list(io_region *top) +{ + io_region *ptr, *tptr; + + ptr = top; + while (ptr) + { + if (ptr->start) + free(ptr->start); + if (ptr->count) + free(ptr->count); + tptr = ptr; + ptr = ptr->next; + free(tptr); + } +} + +/** + * Free a decomposition map. + * + * @param iosysid the IO system ID. + * @param ioid the ID of the decomposition map to free. + * @returns 0 for success, error code otherwise. + */ +int PIOc_freedecomp_impl(int iosysid, int ioid) +{ + iosystem_desc_t *ios; + io_desc_t *iodesc; + int mpierr = MPI_SUCCESS; /* Return code from MPI function calls. */ + int ret = 0; + + if (!(ios = pio_get_iosystem_from_id(iosysid))) + { + return pio_err(NULL, NULL, PIO_EBADID, __FILE__, __LINE__, + "Freeing PIO decomposition failed. Invalid iosystem id (%d) provided", iosysid); + } + assert(ios); + spio_ltimer_start(ios->io_fstats->tot_timer_name); + + if (!(iodesc = pio_get_iodesc_from_id(ioid))) + { + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return pio_err(ios, NULL, PIO_EBADID, __FILE__, __LINE__, + "Freeing PIO decomposition failed. Invalid io decomposition id (%d) provided", ioid); + } + + /* If async is in use, and this is not an IO task, bcast the parameters. */ + if (ios->async) + { + int msg = PIO_MSG_FREEDECOMP; /* Message for async notification. */ + + PIO_SEND_ASYNC_MSG(ios, msg, &ret, iosysid, ioid); + if(ret != PIO_NOERR) + { + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Freeing PIO decomposition failed (iosysid = %d, iodesc id=%d). Error sending asynchronous message, PIO_MSG_FREEDECOMP, on iosystem", iosysid, ioid); + } + } + + if(iodesc->nasync_pend_ops > 0){ + /* Let I/O desc be freed during finalize */ + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return PIO_NOERR; + } + + /* Free the map. */ + free(iodesc->map); + + /* Free the dimlens. */ + free(iodesc->dimlen); + + if (iodesc->rfrom) + free(iodesc->rfrom); + + if (iodesc->rtype) + { + for (int i = 0; i < iodesc->nrecvs; i++) + if (iodesc->rtype[i] != MPI_DATATYPE_NULL) + if ((mpierr = MPI_Type_free(&iodesc->rtype[i]))) + { + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); + } + + free(iodesc->rtype); + } + + if (iodesc->stype) + { + for (int i = 0; i < iodesc->num_stypes; i++) + if (iodesc->stype[i] != MPI_DATATYPE_NULL) + if ((mpierr = MPI_Type_free(iodesc->stype + i))) + { + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); + } + + iodesc->num_stypes = 0; + free(iodesc->stype); + } + + if (iodesc->scount) + free(iodesc->scount); + + if (iodesc->rcount) + free(iodesc->rcount); + + if (iodesc->sindex) + free(iodesc->sindex); + + if (iodesc->rindex) + free(iodesc->rindex); + + if (iodesc->firstregion) + free_region_list(iodesc->firstregion); + + if (iodesc->fillregion) + free_region_list(iodesc->fillregion); + + if (iodesc->rearranger == PIO_REARR_SUBSET) + if ((mpierr = MPI_Comm_free(&iodesc->subset_comm))) + { + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); + } + + if(iodesc->rearr){ + iodesc->rearr->finalize(); + delete iodesc->rearr; + } + + ret = pio_delete_iodesc_from_list(ioid); + if (ret != PIO_NOERR) + { + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Freeing PIO decomposition failed (iosysid = %d, ioid=%d). Error while trying to delete I/O descriptor from internal list", iosysid, ioid); + } + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + + return ret; +} + From a27f3a744d41a6a55f903c8e2491e7f4661b8d6a Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Tue, 26 May 2026 22:56:02 -0500 Subject: [PATCH 4/7] Convert io_desc_t to a simple C++ class Converting io_desc_t from a C struct to a simple C++ struct with constructor/destructor/util functions. The io_desc_t is still not a "full fledged" C++ class (this will be done in a future PR). However this minimum change is required to use shared pointers with I/O decomps. * Adding a constructor that manages constructing the io_desc_t object (instead of custom functions that alloc memory and initialize fields) * Also adding a destructor to free decomp and some util functions to free up resources. * All members of the io_desc_t struct is still public since a lot of the code base access the members directly * The member arrays/pointers are still allocated memory explicitly using malloc/calloc (these will be changed in future PRs) * A shared pointer to the io_desc_t, stored in the global io_desc list, now manages the lifetime of I/O decomps * Modified tests accordingly * Modified asserts to account for zero sized local decomp maps --- src/clib/core/pioc.cpp | 70 +---- src/clib/core/rearr/pio_rearrange.cpp | 10 +- src/clib/core/spio_iodesc_utils.cpp | 383 ++++++++++++------------ src/clib/core/util/pio_lists.cpp | 17 +- src/clib/pio_internal.h | 5 +- src/clib/pio_types.hpp | 21 +- tests/cunit/test_pioc.cpp | 16 +- tests/cunit/test_rearr.cpp | 173 ++++++----- tests/cunit/test_spio_decomp_logger.cpp | 22 +- 9 files changed, 337 insertions(+), 380 deletions(-) diff --git a/src/clib/core/pioc.cpp b/src/clib/core/pioc.cpp index 195567931a..81457b9371 100644 --- a/src/clib/core/pioc.cpp +++ b/src/clib/core/pioc.cpp @@ -535,7 +535,7 @@ static int subset_rearranger_init(iosystem_desc_t *ios, io_desc_t *iodesc, const int ret = PIO_NOERR; assert(ios && iodesc); - assert(iodesc->map && iodesc->dimlen); + assert(((iodesc->maplen == 0) || iodesc->map) && iodesc->dimlen); iodesc->num_aiotasks = ios->num_iotasks; LOG((2, "creating subset rearranger iodesc->num_aiotasks = %d", iodesc->num_aiotasks)); @@ -552,7 +552,7 @@ static int box_rearranger_init(iosystem_desc_t *ios, io_desc_t *iodesc, const PI { int ret = PIO_NOERR; assert(ios && iodesc); - assert(iodesc->map && iodesc->dimlen && iodesc->firstregion); + assert(((iodesc->maplen == 0) || iodesc->map) && ((iodesc->ndims == 0) || iodesc->dimlen) && iodesc->firstregion); if(ios->ioproc){ /* Unless the user specifies the start and count for each @@ -652,7 +652,7 @@ static int contig_rearranger_init(iosystem_desc_t *ios, io_desc_t *iodesc, const int ret = PIO_NOERR; assert(ios && iodesc); - assert(iodesc->map && iodesc->dimlen); + assert(((iodesc->maplen == 0) || iodesc->map) && ((iodesc->ndims == 0) || iodesc->dimlen)); if(iostart && iocount){ /* FIXME: We should at least convert iostart/iocount arrays to decomp maps */ @@ -746,7 +746,6 @@ static int initdecomp(int iosysid, int pio_type, int ndims, const int *gdimlen, const PIO_Offset *iostart, const PIO_Offset *iocount, bool map_zero_based=false) { iosystem_desc_t *ios; /* Pointer to io system information. */ - io_desc_t *iodesc; /* The IO description. */ int mpierr = MPI_SUCCESS; /* Return code from MPI function calls. */ int ierr; /* Return code. */ @@ -810,22 +809,7 @@ static int initdecomp(int iosysid, int pio_type, int ndims, const int *gdimlen, } } - /* Allocate space for the iodesc info. This also allocates the - * first region and copies the rearranger opts into this - * iodesc. */ - if((ierr = malloc_iodesc(ios, pio_type, ndims, maplen, &iodesc))){ - return pio_err(ios, NULL, ierr, __FILE__, __LINE__, - "Initializing the PIO decomposition failed. Out of memory allocating memory for I/O descriptor (ndims = %d, maplen = %d)", ndims, maplen); - } - - /* Set the rearranger. */ - if(!rearranger){ - iodesc->rearranger = ios->default_rearranger; - } - else{ - iodesc->rearranger = *rearranger; - } - LOG((2, "iodesc->rearranger = %d", iodesc->rearranger)); + int rearr = (rearranger) ? *rearranger : ios->default_rearranger; /* In scenarios involving ultra-high resolution E3SM/SCREAM cases, * the default BOX rearranger may encounter significant performance @@ -844,45 +828,20 @@ static int initdecomp(int iosysid, int pio_type, int ndims, const int *gdimlen, * potentially negating the benefits of reduced initialization time * associated with this approach. */ - if(iodesc->rearranger == PIO_REARR_ANY){ - iodesc->rearranger = spio_get_opt_pio_rearr(ios, maplen); - } - - /* Cache the local decomposition map */ - if(map_zero_based){ - /* BOX and SUBSET rearrangers expect map to the 1-based */ - if((iodesc->rearranger == PIO_REARR_BOX) || (iodesc->rearranger == PIO_REARR_SUBSET)){ - std::transform(compmap, compmap + maplen, iodesc->map, - [](PIO_Offset off) { return off + 1; }); - } - else{ - std::copy(compmap, compmap + maplen, iodesc->map); - } - } - else{ - /* The decomposition map is 1-based */ - if(iodesc->rearranger == PIO_REARR_CONTIG){ - /* CONTIG rearranger expects map to be 0-based */ - std::transform(compmap, compmap + maplen, iodesc->map, - [](PIO_Offset off) { return off - 1; }); - } - else{ - std::copy(compmap, compmap + maplen, iodesc->map); - } - } + if(rearr == PIO_REARR_ANY) { rearr = spio_get_opt_pio_rearr(ios, maplen); } - /* Cache the dimension lengths */ - std::copy(gdimlen, gdimlen + ndims, iodesc->dimlen); + std::shared_ptr iodesc = std::make_shared(ios, pio_type, + ndims, gdimlen, maplen, compmap, rearr, map_zero_based); /* Initialize the rearranger */ if(iodesc->rearranger == PIO_REARR_SUBSET){ - ierr = subset_rearranger_init(ios, iodesc, iostart, iocount); + ierr = subset_rearranger_init(ios, iodesc.get(), iostart, iocount); } else if(iodesc->rearranger == PIO_REARR_BOX){ - ierr = box_rearranger_init(ios, iodesc, iostart, iocount); + ierr = box_rearranger_init(ios, iodesc.get(), iostart, iocount); } else if(iodesc->rearranger == PIO_REARR_CONTIG){ - ierr = contig_rearranger_init(ios, iodesc, iostart, iocount); + ierr = contig_rearranger_init(ios, iodesc.get(), iostart, iocount); } else{ return pio_err(ios, NULL, ierr, __FILE__, __LINE__, @@ -905,20 +864,21 @@ static int initdecomp(int iosysid, int pio_type, int ndims, const int *gdimlen, */ comm = ios->union_comm; } + *ioidp = pio_add_to_iodesc_list(iodesc, comm); #if PIO_SAVE_DECOMPS if(pio_save_decomps_regex_match(*ioidp, NULL, NULL)){ std::string filename; - pio_create_uniq_str(ios, iodesc, filename, "piodecomp", ".dat"); + pio_create_uniq_str(ios, iodesc.get(), filename, "piodecomp", ".dat"); LOG((2, "Saving decomp map to %s", filename.c_str())); PIOc_writemap_impl(filename.c_str(), *ioidp, ndims, gdimlen, maplen, (PIO_Offset *)compmap, ios->my_comm); std::string log_fname; - pio_create_uniq_str(ios, iodesc, log_fname, "piodecomp", ".nc"); + pio_create_uniq_str(ios, iodesc.get(), log_fname, "piodecomp", ".nc"); SPIO_Util::Decomp_Util::Decomp_logger *logger = SPIO_Util::Decomp_Util::create_decomp_logger(ios->comp_comm, log_fname); - (*logger).write_only().open().put(iodesc).close(); + (*logger).write_only().open().put(iodesc.get()).close(); delete logger; iodesc->is_saved = true; @@ -928,7 +888,7 @@ static int initdecomp(int iosysid, int pio_type, int ndims, const int *gdimlen, #endif #if PIO_ENABLE_LOGGING - dbg_log_iodesc(ios, iodesc); + dbg_log_iodesc(ios, iodesc.get()); #endif /* PIO_ENABLE_LOGGING */ return PIO_NOERR; diff --git a/src/clib/core/rearr/pio_rearrange.cpp b/src/clib/core/rearr/pio_rearrange.cpp index 514594d221..4c11e751bf 100644 --- a/src/clib/core/rearr/pio_rearrange.cpp +++ b/src/clib/core/rearr/pio_rearrange.cpp @@ -1218,7 +1218,7 @@ int determine_fill(iosystem_desc_t *ios, io_desc_t *iodesc, const int *gdimlen, int mpierr; /* Return code from MPI calls. */ /* Check inputs. */ - pioassert(ios && iodesc && gdimlen && compmap, "invalid input", + pioassert(ios && iodesc && gdimlen, "invalid input", __FILE__, __LINE__); /* Determine size of data space. */ @@ -1228,10 +1228,12 @@ int determine_fill(iosystem_desc_t *ios, io_desc_t *iodesc, const int *gdimlen, /* Determine how many values we have locally. */ if (iodesc->rearranger == PIO_REARR_SUBSET) totalllen = iodesc->llen; - else + else{ + pioassert((iodesc->ndof == 0) || compmap, "invalid input", __FILE__, __LINE__); for (int i = 0; i < iodesc->ndof; i++) if (compmap[i] > 0) totalllen++; + } /* Add results accross communicator. */ LOG((2, "determine_fill before allreduce totalllen = %d totalgridsize = %d", @@ -1298,7 +1300,7 @@ int box_rearrange_create(iosystem_desc_t *ios, int maplen, const PIO_Offset *com GPTLstart("PIO:box_rearrange_create"); /* Check inputs. */ - pioassert(ios && maplen >= 0 && compmap && gdimlen && ndims > 0 && iodesc, + pioassert(ios && maplen >= 0 && ((maplen == 0) || compmap) && ((ndims == 0) || gdimlen) && ndims >= 0 && iodesc, "invalid input", __FILE__, __LINE__); LOG((1, "box_rearrange_create maplen = %d ndims = %d ios->num_comptasks = %d " "ios->num_iotasks = %d", maplen, ndims, ios->num_comptasks, ios->num_iotasks)); @@ -2201,7 +2203,7 @@ int subset_rearrange_create(iosystem_desc_t *ios, int maplen, PIO_Offset *compma GPTLstart("PIO:subset_rearrange_create"); /* Check inputs. */ - pioassert(ios && maplen >= 0 && compmap && gdimlen && ndims >= 0 && iodesc, + pioassert(ios && maplen >= 0 && ((maplen == 0) || compmap) && gdimlen && ndims >= 0 && iodesc, "invalid input", __FILE__, __LINE__); LOG((2, "subset_rearrange_create maplen = %d ndims = %d", maplen, ndims)); diff --git a/src/clib/core/spio_iodesc_utils.cpp b/src/clib/core/spio_iodesc_utils.cpp index 3cde284950..57c8038007 100644 --- a/src/clib/core/spio_iodesc_utils.cpp +++ b/src/clib/core/spio_iodesc_utils.cpp @@ -7,6 +7,8 @@ #include "spio_io_summary.h" #include "pio_rearr_contig.hpp" +#include + /** * Allocate a region struct, and initialize it. * @@ -53,116 +55,170 @@ int alloc_region2(iosystem_desc_t *ios, int ndims, io_region **regionp) return PIO_NOERR; } -/** - * Allocate space for an IO description struct, and initialize it. - * - * @param ios pointer to the IO system info, used for error - * handling. - * @param piotype the PIO data type (ex. PIO_FLOAT, PIO_INT, etc.). - * @param ndims the number of dimensions. - * @param maplen the length of the local decomposition map - * @param iodesc pointer that gets the newly allocated io_desc_t. - * @returns 0 for success, error code otherwise. - */ -int malloc_iodesc(iosystem_desc_t *ios, int piotype, int ndims, int maplen, - io_desc_t **iodesc) +io_desc_t::io_desc_t(iosystem_desc_t *ios, int piotype, + int ndims, const int *gdimlen, + int maplen, const PIO_Offset *compmap, + int rearranger, bool map_zero_based): ioid(SPIO_INVALID_ID), + maplen(0), map(NULL), nrecvs(0), ndof(0), ndims(0), dimlen(NULL), + num_aiotasks(0), rearranger(rearranger), maxregions(0), needsfill(false), + maxbytes(0), piotype(piotype), piotype_size(0), mpitype(MPI_DATATYPE_NULL), + mpitype_size(0), llen(0), maxiobuflen(0), rfrom(NULL), rcount(NULL), + scount(NULL), sindex(NULL), rindex(NULL), rtype(NULL), stype(NULL), + num_stypes(0), holegridsize(0), maxholegridsize(0), maxfillregions(0), + firstregion(NULL), fillregion(NULL), + rearr_opts({PIO_REARR_COMM_P2P, PIO_REARR_COMM_FC_2D_ENABLE, + {true, true, PIO_REARR_COMM_UNLIMITED_PEND_REQ}, + {true, true, PIO_REARR_COMM_UNLIMITED_PEND_REQ}}), + subset_comm(MPI_COMM_NULL), is_saved(false), rearr(NULL), + nasync_pend_ops(0) { - MPI_Datatype mpi_type; - PIO_Offset type_size; - int mpierr = MPI_SUCCESS; - int ret; - - /* Check input. */ - pioassert(ios && piotype > 0 && ndims >= 0 && iodesc, - "invalid input", __FILE__, __LINE__); - - LOG((1, "malloc_iodesc piotype = %d ndims = %d", piotype, ndims)); + int ret; + + assert(ios && (piotype > 0) && (ndims >= 0)); + + ret = find_mpi_type(piotype, &mpitype, NULL); + if(ret == PIO_NOERR) { ret = MPI_Type_size(mpitype, &mpitype_size); } + if(ret != PIO_NOERR){ + throw std::runtime_error(std::string("Internal error while allocating memory for iodesc. Unable to find MPI type (or type size) corresponding to PIO type (") + std::to_string(piotype) + ")"); + } + + /* FIXME: Add version of func to inq type size as ints */ + PIO_Offset tmp_sz = 0; + ret = spio_pnetcdf_inq_type(0, piotype, NULL, &tmp_sz); + if(ret != PIO_NOERR){ + throw std::runtime_error(std::string("Internal error while allocating memory for iodesc. Unable to find the size of PIO type (") + std::to_string(piotype) + ")"); + } + piotype_size = static_cast(tmp_sz); + + /* FIXME: Move to C++ lists of classes instead of these custom lists */ + ret = alloc_region2(ios, ndims, &firstregion); + if(ret != PIO_NOERR){ + throw std::runtime_error("Internal error while allocating memory for iodesc. Allocating memory for 1st region failed. Out of memory allocating memory for I/O region in the I/O descriptor"); + } + maxregions = 1; + + /* Cache the local decomposition map (and transform it as needed) */ + init_map(maplen, compmap, rearranger, map_zero_based); + + /* FIXME: Move to a vector, instead of malloc()ed array */ + /* Cache global dimension lengths */ + dimlen = static_cast(malloc(sizeof(int) * ndims)); + if(dimlen){ + this->ndims = ndims; + std::copy(gdimlen, gdimlen + ndims, dimlen); + } + else{ + throw std::runtime_error(std::string("Internal error while allocating memory (") + std::to_string(sizeof(int) * ndims) + "bytes) for storing dimension sizes in the I/O decomposition map"); + } + + /* Set the swap memory settings to defaults for this IO system. */ + rearr_opts = ios->rearr_opts; +} - /* Get the MPI type corresponding with the PIO type. */ - if((ret = find_mpi_type(piotype, &mpi_type, NULL))){ - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Unable to find MPI type corresponding to PIO type (%d)", piotype); +void io_desc_t::init_map(int maplen, const PIO_Offset *compmap, int rearranger, bool map_zero_based) +{ + if(maplen == 0) { return; } + + /* FIXME: Move to a vector, instead of malloc()ed array */ + map = static_cast (malloc(sizeof(PIO_Offset) * maplen)); + if(!map){ + throw std::runtime_error(std::string("Internal error while allocating memory (") + std::to_string(sizeof(PIO_Offset) * maplen) + ") bytes to store I/O decomposition map"); + } + this->maplen = maplen; + + /* Cache the local decomposition map */ + if(map_zero_based){ + /* BOX and SUBSET rearrangers expect map to the 1-based */ + if((rearranger == PIO_REARR_BOX) || (rearranger == PIO_REARR_SUBSET)){ + std::transform(compmap, compmap + maplen, map, + [](PIO_Offset off) { return off + 1; }); } - - /* What is the size of the pio type? */ - if((ret = spio_pnetcdf_inq_type(0, piotype, NULL, &type_size))){ - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Finding the size of PIO type (%d) failed", piotype); + else{ + std::copy(compmap, compmap + maplen, map); } - - /* Allocate space for the io_desc_t struct. */ - if(!(*iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))){ - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Out of memory allocating %lld bytes for the I/O descriptor", (unsigned long long) sizeof(io_desc_t)); + } + else{ + /* The decomposition map is 1-based */ + if(rearranger == PIO_REARR_CONTIG){ + /* CONTIG rearranger expects map to be 0-based */ + std::transform(compmap, compmap + maplen, map, + [](PIO_Offset off) { return off - 1; }); } - - /* Remember the pio type and its size. */ - (*iodesc)->piotype = piotype; - (*iodesc)->piotype_size = type_size; - - /* Remember the MPI type. */ - (*iodesc)->mpitype = mpi_type; - - /* Get the size of the type. */ - if((mpierr = MPI_Type_size((*iodesc)->mpitype, &(*iodesc)->mpitype_size))){ - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); + else{ + std::copy(compmap, compmap + maplen, map); } + } +} - /* Initialize some values in the struct. */ - (*iodesc)->maxregions = 1; - (*iodesc)->ioid = -1; - (*iodesc)->maplen = maplen; - (*iodesc)->ndims = ndims; +io_desc_t::~io_desc_t() +{ + if(map) { free(map); } + if(dimlen) { free(dimlen); } + if(rfrom) { free(rfrom); } - /* Allocate space for, and initialize, the first region. */ - if((ret = alloc_region2(ios, ndims, &((*iodesc)->firstregion)))){ - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Internal error while allocating memory for iodesc. Allocating memory for 1st region failed. Out of memory allocating memory for I/O region in the I/O descriptor"); - } + free_all_mpi_types(); - /* Allocate memory for the local decomposition map */ - if(!((*iodesc)->map = (PIO_Offset *) malloc(sizeof(PIO_Offset) * maplen))){ - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating memory (%lld bytes) to store I/O decomposition map", (unsigned long long) (sizeof(PIO_Offset) * maplen)); - } + if(scount) { free(scount); } + if(rcount) { free(rcount); } + if(sindex) { free(sindex); } + if(rindex) { free(rindex); } - /* Allocate memory for storing the dimension lengths of variables that use this decomposition */ - if(!((*iodesc)->dimlen = (int *)malloc(sizeof(int) * ndims))){ - return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, - "Internal error while allocating memory (%lld bytes) for storing dimension sizes in the I/O decomposition map", (unsigned long long) (sizeof(int) * ndims)); - } + free_all_regions(); - /* Set the swap memory settings to defaults for this IO system. */ - (*iodesc)->rearr_opts = ios->rearr_opts; + if(rearranger == PIO_REARR_SUBSET) { MPI_Comm_free(&subset_comm); } -#if PIO_SAVE_DECOMPS - /* The descriptor is not yet saved to disk */ - (*iodesc)->is_saved = false; -#endif + if(rearr){ + rearr->finalize(); + delete rearr; + } +} - return PIO_NOERR; +/* FIXME: Move to C++ lists instead of custom lists */ +/* Free an I/O region list */ +void io_desc_t::free_region_list(io_region *phead) +{ + io_region *p = phead, *prev = NULL; + + while(p){ + if(p->start) { free(p->start); } + if(p->count) { free(p->count); } + prev = p; + p = p->next; + free(prev); + } } -/** - * Free a region list. - * - * top a pointer to the start of the list to free. - */ -void free_region_list(io_region *top) +/* Free all I/O regions */ +void io_desc_t::free_all_regions(void ) +{ + free_region_list(firstregion); firstregion = NULL; + free_region_list(fillregion); fillregion = NULL; +} + +void io_desc_t::free_mpi_types(MPI_Datatype *ptypes, int ntypes) { - io_region *ptr, *tptr; + assert(ntypes >= 0); - ptr = top; - while (ptr) - { - if (ptr->start) - free(ptr->start); - if (ptr->count) - free(ptr->count); - tptr = ptr; - ptr = ptr->next; - free(tptr); + if(!ptypes) { return; } + + for(int i = 0; i < ntypes; i++){ + if(ptypes[i] != MPI_DATATYPE_NULL){ + MPI_Type_free(&ptypes[i]); } + } +} + +void io_desc_t::free_all_mpi_types(void ) +{ + if(rtype){ + free_mpi_types(rtype, nrecvs); + free(rtype); rtype = NULL; + } + + if(stype){ + free_mpi_types(stype, num_stypes); + free(stype); stype = NULL; + } } /** @@ -174,121 +230,50 @@ void free_region_list(io_region *top) */ int PIOc_freedecomp_impl(int iosysid, int ioid) { - iosystem_desc_t *ios; - io_desc_t *iodesc; - int mpierr = MPI_SUCCESS; /* Return code from MPI function calls. */ - int ret = 0; + iosystem_desc_t *ios; + io_desc_t *iodesc; + int mpierr = MPI_SUCCESS; /* Return code from MPI function calls. */ + int ret = 0; - if (!(ios = pio_get_iosystem_from_id(iosysid))) - { - return pio_err(NULL, NULL, PIO_EBADID, __FILE__, __LINE__, - "Freeing PIO decomposition failed. Invalid iosystem id (%d) provided", iosysid); - } - assert(ios); - spio_ltimer_start(ios->io_fstats->tot_timer_name); + if(!(ios = pio_get_iosystem_from_id(iosysid))){ + return pio_err(NULL, NULL, PIO_EBADID, __FILE__, __LINE__, + "Freeing PIO decomposition failed. Invalid iosystem id (%d) provided", iosysid); + } - if (!(iodesc = pio_get_iodesc_from_id(ioid))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return pio_err(ios, NULL, PIO_EBADID, __FILE__, __LINE__, - "Freeing PIO decomposition failed. Invalid io decomposition id (%d) provided", ioid); - } + assert(ios); + spio_ltimer_start(ios->io_fstats->tot_timer_name); - /* If async is in use, and this is not an IO task, bcast the parameters. */ - if (ios->async) - { - int msg = PIO_MSG_FREEDECOMP; /* Message for async notification. */ - - PIO_SEND_ASYNC_MSG(ios, msg, &ret, iosysid, ioid); - if(ret != PIO_NOERR) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Freeing PIO decomposition failed (iosysid = %d, iodesc id=%d). Error sending asynchronous message, PIO_MSG_FREEDECOMP, on iosystem", iosysid, ioid); - } - } - - if(iodesc->nasync_pend_ops > 0){ - /* Let I/O desc be freed during finalize */ - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return PIO_NOERR; - } - - /* Free the map. */ - free(iodesc->map); - - /* Free the dimlens. */ - free(iodesc->dimlen); - - if (iodesc->rfrom) - free(iodesc->rfrom); + if(!(iodesc = pio_get_iodesc_from_id(ioid))){ + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return pio_err(ios, NULL, PIO_EBADID, __FILE__, __LINE__, + "Freeing PIO decomposition failed. Invalid io decomposition id (%d) provided", ioid); + } - if (iodesc->rtype) - { - for (int i = 0; i < iodesc->nrecvs; i++) - if (iodesc->rtype[i] != MPI_DATATYPE_NULL) - if ((mpierr = MPI_Type_free(&iodesc->rtype[i]))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - free(iodesc->rtype); - } + /* If async is in use, and this is not an IO task, bcast the parameters. */ + if(ios->async){ + int msg = PIO_MSG_FREEDECOMP; /* Message for async notification. */ - if (iodesc->stype) - { - for (int i = 0; i < iodesc->num_stypes; i++) - if (iodesc->stype[i] != MPI_DATATYPE_NULL) - if ((mpierr = MPI_Type_free(iodesc->stype + i))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - iodesc->num_stypes = 0; - free(iodesc->stype); + PIO_SEND_ASYNC_MSG(ios, msg, &ret, iosysid, ioid); + if(ret != PIO_NOERR){ + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Freeing PIO decomposition failed (iosysid = %d, iodesc id=%d). Error sending asynchronous message, PIO_MSG_FREEDECOMP, on iosystem", iosysid, ioid); } + } - if (iodesc->scount) - free(iodesc->scount); - - if (iodesc->rcount) - free(iodesc->rcount); - - if (iodesc->sindex) - free(iodesc->sindex); - - if (iodesc->rindex) - free(iodesc->rindex); - - if (iodesc->firstregion) - free_region_list(iodesc->firstregion); - - if (iodesc->fillregion) - free_region_list(iodesc->fillregion); - - if (iodesc->rearranger == PIO_REARR_SUBSET) - if ((mpierr = MPI_Comm_free(&iodesc->subset_comm))) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return check_mpi(ios, NULL, mpierr, __FILE__, __LINE__); - } - - if(iodesc->rearr){ - iodesc->rearr->finalize(); - delete iodesc->rearr; - } + if(iodesc->nasync_pend_ops > 0){ + /* Let I/O desc be freed during finalize */ + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return PIO_NOERR; + } - ret = pio_delete_iodesc_from_list(ioid); - if (ret != PIO_NOERR) - { - spio_ltimer_stop(ios->io_fstats->tot_timer_name); - return pio_err(ios, NULL, ret, __FILE__, __LINE__, - "Freeing PIO decomposition failed (iosysid = %d, ioid=%d). Error while trying to delete I/O descriptor from internal list", iosysid, ioid); - } + ret = pio_delete_iodesc_from_list(ioid); + if(ret != PIO_NOERR){ spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return pio_err(ios, NULL, ret, __FILE__, __LINE__, + "Freeing PIO decomposition failed (iosysid = %d, ioid=%d). Error while trying to delete I/O descriptor from internal list", iosysid, ioid); + } - return ret; + spio_ltimer_stop(ios->io_fstats->tot_timer_name); + return ret; } - diff --git a/src/clib/core/util/pio_lists.cpp b/src/clib/core/util/pio_lists.cpp index 0e0507002d..d40d2852c6 100644 --- a/src/clib/core/util/pio_lists.cpp +++ b/src/clib/core/util/pio_lists.cpp @@ -19,11 +19,12 @@ #include #include #include +#include namespace SPIO_Util{ namespace SPIO_Lists{ namespace GVars{ - std::map pio_iodesc_list; + std::map > pio_iodesc_list; std::map pio_iosystem_list; /* This list is independent of the I/O system because users only provide the * file id during a call - hence instead of deducing the I/O system that the file @@ -428,7 +429,7 @@ int pio_num_iosystem(int *niosys) * need to be unique * @returns the ioid of the newly added iodesc. */ -int pio_add_to_iodesc_list(io_desc_t *iodesc, MPI_Comm comm) +int pio_add_to_iodesc_list(std::shared_ptr iodesc, MPI_Comm comm) { static int pio_iodesc_next_id = PIO_IODESC_START_ID; @@ -460,7 +461,7 @@ io_desc_t *pio_get_iodesc_from_id(int ioid) io_desc_t *iodesc = NULL; try{ - iodesc = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.at(ioid); + iodesc = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.at(ioid).get(); } catch(const std::out_of_range &e){ LOG((1, "Finding I/O descriptor corresponding to ioid = %d failed. Invalid I/O descriptor id provided", ioid)); } @@ -477,12 +478,12 @@ io_desc_t *pio_get_iodesc_from_id(int ioid) int pio_delete_iodesc_from_list(int ioid) { LOG((2, "pio_delete_iodesc_from_list(ioid=%d)", ioid)); - std::map::iterator iter = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.find(ioid); + std::map >::iterator iter = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.find(ioid); if(iter != SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.end()){ - io_desc_t *iodesc = (*iter).second; + std::shared_ptr iodesc = (*iter).second; assert(iodesc); if(iodesc->nasync_pend_ops == 0){ - free(iodesc); + //delete(iodesc); SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.erase(iter); } } @@ -507,9 +508,9 @@ int pio_delete_all_iodescs(int iosysid) { int ret = PIO_NOERR; /* Delete the head of the list, one at a time - to delete all I/O descs */ - std::map::iterator iter = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.begin(); + std::map >::iterator iter = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.begin(); while(iter != SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.end()){ - io_desc_t *iodesc = (*iter).second; + io_desc_t *iodesc = (*iter).second.get(); ret = spio_wait_async_iodesc_ops(iodesc); if(ret != PIO_NOERR){ return pio_err(NULL, NULL, ret, __FILE__, __LINE__, diff --git a/src/clib/pio_internal.h b/src/clib/pio_internal.h index 8d0f46b24d..9a8026f17b 100644 --- a/src/clib/pio_internal.h +++ b/src/clib/pio_internal.h @@ -31,6 +31,7 @@ #include "util/spio_ltimer.h" #include "pio_types.hpp" #include +#include #if defined(__cplusplus) extern "C" { @@ -113,7 +114,7 @@ extern "C" { int recv_async_msg(iosystem_desc_t *ios, int msg, ...); void pio_get_env(void); - int pio_add_to_iodesc_list(io_desc_t *iodesc, MPI_Comm comm); + int pio_add_to_iodesc_list(std::shared_ptr iodesc, MPI_Comm comm); io_desc_t *pio_get_iodesc_from_id(int ioid); int pio_delete_iodesc_from_list(int ioid); int pio_delete_all_iodescs(int iosysid); @@ -259,8 +260,6 @@ extern "C" { int rearrange_comp2io(iosystem_desc_t *ios, io_desc_t *iodesc, file_desc_t *file, const void *sbuf, void *rbuf, int nvars); - /* Allocate and initialize storage for decomposition information. */ - int malloc_iodesc(iosystem_desc_t *ios, int piotype, int ndims, int maplen, io_desc_t **iodesc); void performance_tune_rearranger(iosystem_desc_t *ios, io_desc_t *iodesc); /* Flush contents of multi-buffer to disk. */ diff --git a/src/clib/pio_types.hpp b/src/clib/pio_types.hpp index a9dc77dd0f..37857e1d57 100644 --- a/src/clib/pio_types.hpp +++ b/src/clib/pio_types.hpp @@ -68,6 +68,8 @@ namespace SPIO_Util{ class TComm_info; } +const int SPIO_INVALID_ID = -1; + /** The viobuf_cache is used to cache the rearranged data for the * variable. The iobuf inside the cache is freed when the data * is written out. @@ -211,13 +213,17 @@ namespace SPIO{ } } +struct iosystem_desc_t; +/* FIXME: Move io_desc_t to a proper class with public/private + * data members (instead of a struct with all public members) + */ /** * IO descriptor structure. * * This structure defines the mapping for a given variable between * compute and IO decomposition. */ -typedef struct io_desc_t +struct io_desc_t { /** The ID of this io_desc_t. */ int ioid; @@ -333,19 +339,26 @@ typedef struct io_desc_t * group. */ MPI_Comm subset_comm; -#if PIO_SAVE_DECOMPS /* Indicates whether this iodesc has been saved to disk (the * decomposition is dumped to disk) */ bool is_saved; -#endif /* FIXME: Once we have classes for subset/box this ptr should be to a base rearr class */ SPIO::DataRearr::Contig_rearr *rearr; /* Number of pending async ops using this I/O desc */ std::atomic_int nasync_pend_ops; -} io_desc_t; + + io_desc_t(iosystem_desc_t *ios, int piotype, int ndims, const int *gdimlen, + int maplen, const PIO_Offset *compmap, int rearranger, bool map_zero_based); + void init_map(int maplen, const PIO_Offset *compmap, int rearranger, bool map_zero_based); + void free_region_list(io_region *phead); + void free_all_regions(void ); + void free_mpi_types(MPI_Datatype *ptypes, int ntypes); + void free_all_mpi_types(void ); + ~io_desc_t(); +}; /* Forward decl for I/O file summary stats info */ struct spio_io_fstats_summary; diff --git a/tests/cunit/test_pioc.cpp b/tests/cunit/test_pioc.cpp index 09bb5d70e3..be6cfb0408 100644 --- a/tests/cunit/test_pioc.cpp +++ b/tests/cunit/test_pioc.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include /* The number of tasks this test should run on. */ #define TARGET_NTASKS 4 @@ -1629,7 +1631,6 @@ int test_malloc_iodesc2(int iosysid, int my_rank) MPI_UNSIGNED_LONG_LONG, MPI_CHAR}; int ioid; iosystem_desc_t *ios; - io_desc_t *iodesc; int ret; if (!(ios = pio_get_iosystem_from_id(iosysid))) @@ -1638,18 +1639,21 @@ int test_malloc_iodesc2(int iosysid, int my_rank) /* Test with each type. */ for (int t = 0; t < num_types; t++) { + int gdimlen[] = {1}; + PIO_Offset compmap[] = {1}; + std::shared_ptr iodesc = std::make_shared(ios, test_type[t], + 1, gdimlen, + 1, compmap, PIO_REARR_BOX, false); - if ((ret = malloc_iodesc(ios, test_type[t], 1, 1, &iodesc))) - return ret; if (iodesc->mpitype != mpi_type[t]) return ERR_WRONG; if (iodesc->ndims != 1) return ERR_WRONG; ioid = pio_add_to_iodesc_list(iodesc, MPI_COMM_NULL); if (iodesc->firstregion) - free_region_list(iodesc->firstregion); - free(iodesc->map); - free(iodesc->dimlen); + iodesc->free_region_list(iodesc->firstregion); + free(iodesc->map); iodesc->map = NULL; + free(iodesc->dimlen); iodesc->dimlen = NULL; if ((ret = pio_delete_iodesc_from_list(ioid))) return ret; } diff --git a/tests/cunit/test_rearr.cpp b/tests/cunit/test_rearr.cpp index 4dbd37caf1..3837fa4ed3 100644 --- a/tests/cunit/test_rearr.cpp +++ b/tests/cunit/test_rearr.cpp @@ -30,6 +30,16 @@ /* Name of test var. (Name of a Welsh town.)*/ #define VAR_NAME "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch" +iosystem_desc_t *create_dummy_iosystem(MPI_Comm test_comm) +{ + iosystem_desc_t *ios = static_cast(calloc(1, sizeof(iosystem_desc_t))); + assert(ios); + + ios->union_comm = test_comm; + + return ios; +} + /* Test some of the rearranger utility functions. */ int test_rearranger_opts1(int iosysid) { @@ -299,13 +309,15 @@ int test_compute_maxIObuffersize(MPI_Comm test_comm, int my_rank) { int ret; + iosystem_desc_t *ios = create_dummy_iosystem(test_comm); { /* This is a simple test with one region containing 1 data * element. */ - io_desc_t iodesc; + io_desc_t iodesc(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); io_region *ior1; int ndims = 1; + iodesc.free_all_regions(); /* This is how we allocate a region. */ if ((ret = alloc_region2(NULL, ndims, &ior1))) return ret; @@ -325,16 +337,17 @@ int test_compute_maxIObuffersize(MPI_Comm test_comm, int my_rank) free(ior1->start); free(ior1->count); free(ior1); - + iodesc.firstregion = NULL; } { /* This also has a single region, but with 2 dims and count * values > 1. */ - io_desc_t iodesc; + io_desc_t iodesc(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); io_region *ior2; int ndims = 2; + iodesc.free_all_regions(); /* This is how we allocate a region. */ if ((ret = alloc_region2(NULL, ndims, &ior2))) return ret; @@ -361,15 +374,17 @@ int test_compute_maxIObuffersize(MPI_Comm test_comm, int my_rank) free(ior2->start); free(ior2->count); free(ior2); + iodesc.firstregion = NULL; } { /* This test has two regions of different sizes. */ - io_desc_t iodesc; + io_desc_t iodesc(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); io_region *ior3; io_region *ior4; int ndims = 2; + iodesc.free_all_regions(); /* This is how we allocate a region. */ if ((ret = alloc_region2(NULL, ndims, &ior4))) return ret; @@ -400,8 +415,11 @@ int test_compute_maxIObuffersize(MPI_Comm test_comm, int my_rank) free(ior3->start); free(ior3->count); free(ior3); + iodesc.firstregion = NULL; } + free(ios); + return 0; } @@ -409,7 +427,6 @@ int test_compute_maxIObuffersize(MPI_Comm test_comm, int my_rank) int test_determine_fill(MPI_Comm test_comm) { iosystem_desc_t *ios; - io_desc_t *iodesc; int gsize[1] = {4}; PIO_Offset compmap[1] = {1}; int ret; @@ -419,9 +436,8 @@ int test_determine_fill(MPI_Comm test_comm) return PIO_ENOMEM; ios->union_comm = test_comm; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); /* Set up iodesc for test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; iodesc->ndims = 1; iodesc->rearranger = PIO_REARR_SUBSET; iodesc->llen = 1; @@ -440,8 +456,8 @@ int test_determine_fill(MPI_Comm test_comm) return ERR_WRONG; /* Free test resources. */ + delete(iodesc); free(ios); - free(iodesc); return 0; } @@ -535,7 +551,7 @@ int test_define_iodesc_datatypes() for (int r = 0; r < NUM_REARRANGERS; r++) { iosystem_desc_t ios; - io_desc_t iodesc; + io_desc_t iodesc(&ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); /* Set up test for IO task with BOX rearranger to create one type. */ ios.ioproc = 1; /* this is IO proc. */ @@ -586,13 +602,13 @@ int test_define_iodesc_datatypes() MPIERR(mpierr); /* Free resources. */ - free(iodesc.rtype); - free(iodesc.sindex); - free(iodesc.scount); - free(iodesc.stype); - free(iodesc.rcount); - free(iodesc.rfrom); - free(iodesc.rindex); + free(iodesc.rtype); iodesc.rtype = NULL; + free(iodesc.sindex); iodesc.sindex = NULL; + free(iodesc.scount); iodesc.scount = NULL; + free(iodesc.stype); iodesc.stype = NULL; + free(iodesc.rcount); iodesc.rcount = NULL; + free(iodesc.rfrom); iodesc.rfrom = NULL; + free(iodesc.rindex); iodesc.rindex = NULL; } return 0; @@ -602,7 +618,6 @@ int test_define_iodesc_datatypes() int test_compute_counts(MPI_Comm test_comm, int my_rank) { iosystem_desc_t *ios; - io_desc_t *iodesc; int dest_ioproc[TARGET_NTASKS] = {0, 1, 2, 3}; PIO_Offset dest_ioindex[TARGET_NTASKS] = {0, 1, 2, 3}; int ret; @@ -627,8 +642,8 @@ int test_compute_counts(MPI_Comm test_comm, int my_rank) ios->compranks[i] = i; /* Initialize iodesc. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); iodesc->rearranger = PIO_REARR_BOX; iodesc->ndof = TARGET_NTASKS; iodesc->llen = TARGET_NTASKS; @@ -650,16 +665,16 @@ int test_compute_counts(MPI_Comm test_comm, int my_rank) return ERR_WRONG; /* Free resources allocated in compute_counts(). */ - free(iodesc->scount); - free(iodesc->sindex); - free(iodesc->rcount); - free(iodesc->rfrom); - free(iodesc->rindex); + free(iodesc->scount); iodesc->scount = NULL; + free(iodesc->sindex); iodesc->sindex = NULL; + free(iodesc->rcount); iodesc->rcount = NULL; + free(iodesc->rfrom); iodesc->rfrom = NULL; + free(iodesc->rindex); iodesc->rindex = NULL; /* Free test resources. */ free(ios->ioranks); free(ios->compranks); - free(iodesc); + delete(iodesc); free(ios); return 0; @@ -691,7 +706,6 @@ int test_init_decomp(int iosysid, MPI_Comm test_comm, int my_rank) int test_box_rearrange_create(MPI_Comm test_comm, int my_rank) { iosystem_desc_t *ios; - io_desc_t *iodesc; io_region *ior1; int maplen = MAPLEN2; PIO_Offset compmap[MAPLEN2] = {(my_rank * 2) + 1, ((my_rank + 1) * 2) + 1}; @@ -704,8 +718,8 @@ int test_box_rearrange_create(MPI_Comm test_comm, int my_rank) return PIO_ENOMEM; /* Allocate IO desc struct for this test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); /* Default rearranger options. */ iodesc->rearr_opts.comm_type = PIO_REARR_COMM_COLL; @@ -783,19 +797,19 @@ int test_box_rearrange_create(MPI_Comm test_comm, int my_rank) /* } */ /* Free resources allocated in compute_counts(). */ - free(iodesc->scount); - free(iodesc->sindex); - free(iodesc->rcount); - free(iodesc->rfrom); - free(iodesc->rindex); + free(iodesc->scount); iodesc->scount = NULL; + free(iodesc->sindex); iodesc->sindex = NULL; + free(iodesc->rcount); iodesc->rcount = NULL; + free(iodesc->rfrom); iodesc->rfrom = NULL; + free(iodesc->rindex); iodesc->rindex = NULL; /* Free resources from test. */ free(ior1->start); free(ior1->count); - free(ior1); + free(ior1); iodesc->firstregion = NULL; free(ios->ioranks); free(ios->compranks); - free(iodesc); + delete(iodesc); free(ios); return 0; @@ -806,7 +820,6 @@ int test_box_rearrange_create_2(MPI_Comm test_comm, int my_rank) { #define MAPLEN2 2 iosystem_desc_t *ios; - io_desc_t *iodesc; io_region *ior1; int maplen = MAPLEN2; PIO_Offset compmap[MAPLEN2] = {1, 0}; @@ -819,8 +832,8 @@ int test_box_rearrange_create_2(MPI_Comm test_comm, int my_rank) return PIO_ENOMEM; /* Allocate IO desc struct for this test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); /* Default rearranger options. */ iodesc->rearr_opts.comm_type = PIO_REARR_COMM_COLL; @@ -898,19 +911,19 @@ int test_box_rearrange_create_2(MPI_Comm test_comm, int my_rank) } /* Free resources allocated in compute_counts(). */ - free(iodesc->scount); - free(iodesc->sindex); - free(iodesc->rcount); - free(iodesc->rfrom); - free(iodesc->rindex); + free(iodesc->scount); iodesc->scount = NULL; + free(iodesc->sindex); iodesc->sindex = NULL; + free(iodesc->rcount); iodesc->rcount = NULL; + free(iodesc->rfrom); iodesc->rfrom = NULL; + free(iodesc->rindex); iodesc->rindex = NULL; /* Free resources from test. */ free(ior1->start); free(ior1->count); - free(ior1); + free(ior1); iodesc->firstregion = NULL; free(ios->ioranks); free(ios->compranks); - free(iodesc); + delete(iodesc); free(ios); return 0; @@ -922,7 +935,6 @@ int test_box_rearrange_create_3(MPI_Comm test_comm, int my_rank) { #define MAPLEN1 1 iosystem_desc_t *ios; - io_desc_t *iodesc; io_region *ior1; int maplen = MAPLEN1; PIO_Offset compmap[MAPLEN1] = {0}; @@ -935,8 +947,8 @@ int test_box_rearrange_create_3(MPI_Comm test_comm, int my_rank) return PIO_ENOMEM; /* Allocate IO desc struct for this test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); /* Default rearranger options. */ iodesc->rearr_opts.comm_type = PIO_REARR_COMM_COLL; @@ -990,16 +1002,16 @@ int test_box_rearrange_create_3(MPI_Comm test_comm, int my_rank) return ERR_WRONG; /* Free resources allocated in compute_counts(). */ - free(iodesc->scount); - free(iodesc->sindex); - free(iodesc->rcount); - free(iodesc->rfrom); - free(iodesc->rindex); + free(iodesc->scount); iodesc->scount = NULL; + free(iodesc->sindex); iodesc->sindex = NULL; + free(iodesc->rcount); iodesc->rcount = NULL; + free(iodesc->rfrom); iodesc->rfrom = NULL; + free(iodesc->rindex); iodesc->rindex = NULL; /* Free resources from test. */ free(ior1->start); free(ior1->count); - free(ior1); + free(ior1); iodesc->firstregion = NULL; free(ios->ioranks); free(ios->compranks); free(iodesc); @@ -1012,7 +1024,6 @@ int test_box_rearrange_create_3(MPI_Comm test_comm, int my_rank) int test_default_subset_partition(MPI_Comm test_comm, int my_rank) { iosystem_desc_t *ios; - io_desc_t *iodesc; int mpierr; int ret; @@ -1021,8 +1032,8 @@ int test_default_subset_partition(MPI_Comm test_comm, int my_rank) return PIO_ENOMEM; /* Allocate IO desc struct for this test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); ios->ioproc = 1; ios->io_rank = my_rank; @@ -1041,7 +1052,7 @@ int test_default_subset_partition(MPI_Comm test_comm, int my_rank) MPIERR(mpierr); /* Free resources from test. */ - free(iodesc); + delete(iodesc); free(ios); return 0; @@ -1051,7 +1062,6 @@ int test_default_subset_partition(MPI_Comm test_comm, int my_rank) int test_rearrange_comp2io(MPI_Comm test_comm, int my_rank) { iosystem_desc_t *ios; - io_desc_t *iodesc; file_desc_t *file; void *sbuf = NULL; void *rbuf = NULL; @@ -1075,8 +1085,8 @@ int test_rearrange_comp2io(MPI_Comm test_comm, int my_rank) return PIO_ENOMEM; /* Allocate IO desc struct for this test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); if (!(file = (file_desc_t *) calloc(1, sizeof(file_desc_t)))) return PIO_ENOMEM; @@ -1158,22 +1168,22 @@ int test_rearrange_comp2io(MPI_Comm test_comm, int my_rank) MPIERR(mpierr); /* Free resources allocated in library code. */ - free(iodesc->rtype); - free(iodesc->sindex); - free(iodesc->scount); - free(iodesc->stype); - free(iodesc->rcount); - free(iodesc->rfrom); - free(iodesc->rindex); + free(iodesc->rtype); iodesc->rtype = NULL; + free(iodesc->sindex); iodesc->sindex = NULL; + free(iodesc->scount); iodesc->scount = NULL; + free(iodesc->stype); iodesc->stype = NULL; + free(iodesc->rcount); iodesc->rcount = NULL; + free(iodesc->rfrom); iodesc->rfrom = NULL; + free(iodesc->rindex); iodesc->rindex = NULL; /* Free resources from test. */ free(ior1->start); free(ior1->count); - free(ior1); + free(ior1); iodesc->firstregion = NULL; free(ios->ioranks); free(ios->compranks); free(file); - free(iodesc); + delete(iodesc); free(ios); free(sbuf); free(rbuf); @@ -1185,7 +1195,6 @@ int test_rearrange_comp2io(MPI_Comm test_comm, int my_rank) int test_rearrange_io2comp(MPI_Comm test_comm, int my_rank) { iosystem_desc_t *ios; - io_desc_t *iodesc; void *sbuf = NULL; void *rbuf = NULL; io_region *ior1; @@ -1207,8 +1216,8 @@ int test_rearrange_io2comp(MPI_Comm test_comm, int my_rank) return PIO_ENOMEM; /* Allocate IO desc struct for this test. */ - if (!(iodesc = (io_desc_t *) calloc(1, sizeof(io_desc_t)))) - return PIO_ENOMEM; + io_desc_t *iodesc = new io_desc_t(ios, PIO_INT, 0, NULL, 0, NULL, PIO_REARR_BOX, false); + assert(iodesc); ios->ioproc = 1; ios->io_rank = my_rank; @@ -1288,21 +1297,21 @@ int test_rearrange_io2comp(MPI_Comm test_comm, int my_rank) MPIERR(mpierr); /* Free resources allocated in library code. */ - free(iodesc->rtype); - free(iodesc->sindex); - free(iodesc->scount); - free(iodesc->stype); - free(iodesc->rcount); - free(iodesc->rfrom); - free(iodesc->rindex); + free(iodesc->rtype); iodesc->rtype = NULL; + free(iodesc->sindex); iodesc->sindex = NULL; + free(iodesc->scount); iodesc->scount = NULL; + free(iodesc->stype); iodesc->stype = NULL; + free(iodesc->rcount); iodesc->rcount = NULL; + free(iodesc->rfrom); iodesc->rfrom = NULL; + free(iodesc->rindex); iodesc->rindex = NULL; /* Free resources from test. */ free(ior1->start); free(ior1->count); - free(ior1); + free(ior1); iodesc->firstregion = NULL; free(ios->ioranks); free(ios->compranks); - free(iodesc); + delete(iodesc); free(ios); free(sbuf); free(rbuf); diff --git a/tests/cunit/test_spio_decomp_logger.cpp b/tests/cunit/test_spio_decomp_logger.cpp index 377910841a..40ef06e59b 100644 --- a/tests/cunit/test_spio_decomp_logger.cpp +++ b/tests/cunit/test_spio_decomp_logger.cpp @@ -111,36 +111,20 @@ void free_iosystem(iosystem_desc_t *ios){ io_desc_t *get_iodesc(int wrank, iosystem_desc_t *ios, const std::vector &compmap, const std::vector &gdimlen) { - io_desc_t *iodesc = NULL; int ret = PIO_NOERR; - ret = malloc_iodesc(ios, PIO_DOUBLE, static_cast(gdimlen.size()), static_cast(compmap.size()), &iodesc); - if(ret != PIO_NOERR){ + io_desc_t *iodesc = new io_desc_t(ios, PIO_DOUBLE, static_cast(gdimlen.size()), gdimlen.data(), static_cast(compmap.size()), compmap.data(), PIO_REARR_BOX, false); + if(!iodesc){ LOG_RANK0(wrank, "Unable to alloc mem for I/O desc\n"); return iodesc; } - assert(iodesc->dimlen); - std::copy(gdimlen.cbegin(), gdimlen.cend(), iodesc->dimlen); - - assert(iodesc->map); - std::copy(compmap.cbegin(), compmap.cend(), iodesc->map); - return iodesc; } void free_iodesc(io_desc_t *iodesc) { - if(!iodesc){ - return; - } - - free(iodesc->firstregion->start); - free(iodesc->firstregion->count); - free(iodesc->firstregion); - free(iodesc->map); - free(iodesc->dimlen); - free(iodesc); + if(iodesc) { delete(iodesc); } } int test_create_decomp_logger(MPI_Comm comm, int wrank, int wsz) From 929c41a610d0e22c94eae7fd55b522aace9ca653 Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Wed, 27 May 2026 09:48:01 -0500 Subject: [PATCH 5/7] Using C++ const/dest for wcache info Using C++ constructors and destructors for write cache info. The members are all still public (since other parts of the code access them directly) and will be changed in a future PR --- .../core/iolib/hdf5/spio_async_hdf5_utils.cpp | 57 ++++++++++++------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp b/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp index 7d259d9a9d..0f91268b3b 100644 --- a/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp +++ b/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp @@ -87,8 +87,42 @@ struct Hdf5_wcache{ std::size_t iobuf_sz; void *fillbuf; std::size_t fillbuf_sz; + + Hdf5_wcache(file_desc_t *file, int nvars, int fndims, + const int *varids, io_desc_t *iodesc, bool wr_fillbuf, const int *frame); + ~Hdf5_wcache(); }; +Hdf5_wcache::Hdf5_wcache(file_desc_t *file, int nvars, int fndims, + const int *varids, io_desc_t *iodesc, bool wr_fillbuf, const int *frame): + file(file), nvars(nvars), fndims(fndims), iodesc(iodesc), + wr_fillbuf(wr_fillbuf), iobuf(NULL), iobuf_sz(0), + fillbuf(NULL), fillbuf_sz(0) +{ + assert(file && (nvars > 0) && (fndims > 0) && varids && iodesc); + + /* Cache varids and frames (one frame, the frame being written, + * for each varid) + */ + this->varids.resize(nvars); + std::copy(varids, varids + nvars, this->varids.begin()); + + if(frame){ + this->frame.resize(nvars); + std::copy(frame, frame + nvars, this->frame.begin()); + } + + /* FIXME: Copy/init iobuf and fillbuf here */ +} + +Hdf5_wcache::~Hdf5_wcache() +{ + /* Don't delete cached iodesc ptr (we don't own it) */ + + if(iobuf) { brel(iobuf); iobuf = NULL; } + if(fillbuf) { brel(fillbuf); fillbuf = NULL; } +} + /* Global vars */ std::atomic SPIO_Util::GVars::npend_hdf5_async_ops; @@ -787,18 +821,8 @@ void pio_iosys_async_op_hdf5_write_free(void *pdata) Hdf5_wcache *wcache = static_cast(pdata); assert(wcache); - /* Using swap trick to free vectors - * - swap vector with an empty local/temp vector that gets deallocated when func exits - */ - //wcache->varids.clear(); - std::vector().swap(wcache->varids); - //wcache->frame.clear(); - std::vector().swap(wcache->frame); - - if(wcache->iobuf){ brel(wcache->iobuf); } - if(wcache->fillbuf){ brel(wcache->fillbuf); } - free(wcache); + delete(wcache); #else // _HDF5 assert(0); #endif // _HDF5 @@ -819,15 +843,8 @@ int pio_iosys_async_hdf5_write_op_add(file_desc_t *file, int nvars, int fndims, return PIO_NOERR; } - std::vector vids(varids, varids + nvars); - std::vector frms; - if(frame){ - frms.resize(nvars); - std::copy(frame, frame + nvars, frms.begin()); - } - - Hdf5_wcache *wcache = static_cast(calloc(1, sizeof(Hdf5_wcache))); - *wcache = {file, nvars, fndims, vids, iodesc, frms, (fill) ? true : false, NULL, 0, NULL, 0}; + Hdf5_wcache *wcache = new Hdf5_wcache(file, nvars, fndims, varids, iodesc, + (fill) ? true : false, frame); /* We need to copy the iobuf/fillbuf since the mvcache gets reused for future writes */ /* Copy iobuf/fillvalue */ From d20663d4d258837e86bcd2c8e3f0a65cd34b21a6 Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Wed, 27 May 2026 12:28:00 -0500 Subject: [PATCH 6/7] Using shared ptr to keep track of I/O descs I/O decompositions are separate from files and is used by files when rearranging data (before writing the data out). Since I/O decompositions can be potentially freed before it gets used by a file, we now cache a reference to the I/O decomposition in the file. * I/O decomposition objects are ref counted using shared ptrs * Using a map to cache I/O decomps in files * In the async case cache a ref to I/O decomp in the write info object --- .../core/iolib/hdf5/spio_async_hdf5_utils.cpp | 10 +++--- .../core/iolib/hdf5/spio_async_hdf5_utils.hpp | 3 +- src/clib/core/pio_darray.cpp | 15 +++++--- src/clib/core/pio_file.cpp | 36 +++++++++++++++++++ src/clib/core/pioc_support.cpp | 12 +++++++ src/clib/core/util/pio_lists.cpp | 21 +++++++++++ src/clib/pio_internal.h | 3 ++ src/clib/pio_types.hpp | 6 ++++ 8 files changed, 96 insertions(+), 10 deletions(-) diff --git a/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp b/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp index 0f91268b3b..eb5949f328 100644 --- a/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp +++ b/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.cpp @@ -79,7 +79,7 @@ struct Hdf5_wcache{ int nvars; int fndims; std::vector varids; - io_desc_t *iodesc; + std::shared_ptr iodesc; std::vector frame; bool wr_fillbuf; @@ -89,12 +89,12 @@ struct Hdf5_wcache{ std::size_t fillbuf_sz; Hdf5_wcache(file_desc_t *file, int nvars, int fndims, - const int *varids, io_desc_t *iodesc, bool wr_fillbuf, const int *frame); + const int *varids, std::shared_ptr iodesc, bool wr_fillbuf, const int *frame); ~Hdf5_wcache(); }; Hdf5_wcache::Hdf5_wcache(file_desc_t *file, int nvars, int fndims, - const int *varids, io_desc_t *iodesc, bool wr_fillbuf, const int *frame): + const int *varids, std::shared_ptr iodesc, bool wr_fillbuf, const int *frame): file(file), nvars(nvars), fndims(fndims), iodesc(iodesc), wr_fillbuf(wr_fillbuf), iobuf(NULL), iobuf_sz(0), fillbuf(NULL), fillbuf_sz(0) @@ -600,7 +600,7 @@ int pio_iosys_async_op_hdf5_write(void *pdata) file_desc_t *file = wcache->file; int nvars = wcache->nvars; int fndims = wcache->fndims; - io_desc_t *iodesc = wcache->iodesc; + io_desc_t *iodesc = wcache->iodesc.get(); assert(file && (nvars > 0) && (fndims > 0) && iodesc); assert((file->iotype == PIO_IOTYPE_HDF5) || (file->iotype == PIO_IOTYPE_HDF5C)); @@ -829,7 +829,7 @@ void pio_iosys_async_op_hdf5_write_free(void *pdata) } int pio_iosys_async_hdf5_write_op_add(file_desc_t *file, int nvars, int fndims, - const int *varids, io_desc_t *iodesc, int fill, const int *frame) + const int *varids, std::shared_ptr iodesc, int fill, const int *frame) { #ifdef _HDF5 int ret = PIO_NOERR; diff --git a/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.hpp b/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.hpp index f52368204a..d1f4f6c315 100644 --- a/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.hpp +++ b/src/clib/core/iolib/hdf5/spio_async_hdf5_utils.hpp @@ -2,6 +2,7 @@ #include "pio_config.h" #include "pio.h" #include "pio_internal.h" +#include namespace SPIO_Util{ namespace GVars{ @@ -23,7 +24,7 @@ int spio_wait_all_hdf5_async_ops(int iosysid); int pio_iosys_async_op_hdf5_write(void *pdata); void pio_iosys_async_op_hdf5_write_free(void *pdata); int pio_iosys_async_hdf5_write_op_add(file_desc_t *file, int nvars, int fndims, - const int *varids, io_desc_t *iodesc, int fill, const int *frame); + const int *varids, std::shared_ptr iodesc, int fill, const int *frame); int spio_iosys_async_hdf5_set_frame_op_add(file_desc_t *file, int varid, int frame); #define __SPIO_ASYNC_HDF5_UTILS_HPP__ diff --git a/src/clib/core/pio_darray.cpp b/src/clib/core/pio_darray.cpp index bd582f4e68..637621ba89 100644 --- a/src/clib/core/pio_darray.cpp +++ b/src/clib/core/pio_darray.cpp @@ -25,6 +25,8 @@ #include "spio_dt_converter.hpp" #include "spio_async_utils.hpp" #include +#include +#include /* uint64_t definition */ #ifdef _ADIOS2 @@ -165,11 +167,13 @@ int PIOc_write_darray_multi_impl(int ncid, const int *varids, int ioid, int nvar "Writing multiple variables to file (%s, ncid=%d) failed. Trying to write to a read only file, try reopening the file in write mode (use the PIO_WRITE flag)", pio_get_fname_from_file(file), ncid); } - /* Get iodesc. */ - if(!(iodesc = pio_get_iodesc_from_id(ioid))){ + /* Get cached iodesc. */ + std::shared_ptr sp_iodesc = spio_get_iodesc_ref_from_file(file, ioid); + if(!sp_iodesc){ return pio_err(ios, file, PIO_EBADID, __FILE__, __LINE__, "Writing multiple variables to file (%s, ncid=%d) failed. Invalid arguments, invalid PIO decomposition id (%d) provided", pio_get_fname_from_file(file), ncid, ioid); } + iodesc = sp_iodesc.get(); pioassert(iodesc->rearranger == PIO_REARR_BOX || iodesc->rearranger == PIO_REARR_SUBSET || iodesc->rearranger == PIO_REARR_CONTIG, "unknown rearranger", __FILE__, __LINE__); @@ -411,7 +415,7 @@ int PIOc_write_darray_multi_impl(int ncid, const int *varids, int ioid, int nvar case PIO_IOTYPE_HDF5: case PIO_IOTYPE_HDF5C: #if PIO_USE_ASYNC_WR_THREAD - ierr = pio_iosys_async_hdf5_write_op_add(file, nvars, fndims, varids, iodesc, + ierr = pio_iosys_async_hdf5_write_op_add(file, nvars, fndims, varids, sp_iodesc, DARRAY_DATA, frame); if(ierr != PIO_NOERR){ return pio_err(ios, file, ierr, __FILE__, __LINE__, @@ -503,7 +507,7 @@ int PIOc_write_darray_multi_impl(int ncid, const int *varids, int ioid, int nvar case PIO_IOTYPE_HDF5: case PIO_IOTYPE_HDF5C: #if PIO_USE_ASYNC_WR_THREAD - ierr = pio_iosys_async_hdf5_write_op_add(file, nvars, fndims, varids, iodesc, + ierr = pio_iosys_async_hdf5_write_op_add(file, nvars, fndims, varids, sp_iodesc, DARRAY_FILL, frame); if(ierr != PIO_NOERR){ return pio_err(ios, file, ierr, __FILE__, __LINE__, @@ -2470,6 +2474,9 @@ int PIOc_write_darray_impl(int ncid, int varid, int ioid, PIO_Offset arraylen, c } wmb->num_arrays++; + /* Cache a ref to the iodesc in the file */ + spio_add_iodesc_ref_to_file(file, ioid); + LOG((2, "wmb->num_arrays = %d iodesc->maxbytes / iodesc->mpitype_size = %d " "iodesc->ndof = %d iodesc->llen = %d", wmb->num_arrays, iodesc->maxbytes / iodesc->mpitype_size, iodesc->ndof, iodesc->llen)); diff --git a/src/clib/core/pio_file.cpp b/src/clib/core/pio_file.cpp index 8e9cb48e89..1d7004d749 100644 --- a/src/clib/core/pio_file.cpp +++ b/src/clib/core/pio_file.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "spio_hdf5_utils.hpp" #include "spio_async_tcomm.hpp" @@ -477,6 +478,41 @@ int spio_wait_on_hard_close(iosystem_desc_t *ios, file_desc_t *file) return PIO_NOERR; } +void spio_add_iodesc_ref_to_file(file_desc_t *file, int ioid) +{ + assert(file && file->pmtx && file->io_desc_refs); + + /* Get the lock before proceeding */ + std::lock_guard lg(*(file->pmtx)); + + /* Don't add duplicates, just one ref to iodesc used by file. + * When cached iodesc is used, for writing data, its used for all + * cached variables at once. So we just need one ref to iodesc here + */ + std::shared_ptr iodesc = pio_get_iodesc_sptr_from_id(ioid); + file->io_desc_refs->insert({ioid, iodesc}); +} + +std::shared_ptr spio_get_iodesc_ref_from_file(file_desc_t *file, int ioid) +{ + assert(file && file->pmtx && file->io_desc_refs); + + /* Get the lock before proceeding */ + std::lock_guard lg(*(file->pmtx)); + + /* Search through the cached I/O descs */ + std::map >::iterator iter = file->io_desc_refs->find(ioid); + + if(iter != file->io_desc_refs->end()){ + /* The caller now has ownership of this iodesc */ + std::shared_ptr sp = iter->second; + file->io_desc_refs->erase(iter); + return sp; + } + + return nullptr; +} + /* Close the file ("hard close") * @param ios: Pointer to the iosystem_desc * @param file: Pointer to the file_desc for the file diff --git a/src/clib/core/pioc_support.cpp b/src/clib/core/pioc_support.cpp index 3fcfa17f5a..788d26da1b 100644 --- a/src/clib/core/pioc_support.cpp +++ b/src/clib/core/pioc_support.cpp @@ -2862,9 +2862,13 @@ int spio_createfile_int(int iosysid, int *ncidp, const int *iotype, const char * file->pmtx = new std::mutex(); assert(file->pmtx); + file->io_desc_refs = new std::map >(); + assert(file->io_desc_refs); + file->io_fstats = (spio_io_fstats_summary_t *) calloc(sizeof(spio_io_fstats_summary_t), 1); if(!(file->io_fstats)) { + delete(file->io_desc_refs); delete(file->pmtx); return pio_err(ios, NULL, PIO_ENOMEM, __FILE__, __LINE__, "Creating file (%s) failed. Out of memory allocating %lld bytes for caching file I/O statistics", filename, (unsigned long long) (sizeof(spio_io_fstats_summary_t))); @@ -3573,6 +3577,7 @@ int spio_createfile_int(int iosysid, int *ncidp, const int *iotype, const char * spio_ltimer_stop(file->io_fstats->wr_timer_name); spio_ltimer_stop(file->io_fstats->tot_timer_name); delete(file->pmtx); + delete(file->io_desc_refs); free(file->io_fstats); free(file); return check_mpi(NULL, file, mpierr, __FILE__, __LINE__); @@ -4579,6 +4584,9 @@ int PIOc_openfile_retry_impl(int iosysid, int *ncidp, int *iotype, const char *f file->pmtx = new std::mutex(); assert(file->pmtx); + file->io_desc_refs = new std::map >(); + assert(file->io_desc_refs); + file->io_fstats = (spio_io_fstats_summary_t *) calloc(sizeof(spio_io_fstats_summary_t), 1); if(!(file->io_fstats)){ spio_ltimer_stop(ios->io_fstats->rd_timer_name); @@ -5151,6 +5159,7 @@ int PIOc_openfile_retry_impl(int iosysid, int *ncidp, int *iotype, const char *f spio_ltimer_stop(file->io_fstats->tot_timer_name); delete(file->pmtx); + delete(file->io_desc_refs); free(file->io_fstats); free(file); PIO_get_avail_iotypes(avail_iotypes, PIO_MAX_NAME); @@ -5225,6 +5234,7 @@ int PIOc_openfile_retry_impl(int iosysid, int *ncidp, int *iotype, const char *f spio_ltimer_stop(file->io_fstats->rd_timer_name); spio_ltimer_stop(file->io_fstats->tot_timer_name); delete(file->pmtx); + delete(file->io_desc_refs); free(file->io_fstats); free(file); return check_mpi(NULL, file, mpierr, __FILE__, __LINE__); @@ -5238,6 +5248,7 @@ int PIOc_openfile_retry_impl(int iosysid, int *ncidp, int *iotype, const char *f spio_ltimer_stop(file->io_fstats->rd_timer_name); spio_ltimer_stop(file->io_fstats->tot_timer_name); delete(file->pmtx); + delete(file->io_desc_refs); free(file->io_fstats); free(file); return check_mpi(NULL, file, mpierr, __FILE__, __LINE__); @@ -5256,6 +5267,7 @@ int PIOc_openfile_retry_impl(int iosysid, int *ncidp, int *iotype, const char *f spio_ltimer_stop(file->io_fstats->rd_timer_name); spio_ltimer_stop(file->io_fstats->tot_timer_name); delete(file->pmtx); + delete(file->io_desc_refs); free(file->io_fstats); free(file); LOG((1, "PIOc_openfile_retry failed, ierr = %d", ierr)); diff --git a/src/clib/core/util/pio_lists.cpp b/src/clib/core/util/pio_lists.cpp index d40d2852c6..c85ed212da 100644 --- a/src/clib/core/util/pio_lists.cpp +++ b/src/clib/core/util/pio_lists.cpp @@ -134,6 +134,7 @@ int pio_free_file(file_desc_t *file) } delete(file->pmtx); + delete(file->io_desc_refs); free(file->unlim_dimids); free(file->io_fstats); @@ -469,6 +470,26 @@ io_desc_t *pio_get_iodesc_from_id(int ioid) return iodesc; } +/** + * Get a shared ptr to the I/O descriptor (io_desc_t) associated with a I/O descriptor id. + * + * @param ioid The id of the I/O descriptor (io_desc_t) to lookup + * @returns Shared pointer to the I/O descriptor (io_desc_t) associated with the id + */ +std::shared_ptr pio_get_iodesc_sptr_from_id(int ioid) +{ + LOG((2, "pio_get_iodesc_from_id(ioid=%d)", ioid)); + + std::shared_ptr iodesc; + try{ + iodesc = SPIO_Util::SPIO_Lists::GVars::pio_iodesc_list.at(ioid); + } catch(const std::out_of_range &e){ + LOG((1, "Finding I/O descriptor corresponding to ioid = %d failed. Invalid I/O descriptor id provided", ioid)); + } + + return iodesc; +} + /** * Delete a I/O descriptor from the global list of valid I/O descriptors * diff --git a/src/clib/pio_internal.h b/src/clib/pio_internal.h index 9a8026f17b..182e673fbf 100644 --- a/src/clib/pio_internal.h +++ b/src/clib/pio_internal.h @@ -116,6 +116,7 @@ extern "C" { void pio_get_env(void); int pio_add_to_iodesc_list(std::shared_ptr iodesc, MPI_Comm comm); io_desc_t *pio_get_iodesc_from_id(int ioid); + std::shared_ptr pio_get_iodesc_sptr_from_id(int ioid); int pio_delete_iodesc_from_list(int ioid); int pio_delete_all_iodescs(int iosysid); int pio_num_iosystem(int *niosysid); @@ -143,6 +144,8 @@ extern "C" { /* Close the file ("hard close") */ int spio_wait_on_hard_close(iosystem_desc_t *ios, file_desc_t *file); + void spio_add_iodesc_ref_to_file(file_desc_t *file, int ioid); + std::shared_ptr spio_get_iodesc_ref_from_file(file_desc_t *file, int ioid); int spio_hard_closefile(iosystem_desc_t *ios, file_desc_t *file, bool sync_with_ioprocs); int spio_soft_closefile(iosystem_desc_t *ios, file_desc_t *file); diff --git a/src/clib/pio_types.hpp b/src/clib/pio_types.hpp index 37857e1d57..091aff4653 100644 --- a/src/clib/pio_types.hpp +++ b/src/clib/pio_types.hpp @@ -50,6 +50,9 @@ extern "C" { #include #include #include +#include +#include +#include #include "core/progress_engine/spio_async_op.hpp" #ifdef PIO_MICRO_TIMING @@ -953,6 +956,9 @@ typedef struct file_desc_t /** True if this is an existing file reopened */ bool is_reopened; + + /* Keep track of the I/O decompositions used by this file */ + std::map > *io_desc_refs; } file_desc_t; #endif // __PIO_TYPES_HPP__ From 652a58ea108e8a24835bd63f6c80011cf535e76d Mon Sep 17 00:00:00 2001 From: jayeshkrishna Date: Wed, 27 May 2026 12:35:24 -0500 Subject: [PATCH 7/7] Rm sync hack when freeing decomp Since we now ref count I/O decompositions correctly we no longer need the hack to sync all files before freeing decomps. --- src/flib/spio_decomp.F90.in | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/flib/spio_decomp.F90.in b/src/flib/spio_decomp.F90.in index 9a1c82f5ae..6bd2b6a376 100644 --- a/src/flib/spio_decomp.F90.in +++ b/src/flib/spio_decomp.F90.in @@ -357,21 +357,6 @@ CONTAINS CALL t_startf("PIO:freedecomp") #endif - ! Since a I/O decomposition can be freed before all writes - ! to a file is synced (explicit sync or close) we need to - ! ensure that all writes to the file is synced - ! FIXME : This is unfortunate since all writes (even writes - ! with a different I/O decomposition) are synced here. This - ! is unnecessary and needs to be fixed (a performance bug) - cerr = PIOc_sync(file%fh) - IF(cerr /= PIO_NOERR) THEN - WRITE(log_msg, *) "File sync failed in PIO_freedecomp (PIO_freedecomp_file),",& - " err = ", cerr, ", iosysid =", file%iosystem%iosysid,& - ", ncid =", file%fh, ", ioid = ", iodesc%ioid - ret = pio_error(file%iosystem, INT(cerr), __FILE__, __LINE__, trim(log_msg)) - RETURN - END IF - cerr = PIOc_freedecomp(file%iosystem%iosysid, INT(iodesc%ioid, C_INT)) IF(PRESENT(ierr)) THEN ierr = INT(cerr)