Skip to content

Commit 60bb88d

Browse files
authored
Merge pull request #551 from ValeevGroup/evaleev/feature/lazy-deleter-skip-sync-in-do-cleanup
DistArray::lazy_deleter: skip lazy_sync when invoked from fence's do_cleanup
2 parents 8ca1ff2 + d294ecb commit 60bb88d

2 files changed

Lines changed: 20 additions & 2 deletions

File tree

external/versions.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6
1212
set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496)
1313

1414
set(TA_TRACKED_MADNESS_URL https://github.com/m-a-d-n-e-s-s/madness.git CACHE STRING "GIT_REPOSITORY for cloning MADNESS source")
15-
set(TA_TRACKED_MADNESS_TAG f7aa1401e CACHE STRING "GIT_TAG (branch or hash) for cloning MADNESS")
16-
set(TA_TRACKED_MADNESS_PREVIOUS_TAG 7d8aaf9d51981e4accf4d84742270d1473f8ca2e)
15+
set(TA_TRACKED_MADNESS_TAG 666765ca6 CACHE STRING "GIT_TAG (branch or hash) for cloning MADNESS")
16+
set(TA_TRACKED_MADNESS_PREVIOUS_TAG f7aa1401e)
1717
set(TA_TRACKED_MADNESS_VERSION 0.10.1)
1818
set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)
1919

src/TiledArray/array_impl.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,24 @@ class ArrayImpl : public TensorImpl<Policy>,
480480
// wait for all DelayedSet's to vanish
481481
world.await([&]() { return (pimpl->num_live_ds() == 0); }, true);
482482

483+
// Fast path when invoked from inside the fence's deferred-cleanup
484+
// phase: the global-termination protocol has already established
485+
// global quiescence (no in-flight active messages, all ranks at the same point),
486+
// and symmetric collective use of `defer_deleter_to_next_fence()`
487+
// guarantees every rank has this same pimpl in its deferred list
488+
// and so reaches this same delete in lockstep. The cross-rank
489+
// lazy_sync handshake below is therefore redundant; it would also
490+
// schedule a lazy_sync_children task on this world's taskq that the
491+
// fence cannot drain (do_cleanup runs after the drain loop) and
492+
// that would later be run by some unrelated fence -- against freed
493+
// state if this world is destroyed before then (e.g. einsum's
494+
// per-Hadamard sub-Worlds).
495+
if (world.gop.is_in_do_cleanup()) {
496+
delete pimpl;
497+
cleanup_counter_--;
498+
return;
499+
}
500+
483501
try {
484502
world.gop.lazy_sync(id, [pimpl]() {
485503
delete pimpl;

0 commit comments

Comments
 (0)