From eab3aca586243d0ba4cab8d20beb4c233f1e455f Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 3 Jan 2026 17:28:38 -0500 Subject: [PATCH 01/74] Move CoreStateCounters in global to the GPU --- src/celeritas/global/CoreState.cc | 85 +++++++++++++++++-- src/celeritas/global/CoreState.hh | 53 ++++++++++-- src/celeritas/global/Stepper.cc | 85 ++++++++++++++----- .../optical/detail/OpticalLaunchAction.cc | 34 +++++++- .../optical/detail/OpticalLaunchAction.hh | 5 +- src/celeritas/track/CoreStateCounters.hh | 12 ++- .../track/ExtendFromPrimariesAction.cc | 26 +++++- .../track/ExtendFromPrimariesAction.cu | 29 ++++++- .../track/ExtendFromPrimariesAction.hh | 4 +- .../track/ExtendFromSecondariesAction.cc | 16 ++-- .../track/ExtendFromSecondariesAction.cu | 57 ++++++++++++- .../track/ExtendFromSecondariesAction.hh | 4 +- src/celeritas/track/InitializeTracksAction.cc | 57 +++++++++++-- src/celeritas/track/InitializeTracksAction.cu | 6 +- src/celeritas/track/InitializeTracksAction.hh | 4 +- src/celeritas/track/TrackInitData.hh | 19 ++++- .../track/detail/InitTracksExecutor.hh | 12 ++- .../track/detail/TrackInitAlgorithms.cc | 20 +++-- .../track/detail/TrackInitAlgorithms.cu | 47 +++++----- .../track/detail/TrackInitAlgorithms.hh | 15 ++-- src/corecel/data/Filler.cu | 3 + test/celeritas/track/TrackInit.test.cc | 45 ++++++---- 22 files changed, 502 insertions(+), 136 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index f83378725b..131da7037d 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -6,6 +6,7 @@ //---------------------------------------------------------------------------// #include "CoreState.hh" +#include "corecel/data/ObserverPtr.device.hh" #include "corecel/io/Logger.hh" #include "corecel/sys/ActionRegistry.hh" #include "corecel/sys/ScopedProfiling.hh" @@ -52,16 +53,19 @@ CoreState::CoreState(CoreParams const& params, states_ = CollectionStateStore( params.host_ref(), 
stream_id, num_track_slots); - counters_.num_vacancies = num_track_slots; - if constexpr (M == MemSpace::device) { + auto counters = CoreStateCounters{}; + counters.num_vacancies = num_track_slots; + this->sync_put_counters(counters); device_ref_vec_ = DeviceVector(1); device_ref_vec_.copy_to_device({&this->ref(), 1}); ptr_ = make_observer(device_ref_vec_); } else if constexpr (M == MemSpace::host) { + auto& counters = this->counters(); + counters.num_vacancies = num_track_slots; ptr_ = make_observer(&this->ref()); } @@ -114,7 +118,18 @@ CoreState::~CoreState() template void CoreState::warming_up(bool new_state) { - CELER_EXPECT(!new_state || counters_.num_active == 0); + size_type num_active; + if constexpr (M == MemSpace::host) + { + auto& counters = this->counters(); + num_active = counters.num_active; + } + else if constexpr (M == MemSpace::device) + { + auto counters = this->sync_get_counters(); + num_active = counters.num_active; + } + CELER_EXPECT(!new_state || num_active == 0); warming_up_ = new_state; } @@ -133,19 +148,77 @@ Range CoreState::get_action_range(ActionId action_id) const return {thread_offsets[action_id], thread_offsets[action_id + 1]}; } +//---------------------------------------------------------------------------// +/*! + * Copy the core state counters from the device to the host. Since the entire + * sequence of actions in a step is performed on the device, this is typically + * done at the end of a step. + */ +template +CoreStateCounters const CoreState::sync_get_counters() const +{ + if constexpr (M == MemSpace::device) + { + auto counters = device_pointer_cast(this->ref().init.counters.data()); + return ItemCopier{stream_id()}(counters.get()); + } + else if constexpr (M == MemSpace::host) + { + return *(this->ref().init.counters.data().get()); + // CELER_ASSERT_UNREACHABLE(); + // return CoreStateCounters{}; + } +} + +//---------------------------------------------------------------------------// +/*!
+ * Copy the core state counters from the host to the device. This function is a + * placeholder until the corresponding host code that updates the core state + * counters can be moved to device functions. + */ +template +void CoreState::sync_put_counters(CoreStateCounters& host_counters) +{ + if constexpr (M == MemSpace::device) + { + auto counters = device_pointer_cast(this->ref().init.counters.data()); + copy_bytes(MemSpace::device, + counters.get(), + MemSpace::host, + &host_counters, + sizeof(CoreStateCounters), + stream_id()); + } + else if constexpr (M == MemSpace::host) + { + CELER_ASSERT_UNREACHABLE(); + } + return; +} + //---------------------------------------------------------------------------// /*! * Reset the state data. * * This clears the state counters and initializes the necessary state data so - * the state can be reused for a new event. This should only be necessary if + * the state can be reused for a new event. This should be necessary only if * the previous event aborted early. */ template void CoreState::reset() { - counters_ = CoreStateCounters{}; - counters_.num_vacancies = this->size(); + if constexpr (M == MemSpace::host) + { + auto& counters = this->counters(); + counters = CoreStateCounters{}; + counters.num_vacancies = this->size(); + } + else if constexpr (M == MemSpace::device) + { + auto counters = CoreStateCounters{}; + counters.num_vacancies = this->size(); + sync_put_counters(counters); + } // Reset all the track slots to inactive fill(TrackStatus::inactive, &this->ref().sim.status); diff --git a/src/celeritas/global/CoreState.hh b/src/celeritas/global/CoreState.hh index f0c1d5ee80..e0aa836ed3 100644 --- a/src/celeritas/global/CoreState.hh +++ b/src/celeritas/global/CoreState.hh @@ -49,8 +49,18 @@ class CoreStateInterface virtual size_type size() const = 0; //!
Access track initialization counters + // Use when running all code on the host + // Use sync_get_counters() instead if accessing device data from the host virtual CoreStateCounters const& counters() const = 0; + //! Access track initialization counters + // Use when running all code on the host + // Use sync_get_counters() instead if accessing device data from the host + virtual CoreStateCounters& counters() = 0; + + //! Synchronize and copy track initialization counters from device to host + [[nodiscard]] virtual CoreStateCounters const sync_get_counters() const = 0; + //! Access auxiliary state data virtual AuxStateVec const& aux() const = 0; @@ -131,10 +141,18 @@ class CoreState final : public CoreStateInterface //// COUNTERS //// //! Track initialization counters - CoreStateCounters& counters() { return counters_; } + inline CoreStateCounters& counters() final; //! Track initialization counters - CoreStateCounters const& counters() const final { return counters_; } + inline CoreStateCounters const& counters() const final; + + //! Synchronize and copy track initialization counters from device to host + [[nodiscard]] CoreStateCounters const sync_get_counters() const final; + + //! Synchronize and copy track initialization counters from host to device + //! Remove this once all the counter maintenance is device-only or + //! host-only + void sync_put_counters(CoreStateCounters&); //// AUXILIARY DATA //// @@ -178,9 +196,6 @@ class CoreState final : public CoreStateInterface // Native pointer to ref data Ptr ptr_; - // Counters for track initialization and activity - CoreStateCounters counters_; - // User-added data associated with params SPAuxStateVec aux_state_; @@ -191,6 +206,34 @@ class CoreState final : public CoreStateInterface bool warming_up_{false}; }; +//---------------------------------------------------------------------------// +/*! + * Access counters -- works only when counters are stored on host. 
+ * Otherwise, use sync_get_counters() to copy counters from device to host. + */ +template +CoreStateCounters& CoreState::counters() +{ + if constexpr (M == MemSpace::host) + return *(this->ref().init.counters.data().get()); + else + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} + +//---------------------------------------------------------------------------// +/*! + * Access counters -- works only when counters are not stored on device. + * Otherwise, use sync_get_counters() to return results to the host. + */ +template +CoreStateCounters const& CoreState::counters() const +{ + if constexpr (M == MemSpace::host) + return *(this->ref().init.counters.data().get()); + else + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} + //---------------------------------------------------------------------------// /*! * Convenience function to access auxiliary "collection group" data. diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index fc991afeb5..e45f4d3a52 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -107,14 +107,29 @@ Stepper::~Stepper() = default; template void Stepper::warm_up() { - CELER_VALIDATE(state_->counters().num_active == 0, - << "cannot warm up when state has active tracks"); - - ScopedProfiling profile_this{"warmup"}; - state_->warming_up(true); - ScopeExit on_exit_{[this] { state_->warming_up(false); }}; - actions_->step(*params_, *state_); - CELER_ENSURE(state_->counters().num_active == 0); + if constexpr (M == MemSpace::host) + { + CELER_VALIDATE(state_->counters().num_active == 0, + << "cannot warm up when state has active tracks"); + + ScopedProfiling profile_this{"warmup"}; + state_->warming_up(true); + ScopeExit on_exit_{[this] { state_->warming_up(false); }}; + actions_->step(*params_, *state_); + CELER_ENSURE(state_->counters().num_active == 0); + } + else if constexpr (M == MemSpace::device) + { + auto counters = state_->sync_get_counters(); + CELER_VALIDATE(counters.num_active == 0, + <<
"cannot warm up when state has active tracks"); + + ScopedProfiling profile_this{"warmup"}; + state_->warming_up(true); + ScopeExit on_exit_{[this] { state_->warming_up(false); }}; + actions_->step(*params_, *state_); + CELER_ENSURE(state_->sync_get_counters().num_active == 0); + } + } } //---------------------------------------------------------------------------// @@ -128,17 +143,31 @@ template auto Stepper::operator()() -> result_type { ScopedProfiling profile_this{"step"}; - auto& counters = state_->counters(); - counters.num_generated = 0; - actions_->step(*params_, *state_); - // Get the number of track initializers and active tracks result_type result; - result.generated = counters.num_generated; - result.active = counters.num_active; - result.alive = counters.num_alive; - result.queued = counters.num_initializers; + if constexpr (M == MemSpace::host) + { + auto& counters = state_->counters(); + counters.num_generated = 0; + actions_->step(*params_, *state_); + result.generated = counters.num_generated; + result.active = counters.num_active; + result.alive = counters.num_alive; + result.queued = counters.num_initializers; + } + else if constexpr (M == MemSpace::device) + { + auto counters = state_->sync_get_counters(); + counters.num_generated = 0; + state_->sync_put_counters(counters); + actions_->step(*params_, *state_); + counters = state_->sync_get_counters(); + result.generated = counters.num_generated; + result.active = counters.num_active; + result.alive = counters.num_alive; + result.queued = counters.num_initializers; + } return result; } @@ -164,7 +193,16 @@ auto Stepper::operator()(SpanConstPrimary primaries) -> result_type << "event number " << max_id->event_id.unchecked_get() << " exceeds max_events=" << params_->init()->max_events()); - state_->counters().num_pending = primaries.size(); + if constexpr (M == MemSpace::host) + { + state_->counters().num_pending = primaries.size(); + } + else if constexpr (M == MemSpace::device) + { + auto counters =
state_->sync_get_counters(); + counters.num_pending = primaries.size(); + state_->sync_put_counters(counters); + } primaries_action_->insert(*params_, *state_, primaries); return (*this)(); @@ -180,8 +218,17 @@ auto Stepper::operator()(SpanConstPrimary primaries) -> result_type template void Stepper::kill_active() { - CELER_LOG_LOCAL(error) << "Killing " << state_->counters().num_active - << " active tracks"; + if constexpr (M == MemSpace::host) + { + CELER_LOG_LOCAL(error) + << "Killing " << state_->counters().num_active << " active tracks"; + } + else if constexpr (M == MemSpace::device) + { + auto counters = state_->sync_get_counters(); + CELER_LOG_LOCAL(error) + << "Killing " << counters.num_active << " active tracks"; + } detail::kill_active(*params_, *state_); } diff --git a/src/celeritas/optical/detail/OpticalLaunchAction.cc b/src/celeritas/optical/detail/OpticalLaunchAction.cc index c080a805f8..4aae208c9f 100644 --- a/src/celeritas/optical/detail/OpticalLaunchAction.cc +++ b/src/celeritas/optical/detail/OpticalLaunchAction.cc @@ -127,11 +127,11 @@ void OpticalLaunchAction::step(CoreParams const& params, /*! * Launch the optical tracking loop. */ -template void OpticalLaunchAction::execute_impl(CoreParams const&, - CoreState& core_state) const + CoreStateHost& core_state) const { - auto& state = get>(core_state.aux(), this->aux_id()); + auto& state = get>(core_state.aux(), + this->aux_id()); CELER_ASSERT(state.size() > 0); auto const& core_counters = core_state.counters(); @@ -150,6 +150,34 @@ void OpticalLaunchAction::execute_impl(CoreParams const&, (*transport_)(state); } +//---------------------------------------------------------------------------// +/*! + * Launch the optical tracking loop. 
+ */ +void OpticalLaunchAction::execute_impl(CoreParams const&, + CoreStateDevice& core_state) const +{ + auto& state = get>(core_state.aux(), + this->aux_id()); + CELER_ASSERT(state.size() > 0); + + auto core_counters = core_state.sync_get_counters(); + auto counters = state.counters(); + + if ((counters.num_pending < data_.auto_flush + && (core_counters.num_alive > 0 || core_counters.num_initializers > 0)) + || counters.num_pending == 0) + { + // Don't launch the optical loop if the number of pending tracks is + // below the threshold and the core stepping loop hasn't completed yet + return; + } + + // Transport pending optical tracks + (*transport_)(state); + core_state.sync_put_counters(core_counters); +} + //---------------------------------------------------------------------------// /*! * Create the transporter and cache a pointer to the auxiliary data. diff --git a/src/celeritas/optical/detail/OpticalLaunchAction.hh b/src/celeritas/optical/detail/OpticalLaunchAction.hh index 18dc4d1f66..0d2ebffc0a 100644 --- a/src/celeritas/optical/detail/OpticalLaunchAction.hh +++ b/src/celeritas/optical/detail/OpticalLaunchAction.hh @@ -139,8 +139,9 @@ class OpticalLaunchAction : public AuxParamsInterface, //// HELPERS //// - template - void execute_impl(CoreParams const&, CoreState&) const; + void execute_impl(CoreParams const&, CoreStateHost&) const; + void execute_impl(CoreParams const&, CoreStateDevice&) const; + template void begin_run_impl(CoreState&); }; diff --git a/src/celeritas/track/CoreStateCounters.hh b/src/celeritas/track/CoreStateCounters.hh index 53c716435e..144a8af65a 100644 --- a/src/celeritas/track/CoreStateCounters.hh +++ b/src/celeritas/track/CoreStateCounters.hh @@ -14,9 +14,10 @@ namespace celeritas /*! * Counters for within-step track initialization and activity. 
* - * These counters are updated *by value on the host at every step* so they - * should not be stored in TrackInitStateData because then the device-memory - * copy will not be synchronized. + * When running device code, these counters are now updated on the device + * throughout the step, so they are stored in TrackInitStateData. They need to + * be synchronized between the host and device before and after the step to + * maintain consistency. * * For all user \c StepActionOrder (TODO: this may change if we add a * "user_end"), all but the secondaries/alive @@ -49,6 +50,11 @@ struct CoreStateCounters size_type num_secondaries{0}; //!< Number of secondaries produced size_type num_alive{0}; //!< Number of alive tracks at end //!@} + + //!@{ + //! \name Set by CUDA CUB when partitioning the tracks, unused by celeritas + size_type num_neutral{0}; //!< Number of neutral tracks + //!@} }; //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index 027a71a60f..eaebbe8f28 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -103,7 +103,19 @@ void ExtendFromPrimariesAction::insert(CoreParams const& params, CoreStateInterface& state, Span host_primaries) const { - size_type num_initializers = state.counters().num_initializers; + size_type num_initializers; + if (auto* s = dynamic_cast*>(&state)) + { + num_initializers = state.counters().num_initializers; + } + else if (auto* s = dynamic_cast*>(&state)) + { + num_initializers = s->sync_get_counters().num_initializers; + } + else + { + CELER_ASSERT_UNREACHABLE(); + } size_type init_capacity = params.init()->capacity(); CELER_VALIDATE(host_primaries.size() + num_initializers <= init_capacity, @@ -179,11 +191,11 @@ void ExtendFromPrimariesAction::insert_impl( /*! * Construct primaries. 
*/ -template void ExtendFromPrimariesAction::step_impl(CoreParams const& params, - CoreState& state) const + CoreStateHost& state) const { - auto& primaries = get>(state.aux(), aux_id_); + auto& primaries + = get>(state.aux(), aux_id_); // Create track initializers from primaries state.counters().num_initializers += primaries.count; @@ -221,6 +233,12 @@ void ExtendFromPrimariesAction::process_primaries( { CELER_NOT_CONFIGURED("CUDA OR HIP"); } + +void ExtendFromPrimariesAction::step_impl(CoreParams const&, + CoreStateDevice&) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} #endif //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index 4d37f4be24..889ef877d8 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -24,11 +24,9 @@ void ExtendFromPrimariesAction::process_primaries( PrimaryStateData const& pstate) const { auto primaries = pstate.primaries(); + auto counters = state.sync_get_counters(); detail::ProcessPrimariesExecutor execute_thread{ - params.ptr(), - state.ptr(), - state.counters(), - primaries}; + params.ptr(), state.ptr(), counters, primaries}; static ActionLauncher const launch_kernel(*this); if (!primaries.empty()) { @@ -36,5 +34,28 @@ void ExtendFromPrimariesAction::process_primaries( } } +//---------------------------------------------------------------------------// +/*! + * Construct primaries. 
+ */ +void ExtendFromPrimariesAction::step_impl(CoreParams const& params, + CoreStateDevice& state) const +{ + auto& primaries + = get>(state.aux(), aux_id_); + auto counters = state.sync_get_counters(); + + // Create track initializers from primaries + counters.num_initializers += primaries.count; + state.sync_put_counters(counters); + this->process_primaries(params, state, primaries); + + // Mark that the primaries have been processed + counters.num_generated += primaries.count; + counters.num_pending = 0; + primaries.count = 0; + state.sync_put_counters(counters); +} + //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/src/celeritas/track/ExtendFromPrimariesAction.hh b/src/celeritas/track/ExtendFromPrimariesAction.hh index 80d1fcb3ad..d347eecf56 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.hh +++ b/src/celeritas/track/ExtendFromPrimariesAction.hh @@ -90,8 +90,8 @@ class ExtendFromPrimariesAction final : public CoreStepActionInterface, void insert_impl(CoreState& state, Span host_primaries) const; - template - void step_impl(CoreParams const&, CoreState&) const; + void step_impl(CoreParams const&, CoreStateHost&) const; + void step_impl(CoreParams const&, CoreStateDevice&) const; void process_primaries(CoreParams const&, CoreStateHost&, diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cc b/src/celeritas/track/ExtendFromSecondariesAction.cc index d8595f7378..8cb8f593fc 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cc +++ b/src/celeritas/track/ExtendFromSecondariesAction.cc @@ -51,12 +51,12 @@ void ExtendFromSecondariesAction::step(CoreParams const& params, /*! * Initialize track states. 
*/ -template void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, - CoreState& core_state) const + CoreStateHost& core_state) const { - TrackInitStateData& init = core_state.ref().init; - CoreStateCounters& counters = core_state.counters(); + TrackInitStateData& init + = core_state.ref().init; + auto& counters = core_state.counters(); // Launch a kernel to identify which track slots are still alive and count // the number of surviving secondaries per track @@ -65,7 +65,7 @@ void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, // Remove all elements in the vacancy vector that were flagged as active // tracks, leaving the (sorted) indices of the empty slots counters.num_vacancies - = detail::remove_if_alive(init.vacancies, core_state.stream_id()); + = detail::remove_if_alive(init, core_state.stream_id()); // The exclusive prefix sum of the number of secondaries produced by each // track is used to get the start index in the vector of track initializers @@ -137,6 +137,12 @@ void ExtendFromSecondariesAction::begin_run(CoreParams const&, CoreStateDevice&) CELER_NOT_CONFIGURED("CUDA OR HIP"); } +void ExtendFromSecondariesAction::step_impl(CoreParams const&, + CoreStateDevice&) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} + void ExtendFromSecondariesAction::locate_alive(CoreParams const&, CoreStateDevice&) const { diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cu b/src/celeritas/track/ExtendFromSecondariesAction.cu index 4ee5dacafa..22bb2afbbe 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cu +++ b/src/celeritas/track/ExtendFromSecondariesAction.cu @@ -16,6 +16,7 @@ #include "detail/LocateAliveExecutor.hh" #include "detail/ProcessSecondariesExecutor.hh" +#include "detail/TrackInitAlgorithms.hh" namespace celeritas { @@ -35,6 +36,60 @@ void ExtendFromSecondariesAction::begin_run(CoreParams const&, s.free_async(p); } +//---------------------------------------------------------------------------// 
+/*! + * Initialize track states. + */ +void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, + CoreStateDevice& core_state) const +{ + TrackInitStateData& init + = core_state.ref().init; + auto counters = core_state.sync_get_counters(); + + // Launch a kernel to identify which track slots are still alive and count + // the number of surviving secondaries per track + this->locate_alive(core_params, core_state); + + // Remove all elements in the vacancy vector that were flagged as active + // tracks, leaving the (sorted) indices of the empty slots + counters.num_vacancies + = detail::remove_if_alive(init, core_state.stream_id()); + + // The exclusive prefix sum of the number of secondaries produced by each + // track is used to get the start index in the vector of track initializers + // for each thread. Starting at that index, each thread creates track + // initializers from all surviving secondaries produced in its + // interaction. + counters.num_secondaries = detail::exclusive_scan_counts( + init.secondary_counts, core_state.stream_id()); + + /*! \todo If we don't have space for all the secondaries, we will need to + * buffer the current track initializers to create room. + * + * This isn't trivial because we will need to: + * - Allocate a new buffer (probably do something like 2x, rounding up to + * nearest power of 2)? + * - Update the collection references for track sim + * - Update the *copies* of that reference (?) 
like in track state + * - Copy to device to update the on-device references (state.ptr) + */ + counters.num_initializers += counters.num_secondaries; + CELER_VALIDATE( + counters.num_initializers <= init.initializers.size(), + << "insufficient capacity (" << init.initializers.size() + << ") for track initializers (created " << counters.num_secondaries + << " new secondaries for a total capacity requirement of " + << counters.num_initializers + << "): increase initializer capacity or decrease track slots"); + + // Launch a kernel to create track initializers from secondaries + counters.num_alive = core_state.size() - counters.num_vacancies; + core_state.sync_put_counters(counters); + + this->process_secondaries(core_params, core_state); +} + //---------------------------------------------------------------------------// /*! * Launch a kernel to locate alive particles. @@ -64,7 +119,7 @@ void ExtendFromSecondariesAction::process_secondaries( launch(core_state, Executor{core_params.ptr(), core_state.ptr(), - core_state.counters()}); + core_state.sync_get_counters()}); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromSecondariesAction.hh b/src/celeritas/track/ExtendFromSecondariesAction.hh index c1abdce5fd..3ecbcca10d 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.hh +++ b/src/celeritas/track/ExtendFromSecondariesAction.hh @@ -108,8 +108,8 @@ class ExtendFromSecondariesAction final : public CoreStepActionInterface, private: ActionId id_; - template - void step_impl(CoreParams const&, CoreState&) const; + void step_impl(CoreParams const&, CoreStateHost&) const; + void step_impl(CoreParams const&, CoreStateDevice&) const; void locate_alive(CoreParams const&, CoreStateHost&) const; void locate_alive(CoreParams const&, CoreStateDevice&) const; diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index 93c5516c33..9c758c0724 100644 
--- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -51,12 +51,11 @@ void InitializeTracksAction::step(CoreParams const& params, * If there are more empty slots than new secondaries, they will be filled by * any track initializers remaining from previous steps using the position. */ -template void InitializeTracksAction::step_impl(CoreParams const& core_params, - CoreState& core_state) const + CoreStateHost& core_state) const { auto& counters = core_state.counters(); - + auto init = core_state.ref().init; // The number of new tracks to initialize is the smaller of the number of // empty slots in the track vector and the number of track initializers size_type num_new_tracks @@ -72,7 +71,6 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, // Partition indices by whether tracks are charged or neutral detail::partition_initializers(core_params, core_state.ref().init, - counters, num_new_tracks, core_state.stream_id()); } @@ -89,6 +87,51 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, counters.num_active = core_state.size() - counters.num_vacancies; } +//---------------------------------------------------------------------------// +/*! + * Initialize track states. + * + * Tracks created from secondaries produced in this step will have the geometry + * state copied over from the parent instead of initialized from the position. + * If there are more empty slots than new secondaries, they will be filled by + * any track initializers remaining from previous steps using the position. 
+ */ +void InitializeTracksAction::step_impl(CoreParams const& core_params, + CoreStateDevice& core_state) const +{ + auto counters = core_state.sync_get_counters(); + auto init = core_state.ref().init; + // The number of new tracks to initialize is the smaller of the number of + // empty slots in the track vector and the number of track initializers + size_type num_new_tracks + = std::min(counters.num_vacancies, counters.num_initializers); + if (num_new_tracks > 0) + { + if (core_params.init()->track_order() == TrackOrder::init_charge) + { + // Reset track initializer indices + fill_sequence(&core_state.ref().init.indices, + core_state.stream_id()); + + // Partition indices by whether tracks are charged or neutral + detail::partition_initializers(core_params, + core_state.ref().init, + num_new_tracks, + core_state.stream_id()); + } + // Launch a kernel to initialize tracks + this->step_impl(core_params, core_state, num_new_tracks); + + // Update initializers/vacancies + counters.num_initializers -= num_new_tracks; + counters.num_vacancies -= num_new_tracks; + } + + // Store number of active tracks at the start of the loop + counters.num_active = core_state.size() - counters.num_vacancies; + core_state.sync_put_counters(counters); +} + //---------------------------------------------------------------------------// /*! * Launch a (host) kernel to initialize tracks. 
@@ -100,10 +143,8 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreStateHost& core_state, size_type num_new_tracks) const { - detail::InitTracksExecutor execute{core_params.ptr(), - core_state.ptr(), - num_new_tracks, - core_state.counters()}; + detail::InitTracksExecutor execute{ + core_params.ptr(), core_state.ptr(), num_new_tracks}; return launch_action( *this, num_new_tracks, core_params, core_state, execute); } diff --git a/src/celeritas/track/InitializeTracksAction.cu b/src/celeritas/track/InitializeTracksAction.cu index 980a9f0585..da65606798 100644 --- a/src/celeritas/track/InitializeTracksAction.cu +++ b/src/celeritas/track/InitializeTracksAction.cu @@ -22,10 +22,8 @@ void InitializeTracksAction::step_impl(CoreParams const& params, CoreStateDevice& state, size_type num_new_tracks) const { - detail::InitTracksExecutor execute_thread{params.ptr(), - state.ptr(), - num_new_tracks, - state.counters()}; + detail::InitTracksExecutor execute_thread{ + params.ptr(), state.ptr(), num_new_tracks}; static ActionLauncher const launch_kernel(*this); launch_kernel(num_new_tracks, state.stream_id(), execute_thread); } diff --git a/src/celeritas/track/InitializeTracksAction.hh b/src/celeritas/track/InitializeTracksAction.hh index b7b3ccb8b3..f0b88f578d 100644 --- a/src/celeritas/track/InitializeTracksAction.hh +++ b/src/celeritas/track/InitializeTracksAction.hh @@ -50,8 +50,8 @@ class InitializeTracksAction final : public CoreStepActionInterface private: ActionId id_; - template - void step_impl(CoreParams const&, CoreState&) const; + void step_impl(CoreParams const&, CoreStateHost&) const; + void step_impl(CoreParams const&, CoreStateDevice&) const; void step_impl(CoreParams const&, CoreStateHost&, size_type) const; void step_impl(CoreParams const&, CoreStateDevice&, size_type) const; diff --git a/src/celeritas/track/TrackInitData.hh b/src/celeritas/track/TrackInitData.hh index a06f19703a..a304cb95b7 100644 --- 
a/src/celeritas/track/TrackInitData.hh +++ b/src/celeritas/track/TrackInitData.hh @@ -11,6 +11,7 @@ #include "corecel/data/Collection.hh" #include "corecel/data/CollectionAlgorithms.hh" #include "corecel/data/CollectionBuilder.hh" +#include "corecel/data/PinnedAllocator.hh" #include "corecel/sys/Device.hh" #include "corecel/sys/ThreadId.hh" #include "geocel/Types.hh" @@ -18,6 +19,7 @@ #include "celeritas/phys/ParticleData.hh" #include "celeritas/phys/Primary.hh" +#include "CoreStateCounters.hh" #include "SimData.hh" namespace celeritas @@ -92,7 +94,9 @@ struct TrackInitializer * created per event. * - \c secondary_counts stores the number of secondaries created by each track * (with one remainder at the end for storing the accumulated number of - * secondaries) + * secondaries). + * - \c counters stores the number of tracks with a given status and is updated + * during each step of the simulation of the event. */ template struct TrackInitStateData @@ -117,6 +121,11 @@ struct TrackInitStateData // CoreStateCounters) Items initializers; + // Maintain the counters here to allow GPU-resident computation with + // synchronization between host and device only at the end of a step or + // when explicitly requested, such as in the tests + Items counters; + //// METHODS //// //! Whether the data are assigned @@ -124,7 +133,8 @@ struct TrackInitStateData { return (indices.size() == vacancies.size() || indices.empty()) && secondary_counts.size() == vacancies.size() + 1 - && !track_counters.empty() && !initializers.empty(); + && !track_counters.empty() && !initializers.empty() + && !counters.empty(); } //! 
Assign from another set of data @@ -139,6 +149,7 @@ struct TrackInitStateData vacancies = other.vacancies; initializers = other.initializers; + counters = other.counters; return *this; } @@ -168,6 +179,7 @@ void resize(TrackInitStateData* data, // Allocate device data resize(&data->secondary_counts, size + 1); resize(&data->track_counters, params.max_events); + resize(&data->counters, 1); if (params.track_order == TrackOrder::init_charge) { resize(&data->indices, size); @@ -183,6 +195,9 @@ void resize(TrackInitStateData* data, // Reserve space for initializers resize(&data->initializers, params.capacity); + // Initialize the counters for the step to zero + fill(CoreStateCounters{}, &data->counters); + CELER_ENSURE(*data); } diff --git a/src/celeritas/track/detail/InitTracksExecutor.hh b/src/celeritas/track/detail/InitTracksExecutor.hh index 9f4c6ab919..804bc5b305 100644 --- a/src/celeritas/track/detail/InitTracksExecutor.hh +++ b/src/celeritas/track/detail/InitTracksExecutor.hh @@ -8,7 +8,6 @@ #include "corecel/Assert.hh" #include "corecel/Macros.hh" -#include "corecel/cont/Span.hh" #include "corecel/sys/ThreadId.hh" #include "celeritas/Types.hh" #include "celeritas/geo/GeoMaterialView.hh" @@ -47,7 +46,6 @@ struct InitTracksExecutor ParamsPtr params; StatePtr state; size_type num_init{}; - CoreStateCounters counters; //// FUNCTIONS //// @@ -68,7 +66,7 @@ CELER_FUNCTION void InitTracksExecutor::operator()(ThreadId tid) const CELER_EXPECT(tid < num_init); auto const& data = state->init; - + auto counters = state->init.counters.data().get(); // Get the track initializer from the back of the vector. 
Since new // initializers are pushed to the back of the vector, these will be the // most recently added and therefore the ones that still might have a @@ -79,9 +77,9 @@ CELER_FUNCTION void InitTracksExecutor::operator()(ThreadId tid) const // Get the index into the track initializer or parent track slot ID // array from the sorted indices return data.indices[TrackSlotId(index_before(num_init, tid))] - + counters.num_initializers - num_init; + + counters->num_initializers - num_init; } - return index_before(counters.num_initializers, tid); + return index_before(counters->num_initializers, tid); }())]; // View to the new track to be initialized @@ -95,11 +93,11 @@ CELER_FUNCTION void InitTracksExecutor::operator()(ThreadId tid) const } // Get the vacancy from the back of the track state return data.vacancies[TrackSlotId( - index_before(counters.num_vacancies, tid))]; + index_before(counters->num_vacancies, tid))]; }()}; // Clear parent IDs if new primaries were added this step - if (counters.num_generated) + if (counters->num_generated) { init.geo.parent = {}; } diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cc b/src/celeritas/track/detail/TrackInitAlgorithms.cc index 65d72854b8..e90ffd5741 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cc +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cc @@ -25,12 +25,14 @@ namespace detail * \return New size of the vacancy vector */ size_type remove_if_alive( - StateCollection const& - vacancies, + TrackInitStateData const& init, StreamId) { - auto* start = vacancies.data().get(); - auto* stop = std::remove_if(start, start + vacancies.size(), LogicalNot{}); + auto* start = init.vacancies.data().get(); + auto* counters = init.counters.data().get(); + auto* stop + = std::remove_if(start, start + init.vacancies.size(), LogicalNot{}); + counters->num_vacancies = stop - start; return stop - start; } @@ -80,15 +82,15 @@ size_type exclusive_scan_counts( void partition_initializers( CoreParams const& 
params, TrackInitStateData const& init, - CoreStateCounters const& counters, size_type count, StreamId) { // Partition the indices based on the track initializer charge - auto start = init.indices.data().get(); - auto end = start + count; - auto stencil = init.initializers.data().get() + counters.num_initializers - - count; + auto* start = init.indices.data().get(); + auto* counters = init.counters.data().get(); + auto* end = start + count; + auto* stencil = init.initializers.data().get() + counters->num_initializers + - count; std::stable_partition( start, end, IsNeutralStencil{params.ptr(), stencil}); } diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cu b/src/celeritas/track/detail/TrackInitAlgorithms.cu index 0c55ce11c4..65353ada5d 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cu +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cu @@ -72,16 +72,15 @@ struct NotNull * tracks. */ size_type remove_if_alive( - StateCollection const& - vacancies, + TrackInitStateData const& init, StreamId stream_id) { ScopedProfiling profile_this{"remove-if-alive"}; #if CELER_USE_THRUST - auto start = device_pointer_cast(vacancies.data()); + auto start = device_pointer_cast(init.vacancies.data()); auto end = thrust::remove_if(thrust_execute_on(stream_id), start, - start + vacancies.size(), + start + init.vacancies.size(), LogicalNot{}); CELER_DEVICE_API_CALL(PeekAtLastError()); @@ -89,17 +88,17 @@ size_type remove_if_alive( return end - start; #else auto& stream = device().stream(stream_id); - DeviceVector num_not_active{1, stream_id}; // Calling with nullptr causes the function to return the amount of working // space needed instead of invoking the kernel. 
size_t temp_storage_bytes = 0; - auto data = device_pointer_cast(vacancies.data()); + auto data = device_pointer_cast(init.vacancies.data()); + auto counters = device_pointer_cast(init.counters.data()); // HIP defines hipCUB functions as [[nodiscard]], but we defer error checks auto cub_error_code = cub::DeviceSelect::If(nullptr, temp_storage_bytes, data, - num_not_active.data(), - vacancies.size(), + &(counters->num_vacancies), + init.vacancies.size(), NotNull{}, stream.get()); CELER_DISCARD(cub_error_code); @@ -109,14 +108,14 @@ size_type remove_if_alive( cub_error_code = cub::DeviceSelect::If(temp_storage.data(), temp_storage_bytes, data, - num_not_active.data(), - vacancies.size(), + &(counters->num_vacancies), + init.vacancies.size(), NotNull{}, stream.get()); CELER_DISCARD(cub_error_code); CELER_DEVICE_API_CALL(PeekAtLastError()); - auto result = ItemCopier{stream_id}(num_not_active.data()); + auto result = ItemCopier{stream_id}(&(counters->num_vacancies)); stream.sync(); return result; @@ -196,7 +195,6 @@ size_type exclusive_scan_counts( void partition_initializers( CoreParams const& params, TrackInitStateData const& init, - CoreStateCounters const& counters, size_type count, StreamId stream_id) { @@ -208,7 +206,7 @@ void partition_initializers( auto start = device_pointer_cast(init.indices.data()); auto end = start + count; auto stencil = static_cast(init.initializers.data()) - + counters.num_initializers - count; + + init.counters.num_initializers - count; thrust::stable_partition( thrust_execute_on(stream_id), start, @@ -226,8 +224,10 @@ void partition_initializers( // // The initializers array is large. 
Use stencil to point to the start where // this array is being used + auto counters = device_pointer_cast(init.counters.data()); + auto cpucntrs = ItemCopier{stream_id}(counters.get()); auto stencil = static_cast(init.initializers.data()) - + counters.num_initializers - count; + + cpucntrs.num_initializers - count; DeviceVector flags{count, stream_id}; # if CELER_CUB_HAS_TRANSFORM || CELER_HIPCUB_HAS_TRANSFORM // HIP defines hipCUB functions as [[nodiscard]], but we defer error checks @@ -256,14 +256,15 @@ void partition_initializers( auto data = device_pointer_cast(init.indices.data()); // Allocate storage for the number of neutral tracks (unused by celeritas) DeviceVector num_neutral{1, stream_id}; - auto cub_error_code = cub::DevicePartition::Flagged(nullptr, - temp_storage_bytes, - start, - flags.data(), - data, - num_neutral.data(), - count, - stream.get()); + auto cub_error_code + = cub::DevicePartition::Flagged(nullptr, + temp_storage_bytes, + start, + flags.data(), + data, + &(counters->num_neutral), + count, + stream.get()); CELER_DISCARD(cub_error_code); // Allocate temporary storage DeviceVector temp_storage(temp_storage_bytes, stream_id); @@ -273,7 +274,7 @@ void partition_initializers( start, flags.data(), data, - num_neutral.data(), + &(counters->num_neutral), count, stream.get()); CELER_DISCARD(cub_error_code); diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.hh b/src/celeritas/track/detail/TrackInitAlgorithms.hh index bdea2b3253..933b4ae8a0 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.hh +++ b/src/celeritas/track/detail/TrackInitAlgorithms.hh @@ -39,11 +39,11 @@ struct IsNeutralStencil //---------------------------------------------------------------------------// // Remove all elements in the vacancy vector that were flagged as alive +size_type +remove_if_alive(TrackInitStateData const&, + StreamId); size_type remove_if_alive( - StateCollection const&, - StreamId); -size_type remove_if_alive( - StateCollection const&, + 
TrackInitStateData const&, StreamId); //---------------------------------------------------------------------------// @@ -60,23 +60,20 @@ size_type exclusive_scan_counts( void partition_initializers( CoreParams const&, TrackInitStateData const&, - CoreStateCounters const&, size_type, StreamId); void partition_initializers( CoreParams const&, TrackInitStateData const&, - CoreStateCounters const&, size_type, StreamId); //---------------------------------------------------------------------------// -// INLINE DEFINITIONS +// DEVICE-DISABLED IMPLEMENTATION //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE inline size_type remove_if_alive( - StateCollection const&, - StreamId) + TrackInitStateData const&, StreamId) { CELER_NOT_CONFIGURED("CUDA or HIP"); } diff --git a/src/corecel/data/Filler.cu b/src/corecel/data/Filler.cu index 37e419278b..4b4eeecb27 100644 --- a/src/corecel/data/Filler.cu +++ b/src/corecel/data/Filler.cu @@ -4,6 +4,8 @@ //---------------------------------------------------------------------------// //! 
\file corecel/data/Filler.cu //---------------------------------------------------------------------------// +#include "celeritas/track/CoreStateCounters.hh" + #include "Filler.device.t.hh" namespace celeritas @@ -13,5 +15,6 @@ template class Filler; template class Filler; template class Filler; template class Filler; +template class Filler; //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index 29aab0b16f..994a1679cb 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -61,16 +61,27 @@ RunResult RunResult::from_state(CoreState& state) // Copy track initializer data to host HostVal data; data = state.ref().init; + size_type num_vacancies, num_initializers; + + if constexpr (M == MemSpace::host) + { + num_vacancies = state.counters().num_vacancies; + num_initializers = state.counters().num_initializers; + } + else if constexpr (M == MemSpace::device) + { + num_vacancies = state.sync_get_counters().num_vacancies; + num_initializers = state.sync_get_counters().num_initializers; + } // Store the IDs of the vacant track slots - for (auto tid : range(TrackSlotId{state.counters().num_vacancies})) + for (auto tid : range(TrackSlotId{num_vacancies})) { result.vacancies.push_back(id_to_int(data.vacancies[tid])); } // Store the track IDs of the initializers - for (auto init_id : - range(ItemId{state.counters().num_initializers})) + for (auto init_id : range(ItemId{num_initializers})) { auto const& init = data.initializers[init_id]; result.init_ids.push_back(id_to_int(init.sim.track_id)); @@ -226,15 +237,15 @@ TYPED_TEST_SUITE(TrackInitTest, MemspaceTypes, MemspaceTypeString); TYPED_TEST(TrackInitTest, add_more_primaries) { this->build_states(16); - EXPECT_EQ(0, this->state().counters().num_initializers); + EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); auto 
primaries = this->make_primaries(22); this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(22, this->state().counters().num_initializers); + EXPECT_EQ(22, this->state().sync_get_counters().num_initializers); primaries = this->make_primaries(32); this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(54, this->state().counters().num_initializers); + EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); } //! Test that we can add more primaries than the first allocation @@ -248,7 +259,7 @@ TYPED_TEST(TrackInitTest, extend_primaries) this->insert_primaries(this->state(), make_span(primaries)); RunResult::from_state(this->state()); - EXPECT_EQ(0, this->state().counters().num_initializers); + EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); } { // Now initialize after adding @@ -415,8 +426,9 @@ TYPED_TEST(TrackInitTest, primaries) // Find vacancies and create track initializers from secondaries extend_from_secondaries.step(*this->core(), this->state()); EXPECT_EQ(i * num_tracks / 2, - this->state().counters().num_initializers); - EXPECT_EQ(num_tracks / 2, this->state().counters().num_vacancies); + this->state().sync_get_counters().num_initializers); + EXPECT_EQ(num_tracks / 2, + this->state().sync_get_counters().num_vacancies); } // Check the results @@ -474,7 +486,8 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) // Create track initializers on device from primary particles auto primaries = this->make_primaries(num_primaries); this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(num_primaries, this->state().counters().num_initializers); + EXPECT_EQ(num_primaries, + this->state().sync_get_counters().num_initializers); auto apply_actions = [&actions, this] { for (auto const& ea_interface : actions) @@ -497,7 +510,7 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) static int const expected_geo_parent_ids[] = {0, 2}; EXPECT_VEC_EQ(expected_geo_parent_ids, result.geo_parent_ids); - // init ids may not be 
deterministic, but can guarantee they are in the + // init IDs may not be deterministic, but can guarantee they are in the // range 8<=x<=12 as we create 4 tracks per iteration, 2 in reused // slots from their parent, 2 as new inits EXPECT_EQ(2, result.init_ids.size()); @@ -506,7 +519,7 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) std::end(result.init_ids), [i](int id) { return id >= 8 + i * 4 && id <= 11 + i * 4; })); - // Track ids may not be deterministic, so only validate size and + // Track IDs may not be deterministic, so only validate size and // range. (Remember that we create 4 new tracks per iteration, with 2 // slots reused EXPECT_EQ(num_tracks, result.track_ids.size()); @@ -518,9 +531,9 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) + (i + 1) * 4; })); - // Parent ids may not be deterministic, but all non-killed tracks are - // guaranteed to be primaries at every iteration. At end of first - // iteration, will still have some primary ids as these are not cleared + // Parent IDs may not be deterministic, but all non-killed tracks are + // guaranteed to be primaries at every iteration. 
At end of the first + // iteration, will still have some primary IDs as these are not cleared // until the next iteration for (size_type pidx : range(num_tracks)) { @@ -529,7 +542,7 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) << "iteration " << i; } } -} // namespace test +} //---------------------------------------------------------------------------// } // namespace test From 74b3cde70b446890805f71900d354fd8900e2725 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 3 Jan 2026 20:06:10 -0500 Subject: [PATCH 02/74] Don't include thrust header for host code --- src/celeritas/global/CoreState.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 131da7037d..790107f2b5 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -6,7 +6,9 @@ //---------------------------------------------------------------------------// #include "CoreState.hh" +#if CELER_USE_DEVICE #include "corecel/data/ObserverPtr.device.hh" +#endif #include "corecel/io/Logger.hh" #include "corecel/sys/ActionRegistry.hh" #include "corecel/sys/ScopedProfiling.hh" From 6a8f12c6e6b25f7a06f1e15d1c61d6f8e91a4244 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 3 Jan 2026 20:14:39 -0500 Subject: [PATCH 03/74] Formatting --- src/celeritas/global/CoreState.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 790107f2b5..d16b1f4c05 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -7,7 +7,7 @@ #include "CoreState.hh" #if CELER_USE_DEVICE -#include "corecel/data/ObserverPtr.device.hh" +# include "corecel/data/ObserverPtr.device.hh" #endif #include "corecel/io/Logger.hh" #include "corecel/sys/ActionRegistry.hh" From 6ab34791782d736180644259af5e594b8f121e2e Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 3 Jan 2026 20:26:07 -0500 Subject: [PATCH 04/74] 
Check for device usage --- src/celeritas/global/CoreState.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index d16b1f4c05..abf0a1b2cf 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -6,6 +6,7 @@ //---------------------------------------------------------------------------// #include "CoreState.hh" +#include "corecel/Macros.hh" #if CELER_USE_DEVICE # include "corecel/data/ObserverPtr.device.hh" #endif From 56518427348caaf6b939495301f8495a750bbfb4 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 13:54:09 -0500 Subject: [PATCH 05/74] Different code paths for host and device tests --- test/celeritas/track/TrackInit.test.cc | 63 +++++++++++++++++++------- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index 994a1679cb..d1183ab31d 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -236,16 +236,32 @@ TYPED_TEST_SUITE(TrackInitTest, MemspaceTypes, MemspaceTypeString); //! 
Test that we can add more primaries than the first allocation TYPED_TEST(TrackInitTest, add_more_primaries) { - this->build_states(16); - EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); + if (TestFixture::M == MemSpace::device) + { + this->build_states(16); + EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); - auto primaries = this->make_primaries(22); - this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(22, this->state().sync_get_counters().num_initializers); + auto primaries = this->make_primaries(22); + this->extend_from_primaries(make_span(primaries)); + EXPECT_EQ(22, this->state().sync_get_counters().num_initializers); - primaries = this->make_primaries(32); - this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); + primaries = this->make_primaries(32); + this->extend_from_primaries(make_span(primaries)); + EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); + } + else + { + this->build_states(16); + EXPECT_EQ(0, this->state().counters().num_initializers); + + auto primaries = this->make_primaries(22); + this->extend_from_primaries(make_span(primaries)); + EXPECT_EQ(22, this->state().counters().num_initializers); + + primaries = this->make_primaries(32); + this->extend_from_primaries(make_span(primaries)); + EXPECT_EQ(54, this->state().counters().num_initializers); + } } //! 
Test that we can add more primaries than the first allocation @@ -258,8 +274,10 @@ TYPED_TEST(TrackInitTest, extend_primaries) auto primaries = this->make_primaries(2); this->insert_primaries(this->state(), make_span(primaries)); RunResult::from_state(this->state()); - - EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); + if (TestFixture::M == MemSpace::device) + EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); + else + EXPECT_EQ(0, this->state().counters().num_initializers); } { // Now initialize after adding @@ -425,10 +443,19 @@ TYPED_TEST(TrackInitTest, primaries) // Find vacancies and create track initializers from secondaries extend_from_secondaries.step(*this->core(), this->state()); - EXPECT_EQ(i * num_tracks / 2, - this->state().sync_get_counters().num_initializers); - EXPECT_EQ(num_tracks / 2, - this->state().sync_get_counters().num_vacancies); + if (TestFixture::M == MemSpace::device) + { + EXPECT_EQ(i * num_tracks / 2, + this->state().sync_get_counters().num_initializers); + EXPECT_EQ(num_tracks / 2, + this->state().sync_get_counters().num_vacancies); + } + else + { + EXPECT_EQ(i * num_tracks / 2, + this->state().counters().num_initializers); + EXPECT_EQ(num_tracks / 2, this->state().counters().num_vacancies); + } } // Check the results @@ -486,9 +513,11 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) // Create track initializers on device from primary particles auto primaries = this->make_primaries(num_primaries); this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(num_primaries, - this->state().sync_get_counters().num_initializers); - + if (TestFixture::M == MemSpace::device) + EXPECT_EQ(num_primaries, + this->state().sync_get_counters().num_initializers); + else + EXPECT_EQ(num_primaries, this->state().counters().num_initializers); auto apply_actions = [&actions, this] { for (auto const& ea_interface : actions) { From 4272438e015012cd1d730cea866501d0d8e6e201 Mon Sep 17 00:00:00 2001 From: LSchwiebert 
Date: Tue, 6 Jan 2026 13:55:01 -0500 Subject: [PATCH 06/74] Resolve issue with host-only builds --- src/celeritas/global/CoreState.cc | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index abf0a1b2cf..0cd16c5936 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -7,7 +7,7 @@ #include "CoreState.hh" #include "corecel/Macros.hh" -#if CELER_USE_DEVICE +#ifdef CELER_USE_DEVICE # include "corecel/data/ObserverPtr.device.hh" #endif #include "corecel/io/Logger.hh" @@ -151,6 +151,7 @@ Range CoreState::get_action_range(ActionId action_id) const return {thread_offsets[action_id], thread_offsets[action_id + 1]}; } +#if CELER_USE_DEVICE //---------------------------------------------------------------------------// /*! * Copy the core state counters from the device to the host. Since the entire @@ -167,9 +168,9 @@ CoreStateCounters const CoreState::sync_get_counters() const } else if constexpr (M == MemSpace::host) { - return *(this->ref().init.counters.data().get()); - // CELER_ASSERT_UNREACHABLE(); - // return CoreStateCounters{}; + // return *(this->ref().init.counters.data().get()); + CELER_ASSERT_UNREACHABLE(); + return CoreStateCounters{}; } } @@ -198,6 +199,7 @@ void CoreState::sync_put_counters(CoreStateCounters& host_counters) } return; } +#endif //---------------------------------------------------------------------------// /*! 
@@ -230,10 +232,26 @@ void CoreState::reset() fill_sequence(&this->ref().init.vacancies, this->stream_id()); } +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +template +CoreStateCounters const CoreState::sync_get_counters() const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} + +template +void CoreState::sync_put_counters(CoreStateCounters& host_counters) +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + //---------------------------------------------------------------------------// // EXPLICIT INSTANTIATION //---------------------------------------------------------------------------// template class CoreState; template class CoreState; //---------------------------------------------------------------------------// + } // namespace celeritas From 09beb0d14bde08536dcecbee10718bce6801ebc8 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 14:03:57 -0500 Subject: [PATCH 07/74] Fix preprocessor command --- src/celeritas/global/CoreState.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 0cd16c5936..7c20c27fed 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -6,8 +6,7 @@ //---------------------------------------------------------------------------// #include "CoreState.hh" -#include "corecel/Macros.hh" -#ifdef CELER_USE_DEVICE +#if CELER_USE_DEVICE # include "corecel/data/ObserverPtr.device.hh" #endif #include "corecel/io/Logger.hh" From ff07cb7f46ea140626bdb9a00076537e6ce4852d Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 14:10:15 -0500 Subject: [PATCH 08/74] Fake parameter usage on error path to avoid unused-parameter warning --- src/celeritas/global/CoreState.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 7c20c27fed..ab9b316ecb 100644 --- 
a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -167,7 +167,6 @@ CoreStateCounters const CoreState::sync_get_counters() const } else if constexpr (M == MemSpace::host) { - // return *(this->ref().init.counters.data().get()); CELER_ASSERT_UNREACHABLE(); return CoreStateCounters{}; } @@ -194,6 +193,7 @@ void CoreState::sync_put_counters(CoreStateCounters& host_counters) } else if constexpr (M == MemSpace::host) { + CELER_DISCARD(host_counters); CELER_ASSERT_UNREACHABLE(); } return; From 50b138a77d17dba2a9e169e873bd907571db3851 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 14:13:49 -0500 Subject: [PATCH 09/74] Artificial parameter use on error path to avoid unused-parameter warning --- src/celeritas/global/CoreState.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index ab9b316ecb..78a4eba79b 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -193,7 +193,7 @@ void CoreState::sync_put_counters(CoreStateCounters& host_counters) } else if constexpr (M == MemSpace::host) { - CELER_DISCARD(host_counters); + host_counters = CoreStateCounters{}; CELER_ASSERT_UNREACHABLE(); } return; From cdeee5d9187e59e928ab87eb2007c64e5cbfd15c Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 14:18:47 -0500 Subject: [PATCH 10/74] Artificial parameter use on error path to avoid unused-parameter warning --- src/celeritas/global/CoreState.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 78a4eba79b..fd353a8648 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -193,7 +193,6 @@ void CoreState::sync_put_counters(CoreStateCounters& host_counters) } else if constexpr (M == MemSpace::host) { - host_counters = CoreStateCounters{}; CELER_ASSERT_UNREACHABLE(); } return; @@ -242,6 
+241,7 @@ CoreStateCounters const CoreState::sync_get_counters() const template void CoreState::sync_put_counters(CoreStateCounters& host_counters) { + host_counters = CoreStateCounters{}; CELER_NOT_CONFIGURED("CUDA OR HIP"); } #endif From cb09dbd9012e8644800af3efe6fd5aa011b24e21 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 14:26:10 -0500 Subject: [PATCH 11/74] Remove unused variable --- src/celeritas/track/InitializeTracksAction.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index 9c758c0724..eae4cfe593 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -55,7 +55,6 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreStateHost& core_state) const { auto& counters = core_state.counters(); - auto init = core_state.ref().init; // The number of new tracks to initialize is the smaller of the number of // empty slots in the track vector and the number of track initializers size_type num_new_tracks @@ -100,7 +99,6 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreStateDevice& core_state) const { auto counters = core_state.sync_get_counters(); - auto init = core_state.ref().init; // The number of new tracks to initialize is the smaller of the number of // empty slots in the track vector and the number of track initializers size_type num_new_tracks From 610f0386ce33185108f5622360000dd487700dc5 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Tue, 6 Jan 2026 14:32:21 -0500 Subject: [PATCH 12/74] Remove unused variable --- src/celeritas/track/ExtendFromPrimariesAction.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index eaebbe8f28..f2fda1ed75 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ 
b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -106,7 +106,7 @@ void ExtendFromPrimariesAction::insert(CoreParams const& params, size_type num_initializers; if (auto* s = dynamic_cast*>(&state)) { - num_initializers = state.counters().num_initializers; + num_initializers = s->counters().num_initializers; } else if (auto* s = dynamic_cast*>(&state)) { From 600dce60a07ede655fd051b6b1450d4fba4958bc Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 7 Jan 2026 13:20:48 -0500 Subject: [PATCH 13/74] Fix parameter list --- src/celeritas/track/detail/TrackInitAlgorithms.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.hh b/src/celeritas/track/detail/TrackInitAlgorithms.hh index 933b4ae8a0..418f8b835e 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.hh +++ b/src/celeritas/track/detail/TrackInitAlgorithms.hh @@ -88,7 +88,6 @@ inline size_type exclusive_scan_counts( inline void partition_initializers( CoreParams const&, TrackInitStateData const&, - CoreStateCounters const&, size_type, StreamId) { From be1d7dfbceaec23f75714160c78db9a849a4b0a4 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 7 Jan 2026 13:29:03 -0500 Subject: [PATCH 14/74] Synchronize the stream after copy --- src/celeritas/global/CoreState.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index fd353a8648..983bc6460a 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -163,7 +163,11 @@ CoreStateCounters const CoreState::sync_get_counters() const if constexpr (M == MemSpace::device) { auto counters = device_pointer_cast(this->ref().init.counters.data()); - return ItemCopier{stream_id()}(counters.get()); + auto& stream = device().stream(stream_id()); + auto result + = ItemCopier{stream_id()}(counters.get()); + stream.sync(); + return result; } else if constexpr (M == MemSpace::host) { @@ -184,12 
+188,14 @@ void CoreState::sync_put_counters(CoreStateCounters& host_counters) if constexpr (M == MemSpace::device) { auto counters = device_pointer_cast(this->ref().init.counters.data()); + auto& stream = device().stream(stream_id()); copy_bytes(MemSpace::device, counters.get(), MemSpace::host, &host_counters, sizeof(CoreStateCounters), stream_id()); + stream.sync(); } else if constexpr (M == MemSpace::host) { @@ -239,9 +245,8 @@ CoreStateCounters const CoreState::sync_get_counters() const } template -void CoreState::sync_put_counters(CoreStateCounters& host_counters) +void CoreState::sync_put_counters(CoreStateCounters&) { - host_counters = CoreStateCounters{}; CELER_NOT_CONFIGURED("CUDA OR HIP"); } #endif From a68bec58eb111eec4605ce2192af316f8ab089d9 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 14 Jan 2026 11:47:33 -0500 Subject: [PATCH 15/74] Remove synchronization from remove_if_alive track device function --- .../track/ExtendFromSecondariesAction.cc | 3 +-- .../track/ExtendFromSecondariesAction.cu | 5 ++--- .../track/detail/TrackInitAlgorithms.cc | 4 ++-- .../track/detail/TrackInitAlgorithms.cu | 22 +++++++++++++------ .../track/detail/TrackInitAlgorithms.hh | 9 ++++---- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cc b/src/celeritas/track/ExtendFromSecondariesAction.cc index 8cb8f593fc..e5134d9171 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cc +++ b/src/celeritas/track/ExtendFromSecondariesAction.cc @@ -64,8 +64,7 @@ void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, // Remove all elements in the vacancy vector that were flagged as active // tracks, leaving the (sorted) indices of the empty slots - counters.num_vacancies - = detail::remove_if_alive(init, core_state.stream_id()); + detail::remove_if_alive(init, core_state.stream_id()); // The exclusive prefix sum of the number of secondaries produced by each // track is used to get the start 
index in the vector of track initializers diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cu b/src/celeritas/track/ExtendFromSecondariesAction.cu index 22bb2afbbe..72749c93e5 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cu +++ b/src/celeritas/track/ExtendFromSecondariesAction.cu @@ -45,7 +45,6 @@ void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, { TrackInitStateData& init = core_state.ref().init; - auto counters = core_state.sync_get_counters(); // Launch a kernel to identify which track slots are still alive and count // the number of surviving secondaries per track @@ -53,14 +52,14 @@ void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, // Remove all elements in the vacancy vector that were flagged as active // tracks, leaving the (sorted) indices of the empty slots - counters.num_vacancies - = detail::remove_if_alive(init, core_state.stream_id()); + detail::remove_if_alive(init, core_state.stream_id()); // The exclusive prefix sum of the number of secondaries produced by each // track is used to get the start index in the vector of track initializers // for each thread. Starting at that index, each thread creates track // initializers from all surviving secondaries produced in its // interaction. 
+ auto counters = core_state.sync_get_counters(); counters.num_secondaries = detail::exclusive_scan_counts( init.secondary_counts, core_state.stream_id()); diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cc b/src/celeritas/track/detail/TrackInitAlgorithms.cc index e90ffd5741..bee0a855e4 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cc +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cc @@ -24,7 +24,7 @@ namespace detail * * \return New size of the vacancy vector */ -size_type remove_if_alive( +void remove_if_alive( TrackInitStateData const& init, StreamId) { @@ -33,7 +33,7 @@ size_type remove_if_alive( auto* stop = std::remove_if(start, start + init.vacancies.size(), LogicalNot{}); counters->num_vacancies = stop - start; - return stop - start; + return; } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cu b/src/celeritas/track/detail/TrackInitAlgorithms.cu index 65353ada5d..9f1168b9fc 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cu +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cu @@ -71,13 +71,17 @@ struct NotNull * Remove all elements in the vacancy vector that were flagged as active * tracks. 
*/ -size_type remove_if_alive( +void remove_if_alive( TrackInitStateData const& init, StreamId stream_id) { ScopedProfiling profile_this{"remove-if-alive"}; #if CELER_USE_THRUST + auto& stream = device().stream(stream_id); auto start = device_pointer_cast(init.vacancies.data()); + auto counters = device_pointer_cast(init.counters.data()); + auto host_counters + = ItemCopier{stream_id}(counters.get()); auto end = thrust::remove_if(thrust_execute_on(stream_id), start, start + init.vacancies.size(), @@ -85,7 +89,15 @@ size_type remove_if_alive( CELER_DEVICE_API_CALL(PeekAtLastError()); // New size of the vacancy vector - return end - start; + host_counters.num_vacancies = end - start; + copy_bytes(MemSpace::device, + counters.get(), + MemSpace::host, + &host_counters, + sizeof(CoreStateCounters), + stream_id); + stream.sync(); + return; #else auto& stream = device().stream(stream_id); // Calling with nullptr causes the function to return the amount of working @@ -114,11 +126,7 @@ size_type remove_if_alive( stream.get()); CELER_DISCARD(cub_error_code); CELER_DEVICE_API_CALL(PeekAtLastError()); - - auto result = ItemCopier{stream_id}(&(counters->num_vacancies)); - - stream.sync(); - return result; + return; #endif } diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.hh b/src/celeritas/track/detail/TrackInitAlgorithms.hh index 418f8b835e..81d0b53201 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.hh +++ b/src/celeritas/track/detail/TrackInitAlgorithms.hh @@ -39,10 +39,9 @@ struct IsNeutralStencil //---------------------------------------------------------------------------// // Remove all elements in the vacancy vector that were flagged as alive -size_type -remove_if_alive(TrackInitStateData const&, - StreamId); -size_type remove_if_alive( +void remove_if_alive( + TrackInitStateData const&, StreamId); +void remove_if_alive( TrackInitStateData const&, StreamId); @@ -72,7 +71,7 @@ void partition_initializers( // DEVICE-DISABLED IMPLEMENTATION 
//---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -inline size_type remove_if_alive( +inline void remove_if_alive( TrackInitStateData const&, StreamId) { CELER_NOT_CONFIGURED("CUDA or HIP"); From 69c225337bbba92574817e92ddbd45f936980e02 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 14 Jan 2026 11:49:32 -0500 Subject: [PATCH 16/74] Use constexpr tests for memory space --- test/celeritas/track/TrackInit.test.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index d1183ab31d..ffefc44f49 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -236,7 +236,7 @@ TYPED_TEST_SUITE(TrackInitTest, MemspaceTypes, MemspaceTypeString); //! Test that we can add more primaries than the first allocation TYPED_TEST(TrackInitTest, add_more_primaries) { - if (TestFixture::M == MemSpace::device) + if constexpr (TestFixture::M == MemSpace::device) { this->build_states(16); EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); @@ -249,7 +249,7 @@ TYPED_TEST(TrackInitTest, add_more_primaries) this->extend_from_primaries(make_span(primaries)); EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); } - else + else if constexpr (TestFixture::M == MemSpace::host) { this->build_states(16); EXPECT_EQ(0, this->state().counters().num_initializers); @@ -274,9 +274,9 @@ TYPED_TEST(TrackInitTest, extend_primaries) auto primaries = this->make_primaries(2); this->insert_primaries(this->state(), make_span(primaries)); RunResult::from_state(this->state()); - if (TestFixture::M == MemSpace::device) + if constexpr (TestFixture::M == MemSpace::device) EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); - else + else if constexpr (TestFixture::M == MemSpace::host) EXPECT_EQ(0, this->state().counters().num_initializers); } { @@ -443,14 +443,14 @@ 
TYPED_TEST(TrackInitTest, primaries) // Find vacancies and create track initializers from secondaries extend_from_secondaries.step(*this->core(), this->state()); - if (TestFixture::M == MemSpace::device) + if constexpr (TestFixture::M == MemSpace::device) { EXPECT_EQ(i * num_tracks / 2, this->state().sync_get_counters().num_initializers); EXPECT_EQ(num_tracks / 2, this->state().sync_get_counters().num_vacancies); } - else + else if constexpr (TestFixture::M == MemSpace::host) { EXPECT_EQ(i * num_tracks / 2, this->state().counters().num_initializers); @@ -513,10 +513,10 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) // Create track initializers on device from primary particles auto primaries = this->make_primaries(num_primaries); this->extend_from_primaries(make_span(primaries)); - if (TestFixture::M == MemSpace::device) + if constexpr (TestFixture::M == MemSpace::device) EXPECT_EQ(num_primaries, this->state().sync_get_counters().num_initializers); - else + else if constexpr (TestFixture::M == MemSpace::host) EXPECT_EQ(num_primaries, this->state().counters().num_initializers); auto apply_actions = [&actions, this] { for (auto const& ea_interface : actions) From 1434459f00c8b15aa4a72ef3374fa5606d48f7a0 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 14 Jan 2026 12:03:17 -0500 Subject: [PATCH 17/74] Add braces to resolve ambiguous else warnings --- test/celeritas/track/TrackInit.test.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index ffefc44f49..e349af4b64 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -275,9 +275,13 @@ TYPED_TEST(TrackInitTest, extend_primaries) this->insert_primaries(this->state(), make_span(primaries)); RunResult::from_state(this->state()); if constexpr (TestFixture::M == MemSpace::device) + { EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); + } else if constexpr 
(TestFixture::M == MemSpace::host) + { EXPECT_EQ(0, this->state().counters().num_initializers); + } } { // Now initialize after adding @@ -514,10 +518,14 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) auto primaries = this->make_primaries(num_primaries); this->extend_from_primaries(make_span(primaries)); if constexpr (TestFixture::M == MemSpace::device) + { EXPECT_EQ(num_primaries, this->state().sync_get_counters().num_initializers); + } else if constexpr (TestFixture::M == MemSpace::host) + { EXPECT_EQ(num_primaries, this->state().counters().num_initializers); + } auto apply_actions = [&actions, this] { for (auto const& ea_interface : actions) { From 9b5736689a820b32ce7ee2d08825f0ead5dad95c Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 16 Jan 2026 21:25:38 -0500 Subject: [PATCH 18/74] Incorporate feedback on PR --- src/celeritas/global/CoreState.cc | 108 +++++---------- src/celeritas/global/CoreState.hh | 59 ++------ src/celeritas/global/Stepper.cc | 90 ++++-------- .../optical/detail/OpticalLaunchAction.cc | 35 +---- .../optical/detail/OpticalLaunchAction.hh | 5 +- .../track/ExtendFromPrimariesAction.cc | 37 ++--- .../track/ExtendFromPrimariesAction.cu | 23 ---- .../track/ExtendFromPrimariesAction.hh | 4 +- .../track/ExtendFromSecondariesAction.cc | 18 +-- .../track/ExtendFromSecondariesAction.cu | 53 -------- .../track/ExtendFromSecondariesAction.hh | 4 +- src/celeritas/track/InitializeTracksAction.cc | 47 +------ src/celeritas/track/InitializeTracksAction.hh | 4 +- test/celeritas/global/Stepper.test.cc | 4 +- test/celeritas/track/TrackInit.test.cc | 128 +++++++++--------- 15 files changed, 162 insertions(+), 457 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 983bc6460a..c5b3ca4983 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -55,19 +55,18 @@ CoreState::CoreState(CoreParams const& params, states_ = CollectionStateStore( params.host_ref(), stream_id, 
num_track_slots); + auto counters = CoreStateCounters{}; + counters.num_vacancies = num_track_slots; + this->sync_put_counters(counters); + if constexpr (M == MemSpace::device) { - auto counters = CoreStateCounters{}; - counters.num_vacancies = num_track_slots; - this->sync_put_counters(counters); device_ref_vec_ = DeviceVector(1); device_ref_vec_.copy_to_device({&this->ref(), 1}); ptr_ = make_observer(device_ref_vec_); } else if constexpr (M == MemSpace::host) { - auto& counters = this->counters(); - counters.num_vacancies = num_track_slots; ptr_ = make_observer(&this->ref()); } @@ -120,18 +119,7 @@ CoreState::~CoreState() template void CoreState::warming_up(bool new_state) { - size_type num_active; - if constexpr (M == MemSpace::host) - { - auto& counters = this->counters(); - num_active = counters.num_active; - } - else if constexpr (M == MemSpace::device) - { - auto counters = this->sync_get_counters(); - num_active = counters.num_active; - } - CELER_EXPECT(!new_state || num_active == 0); + CELER_EXPECT(!new_state || this->sync_get_counters().num_active == 0); warming_up_ = new_state; } @@ -150,60 +138,50 @@ Range CoreState::get_action_range(ActionId action_id) const return {thread_offsets[action_id], thread_offsets[action_id + 1]}; } -#if CELER_USE_DEVICE +// #if CELER_USE_DEVICE //---------------------------------------------------------------------------// /*! - * Copy the core state counters from the device to the host. Since the entire - * sequence of actions in a step are performed on the device, this is typically - * done at the end of a step. + * Copy the core state counters from the device to the host. For host-only + * code, the counters reside on the host, so this just returns a + * CoreStateCounters object. Note that it does not return a reference, so + * sync_put_counters() must be used if any counters change. 
*/ template -CoreStateCounters const CoreState::sync_get_counters() const +CoreStateCounters CoreState::sync_get_counters() const { + auto* counters + = static_cast(this->ref().init.counters.data()); + CELER_ASSERT(counters); if constexpr (M == MemSpace::device) { - auto counters = device_pointer_cast(this->ref().init.counters.data()); - auto& stream = device().stream(stream_id()); auto result - = ItemCopier{stream_id()}(counters.get()); - stream.sync(); + = ItemCopier{this->stream_id()}(counters); + device().stream(this->stream_id()).sync(); return result; } - else if constexpr (M == MemSpace::host) - { - CELER_ASSERT_UNREACHABLE(); - return CoreStateCounters{}; - } + return *counters; } //---------------------------------------------------------------------------// /*! - * Copy the core state counters from the host to the device. This function is a - * placeholder function until the corresponding host code that updates the Core - * State counters can be moved to device functions. + * Copy the core state counters from the host to the device. For host-only + * code, this function copies a CoreStateCounter object into the CoreState + * object, which is needed when any of the counters change, because + * sync_get_counters() doesn't return a reference. 
*/ template -void CoreState::sync_put_counters(CoreStateCounters& host_counters) +void CoreState::sync_put_counters(CoreStateCounters const& host_counters) { + auto* counters + = static_cast(this->ref().init.counters.data()); + CELER_ASSERT(counters); + Copier copy{{counters, 1}, this->stream_id()}; + copy(MemSpace::host, {&host_counters, 1}); if constexpr (M == MemSpace::device) { - auto counters = device_pointer_cast(this->ref().init.counters.data()); - auto& stream = device().stream(stream_id()); - copy_bytes(MemSpace::device, - counters.get(), - MemSpace::host, - &host_counters, - sizeof(CoreStateCounters), - stream_id()); - stream.sync(); + device().stream(this->stream_id()).sync(); } - else if constexpr (M == MemSpace::host) - { - CELER_ASSERT_UNREACHABLE(); - } - return; } -#endif //---------------------------------------------------------------------------// /*! @@ -216,18 +194,9 @@ void CoreState::sync_put_counters(CoreStateCounters& host_counters) template void CoreState::reset() { - if constexpr (M == MemSpace::host) - { - auto& counters = this->counters(); - counters = CoreStateCounters{}; - counters.num_vacancies = this->size(); - } - else if constexpr (M == MemSpace::device) - { - auto counters = CoreStateCounters{}; - counters.num_vacancies = this->size(); - sync_put_counters(counters); - } + auto counters = CoreStateCounters{}; + counters.num_vacancies = this->size(); + sync_put_counters(counters); // Reset all the track slots to inactive fill(TrackStatus::inactive, &this->ref().sim.status); @@ -236,21 +205,6 @@ void CoreState::reset() fill_sequence(&this->ref().init.vacancies, this->stream_id()); } -//---------------------------------------------------------------------------// -#if !CELER_USE_DEVICE -template -CoreStateCounters const CoreState::sync_get_counters() const -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} - -template -void CoreState::sync_put_counters(CoreStateCounters&) -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} -#endif - 
//---------------------------------------------------------------------------// // EXPLICIT INSTANTIATION //---------------------------------------------------------------------------// diff --git a/src/celeritas/global/CoreState.hh b/src/celeritas/global/CoreState.hh index e0aa836ed3..5c18aba534 100644 --- a/src/celeritas/global/CoreState.hh +++ b/src/celeritas/global/CoreState.hh @@ -48,19 +48,14 @@ class CoreStateInterface //! Number of track slots virtual size_type size() const = 0; - //! Access track initialization counters - // Use when running all code on the host - // Use sync_get_counters() instead if accessing device data from the host - virtual CoreStateCounters const& counters() const = 0; - - //! Access track initialization counters - // Use when running all code on the host - // Use sync_get_counters() instead if accessing device data from the host - virtual CoreStateCounters& counters() = 0; - //! Synchronize and copy track initialization counters from device to host - [[nodiscard]] virtual CoreStateCounters const sync_get_counters() const = 0; + //! For host-only code, this replaces the old counters() function + [[nodiscard]] virtual CoreStateCounters sync_get_counters() const = 0; + //! Synchronize and copy track initialization counters from host to device + //! For host-only code, this replaces the old counters() function + //! since we return a CoreStateCounters object instead of a reference + virtual void sync_put_counters(CoreStateCounters const&) = 0; //! Access auxiliary state data virtual AuxStateVec const& aux() const = 0; @@ -140,19 +135,13 @@ class CoreState final : public CoreStateInterface //// COUNTERS //// - //! Track initialization counters - inline CoreStateCounters& counters() final; - - //! Track initialization counters - inline CoreStateCounters const& counters() const final; - //! 
Synchronize and copy track initialization counters from device to host - [[nodiscard]] CoreStateCounters const sync_get_counters() const final; + [[nodiscard]] CoreStateCounters sync_get_counters() const final; //! Synchronize and copy track initialization counters from host to device - //! Remove this once all the counter maintenance is device-only or - //! host-only - void sync_put_counters(CoreStateCounters&); + //! For host-only code, this copies the local CoreStateCounters back to the + //! class, since sync_get_counters() doesn't return a reference + void sync_put_counters(CoreStateCounters const&) final; //// AUXILIARY DATA //// @@ -206,34 +195,6 @@ class CoreState final : public CoreStateInterface bool warming_up_{false}; }; -//---------------------------------------------------------------------------// -/*! - * Access counters -- works only when counters are stored on host. - * Otherwise, use sync_get_counters() to copy counters from device to host. - */ -template -CoreStateCounters& CoreState::counters() -{ - if constexpr (M == MemSpace::host) - return *(this->ref().init.counters.data().get()); - else - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} - -//---------------------------------------------------------------------------// -/*! - * Access counters -- works only when counters are not stored on device. - * Otherwise, use sync_get_counters() to return results to the host. - */ -template -CoreStateCounters const& CoreState::counters() const -{ - if constexpr (M == MemSpace::host) - return *(this->ref().init.counters.data().get()); - else - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} - //---------------------------------------------------------------------------// /*! * Convenience function to access auxiliary "collection group" data. 
diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index e45f4d3a52..d84620e68a 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -107,29 +107,14 @@ Stepper::~Stepper() = default; template void Stepper::warm_up() { - if constexpr (M == MemSpace::host) - { - CELER_VALIDATE(state_->counters().num_active == 0, - << "cannot warm up when state has active tracks"); - - ScopedProfiling profile_this{"warmup"}; - state_->warming_up(true); - ScopeExit on_exit_{[this] { state_->warming_up(false); }}; - actions_->step(*params_, *state_); - CELER_ENSURE(state_->counters().num_active == 0); - } - else if constexpr (M == MemSpace::device) - { - auto counters = state_->sync_get_counters(); - CELER_VALIDATE(counters.num_active == 0, - << "cannot warm up when state has active tracks"); - - ScopedProfiling profile_this{"warmup"}; - state_->warming_up(true); - ScopeExit on_exit_{[this] { state_->warming_up(false); }}; - actions_->step(*params_, *state_); - CELER_ENSURE(counters.num_active == 0); - } + CELER_VALIDATE(state_->sync_get_counters().num_active == 0, + << "cannot warm up when state has active tracks"); + + ScopedProfiling profile_this{"warmup"}; + state_->warming_up(true); + ScopeExit on_exit_{[this] { state_->warming_up(false); }}; + actions_->step(*params_, *state_); + CELER_ENSURE(state_->sync_get_counters().num_active == 0); } //---------------------------------------------------------------------------// @@ -145,29 +130,15 @@ auto Stepper::operator()() -> result_type ScopedProfiling profile_this{"step"}; // Get the number of track initializers and active tracks result_type result; - if constexpr (M == MemSpace::host) - { - auto& counters = state_->counters(); - counters.num_generated = 0; - actions_->step(*params_, *state_); - - result.generated = counters.num_generated; - result.active = counters.num_active; - result.alive = counters.num_alive; - result.queued = counters.num_initializers; - } - else if 
constexpr (M == MemSpace::device) - { - auto counters = state_->sync_get_counters(); - counters.num_generated = 0; - state_->sync_put_counters(counters); - actions_->step(*params_, *state_); - counters = state_->sync_get_counters(); - result.generated = counters.num_generated; - result.active = counters.num_active; - result.alive = counters.num_alive; - result.queued = counters.num_initializers; - } + auto counters = state_->sync_get_counters(); + counters.num_generated = 0; + state_->sync_put_counters(counters); + actions_->step(*params_, *state_); + counters = state_->sync_get_counters(); + result.generated = counters.num_generated; + result.active = counters.num_active; + result.alive = counters.num_alive; + result.queued = counters.num_initializers; return result; } @@ -193,16 +164,9 @@ auto Stepper::operator()(SpanConstPrimary primaries) -> result_type << "event number " << max_id->event_id.unchecked_get() << " exceeds max_events=" << params_->init()->max_events()); - if constexpr (M == MemSpace::host) - { - state_->counters().num_pending = primaries.size(); - } - else if constexpr (M == MemSpace::device) - { - auto counters = state_->sync_get_counters(); - counters.num_pending = primaries.size(); - state_->sync_put_counters(counters); - } + auto counters = state_->sync_get_counters(); + counters.num_pending = primaries.size(); + state_->sync_put_counters(counters); primaries_action_->insert(*params_, *state_, primaries); return (*this)(); @@ -218,17 +182,9 @@ auto Stepper::operator()(SpanConstPrimary primaries) -> result_type template void Stepper::kill_active() { - if constexpr (M == MemSpace::host) - { - CELER_LOG_LOCAL(error) - << "Killing " << state_->counters().num_active << " active tracks"; - } - else if constexpr (M == MemSpace::device) - { - auto counters = state_->sync_get_counters(); - CELER_LOG_LOCAL(error) - << "Killing " << counters.num_active << " active tracks"; - } + CELER_LOG_LOCAL(error) << "Killing " + << 
state_->sync_get_counters().num_active + << " active tracks"; detail::kill_active(*params_, *state_); } diff --git a/src/celeritas/optical/detail/OpticalLaunchAction.cc b/src/celeritas/optical/detail/OpticalLaunchAction.cc index 4aae208c9f..fe4d473cf3 100644 --- a/src/celeritas/optical/detail/OpticalLaunchAction.cc +++ b/src/celeritas/optical/detail/OpticalLaunchAction.cc @@ -127,42 +127,15 @@ void OpticalLaunchAction::step(CoreParams const& params, /*! * Launch the optical tracking loop. */ +template void OpticalLaunchAction::execute_impl(CoreParams const&, - CoreStateHost& core_state) const -{ - auto& state = get>(core_state.aux(), - this->aux_id()); - CELER_ASSERT(state.size() > 0); - - auto const& core_counters = core_state.counters(); - auto& counters = state.counters(); - - if ((counters.num_pending < data_.auto_flush - && (core_counters.num_alive > 0 || core_counters.num_initializers > 0)) - || counters.num_pending == 0) - { - // Don't launch the optical loop if the number of pending tracks is - // below the threshold and the core stepping loop hasn't completed yet - return; - } - - // Transport pending optical tracks - (*transport_)(state); -} - -//---------------------------------------------------------------------------// -/*! - * Launch the optical tracking loop. 
- */ -void OpticalLaunchAction::execute_impl(CoreParams const&, - CoreStateDevice& core_state) const + CoreState& core_state) const { - auto& state = get>(core_state.aux(), - this->aux_id()); + auto& state = get>(core_state.aux(), this->aux_id()); CELER_ASSERT(state.size() > 0); auto core_counters = core_state.sync_get_counters(); - auto counters = state.counters(); + auto const& counters = state.counters(); if ((counters.num_pending < data_.auto_flush && (core_counters.num_alive > 0 || core_counters.num_initializers > 0)) diff --git a/src/celeritas/optical/detail/OpticalLaunchAction.hh b/src/celeritas/optical/detail/OpticalLaunchAction.hh index 0d2ebffc0a..18dc4d1f66 100644 --- a/src/celeritas/optical/detail/OpticalLaunchAction.hh +++ b/src/celeritas/optical/detail/OpticalLaunchAction.hh @@ -139,9 +139,8 @@ class OpticalLaunchAction : public AuxParamsInterface, //// HELPERS //// - void execute_impl(CoreParams const&, CoreStateHost&) const; - void execute_impl(CoreParams const&, CoreStateDevice&) const; - + template + void execute_impl(CoreParams const&, CoreState&) const; template void begin_run_impl(CoreState&); }; diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index f2fda1ed75..5c89484e48 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -103,19 +103,7 @@ void ExtendFromPrimariesAction::insert(CoreParams const& params, CoreStateInterface& state, Span host_primaries) const { - size_type num_initializers; - if (auto* s = dynamic_cast*>(&state)) - { - num_initializers = s->counters().num_initializers; - } - else if (auto* s = dynamic_cast*>(&state)) - { - num_initializers = s->sync_get_counters().num_initializers; - } - else - { - CELER_ASSERT_UNREACHABLE(); - } + size_type num_initializers = state.sync_get_counters().num_initializers; size_type init_capacity = params.init()->capacity(); CELER_VALIDATE(host_primaries.size() + 
num_initializers <= init_capacity, @@ -191,20 +179,23 @@ void ExtendFromPrimariesAction::insert_impl( /*! * Construct primaries. */ +template void ExtendFromPrimariesAction::step_impl(CoreParams const& params, - CoreStateHost& state) const + CoreState& state) const { - auto& primaries - = get>(state.aux(), aux_id_); + auto& primaries = get>(state.aux(), aux_id_); + auto counters = state.sync_get_counters(); // Create track initializers from primaries - state.counters().num_initializers += primaries.count; + counters.num_initializers += primaries.count; + state.sync_put_counters(counters); this->process_primaries(params, state, primaries); // Mark that the primaries have been processed - state.counters().num_generated += primaries.count; - state.counters().num_pending = 0; + counters.num_generated += primaries.count; + counters.num_pending = 0; primaries.count = 0; + state.sync_put_counters(counters); } //---------------------------------------------------------------------------// @@ -219,7 +210,7 @@ void ExtendFromPrimariesAction::process_primaries( auto primaries = pstate.primaries(); detail::ProcessPrimariesExecutor execute{params.ptr(), state.ptr(), - state.counters(), + state.sync_get_counters(), primaries}; return launch_action(*this, primaries.size(), params, state, execute); } @@ -233,12 +224,6 @@ void ExtendFromPrimariesAction::process_primaries( { CELER_NOT_CONFIGURED("CUDA OR HIP"); } - -void ExtendFromPrimariesAction::step_impl(CoreParams const&, - CoreStateDevice&) const -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} #endif //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index 889ef877d8..85aa7d051d 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -34,28 +34,5 @@ void ExtendFromPrimariesAction::process_primaries( } } 
-//---------------------------------------------------------------------------// -/*! - * Construct primaries. - */ -void ExtendFromPrimariesAction::step_impl(CoreParams const& params, - CoreStateDevice& state) const -{ - auto& primaries - = get>(state.aux(), aux_id_); - auto counters = state.sync_get_counters(); - - // Create track initializers from primaries - counters.num_initializers += primaries.count; - state.sync_put_counters(counters); - this->process_primaries(params, state, primaries); - - // Mark that the primaries have been processed - counters.num_generated += primaries.count; - counters.num_pending = 0; - primaries.count = 0; - state.sync_put_counters(counters); -} - //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/src/celeritas/track/ExtendFromPrimariesAction.hh b/src/celeritas/track/ExtendFromPrimariesAction.hh index d347eecf56..80d1fcb3ad 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.hh +++ b/src/celeritas/track/ExtendFromPrimariesAction.hh @@ -90,8 +90,8 @@ class ExtendFromPrimariesAction final : public CoreStepActionInterface, void insert_impl(CoreState& state, Span host_primaries) const; - void step_impl(CoreParams const&, CoreStateHost&) const; - void step_impl(CoreParams const&, CoreStateDevice&) const; + template + void step_impl(CoreParams const&, CoreState&) const; void process_primaries(CoreParams const&, CoreStateHost&, diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cc b/src/celeritas/track/ExtendFromSecondariesAction.cc index e5134d9171..55a77eb91e 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cc +++ b/src/celeritas/track/ExtendFromSecondariesAction.cc @@ -51,12 +51,11 @@ void ExtendFromSecondariesAction::step(CoreParams const& params, /*! * Initialize track states. 
*/ +template void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, - CoreStateHost& core_state) const + CoreState& core_state) const { - TrackInitStateData& init - = core_state.ref().init; - auto& counters = core_state.counters(); + TrackInitStateData& init = core_state.ref().init; // Launch a kernel to identify which track slots are still alive and count // the number of surviving secondaries per track @@ -71,6 +70,7 @@ void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, // for each thread. Starting at that index, each thread creates track // initializers from all surviving secondaries produced in its // interaction. + auto counters = core_state.sync_get_counters(); counters.num_secondaries = detail::exclusive_scan_counts( init.secondary_counts, core_state.stream_id()); @@ -95,6 +95,8 @@ void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, // Launch a kernel to create track initializers from secondaries counters.num_alive = core_state.size() - counters.num_vacancies; + core_state.sync_put_counters(counters); + this->process_secondaries(core_params, core_state); } @@ -123,7 +125,7 @@ void ExtendFromSecondariesAction::process_secondaries( detail::ProcessSecondariesExecutor execute{ core_params.ptr(), core_state.ptr(), - core_state.counters()}; + core_state.sync_get_counters()}; launch_action(*this, core_params, core_state, execute); } @@ -136,12 +138,6 @@ void ExtendFromSecondariesAction::begin_run(CoreParams const&, CoreStateDevice&) CELER_NOT_CONFIGURED("CUDA OR HIP"); } -void ExtendFromSecondariesAction::step_impl(CoreParams const&, - CoreStateDevice&) const -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} - void ExtendFromSecondariesAction::locate_alive(CoreParams const&, CoreStateDevice&) const { diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cu b/src/celeritas/track/ExtendFromSecondariesAction.cu index 72749c93e5..a001b454cb 100644 --- 
a/src/celeritas/track/ExtendFromSecondariesAction.cu +++ b/src/celeritas/track/ExtendFromSecondariesAction.cu @@ -36,59 +36,6 @@ void ExtendFromSecondariesAction::begin_run(CoreParams const&, s.free_async(p); } -//---------------------------------------------------------------------------// -/*! - * Initialize track states. - */ -void ExtendFromSecondariesAction::step_impl(CoreParams const& core_params, - CoreStateDevice& core_state) const -{ - TrackInitStateData& init - = core_state.ref().init; - - // Launch a kernel to identify which track slots are still alive and count - // the number of surviving secondaries per track - this->locate_alive(core_params, core_state); - - // Remove all elements in the vacancy vector that were flagged as active - // tracks, leaving the (sorted) indices of the empty slots - detail::remove_if_alive(init, core_state.stream_id()); - - // The exclusive prefix sum of the number of secondaries produced by each - // track is used to get the start index in the vector of track initializers - // for each thread. Starting at that index, each thread creates track - // initializers from all surviving secondaries produced in its - // interaction. - auto counters = core_state.sync_get_counters(); - counters.num_secondaries = detail::exclusive_scan_counts( - init.secondary_counts, core_state.stream_id()); - - /*! \todo If we don't have space for all the secondaries, we will need to - * buffer the current track initializers to create room. - * - * This isn't trivial because we will need to: - * - Allocate a new buffer (probably do something like 2x, rounding up to - * nearest power of 2)? - * - Update the collection references for track sim - * - Update the *copies* of that reference (?) 
like in track state - * - Copy to device to update the on-device references (state.ptr) - */ - counters.num_initializers += counters.num_secondaries; - CELER_VALIDATE( - counters.num_initializers <= init.initializers.size(), - << "insufficient capacity (" << init.initializers.size() - << ") for track initializers (created " << counters.num_secondaries - << " new secondaries for a total capacity requirement of " - << counters.num_initializers - << "): increase initializer capacity or decrease track slots"); - - // Launch a kernel to create track initializers from secondaries - counters.num_alive = core_state.size() - counters.num_vacancies; - core_state.sync_put_counters(counters); - - this->process_secondaries(core_params, core_state); -} - //---------------------------------------------------------------------------// /*! * Launch a kernel to locate alive particles. diff --git a/src/celeritas/track/ExtendFromSecondariesAction.hh b/src/celeritas/track/ExtendFromSecondariesAction.hh index 3ecbcca10d..c1abdce5fd 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.hh +++ b/src/celeritas/track/ExtendFromSecondariesAction.hh @@ -108,8 +108,8 @@ class ExtendFromSecondariesAction final : public CoreStepActionInterface, private: ActionId id_; - void step_impl(CoreParams const&, CoreStateHost&) const; - void step_impl(CoreParams const&, CoreStateDevice&) const; + template + void step_impl(CoreParams const&, CoreState&) const; void locate_alive(CoreParams const&, CoreStateHost&) const; void locate_alive(CoreParams const&, CoreStateDevice&) const; diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index eae4cfe593..0c91bb60e9 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -51,52 +51,9 @@ void InitializeTracksAction::step(CoreParams const& params, * If there are more empty slots than new secondaries, they will be filled by * any track initializers 
remaining from previous steps using the position. */ +template void InitializeTracksAction::step_impl(CoreParams const& core_params, - CoreStateHost& core_state) const -{ - auto& counters = core_state.counters(); - // The number of new tracks to initialize is the smaller of the number of - // empty slots in the track vector and the number of track initializers - size_type num_new_tracks - = std::min(counters.num_vacancies, counters.num_initializers); - if (num_new_tracks > 0) - { - if (core_params.init()->track_order() == TrackOrder::init_charge) - { - // Reset track initializer indices - fill_sequence(&core_state.ref().init.indices, - core_state.stream_id()); - - // Partition indices by whether tracks are charged or neutral - detail::partition_initializers(core_params, - core_state.ref().init, - num_new_tracks, - core_state.stream_id()); - } - - // Launch a kernel to initialize tracks - this->step_impl(core_params, core_state, num_new_tracks); - - // Update initializers/vacancies - counters.num_initializers -= num_new_tracks; - counters.num_vacancies -= num_new_tracks; - } - - // Store number of active tracks at the start of the loop - counters.num_active = core_state.size() - counters.num_vacancies; -} - -//---------------------------------------------------------------------------// -/*! - * Initialize track states. - * - * Tracks created from secondaries produced in this step will have the geometry - * state copied over from the parent instead of initialized from the position. - * If there are more empty slots than new secondaries, they will be filled by - * any track initializers remaining from previous steps using the position. 
- */ -void InitializeTracksAction::step_impl(CoreParams const& core_params, - CoreStateDevice& core_state) const + CoreState& core_state) const { auto counters = core_state.sync_get_counters(); // The number of new tracks to initialize is the smaller of the number of diff --git a/src/celeritas/track/InitializeTracksAction.hh b/src/celeritas/track/InitializeTracksAction.hh index f0b88f578d..b7b3ccb8b3 100644 --- a/src/celeritas/track/InitializeTracksAction.hh +++ b/src/celeritas/track/InitializeTracksAction.hh @@ -50,8 +50,8 @@ class InitializeTracksAction final : public CoreStepActionInterface private: ActionId id_; - void step_impl(CoreParams const&, CoreStateHost&) const; - void step_impl(CoreParams const&, CoreStateDevice&) const; + template + void step_impl(CoreParams const&, CoreState&) const; void step_impl(CoreParams const&, CoreStateHost&, size_type) const; void step_impl(CoreParams const&, CoreStateDevice&, size_type) const; diff --git a/test/celeritas/global/Stepper.test.cc b/test/celeritas/global/Stepper.test.cc index 43d7626a64..0980a02993 100644 --- a/test/celeritas/global/Stepper.test.cc +++ b/test/celeritas/global/Stepper.test.cc @@ -372,8 +372,8 @@ TEST_F(StepperOrderTest, warm_up) EXPECT_EQ(0, dumstate.action_order.size()); step.warm_up(); - EXPECT_EQ(0, step.state().counters().num_active); - EXPECT_EQ(0, step.state().counters().num_alive); + EXPECT_EQ(0, step.state().sync_get_counters().num_active); + EXPECT_EQ(0, step.state().sync_get_counters().num_alive); static char const* const expected_action_order[] = {"user_start", "user_pre", "user_post"}; diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index e349af4b64..b111239fcf 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -63,16 +63,16 @@ RunResult RunResult::from_state(CoreState& state) data = state.ref().init; size_type num_vacancies, num_initializers; - if constexpr (M == MemSpace::host) - { - 
num_vacancies = state.counters().num_vacancies; - num_initializers = state.counters().num_initializers; - } - else if constexpr (M == MemSpace::device) - { - num_vacancies = state.sync_get_counters().num_vacancies; - num_initializers = state.sync_get_counters().num_initializers; - } + // if constexpr (M == MemSpace::host) + // { + // num_vacancies = state.counters().num_vacancies; + // num_initializers = state.counters().num_initializers; + // } + // else if constexpr (M == MemSpace::device) + // { + num_vacancies = state.sync_get_counters().num_vacancies; + num_initializers = state.sync_get_counters().num_initializers; + // } // Store the IDs of the vacant track slots for (auto tid : range(TrackSlotId{num_vacancies})) @@ -236,32 +236,32 @@ TYPED_TEST_SUITE(TrackInitTest, MemspaceTypes, MemspaceTypeString); //! Test that we can add more primaries than the first allocation TYPED_TEST(TrackInitTest, add_more_primaries) { - if constexpr (TestFixture::M == MemSpace::device) - { - this->build_states(16); - EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); - - auto primaries = this->make_primaries(22); - this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(22, this->state().sync_get_counters().num_initializers); + // if constexpr (TestFixture::M == MemSpace::device) + // { + this->build_states(16); + EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); - primaries = this->make_primaries(32); - this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); - } - else if constexpr (TestFixture::M == MemSpace::host) - { - this->build_states(16); - EXPECT_EQ(0, this->state().counters().num_initializers); - - auto primaries = this->make_primaries(22); - this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(22, this->state().counters().num_initializers); + auto primaries = this->make_primaries(22); + this->extend_from_primaries(make_span(primaries)); + EXPECT_EQ(22, 
this->state().sync_get_counters().num_initializers); - primaries = this->make_primaries(32); - this->extend_from_primaries(make_span(primaries)); - EXPECT_EQ(54, this->state().counters().num_initializers); - } + primaries = this->make_primaries(32); + this->extend_from_primaries(make_span(primaries)); + EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); + // } + // else if constexpr (TestFixture::M == MemSpace::host) + // { + // this->build_states(16); + // EXPECT_EQ(0, this->state().counters().num_initializers); + + // auto primaries = this->make_primaries(22); + // this->extend_from_primaries(make_span(primaries)); + // EXPECT_EQ(22, this->state().counters().num_initializers); + + // primaries = this->make_primaries(32); + // this->extend_from_primaries(make_span(primaries)); + // EXPECT_EQ(54, this->state().counters().num_initializers); + // } } //! Test that we can add more primaries than the first allocation @@ -274,14 +274,14 @@ TYPED_TEST(TrackInitTest, extend_primaries) auto primaries = this->make_primaries(2); this->insert_primaries(this->state(), make_span(primaries)); RunResult::from_state(this->state()); - if constexpr (TestFixture::M == MemSpace::device) - { - EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); - } - else if constexpr (TestFixture::M == MemSpace::host) - { - EXPECT_EQ(0, this->state().counters().num_initializers); - } + // if constexpr (TestFixture::M == MemSpace::device) + // { + EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); + // } + // else if constexpr (TestFixture::M == MemSpace::host) + // { + // EXPECT_EQ(0, this->state().counters().num_initializers); + // } } { // Now initialize after adding @@ -447,19 +447,19 @@ TYPED_TEST(TrackInitTest, primaries) // Find vacancies and create track initializers from secondaries extend_from_secondaries.step(*this->core(), this->state()); - if constexpr (TestFixture::M == MemSpace::device) - { - EXPECT_EQ(i * num_tracks / 2, - 
this->state().sync_get_counters().num_initializers); - EXPECT_EQ(num_tracks / 2, - this->state().sync_get_counters().num_vacancies); - } - else if constexpr (TestFixture::M == MemSpace::host) - { - EXPECT_EQ(i * num_tracks / 2, - this->state().counters().num_initializers); - EXPECT_EQ(num_tracks / 2, this->state().counters().num_vacancies); - } + // if constexpr (TestFixture::M == MemSpace::device) + // { + EXPECT_EQ(i * num_tracks / 2, + this->state().sync_get_counters().num_initializers); + EXPECT_EQ(num_tracks / 2, + this->state().sync_get_counters().num_vacancies); + // } + // else if constexpr (TestFixture::M == MemSpace::host) + // { + // EXPECT_EQ(i * num_tracks / 2, + // this->state().counters().num_initializers); + // EXPECT_EQ(num_tracks / 2, this->state().counters().num_vacancies); + // } } // Check the results @@ -517,15 +517,15 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) // Create track initializers on device from primary particles auto primaries = this->make_primaries(num_primaries); this->extend_from_primaries(make_span(primaries)); - if constexpr (TestFixture::M == MemSpace::device) - { - EXPECT_EQ(num_primaries, - this->state().sync_get_counters().num_initializers); - } - else if constexpr (TestFixture::M == MemSpace::host) - { - EXPECT_EQ(num_primaries, this->state().counters().num_initializers); - } + // if constexpr (TestFixture::M == MemSpace::device) + // { + EXPECT_EQ(num_primaries, + this->state().sync_get_counters().num_initializers); + // } + // else if constexpr (TestFixture::M == MemSpace::host) + // { + // EXPECT_EQ(num_primaries, this->state().counters().num_initializers); + // } auto apply_actions = [&actions, this] { for (auto const& ea_interface : actions) { From 9f8e1b7b32eed709f73ea7995a74a4bb0b615bcf Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 16 Jan 2026 21:59:01 -0500 Subject: [PATCH 19/74] Remove unnecessary leftover testing code --- src/celeritas/global/CoreState.cc | 2 - src/celeritas/global/Stepper.cc | 
6 ++- .../optical/detail/OpticalLaunchAction.cc | 3 +- .../track/ExtendFromSecondariesAction.cu | 1 - src/celeritas/track/TrackInitData.hh | 1 - .../track/detail/TrackInitAlgorithms.cc | 2 +- test/celeritas/track/TrackInit.test.cc | 47 ------------------- 7 files changed, 6 insertions(+), 56 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index c5b3ca4983..756c634044 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -138,7 +138,6 @@ Range CoreState::get_action_range(ActionId action_id) const return {thread_offsets[action_id], thread_offsets[action_id + 1]}; } -// #if CELER_USE_DEVICE //---------------------------------------------------------------------------// /*! * Copy the core state counters from the device to the host. For host-only @@ -211,5 +210,4 @@ void CoreState::reset() template class CoreState; template class CoreState; //---------------------------------------------------------------------------// - } // namespace celeritas diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index d84620e68a..97215967de 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -128,17 +128,19 @@ template auto Stepper::operator()() -> result_type { ScopedProfiling profile_this{"step"}; - // Get the number of track initializers and active tracks - result_type result; auto counters = state_->sync_get_counters(); counters.num_generated = 0; state_->sync_put_counters(counters); actions_->step(*params_, *state_); counters = state_->sync_get_counters(); + + // Get the number of track initializers and active tracks + result_type result; result.generated = counters.num_generated; result.active = counters.num_active; result.alive = counters.num_alive; result.queued = counters.num_initializers; + return result; } diff --git a/src/celeritas/optical/detail/OpticalLaunchAction.cc b/src/celeritas/optical/detail/OpticalLaunchAction.cc index 
fe4d473cf3..1dcc73b86f 100644 --- a/src/celeritas/optical/detail/OpticalLaunchAction.cc +++ b/src/celeritas/optical/detail/OpticalLaunchAction.cc @@ -134,7 +134,7 @@ void OpticalLaunchAction::execute_impl(CoreParams const&, auto& state = get>(core_state.aux(), this->aux_id()); CELER_ASSERT(state.size() > 0); - auto core_counters = core_state.sync_get_counters(); + auto const core_counters = core_state.sync_get_counters(); auto const& counters = state.counters(); if ((counters.num_pending < data_.auto_flush @@ -148,7 +148,6 @@ void OpticalLaunchAction::execute_impl(CoreParams const&, // Transport pending optical tracks (*transport_)(state); - core_state.sync_put_counters(core_counters); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cu b/src/celeritas/track/ExtendFromSecondariesAction.cu index a001b454cb..75d6eb951d 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cu +++ b/src/celeritas/track/ExtendFromSecondariesAction.cu @@ -16,7 +16,6 @@ #include "detail/LocateAliveExecutor.hh" #include "detail/ProcessSecondariesExecutor.hh" -#include "detail/TrackInitAlgorithms.hh" namespace celeritas { diff --git a/src/celeritas/track/TrackInitData.hh b/src/celeritas/track/TrackInitData.hh index a304cb95b7..32a17628ed 100644 --- a/src/celeritas/track/TrackInitData.hh +++ b/src/celeritas/track/TrackInitData.hh @@ -11,7 +11,6 @@ #include "corecel/data/Collection.hh" #include "corecel/data/CollectionAlgorithms.hh" #include "corecel/data/CollectionBuilder.hh" -#include "corecel/data/PinnedAllocator.hh" #include "corecel/sys/Device.hh" #include "corecel/sys/ThreadId.hh" #include "geocel/Types.hh" diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cc b/src/celeritas/track/detail/TrackInitAlgorithms.cc index bee0a855e4..a1c7230e4d 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cc +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cc @@ -87,8 +87,8 @@ void 
partition_initializers( { // Partition the indices based on the track initializer charge auto* start = init.indices.data().get(); - auto* counters = init.counters.data().get(); auto* end = start + count; + auto* counters = init.counters.data().get(); auto* stencil = init.initializers.data().get() + counters->num_initializers - count; std::stable_partition( diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index b111239fcf..7ca2b9295d 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -63,16 +63,8 @@ RunResult RunResult::from_state(CoreState& state) data = state.ref().init; size_type num_vacancies, num_initializers; - // if constexpr (M == MemSpace::host) - // { - // num_vacancies = state.counters().num_vacancies; - // num_initializers = state.counters().num_initializers; - // } - // else if constexpr (M == MemSpace::device) - // { num_vacancies = state.sync_get_counters().num_vacancies; num_initializers = state.sync_get_counters().num_initializers; - // } // Store the IDs of the vacant track slots for (auto tid : range(TrackSlotId{num_vacancies})) @@ -236,8 +228,6 @@ TYPED_TEST_SUITE(TrackInitTest, MemspaceTypes, MemspaceTypeString); //! 
Test that we can add more primaries than the first allocation TYPED_TEST(TrackInitTest, add_more_primaries) { - // if constexpr (TestFixture::M == MemSpace::device) - // { this->build_states(16); EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); @@ -248,20 +238,6 @@ TYPED_TEST(TrackInitTest, add_more_primaries) primaries = this->make_primaries(32); this->extend_from_primaries(make_span(primaries)); EXPECT_EQ(54, this->state().sync_get_counters().num_initializers); - // } - // else if constexpr (TestFixture::M == MemSpace::host) - // { - // this->build_states(16); - // EXPECT_EQ(0, this->state().counters().num_initializers); - - // auto primaries = this->make_primaries(22); - // this->extend_from_primaries(make_span(primaries)); - // EXPECT_EQ(22, this->state().counters().num_initializers); - - // primaries = this->make_primaries(32); - // this->extend_from_primaries(make_span(primaries)); - // EXPECT_EQ(54, this->state().counters().num_initializers); - // } } //! Test that we can add more primaries than the first allocation @@ -274,14 +250,7 @@ TYPED_TEST(TrackInitTest, extend_primaries) auto primaries = this->make_primaries(2); this->insert_primaries(this->state(), make_span(primaries)); RunResult::from_state(this->state()); - // if constexpr (TestFixture::M == MemSpace::device) - // { EXPECT_EQ(0, this->state().sync_get_counters().num_initializers); - // } - // else if constexpr (TestFixture::M == MemSpace::host) - // { - // EXPECT_EQ(0, this->state().counters().num_initializers); - // } } { // Now initialize after adding @@ -447,19 +416,10 @@ TYPED_TEST(TrackInitTest, primaries) // Find vacancies and create track initializers from secondaries extend_from_secondaries.step(*this->core(), this->state()); - // if constexpr (TestFixture::M == MemSpace::device) - // { EXPECT_EQ(i * num_tracks / 2, this->state().sync_get_counters().num_initializers); EXPECT_EQ(num_tracks / 2, this->state().sync_get_counters().num_vacancies); - // } - // else if 
constexpr (TestFixture::M == MemSpace::host) - // { - // EXPECT_EQ(i * num_tracks / 2, - // this->state().counters().num_initializers); - // EXPECT_EQ(num_tracks / 2, this->state().counters().num_vacancies); - // } } // Check the results @@ -517,15 +477,8 @@ TYPED_TEST(TrackInitTest, extend_from_secondaries) // Create track initializers on device from primary particles auto primaries = this->make_primaries(num_primaries); this->extend_from_primaries(make_span(primaries)); - // if constexpr (TestFixture::M == MemSpace::device) - // { EXPECT_EQ(num_primaries, this->state().sync_get_counters().num_initializers); - // } - // else if constexpr (TestFixture::M == MemSpace::host) - // { - // EXPECT_EQ(num_primaries, this->state().counters().num_initializers); - // } auto apply_actions = [&actions, this] { for (auto const& ea_interface : actions) { From f2457622953148b849581a8ad6402a6796f3477a Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 17 Jan 2026 09:45:42 -0500 Subject: [PATCH 20/74] Update StepDiagnostic example --- example/offload-template/src/StepDiagnostic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/offload-template/src/StepDiagnostic.cc b/example/offload-template/src/StepDiagnostic.cc index d266fe49a3..838ad473f7 100644 --- a/example/offload-template/src/StepDiagnostic.cc +++ b/example/offload-template/src/StepDiagnostic.cc @@ -149,7 +149,7 @@ void StepDiagnostic::step(CoreParams const& params, CoreStateHost& state) const auto& step_state = state.aux_data(aux_id_); // Accumulate counters - this->accum_counters(state.counters(), step_state.host_data); + this->accum_counters(state.sync_get_counters(), step_state.host_data); // Create a functor that gathers data from a single track slot auto execute = make_active_track_executor( From 46b1d0f1d260393ffc636479735a1740dadf33a8 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 17 Jan 2026 09:51:55 -0500 Subject: [PATCH 21/74] Update StepDiagnostic device example --- 
example/offload-template/src/StepDiagnostic.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/offload-template/src/StepDiagnostic.cu b/example/offload-template/src/StepDiagnostic.cu index f8dccefdbb..a8e30d1768 100644 --- a/example/offload-template/src/StepDiagnostic.cu +++ b/example/offload-template/src/StepDiagnostic.cu @@ -28,7 +28,7 @@ void StepDiagnostic::step(CoreParams const& params, CoreStateDevice& state) cons auto& step_state = state.aux_data(aux_id_); // Accumulate counters - this->accum_counters(state.counters(), step_state.host_data); + this->accum_counters(state.sync_get_counters(), step_state.host_data); // Create a functor that gathers data from a single track slot auto execute = make_active_track_executor( From 01113670db44039dcba7d01232db0fb2937129aa Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 19 Jan 2026 22:18:20 -0500 Subject: [PATCH 22/74] Remove unnecessary header to resolve ROCm compiler issue --- src/celeritas/global/CoreState.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/celeritas/global/CoreState.cc b/src/celeritas/global/CoreState.cc index 756c634044..4322ea9a62 100644 --- a/src/celeritas/global/CoreState.cc +++ b/src/celeritas/global/CoreState.cc @@ -6,9 +6,6 @@ //---------------------------------------------------------------------------// #include "CoreState.hh" -#if CELER_USE_DEVICE -# include "corecel/data/ObserverPtr.device.hh" -#endif #include "corecel/io/Logger.hh" #include "corecel/sys/ActionRegistry.hh" #include "corecel/sys/ScopedProfiling.hh" From c07dc858afeabd9dafe55c8ab28f72e990d0710e Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 21 Jan 2026 14:32:08 -0500 Subject: [PATCH 23/74] Resolve PR Feedback and minor code cleanup --- src/celeritas/track/ExtendFromPrimariesAction.cc | 6 ++---- src/celeritas/track/ExtendFromPrimariesAction.cu | 2 +- .../track/ExtendFromSecondariesAction.cc | 5 +---- .../track/ExtendFromSecondariesAction.cu | 4 +--- 
src/celeritas/track/InitializeTracksAction.cc | 2 ++ .../track/detail/ProcessPrimariesExecutor.hh | 6 +++--- .../track/detail/ProcessSecondariesExecutor.hh | 15 ++++++++------- src/celeritas/track/detail/TrackInitAlgorithms.cu | 15 ++++++--------- test/celeritas/track/TrackInit.test.cc | 9 +++------ 9 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index 5c89484e48..305fc5d9b5 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -208,10 +208,8 @@ void ExtendFromPrimariesAction::process_primaries( PrimaryStateData const& pstate) const { auto primaries = pstate.primaries(); - detail::ProcessPrimariesExecutor execute{params.ptr(), - state.ptr(), - state.sync_get_counters(), - primaries}; + detail::ProcessPrimariesExecutor execute{ + params.ptr(), state.ptr(), primaries}; return launch_action(*this, primaries.size(), params, state, execute); } diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index 85aa7d051d..3e1ea93f50 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -26,7 +26,7 @@ void ExtendFromPrimariesAction::process_primaries( auto primaries = pstate.primaries(); auto counters = state.sync_get_counters(); detail::ProcessPrimariesExecutor execute_thread{ - params.ptr(), state.ptr(), counters, primaries}; + params.ptr(), state.ptr(), primaries}; static ActionLauncher const launch_kernel(*this); if (!primaries.empty()) { diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cc b/src/celeritas/track/ExtendFromSecondariesAction.cc index 55a77eb91e..139b98aa99 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cc +++ b/src/celeritas/track/ExtendFromSecondariesAction.cc @@ -96,7 +96,6 @@ void ExtendFromSecondariesAction::step_impl(CoreParams 
const& core_params, // Launch a kernel to create track initializers from secondaries counters.num_alive = core_state.size() - counters.num_vacancies; core_state.sync_put_counters(counters); - this->process_secondaries(core_params, core_state); } @@ -123,9 +122,7 @@ void ExtendFromSecondariesAction::process_secondaries( { //! \todo Wrap with a regular track executor but without remapping slots? detail::ProcessSecondariesExecutor execute{ - core_params.ptr(), - core_state.ptr(), - core_state.sync_get_counters()}; + core_params.ptr(), core_state.ptr()}; launch_action(*this, core_params, core_state, execute); } diff --git a/src/celeritas/track/ExtendFromSecondariesAction.cu b/src/celeritas/track/ExtendFromSecondariesAction.cu index 75d6eb951d..5d501d24ae 100644 --- a/src/celeritas/track/ExtendFromSecondariesAction.cu +++ b/src/celeritas/track/ExtendFromSecondariesAction.cu @@ -62,9 +62,7 @@ void ExtendFromSecondariesAction::process_secondaries( using Executor = detail::ProcessSecondariesExecutor; static ActionLauncher launch(*this, "process-secondaries"); launch(core_state, - Executor{core_params.ptr(), - core_state.ptr(), - core_state.sync_get_counters()}); + Executor{core_params.ptr(), core_state.ptr()}); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index 0c91bb60e9..a14930fc62 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -56,6 +56,7 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreState& core_state) const { auto counters = core_state.sync_get_counters(); + // The number of new tracks to initialize is the smaller of the number of // empty slots in the track vector and the number of track initializers size_type num_new_tracks @@ -74,6 +75,7 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, num_new_tracks, 
core_state.stream_id()); } + // Launch a kernel to initialize tracks this->step_impl(core_params, core_state, num_new_tracks); diff --git a/src/celeritas/track/detail/ProcessPrimariesExecutor.hh b/src/celeritas/track/detail/ProcessPrimariesExecutor.hh index 4af48caf45..e8d856f733 100644 --- a/src/celeritas/track/detail/ProcessPrimariesExecutor.hh +++ b/src/celeritas/track/detail/ProcessPrimariesExecutor.hh @@ -38,7 +38,6 @@ struct ProcessPrimariesExecutor ParamsPtr params; StatePtr state; - CoreStateCounters counters; Span primaries; @@ -55,7 +54,8 @@ struct ProcessPrimariesExecutor CELER_FUNCTION void ProcessPrimariesExecutor::operator()(ThreadId tid) const { CELER_EXPECT(tid < primaries.size()); - CELER_EXPECT(primaries.size() <= counters.num_initializers + tid.get()); + auto counters = state->init.counters.data().get(); + CELER_EXPECT(primaries.size() <= counters->num_initializers + tid.get()); Primary const& primary = primaries[tid.unchecked_get()]; @@ -73,7 +73,7 @@ CELER_FUNCTION void ProcessPrimariesExecutor::operator()(ThreadId tid) const ti.particle.energy = primary.energy; // Store the initializer - size_type idx = counters.num_initializers - primaries.size() + tid.get(); + size_type idx = counters->num_initializers - primaries.size() + tid.get(); state->init.initializers[ItemId(idx)] = ti; } diff --git a/src/celeritas/track/detail/ProcessSecondariesExecutor.hh b/src/celeritas/track/detail/ProcessSecondariesExecutor.hh index d1b2f5bb64..f8c7d88025 100644 --- a/src/celeritas/track/detail/ProcessSecondariesExecutor.hh +++ b/src/celeritas/track/detail/ProcessSecondariesExecutor.hh @@ -38,7 +38,6 @@ struct ProcessSecondariesExecutor ParamsPtr params; StatePtr state; - CoreStateCounters counters; //// FUNCTIONS //// @@ -77,8 +76,9 @@ ProcessSecondariesExecutor::operator()(TrackSlotId tid) const // Offset in the vector of track initializers auto& data = state->init; - CELER_ASSERT(data.secondary_counts[tid] <= counters.num_secondaries); - size_type offset = 
counters.num_secondaries - data.secondary_counts[tid]; + auto counters = state->init.counters.data().get(); + CELER_ASSERT(data.secondary_counts[tid] <= counters->num_secondaries); + size_type offset = counters->num_secondaries - data.secondary_counts[tid]; // Save the parent ID since it will be overwritten if a secondary is // initialized in this slot @@ -129,10 +129,11 @@ ProcessSecondariesExecutor::operator()(TrackSlotId tid) const } else { - CELER_ASSERT(offset > 0 && offset <= counters.num_initializers); + CELER_ASSERT(offset > 0 + && offset <= counters->num_initializers); - if (offset <= min(counters.num_secondaries, - counters.num_vacancies) + if (offset <= min(counters->num_secondaries, + counters->num_vacancies) && (params->init.track_order != TrackOrder::init_charge || sim.status() == TrackStatus::alive)) { @@ -147,7 +148,7 @@ ProcessSecondariesExecutor::operator()(TrackSlotId tid) const // Store the track initializer data.initializers[ItemId{ - counters.num_initializers - offset}] + counters->num_initializers - offset}] = ti; --offset; diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cu b/src/celeritas/track/detail/TrackInitAlgorithms.cu index 9f1168b9fc..5121e7432c 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cu +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cu @@ -90,12 +90,9 @@ void remove_if_alive( // New size of the vacancy vector host_counters.num_vacancies = end - start; - copy_bytes(MemSpace::device, - counters.get(), - MemSpace::host, - &host_counters, - sizeof(CoreStateCounters), - stream_id); + Copier copy{{counters.get(), 1}, + stream_id}; + copy(MemSpace::host, {&host_counters, 1}); stream.sync(); return; #else @@ -213,8 +210,10 @@ void partition_initializers( // Partition the indices based on the track initializer charge auto start = device_pointer_cast(init.indices.data()); auto end = start + count; + auto counters = device_pointer_cast(init.counters.data()); + auto cpucntrs = 
ItemCopier{stream_id}(counters.get()); auto stencil = static_cast(init.initializers.data()) - + init.counters.num_initializers - count; + + cpucntrs.num_initializers - count; thrust::stable_partition( thrust_execute_on(stream_id), start, @@ -262,8 +261,6 @@ void partition_initializers( // because the indices are always sequential from zero auto start = thrust::make_counting_iterator(0); auto data = device_pointer_cast(init.indices.data()); - // Allocate storage for the number of neutral tracks (unused by celeritas) - DeviceVector num_neutral{1, stream_id}; auto cub_error_code = cub::DevicePartition::Flagged(nullptr, temp_storage_bytes, diff --git a/test/celeritas/track/TrackInit.test.cc b/test/celeritas/track/TrackInit.test.cc index 7ca2b9295d..8c3b8750dd 100644 --- a/test/celeritas/track/TrackInit.test.cc +++ b/test/celeritas/track/TrackInit.test.cc @@ -61,19 +61,16 @@ RunResult RunResult::from_state(CoreState& state) // Copy track initializer data to host HostVal data; data = state.ref().init; - size_type num_vacancies, num_initializers; - - num_vacancies = state.sync_get_counters().num_vacancies; - num_initializers = state.sync_get_counters().num_initializers; // Store the IDs of the vacant track slots - for (auto tid : range(TrackSlotId{num_vacancies})) + for (auto tid : range(TrackSlotId{state.sync_get_counters().num_vacancies})) { result.vacancies.push_back(id_to_int(data.vacancies[tid])); } // Store the track IDs of the initializers - for (auto init_id : range(ItemId{num_initializers})) + for (auto init_id : range(ItemId{ + state.sync_get_counters().num_initializers})) { auto const& init = data.initializers[init_id]; result.init_ids.push_back(id_to_int(init.sim.track_id)); From 843926e1cf61b168fb83ce6d25d5570e4d73a032 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 25 Jan 2026 17:45:55 -0500 Subject: [PATCH 24/74] Initial move of counters to device. 
Some tests not passing yet --- src/accel/LocalOpticalOffload.cc | 4 +- src/celeritas/optical/CoreState.cc | 55 +++++++++++++++++-- src/celeritas/optical/CoreState.hh | 27 +++++---- src/celeritas/optical/Runner.cc | 4 +- src/celeritas/optical/TrackInitData.hh | 17 +++++- src/celeritas/optical/Transporter.cc | 5 +- .../optical/action/LocateVacanciesAction.cc | 3 +- .../optical/detail/OpticalLaunchAction.cc | 2 +- .../optical/gen/DirectGeneratorAction.cc | 16 +++--- .../optical/gen/DirectGeneratorAction.cu | 10 ++-- src/celeritas/optical/gen/GeneratorAction.cc | 9 ++- src/celeritas/optical/gen/GeneratorAction.cu | 7 +-- src/celeritas/optical/gen/GeneratorBase.hh | 3 +- src/celeritas/optical/gen/OffloadAction.cc | 4 +- .../optical/gen/PrimaryGeneratorAction.cc | 17 +++--- .../optical/gen/PrimaryGeneratorAction.cu | 7 +-- .../gen/detail/DirectGeneratorExecutor.hh | 6 +- .../optical/gen/detail/GeneratorExecutor.hh | 17 +++--- .../gen/detail/PrimaryGeneratorExecutor.hh | 17 +++--- src/celeritas/track/TrackInitData.hh | 4 +- 20 files changed, 154 insertions(+), 80 deletions(-) diff --git a/src/accel/LocalOpticalOffload.cc b/src/accel/LocalOpticalOffload.cc index 74efecc902..df688c4c60 100644 --- a/src/accel/LocalOpticalOffload.cc +++ b/src/accel/LocalOpticalOffload.cc @@ -186,7 +186,9 @@ void LocalOpticalOffload::Flush() // Copy the buffered distributions to device generate_->insert(*state_, make_span(buffer_)); - state_->counters().num_pending += num_photons_; + auto counters = state_->sync_get_counters(); + counters.num_pending += num_photons_; + state_->sync_put_counters(counters); num_photons_ = 0; buffer_.clear(); diff --git a/src/celeritas/optical/CoreState.cc b/src/celeritas/optical/CoreState.cc index 0abc663d35..cedf8210f3 100644 --- a/src/celeritas/optical/CoreState.cc +++ b/src/celeritas/optical/CoreState.cc @@ -46,7 +46,9 @@ CoreState::CoreState(CoreParams const& params, states_ = CollectionStateStore( params.host_ref(), stream_id, num_track_slots); - 
this->counters().num_vacancies = num_track_slots; + auto counters = this->sync_get_counters(); + counters.num_vacancies = num_track_slots; + this->sync_put_counters(counters); if constexpr (M == MemSpace::device) { @@ -83,7 +85,7 @@ template bool CoreState::warming_up() const { CELER_NOT_IMPLEMENTED("warming up"); - return this->counters().num_active == 0; + return this->sync_get_counters().num_active == 0; } //---------------------------------------------------------------------------// @@ -98,6 +100,50 @@ void CoreState::insert_primaries(Span) CELER_NOT_IMPLEMENTED("primary insertion"); } +//---------------------------------------------------------------------------// +/*! + * Copy the core state counters from the device to the host. For host-only + * code, the counters reside on the host, so this just returns a + * CoreStateCounters object. Note that it does not return a reference, so + * sync_put_counters() must be used if any counters change. + */ +template +CoreStateCounters CoreState::sync_get_counters() const +{ + auto* counters + = static_cast(this->ref().init.counters.data()); + CELER_ASSERT(counters); + if constexpr (M == MemSpace::device) + { + auto result + = ItemCopier{this->stream_id()}(counters); + device().stream(this->stream_id()).sync(); + return result; + } + return *counters; +} + +//---------------------------------------------------------------------------// +/*! + * Copy the core state counters from the host to the device. For host-only + * code, this function copies a CoreStateCounter object into the CoreState + * object, which is needed when any of the counters change, because + * sync_get_counters() doesn't return a reference. 
+ */ +template +void CoreState::sync_put_counters(CoreStateCounters const& host_counters) +{ + auto* counters + = static_cast(this->ref().init.counters.data()); + CELER_ASSERT(counters); + Copier copy{{counters, 1}, this->stream_id()}; + copy(MemSpace::host, {&host_counters, 1}); + if constexpr (M == MemSpace::device) + { + device().stream(this->stream_id()).sync(); + } +} + //---------------------------------------------------------------------------// /*! * Reset the state data. @@ -109,8 +155,9 @@ void CoreState::insert_primaries(Span) template void CoreState::reset() { - this->counters() = CoreStateCounters{}; - this->counters().num_vacancies = this->size(); + auto counters = CoreStateCounters{}; + counters.num_vacancies = this->size(); + sync_put_counters(counters); // Reset all the track slots to inactive fill(TrackStatus::inactive, &this->ref().sim.status); diff --git a/src/celeritas/optical/CoreState.hh b/src/celeritas/optical/CoreState.hh index 5e109cc0b6..3551c2461a 100644 --- a/src/celeritas/optical/CoreState.hh +++ b/src/celeritas/optical/CoreState.hh @@ -47,8 +47,14 @@ class CoreStateInterface : public AuxStateInterface //! Thread/stream ID virtual StreamId stream_id() const = 0; - //! Access track initialization counters - virtual CoreStateCounters const& counters() const = 0; + //! Synchronize and copy track initialization counters from device to host + //! For host-only code, this replaces the old counters() function + [[nodiscard]] virtual CoreStateCounters sync_get_counters() const = 0; + + //! Synchronize and copy track initialization counters from host to device + //! For host-only code, this replaces the old counters() function + //! since we return a CoreStateCounters object instead of a reference + virtual void sync_put_counters(CoreStateCounters const&) = 0; //! 
Reseed the RNGs at the start of an event for reproducibility virtual void reseed(std::shared_ptr, UniqueEventId) = 0; @@ -79,12 +85,6 @@ class CoreStateBase : public CoreStateInterface //!@} public: - //! Track initialization counters - CoreStateCounters& counters() { return counters_; } - - //! Track initialization counters - CoreStateCounters const& counters() const final { return counters_; } - //! Optical loop statistics OpticalAccumStats const& accum() const { return accum_; } @@ -106,9 +106,6 @@ class CoreStateBase : public CoreStateInterface ~CoreStateBase() override; private: - // Counters for track initialization and activity - CoreStateCounters counters_; - //! Counts accumulated over the event for diagnostics OpticalAccumStats accum_; @@ -154,6 +151,14 @@ class CoreState final : public CoreStateBase //! Number of track slots size_type size() const final { return states_.size(); } + //! Synchronize and copy track initialization counters from device to host + [[nodiscard]] CoreStateCounters sync_get_counters() const final; + + //! Synchronize and copy track initialization counters from host to device + //! For host-only code, this copies the local CoreStateCounters back to the + //! class, since sync_get_counters() doesn't return a reference + void sync_put_counters(CoreStateCounters const&) final; + // Whether the state is being transported with no active particles bool warming_up() const; diff --git a/src/celeritas/optical/Runner.cc b/src/celeritas/optical/Runner.cc index 557d806e40..ca54bae49b 100644 --- a/src/celeritas/optical/Runner.cc +++ b/src/celeritas/optical/Runner.cc @@ -115,10 +115,12 @@ auto Runner::operator()(OffloadGeneratorData data) -> Result * for some run modes, e.g. offloading distributions through accel where we * already know the number of pending tracks. 
*/ + auto counters = state_->sync_get_counters(); for (auto const& d : data) { - state_->counters().num_pending += d.num_photons; + counters.num_pending += d.num_photons; } + state_->sync_put_counters(counters); // Generate optical photons and transport to completion (*problem_.transporter)(*state_); diff --git a/src/celeritas/optical/TrackInitData.hh b/src/celeritas/optical/TrackInitData.hh index 407179d9ff..e64b620797 100644 --- a/src/celeritas/optical/TrackInitData.hh +++ b/src/celeritas/optical/TrackInitData.hh @@ -14,6 +14,7 @@ #include "corecel/sys/Device.hh" #include "corecel/sys/ThreadId.hh" #include "celeritas/Types.hh" +#include "celeritas/track/CoreStateCounters.hh" #include "TrackInitializer.hh" @@ -30,6 +31,8 @@ namespace optical * capacity. * - \c vacancies stores the \c TrackSlotid of the tracks that have been * killed; the size will be <= the number of track states. + * - \c counters stores the number of tracks with a given status and is updated + * during each step of the simulation of an event. */ template struct TrackInitStateData @@ -38,17 +41,24 @@ struct TrackInitStateData template using StateItems = StateCollection; + template + using Items = Collection; //// DATA //// StateItems vacancies; + // Maintain the counters here to allow device-resident computation with + // synchronization between host and device only at the end of a step or + // when explicitly requested, such as in the tests + Items counters; + //// METHODS //// //! Whether the data are assigned explicit CELER_FUNCTION operator bool() const { - return !vacancies.empty(); + return !vacancies.empty() && !counters.empty(); } //! 
Assign from another set of data @@ -57,6 +67,7 @@ struct TrackInitStateData { CELER_EXPECT(other); vacancies = other.vacancies; + counters = other.counters; return *this; } }; @@ -79,6 +90,10 @@ void resize(TrackInitStateData* data, resize(&data->vacancies, size); fill_sequence(&data->vacancies, stream); + // Initialize the counters for the step to zero + resize(&data->counters, 1); + fill(CoreStateCounters{}, &data->counters); + CELER_ENSURE(*data); } diff --git a/src/celeritas/optical/Transporter.cc b/src/celeritas/optical/Transporter.cc index 701f83fab0..463707d46d 100644 --- a/src/celeritas/optical/Transporter.cc +++ b/src/celeritas/optical/Transporter.cc @@ -64,7 +64,7 @@ void Transporter::transport_impl(CoreState& state) const size_type num_step_iters{0}; size_type num_steps{0}; - auto const& counters = state.counters(); + auto counters = state.sync_get_counters(); // Store a pointer to aux data for timing results std::vector* accum_time = nullptr; @@ -111,6 +111,9 @@ void Transporter::transport_impl(CoreState& state) const state.reset(); break; } + // No longer have a reference to the counters, so need to retrieve the + // updated values + counters = state.sync_get_counters(); } // Update statistics diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cc b/src/celeritas/optical/action/LocateVacanciesAction.cc index fecc5c0266..20fa9020e8 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cc +++ b/src/celeritas/optical/action/LocateVacanciesAction.cc @@ -51,7 +51,7 @@ void LocateVacanciesAction::step(CoreParams const&, CoreStateDevice& state) cons template void LocateVacanciesAction::step_impl(CoreState& state) const { - auto& counters = state.counters(); + auto counters = state.sync_get_counters(); // Compact the IDs of the inactive tracks, getting the sorted indices of // the empty slots @@ -59,6 +59,7 @@ void LocateVacanciesAction::step_impl(CoreState& state) const state.ref().sim.status, state.ref().init.vacancies, 
state.stream_id()); counters.num_alive = state.size() - counters.num_vacancies; + state.sync_put_counters(counters); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/detail/OpticalLaunchAction.cc b/src/celeritas/optical/detail/OpticalLaunchAction.cc index 1dcc73b86f..4dea6be4a0 100644 --- a/src/celeritas/optical/detail/OpticalLaunchAction.cc +++ b/src/celeritas/optical/detail/OpticalLaunchAction.cc @@ -135,7 +135,7 @@ void OpticalLaunchAction::execute_impl(CoreParams const&, CELER_ASSERT(state.size() > 0); auto const core_counters = core_state.sync_get_counters(); - auto const& counters = state.counters(); + auto const counters = state.sync_get_counters(); if ((counters.num_pending < data_.auto_flush && (core_counters.num_alive > 0 || core_counters.num_initializers > 0)) diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index 1137d0e70f..22c0f37c22 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -144,7 +144,9 @@ void DirectGeneratorAction::insert_impl(CoreState& state, // Update counters and copy distributions to aux state storage aux_state.counters.buffer_size = data.size(); aux_state.counters.num_pending = data.size(); - state.counters().num_pending += data.size(); + auto counters = state.sync_get_counters(); + counters.num_pending += data.size(); + state.sync_put_counters(counters); Copier copy_to_aux{aux_state.initializers(), state.stream_id()}; @@ -184,7 +186,7 @@ void DirectGeneratorAction::step_impl(CoreParams const& params, = get>(*state.aux(), this->aux_id()); auto& counters = aux_state.counters; - if (state.counters().num_vacancies > 0 && counters.num_pending > 0) + if (state.sync_get_counters().num_vacancies > 0 && counters.num_pending > 0) { // Generate the optical photons from the distribution data this->generate(params, state); @@ -213,14 
+215,12 @@ void DirectGeneratorAction::generate(CoreParams const& params, auto& aux_state = get>( *state.aux(), this->aux_id()); - size_type num_gen - = min(state.counters().num_vacancies, aux_state.counters.num_pending); + size_type num_gen = min(state.sync_get_counters().num_vacancies, + aux_state.counters.num_pending); // Generate optical photons in vacant track slots - detail::DirectGeneratorExecutor execute{params.ptr(), - state.ptr(), - aux_state.store.ref(), - state.counters()}; + detail::DirectGeneratorExecutor execute{ + params.ptr(), state.ptr(), aux_state.store.ref()}; launch_action(num_gen, execute); } diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cu b/src/celeritas/optical/gen/DirectGeneratorAction.cu index b173c8fed5..5f46af8a0f 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cu +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cu @@ -29,13 +29,11 @@ void DirectGeneratorAction::generate(CoreParams const& params, auto& aux_state = get>( *state.aux(), this->aux_id()); - size_type num_gen - = min(state.counters().num_vacancies, aux_state.counters.num_pending); + size_type num_gen = min(state.sync_get_counters().num_vacancies, + aux_state.counters.num_pending); - detail::DirectGeneratorExecutor execute{params.ptr(), - state.ptr(), - aux_state.store.ref(), - state.counters()}; + detail::DirectGeneratorExecutor execute{ + params.ptr(), state.ptr(), aux_state.store.ref()}; static ActionLauncher const launch(*this); launch(num_gen, state.stream_id(), execute); } diff --git a/src/celeritas/optical/gen/GeneratorAction.cc b/src/celeritas/optical/gen/GeneratorAction.cc index 1454fdc4d7..1c15fe6810 100644 --- a/src/celeritas/optical/gen/GeneratorAction.cc +++ b/src/celeritas/optical/gen/GeneratorAction.cc @@ -202,7 +202,7 @@ void GeneratorAction::step_impl(CoreParams const& params, state.stream_id()); } - if (state.counters().num_vacancies > 0 && counters.num_pending > 0) + if (state.sync_get_counters().num_vacancies > 0 && 
counters.num_pending > 0) { // Generate the optical photons from the distribution data this->generate(params, state); @@ -232,8 +232,8 @@ void GeneratorAction::generate(CoreParams const& params, auto& aux_state = get>(*state.aux(), this->aux_id()); - size_type num_gen - = min(state.counters().num_vacancies, aux_state.counters.num_pending); + size_type num_gen = min(state.sync_get_counters().num_vacancies, + aux_state.counters.num_pending); { // Generate optical photons in vacant track slots detail::GeneratorExecutor execute{params.ptr(), @@ -241,8 +241,7 @@ void GeneratorAction::generate(CoreParams const& params, params.host_ref().cherenkov, params.host_ref().scintillation, aux_state.store.ref(), - aux_state.counters.buffer_size, - state.counters()}; + aux_state.counters.buffer_size}; launch_action(num_gen, execute); } { diff --git a/src/celeritas/optical/gen/GeneratorAction.cu b/src/celeritas/optical/gen/GeneratorAction.cu index 93263d2ea8..697cc0746e 100644 --- a/src/celeritas/optical/gen/GeneratorAction.cu +++ b/src/celeritas/optical/gen/GeneratorAction.cu @@ -38,8 +38,8 @@ void GeneratorAction::generate(CoreParams const& params, auto& aux_state = get>(*state.aux(), this->aux_id()); - size_type num_gen - = min(state.counters().num_vacancies, aux_state.counters.num_pending); + size_type num_gen = min(state.sync_get_counters().num_vacancies, + aux_state.counters.num_pending); { // Generate optical photons in vacant track slots detail::GeneratorExecutor execute{params.ptr(), @@ -47,8 +47,7 @@ void GeneratorAction::generate(CoreParams const& params, params.device_ref().cherenkov, params.device_ref().scintillation, aux_state.store.ref(), - aux_state.counters.buffer_size, - state.counters()}; + aux_state.counters.buffer_size}; static ActionLauncher const launch(*this); launch(num_gen, state.stream_id(), execute); } diff --git a/src/celeritas/optical/gen/GeneratorBase.hh b/src/celeritas/optical/gen/GeneratorBase.hh index 6ce5c412a4..09c26d7330 100644 --- 
a/src/celeritas/optical/gen/GeneratorBase.hh +++ b/src/celeritas/optical/gen/GeneratorBase.hh @@ -91,7 +91,7 @@ void GeneratorBase::update_counters(optical::CoreState& state) const { CELER_EXPECT(state.aux()); - auto& counters = state.counters(); + auto counters = state.sync_get_counters(); auto& gen_counters = this->counters(*state.aux()); // Calculate the number of new tracks generated at this step @@ -111,6 +111,7 @@ void GeneratorBase::update_counters(optical::CoreState& state) const // Update the number of active tracks. This must be done even if no new // tracks were generated counters.num_active = state.size() - counters.num_vacancies; + state.sync_put_counters(counters); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/OffloadAction.cc b/src/celeritas/optical/gen/OffloadAction.cc index 51dd357628..0edfbb4224 100644 --- a/src/celeritas/optical/gen/OffloadAction.cc +++ b/src/celeritas/optical/gen/OffloadAction.cc @@ -111,8 +111,10 @@ void OffloadAction::step_impl(CoreParams const& core_params, // distributions created in this step auto& optical_state = get>(core_state.aux(), data_.optical_id); - optical_state.counters().num_pending += detail::count_num_photons( + auto counters = optical_state.sync_get_counters(); + counters.num_pending += detail::count_num_photons( buffer, start, buffer_size, core_state.stream_id()); + optical_state.sync_put_counters(counters); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc index dafea685eb..384be5e8cf 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc @@ -135,7 +135,9 @@ void PrimaryGeneratorAction::insert_impl(optical::CoreState& state) const auto& aux_state = this->counters(*state.aux()); aux_state.counters.num_pending = 
data_.num_photons; - state.counters().num_pending += data_.num_photons; + auto counters = state.sync_get_counters(); + counters.num_pending += data_.num_photons; + state.sync_put_counters(counters); } //---------------------------------------------------------------------------// @@ -150,7 +152,7 @@ void PrimaryGeneratorAction::step_impl(CoreParams const& params, auto const& counters = this->counters(*state.aux()).counters; - if (state.counters().num_vacancies > 0 && counters.num_pending > 0) + if (state.sync_get_counters().num_vacancies > 0 && counters.num_pending > 0) { // Generate the optical photons from the distribution data this->generate(params, state); @@ -170,15 +172,12 @@ void PrimaryGeneratorAction::generate(CoreParams const& params, CELER_EXPECT(state.aux()); auto const& aux_state = this->counters(*state.aux()); - size_type num_gen - = min(state.counters().num_vacancies, aux_state.counters.num_pending); + size_type num_gen = min(state.sync_get_counters().num_vacancies, + aux_state.counters.num_pending); // Generate optical photons in vacant track slots - detail::PrimaryGeneratorExecutor execute{params.ptr(), - state.ptr(), - data_, - params_.host_ref(), - state.counters()}; + detail::PrimaryGeneratorExecutor execute{ + params.ptr(), state.ptr(), data_, params_.host_ref()}; launch_action(num_gen, execute); } diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cu b/src/celeritas/optical/gen/PrimaryGeneratorAction.cu index c13a7c742a..a65ea1c77f 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cu +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cu @@ -31,15 +31,14 @@ void PrimaryGeneratorAction::generate(CoreParams const& params, CELER_EXPECT(state.aux()); auto const& aux_state = this->counters(*state.aux()); - size_type num_gen - = min(state.counters().num_vacancies, aux_state.counters.num_pending); + size_type num_gen = min(state.sync_get_counters().num_vacancies, + aux_state.counters.num_pending); // Generate optical photons 
in vacant track slots detail::PrimaryGeneratorExecutor execute{params.ptr(), state.ptr(), data_, - params_.device_ref(), - state.counters()}; + params_.device_ref()}; static ActionLauncher const launch(*this); launch(num_gen, state.stream_id(), execute); } diff --git a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh index ee1e49d99c..7c065a5536 100644 --- a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh @@ -31,7 +31,6 @@ struct DirectGeneratorExecutor CRefPtr params; RefPtr state; NativeRef const data; - CoreStateCounters counters; // Initialize optical photons inline CELER_FUNCTION void operator()(TrackSlotId tid) const; @@ -52,16 +51,17 @@ CELER_FUNCTION void DirectGeneratorExecutor::operator()(TrackSlotId tid) const CELER_EXPECT(params); CELER_EXPECT(state); + auto counters = state->init.counters.data().get(); // Create view to new track to be initialized CoreTrackView vacancy(*params, *state, [&] { TrackSlotId idx{ - index_before(counters.num_vacancies, ThreadId(tid.get()))}; + index_before(counters->num_vacancies, ThreadId(tid.get()))}; return state->init.vacancies[idx]; }()); // Get initializer from the back TrackInitializer const& init = data.initializers[ItemId( - index_before(counters.num_pending, ThreadId(tid.get())))]; + index_before(counters->num_pending, ThreadId(tid.get())))]; // Initialize track vacancy = init; diff --git a/src/celeritas/optical/gen/detail/GeneratorExecutor.hh b/src/celeritas/optical/gen/detail/GeneratorExecutor.hh index e0afce578d..6a0f1cddc5 100644 --- a/src/celeritas/optical/gen/detail/GeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/GeneratorExecutor.hh @@ -38,7 +38,6 @@ struct GeneratorExecutor NativeCRef const scintillation; NativeRef const offload; size_type buffer_size{}; - CoreStateCounters counters; //// FUNCTIONS //// @@ -64,6 +63,7 @@ CELER_FUNCTION void 
GeneratorExecutor::operator()(TrackSlotId tid) const using DistId = ItemId; CoreTrackView track(*params, *state, tid); + auto counters = state->init.counters.data().get(); // Find the index of the first distribution that has a nonzero number of // primaries left to generate @@ -86,13 +86,14 @@ CELER_FUNCTION void GeneratorExecutor::operator()(TrackSlotId tid) const CELER_ASSERT(dist); // Create the view to the new track to be initialized - CoreTrackView vacancy{*params, *state, [&] { - // Get the vacancy from the back in case there - // are more vacancies than photons to generate - TrackSlotId idx{index_before( - counters.num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there + // are more vacancies than photons to generate + TrackSlotId idx{ + index_before(counters->num_vacancies, ThreadId(tid.get()))}; + return state->init.vacancies[idx]; + }()}; // Generate one track from the distribution auto rng = track.rng(); diff --git a/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh index 2d3d6c4969..5b0b51a58e 100644 --- a/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh @@ -35,7 +35,6 @@ struct PrimaryGeneratorExecutor RefPtr state; PrimaryDistributionData data; NativeCRef distributions; - CoreStateCounters counters; //// FUNCTIONS //// @@ -61,15 +60,17 @@ CELER_FUNCTION void PrimaryGeneratorExecutor::operator()(TrackSlotId tid) const CELER_EXPECT(distributions); CoreTrackView track(*params, *state, tid); + auto counters = state->init.counters.data().get(); // Create the view to the new track to be initialized - CoreTrackView vacancy{*params, *state, [&] { - // Get the vacancy from the back in case there - // are more vacancies than photons to generate - TrackSlotId idx{index_before( - 
counters.num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there + // are more vacancies than photons to generate + TrackSlotId idx{ + index_before(counters->num_vacancies, ThreadId(tid.get()))}; + return state->init.vacancies[idx]; + }()}; // Generate one primary from the distribution auto rng = track.rng(); diff --git a/src/celeritas/track/TrackInitData.hh b/src/celeritas/track/TrackInitData.hh index 32a17628ed..8b2c32de71 100644 --- a/src/celeritas/track/TrackInitData.hh +++ b/src/celeritas/track/TrackInitData.hh @@ -95,7 +95,7 @@ struct TrackInitializer * (with one remainder at the end for storing the accumulated number of * secondaries). * - \c counters stores the number of tracks with a given status and is updated - * during each step of the simulation of the event. + * during each step of the simulation of an event. */ template struct TrackInitStateData @@ -120,7 +120,7 @@ struct TrackInitStateData // CoreStateCounters) Items initializers; - // Maintain the counters here to allow GPU-resident computation with + // Maintain the counters here to allow device-resident computation with // synchronization between host and device only at the end of a step or // when explicitly requested, such as in the tests Items counters; From f6acaf8bf1b24be2da65943a5f1b4bebfad0b7ec Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 25 Jan 2026 20:36:33 -0500 Subject: [PATCH 25/74] Retrieve updated counters earlier to resolve most failing tests --- src/celeritas/optical/Transporter.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/celeritas/optical/Transporter.cc b/src/celeritas/optical/Transporter.cc index 463707d46d..a610a91955 100644 --- a/src/celeritas/optical/Transporter.cc +++ b/src/celeritas/optical/Transporter.cc @@ -93,6 +93,9 @@ void Transporter::transport_impl(CoreState& state) const } } + // No longer have a 
reference to the counters, so need to retrieve the + // updated values + counters = state.sync_get_counters(); num_steps += counters.num_active; if (CELER_UNLIKELY(++num_step_iters == this->params()->sim()->max_step_iters())) @@ -111,9 +114,6 @@ void Transporter::transport_impl(CoreState& state) const state.reset(); break; } - // No longer have a reference to the counters, so need to retrieve the - // updated values - counters = state.sync_get_counters(); } // Update statistics From 266b90a5465e04cd50e93063001374247a5dad29 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 26 Jan 2026 13:53:37 -0500 Subject: [PATCH 26/74] Resolve compiler warning on unused variable --- src/celeritas/track/ExtendFromPrimariesAction.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index 3e1ea93f50..c528dbc3e5 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -24,7 +24,6 @@ void ExtendFromPrimariesAction::process_primaries( PrimaryStateData const& pstate) const { auto primaries = pstate.primaries(); - auto counters = state.sync_get_counters(); detail::ProcessPrimariesExecutor execute_thread{ params.ptr(), state.ptr(), primaries}; static ActionLauncher const launch_kernel(*this); From 2e51bc141f4f30ea037f5c1eceec9f2aaf980d55 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 26 Jan 2026 18:53:40 -0500 Subject: [PATCH 27/74] Empty commit to trigger CI From 01215be73f56b277d296e2bc88be02fcf79fa87b Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 28 Jan 2026 10:31:28 -0500 Subject: [PATCH 28/74] Don't need to sync counters in optical CoreState constructor --- src/celeritas/optical/CoreState.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/optical/CoreState.cc b/src/celeritas/optical/CoreState.cc index cedf8210f3..c7ec1aa517 100644 --- a/src/celeritas/optical/CoreState.cc +++ 
b/src/celeritas/optical/CoreState.cc @@ -46,7 +46,7 @@ CoreState::CoreState(CoreParams const& params, states_ = CollectionStateStore( params.host_ref(), stream_id, num_track_slots); - auto counters = this->sync_get_counters(); + auto counters = CoreStateCounters{}; counters.num_vacancies = num_track_slots; this->sync_put_counters(counters); From 3bbbead081315f90af20c417c790cb84d9789fe3 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 6 Feb 2026 16:43:03 -0500 Subject: [PATCH 29/74] Add SetPending executor class --- src/celeritas/global/detail/SetPending.cc | 45 ++++++++++++++ src/celeritas/global/detail/SetPending.cu | 37 ++++++++++++ src/celeritas/global/detail/SetPending.hh | 72 +++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 src/celeritas/global/detail/SetPending.cc create mode 100644 src/celeritas/global/detail/SetPending.cu create mode 100644 src/celeritas/global/detail/SetPending.hh diff --git a/src/celeritas/global/detail/SetPending.cc b/src/celeritas/global/detail/SetPending.cc new file mode 100644 index 0000000000..e0b3442f11 --- /dev/null +++ b/src/celeritas/global/detail/SetPending.cc @@ -0,0 +1,45 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/global/detail/SetPending.cc +//---------------------------------------------------------------------------// +#include "SetPending.hh" + +#include "corecel/Assert.hh" +#include "corecel/Types.hh" + +#include "../ActionLauncher.hh" +#include "../CoreParams.hh" +#include "../CoreState.hh" + +namespace celeritas +{ +namespace detail +{ +//---------------------------------------------------------------------------// +/*! + * Reset the num_pending counter to the number of generated primaries. 
+ */ +void set_pending(CoreParams const& params, + CoreState& state, + size_type num_primaries) +{ + SetPendingExecutor execute_thread{ + params.ptr(), state.ptr(), num_primaries}; + launch_core(1, "set-pending", params, state, execute_thread); +} + +//---------------------------------------------------------------------------// +// DEVICE-DISABLED IMPLEMENTATION +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +void set_pending(CoreParams const&, CoreState&, size_type) +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas diff --git a/src/celeritas/global/detail/SetPending.cu b/src/celeritas/global/detail/SetPending.cu new file mode 100644 index 0000000000..52074f9022 --- /dev/null +++ b/src/celeritas/global/detail/SetPending.cu @@ -0,0 +1,37 @@ +//------------------------------ -*- cuda -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/global/detail/SetPending.cu +//---------------------------------------------------------------------------// +#include "SetPending.hh" + +#include "corecel/Assert.hh" +#include "corecel/Types.hh" + +#include "../ActionLauncher.device.hh" +#include "../CoreParams.hh" +#include "../CoreState.hh" + +namespace celeritas +{ +namespace detail +{ +//---------------------------------------------------------------------------// +/*! + * Reset the num_pending counter to the number of generated primaries. 
+ */ +void set_pending(CoreParams const& params, + CoreState& state, + size_type num_primaries) +{ + SetPendingExecutor execute_thread{ + params.ptr(), state.ptr(), num_primaries}; + static ActionLauncher const launch_kernel( + "set-pending"); + launch_kernel(1, state, execute_thread); +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas diff --git a/src/celeritas/global/detail/SetPending.hh b/src/celeritas/global/detail/SetPending.hh new file mode 100644 index 0000000000..f0dc4c599c --- /dev/null +++ b/src/celeritas/global/detail/SetPending.hh @@ -0,0 +1,72 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/global/detail/SetPending.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Macros.hh" +#include "celeritas/Types.hh" + +#include "../CoreState.hh" + +namespace celeritas +{ +//---------------------------------------------------------------------------// +class CoreParams; +template +class CoreState; + +namespace detail +{ +//---------------------------------------------------------------------------// +// LAUNCHER +//---------------------------------------------------------------------------// +/*! + * Reset the num_pending counter based on the number of primaries. 
+ */ +struct SetPendingExecutor +{ + //// DATA //// + + CRefPtr params; + RefPtr state; + size_type primaries; + + //// FUNCTIONS //// + + // Set num_pending to the number of of primaries waiting to be generated + CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); +}; + +//---------------------------------------------------------------------------// + +void set_pending(CoreParams const& params, + CoreState& state, + size_type num_primaries); +void set_pending(CoreParams const& params, + CoreState& state, + size_type num_primaries); + +//---------------------------------------------------------------------------// +// INLINE DEFINITIONS +//---------------------------------------------------------------------------// +/*! + * Set num_pending to the number of primaries waiting to be generated. + */ +CELER_FORCEINLINE_FUNCTION void SetPendingExecutor::operator()(ThreadId tid) +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should call with only one thread + CELER_EXPECT(primaries > 0); + + auto counters = state->init.counters.data().get(); + counters->num_pending = primaries; + return; +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas From 00297ef3e89a1bdde4760dcbb9a022253dc3e560 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 8 Feb 2026 15:50:21 -0500 Subject: [PATCH 30/74] Allow zero primaries --- src/celeritas/global/detail/SetPending.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/celeritas/global/detail/SetPending.hh b/src/celeritas/global/detail/SetPending.hh index f0dc4c599c..76d2f82fde 100644 --- a/src/celeritas/global/detail/SetPending.hh +++ b/src/celeritas/global/detail/SetPending.hh @@ -60,7 +60,6 @@ CELER_FORCEINLINE_FUNCTION void SetPendingExecutor::operator()(ThreadId tid) CELER_EXPECT(params); CELER_EXPECT(state); CELER_EXPECT(tid.get() == 0); // Should call with only one thread - CELER_EXPECT(primaries > 0); 
auto counters = state->init.counters.data().get(); counters->num_pending = primaries; From 3a40fcb1d360e6757fc4f886021e159dea0cfcf6 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Thu, 12 Feb 2026 13:58:39 -0500 Subject: [PATCH 31/74] Update Stepper functors to remove core state counter synchronization --- src/celeritas/CMakeLists.txt | 2 +- src/celeritas/global/Stepper.cc | 11 ++-- src/celeritas/global/detail/SetPending.cc | 45 -------------- src/celeritas/global/detail/SetPending.cu | 37 ------------ src/celeritas/global/detail/SetPending.hh | 71 ----------------------- 5 files changed, 5 insertions(+), 161 deletions(-) delete mode 100644 src/celeritas/global/detail/SetPending.cc delete mode 100644 src/celeritas/global/detail/SetPending.cu delete mode 100644 src/celeritas/global/detail/SetPending.hh diff --git a/src/celeritas/CMakeLists.txt b/src/celeritas/CMakeLists.txt index 7320f9aeb4..c5d33914d5 100644 --- a/src/celeritas/CMakeLists.txt +++ b/src/celeritas/CMakeLists.txt @@ -384,7 +384,7 @@ celeritas_polysource(em/model/SeltzerBergerModel) celeritas_polysource(em/model/CoulombScatteringModel) celeritas_polysource(geo/detail/BoundaryAction) celeritas_polysource(global/detail/KillActive) -celeritas_polysource(global/detail/SetPending) +celeritas_polysource(global/detail/SetGenerated) celeritas_polysource(global/detail/TrackSlotUtils) celeritas_polysource(mucf/model/DTMixMucfModel) celeritas_polysource(neutron/model/ChipsNeutronElasticModel) diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index 39ea00769f..fb0de7ae46 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -22,7 +22,7 @@ #include "CoreParams.hh" #include "detail/KillActive.hh" -#include "detail/SetPending.hh" +#include "detail/SetGenerated.hh" namespace celeritas { @@ -129,11 +129,10 @@ template auto Stepper::operator()() -> result_type { ScopedProfiling profile_this{"step"}; - auto counters = state_->sync_get_counters(); - 
counters.num_generated = 0; - state_->sync_put_counters(counters); + // Initialize the num_generated counter to zero + detail::set_generated(*params_, *state_); actions_->step(*params_, *state_); - counters = state_->sync_get_counters(); + auto counters = state_->sync_get_counters(); // Get the number of track initializers and active tracks result_type result; @@ -167,8 +166,6 @@ auto Stepper::operator()(SpanConstPrimary primaries) -> result_type << "event number " << max_id->event_id.unchecked_get() << " exceeds max_events=" << params_->init()->max_events()); - // Reset the num_pending counter to the number of primaries - detail::set_pending(*params_, *state_, primaries.size()); primaries_action_->insert(*params_, *state_, primaries); return (*this)(); diff --git a/src/celeritas/global/detail/SetPending.cc b/src/celeritas/global/detail/SetPending.cc deleted file mode 100644 index e0b3442f11..0000000000 --- a/src/celeritas/global/detail/SetPending.cc +++ /dev/null @@ -1,45 +0,0 @@ -//------------------------------- -*- C++ -*- -------------------------------// -// Copyright Celeritas contributors: see top-level COPYRIGHT file for details -// SPDX-License-Identifier: (Apache-2.0 OR MIT) -//---------------------------------------------------------------------------// -//! \file celeritas/global/detail/SetPending.cc -//---------------------------------------------------------------------------// -#include "SetPending.hh" - -#include "corecel/Assert.hh" -#include "corecel/Types.hh" - -#include "../ActionLauncher.hh" -#include "../CoreParams.hh" -#include "../CoreState.hh" - -namespace celeritas -{ -namespace detail -{ -//---------------------------------------------------------------------------// -/*! - * Reset the num_pending counter to the number of generated primaries. 
- */ -void set_pending(CoreParams const& params, - CoreState& state, - size_type num_primaries) -{ - SetPendingExecutor execute_thread{ - params.ptr(), state.ptr(), num_primaries}; - launch_core(1, "set-pending", params, state, execute_thread); -} - -//---------------------------------------------------------------------------// -// DEVICE-DISABLED IMPLEMENTATION -//---------------------------------------------------------------------------// -#if !CELER_USE_DEVICE -void set_pending(CoreParams const&, CoreState&, size_type) -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} -#endif - -//---------------------------------------------------------------------------// -} // namespace detail -} // namespace celeritas diff --git a/src/celeritas/global/detail/SetPending.cu b/src/celeritas/global/detail/SetPending.cu deleted file mode 100644 index 52074f9022..0000000000 --- a/src/celeritas/global/detail/SetPending.cu +++ /dev/null @@ -1,37 +0,0 @@ -//------------------------------ -*- cuda -*- -------------------------------// -// Copyright Celeritas contributors: see top-level COPYRIGHT file for details -// SPDX-License-Identifier: (Apache-2.0 OR MIT) -//---------------------------------------------------------------------------// -//! \file celeritas/global/detail/SetPending.cu -//---------------------------------------------------------------------------// -#include "SetPending.hh" - -#include "corecel/Assert.hh" -#include "corecel/Types.hh" - -#include "../ActionLauncher.device.hh" -#include "../CoreParams.hh" -#include "../CoreState.hh" - -namespace celeritas -{ -namespace detail -{ -//---------------------------------------------------------------------------// -/*! - * Reset the num_pending counter to the number of generated primaries. 
- */ -void set_pending(CoreParams const& params, - CoreState& state, - size_type num_primaries) -{ - SetPendingExecutor execute_thread{ - params.ptr(), state.ptr(), num_primaries}; - static ActionLauncher const launch_kernel( - "set-pending"); - launch_kernel(1, state, execute_thread); -} - -//---------------------------------------------------------------------------// -} // namespace detail -} // namespace celeritas diff --git a/src/celeritas/global/detail/SetPending.hh b/src/celeritas/global/detail/SetPending.hh deleted file mode 100644 index 76d2f82fde..0000000000 --- a/src/celeritas/global/detail/SetPending.hh +++ /dev/null @@ -1,71 +0,0 @@ -//------------------------------- -*- C++ -*- -------------------------------// -// Copyright Celeritas contributors: see top-level COPYRIGHT file for details -// SPDX-License-Identifier: (Apache-2.0 OR MIT) -//---------------------------------------------------------------------------// -//! \file celeritas/global/detail/SetPending.hh -//---------------------------------------------------------------------------// -#pragma once - -#include "corecel/Macros.hh" -#include "celeritas/Types.hh" - -#include "../CoreState.hh" - -namespace celeritas -{ -//---------------------------------------------------------------------------// -class CoreParams; -template -class CoreState; - -namespace detail -{ -//---------------------------------------------------------------------------// -// LAUNCHER -//---------------------------------------------------------------------------// -/*! - * Reset the num_pending counter based on the number of primaries. 
- */ -struct SetPendingExecutor -{ - //// DATA //// - - CRefPtr params; - RefPtr state; - size_type primaries; - - //// FUNCTIONS //// - - // Set num_pending to the number of of primaries waiting to be generated - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); -}; - -//---------------------------------------------------------------------------// - -void set_pending(CoreParams const& params, - CoreState& state, - size_type num_primaries); -void set_pending(CoreParams const& params, - CoreState& state, - size_type num_primaries); - -//---------------------------------------------------------------------------// -// INLINE DEFINITIONS -//---------------------------------------------------------------------------// -/*! - * Set num_pending to the number of primaries waiting to be generated. - */ -CELER_FORCEINLINE_FUNCTION void SetPendingExecutor::operator()(ThreadId tid) -{ - CELER_EXPECT(params); - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should call with only one thread - - auto counters = state->init.counters.data().get(); - counters->num_pending = primaries; - return; -} - -//---------------------------------------------------------------------------// -} // namespace detail -} // namespace celeritas From 59cb44aa5a534c74aca921b062b38dbd14fddc63 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Thu, 12 Feb 2026 14:00:08 -0500 Subject: [PATCH 32/74] Update Stepper functors to remove core state counter synchronization --- src/celeritas/global/detail/SetGenerated.cc | 43 +++++++++++++ src/celeritas/global/detail/SetGenerated.cu | 35 +++++++++++ src/celeritas/global/detail/SetGenerated.hh | 67 +++++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 src/celeritas/global/detail/SetGenerated.cc create mode 100644 src/celeritas/global/detail/SetGenerated.cu create mode 100644 src/celeritas/global/detail/SetGenerated.hh diff --git a/src/celeritas/global/detail/SetGenerated.cc b/src/celeritas/global/detail/SetGenerated.cc new 
file mode 100644 index 0000000000..8af43196df --- /dev/null +++ b/src/celeritas/global/detail/SetGenerated.cc @@ -0,0 +1,43 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/global/detail/SetGenerated.cc +//---------------------------------------------------------------------------// +#include "SetGenerated.hh" + +#include "corecel/Assert.hh" +#include "corecel/Types.hh" + +#include "../ActionLauncher.hh" +#include "../CoreParams.hh" +#include "../CoreState.hh" + +namespace celeritas +{ +namespace detail +{ +//---------------------------------------------------------------------------// +/*! + * Initialize the num_generated counter to zero. + */ +void set_generated(CoreParams const& params, CoreState& state) +{ + SetGeneratedExecutor execute_thread{params.ptr(), + state.ptr()}; + launch_core(1, "set-generated", params, state, execute_thread); +} + +//---------------------------------------------------------------------------// +// DEVICE-DISABLED IMPLEMENTATION +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +void set_generated(CoreParams const&, CoreState&, size_type) +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas diff --git a/src/celeritas/global/detail/SetGenerated.cu b/src/celeritas/global/detail/SetGenerated.cu new file mode 100644 index 0000000000..9fb99b30ae --- /dev/null +++ b/src/celeritas/global/detail/SetGenerated.cu @@ -0,0 +1,35 @@ +//------------------------------ -*- cuda -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for
details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/global/detail/SetGenerated.cu +//---------------------------------------------------------------------------// +#include "SetGenerated.hh" + +#include "corecel/Assert.hh" +#include "corecel/Types.hh" + +#include "../ActionLauncher.device.hh" +#include "../CoreParams.hh" +#include "../CoreState.hh" + +namespace celeritas +{ +namespace detail +{ +//---------------------------------------------------------------------------// +/*! + * Initialize the num_generated counter to zero. + */ +void set_generated(CoreParams const& params, CoreState& state) +{ + SetGeneratedExecutor execute_thread{params.ptr(), + state.ptr()}; + static ActionLauncher const launch_kernel( + "set-generated"); + launch_kernel(1, state, execute_thread); +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas diff --git a/src/celeritas/global/detail/SetGenerated.hh b/src/celeritas/global/detail/SetGenerated.hh new file mode 100644 index 0000000000..820068431e --- /dev/null +++ b/src/celeritas/global/detail/SetGenerated.hh @@ -0,0 +1,67 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//!
\file celeritas/global/detail/SetGenerated.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Macros.hh" +#include "celeritas/Types.hh" + +#include "../CoreState.hh" + +namespace celeritas +{ +//---------------------------------------------------------------------------// +class CoreParams; +template +class CoreState; + +namespace detail +{ +//---------------------------------------------------------------------------// +// LAUNCHER +//---------------------------------------------------------------------------// +/*! + * Initialize the num_generated counter to zero. + */ +struct SetGeneratedExecutor +{ + //// DATA //// + + CRefPtr params; + RefPtr state; + + //// FUNCTIONS //// + + // Initialize the num_generated counter to zero + CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); +}; + +//---------------------------------------------------------------------------// + +void set_generated(CoreParams const& params, CoreState& state); +void set_generated(CoreParams const& params, + CoreState& state); + +//---------------------------------------------------------------------------// +// INLINE DEFINITIONS +//---------------------------------------------------------------------------// +/*! + * Initialize the num_generated counter to zero.
+ */ +CELER_FORCEINLINE_FUNCTION void SetGeneratedExecutor::operator()(ThreadId tid) +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should call with only one thread + + auto counters = state->init.counters.data().get(); + counters->num_generated = 0; + return; +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas From 5a5eb8b071f0f13036494626bd1faa32390e5063 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Thu, 12 Feb 2026 14:14:44 -0500 Subject: [PATCH 33/74] Remove extraneous parameter --- src/celeritas/global/detail/SetGenerated.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/detail/SetGenerated.cc b/src/celeritas/global/detail/SetGenerated.cc index 8af43196df..8fa7978c6d 100644 --- a/src/celeritas/global/detail/SetGenerated.cc +++ b/src/celeritas/global/detail/SetGenerated.cc @@ -32,7 +32,7 @@ void set_generated(CoreParams const& params, CoreState& state) // DEVICE-DISABLED IMPLEMENTATION //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -void set_generated(CoreParams const&, CoreState&, size_type) +void set_generated(CoreParams const&, CoreState&) { CELER_NOT_CONFIGURED("CUDA OR HIP"); } From 26cf3e53d4734278dcb20b24fe3d572058b4a0b4 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 16 Feb 2026 13:54:16 -0500 Subject: [PATCH 34/74] Use KernelLauncher instead of ActionLauncher --- src/celeritas/global/ActionLauncher.device.hh | 19 ------------------- src/celeritas/global/detail/SetGenerated.cu | 6 +++--- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/celeritas/global/ActionLauncher.device.hh b/src/celeritas/global/ActionLauncher.device.hh index 0d247435fa..a1179e638f 100644 --- a/src/celeritas/global/ActionLauncher.device.hh +++ b/src/celeritas/global/ActionLauncher.device.hh @@ -64,12 +64,6 @@ class ActionLauncher : public 
KernelLauncher // Launch a kernel for a thread range or number of threads using KernelLauncher::operator(); - // Launch a kernel for the wrapped executor with a specific number of - // threads - void operator()(size_type num_threads, - CoreState const& state, - F const& execute_thread) const; - // Launch a kernel for the wrapped executor void operator()(CoreState const& state, F const& execute_thread) const; @@ -102,19 +96,6 @@ ActionLauncher::ActionLauncher(StepActionT const& action, { } -//---------------------------------------------------------------------------// -/*! - * Launch a kernel for the wrapped executor with a specific number of threads. - */ -template -void ActionLauncher::operator()(size_type num_threads, - CoreState const& state, - F const& execute_thread) const -{ - return (*this)( - range(ThreadId{num_threads}), state.stream_id(), execute_thread); -} - //---------------------------------------------------------------------------// /*! * Launch a kernel for the wrapped executor. 
diff --git a/src/celeritas/global/detail/SetGenerated.cu b/src/celeritas/global/detail/SetGenerated.cu index 9fb99b30ae..ed8bbe7c37 100644 --- a/src/celeritas/global/detail/SetGenerated.cu +++ b/src/celeritas/global/detail/SetGenerated.cu @@ -8,8 +8,8 @@ #include "corecel/Assert.hh" #include "corecel/Types.hh" +#include "corecel/sys/KernelLauncher.device.hh" -#include "../ActionLauncher.device.hh" #include "../CoreParams.hh" #include "../CoreState.hh" @@ -25,9 +25,9 @@ void set_generated(CoreParams const& params, CoreState& state) { SetGeneratedExecutor execute_thread{params.ptr(), state.ptr()}; - static ActionLauncher const launch_kernel( + static KernelLauncher const launch_kernel( "set-generated"); - launch_kernel(1, state, execute_thread); + launch_kernel(1, state.stream_id(), execute_thread); } //---------------------------------------------------------------------------// From 9ce90bae0c3246e90c323bff4b3954d16036cd0f Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 2 Mar 2026 13:13:11 -0500 Subject: [PATCH 35/74] Minor Doxygen Comment Change --- src/celeritas/optical/TrackInitData.hh | 2 +- src/celeritas/track/TrackInitData.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/celeritas/optical/TrackInitData.hh b/src/celeritas/optical/TrackInitData.hh index e64b620797..da09f1fa50 100644 --- a/src/celeritas/optical/TrackInitData.hh +++ b/src/celeritas/optical/TrackInitData.hh @@ -29,7 +29,7 @@ namespace optical * - \c initializers stores the data for track initializers and secondaries * waiting to be turned into new tracks and can be any size up to \c * capacity. - * - \c vacancies stores the \c TrackSlotid of the tracks that have been + * - \c vacancies stores the \c TrackSlotId of the tracks that have been * killed; the size will be <= the number of track states. * - \c counters stores the number of tracks with a given status and is updated * during each step of the simulation of an event. 
diff --git a/src/celeritas/track/TrackInitData.hh b/src/celeritas/track/TrackInitData.hh index 8b2c32de71..b00e4501dd 100644 --- a/src/celeritas/track/TrackInitData.hh +++ b/src/celeritas/track/TrackInitData.hh @@ -87,7 +87,7 @@ struct TrackInitializer * \c max_events. * - \c initializers stores the data for primaries and secondaries waiting to * be turned into new tracks and can be any size up to \c capacity. - * - \c vacancies stores the \c TrackSlotid of the tracks that have been + * - \c vacancies stores the \c TrackSlotId of the tracks that have been * killed; the size will be <= the number of track states. * - \c track_counters stores the total number of particles that have been * created per event. From 37cca9b84c9767db70b9b493aa5038f7350ab2f6 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 2 Mar 2026 13:24:11 -0500 Subject: [PATCH 36/74] First try at absorbing copy_if_vacant return value into the function --- .../optical/action/LocateVacanciesAction.cc | 7 +- .../action/detail/TrackInitAlgorithms.cc | 15 +++-- .../action/detail/TrackInitAlgorithms.cu | 64 +++++++++---------- .../action/detail/TrackInitAlgorithms.hh | 21 +++--- 4 files changed, 55 insertions(+), 52 deletions(-) diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cc b/src/celeritas/optical/action/LocateVacanciesAction.cc index 20fa9020e8..cb4af058cb 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cc +++ b/src/celeritas/optical/action/LocateVacanciesAction.cc @@ -51,13 +51,12 @@ void LocateVacanciesAction::step(CoreParams const&, CoreStateDevice& state) cons template void LocateVacanciesAction::step_impl(CoreState& state) const { - auto counters = state.sync_get_counters(); - // Compact the IDs of the inactive tracks, getting the sorted indices of // the empty slots - counters.num_vacancies = detail::copy_if_vacant( - state.ref().sim.status, state.ref().init.vacancies, state.stream_id()); + detail::copy_if_vacant( + state.ref().sim.status, state.ref().init, 
state.stream_id()); + auto counters = state.sync_get_counters(); counters.num_alive = state.size() - counters.num_vacancies; state.sync_put_counters(counters); } diff --git a/src/celeritas/optical/action/detail/TrackInitAlgorithms.cc b/src/celeritas/optical/action/detail/TrackInitAlgorithms.cc index f61db0ef50..2cfc501f36 100644 --- a/src/celeritas/optical/action/detail/TrackInitAlgorithms.cc +++ b/src/celeritas/optical/action/detail/TrackInitAlgorithms.cc @@ -18,14 +18,14 @@ namespace detail * * \return Number of vacant track slots */ -size_type copy_if_vacant(TrackStatusRef const& status, - TrackSlotRef const& vacancies, - StreamId) +void copy_if_vacant(TrackStatusRef const& status, + TrackInitRef const& init, + StreamId) { - CELER_EXPECT(status.size() == vacancies.size()); + CELER_EXPECT(status.size() == init.vacancies.size()); auto* data = status.data().get(); - auto* result = vacancies.data().get(); + auto* result = init.vacancies.data().get(); size_type tid = 0; auto* const stop = data + status.size(); @@ -37,7 +37,10 @@ size_type copy_if_vacant(TrackStatusRef const& status, } ++tid; } - return result - vacancies.data().get(); + + auto counters = init.counters.data().get(); + counters->num_vacancies = result - init.vacancies.data().get(); + return; } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/action/detail/TrackInitAlgorithms.cu b/src/celeritas/optical/action/detail/TrackInitAlgorithms.cu index 8e7ab434e5..f0d62ea4af 100644 --- a/src/celeritas/optical/action/detail/TrackInitAlgorithms.cu +++ b/src/celeritas/optical/action/detail/TrackInitAlgorithms.cu @@ -64,44 +64,49 @@ struct TransformType * * \return Number of vacant track slots */ -size_type copy_if_vacant(TrackStatusRef const& status, - TrackSlotRef const& vacancies, - StreamId stream_id) +void copy_if_vacant(TrackStatusRef const& status, + TrackInitRef const& init, + StreamId stream_id) { - CELER_EXPECT(status.size() == 
vacancies.size()); + CELER_EXPECT(status.size() == init.vacancies.size()); ScopedProfiling profile_this{"copy-if-vacant"}; -#ifdef CELER_USE_THRUST auto start = thrust::make_transform_iterator( thrust::make_counting_iterator(0), TransformType{}); - auto result = device_pointer_cast(vacancies.data()); + auto result = device_pointer_cast(init.vacancies.data()); + auto counters = device_pointer_cast(init.counters.data()); +#ifdef CELER_USE_THRUST auto end = thrust::copy_if(thrust_execute_on(stream_id), start, - start + vacancies.size(), + start + init.vacancies.size(), device_pointer_cast(status.data()), result, IsVacant{}); CELER_DEVICE_API_CALL(PeekAtLastError()); - return end - result; + // New size of the vacancy vector + auto host_counters + = ItemCopier{stream_id}(counters.get()); + host_counters.num_vacancies = end - result; + Copier copy{{counters.get(), 1}, + stream_id}; + copy(MemSpace::host, {&host_counters, 1}); + stream.sync(); + return; #else auto& stream = device().stream(stream_id); - DeviceVector num_vacancies{1, stream_id}; - auto start = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), TransformType{}); # if CELER_CUB_HAS_FLAGGEDIF // Calling with nullptr causes the function to return the amount of working - // space needed instead of invoking the kernel. 
+ // space needed instead of invoking the kernel size_t temp_storage_bytes = 0; auto flags = device_pointer_cast(status.data()); - auto results = device_pointer_cast(vacancies.data()); cub::DeviceSelect::FlaggedIf(nullptr, temp_storage_bytes, start, flags, - results, - num_vacancies.data(), - vacancies.size(), + result, + &(counters->num_vacancies), + init.vacancies.size(), IsVacant{}, stream.get()); // Allocate temporary storage @@ -110,9 +115,9 @@ size_type copy_if_vacant(TrackStatusRef const& status, temp_storage_bytes, start, flags, - results, - num_vacancies.data(), - vacancies.size(), + result, + &(counters->num_vacancies), + init.vacancies.size(), IsVacant{}, stream.get()); # else @@ -133,16 +138,15 @@ size_type copy_if_vacant(TrackStatusRef const& status, IsVacant{}); # endif // Calling with nullptr causes the function to return the amount of working - // space needed instead of invoking the kernel. + // space needed instead of invoking the kernel size_t temp_storage_bytes = 0; - auto results = device_pointer_cast(vacancies.data()); auto cub_error_code = cub::DeviceSelect::Flagged(nullptr, temp_storage_bytes, start, flags.data(), - results, - num_vacancies.data(), - vacancies.size(), + result, + &(counters->num_vacancies), + init.vacancies.size(), stream.get()); CELER_DISCARD(cub_error_code); // Allocate temporary storage @@ -151,18 +155,14 @@ size_type copy_if_vacant(TrackStatusRef const& status, temp_storage_bytes, start, flags.data(), - results, - num_vacancies.data(), - vacancies.size(), + result, + &(counters->num_vacancies), + init.vacancies.size(), stream.get()); CELER_DISCARD(cub_error_code); # endif CELER_DEVICE_API_CALL(PeekAtLastError()); - - auto result = ItemCopier{stream_id}(num_vacancies.data()); - - stream.sync(); - return result; + return; #endif } diff --git a/src/celeritas/optical/action/detail/TrackInitAlgorithms.hh b/src/celeritas/optical/action/detail/TrackInitAlgorithms.hh index 86f9cb276c..c5dfb205c1 100644 --- 
a/src/celeritas/optical/action/detail/TrackInitAlgorithms.hh +++ b/src/celeritas/optical/action/detail/TrackInitAlgorithms.hh @@ -11,6 +11,7 @@ #include "corecel/Types.hh" #include "corecel/data/Collection.hh" #include "celeritas/Types.hh" +#include "celeritas/optical/TrackInitData.hh" namespace celeritas { @@ -20,7 +21,7 @@ namespace detail { //---------------------------------------------------------------------------// template -using TrackSlotRef = StateCollection; +using TrackInitRef = TrackInitStateData; template using TrackStatusRef = StateCollection; @@ -36,20 +37,20 @@ struct IsVacant //---------------------------------------------------------------------------// // Compact the \c TrackSlotIds of the inactive tracks -size_type copy_if_vacant(TrackStatusRef const&, - TrackSlotRef const&, - StreamId); -size_type copy_if_vacant(TrackStatusRef const&, - TrackSlotRef const&, - StreamId); +void copy_if_vacant(TrackStatusRef const&, + TrackInitRef const&, + StreamId); +void copy_if_vacant(TrackStatusRef const&, + TrackInitRef const&, + StreamId); //---------------------------------------------------------------------------// // INLINE DEFINITIONS //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -inline size_type copy_if_vacant(TrackStatusRef const&, - TrackSlotRef const&, - StreamId) +inline void copy_if_vacant(TrackStatusRef const&, + TrackInitRef const&, + StreamId) { CELER_NOT_CONFIGURED("CUDA or HIP"); } From a51e6d69e0363e125388d5244306a5bcc489e14f Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 2 Mar 2026 18:47:19 -0500 Subject: [PATCH 37/74] Fix tests of new copy_if_vacant function --- test/celeritas/optical/OpticalUtils.test.cc | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/celeritas/optical/OpticalUtils.test.cc b/test/celeritas/optical/OpticalUtils.test.cc index 1565954160..848b739ce8 100644 --- a/test/celeritas/optical/OpticalUtils.test.cc +++ 
b/test/celeritas/optical/OpticalUtils.test.cc @@ -16,6 +16,7 @@ #include "corecel/data/CollectionBuilder.hh" #include "corecel/data/Ref.hh" #include "corecel/math/Algorithms.hh" +#include "celeritas/optical/CoreState.hh" #include "celeritas/optical/action/detail/TrackInitAlgorithms.hh" #include "celeritas/optical/gen/detail/GeneratorAlgorithms.hh" @@ -42,19 +43,21 @@ std::vector locate_vacancies(std::vector const& input) StateVal host_status; make_builder(&host_status).insert_back(input.begin(), input.end()); StateVal status(host_status); + optical::TrackInitStateData init; - StateVal vacancies; - resize(&vacancies, status.size()); + resize(&init, StreamId{0}, host_status.size()); StateRef status_ref(status); - StateRef vacancies_ref(vacancies); - size_type num_vacancies = optical::detail::copy_if_vacant( - status_ref, vacancies_ref, StreamId{0}); + optical::TrackInitStateData init_ref; + init_ref = init; + optical::detail::copy_if_vacant(status_ref, init_ref, StreamId{0}); - auto host_vacancies = copy_to_host(vacancies); + auto host_vacancies = copy_to_host(init.vacancies); + auto host_counters_copy = copy_to_host(init.counters); + auto* host_counters = host_counters_copy.data().get(); std::vector result; - for (auto tid : range(TrackSlotId{num_vacancies})) + for (auto tid : range(TrackSlotId{host_counters->num_vacancies})) { result.push_back(static_cast(host_vacancies[tid].unchecked_get())); } From 066bc73b46f759462d0c3fa1626c598488929a5a Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 8 Mar 2026 21:50:48 -0400 Subject: [PATCH 38/74] Move Stepper calls and copy_if_vacant function to use functors --- src/celeritas/CMakeLists.txt | 1 + src/celeritas/global/Stepper.cc | 2 +- src/celeritas/global/detail/SetGenerated.cc | 5 +- src/celeritas/global/detail/SetGenerated.cu | 3 +- src/celeritas/global/detail/SetGenerated.hh | 67 ------------------- src/celeritas/optical/CoreState.hh | 1 + .../optical/action/LocateVacanciesAction.cc | 45 ++++++++++--- 
.../optical/action/LocateVacanciesAction.hh | 4 ++ 8 files changed, 47 insertions(+), 81 deletions(-) delete mode 100644 src/celeritas/global/detail/SetGenerated.hh diff --git a/src/celeritas/CMakeLists.txt b/src/celeritas/CMakeLists.txt index c5d33914d5..6bb496ab2d 100644 --- a/src/celeritas/CMakeLists.txt +++ b/src/celeritas/CMakeLists.txt @@ -395,6 +395,7 @@ celeritas_polysource(optical/model/RayleighModel) celeritas_polysource(optical/model/WavelengthShiftModel) celeritas_polysource(optical/action/AlongStepAction) celeritas_polysource(optical/action/DiscreteSelectAction) +celeritas_polysource(optical/action/LocateVacanciesAction) celeritas_polysource(optical/action/PreStepAction) celeritas_polysource(optical/action/TrackingCutAction) celeritas_polysource(optical/action/detail/TrackInitAlgorithms) diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index fb0de7ae46..cefcc87615 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -22,7 +22,7 @@ #include "CoreParams.hh" #include "detail/KillActive.hh" -#include "detail/SetGenerated.hh" +#include "detail/SetGeneratedExecutor.hh" namespace celeritas { diff --git a/src/celeritas/global/detail/SetGenerated.cc b/src/celeritas/global/detail/SetGenerated.cc index 8fa7978c6d..776c853656 100644 --- a/src/celeritas/global/detail/SetGenerated.cc +++ b/src/celeritas/global/detail/SetGenerated.cc @@ -4,11 +4,10 @@ //---------------------------------------------------------------------------// //! 
\file celeritas/global/detail/SetGenerated.cc //---------------------------------------------------------------------------// -#include "SetGenerated.hh" - #include "corecel/Assert.hh" #include "corecel/Types.hh" +#include "SetGeneratedExecutor.hh" #include "../ActionLauncher.hh" #include "../CoreParams.hh" #include "../CoreState.hh" @@ -32,7 +31,7 @@ void set_generated(CoreParams const& params, CoreState& state) // DEVICE-DISABLED IMPLEMENTATION //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -void set_generated(CoreParams const&, CoreState&) +void set_generated(CoreParams const&, CoreState&, size_type) { CELER_NOT_CONFIGURED("CUDA OR HIP"); } diff --git a/src/celeritas/global/detail/SetGenerated.cu b/src/celeritas/global/detail/SetGenerated.cu index ed8bbe7c37..01c92c6223 100644 --- a/src/celeritas/global/detail/SetGenerated.cu +++ b/src/celeritas/global/detail/SetGenerated.cu @@ -4,12 +4,11 @@ //---------------------------------------------------------------------------// //! \file celeritas/global/detail/SetGenerated.cu //---------------------------------------------------------------------------// -#include "SetGenerated.hh" - #include "corecel/Assert.hh" #include "corecel/Types.hh" #include "corecel/sys/KernelLauncher.device.hh" +#include "SetGeneratedExecutor.hh" #include "../CoreParams.hh" #include "../CoreState.hh" diff --git a/src/celeritas/global/detail/SetGenerated.hh b/src/celeritas/global/detail/SetGenerated.hh deleted file mode 100644 index 820068431e..0000000000 --- a/src/celeritas/global/detail/SetGenerated.hh +++ /dev/null @@ -1,67 +0,0 @@ -//------------------------------- -*- C++ -*- -------------------------------// -// Copyright Celeritas contributors: see top-level COPYRIGHT file for details -// SPDX-License-Identifier: (Apache-2.0 OR MIT) -//---------------------------------------------------------------------------// -//! 
\file celeritas/global/detail/SetGenerated.hh -//---------------------------------------------------------------------------// -#pragma once - -#include "corecel/Macros.hh" -#include "celeritas/Types.hh" - -#include "../CoreState.hh" - -namespace celeritas -{ -//---------------------------------------------------------------------------// -class CoreParams; -template -class CoreState; - -namespace detail -{ -//---------------------------------------------------------------------------// -// LAUNCHER -//---------------------------------------------------------------------------// -/*! - * // Initialize the num_generated counter to zero. - */ -struct SetGeneratedExecutor -{ - //// DATA //// - - CRefPtr params; - RefPtr state; - - //// FUNCTIONS //// - - // Initialize the num_generated counter to zero - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); -}; - -//---------------------------------------------------------------------------// - -void set_generated(CoreParams const& params, CoreState& state); -void set_generated(CoreParams const& params, - CoreState& state); - -//---------------------------------------------------------------------------// -// INLINE DEFINITIONS -//---------------------------------------------------------------------------// -/*! - * Initialize the num_generated counter to zero. 
- */ -CELER_FORCEINLINE_FUNCTION void SetGeneratedExecutor::operator()(ThreadId tid) -{ - CELER_EXPECT(params); - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should call with only one thread - - auto counters = state->init.counters.data().get(); - counters->num_generated = 0; - return; -} - -//---------------------------------------------------------------------------// -} // namespace detail -} // namespace celeritas diff --git a/src/celeritas/optical/CoreState.hh b/src/celeritas/optical/CoreState.hh index 66435df662..cc29ece801 100644 --- a/src/celeritas/optical/CoreState.hh +++ b/src/celeritas/optical/CoreState.hh @@ -9,6 +9,7 @@ #include "corecel/cont/Span.hh" #include "corecel/data/AuxInterface.hh" #include "corecel/data/AuxStateVec.hh" +#include "corecel/data/DeviceVector.hh" #include "corecel/data/ObserverPtr.hh" #include "corecel/data/StateDataStore.hh" #include "corecel/random/params/RngParamsFwd.hh" diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cc b/src/celeritas/optical/action/LocateVacanciesAction.cc index cb4af058cb..7bf96fb049 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cc +++ b/src/celeritas/optical/action/LocateVacanciesAction.cc @@ -11,7 +11,10 @@ #include "celeritas/optical/CoreParams.hh" #include "celeritas/optical/CoreState.hh" +#include "ActionLauncher.hh" + #include "detail/TrackInitAlgorithms.hh" +#include "detail/UpdateAliveExecutor.hh" namespace celeritas { @@ -30,23 +33,28 @@ LocateVacanciesAction::LocateVacanciesAction(ActionId aid) /*! * Execute the action with host data. */ -void LocateVacanciesAction::step(CoreParams const&, CoreStateHost& state) const +void LocateVacanciesAction::step(CoreParams const& params, + CoreStateHost& state) const { - return this->step_impl(state); + this->step_impl(state); + return this->update_alive(params, state, state.size()); } //---------------------------------------------------------------------------// /*! * Execute the action with device data. 
*/ -void LocateVacanciesAction::step(CoreParams const&, CoreStateDevice& state) const +void LocateVacanciesAction::step(CoreParams const& params, + CoreStateDevice& state) const { - return this->step_impl(state); + this->step_impl(state); + return this->update_alive(params, state, state.size()); } //---------------------------------------------------------------------------// /*! - * Initialize optical track states. + * Compact the IDs of the inactive slots to find the vacancies and update the + * number of alive slots accordingly. */ template void LocateVacanciesAction::step_impl(CoreState& state) const @@ -55,11 +63,32 @@ void LocateVacanciesAction::step_impl(CoreState& state) const // the empty slots detail::copy_if_vacant( state.ref().sim.status, state.ref().init, state.stream_id()); +} - auto counters = state.sync_get_counters(); - counters.num_alive = state.size() - counters.num_vacancies; - state.sync_put_counters(counters); +//---------------------------------------------------------------------------// +/*! + * Update the number of alive slots as the empty slots have been compacted. 
+ */ +void LocateVacanciesAction::update_alive(CoreParams const& params, + CoreStateHost& state, + size_type state_size) const +{ + detail::UpdateAliveExecutor execute_thread{ + params.ptr(), state.ptr(), state_size}; + launch_action(1, execute_thread); +} + +//---------------------------------------------------------------------------// +// INLINE DEFINITIONS +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +inline void LocateVacanciesAction::update_alive(CoreParams const&, + CoreStateDevice&, + size_type) const +{ + CELER_NOT_CONFIGURED("CUDA or HIP"); } +#endif //---------------------------------------------------------------------------// } // namespace optical diff --git a/src/celeritas/optical/action/LocateVacanciesAction.hh b/src/celeritas/optical/action/LocateVacanciesAction.hh index 3e66f8354e..c407440d78 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.hh +++ b/src/celeritas/optical/action/LocateVacanciesAction.hh @@ -8,6 +8,8 @@ #include "ActionInterface.hh" +#include "detail/UpdateAliveExecutor.hh" + namespace celeritas { namespace optical @@ -37,6 +39,8 @@ class LocateVacanciesAction final : public OpticalStepActionInterface, private: template void step_impl(CoreState&) const; + void update_alive(CoreParams const&, CoreStateHost&, size_type) const; + void update_alive(CoreParams const&, CoreStateDevice&, size_type) const; }; //---------------------------------------------------------------------------// From 40ba5073223fd375447227a7027f3c56afa608ca Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Thu, 12 Mar 2026 14:42:41 -0400 Subject: [PATCH 39/74] Add new functors and initial implementation of generating and transporting optical photons --- src/accel/LocalOpticalGenOffload.cc | 44 ++++++++---- src/accel/detail/UpdatePendingExecutor.hh | 64 ++++++++++++++++++ .../global/detail/SetGeneratedExecutor.hh | 67 +++++++++++++++++++ .../optical/action/LocateVacanciesAction.cu | 41 
++++++++++++ .../action/detail/UpdateAliveExecutor.hh | 64 ++++++++++++++++++ 5 files changed, 267 insertions(+), 13 deletions(-) create mode 100644 src/accel/detail/UpdatePendingExecutor.hh create mode 100644 src/celeritas/global/detail/SetGeneratedExecutor.hh create mode 100644 src/celeritas/optical/action/LocateVacanciesAction.cu create mode 100644 src/celeritas/optical/action/detail/UpdateAliveExecutor.hh diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index 7b9a3e4e75..99d18d8003 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -14,6 +14,9 @@ #include "corecel/sys/ActionRegistry.hh" #include "corecel/sys/ActionRegistryOutput.hh" #include "corecel/sys/Device.hh" +#include "corecel/sys/KernelLauncher.hh" +#include "accel/detail/UpdatePendingExecutor.hh" +// #include "corecel/sys/KernelLauncher.device.hh" #include "corecel/sys/ScopedProfiling.hh" #include "geocel/GeantUtils.hh" #include "celeritas/global/CoreParams.hh" @@ -108,17 +111,14 @@ void LocalOpticalGenOffload::InitializeEvent(int id) CELER_EXPECT(id >= 0); event_id_ = id_cast(id); - if constexpr (CELERITAS_RESEED == CELERITAS_RESEED_TRACKSLOT) + + if (!(G4Threading::IsMultithreadedApplication() + && G4MTRunManager::SeedOncePerCommunication())) { - if (!(G4Threading::IsMultithreadedApplication() - && G4MTRunManager::SeedOncePerCommunication())) - { - // Since Geant4 schedules events dynamically, reseed the Celeritas - // RNGs using the Geant4 event ID for reproducibility. This - // guarantees that an event can be reproduced given the event ID. - state_->reseed(transport_->params()->rng(), - id_cast(id)); - } + // Since Geant4 schedules events dynamically, reseed the Celeritas RNGs + // using the Geant4 event ID for reproducibility. This guarantees that + // an event can be reproduced given the event ID. 
+ state_->reseed(transport_->params()->rng(), id_cast(id)); } } @@ -189,9 +189,27 @@ void LocalOpticalGenOffload::Flush() // Copy the buffered distributions to device generate_->insert(*state_, make_span(buffer_)); - auto counters = state_->sync_get_counters(); - counters.num_pending += num_photons_; - state_->sync_put_counters(counters); + // Update the number of primaries waiting to be generated based on the + // number of photons, using only one thread + auto const& optical_params = *transport_->params(); + // optical::detail::UpdatePendingExecutor execute_thread{ + // optical_params.ptr(), *s, num_photons_}; + if (celeritas::device()) + { + // auto* s = + // dynamic_cast*>(&(*state_)); + // optical::detail::UpdatePendingExecutor execute_thread{ + // optical_params.ptr(), s->ptr(), num_photons_}; + // launch_kernel(1, s->stream_id(), execute_thread); + } + else + { + auto* s = dynamic_cast*>(&(*state_)); + optical::detail::UpdatePendingExecutor execute_thread{ + optical_params.ptr(), s->ptr(), num_photons_}; + launch_kernel(1, execute_thread); + } + num_photons_ = 0; buffer_.clear(); diff --git a/src/accel/detail/UpdatePendingExecutor.hh b/src/accel/detail/UpdatePendingExecutor.hh new file mode 100644 index 0000000000..18c47785c6 --- /dev/null +++ b/src/accel/detail/UpdatePendingExecutor.hh @@ -0,0 +1,64 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! 
\file accel/detail/UpdatePendingExecutor.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Macros.hh" +#include "corecel/Types.hh" +#include "corecel/math/Algorithms.hh" +#include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/CoreTrackData.hh" +#include "celeritas/optical/CoreTrackView.hh" +#include "celeritas/track/Utils.hh" + +namespace celeritas +{ +namespace optical +{ +namespace detail +{ +//---------------------------------------------------------------------------// +// LAUNCHER +//---------------------------------------------------------------------------// +/*! + * Update the num_pending counter based on the generated photons from buffered + * optical distribution data. + */ +struct UpdatePendingExecutor +{ + //// DATA //// + + CRefPtr params; + RefPtr state; + size_type num_photons; + + //// FUNCTIONS //// + + // Update number of of primaries waiting to be generated + CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); +}; + +//---------------------------------------------------------------------------// +// INLINE DEFINITIONS +//---------------------------------------------------------------------------// +/*! + * Update number of primaries to be generated to include buffered optical + * photons. 
+ */ +CELER_FORCEINLINE_FUNCTION void UpdatePendingExecutor::operator()(ThreadId tid) +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should call with only one thread + + auto counters = state->init.counters.data().get(); + counters->num_pending += num_photons; +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace optical +} // namespace celeritas diff --git a/src/celeritas/global/detail/SetGeneratedExecutor.hh b/src/celeritas/global/detail/SetGeneratedExecutor.hh new file mode 100644 index 0000000000..cf577c931b --- /dev/null +++ b/src/celeritas/global/detail/SetGeneratedExecutor.hh @@ -0,0 +1,67 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/global/detail/SetGeneratedExecutor.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Macros.hh" +#include "celeritas/Types.hh" + +#include "../CoreState.hh" + +namespace celeritas +{ +//---------------------------------------------------------------------------// +class CoreParams; +template +class CoreState; + +namespace detail +{ +//---------------------------------------------------------------------------// +// LAUNCHER +//---------------------------------------------------------------------------// +/*! + * Initialize the num_generated counter to zero. 
+ */ +struct SetGeneratedExecutor +{ + //// DATA //// + + CRefPtr params; + RefPtr state; + + //// FUNCTIONS //// + + // Initialize the num_generated counter to zero + CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); +}; + +//---------------------------------------------------------------------------// + +void set_generated(CoreParams const& params, CoreState& state); +void set_generated(CoreParams const& params, + CoreState& state); + +//---------------------------------------------------------------------------// +// INLINE DEFINITIONS +//---------------------------------------------------------------------------// +/*! + * Initialize the num_generated counter to zero. + */ +CELER_FORCEINLINE_FUNCTION void SetGeneratedExecutor::operator()(ThreadId tid) +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should call with only one thread + + auto counters = state->init.counters.data().get(); + counters->num_generated = 0; + return; +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cu b/src/celeritas/optical/action/LocateVacanciesAction.cu new file mode 100644 index 0000000000..7ef681d987 --- /dev/null +++ b/src/celeritas/optical/action/LocateVacanciesAction.cu @@ -0,0 +1,41 @@ +//------------------------------ -*- cuda -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! 
\file celeritas/optical/action/LocateVacanciesAction.cu +//---------------------------------------------------------------------------// +#include "LocateVacanciesAction.hh" + +#include "corecel/Assert.hh" +#include "corecel/Macros.hh" +#include "corecel/Types.hh" +#include "corecel/sys/KernelLauncher.device.hh" + +#include "../CoreParams.hh" +#include "../CoreState.hh" + +#include "detail/TrackInitAlgorithms.hh" +#include "detail/UpdateAliveExecutor.hh" + +namespace celeritas +{ +namespace optical +{ +//---------------------------------------------------------------------------// +/*! + * Update the number of active slots as the empty slots have been compacted. + */ +void LocateVacanciesAction::update_alive(CoreParams const& params, + CoreStateDevice& state, + size_type state_size) const +{ + detail::UpdateAliveExecutor execute_thread{ + params.ptr(), state.ptr(), state_size}; + static KernelLauncher const launch_kernel( + "update-alive"); + launch_kernel(1, state.stream_id(), execute_thread); +} + +//---------------------------------------------------------------------------// +} // namespace optical +} // namespace celeritas diff --git a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh new file mode 100644 index 0000000000..c76bae04e2 --- /dev/null +++ b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh @@ -0,0 +1,64 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! 
\file celeritas/optical/action/detail/UpdateAliveExecutor.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Macros.hh" +#include "corecel/Types.hh" +#include "corecel/math/Algorithms.hh" +#include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/CoreTrackData.hh" +#include "celeritas/optical/CoreTrackView.hh" +#include "celeritas/track/Utils.hh" + +namespace celeritas +{ +namespace optical +{ +namespace detail +{ +//---------------------------------------------------------------------------// +// LAUNCHER +//---------------------------------------------------------------------------// +/*! + * Update the num_alive counter based on the number of photons that are still + * alive after compacting vacancies. + */ +struct UpdateAliveExecutor +{ + //// DATA //// + + CRefPtr params; + RefPtr state; + size_type state_size; + + //// FUNCTIONS //// + + // Update number of photons that are still alive + CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); +}; + +//---------------------------------------------------------------------------// +// INLINE DEFINITIONS +//---------------------------------------------------------------------------// +/*! + * Update number of photons that are still alive after compacting vacancies. 
+ */ +CELER_FORCEINLINE_FUNCTION void UpdateAliveExecutor::operator()(ThreadId tid) +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should be called with only one thread + + auto counters = state->init.counters.data().get(); + counters->num_alive = state_size - counters->num_vacancies; + CELER_ASSERT(state_size >= counters->num_vacancies); +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace optical +} // namespace celeritas From d6a13ba76881331c1784ba5f0b5c3a6bedbfaa77 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sat, 4 Apr 2026 16:50:59 -0400 Subject: [PATCH 40/74] Revised implementation of generating and transporting optical photons --- src/accel/CMakeLists.txt | 4 +- src/accel/LocalOpticalGenOffload.cc | 51 +++++++++++++---------- src/accel/LocalOpticalGenOffload.cu | 31 ++++++++++++++ src/accel/LocalOpticalGenOffload.hh | 9 ++++ src/accel/detail/UpdatePendingExecutor.hh | 2 +- 5 files changed, 73 insertions(+), 24 deletions(-) create mode 100644 src/accel/LocalOpticalGenOffload.cu diff --git a/src/accel/CMakeLists.txt b/src/accel/CMakeLists.txt index 80c5f6a21f..ddc1832ceb 100644 --- a/src/accel/CMakeLists.txt +++ b/src/accel/CMakeLists.txt @@ -47,7 +47,9 @@ list(APPEND SOURCES detail/IntegrationSingleton.cc ) -celeritas_polysource(ExceptionConverter) +celeritas_polysource(ExceptionConverter + LocalOpticalGenOffload +) if(Geant4_VERSION VERSION_GREATER_EQUAL 11.0) list(APPEND SOURCES diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index 99d18d8003..c6ac9f51e3 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -15,16 +15,13 @@ #include "corecel/sys/ActionRegistryOutput.hh" #include "corecel/sys/Device.hh" #include "corecel/sys/KernelLauncher.hh" -#include "accel/detail/UpdatePendingExecutor.hh" -// #include "corecel/sys/KernelLauncher.device.hh" #include 
"corecel/sys/ScopedProfiling.hh" #include "geocel/GeantUtils.hh" #include "celeritas/global/CoreParams.hh" -#include "celeritas/optical/CoreParams.hh" -#include "celeritas/optical/CoreState.hh" #include "celeritas/optical/Transporter.hh" #include "celeritas/optical/gen/GeneratorAction.hh" #include "celeritas/phys/GeneratorRegistry.hh" +#include "accel/detail/UpdatePendingExecutor.hh" #include "SetupOptions.hh" #include "SharedParams.hh" @@ -190,25 +187,10 @@ void LocalOpticalGenOffload::Flush() generate_->insert(*state_, make_span(buffer_)); // Update the number of primaries waiting to be generated based on the - // number of photons, using only one thread + // number of photons auto const& optical_params = *transport_->params(); - // optical::detail::UpdatePendingExecutor execute_thread{ - // optical_params.ptr(), *s, num_photons_}; - if (celeritas::device()) - { - // auto* s = - // dynamic_cast*>(&(*state_)); - // optical::detail::UpdatePendingExecutor execute_thread{ - // optical_params.ptr(), s->ptr(), num_photons_}; - // launch_kernel(1, s->stream_id(), execute_thread); - } - else - { - auto* s = dynamic_cast*>(&(*state_)); - optical::detail::UpdatePendingExecutor execute_thread{ - optical_params.ptr(), s->ptr(), num_photons_}; - launch_kernel(1, execute_thread); - } + auto* s = static_cast*>(&(*state_)); + update_primaries(optical_params, *s); num_photons_ = 0; buffer_.clear(); @@ -264,5 +246,30 @@ void LocalOpticalGenOffload::Finalize() CELER_ENSURE(!*this); } +//---------------------------------------------------------------------------// +/*! + * Call the UpdatePending functor to update the number of primaries to be + * generated to include the buffered optical photons; use only one host thread. 
+ */ +void LocalOpticalGenOffload::update_primaries( + optical::CoreParams const& optical_params, + optical::CoreState& state) const +{ + optical::detail::UpdatePendingExecutor execute_thread{ + optical_params.ptr(), state.ptr(), num_photons_}; + launch_kernel(1, execute_thread); +} + +//---------------------------------------------------------------------------// +// DEVICE-DISABLED IMPLEMENTATION +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +inline void LocalOpticalGenOffload::update_primaries( + optical::CoreParams const&, optical::CoreState&) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/src/accel/LocalOpticalGenOffload.cu b/src/accel/LocalOpticalGenOffload.cu new file mode 100644 index 0000000000..6e7211afc9 --- /dev/null +++ b/src/accel/LocalOpticalGenOffload.cu @@ -0,0 +1,31 @@ +//------------------------------ -*- cuda -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file accel/LocalOpticalGenOffload.cu +//---------------------------------------------------------------------------// +#include "LocalOpticalGenOffload.hh" + +#include "corecel/sys/KernelLauncher.device.hh" +#include "accel/detail/UpdatePendingExecutor.hh" + +namespace celeritas +{ +//---------------------------------------------------------------------------// +/*! + * Call the UpdatePending functor to update number of primaries to be generated + * to include the buffered optical photons; use only one device thread. 
+ */ +void LocalOpticalGenOffload::update_primaries( + optical::CoreParams const& optical_params, + optical::CoreState& state) const +{ + optical::detail::UpdatePendingExecutor execute_thread{ + optical_params.ptr(), state.ptr(), num_photons_}; + static KernelLauncher const launch_kernel( + "update-pending"); + launch_kernel(1, state.stream_id(), execute_thread); +} + +//---------------------------------------------------------------------------// +} // namespace celeritas diff --git a/src/accel/LocalOpticalGenOffload.hh b/src/accel/LocalOpticalGenOffload.hh index 343c27860b..14517b29d6 100644 --- a/src/accel/LocalOpticalGenOffload.hh +++ b/src/accel/LocalOpticalGenOffload.hh @@ -11,6 +11,8 @@ #include "corecel/Types.hh" #include "celeritas/Types.hh" #include "celeritas/inp/Control.hh" +#include "celeritas/optical/CoreParams.hh" +#include "celeritas/optical/CoreState.hh" #include "celeritas/optical/gen/GeneratorData.hh" #include "LocalOffloadInterface.hh" @@ -80,6 +82,13 @@ class LocalOpticalGenOffload final : public LocalOffloadInterface explicit operator bool() const { return this->Initialized(); } private: + // Update the number of primaries waiting to be generated on host/device + // Called by Flush() + void update_primaries(optical::CoreParams const&, + optical::CoreState&) const; + void update_primaries(optical::CoreParams const&, + optical::CoreState&) const; + // Transport pending optical tracks std::shared_ptr transport_; diff --git a/src/accel/detail/UpdatePendingExecutor.hh b/src/accel/detail/UpdatePendingExecutor.hh index 18c47785c6..253213b39d 100644 --- a/src/accel/detail/UpdatePendingExecutor.hh +++ b/src/accel/detail/UpdatePendingExecutor.hh @@ -45,7 +45,7 @@ struct UpdatePendingExecutor // INLINE DEFINITIONS //---------------------------------------------------------------------------// /*! 
- * Update number of primaries to be generated to include buffered optical + * Update number of primaries to be generated to include the buffered optical * photons. */ CELER_FORCEINLINE_FUNCTION void UpdatePendingExecutor::operator()(ThreadId tid) From 369095d93e846fff46dfaaf89ecc97b187c28b9f Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 5 Apr 2026 15:00:11 -0400 Subject: [PATCH 41/74] Remove synchronization from LocalOpticalGenOffload::Flush() --- src/accel/CMakeLists.txt | 5 ++--- src/accel/LocalOpticalGenOffload.cc | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/accel/CMakeLists.txt b/src/accel/CMakeLists.txt index ddc1832ceb..afcf4e85b7 100644 --- a/src/accel/CMakeLists.txt +++ b/src/accel/CMakeLists.txt @@ -47,9 +47,8 @@ list(APPEND SOURCES detail/IntegrationSingleton.cc ) -celeritas_polysource(ExceptionConverter - LocalOpticalGenOffload -) +celeritas_polysource(ExceptionConverter) +celeritas_polysource(LocalOpticalGenOffload) if(Geant4_VERSION VERSION_GREATER_EQUAL 11.0) list(APPEND SOURCES diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index c6ac9f51e3..aa2629624a 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -187,10 +187,19 @@ void LocalOpticalGenOffload::Flush() generate_->insert(*state_, make_span(buffer_)); // Update the number of primaries waiting to be generated based on the - // number of photons + // number of photons. Can use static_cast since the appropriate derived + // class is specified in the constructor. 
auto const& optical_params = *transport_->params(); - auto* s = static_cast*>(&(*state_)); - update_primaries(optical_params, *s); + if (celeritas::device()) + { + auto* s = static_cast*>(&*state_); + update_primaries(optical_params, *s); + } + else + { + auto* s = static_cast*>(&*state_); + update_primaries(optical_params, *s); + } num_photons_ = 0; buffer_.clear(); From 73f9fd94679133be81052c1d558b5422ccc90f94 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 6 Apr 2026 11:42:18 -0400 Subject: [PATCH 42/74] Fix parameter list for undefined device function --- src/celeritas/global/detail/SetGenerated.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/detail/SetGenerated.cc b/src/celeritas/global/detail/SetGenerated.cc index 776c853656..65a2654114 100644 --- a/src/celeritas/global/detail/SetGenerated.cc +++ b/src/celeritas/global/detail/SetGenerated.cc @@ -31,7 +31,7 @@ void set_generated(CoreParams const& params, CoreState& state) // DEVICE-DISABLED IMPLEMENTATION //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -void set_generated(CoreParams const&, CoreState&, size_type) +void set_generated(CoreParams const&, CoreState&) { CELER_NOT_CONFIGURED("CUDA OR HIP"); } From 4de4636b93b72ebdd609bbb5b4d4e313811df7a9 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 6 Apr 2026 11:57:36 -0400 Subject: [PATCH 43/74] Move UpdatePendingExecutor to optical/gen --- src/accel/LocalOpticalGenOffload.cc | 2 +- src/accel/LocalOpticalGenOffload.cu | 2 +- .../optical/gen}/detail/UpdatePendingExecutor.hh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename src/{accel => celeritas/optical/gen}/detail/UpdatePendingExecutor.hh (97%) diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index aa2629624a..8c57095d1e 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -20,8 +20,8 @@ #include 
"celeritas/global/CoreParams.hh" #include "celeritas/optical/Transporter.hh" #include "celeritas/optical/gen/GeneratorAction.hh" +#include "celeritas/optical/gen/detail/UpdatePendingExecutor.hh" #include "celeritas/phys/GeneratorRegistry.hh" -#include "accel/detail/UpdatePendingExecutor.hh" #include "SetupOptions.hh" #include "SharedParams.hh" diff --git a/src/accel/LocalOpticalGenOffload.cu b/src/accel/LocalOpticalGenOffload.cu index 6e7211afc9..5a081245fb 100644 --- a/src/accel/LocalOpticalGenOffload.cu +++ b/src/accel/LocalOpticalGenOffload.cu @@ -7,7 +7,7 @@ #include "LocalOpticalGenOffload.hh" #include "corecel/sys/KernelLauncher.device.hh" -#include "accel/detail/UpdatePendingExecutor.hh" +#include "celeritas/optical/gen/detail/UpdatePendingExecutor.hh" namespace celeritas { diff --git a/src/accel/detail/UpdatePendingExecutor.hh b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh similarity index 97% rename from src/accel/detail/UpdatePendingExecutor.hh rename to src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh index 253213b39d..9d01a5635f 100644 --- a/src/accel/detail/UpdatePendingExecutor.hh +++ b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh @@ -2,7 +2,7 @@ // Copyright Celeritas contributors: see top-level COPYRIGHT file for details // SPDX-License-Identifier: (Apache-2.0 OR MIT) //---------------------------------------------------------------------------// -//! \file accel/detail/UpdatePendingExecutor.hh +//! 
\file celeritas/optical/gen/detail/UpdatePendingExecutor.hh //---------------------------------------------------------------------------// #pragma once From 6d36f6092d196e2172271abc73a85fdc0847dc98 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 6 Apr 2026 14:56:44 -0400 Subject: [PATCH 44/74] Remove params from update_pending functor --- src/accel/LocalOpticalGenOffload.cc | 12 +++++------- src/accel/LocalOpticalGenOffload.cu | 5 ++--- src/accel/LocalOpticalGenOffload.hh | 6 ++---- .../optical/gen/detail/UpdatePendingExecutor.hh | 2 -- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index 8c57095d1e..485f899101 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -189,16 +189,15 @@ void LocalOpticalGenOffload::Flush() // Update the number of primaries waiting to be generated based on the // number of photons. Can use static_cast since the appropriate derived // class is specified in the constructor. - auto const& optical_params = *transport_->params(); if (celeritas::device()) { auto* s = static_cast*>(&*state_); - update_primaries(optical_params, *s); + update_primaries(*s); } else { auto* s = static_cast*>(&*state_); - update_primaries(optical_params, *s); + update_primaries(*s); } num_photons_ = 0; @@ -261,11 +260,10 @@ void LocalOpticalGenOffload::Finalize() * generated to include the buffered optical photons; use only one host thread. 
*/ void LocalOpticalGenOffload::update_primaries( - optical::CoreParams const& optical_params, optical::CoreState& state) const { - optical::detail::UpdatePendingExecutor execute_thread{ - optical_params.ptr(), state.ptr(), num_photons_}; + optical::detail::UpdatePendingExecutor execute_thread{state.ptr(), + num_photons_}; launch_kernel(1, execute_thread); } @@ -274,7 +272,7 @@ void LocalOpticalGenOffload::update_primaries( //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE inline void LocalOpticalGenOffload::update_primaries( - optical::CoreParams const&, optical::CoreState&) const + optical::CoreState&) const { CELER_NOT_CONFIGURED("CUDA OR HIP"); } diff --git a/src/accel/LocalOpticalGenOffload.cu b/src/accel/LocalOpticalGenOffload.cu index 5a081245fb..d60e53de3e 100644 --- a/src/accel/LocalOpticalGenOffload.cu +++ b/src/accel/LocalOpticalGenOffload.cu @@ -17,11 +17,10 @@ namespace celeritas * to include the buffered optical photons; use only one device thread. 
*/ void LocalOpticalGenOffload::update_primaries( - optical::CoreParams const& optical_params, optical::CoreState& state) const { - optical::detail::UpdatePendingExecutor execute_thread{ - optical_params.ptr(), state.ptr(), num_photons_}; + optical::detail::UpdatePendingExecutor execute_thread{state.ptr(), + num_photons_}; static KernelLauncher const launch_kernel( "update-pending"); launch_kernel(1, state.stream_id(), execute_thread); diff --git a/src/accel/LocalOpticalGenOffload.hh b/src/accel/LocalOpticalGenOffload.hh index 14517b29d6..73a1205949 100644 --- a/src/accel/LocalOpticalGenOffload.hh +++ b/src/accel/LocalOpticalGenOffload.hh @@ -84,10 +84,8 @@ class LocalOpticalGenOffload final : public LocalOffloadInterface private: // Update the number of primaries waiting to be generated on host/device // Called by Flush() - void update_primaries(optical::CoreParams const&, - optical::CoreState&) const; - void update_primaries(optical::CoreParams const&, - optical::CoreState&) const; + void update_primaries(optical::CoreState&) const; + void update_primaries(optical::CoreState&) const; // Transport pending optical tracks std::shared_ptr transport_; diff --git a/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh index 9d01a5635f..5d8ccc3efb 100644 --- a/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh +++ b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh @@ -31,7 +31,6 @@ struct UpdatePendingExecutor { //// DATA //// - CRefPtr params; RefPtr state; size_type num_photons; @@ -50,7 +49,6 @@ struct UpdatePendingExecutor */ CELER_FORCEINLINE_FUNCTION void UpdatePendingExecutor::operator()(ThreadId tid) { - CELER_EXPECT(params); CELER_EXPECT(state); CELER_EXPECT(tid.get() == 0); // Should call with only one thread From 00def814477a32d6fec85bb553a28e58f8909e98 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 6 Apr 2026 14:59:25 -0400 Subject: [PATCH 45/74] Replace sync_get_counters 
in DirectGeneratorAction::insert_impl() --- .../optical/gen/DirectGeneratorAction.cc | 24 +++++++++++++++---- .../optical/gen/DirectGeneratorAction.cu | 17 +++++++++++++ .../optical/gen/DirectGeneratorAction.hh | 3 +++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index fff4e30574..8dc23fca17 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -20,6 +20,7 @@ #include "detail/DirectGeneratorExecutor.hh" #include "detail/GeneratorAlgorithms.hh" +#include "detail/UpdatePendingExecutor.hh" namespace celeritas { @@ -144,9 +145,7 @@ void DirectGeneratorAction::insert_impl(CoreState& state, // Update counters and copy distributions to aux state storage aux_state.counters.buffer_size = data.size(); aux_state.counters.num_pending = data.size(); - auto counters = state.sync_get_counters(); - counters.num_pending += data.size(); - state.sync_put_counters(counters); + update_pending(state, data.size()); Copier copy_to_aux{aux_state.initializers(), state.stream_id()}; @@ -224,11 +223,28 @@ void DirectGeneratorAction::generate(CoreParams const& params, launch_action(num_gen, execute); } +//---------------------------------------------------------------------------// +/*! + * Launch a (host) kernel to update the number of pending optical photons. 
+ */ +void DirectGeneratorAction::update_pending(CoreStateHost& state, + size_type num_pending) const +{ + // Update the number of pending optical photons + detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; + launch_kernel(1, execute); +} + //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE void DirectGeneratorAction::generate(CoreParams const&, CoreStateDevice&) const { - CELER_NOT_IMPLEMENTED("device"); + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} + +void DirectGeneratorAction::update_pending(CoreStateDevice&, size_type) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); } #endif diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cu b/src/celeritas/optical/gen/DirectGeneratorAction.cu index 5f46af8a0f..1227aa7bea 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cu +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cu @@ -13,6 +13,7 @@ #include "celeritas/optical/action/TrackSlotExecutor.hh" #include "detail/DirectGeneratorExecutor.hh" +#include "detail/UpdatePendingExecutor.hh" namespace celeritas { @@ -32,11 +33,27 @@ void DirectGeneratorAction::generate(CoreParams const& params, size_type num_gen = min(state.sync_get_counters().num_vacancies, aux_state.counters.num_pending); + // Generate optical photons in vacant track slots detail::DirectGeneratorExecutor execute{ params.ptr(), state.ptr(), aux_state.store.ref()}; static ActionLauncher const launch(*this); launch(num_gen, state.stream_id(), execute); } + +//---------------------------------------------------------------------------// +/*! + * Launch a (device) kernel to initialize optical photons. 
+ */ +void DirectGeneratorAction::update_pending(CoreStateDevice& state, + size_type num_pending) const +{ + // Update the number of pending optical photons + detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; + static KernelLauncher const launch_kernel( + "update-pending"); + launch_kernel(1, state.stream_id(), execute); +} + //---------------------------------------------------------------------------// } // namespace optical } // namespace celeritas diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.hh b/src/celeritas/optical/gen/DirectGeneratorAction.hh index 683ce84e1e..b5e3d7eee3 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.hh +++ b/src/celeritas/optical/gen/DirectGeneratorAction.hh @@ -71,6 +71,9 @@ class DirectGeneratorAction final : public GeneratorBase void generate(CoreParams const&, CoreStateHost&) const; void generate(CoreParams const&, CoreStateDevice&) const; + + void update_pending(CoreStateHost&, size_type) const; + void update_pending(CoreStateDevice&, size_type) const; }; //---------------------------------------------------------------------------// From 75e25a39be368e05cb1413a5eaab971f73c34721 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 8 Apr 2026 13:18:09 -0400 Subject: [PATCH 46/74] Remove need for sync_get_counters before calling DirectGeneratorExecutor --- .../optical/gen/DirectGeneratorAction.cc | 9 +++--- .../optical/gen/DirectGeneratorAction.cu | 4 +-- src/celeritas/optical/gen/GeneratorData.hh | 2 +- .../gen/detail/DirectGeneratorExecutor.hh | 31 ++++++++++++------- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index 8dc23fca17..b63966169b 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -185,9 +185,10 @@ void DirectGeneratorAction::step_impl(CoreParams const& params, = get>(*state.aux(), 
this->aux_id()); auto& counters = aux_state.counters; - if (state.sync_get_counters().num_vacancies > 0 && counters.num_pending > 0) + if (counters.num_pending > 0) { - // Generate the optical photons from the distribution data + // Generate the optical photons from the distribution data. To avoid + // synchronization, we defer the check for vacancies until later. this->generate(params, state); } @@ -214,13 +215,11 @@ void DirectGeneratorAction::generate(CoreParams const& params, auto& aux_state = get>( *state.aux(), this->aux_id()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); // Generate optical photons in vacant track slots detail::DirectGeneratorExecutor execute{ params.ptr(), state.ptr(), aux_state.store.ref()}; - launch_action(num_gen, execute); + launch_action(aux_state.counters.num_pending, execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cu b/src/celeritas/optical/gen/DirectGeneratorAction.cu index 1227aa7bea..d777df223b 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cu +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cu @@ -30,14 +30,12 @@ void DirectGeneratorAction::generate(CoreParams const& params, auto& aux_state = get>( *state.aux(), this->aux_id()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); // Generate optical photons in vacant track slots detail::DirectGeneratorExecutor execute{ params.ptr(), state.ptr(), aux_state.store.ref()}; static ActionLauncher const launch(*this); - launch(num_gen, state.stream_id(), execute); + launch(aux_state.counters.num_pending, state.stream_id(), execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/GeneratorData.hh b/src/celeritas/optical/gen/GeneratorData.hh index 35862a33f5..3ca13842e5 100644 --- 
a/src/celeritas/optical/gen/GeneratorData.hh +++ b/src/celeritas/optical/gen/GeneratorData.hh @@ -162,7 +162,7 @@ struct GeneratorState : public GeneratorStateBase //---------------------------------------------------------------------------// /*! - * Resize optical buffere. + * Resize optical buffers. */ template void resize(GeneratorStateData* state, diff --git a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh index 633788b737..133828a9e8 100644 --- a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh @@ -50,19 +50,28 @@ CELER_FUNCTION void DirectGeneratorExecutor::operator()(TrackSlotId tid) const CELER_EXPECT(state); auto* counters = state->init.counters.data().get(); - // Create view to new track to be initialized - CoreTrackView vacancy(*params, *state, [&] { - TrackSlotId idx{ - index_before(counters->num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()); - // Get initializer from the back - TrackInitializer const& init = data.initializers[ItemId( - index_before(counters->num_pending, ThreadId(tid.get())))]; + // Original code set the number of threads to the minimum of the number + // of vacancies and the number of pending in the auxiliary state. 
To avoid + // accessing the state counters to compute this min, we instead skip the + // excess if state.counters.num_vacancies < aux_state.counters.num_pending + if (tid < counters->num_vacancies) + { + // Create view to new track to be initialized + CoreTrackView vacancy(*params, *state, [&] { + TrackSlotId idx{ + index_before(counters->num_vacancies, ThreadId(tid.get()))}; + return state->init.vacancies[idx]; + }()); + + // Get initializer from the back + TrackInitializer const& init + = data.initializers[ItemId( + index_before(counters->num_pending, ThreadId(tid.get())))]; - // Initialize track - vacancy = init; + // Initialize track + vacancy = init; + } } //---------------------------------------------------------------------------// From ef954a87dc17076b88cf58ecc51d8bba0f47095e Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 10 Apr 2026 11:42:03 -0400 Subject: [PATCH 47/74] Make pointer type explicit with auto* --- src/celeritas/global/detail/SetGeneratedExecutor.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/global/detail/SetGeneratedExecutor.hh b/src/celeritas/global/detail/SetGeneratedExecutor.hh index cf577c931b..174667ebbe 100644 --- a/src/celeritas/global/detail/SetGeneratedExecutor.hh +++ b/src/celeritas/global/detail/SetGeneratedExecutor.hh @@ -57,7 +57,7 @@ CELER_FORCEINLINE_FUNCTION void SetGeneratedExecutor::operator()(ThreadId tid) CELER_EXPECT(state); CELER_EXPECT(tid.get() == 0); // Should call with only one thread - auto counters = state->init.counters.data().get(); + auto* counters = state->init.counters.data().get(); counters->num_generated = 0; return; } From 988335fd99208892a486d042aa2fe71a59293eba Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 10 Apr 2026 12:04:03 -0400 Subject: [PATCH 48/74] Remove state counter synchronization in step_impl --- .../track/ExtendFromPrimariesAction.cc | 35 ++++++--- .../track/ExtendFromPrimariesAction.cu | 17 ++++- .../track/ExtendFromPrimariesAction.hh | 3 
+ .../track/detail/ProcessPrimariesExecutor.hh | 7 +- .../track/detail/UpdateCountersExecutor.hh | 72 +++++++++++++++++++ 5 files changed, 120 insertions(+), 14 deletions(-) create mode 100644 src/celeritas/track/detail/UpdateCountersExecutor.hh diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index 305fc5d9b5..b189627773 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -19,6 +19,7 @@ #include "TrackInitParams.hh" #include "detail/ProcessPrimariesExecutor.hh" // IWYU pragma: associated +#include "detail/UpdateCountersExecutor.hh" // IWYU pragma: associated namespace celeritas { @@ -184,18 +185,9 @@ void ExtendFromPrimariesAction::step_impl(CoreParams const& params, CoreState& state) const { auto& primaries = get>(state.aux(), aux_id_); - auto counters = state.sync_get_counters(); - - // Create track initializers from primaries - counters.num_initializers += primaries.count; - state.sync_put_counters(counters); this->process_primaries(params, state, primaries); - - // Mark that the primaries have been processed - counters.num_generated += primaries.count; - counters.num_pending = 0; + this->update_counters(params, state, primaries.count); primaries.count = 0; - state.sync_put_counters(counters); } //---------------------------------------------------------------------------// @@ -209,10 +201,24 @@ void ExtendFromPrimariesAction::process_primaries( { auto primaries = pstate.primaries(); detail::ProcessPrimariesExecutor execute{ - params.ptr(), state.ptr(), primaries}; + params.ptr(), state.ptr(), primaries, pstate.count}; return launch_action(*this, primaries.size(), params, state, execute); } +//---------------------------------------------------------------------------// +/*! + * Launch a (host) kernel to update state counters for number of primary + * particles. 
+ */ +void ExtendFromPrimariesAction::update_counters(CoreParams const& params, + CoreStateHost& state, + size_type num_primaries) const +{ + detail::UpdateCountersExecutor execute{ + params.ptr(), state.ptr(), num_primaries}; + return launch_action(*this, 1, params, state, execute); +} + //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE void ExtendFromPrimariesAction::process_primaries( @@ -222,6 +228,13 @@ void ExtendFromPrimariesAction::process_primaries( { CELER_NOT_CONFIGURED("CUDA OR HIP"); } + +void ExtendFromPrimariesAction::update_counters(CoreParams const&, + CoreStateDevice&, + size_type) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} #endif //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index c528dbc3e5..5665ce155c 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -11,6 +11,7 @@ #include "celeritas/global/CoreState.hh" #include "detail/ProcessPrimariesExecutor.hh" +#include "detail/UpdateCountersExecutor.hh" namespace celeritas { @@ -25,7 +26,7 @@ void ExtendFromPrimariesAction::process_primaries( { auto primaries = pstate.primaries(); detail::ProcessPrimariesExecutor execute_thread{ - params.ptr(), state.ptr(), primaries}; + params.ptr(), state.ptr(), primaries, pstate.count}; static ActionLauncher const launch_kernel(*this); if (!primaries.empty()) { @@ -33,5 +34,19 @@ void ExtendFromPrimariesAction::process_primaries( } } +//---------------------------------------------------------------------------// +/*! + * Launch a kernel to update state counters for number of primary particles. 
+ */ +void ExtendFromPrimariesAction::update_counters(CoreParams const& params, + CoreStateDevice& state, + size_type num_primaries) const +{ + detail::UpdateCountersExecutor execute_thread{ + params.ptr(), state.ptr(), num_primaries}; + static ActionLauncher const launch_kernel(*this); + launch_kernel(1, state.stream_id(), execute_thread); +} + //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/src/celeritas/track/ExtendFromPrimariesAction.hh b/src/celeritas/track/ExtendFromPrimariesAction.hh index 80d1fcb3ad..d245d2992c 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.hh +++ b/src/celeritas/track/ExtendFromPrimariesAction.hh @@ -99,6 +99,9 @@ class ExtendFromPrimariesAction final : public CoreStepActionInterface, void process_primaries(CoreParams const&, CoreStateDevice&, PrimaryStateData const&) const; + + void update_counters(CoreParams const&, CoreStateHost&, size_type) const; + void update_counters(CoreParams const&, CoreStateDevice&, size_type) const; }; template diff --git a/src/celeritas/track/detail/ProcessPrimariesExecutor.hh b/src/celeritas/track/detail/ProcessPrimariesExecutor.hh index 0addafb95f..7bc6105cbe 100644 --- a/src/celeritas/track/detail/ProcessPrimariesExecutor.hh +++ b/src/celeritas/track/detail/ProcessPrimariesExecutor.hh @@ -41,6 +41,7 @@ struct ProcessPrimariesExecutor StatePtr state; Span primaries; + size_type num_primaries; //// FUNCTIONS //// @@ -56,7 +57,8 @@ CELER_FUNCTION void ProcessPrimariesExecutor::operator()(ThreadId tid) const { CELER_EXPECT(tid < primaries.size()); auto* counters = state->init.counters.data().get(); - CELER_EXPECT(primaries.size() <= counters->num_initializers + tid.get()); + CELER_EXPECT(primaries.size() + <= counters->num_initializers + num_primaries + tid.get()); Primary const& primary = primaries[tid.unchecked_get()]; @@ -83,7 +85,8 @@ CELER_FUNCTION void ProcessPrimariesExecutor::operator()(ThreadId tid) const } // Store the 
initializer - size_type idx = counters->num_initializers - primaries.size() + tid.get(); + size_type idx = counters->num_initializers + num_primaries + - primaries.size() + tid.get(); state->init.initializers[ItemId(idx)] = ti; } diff --git a/src/celeritas/track/detail/UpdateCountersExecutor.hh b/src/celeritas/track/detail/UpdateCountersExecutor.hh new file mode 100644 index 0000000000..d7b23719fe --- /dev/null +++ b/src/celeritas/track/detail/UpdateCountersExecutor.hh @@ -0,0 +1,72 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/track/detail/UpdateCountersExecutor.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Assert.hh" +#include "corecel/Macros.hh" +#include "corecel/cont/Span.hh" +#include "corecel/random/engine/InitializeRngState.hh" +#include "celeritas/Quantities.hh" +#include "celeritas/Types.hh" +#include "celeritas/global/CoreTrackData.hh" +#include "celeritas/phys/ParticleData.hh" +#include "celeritas/phys/Primary.hh" + +#include "../SimData.hh" +#include "../TrackInitData.hh" +#include "../Utils.hh" + +namespace celeritas +{ +namespace detail +{ +//---------------------------------------------------------------------------// +/*! + * Update the state counters after processing primary particles. + */ +struct UpdateCountersExecutor +{ + //// TYPES //// + + using ParamsPtr = CRefPtr; + using StatePtr = RefPtr; + + //// DATA //// + + ParamsPtr params; + StatePtr state; + + size_type num_primaries; + + //// FUNCTIONS //// + + // Update state counters based on the number of primaries + inline CELER_FUNCTION void operator()(ThreadId tid) const; +}; + +//---------------------------------------------------------------------------// +/*!
+ * Update state counters based on the number of primaries. + */ +CELER_FUNCTION void UpdateCountersExecutor::operator()(ThreadId tid) const +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should call with only one thread + + auto* counters = state->init.counters.data().get(); + // Update track initializers from primaries + counters->num_initializers += num_primaries; + // Mark that the primaries have been processed + counters->num_generated += num_primaries; + counters->num_pending = 0; + return; +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas From 249e6fa88260e20ff687f5c9fc9291ae6a1f82de Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 12 Apr 2026 15:50:25 -0400 Subject: [PATCH 49/74] Move update num_pending to GeneratorBase base class --- src/celeritas/CMakeLists.txt | 1 + .../optical/gen/DirectGeneratorAction.cc | 17 --------- .../optical/gen/DirectGeneratorAction.cu | 14 ------- .../optical/gen/DirectGeneratorAction.hh | 3 -- src/celeritas/optical/gen/GeneratorBase.cc | 23 ++++++++++++ src/celeritas/optical/gen/GeneratorBase.cu | 37 +++++++++++++++++++ src/celeritas/optical/gen/GeneratorBase.hh | 4 ++ 7 files changed, 65 insertions(+), 34 deletions(-) create mode 100644 src/celeritas/optical/gen/GeneratorBase.cu diff --git a/src/celeritas/CMakeLists.txt b/src/celeritas/CMakeLists.txt index 93a6bfd1c7..290fb7a294 100644 --- a/src/celeritas/CMakeLists.txt +++ b/src/celeritas/CMakeLists.txt @@ -409,6 +409,7 @@ celeritas_polysource(optical/action/PreStepAction) celeritas_polysource(optical/action/TrackingCutAction) celeritas_polysource(optical/action/detail/TrackInitAlgorithms) celeritas_polysource(optical/gen/GeneratorAction) +celeritas_polysource(optical/gen/GeneratorBase) celeritas_polysource(optical/gen/OffloadAction) celeritas_polysource(optical/gen/OffloadGatherAction) celeritas_polysource(optical/gen/PrimaryGeneratorAction) diff 
--git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index b63966169b..6f176fa5f9 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -222,29 +222,12 @@ void DirectGeneratorAction::generate(CoreParams const& params, launch_action(aux_state.counters.num_pending, execute); } -//---------------------------------------------------------------------------// -/*! - * Launch a (host) kernel to update the number of pending optical photons. - */ -void DirectGeneratorAction::update_pending(CoreStateHost& state, - size_type num_pending) const -{ - // Update the number of pending optical photons - detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; - launch_kernel(1, execute); -} - //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE void DirectGeneratorAction::generate(CoreParams const&, CoreStateDevice&) const { CELER_NOT_CONFIGURED("CUDA OR HIP"); } - -void DirectGeneratorAction::update_pending(CoreStateDevice&, size_type) const -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} #endif //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cu b/src/celeritas/optical/gen/DirectGeneratorAction.cu index d777df223b..3d6222ff94 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cu +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cu @@ -38,20 +38,6 @@ void DirectGeneratorAction::generate(CoreParams const& params, launch(aux_state.counters.num_pending, state.stream_id(), execute); } -//---------------------------------------------------------------------------// -/*! - * Launch a (device) kernel to initialize optical photons. 
- */ -void DirectGeneratorAction::update_pending(CoreStateDevice& state, - size_type num_pending) const -{ - // Update the number of pending optical photons - detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; - static KernelLauncher const launch_kernel( - "update-pending"); - launch_kernel(1, state.stream_id(), execute); -} - //---------------------------------------------------------------------------// } // namespace optical } // namespace celeritas diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.hh b/src/celeritas/optical/gen/DirectGeneratorAction.hh index b5e3d7eee3..683ce84e1e 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.hh +++ b/src/celeritas/optical/gen/DirectGeneratorAction.hh @@ -71,9 +71,6 @@ class DirectGeneratorAction final : public GeneratorBase void generate(CoreParams const&, CoreStateHost&) const; void generate(CoreParams const&, CoreStateDevice&) const; - - void update_pending(CoreStateHost&, size_type) const; - void update_pending(CoreStateDevice&, size_type) const; }; //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/GeneratorBase.cc b/src/celeritas/optical/gen/GeneratorBase.cc index 8df3ed9fe7..eb327f78a5 100644 --- a/src/celeritas/optical/gen/GeneratorBase.cc +++ b/src/celeritas/optical/gen/GeneratorBase.cc @@ -8,6 +8,9 @@ #include "corecel/Assert.hh" #include "corecel/data/AuxStateVec.hh" +#include "corecel/sys/KernelLauncher.hh" + +#include "detail/UpdatePendingExecutor.hh" namespace celeritas { @@ -47,6 +50,26 @@ GeneratorStateBase const& GeneratorBase::counters(AuxStateVec const& aux) const return dynamic_cast(aux.at(aux_id_)); } +//---------------------------------------------------------------------------// +/*! + * Launch a (host) kernel to update the number of pending optical photons. 
+ */ +void GeneratorBase::update_pending(CoreStateHost& state, + size_type num_pending) const +{ + // Update the number of pending optical photons + detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; + launch_kernel(1, execute); +} + +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +void GeneratorBase::update_pending(CoreStateDevice&, size_type) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + //---------------------------------------------------------------------------// } // namespace optical } // namespace celeritas diff --git a/src/celeritas/optical/gen/GeneratorBase.cu b/src/celeritas/optical/gen/GeneratorBase.cu new file mode 100644 index 0000000000..ac1aa95372 --- /dev/null +++ b/src/celeritas/optical/gen/GeneratorBase.cu @@ -0,0 +1,37 @@ +//------------------------------ -*- cuda -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/optical/gen/GeneratorBase.cu +//---------------------------------------------------------------------------// +#include "GeneratorBase.hh" + +#include "corecel/Assert.hh" +#include "celeritas/optical/CoreParams.hh" +#include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/action/ActionLauncher.device.hh" +#include "celeritas/optical/action/TrackSlotExecutor.hh" + +#include "detail/UpdatePendingExecutor.hh" + +namespace celeritas +{ +namespace optical +{ +//---------------------------------------------------------------------------// +/*! + * Launch a (device) kernel to update the number of pending optical photons.
+ */ +void GeneratorBase::update_pending(CoreStateDevice& state, + size_type num_pending) const +{ + // Update the number of pending optical photons + detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; + static KernelLauncher const launch_kernel( + "update-pending"); + launch_kernel(1, state.stream_id(), execute); +} + +//---------------------------------------------------------------------------// +} // namespace optical +} // namespace celeritas diff --git a/src/celeritas/optical/gen/GeneratorBase.hh b/src/celeritas/optical/gen/GeneratorBase.hh index 09c26d7330..2d4db7e39d 100644 --- a/src/celeritas/optical/gen/GeneratorBase.hh +++ b/src/celeritas/optical/gen/GeneratorBase.hh @@ -74,6 +74,10 @@ class GeneratorBase : virtual public optical::OpticalStepActionInterface, template inline void update_counters(optical::CoreState&) const; + // Update the num_pending state counter + void update_pending(CoreStateHost&, size_type) const; + void update_pending(CoreStateDevice&, size_type) const; + private: StaticActionData sad_; AuxId aux_id_; From 8162f50bda5cf3a6cd8a7f5ec913fcbab91713d6 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 12 Apr 2026 16:07:27 -0400 Subject: [PATCH 50/74] Remove optical state sync from insert_impl --- src/celeritas/optical/gen/PrimaryGeneratorAction.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc index c2b5850e9b..5f774c688d 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc @@ -135,9 +135,7 @@ void PrimaryGeneratorAction::insert_impl(optical::CoreState& state) const auto& aux_state = this->counters(*state.aux()); aux_state.counters.num_pending = data_.num_photons; - auto counters = state.sync_get_counters(); - counters.num_pending += data_.num_photons; - state.sync_put_counters(counters); + update_pending(state, 
data_.num_photons); } //---------------------------------------------------------------------------// From 9b511878581891ddcbbcb34c84916fc11a8c1af2 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 15 Apr 2026 22:55:54 -0400 Subject: [PATCH 51/74] Remove Core State counter sync from all GeneratorAction classes --- src/celeritas/optical/gen/GeneratorAction.cc | 11 +- src/celeritas/optical/gen/GeneratorAction.cu | 6 +- .../optical/gen/PrimaryGeneratorAction.cc | 5 +- .../optical/gen/PrimaryGeneratorAction.cu | 4 +- .../optical/gen/WlsGeneratorAction.cc | 13 +- .../optical/gen/WlsGeneratorAction.cu | 4 +- .../gen/detail/DirectGeneratorExecutor.hh | 4 +- .../optical/gen/detail/GeneratorExecutor.hh | 112 ++++++++++-------- .../gen/detail/PrimaryGeneratorExecutor.hh | 35 ++++-- .../optical/gen/detail/UpdateSumExecutor.hh | 13 +- .../gen/detail/WlsGeneratorExecutor.hh | 73 ++++++------ 11 files changed, 153 insertions(+), 127 deletions(-) diff --git a/src/celeritas/optical/gen/GeneratorAction.cc b/src/celeritas/optical/gen/GeneratorAction.cc index a7c244af50..c3ca0ffac7 100644 --- a/src/celeritas/optical/gen/GeneratorAction.cc +++ b/src/celeritas/optical/gen/GeneratorAction.cc @@ -202,9 +202,10 @@ void GeneratorAction::step_impl(CoreParams const& params, state.stream_id()); } - if (state.sync_get_counters().num_vacancies > 0 && counters.num_pending > 0) + if (counters.num_pending > 0) { - // Generate the optical photons from the distribution data + // Generate the optical photons from the distribution data. To avoid + // synchronization, we defer the check for vacancies until later. 
this->generate(params, state); } @@ -232,8 +233,7 @@ void GeneratorAction::generate(CoreParams const& params, auto& aux_state = get>(*state.aux(), this->aux_id()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); + size_type num_gen = aux_state.counters.num_pending; { // Generate optical photons in vacant track slots detail::GeneratorExecutor execute{params.ptr(), @@ -247,7 +247,8 @@ void GeneratorAction::generate(CoreParams const& params, { // Update the cumulative sum of the number of photons per distribution // according to how many were generated - detail::UpdateSumExecutor execute{aux_state.store.ref(), num_gen}; + detail::UpdateSumExecutor execute{ + state.ptr(), aux_state.store.ref(), num_gen}; launch_kernel(aux_state.counters.buffer_size, execute); } } diff --git a/src/celeritas/optical/gen/GeneratorAction.cu b/src/celeritas/optical/gen/GeneratorAction.cu index 697cc0746e..7d46cb03f4 100644 --- a/src/celeritas/optical/gen/GeneratorAction.cu +++ b/src/celeritas/optical/gen/GeneratorAction.cu @@ -38,8 +38,7 @@ void GeneratorAction::generate(CoreParams const& params, auto& aux_state = get>(*state.aux(), this->aux_id()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); + size_type num_gen = aux_state.counters.num_pending; { // Generate optical photons in vacant track slots detail::GeneratorExecutor execute{params.ptr(), @@ -54,7 +53,8 @@ void GeneratorAction::generate(CoreParams const& params, { // Update the cumulative sum of the number of photons per distribution // according to how many were generated - detail::UpdateSumExecutor execute{aux_state.store.ref(), num_gen}; + detail::UpdateSumExecutor execute{ + state.ptr(), aux_state.store.ref(), num_gen}; static KernelLauncher const launch_kernel( "update-sum"); launch_kernel( diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc index 
5f774c688d..cb43fdacfb 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc @@ -151,6 +151,7 @@ void PrimaryGeneratorAction::step_impl(CoreParams const& params, auto const& counters = this->counters(*state.aux()).counters; if (state.sync_get_counters().num_vacancies > 0 && counters.num_pending > 0) + // if (counters.num_pending > 0) { // Generate the optical photons from the distribution data this->generate(params, state); @@ -170,13 +171,11 @@ void PrimaryGeneratorAction::generate(CoreParams const& params, CELER_EXPECT(state.aux()); auto const& aux_state = this->counters(*state.aux()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); // Generate optical photons in vacant track slots detail::PrimaryGeneratorExecutor execute{ params.ptr(), state.ptr(), data_, params_.host_ref()}; - launch_action(num_gen, execute); + launch_action(aux_state.counters.num_pending, execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cu b/src/celeritas/optical/gen/PrimaryGeneratorAction.cu index a65ea1c77f..aec34f85f7 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cu +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cu @@ -31,8 +31,6 @@ void PrimaryGeneratorAction::generate(CoreParams const& params, CELER_EXPECT(state.aux()); auto const& aux_state = this->counters(*state.aux()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); // Generate optical photons in vacant track slots detail::PrimaryGeneratorExecutor execute{params.ptr(), @@ -40,7 +38,7 @@ void PrimaryGeneratorAction::generate(CoreParams const& params, data_, params_.device_ref()}; static ActionLauncher const launch(*this); - launch(num_gen, state.stream_id(), execute); + launch(aux_state.counters.num_pending, state.stream_id(), 
execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/WlsGeneratorAction.cc b/src/celeritas/optical/gen/WlsGeneratorAction.cc index 2ccb18e05c..6615073d06 100644 --- a/src/celeritas/optical/gen/WlsGeneratorAction.cc +++ b/src/celeritas/optical/gen/WlsGeneratorAction.cc @@ -148,13 +148,12 @@ void WlsGeneratorAction::step_impl(CoreParams const& params, } // Update the core state counters with the number of new pending tracks - auto core_counters = state.sync_get_counters(); - core_counters.num_pending += counters.num_pending - num_pending_prev; - state.sync_put_counters(core_counters); + update_pending(state, counters.num_pending - num_pending_prev); - if (counters.num_pending > 0 && core_counters.num_vacancies > 0) + if (counters.num_pending > 0) { - // Generate the optical photons from the distribution data + // Generate the optical photons from the distribution data. To avoid + // synchronization, we defer the check for vacancies until later. this->generate(params, state); // Compact the buffer again to remove stale distributions and free up @@ -194,8 +193,6 @@ void WlsGeneratorAction::generate(CoreParams const& params, auto& aux_state = get>(*state.aux(), this->aux_id()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); // Generate optical photons in vacant track slots detail::WlsGeneratorExecutor execute{ @@ -205,7 +202,7 @@ void WlsGeneratorAction::generate(CoreParams const& params, wls2_ ? 
wls2_->host_ref() : NativeCRef{}, aux_state.store.ref(), aux_state.counters.buffer_size}; - launch_action(num_gen, execute); + launch_action(aux_state.counters.num_pending, execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/WlsGeneratorAction.cu b/src/celeritas/optical/gen/WlsGeneratorAction.cu index 61d5c9222b..9dea3bd71b 100644 --- a/src/celeritas/optical/gen/WlsGeneratorAction.cu +++ b/src/celeritas/optical/gen/WlsGeneratorAction.cu @@ -34,8 +34,6 @@ void WlsGeneratorAction::generate(CoreParams const& params, auto& aux_state = get>(*state.aux(), this->aux_id()); - size_type num_gen = min(state.sync_get_counters().num_vacancies, - aux_state.counters.num_pending); // Generate optical photons in vacant track slots detail::WlsGeneratorExecutor execute{ @@ -46,7 +44,7 @@ void WlsGeneratorAction::generate(CoreParams const& params, aux_state.store.ref(), aux_state.counters.buffer_size}; static ActionLauncher const launch(*this); - launch(num_gen, state.stream_id(), execute); + launch(aux_state.counters.num_pending, state.stream_id(), execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh index 133828a9e8..5fc01bf7fe 100644 --- a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh @@ -53,8 +53,8 @@ CELER_FUNCTION void DirectGeneratorExecutor::operator()(TrackSlotId tid) const // Original code set the number of threads to the minimum between of number // of vacancies and the number of pending in the auxiliary state. 
To avoid - // accessing the state counters to compute this min, we instead skip the - // excess if state.counters.num_vacancies < aux_state.counters.num_pending + // accessing the state counters to compute this min, we skip the extra + // threads if counters->num_vacancies < aux_state.counters.num_pending if (tid < counters->num_vacancies) { // Create view to new track to be initialized diff --git a/src/celeritas/optical/gen/detail/GeneratorExecutor.hh b/src/celeritas/optical/gen/detail/GeneratorExecutor.hh index ba1342521d..5faefe1252 100644 --- a/src/celeritas/optical/gen/detail/GeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/GeneratorExecutor.hh @@ -66,59 +66,67 @@ CELER_FUNCTION void GeneratorExecutor::operator()(TrackSlotId tid) const auto* counters = state->init.counters.data().get(); - // Find the index of the first distribution that has a nonzero number of - // primaries left to generate - auto all_offsets = offload.offsets[ItemRange( - ItemId(0), ItemId(buffer_size))]; - auto buffer_start = celeritas::upper_bound( - all_offsets.begin(), all_offsets.end(), size_type(0)); - CELER_ASSERT(buffer_start != all_offsets.end()); - - // Get the cumulative sum of the number of photons in the distributions. 
- // The values are used to determine which threads will generate from the - // corresponding distribution - Span offsets{buffer_start, all_offsets.end()}; - - // Find the distribution this thread will generate from - size_type dist_idx = buffer_start - all_offsets.begin() - + find_distribution_index(offsets, tid.get()); - CELER_ASSERT(dist_idx < offload.distributions.size()); - auto& dist = offload.distributions[DistId(dist_idx)]; - CELER_ASSERT(dist); - - // Create the view to the new track to be initialized - CoreTrackView vacancy{ - *params, *state, [&] { - // Get the vacancy from the back in case there - // are more vacancies than photons to generate - TrackSlotId idx{ - index_before(counters->num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; - - if (!dist.material) + // Original code set the number of threads to the minimum between of number + // of vacancies and the number of pending in the auxiliary state. To avoid + // accessing the state counters to compute this min, we skip the extra + // threads if state.counters.num_vacancies < aux_state.counters.num_pending + if (tid < counters->num_vacancies) { - // If the optical material hasn't been set, initialize a temporary - // geometry state at the pre-step point and use it to find the optical - // material ID - auto geo = vacancy.geometry(); - geo = GeoTrackInitializer{dist.points[StepPoint::pre].pos, {1, 0, 0}}; - dist.material = vacancy.material_record(geo).material_id(); - } - CELER_ASSERT(dist.material); - - // Generate one track from the distribution - auto rng = vacancy.rng(); - if (dist.type == GeneratorType::cherenkov) - { - CELER_ASSERT(cherenkov); - auto opt_mat = vacancy.material_record(dist.material); - vacancy = CherenkovGenerator(opt_mat, cherenkov, dist)(rng); - } - else - { - CELER_ASSERT(scintillation); - vacancy = ScintillationGenerator(scintillation, dist)(rng); + // Find the index of the first distribution that has a nonzero number + // of primaries left to 
generate + auto all_offsets = offload.offsets[ItemRange( + ItemId(0), ItemId(buffer_size))]; + auto buffer_start = celeritas::upper_bound( + all_offsets.begin(), all_offsets.end(), size_type(0)); + CELER_ASSERT(buffer_start != all_offsets.end()); + + // Get the cumulative sum of the number of photons in the + // distributions. The values are used to determine which threads will + // generate from the corresponding distribution + Span offsets{buffer_start, all_offsets.end()}; + + // Find the distribution this thread will generate from + size_type dist_idx = buffer_start - all_offsets.begin() + + find_distribution_index(offsets, tid.get()); + CELER_ASSERT(dist_idx < offload.distributions.size()); + auto& dist = offload.distributions[DistId(dist_idx)]; + CELER_ASSERT(dist); + + // Create the view to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there + // are more vacancies than photons to generate + TrackSlotId idx{index_before(counters->num_vacancies, + ThreadId(tid.get()))}; + return state->init.vacancies[idx]; + }()}; + + if (!dist.material) + { + // If the optical material hasn't been set, initialize a temporary + // geometry state at the pre-step point and use it to find the + // optical material ID + auto geo = vacancy.geometry(); + geo = GeoTrackInitializer{dist.points[StepPoint::pre].pos, + {1, 0, 0}}; + dist.material = vacancy.material_record(geo).material_id(); + } + CELER_ASSERT(dist.material); + + // Generate one track from the distribution + auto rng = vacancy.rng(); + if (dist.type == GeneratorType::cherenkov) + { + CELER_ASSERT(cherenkov); + auto opt_mat = vacancy.material_record(dist.material); + vacancy = CherenkovGenerator(opt_mat, cherenkov, dist)(rng); + } + else + { + CELER_ASSERT(scintillation); + vacancy = ScintillationGenerator(scintillation, dist)(rng); + } } } diff --git a/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh 
b/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh index fc118decd6..9e52b8f5c3 100644 --- a/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh @@ -59,21 +59,30 @@ CELER_FUNCTION void PrimaryGeneratorExecutor::operator()(TrackSlotId tid) const CELER_EXPECT(data); CELER_EXPECT(distributions); - CoreTrackView track(*params, *state, tid); - auto const& counters = track.counters(); + CoreTrackView temp(*params, *state, TrackSlotId{0}); + auto const& counters = temp.counters(); - // Create the view to the new track to be initialized - CoreTrackView vacancy{*params, *state, [&] { - // Get the vacancy from the back in case there - // are more vacancies than photons to generate - TrackSlotId idx{index_before( - counters.num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; + // Original code set the number of threads to the minimum between of number + // of vacancies and the number of pending in the auxiliary state. 
To avoid + // accessing the state counters to compute this min, we skip the extra + // threads if counters.num_vacancies < aux_state.counters.num_pending + if (tid < counters.num_vacancies) + { + // Create the view to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there + // are more vacancies than photons to generate + TrackSlotId idx{ + index_before(counters.num_vacancies, ThreadId(tid.get()))}; + return state->init.vacancies[idx]; + }()}; - // Generate one primary from the distribution - auto rng = track.rng(); - vacancy = PrimaryGenerator(distributions, data)(rng); + // Generate one primary from the distribution + CoreTrackView track(*params, *state, tid); + auto rng = track.rng(); + vacancy = PrimaryGenerator(distributions, data)(rng); + } } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/detail/UpdateSumExecutor.hh b/src/celeritas/optical/gen/detail/UpdateSumExecutor.hh index 49d54a1711..7eaea26c14 100644 --- a/src/celeritas/optical/gen/detail/UpdateSumExecutor.hh +++ b/src/celeritas/optical/gen/detail/UpdateSumExecutor.hh @@ -8,6 +8,7 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" +#include "celeritas/track/CoreStateCounters.hh" #include "../GeneratorData.hh" @@ -27,6 +28,7 @@ struct UpdateSumExecutor { //// DATA //// + RefPtr state; NativeRef const offload; size_type num_gen{}; @@ -48,18 +50,25 @@ struct UpdateSumExecutor */ CELER_FUNCTION void UpdateSumExecutor::operator()(TrackSlotId tid) const { + CELER_EXPECT(state); CELER_EXPECT(offload); CELER_EXPECT(num_gen > 0); CELER_EXPECT(tid < offload.offsets.size()); + // We deferred the check for the number of vacancies, but capped the + // updates at num_vacancies in the GeneratorExecutor functor if it was + // less than num_gen, so make the same adjustment here. 
+ auto* counters = state->init.counters.data().get(); + size_type num_generated = min(num_gen, counters->num_vacancies); + auto& offset = offload.offsets[ItemId(tid.get())]; - if (offset < num_gen) + if (offset < num_generated) { offset = 0; } else { - offset -= num_gen; + offset -= num_generated; } } diff --git a/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh index a637dbfbaa..7d6ecb4561 100644 --- a/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh @@ -65,43 +65,50 @@ CELER_FUNCTION void WlsGeneratorExecutor::operator()(TrackSlotId tid) const auto* counters = state->init.counters.data().get(); - // Get the cumulative sum of the number of photons in the distributions. - // The values are used to determine which threads will generate from the - // corresponding distribution - auto offsets = data.offsets[ItemRange( - ItemId(0), ItemId(buffer_size))]; + // Original code set the number of threads to the minimum between of number + // of vacancies and the number of pending in the auxiliary state. To avoid + // accessing the state counters to compute this min, we skip the extra + // threads if state.counters.num_vacancies < aux_state.counters.num_pending + if (tid < counters->num_vacancies) + { + // Get the cumulative sum of the number of photons in the + // distributions. 
The values are used to determine which threads will + // generate from the corresponding distribution + auto offsets = data.offsets[ItemRange( + ItemId(0), ItemId(buffer_size))]; - // Find the distribution this thread will generate from - size_type dist_idx = find_distribution_index(offsets, tid.get()); - CELER_ASSERT(dist_idx < data.distributions.size()); - auto& dist = data.distributions[DistId(dist_idx)]; - CELER_ASSERT(dist); + // Find the distribution this thread will generate from + size_type dist_idx = find_distribution_index(offsets, tid.get()); + CELER_ASSERT(dist_idx < data.distributions.size()); + auto& dist = data.distributions[DistId(dist_idx)]; + CELER_ASSERT(dist); - // Create the view to the new track to be initialized - CoreTrackView vacancy{ - *params, *state, [&] { - // Get the vacancy from the back in case there are more vacancies - // than photons to generate - TrackSlotId idx{ - index_before(counters->num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; + // Create the view to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there are more + // vacancies than photons to generate + TrackSlotId idx{index_before(counters->num_vacancies, + ThreadId(tid.get()))}; + return state->init.vacancies[idx]; + }()}; - // Generate one track from the distribution - auto rng = vacancy.rng(); - if (dist.type == GeneratorType::wls) - { - CELER_ASSERT(wls); - vacancy = WavelengthShiftGenerator(wls, dist)(rng); - } - else - { - CELER_ASSERT(wls2); - vacancy = WavelengthShiftGenerator(wls2, dist)(rng); - } + // Generate one track from the distribution + auto rng = vacancy.rng(); + if (dist.type == GeneratorType::wls) + { + CELER_ASSERT(wls); + vacancy = WavelengthShiftGenerator(wls, dist)(rng); + } + else + { + CELER_ASSERT(wls2); + vacancy = WavelengthShiftGenerator(wls2, dist)(rng); + } - // Update the number of photons left to generate - 
atomic_add(&dist.num_photons, size_type(-1)); + // Update the number of photons left to generate + atomic_add(&dist.num_photons, size_type(-1)); + } } //---------------------------------------------------------------------------// From c338ff3836a9f85cad963c376041b4ac129e67a1 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 15 Apr 2026 23:09:41 -0400 Subject: [PATCH 52/74] Remove last synchronization and update comments --- src/celeritas/optical/gen/DirectGeneratorAction.cc | 2 +- src/celeritas/optical/gen/GeneratorAction.cc | 2 +- src/celeritas/optical/gen/PrimaryGeneratorAction.cc | 6 +++--- src/celeritas/optical/gen/WlsGeneratorAction.cc | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index 6f176fa5f9..26e9ccbf59 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -188,7 +188,7 @@ void DirectGeneratorAction::step_impl(CoreParams const& params, if (counters.num_pending > 0) { // Generate the optical photons from the distribution data. To avoid - // synchronization, we defer the check for vacancies until later. + // synchronization, we defer the check for vacancies. this->generate(params, state); } diff --git a/src/celeritas/optical/gen/GeneratorAction.cc b/src/celeritas/optical/gen/GeneratorAction.cc index c3ca0ffac7..0c3cc94aee 100644 --- a/src/celeritas/optical/gen/GeneratorAction.cc +++ b/src/celeritas/optical/gen/GeneratorAction.cc @@ -205,7 +205,7 @@ void GeneratorAction::step_impl(CoreParams const& params, if (counters.num_pending > 0) { // Generate the optical photons from the distribution data. To avoid - // synchronization, we defer the check for vacancies until later. + // synchronization, we defer the check for vacancies. 
this->generate(params, state); } diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc index cb43fdacfb..6d9642cd50 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc @@ -150,10 +150,10 @@ void PrimaryGeneratorAction::step_impl(CoreParams const& params, auto const& counters = this->counters(*state.aux()).counters; - if (state.sync_get_counters().num_vacancies > 0 && counters.num_pending > 0) - // if (counters.num_pending > 0) + if (counters.num_pending > 0) { - // Generate the optical photons from the distribution data + // Generate the optical photons from the distribution data. To avoid + // synchronization, we defer the check for vacancies. this->generate(params, state); } diff --git a/src/celeritas/optical/gen/WlsGeneratorAction.cc b/src/celeritas/optical/gen/WlsGeneratorAction.cc index 6615073d06..2728b9860a 100644 --- a/src/celeritas/optical/gen/WlsGeneratorAction.cc +++ b/src/celeritas/optical/gen/WlsGeneratorAction.cc @@ -153,7 +153,7 @@ void WlsGeneratorAction::step_impl(CoreParams const& params, if (counters.num_pending > 0) { // Generate the optical photons from the distribution data. To avoid - // synchronization, we defer the check for vacancies until later. + // synchronization, we defer the check for vacancies. 
this->generate(params, state); // Compact the buffer again to remove stale distributions and free up From 87fd4594670752f60829d8c43db7b04f3fcbd3a6 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 17 Apr 2026 11:36:56 -0400 Subject: [PATCH 53/74] Remove core state synchronization from Runner::operator() --- src/celeritas/CMakeLists.txt | 1 + src/celeritas/optical/Runner.cc | 36 ++++++++++++++++++++++++++++++--- src/celeritas/optical/Runner.cu | 33 ++++++++++++++++++++++++++++++ src/celeritas/optical/Runner.hh | 6 ++++++ 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 src/celeritas/optical/Runner.cu diff --git a/src/celeritas/CMakeLists.txt b/src/celeritas/CMakeLists.txt index 290fb7a294..2d17123d77 100644 --- a/src/celeritas/CMakeLists.txt +++ b/src/celeritas/CMakeLists.txt @@ -398,6 +398,7 @@ celeritas_polysource(neutron/model/ChipsNeutronElasticModel) celeritas_polysource(neutron/model/NeutronCaptureModel) celeritas_polysource(neutron/model/NeutronInelasticModel) celeritas_polysource(optical/model/AbsorptionModel) +celeritas_polysource(optical/Runner) celeritas_polysource(optical/model/MieModel) celeritas_polysource(optical/model/RayleighModel) celeritas_polysource(optical/model/WavelengthShiftModel) diff --git a/src/celeritas/optical/Runner.cc b/src/celeritas/optical/Runner.cc index 12f081309d..348807b003 100644 --- a/src/celeritas/optical/Runner.cc +++ b/src/celeritas/optical/Runner.cc @@ -10,6 +10,7 @@ #include "corecel/io/OutputInterfaceAdapter.hh" #include "corecel/io/OutputRegistry.hh" +#include "corecel/sys/KernelLauncher.hh" #include "celeritas/inp/StandaloneInputIO.json.hh" #include "celeritas/phys/GeneratorRegistry.hh" #include "celeritas/setup/Problem.hh" @@ -123,12 +124,21 @@ auto Runner::operator()(SpanConstGenDist data) -> Result * for some run modes, e.g. offloading distributions through accel where we * already know the number of pending tracks. 
*/ - auto counters = state_->sync_get_counters(); + size_type total_pending(0); for (auto const& d : data) { - counters.num_pending += d.num_photons; + total_pending += d.num_photons; + } + if (celeritas::device()) + { + auto* s = static_cast*>(&*state_); + update_pending(*s, total_pending); + } + else + { + auto* s = static_cast*>(&*state_); + update_pending(*s, total_pending); } - state_->sync_put_counters(counters); return this->run(); } @@ -156,6 +166,26 @@ auto Runner::run() const -> Result return result; } +//---------------------------------------------------------------------------// +/*! + * Launch a (host) kernel to update the number of pending optical photons. + */ +void Runner::update_pending(CoreState& state, + size_type num_pending) const +{ + // Update the number of pending optical photons + detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; + launch_kernel(1, execute); +} + +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +void Runner::update_pending(CoreState&, size_type) const +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + //---------------------------------------------------------------------------// } // namespace optical } // namespace celeritas diff --git a/src/celeritas/optical/Runner.cu b/src/celeritas/optical/Runner.cu new file mode 100644 index 0000000000..0d0fe7cae7 --- /dev/null +++ b/src/celeritas/optical/Runner.cu @@ -0,0 +1,33 @@ +//------------------------------ -*- cuda -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! 
\file celeritas/optical/Runner.cu +//---------------------------------------------------------------------------// +#include "Runner.hh" + +#include "corecel/Assert.hh" +#include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/action/ActionLauncher.device.hh" + +namespace celeritas +{ +namespace optical +{ +//---------------------------------------------------------------------------// +/*! + * Launch a (device) kernel to update the number of pending optical photons. + */ +void Runner::update_pending(CoreState& state, + size_type num_pending) const +{ + // Update the number of pending optical photons + detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; + static KernelLauncher const launch_kernel( + "update-pending"); + launch_kernel(1, state.stream_id(), execute); +} + +//---------------------------------------------------------------------------// +} // namespace optical +} // namespace celeritas diff --git a/src/celeritas/optical/Runner.hh b/src/celeritas/optical/Runner.hh index 1cf18fbd78..25d643c662 100644 --- a/src/celeritas/optical/Runner.hh +++ b/src/celeritas/optical/Runner.hh @@ -9,6 +9,7 @@ #include #include "celeritas/inp/StandaloneInput.hh" +#include "celeritas/optical/action/ActionInterface.hh" #include "celeritas/setup/StandaloneInput.hh" #include "celeritas/user/ActionTimes.hh" @@ -16,6 +17,7 @@ #include "gen/DirectGeneratorAction.hh" #include "gen/GeneratorAction.hh" #include "gen/PrimaryGeneratorAction.hh" +#include "gen/detail/UpdatePendingExecutor.hh" namespace celeritas { @@ -83,6 +85,10 @@ class Runner //// HELPER FUNCTIONS //// Result run() const; + + // Update the num_pending state counter + void update_pending(CoreState&, size_type) const; + void update_pending(CoreState&, size_type) const; }; //---------------------------------------------------------------------------// From 67b6a09cc52824b42b80fa4b237707ad10b0ba6c Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 17 Apr 2026 12:03:19 -0400 Subject: [PATCH 
54/74] Remove some unnecessary header files --- src/celeritas/optical/Runner.hh | 1 - src/celeritas/optical/action/LocateVacanciesAction.cu | 1 - .../optical/action/detail/UpdateAliveExecutor.hh | 4 ---- src/celeritas/optical/gen/GeneratorBase.cu | 4 +--- src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh | 4 ---- src/celeritas/track/detail/UpdateCountersExecutor.hh | 8 -------- 6 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/celeritas/optical/Runner.hh b/src/celeritas/optical/Runner.hh index 25d643c662..9b3c48eb4a 100644 --- a/src/celeritas/optical/Runner.hh +++ b/src/celeritas/optical/Runner.hh @@ -9,7 +9,6 @@ #include #include "celeritas/inp/StandaloneInput.hh" -#include "celeritas/optical/action/ActionInterface.hh" #include "celeritas/setup/StandaloneInput.hh" #include "celeritas/user/ActionTimes.hh" diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cu b/src/celeritas/optical/action/LocateVacanciesAction.cu index 7ef681d987..78e737f520 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cu +++ b/src/celeritas/optical/action/LocateVacanciesAction.cu @@ -14,7 +14,6 @@ #include "../CoreParams.hh" #include "../CoreState.hh" -#include "detail/TrackInitAlgorithms.hh" #include "detail/UpdateAliveExecutor.hh" namespace celeritas diff --git a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh index c76bae04e2..8d293d94bd 100644 --- a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh +++ b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh @@ -8,11 +8,7 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" -#include "corecel/math/Algorithms.hh" #include "celeritas/optical/CoreState.hh" -#include "celeritas/optical/CoreTrackData.hh" -#include "celeritas/optical/CoreTrackView.hh" -#include "celeritas/track/Utils.hh" namespace celeritas { diff --git a/src/celeritas/optical/gen/GeneratorBase.cu 
b/src/celeritas/optical/gen/GeneratorBase.cu index ac1aa95372..1327c2bf8a 100644 --- a/src/celeritas/optical/gen/GeneratorBase.cu +++ b/src/celeritas/optical/gen/GeneratorBase.cu @@ -7,10 +7,8 @@ #include "GeneratorBase.hh" #include "corecel/Assert.hh" -#include "celeritas/optical/CoreParams.hh" #include "celeritas/optical/CoreState.hh" #include "celeritas/optical/action/ActionLauncher.device.hh" -#include "celeritas/optical/action/TrackSlotExecutor.hh" #include "detail/UpdatePendingExecutor.hh" @@ -20,7 +18,7 @@ namespace optical { //---------------------------------------------------------------------------// /*! - * Launch a (device) kernel to initialize optical photons. + * Launch a (device) kernel to update the number of pending optical photons. */ void GeneratorBase::update_pending(CoreStateDevice& state, size_type num_pending) const diff --git a/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh index 5d8ccc3efb..f1673dc9e6 100644 --- a/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh +++ b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh @@ -8,11 +8,7 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" -#include "corecel/math/Algorithms.hh" #include "celeritas/optical/CoreState.hh" -#include "celeritas/optical/CoreTrackData.hh" -#include "celeritas/optical/CoreTrackView.hh" -#include "celeritas/track/Utils.hh" namespace celeritas { diff --git a/src/celeritas/track/detail/UpdateCountersExecutor.hh b/src/celeritas/track/detail/UpdateCountersExecutor.hh index d7b23719fe..de978d3438 100644 --- a/src/celeritas/track/detail/UpdateCountersExecutor.hh +++ b/src/celeritas/track/detail/UpdateCountersExecutor.hh @@ -8,17 +8,9 @@ #include "corecel/Assert.hh" #include "corecel/Macros.hh" -#include "corecel/cont/Span.hh" -#include "corecel/random/engine/InitializeRngState.hh" -#include "celeritas/Quantities.hh" #include "celeritas/Types.hh" -#include 
"celeritas/global/CoreTrackData.hh" -#include "celeritas/phys/ParticleData.hh" -#include "celeritas/phys/Primary.hh" -#include "../SimData.hh" #include "../TrackInitData.hh" -#include "../Utils.hh" namespace celeritas { From 62078b0f18f86c3078412069cc69fd8f29924a47 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 17 Apr 2026 14:57:14 -0400 Subject: [PATCH 55/74] Resolve nvcc warning --- src/accel/detail/IntegrationSingleton.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/accel/detail/IntegrationSingleton.cc b/src/accel/detail/IntegrationSingleton.cc index ddec014e9c..25345f0360 100644 --- a/src/accel/detail/IntegrationSingleton.cc +++ b/src/accel/detail/IntegrationSingleton.cc @@ -64,7 +64,7 @@ validate_and_return_offloaded(std::optional const& user) return *user; } //---------------------------------------------------------------------------// -}; // namespace +} // namespace //---------------------------------------------------------------------------// /*! 
From 51de5b7d32f683933a9f98c43438aef764f9ea24 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 17 Apr 2026 14:58:46 -0400 Subject: [PATCH 56/74] Remove extraneous semicolon ending a namespace -- old gcc versions produce warnings --- src/celeritas/em/distribution/BhabhaEnergyDistribution.hh | 2 +- src/celeritas/io/ImportVolume.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/celeritas/em/distribution/BhabhaEnergyDistribution.hh b/src/celeritas/em/distribution/BhabhaEnergyDistribution.hh index 0e7aad6592..a91c71dbbc 100644 --- a/src/celeritas/em/distribution/BhabhaEnergyDistribution.hh +++ b/src/celeritas/em/distribution/BhabhaEnergyDistribution.hh @@ -60,7 +60,7 @@ class BhabhaEnergyDistribution return 1; } -}; // namespace BhabhaEnergyDistribution +} // namespace BhabhaEnergyDistribution //---------------------------------------------------------------------------// // INLINE DEFINITIONS diff --git a/src/celeritas/io/ImportVolume.hh b/src/celeritas/io/ImportVolume.hh index 35e4da016e..470f749144 100644 --- a/src/celeritas/io/ImportVolume.hh +++ b/src/celeritas/io/ImportVolume.hh @@ -29,7 +29,7 @@ struct ImportVolume using Index = unsigned int; //!@} - static constexpr Index unspecified = -1; + static constexpr Index unspecified = static_cast(-1); Index geo_material_id{unspecified}; //!< Material defined by geometry Index region_id{unspecified}; //!< Optional region associated From 359f451575470c347c77a3f39898457e4d189b98 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Thu, 23 Apr 2026 15:24:47 -0400 Subject: [PATCH 57/74] Fix typo in documentation of filename --- src/celeritas/optical/action/LocateVacanciesAction.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cu b/src/celeritas/optical/action/LocateVacanciesAction.cu index 78e737f520..ee134151e8 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cu +++ 
b/src/celeritas/optical/action/LocateVacanciesAction.cu @@ -2,7 +2,7 @@ // Copyright Celeritas contributors: see top-level COPYRIGHT file for details // SPDX-License-Identifier: (Apache-2.0 OR MIT) //---------------------------------------------------------------------------// -//! \file celeritas/optical/action/LcateVacanciesAction.cu +//! \file celeritas/optical/action/LocateVacanciesAction.cu //---------------------------------------------------------------------------// #include "LocateVacanciesAction.hh" From 3755a26af2319a7b5a88cc9525606098858db9a6 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Fri, 24 Apr 2026 09:42:43 -0400 Subject: [PATCH 58/74] Use dynamic_cast instead of static_cast --- src/accel/LocalOpticalGenOffload.cc | 7 +++---- src/celeritas/optical/Runner.cc | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index 485f899101..1fc88322f9 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -187,16 +187,15 @@ void LocalOpticalGenOffload::Flush() generate_->insert(*state_, make_span(buffer_)); // Update the number of primaries waiting to be generated based on the - // number of photons. Can use static_cast since the appropriate derived - // class is specified in the constructor. + // number of photons. 
if (celeritas::device()) { - auto* s = static_cast*>(&*state_); + auto* s = dynamic_cast*>(&*state_); update_primaries(*s); } else { - auto* s = static_cast*>(&*state_); + auto* s = dynamic_cast*>(&*state_); update_primaries(*s); } diff --git a/src/celeritas/optical/Runner.cc b/src/celeritas/optical/Runner.cc index d73b1afdaf..8a23cff51c 100644 --- a/src/celeritas/optical/Runner.cc +++ b/src/celeritas/optical/Runner.cc @@ -134,12 +134,12 @@ auto Runner::operator()(SpanConstGenDist data) -> Result } if (celeritas::device()) { - auto* s = static_cast*>(&*state_); + auto* s = dynamic_cast*>(&*state_); update_pending(*s, total_pending); } else { - auto* s = static_cast*>(&*state_); + auto* s = dynamic_cast*>(&*state_); update_pending(*s, total_pending); } From 177390a0479712bd953c7e22dda87755c3e9bb38 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 27 Apr 2026 15:30:19 -0400 Subject: [PATCH 59/74] Remove synchronization from InitializeTracksAction::step_impl --- src/celeritas/track/InitializeTracksAction.cc | 62 +++++++-------- src/celeritas/track/InitializeTracksAction.cu | 17 +++-- .../track/detail/InitTracksExecutor.hh | 75 ++++++++++--------- .../track/detail/TrackInitAlgorithms.cc | 6 +- .../track/detail/TrackInitAlgorithms.cu | 40 ++++------ .../track/detail/TrackInitAlgorithms.hh | 3 - .../track/detail/UpdateNewTracksExecutor.hh | 68 +++++++++++++++++ 7 files changed, 165 insertions(+), 106 deletions(-) create mode 100644 src/celeritas/track/detail/UpdateNewTracksExecutor.hh diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index a14930fc62..e540cf77ae 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -19,6 +19,7 @@ #include "detail/InitTracksExecutor.hh" // IWYU pragma: associated #include "detail/TrackInitAlgorithms.hh" +#include "detail/UpdateNewTracksExecutor.hh" // IWYU pragma: associated namespace celeritas { @@ -55,62 +56,51 @@ 
template void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreState& core_state) const { - auto counters = core_state.sync_get_counters(); - // The number of new tracks to initialize is the smaller of the number of - // empty slots in the track vector and the number of track initializers - size_type num_new_tracks - = std::min(counters.num_vacancies, counters.num_initializers); - if (num_new_tracks > 0) + // empty slots in the track vector and the number of track initializers. + // To avoid synchronizing the core state counters, we let the kernels + // calculate the number of new tracks and proceed accordingly. This means + // the code might sometimes call these functions when there is no work + // to do, but that's quickly determined so the overhead should be minimal. + if (core_params.init()->track_order() == TrackOrder::init_charge) { - if (core_params.init()->track_order() == TrackOrder::init_charge) - { - // Reset track initializer indices - fill_sequence(&core_state.ref().init.indices, - core_state.stream_id()); - - // Partition indices by whether tracks are charged or neutral - detail::partition_initializers(core_params, - core_state.ref().init, - num_new_tracks, - core_state.stream_id()); - } + // Reset track initializer indices + fill_sequence(&core_state.ref().init.indices, core_state.stream_id()); - // Launch a kernel to initialize tracks - this->step_impl(core_params, core_state, num_new_tracks); - - // Update initializers/vacancies - counters.num_initializers -= num_new_tracks; - counters.num_vacancies -= num_new_tracks; + // Partition indices by whether tracks are charged or neutral + detail::partition_initializers( + core_params, core_state.ref().init, core_state.stream_id()); } - // Store number of active tracks at the start of the loop - counters.num_active = core_state.size() - counters.num_vacancies; - core_state.sync_put_counters(counters); + // Launch a kernel to initialize tracks, using the largest possible + // number and 
computing the actual number in the kernel. + this->step_impl(core_params, core_state, core_state.size()); } //---------------------------------------------------------------------------// /*! - * Launch a (host) kernel to initialize tracks. + * Launch (host) kernels to initialize tracks and to update the corresponding + * counters. * * The thread index here corresponds to initializer indices, not track slots * (or indices into the track slot indirection array). */ void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreStateHost& core_state, - size_type num_new_tracks) const + size_type max_new_tracks) const { - detail::InitTracksExecutor execute{ - core_params.ptr(), core_state.ptr(), num_new_tracks}; - return launch_action( - *this, num_new_tracks, core_params, core_state, execute); + detail::InitTracksExecutor execute{core_params.ptr(), + core_state.ptr()}; + launch_action(*this, max_new_tracks, core_params, core_state, execute); + + detail::UpdateNewTracksExecutor execute_thread{ + core_params.ptr(), core_state.ptr()}; + return launch_action(*this, 1, core_params, core_state, execute_thread); } //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -void InitializeTracksAction::step_impl(CoreParams const&, - CoreStateDevice&, - size_type) const +void InitializeTracksAction::step_impl(CoreParams const&, CoreStateDevice&) const { CELER_NOT_CONFIGURED("CUDA OR HIP"); } diff --git a/src/celeritas/track/InitializeTracksAction.cu b/src/celeritas/track/InitializeTracksAction.cu index da65606798..489bbb980c 100644 --- a/src/celeritas/track/InitializeTracksAction.cu +++ b/src/celeritas/track/InitializeTracksAction.cu @@ -11,21 +11,28 @@ #include "celeritas/global/CoreState.hh" #include "detail/InitTracksExecutor.hh" +#include "detail/UpdateNewTracksExecutor.hh" namespace celeritas { //---------------------------------------------------------------------------// /*! - * Launch a kernel to initialize tracks. 
+ * Launch (device) kernels to initialize tracks and to update the corresponding + * counters. */ void InitializeTracksAction::step_impl(CoreParams const& params, CoreStateDevice& state, size_type num_new_tracks) const { - detail::InitTracksExecutor execute_thread{ - params.ptr(), state.ptr(), num_new_tracks}; - static ActionLauncher const launch_kernel(*this); - launch_kernel(num_new_tracks, state.stream_id(), execute_thread); + detail::InitTracksExecutor execute{params.ptr(), + state.ptr()}; + static ActionLauncher const launch_kernel(*this); + launch_kernel(num_new_tracks, state.stream_id(), execute); + + detail::UpdateNewTracksExecutor execute_thread{ + params.ptr(), state.ptr()}; + static ActionLauncher const launch_kernel2(*this); + launch_kernel2(1, state.stream_id(), execute_thread); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/detail/InitTracksExecutor.hh b/src/celeritas/track/detail/InitTracksExecutor.hh index 0414fe1dce..9e588df34e 100644 --- a/src/celeritas/track/detail/InitTracksExecutor.hh +++ b/src/celeritas/track/detail/InitTracksExecutor.hh @@ -45,7 +45,6 @@ struct InitTracksExecutor ParamsPtr params; StatePtr state; - size_type num_init{}; //// FUNCTIONS //// @@ -63,46 +62,54 @@ struct InitTracksExecutor */ CELER_FUNCTION void InitTracksExecutor::operator()(ThreadId tid) const { - CELER_EXPECT(tid < num_init); + CELER_EXPECT(params); + CELER_EXPECT(state); auto const& data = state->init; auto* counters = state->init.counters.data().get(); - // Get the track initializer from the back of the vector. Since new - // initializers are pushed to the back of the vector, these will be the - // most recently added and therefore the ones that still might have a - // parent they can copy the geometry state from. 
- TrackInitializer& init = data.initializers[ItemId([&] { - if (params->init.track_order == TrackOrder::init_charge) - { - // Get the index into the track initializer or parent track slot ID - // array from the sorted indices - return data.indices[TrackSlotId(index_before(num_init, tid))] - + counters->num_initializers - num_init; - } - return index_before(counters->num_initializers, tid); - }())]; - - // View to the new track to be initialized - CoreTrackView vacancy{ - *params, *state, [&] { - if (params->init.track_order == TrackOrder::init_charge - && IsNeutral{params}(init)) + size_type num_init + = min(counters->num_vacancies, counters->num_initializers); + CELER_EXPECT(num_init <= state->size()); + if (tid < num_init) + { + // Get the track initializer from the back of the vector. Since new + // initializers are pushed to the back of the vector, these will be the + // most recently added and therefore the ones that still might have a + // parent they can copy the geometry state from. 
+ TrackInitializer& init = data.initializers[ItemId([&] { + if (params->init.track_order == TrackOrder::init_charge) { - // Get the vacancy from the front of the track state - return data.vacancies[TrackSlotId(index_before(num_init, tid))]; + // Get the index into the track initializer or parent track + // slot ID array from the sorted indices + return data.indices[TrackSlotId(index_before(num_init, tid))] + + counters->num_initializers - num_init; } - // Get the vacancy from the back of the track state - return data.vacancies[TrackSlotId( - index_before(counters->num_vacancies, tid))]; - }()}; + return index_before(counters->num_initializers, tid); + }())]; - // Clear parent IDs if new primaries were added this step - if (counters->num_generated) - { - init.geo.parent = {}; - } + // View to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + if (params->init.track_order == TrackOrder::init_charge + && IsNeutral{params}(init)) + { + // Get the vacancy from the front of the track state + return data + .vacancies[TrackSlotId(index_before(num_init, tid))]; + } + // Get the vacancy from the back of the track state + return data.vacancies[TrackSlotId( + index_before(counters->num_vacancies, tid))]; + }()}; - vacancy = init; + // Clear parent IDs if new primaries were added this step + if (counters->num_generated) + { + init.geo.parent = {}; + } + + vacancy = init; + } } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cc b/src/celeritas/track/detail/TrackInitAlgorithms.cc index f34b45b2e9..902ab24831 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cc +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cc @@ -81,13 +81,15 @@ size_type exclusive_scan_counts( void partition_initializers( CoreParams const& params, TrackInitStateData const& init, - size_type count, StreamId) { // Partition the indices based on the track initializer charge + 
auto* counters = init.counters.data().get(); + auto count = std::min(counters->num_vacancies, counters->num_initializers); + if (count == 0) + return; auto* start = init.indices.data().get(); auto* end = start + count; - auto* counters = init.counters.data().get(); auto* stencil = init.initializers.data().get() + counters->num_initializers - count; std::stable_partition( diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.cu b/src/celeritas/track/detail/TrackInitAlgorithms.cu index 2f19f2e419..7fc7483e8a 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.cu +++ b/src/celeritas/track/detail/TrackInitAlgorithms.cu @@ -95,8 +95,6 @@ void remove_if_alive( Copier copy{{counters.get(), 1}, stream_id}; copy(MemSpace::host, {&host_counters, 1}); - stream.sync(); - return; #else auto& stream = device().stream(stream_id); // Calling with nullptr causes the function to return the amount of working @@ -125,8 +123,9 @@ void remove_if_alive( stream.get()); CELER_DISCARD(cub_error_code); CELER_DEVICE_API_CALL(PeekAtLastError()); - return; #endif + stream.sync(); + return; } //---------------------------------------------------------------------------// @@ -155,9 +154,6 @@ size_type exclusive_scan_counts( // Copy the last element (accumulated total) back to host auto result = ItemCopier{stream_id}(stop.get() - 1); - - stream.sync(); - return result; #else // Calling with nullptr causes the function to return the amount of working // space needed instead of invoking the kernel. 
@@ -175,18 +171,15 @@ size_type exclusive_scan_counts( data, counts.size(), stream.get()); - // Set the counter similar to the following - // counters.num_secondaries = "last value in the counts object; CELER_DISCARD(cub_error_code); CELER_DEVICE_API_CALL(PeekAtLastError()); // Copy the last element (accumulated total) back to host auto result = ItemCopier{stream_id}(data.get() + counts.size() - 1); - +#endif stream.sync(); return result; -#endif } //---------------------------------------------------------------------------// @@ -199,26 +192,27 @@ size_type exclusive_scan_counts( void partition_initializers( CoreParams const& params, TrackInitStateData const& init, - size_type count, StreamId stream_id) { - CELER_EXPECT(count != 0); - ScopedProfiling profile_this{"partition-initializers"}; -#if CELER_USE_THRUST // Partition the indices based on the track initializer charge - auto start = device_pointer_cast(init.indices.data()); - auto end = start + count; auto counters = device_pointer_cast(init.counters.data()); auto cpucntrs = ItemCopier{stream_id}(counters.get()); + size_type count = min(cpucntrs.num_vacancies, cpucntrs.num_initializers); + if (count == 0) + return; + // The initializers array is large. Use stencil to point to the start where + // this array is being used auto stencil = static_cast(init.initializers.data()) + cpucntrs.num_initializers - count; +#if CELER_USE_THRUST + auto* start = device_pointer_cast(init.indices.data()); + auto* end = start + count; thrust::stable_partition( thrust_execute_on(stream_id), start, end, IsNeutralStencil{params.ptr(), stencil}); - CELER_DEVICE_API_CALL(PeekAtLastError()); #else auto& stream = device().stream(stream_id); // CUB doesn't have a partition function that allows the user to specify @@ -227,13 +221,6 @@ void partition_initializers( // instead we create an iterator by using a functor to transform the // stencil values into boolean flags that determine how to partition // the indices. 
- // - // The initializers array is large. Use stencil to point to the start where - // this array is being used - auto counters = device_pointer_cast(init.counters.data()); - auto cpucntrs = ItemCopier{stream_id}(counters.get()); - auto stencil = static_cast(init.initializers.data()) - + cpucntrs.num_initializers - count; DeviceVector flags{count, stream_id}; # if CELER_CUB_HAS_TRANSFORM || CELER_HIPCUB_HAS_TRANSFORM // HIP defines hipCUB functions as [[nodiscard]], but we defer error checks @@ -254,7 +241,7 @@ void partition_initializers( IsNeutral{params.ptr()}); # endif // Calling with nullptr causes the function to return the amount of working - // space needed instead of invoking the kernel. + // space needed instead of invoking the kernel size_t temp_storage_bytes = 0; // CUB doesn't support in-place partitioning, so use a counting iterator // because the indices are always sequential from zero @@ -282,8 +269,9 @@ void partition_initializers( count, stream.get()); CELER_DISCARD(cub_error_code); - CELER_DEVICE_API_CALL(PeekAtLastError()); #endif + CELER_DEVICE_API_CALL(PeekAtLastError()); + stream.sync(); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/detail/TrackInitAlgorithms.hh b/src/celeritas/track/detail/TrackInitAlgorithms.hh index 81d0b53201..5eaad1022f 100644 --- a/src/celeritas/track/detail/TrackInitAlgorithms.hh +++ b/src/celeritas/track/detail/TrackInitAlgorithms.hh @@ -59,12 +59,10 @@ size_type exclusive_scan_counts( void partition_initializers( CoreParams const&, TrackInitStateData const&, - size_type, StreamId); void partition_initializers( CoreParams const&, TrackInitStateData const&, - size_type, StreamId); //---------------------------------------------------------------------------// @@ -87,7 +85,6 @@ inline size_type exclusive_scan_counts( inline void partition_initializers( CoreParams const&, TrackInitStateData const&, - size_type, StreamId) { CELER_NOT_CONFIGURED("CUDA or 
HIP"); diff --git a/src/celeritas/track/detail/UpdateNewTracksExecutor.hh b/src/celeritas/track/detail/UpdateNewTracksExecutor.hh new file mode 100644 index 0000000000..46c161eccd --- /dev/null +++ b/src/celeritas/track/detail/UpdateNewTracksExecutor.hh @@ -0,0 +1,68 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/track/detail/UpdateNewTracksExecutor.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Assert.hh" +#include "corecel/Macros.hh" +#include "celeritas/Types.hh" + +#include "../TrackInitData.hh" + +namespace celeritas +{ +namespace detail +{ +//---------------------------------------------------------------------------// +/*! + * Update state counters based on the number of new tracks. + */ +struct UpdateNewTracksExecutor +{ + //// TYPES //// + + using ParamsPtr = CRefPtr; + using StatePtr = RefPtr; + + //// DATA //// + + ParamsPtr params; + StatePtr state; + + //// FUNCTIONS //// + + // Update state counters based on the number of primaries + inline CELER_FUNCTION void operator()(ThreadId tid) const; +}; + +//---------------------------------------------------------------------------// +/*! + * Update state counters based on the number of new tracks. 
+ */ +CELER_FUNCTION void UpdateNewTracksExecutor::operator()(ThreadId tid) const +{ + CELER_EXPECT(params); + CELER_EXPECT(state); + CELER_EXPECT(tid.get() == 0); // Should call with only one thread + + auto* counters = state->init.counters.data().get(); + + size_type num_new_tracks + = min(counters->num_vacancies, counters->num_initializers); + if (num_new_tracks > 0) + { + // Update initializers/vacancies + counters->num_initializers -= num_new_tracks; + counters->num_vacancies -= num_new_tracks; + } + // Store number of active tracks at the start of the loop + counters->num_active = state->size() - counters->num_vacancies; + return; +} + +//---------------------------------------------------------------------------// +} // namespace detail +} // namespace celeritas From e71afa8856985d5fbbf90aa369ad715e5ee2bc04 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 27 Apr 2026 16:46:44 -0400 Subject: [PATCH 60/74] Fix step_impl device function header in host-only code --- src/celeritas/track/InitializeTracksAction.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index e540cf77ae..66841c62b8 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -100,7 +100,9 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -void InitializeTracksAction::step_impl(CoreParams const&, CoreStateDevice&) const +void InitializeTracksAction::step_impl(CoreParams const&, + CoreStateDevice&, + size_type) const { CELER_NOT_CONFIGURED("CUDA OR HIP"); } From cf9ccd15e7df67b7ab45ac88008ebc2ef022bbf2 Mon Sep 17 00:00:00 2001 From: Seth R Johnson Date: Tue, 28 Apr 2026 15:12:44 -0400 Subject: [PATCH 61/74] WIP: interactive code review --- src/celeritas/global/TrackExecutor.hh | 14 ++++++++ 
src/celeritas/global/detail/SetGenerated.cc | 6 ++-- .../global/detail/SetGeneratedExecutor.hh | 23 +++++------- .../optical/gen/DirectGeneratorAction.cc | 2 +- .../gen/detail/DirectGeneratorExecutor.hh | 36 +++++++++---------- src/celeritas/track/TrackFunctors.hh | 10 ++++++ 6 files changed, 53 insertions(+), 38 deletions(-) diff --git a/src/celeritas/global/TrackExecutor.hh b/src/celeritas/global/TrackExecutor.hh index 8ee00589a7..cdef72302d 100644 --- a/src/celeritas/global/TrackExecutor.hh +++ b/src/celeritas/global/TrackExecutor.hh @@ -169,6 +169,20 @@ make_active_track_executor(CoreParamsPtr params, params, state, AppliesValid{}, celeritas::forward(apply_track)}; } +//---------------------------------------------------------------------------// +/*! + * Return a track executor that only applies to active, non-errored tracks. + */ +template +inline CELER_FUNCTION decltype(auto) +make_single_track_executor(CoreParamsPtr params, + CoreStatePtr const& state, + T&& apply_track) +{ + return ConditionalTrackExecutor{ + params, state, IsThreadZero{}, celeritas::forward(apply_track)}; +} + //---------------------------------------------------------------------------// /*! * Return a track executor that only applies if the action ID matches. 
diff --git a/src/celeritas/global/detail/SetGenerated.cc b/src/celeritas/global/detail/SetGenerated.cc index 65a2654114..c35b3ace2f 100644 --- a/src/celeritas/global/detail/SetGenerated.cc +++ b/src/celeritas/global/detail/SetGenerated.cc @@ -6,6 +6,7 @@ //---------------------------------------------------------------------------// #include "corecel/Assert.hh" #include "corecel/Types.hh" +#include "celeritas/global/TrackExecutor.hh" #include "SetGeneratedExecutor.hh" #include "../ActionLauncher.hh" @@ -22,8 +23,9 @@ namespace detail */ void set_generated(CoreParams const& params, CoreState& state) { - SetGeneratedExecutor execute_thread{params.ptr(), - state.ptr()}; + auto execute_thread = make_single_track_executor( + params.ptr(), state.ptr(), SetGeneratedExecutor{}); + launch_core(1, "set-generated", params, state, execute_thread); } diff --git a/src/celeritas/global/detail/SetGeneratedExecutor.hh b/src/celeritas/global/detail/SetGeneratedExecutor.hh index 174667ebbe..60c62d2aed 100644 --- a/src/celeritas/global/detail/SetGeneratedExecutor.hh +++ b/src/celeritas/global/detail/SetGeneratedExecutor.hh @@ -7,7 +7,9 @@ #pragma once #include "corecel/Macros.hh" +#include "corecel/sys/ThreadId.hh" #include "celeritas/Types.hh" +#include "celeritas/global/CoreTrackView.hh" #include "../CoreState.hh" @@ -24,19 +26,14 @@ namespace detail // LAUNCHER //---------------------------------------------------------------------------// /*! - * // Initialize the num_generated counter to zero. + * Initialize the num_generated counter to zero. */ struct SetGeneratedExecutor { - //// DATA //// - - CRefPtr params; - RefPtr state; - //// FUNCTIONS //// // Initialize the num_generated counter to zero - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); + CELER_FORCEINLINE_FUNCTION void operator()(CoreTrackView& track); }; //---------------------------------------------------------------------------// @@ -51,15 +48,11 @@ void set_generated(CoreParams const& params, /*! 
* Initialize the num_generated counter to zero. */ -CELER_FORCEINLINE_FUNCTION void SetGeneratedExecutor::operator()(ThreadId tid) +CELER_FORCEINLINE_FUNCTION void +SetGeneratedExecutor::operator()(CoreTrackView& track) { - CELER_EXPECT(params); - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should call with only one thread - - auto* counters = state->init.counters.data().get(); - counters->num_generated = 0; - return; + CELER_EXPECT(track.thread_id() == ThreadId{0}); // single thread kernel + track.counters().num_generated = 0; } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index 26e9ccbf59..0fd1f12cef 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -145,7 +145,7 @@ void DirectGeneratorAction::insert_impl(CoreState& state, // Update counters and copy distributions to aux state storage aux_state.counters.buffer_size = data.size(); aux_state.counters.num_pending = data.size(); - update_pending(state, data.size()); + this->update_pending(state, data.size()); Copier copy_to_aux{aux_state.initializers(), state.stream_id()}; diff --git a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh index 5fc01bf7fe..5bcd9768fc 100644 --- a/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/DirectGeneratorExecutor.hh @@ -31,11 +31,7 @@ struct DirectGeneratorExecutor NativeRef const data; // Initialize optical photons - inline CELER_FUNCTION void operator()(TrackSlotId tid) const; - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid) const - { - return (*this)(TrackSlotId{tid.unchecked_get()}); - } + inline CELER_FUNCTION void operator()(ThreadId tid) const; }; 
//---------------------------------------------------------------------------// @@ -44,7 +40,7 @@ struct DirectGeneratorExecutor /*! * Initialize optical photons. */ -CELER_FUNCTION void DirectGeneratorExecutor::operator()(TrackSlotId tid) const +CELER_FUNCTION void DirectGeneratorExecutor::operator()(ThreadId tid) const { CELER_EXPECT(params); CELER_EXPECT(state); @@ -55,23 +51,23 @@ CELER_FUNCTION void DirectGeneratorExecutor::operator()(TrackSlotId tid) const // of vacancies and the number of pending in the auxiliary state. To avoid // accessing the state counters to compute this min, we skip the extra // threads if counters->num_vacancies < aux_state.counters.num_pending - if (tid < counters->num_vacancies) + if (!(tid < counters->num_vacancies)) { - // Create view to new track to be initialized - CoreTrackView vacancy(*params, *state, [&] { - TrackSlotId idx{ - index_before(counters->num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()); + return; + } - // Get initializer from the back - TrackInitializer const& init - = data.initializers[ItemId( - index_before(counters->num_pending, ThreadId(tid.get())))]; + // Create view to new track to be initialized + CoreTrackView vacancy(*params, *state, [&] { + TrackSlotId idx{index_before(counters->num_vacancies, tid)}; + return state->init.vacancies[idx]; + }()); - // Initialize track - vacancy = init; - } + // Get initializer from the back + TrackInitializer const& init = data.initializers[ItemId( + index_before(counters->num_pending, tid))]; + + // Initialize track + vacancy = init; } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/TrackFunctors.hh b/src/celeritas/track/TrackFunctors.hh index b12ab6a610..5403b7e836 100644 --- a/src/celeritas/track/TrackFunctors.hh +++ b/src/celeritas/track/TrackFunctors.hh @@ -27,6 +27,16 @@ struct AppliesValid } }; +//! 
Only launch on a single thread +struct IsThreadZero +{ + template + CELER_FUNCTION bool operator()(T const& track) const + { + return track.thread_id() == ThreadId{0}; + } +}; + //---------------------------------------------------------------------------// /*! * Apply only to tracks with the given post-step action ID. From 93e0e744eb2fa04b99fb6dfe09d1477efe4936fb Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 6 May 2026 20:29:04 -0400 Subject: [PATCH 62/74] Incorporate WIP review comments --- src/accel/LocalOpticalGenOffload.cc | 4 +- src/celeritas/CMakeLists.txt | 2 +- src/celeritas/global/Stepper.cc | 40 +++++++++- .../{detail/SetGenerated.cu => Stepper.cu} | 28 ++++--- src/celeritas/global/Stepper.hh | 11 ++- src/celeritas/global/detail/SetGenerated.cc | 44 ----------- .../optical/gen/WlsGeneratorAction.cc | 2 +- .../gen/detail/WlsGeneratorExecutor.hh | 78 +++++++++---------- 8 files changed, 103 insertions(+), 106 deletions(-) rename src/celeritas/global/{detail/SetGenerated.cu => Stepper.cu} (56%) delete mode 100644 src/celeritas/global/detail/SetGenerated.cc diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index 1fc88322f9..3d4ab95d56 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -191,12 +191,12 @@ void LocalOpticalGenOffload::Flush() if (celeritas::device()) { auto* s = dynamic_cast*>(&*state_); - update_primaries(*s); + this->update_primaries(*s); } else { auto* s = dynamic_cast*>(&*state_); - update_primaries(*s); + this->update_primaries(*s); } num_photons_ = 0; diff --git a/src/celeritas/CMakeLists.txt b/src/celeritas/CMakeLists.txt index f7041bed41..73349e6d0e 100644 --- a/src/celeritas/CMakeLists.txt +++ b/src/celeritas/CMakeLists.txt @@ -393,8 +393,8 @@ celeritas_polysource(em/model/RelativisticBremModel) celeritas_polysource(em/model/SeltzerBergerModel) celeritas_polysource(em/model/CoulombScatteringModel) celeritas_polysource(geo/detail/BoundaryAction) 
+celeritas_polysource(global/Stepper) celeritas_polysource(global/detail/KillActive) -celeritas_polysource(global/detail/SetGenerated) celeritas_polysource(global/detail/TrackSlotUtils) celeritas_polysource(mucf/model/DTMixMucfModel) celeritas_polysource(neutron/model/ChipsNeutronElasticModel) diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index 05ca59fc54..25f0e99a89 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -14,10 +14,12 @@ #include "corecel/sys/ScopedProfiling.hh" #include "orange/OrangeData.hh" #include "celeritas/Types.hh" +#include "celeritas/global/TrackExecutor.hh" #include "celeritas/random/RngReseed.hh" #include "celeritas/track/ExtendFromPrimariesAction.hh" #include "celeritas/track/TrackInitParams.hh" +#include "ActionLauncher.hh" #include "CoreParams.hh" #include "detail/KillActive.hh" @@ -129,7 +131,7 @@ auto Stepper::operator()() -> result_type { ScopedProfiling profile_this{"step"}; // Initialize the num_generated counter to zero - detail::set_generated(*params_, *state_); + this->set_generated(); actions_->step(*params_, *state_); auto counters = state_->sync_get_counters(); @@ -208,6 +210,42 @@ void Stepper::reseed(UniqueEventId event_id) params_->init()->reset_track_ids(state_->stream_id(), &state_->ref().init); } +//---------------------------------------------------------------------------// +/*! + * Set the num_pending counter to the number of generated primaries. + * This template should always be specialized. + */ +// template +// void Stepper::set_generated() +// { +// CELER_NOT_CONFIGURED("CUDA OR HIP"); +// } + +//---------------------------------------------------------------------------// +/*! + * Set the num_pending counter to the number of generated primaries. 
+ */ +template<> +void Stepper::set_generated() +{ + auto execute_thread + = make_single_track_executor(params_->ptr(), + state_->ptr(), + detail::SetGeneratedExecutor{}); + launch_core(1, "set-generated", *params_, *state_, execute_thread); +} + +//---------------------------------------------------------------------------// +// DEVICE-DISABLED IMPLEMENTATION +//---------------------------------------------------------------------------// +#if !CELER_USE_DEVICE +template<> +void Stepper::set_generated() +{ + CELER_NOT_CONFIGURED("CUDA OR HIP"); +} +#endif + //---------------------------------------------------------------------------// // EXPLICIT INSTANTIATION //---------------------------------------------------------------------------// diff --git a/src/celeritas/global/detail/SetGenerated.cu b/src/celeritas/global/Stepper.cu similarity index 56% rename from src/celeritas/global/detail/SetGenerated.cu rename to src/celeritas/global/Stepper.cu index 01c92c6223..75fbe3d50c 100644 --- a/src/celeritas/global/detail/SetGenerated.cu +++ b/src/celeritas/global/Stepper.cu @@ -2,33 +2,37 @@ // Copyright Celeritas contributors: see top-level COPYRIGHT file for details // SPDX-License-Identifier: (Apache-2.0 OR MIT) //---------------------------------------------------------------------------// -//! \file celeritas/global/detail/SetGenerated.cu +//! \file celeritas/global/Stepper.cu //---------------------------------------------------------------------------// +#include "Stepper.hh" + #include "corecel/Assert.hh" #include "corecel/Types.hh" #include "corecel/sys/KernelLauncher.device.hh" +#include "celeritas/global/TrackExecutor.hh" + +#include "CoreParams.hh" +#include "CoreState.hh" -#include "SetGeneratedExecutor.hh" -#include "../CoreParams.hh" -#include "../CoreState.hh" +#include "detail/SetGeneratedExecutor.hh" namespace celeritas { -namespace detail -{ //---------------------------------------------------------------------------// /*! 
- * Reset the num_pending counter to the number of generated primaries. + * Set the num_pending counter to the number of generated primaries. */ -void set_generated(CoreParams const& params, CoreState& state) +template<> +void Stepper::set_generated() { - SetGeneratedExecutor execute_thread{params.ptr(), - state.ptr()}; + auto execute_thread + = make_single_track_executor(params_->ptr(), + state_->ptr(), + detail::SetGeneratedExecutor{}); static KernelLauncher const launch_kernel( "set-generated"); - launch_kernel(1, state.stream_id(), execute_thread); + launch_kernel(1, state_->stream_id(), execute_thread); } //---------------------------------------------------------------------------// -} // namespace detail } // namespace celeritas diff --git a/src/celeritas/global/Stepper.hh b/src/celeritas/global/Stepper.hh index c7a9a00bf3..953601de3a 100644 --- a/src/celeritas/global/Stepper.hh +++ b/src/celeritas/global/Stepper.hh @@ -148,6 +148,8 @@ class Stepper final : public StepperInterface //!@{ //! \name Type aliases using StateRef = CoreStateData; + using CoreStateHost = CoreState; + using CoreStateDevice = CoreState; //!@} public: @@ -184,6 +186,9 @@ class Stepper final : public StepperInterface //! Reset the core state counters and data so it can be reused void reset_state() { state_->reset(); } + //! Set the num_pending counter to the number of generated primaries + void set_generated(); + //! 
Get a shared pointer to the state (TEMPORARY, DO NOT USE) SPState sp_state() final { return state_; } @@ -199,11 +204,9 @@ class Stepper final : public StepperInterface }; //---------------------------------------------------------------------------// -// EXPLICIT INSTANTIATION +// EXPLICIT INSTANTIATION removed but retained in Stepper.cc so that the +// set_generated() member function can be specialized based on MemSpace //---------------------------------------------------------------------------// -extern template class Stepper; -extern template class Stepper; - //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/src/celeritas/global/detail/SetGenerated.cc b/src/celeritas/global/detail/SetGenerated.cc deleted file mode 100644 index c35b3ace2f..0000000000 --- a/src/celeritas/global/detail/SetGenerated.cc +++ /dev/null @@ -1,44 +0,0 @@ -//------------------------------- -*- C++ -*- -------------------------------// -// Copyright Celeritas contributors: see top-level COPYRIGHT file for details -// SPDX-License-Identifier: (Apache-2.0 OR MIT) -//---------------------------------------------------------------------------// -//! \file celeritas/global/detail/SetGenerated.cc -//---------------------------------------------------------------------------// -#include "corecel/Assert.hh" -#include "corecel/Types.hh" -#include "celeritas/global/TrackExecutor.hh" - -#include "SetGeneratedExecutor.hh" -#include "../ActionLauncher.hh" -#include "../CoreParams.hh" -#include "../CoreState.hh" - -namespace celeritas -{ -namespace detail -{ -//---------------------------------------------------------------------------// -/*! - * Reset the num_pending counter to the number of generated primaries. 
- */ -void set_generated(CoreParams const& params, CoreState& state) -{ - auto execute_thread = make_single_track_executor( - params.ptr(), state.ptr(), SetGeneratedExecutor{}); - - launch_core(1, "set-generated", params, state, execute_thread); -} - -//---------------------------------------------------------------------------// -// DEVICE-DISABLED IMPLEMENTATION -//---------------------------------------------------------------------------// -#if !CELER_USE_DEVICE -void set_generated(CoreParams const&, CoreState&) -{ - CELER_NOT_CONFIGURED("CUDA OR HIP"); -} -#endif - -//---------------------------------------------------------------------------// -} // namespace detail -} // namespace celeritas diff --git a/src/celeritas/optical/gen/WlsGeneratorAction.cc b/src/celeritas/optical/gen/WlsGeneratorAction.cc index 2728b9860a..71c00c403f 100644 --- a/src/celeritas/optical/gen/WlsGeneratorAction.cc +++ b/src/celeritas/optical/gen/WlsGeneratorAction.cc @@ -148,7 +148,7 @@ void WlsGeneratorAction::step_impl(CoreParams const& params, } // Update the core state counters with the number of new pending tracks - update_pending(state, counters.num_pending - num_pending_prev); + this->update_pending(state, counters.num_pending - num_pending_prev); if (counters.num_pending > 0) { diff --git a/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh index 7d6ecb4561..a10e39a6b5 100644 --- a/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/WlsGeneratorExecutor.hh @@ -43,11 +43,7 @@ struct WlsGeneratorExecutor //// FUNCTIONS //// // Generate optical photons - inline CELER_FUNCTION void operator()(TrackSlotId tid) const; - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid) const - { - return (*this)(TrackSlotId{tid.unchecked_get()}); - } + inline CELER_FUNCTION void operator()(ThreadId tid) const; }; 
//---------------------------------------------------------------------------// @@ -56,7 +52,7 @@ struct WlsGeneratorExecutor /*! * Generate WLS photons from optical distribution data. */ -CELER_FUNCTION void WlsGeneratorExecutor::operator()(TrackSlotId tid) const +CELER_FUNCTION void WlsGeneratorExecutor::operator()(ThreadId tid) const { CELER_EXPECT(state); CELER_EXPECT(data); @@ -69,46 +65,46 @@ CELER_FUNCTION void WlsGeneratorExecutor::operator()(TrackSlotId tid) const // of vacancies and the number of pending in the auxiliary state. To avoid // accessing the state counters to compute this min, we skip the extra // threads if state.counters.num_vacancies < aux_state.counters.num_pending - if (tid < counters->num_vacancies) + if (!(tid < counters->num_vacancies)) { - // Get the cumulative sum of the number of photons in the - // distributions. The values are used to determine which threads will - // generate from the corresponding distribution - auto offsets = data.offsets[ItemRange( - ItemId(0), ItemId(buffer_size))]; - - // Find the distribution this thread will generate from - size_type dist_idx = find_distribution_index(offsets, tid.get()); - CELER_ASSERT(dist_idx < data.distributions.size()); - auto& dist = data.distributions[DistId(dist_idx)]; - CELER_ASSERT(dist); + return; + } + // Get the cumulative sum of the number of photons in the + // distributions. 
The values are used to determine which threads will + // generate from the corresponding distribution + auto offsets = data.offsets[ItemRange( + ItemId(0), ItemId(buffer_size))]; - // Create the view to the new track to be initialized - CoreTrackView vacancy{ - *params, *state, [&] { - // Get the vacancy from the back in case there are more - // vacancies than photons to generate - TrackSlotId idx{index_before(counters->num_vacancies, - ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; + // Find the distribution this thread will generate from + size_type dist_idx = find_distribution_index(offsets, tid.get()); + CELER_ASSERT(dist_idx < data.distributions.size()); + auto& dist = data.distributions[DistId(dist_idx)]; + CELER_ASSERT(dist); - // Generate one track from the distribution - auto rng = vacancy.rng(); - if (dist.type == GeneratorType::wls) - { - CELER_ASSERT(wls); - vacancy = WavelengthShiftGenerator(wls, dist)(rng); - } - else - { - CELER_ASSERT(wls2); - vacancy = WavelengthShiftGenerator(wls2, dist)(rng); - } + // Create the view to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there are more + // vacancies than photons to generate + TrackSlotId idx{index_before(counters->num_vacancies, tid)}; + return state->init.vacancies[idx]; + }()}; - // Update the number of photons left to generate - atomic_add(&dist.num_photons, size_type(-1)); + // Generate one track from the distribution + auto rng = vacancy.rng(); + if (dist.type == GeneratorType::wls) + { + CELER_ASSERT(wls); + vacancy = WavelengthShiftGenerator(wls, dist)(rng); + } + else + { + CELER_ASSERT(wls2); + vacancy = WavelengthShiftGenerator(wls2, dist)(rng); } + + // Update the number of photons left to generate + atomic_add(&dist.num_photons, size_type(-1)); } //---------------------------------------------------------------------------// From 8c2475830d60b004237dd92677d82f6957e981ee Mon Sep 17 00:00:00 
2001 From: LSchwiebert Date: Wed, 13 May 2026 13:02:55 -0400 Subject: [PATCH 63/74] Fix some of the WIP code review --- src/celeritas/global/TrackExecutor.hh | 7 +- src/celeritas/optical/Runner.cc | 4 +- .../optical/gen/PrimaryGeneratorAction.cc | 2 +- .../optical/gen/detail/GeneratorExecutor.hh | 119 +++++++++--------- .../gen/detail/PrimaryGeneratorExecutor.hh | 41 +++--- src/celeritas/track/InitializeTracksAction.cc | 19 +-- src/celeritas/track/InitializeTracksAction.cu | 24 ++-- src/celeritas/track/TrackFunctors.hh | 2 +- ...Executor.hh => UpdateNumActiveExecutor.hh} | 10 +- 9 files changed, 113 insertions(+), 115 deletions(-) rename src/celeritas/track/detail/{UpdateNewTracksExecutor.hh => UpdateNumActiveExecutor.hh} (86%) diff --git a/src/celeritas/global/TrackExecutor.hh b/src/celeritas/global/TrackExecutor.hh index cdef72302d..2935062b23 100644 --- a/src/celeritas/global/TrackExecutor.hh +++ b/src/celeritas/global/TrackExecutor.hh @@ -28,7 +28,7 @@ namespace celeritas * the tracks are sorted. Otherwise, thread and track slot have the same * numerical value. * - * This is primarily used by \c ActionLauncher . + * This is used primarily by \c ActionLauncher . * * \code void foo_kernel(CoreParamsPtr const params, @@ -157,7 +157,7 @@ CELER_FUNCTION ConditionalTrackExecutor(CoreParamsPtr, // FREE FUNCTIONS //---------------------------------------------------------------------------// /*! - * Return a track executor that only applies to active, non-errored tracks. + * Return a track executor that applies to only active, non-errored tracks. */ template inline CELER_FUNCTION decltype(auto) @@ -171,7 +171,8 @@ make_active_track_executor(CoreParamsPtr params, //---------------------------------------------------------------------------// /*! - * Return a track executor that only applies to active, non-errored tracks. + * Return a track executor that applies to only one track. This is used + * primarily when updating state counters, as these need only one thread. 
*/ template inline CELER_FUNCTION decltype(auto) diff --git a/src/celeritas/optical/Runner.cc b/src/celeritas/optical/Runner.cc index 8a23cff51c..5cf9c3b79d 100644 --- a/src/celeritas/optical/Runner.cc +++ b/src/celeritas/optical/Runner.cc @@ -135,12 +135,12 @@ auto Runner::operator()(SpanConstGenDist data) -> Result if (celeritas::device()) { auto* s = dynamic_cast*>(&*state_); - update_pending(*s, total_pending); + this->update_pending(*s, total_pending); } else { auto* s = dynamic_cast*>(&*state_); - update_pending(*s, total_pending); + this->update_pending(*s, total_pending); } return this->run(); diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc index 6d9642cd50..e679b30ff0 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc @@ -135,7 +135,7 @@ void PrimaryGeneratorAction::insert_impl(optical::CoreState& state) const auto& aux_state = this->counters(*state.aux()); aux_state.counters.num_pending = data_.num_photons; - update_pending(state, data_.num_photons); + this->update_pending(state, data_.num_photons); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/detail/GeneratorExecutor.hh b/src/celeritas/optical/gen/detail/GeneratorExecutor.hh index ef885a0ea7..3ff5f45903 100644 --- a/src/celeritas/optical/gen/detail/GeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/GeneratorExecutor.hh @@ -44,11 +44,7 @@ struct GeneratorExecutor //// FUNCTIONS //// // Generate optical photons - inline CELER_FUNCTION void operator()(TrackSlotId tid) const; - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid) const - { - return (*this)(TrackSlotId{tid.unchecked_get()}); - } + inline CELER_FUNCTION void operator()(ThreadId tid) const; }; //---------------------------------------------------------------------------// @@ -57,7 +53,7 @@ struct GeneratorExecutor 
/*! * Generate photons from optical distribution data. */ -CELER_FUNCTION void GeneratorExecutor::operator()(TrackSlotId tid) const +CELER_FUNCTION void GeneratorExecutor::operator()(ThreadId tid) const { using namespace celeritas::literals; CELER_EXPECT(state); @@ -71,63 +67,62 @@ CELER_FUNCTION void GeneratorExecutor::operator()(TrackSlotId tid) const // of vacancies and the number of pending in the auxiliary state. To avoid // accessing the state counters to compute this min, we skip the extra // threads if state.counters.num_vacancies < aux_state.counters.num_pending - if (tid < counters->num_vacancies) + if (!(tid < counters->num_vacancies)) + { + return; + } + // Find the index of the first distribution that has a nonzero number + // of primaries left to generate + auto all_offsets = offload.offsets[ItemRange( + ItemId(0), ItemId(buffer_size))]; + auto buffer_start + = celeritas::upper_bound(all_offsets.begin(), all_offsets.end(), 0_sz); + CELER_ASSERT(buffer_start != all_offsets.end()); + + // Get the cumulative sum of the number of photons in the distributions. 
+ // The values are used to determine which threads will generate from the + // corresponding distribution + Span offsets{buffer_start, all_offsets.end()}; + + // Find the distribution this thread will generate from + size_type dist_idx = buffer_start - all_offsets.begin() + + find_distribution_index(offsets, tid.get()); + CELER_ASSERT(dist_idx < offload.distributions.size()); + auto& dist = offload.distributions[DistId(dist_idx)]; + CELER_ASSERT(dist); + + // Create the view to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there + // are more vacancies than photons to generate + TrackSlotId idx{index_before(counters->num_vacancies, tid)}; + return state->init.vacancies[idx]; + }()}; + + if (!dist.material) + { + // If the optical material hasn't been set, initialize a temporary + // geometry state at the pre-step point and use it to find the + // optical material ID + auto geo = vacancy.geometry(); + geo = GeoTrackInitializer{dist.points[StepPoint::pre].pos, {1, 0, 0}}; + dist.material = vacancy.material_record(geo).material_id(); + } + CELER_ASSERT(dist.material); + + // Generate one track from the distribution + auto rng = vacancy.rng(); + if (dist.type == GeneratorType::cherenkov) + { + CELER_ASSERT(cherenkov); + auto opt_mat = vacancy.material_record(dist.material); + vacancy = CherenkovGenerator(opt_mat, cherenkov, dist)(rng); + } + else { - // Find the index of the first distribution that has a nonzero number - // of primaries left to generate - auto all_offsets = offload.offsets[ItemRange( - ItemId(0), ItemId(buffer_size))]; - auto buffer_start = celeritas::upper_bound( - all_offsets.begin(), all_offsets.end(), 0_sz); - CELER_ASSERT(buffer_start != all_offsets.end()); - - // Get the cumulative sum of the number of photons in the - // distributions. 
The values are used to determine which threads will - // generate from the corresponding distribution - Span offsets{buffer_start, all_offsets.end()}; - - // Find the distribution this thread will generate from - size_type dist_idx = buffer_start - all_offsets.begin() - + find_distribution_index(offsets, tid.get()); - CELER_ASSERT(dist_idx < offload.distributions.size()); - auto& dist = offload.distributions[DistId(dist_idx)]; - CELER_ASSERT(dist); - - // Create the view to the new track to be initialized - CoreTrackView vacancy{ - *params, *state, [&] { - // Get the vacancy from the back in case there - // are more vacancies than photons to generate - TrackSlotId idx{index_before(counters->num_vacancies, - ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; - - if (!dist.material) - { - // If the optical material hasn't been set, initialize a temporary - // geometry state at the pre-step point and use it to find the - // optical material ID - auto geo = vacancy.geometry(); - geo = GeoTrackInitializer{dist.points[StepPoint::pre].pos, - {1, 0, 0}}; - dist.material = vacancy.material_record(geo).material_id(); - } - CELER_ASSERT(dist.material); - - // Generate one track from the distribution - auto rng = vacancy.rng(); - if (dist.type == GeneratorType::cherenkov) - { - CELER_ASSERT(cherenkov); - auto opt_mat = vacancy.material_record(dist.material); - vacancy = CherenkovGenerator(opt_mat, cherenkov, dist)(rng); - } - else - { - CELER_ASSERT(scintillation); - vacancy = ScintillationGenerator(scintillation, dist)(rng); - } + CELER_ASSERT(scintillation); + vacancy = ScintillationGenerator(scintillation, dist)(rng); } } diff --git a/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh b/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh index 9e52b8f5c3..3ae0a5f0ff 100644 --- a/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh +++ b/src/celeritas/optical/gen/detail/PrimaryGeneratorExecutor.hh @@ -39,11 +39,7 @@ struct 
PrimaryGeneratorExecutor //// FUNCTIONS //// // Generate optical photons - inline CELER_FUNCTION void operator()(TrackSlotId tid) const; - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid) const - { - return (*this)(TrackSlotId{tid.unchecked_get()}); - } + inline CELER_FUNCTION void operator()(ThreadId tid) const; }; //---------------------------------------------------------------------------// @@ -52,37 +48,36 @@ struct PrimaryGeneratorExecutor /*! * Generate photons from optical distribution data. */ -CELER_FUNCTION void PrimaryGeneratorExecutor::operator()(TrackSlotId tid) const +CELER_FUNCTION void PrimaryGeneratorExecutor::operator()(ThreadId tid) const { CELER_EXPECT(params); CELER_EXPECT(state); CELER_EXPECT(data); CELER_EXPECT(distributions); - CoreTrackView temp(*params, *state, TrackSlotId{0}); - auto const& counters = temp.counters(); + auto* counters = state->init.counters.data().get(); // Original code set the number of threads to the minimum between of number // of vacancies and the number of pending in the auxiliary state. 
To avoid // accessing the state counters to compute this min, we skip the extra // threads if counters.num_vacancies < aux_state.counters.num_pending - if (tid < counters.num_vacancies) + if (!(tid < counters->num_vacancies)) { - // Create the view to the new track to be initialized - CoreTrackView vacancy{ - *params, *state, [&] { - // Get the vacancy from the back in case there - // are more vacancies than photons to generate - TrackSlotId idx{ - index_before(counters.num_vacancies, ThreadId(tid.get()))}; - return state->init.vacancies[idx]; - }()}; - - // Generate one primary from the distribution - CoreTrackView track(*params, *state, tid); - auto rng = track.rng(); - vacancy = PrimaryGenerator(distributions, data)(rng); + return; } + // Create the view to the new track to be initialized + CoreTrackView vacancy{ + *params, *state, [&] { + // Get the vacancy from the back in case there + // are more vacancies than photons to generate + TrackSlotId idx{index_before(counters->num_vacancies, tid)}; + return state->init.vacancies[idx]; + }()}; + + // Generate one primary from the distribution + CoreTrackView track(*params, *state, TrackSlotId{tid.get()}); + auto rng = track.rng(); + vacancy = PrimaryGenerator(distributions, data)(rng); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index 66841c62b8..24a3b4a2ce 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -19,7 +19,7 @@ #include "detail/InitTracksExecutor.hh" // IWYU pragma: associated #include "detail/TrackInitAlgorithms.hh" -#include "detail/UpdateNewTracksExecutor.hh" // IWYU pragma: associated +#include "detail/UpdateNumActiveExecutor.hh" // IWYU pragma: associated namespace celeritas { @@ -89,13 +89,16 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, CoreStateHost& core_state, 
size_type max_new_tracks) const { - detail::InitTracksExecutor execute{core_params.ptr(), - core_state.ptr()}; - launch_action(*this, max_new_tracks, core_params, core_state, execute); - - detail::UpdateNewTracksExecutor execute_thread{ - core_params.ptr(), core_state.ptr()}; - return launch_action(*this, 1, core_params, core_state, execute_thread); + { + detail::InitTracksExecutor execute{core_params.ptr(), + core_state.ptr()}; + launch_action(*this, max_new_tracks, core_params, core_state, execute); + } + { + detail::UpdateNumActiveExecutor execute_thread{ + core_params.ptr(), core_state.ptr()}; + launch_action(*this, 1, core_params, core_state, execute_thread); + } } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/InitializeTracksAction.cu b/src/celeritas/track/InitializeTracksAction.cu index 489bbb980c..ae5ba2af46 100644 --- a/src/celeritas/track/InitializeTracksAction.cu +++ b/src/celeritas/track/InitializeTracksAction.cu @@ -11,7 +11,7 @@ #include "celeritas/global/CoreState.hh" #include "detail/InitTracksExecutor.hh" -#include "detail/UpdateNewTracksExecutor.hh" +#include "detail/UpdateNumActiveExecutor.hh" namespace celeritas { @@ -24,15 +24,19 @@ void InitializeTracksAction::step_impl(CoreParams const& params, CoreStateDevice& state, size_type num_new_tracks) const { - detail::InitTracksExecutor execute{params.ptr(), - state.ptr()}; - static ActionLauncher const launch_kernel(*this); - launch_kernel(num_new_tracks, state.stream_id(), execute); - - detail::UpdateNewTracksExecutor execute_thread{ - params.ptr(), state.ptr()}; - static ActionLauncher const launch_kernel2(*this); - launch_kernel2(1, state.stream_id(), execute_thread); + { + detail::InitTracksExecutor execute{params.ptr(), + state.ptr()}; + static ActionLauncher const launch_kernel(*this); + launch_kernel(num_new_tracks, state.stream_id(), execute); + } + { + detail::UpdateNumActiveExecutor execute_thread{ + params.ptr(), 
state.ptr()}; + static ActionLauncher const launch_kernel( + *this); + launch_kernel(1, state.stream_id(), execute_thread); + } } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/TrackFunctors.hh b/src/celeritas/track/TrackFunctors.hh index 5403b7e836..cd1c5e963b 100644 --- a/src/celeritas/track/TrackFunctors.hh +++ b/src/celeritas/track/TrackFunctors.hh @@ -27,7 +27,7 @@ struct AppliesValid } }; -//! Only launch on a single thread +//! Launch on only a single thread struct IsThreadZero { template diff --git a/src/celeritas/track/detail/UpdateNewTracksExecutor.hh b/src/celeritas/track/detail/UpdateNumActiveExecutor.hh similarity index 86% rename from src/celeritas/track/detail/UpdateNewTracksExecutor.hh rename to src/celeritas/track/detail/UpdateNumActiveExecutor.hh index 46c161eccd..72c8926a8e 100644 --- a/src/celeritas/track/detail/UpdateNewTracksExecutor.hh +++ b/src/celeritas/track/detail/UpdateNumActiveExecutor.hh @@ -2,7 +2,7 @@ // Copyright Celeritas contributors: see top-level COPYRIGHT file for details // SPDX-License-Identifier: (Apache-2.0 OR MIT) //---------------------------------------------------------------------------// -//! \file celeritas/track/detail/UpdateNewTracksExecutor.hh +//! \file celeritas/track/detail/UpdateNumActiveExecutor.hh //---------------------------------------------------------------------------// #pragma once @@ -18,9 +18,9 @@ namespace detail { //---------------------------------------------------------------------------// /*! - * Update state counters based on the number of new tracks. + * Update num_active state counter based on the number of vacancies. */ -struct UpdateNewTracksExecutor +struct UpdateNumActiveExecutor { //// TYPES //// @@ -40,9 +40,9 @@ struct UpdateNewTracksExecutor //---------------------------------------------------------------------------// /*! - * Update state counters based on the number of new tracks. 
+ * Update number of active trackes based on the number of vacancies. */ -CELER_FUNCTION void UpdateNewTracksExecutor::operator()(ThreadId tid) const +CELER_FUNCTION void UpdateNumActiveExecutor::operator()(ThreadId tid) const { CELER_EXPECT(params); CELER_EXPECT(state); From 2486cc0e61f50803d85d05ef0fa7fdab002fa1cc Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 13 May 2026 17:17:18 -0400 Subject: [PATCH 64/74] Updates for the SetGenerated executor --- src/celeritas/global/CoreTrackView.hh | 6 +++--- src/celeritas/global/Stepper.hh | 2 +- src/celeritas/global/detail/SetGeneratedExecutor.hh | 6 ------ 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/celeritas/global/CoreTrackView.hh b/src/celeritas/global/CoreTrackView.hh index 3e88faede8..a420359cdf 100644 --- a/src/celeritas/global/CoreTrackView.hh +++ b/src/celeritas/global/CoreTrackView.hh @@ -120,8 +120,8 @@ class CoreTrackView inline CELER_FUNCTION CoreStateCounters const& counters() const; private: - StateRef const& states_; ParamsRef const& params_; + StateRef const& states_; ThreadId const thread_id_; TrackSlotId track_slot_id_; }; @@ -136,7 +136,7 @@ CELER_FUNCTION CoreTrackView::CoreTrackView(ParamsRef const& params, StateRef const& states, ThreadId thread) - : states_(states), params_(params), thread_id_(thread) + : params_(params), states_(states), thread_id_(thread) { CELER_EXPECT(states_.track_slots.empty() || thread_id_ < states_.track_slots.size()); @@ -157,7 +157,7 @@ CELER_FUNCTION CoreTrackView::CoreTrackView(ParamsRef const& params, StateRef const& states, TrackSlotId track_slot) - : states_(states), params_(params), track_slot_id_(track_slot) + : params_(params), states_(states), track_slot_id_(track_slot) { CELER_EXPECT(track_slot_id_ < states_.size()); } diff --git a/src/celeritas/global/Stepper.hh b/src/celeritas/global/Stepper.hh index 953601de3a..fe6fbf10c0 100644 --- a/src/celeritas/global/Stepper.hh +++ b/src/celeritas/global/Stepper.hh @@ -186,7 +186,7 @@ class 
Stepper final : public StepperInterface //! Reset the core state counters and data so it can be reused void reset_state() { state_->reset(); } - //! Set the num_pending counter to the number of generated primaries + //! Reset the num_generated state counter to zero void set_generated(); //! Get a shared pointer to the state (TEMPORARY, DO NOT USE) diff --git a/src/celeritas/global/detail/SetGeneratedExecutor.hh b/src/celeritas/global/detail/SetGeneratedExecutor.hh index 60c62d2aed..0f1fb2668e 100644 --- a/src/celeritas/global/detail/SetGeneratedExecutor.hh +++ b/src/celeritas/global/detail/SetGeneratedExecutor.hh @@ -36,12 +36,6 @@ struct SetGeneratedExecutor CELER_FORCEINLINE_FUNCTION void operator()(CoreTrackView& track); }; -//---------------------------------------------------------------------------// - -void set_generated(CoreParams const& params, CoreState& state); -void set_generated(CoreParams const& params, - CoreState& state); - //---------------------------------------------------------------------------// // INLINE DEFINITIONS //---------------------------------------------------------------------------// From 84c2ba4bdc0ff9aeccc40fa8e4eaa9c00e9e0598 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 17 May 2026 16:34:14 -0400 Subject: [PATCH 65/74] Incorporate draft PR feedback for LocateVacanciesAction::update_alive --- src/celeritas/optical/CoreTrackView.hh | 46 +++++++++++++++++++ .../optical/action/LocateVacanciesAction.cc | 7 ++- .../optical/action/LocateVacanciesAction.cu | 12 +++-- .../optical/action/LocateVacanciesAction.hh | 2 - .../action/detail/UpdateAliveExecutor.hh | 18 ++++---- 5 files changed, 66 insertions(+), 19 deletions(-) diff --git a/src/celeritas/optical/CoreTrackView.hh b/src/celeritas/optical/CoreTrackView.hh index 431e67aa15..497de23d4f 100644 --- a/src/celeritas/optical/CoreTrackView.hh +++ b/src/celeritas/optical/CoreTrackView.hh @@ -8,6 +8,7 @@ #include "corecel/math/Atomics.hh" #include 
"corecel/random/engine/RngEngine.hh" +#include "corecel/sys/ThreadId.hh" #include "geocel/AllVolumesView.hh" #include "geocel/DetectorView.hh" #include "geocel/VolumeSurfaceView.hh" @@ -43,6 +44,11 @@ class CoreTrackView //!@} public: + // Construct with comprehensive param/state data and thread + inline CELER_FUNCTION CoreTrackView(ParamsRef const& params, + StateRef const& states, + ThreadId thread); + // Construct directly from a track slot ID inline CELER_FUNCTION CoreTrackView(ParamsRef const& params, StateRef const& states, @@ -90,6 +96,9 @@ class CoreTrackView // Return an RNG engine inline CELER_FUNCTION RngEngine rng() const; + // Get the index of the current thread in the current kernel + inline CELER_FUNCTION ThreadId thread_id() const; + // Get the track's index among the states inline CELER_FUNCTION TrackSlotId track_slot_id() const; @@ -106,12 +115,31 @@ class CoreTrackView private: ParamsRef const& params_; StateRef const& states_; + ThreadId const thread_id_; TrackSlotId const track_slot_id_; }; //---------------------------------------------------------------------------// // INLINE DEFINITIONS //---------------------------------------------------------------------------// +/*! + * Construct with comprehensive param/state data and thread. + * + * For optical tracks, the value of the track slot is the same as the thread + * ID. + */ +CELER_FUNCTION +CoreTrackView::CoreTrackView(ParamsRef const& params, + StateRef const& states, + ThreadId thread) + : params_(params) + , states_(states) + , thread_id_(thread) + , track_slot_id_(TrackSlotId{thread.get()}) +{ + CELER_EXPECT(track_slot_id_ < states_.size()); +} + /*! * Construct with comprehensive param/state data and track slot. * @@ -298,6 +326,24 @@ CELER_FUNCTION SimTrackView CoreTrackView::sim() const return SimTrackView{params_.sim, states_.sim, this->track_slot_id()}; } +//---------------------------------------------------------------------------// +/*! 
+ * Get the index of the current thread in the current kernel. + * + * \warning If the kernel calling this function is not applied to \em all + * tracks, then comparing against a particular thread ID (e.g. zero for a + * once-per-kernel initialization) may result in an error. + * + * \pre The thread ID is only set if the class is initialized with the thread + * ID (e.g. from \c TrackExecutor ), which is not the case in track + * initialization (where the "core track" is constructed from a vacancy). + */ +CELER_FORCEINLINE_FUNCTION ThreadId CoreTrackView::thread_id() const +{ + CELER_EXPECT(thread_id_); + return thread_id_; +} + //---------------------------------------------------------------------------// /*! * Get the track's index among the states. diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cc b/src/celeritas/optical/action/LocateVacanciesAction.cc index 7bf96fb049..118a6b7a89 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cc +++ b/src/celeritas/optical/action/LocateVacanciesAction.cc @@ -10,6 +10,7 @@ #include "corecel/Macros.hh" #include "celeritas/optical/CoreParams.hh" #include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/TrackExecutor.hh" #include "ActionLauncher.hh" @@ -73,8 +74,10 @@ void LocateVacanciesAction::update_alive(CoreParams const& params, CoreStateHost& state, size_type state_size) const { - detail::UpdateAliveExecutor execute_thread{ - params.ptr(), state.ptr(), state_size}; + auto execute_thread + = make_single_track_executor(params.ptr(), + state.ptr(), + detail::UpdateAliveExecutor{state_size}); launch_action(1, execute_thread); } diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cu b/src/celeritas/optical/action/LocateVacanciesAction.cu index ee134151e8..9979d22e72 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cu +++ b/src/celeritas/optical/action/LocateVacanciesAction.cu @@ -10,9 +10,9 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" #include 
"corecel/sys/KernelLauncher.device.hh" - -#include "../CoreParams.hh" -#include "../CoreState.hh" +#include "celeritas/optical/CoreParams.hh" +#include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/TrackExecutor.hh" #include "detail/UpdateAliveExecutor.hh" @@ -28,8 +28,10 @@ void LocateVacanciesAction::update_alive(CoreParams const& params, CoreStateDevice& state, size_type state_size) const { - detail::UpdateAliveExecutor execute_thread{ - params.ptr(), state.ptr(), state_size}; + auto execute_thread + = make_single_track_executor(params.ptr(), + state.ptr(), + detail::UpdateAliveExecutor{state_size}); static KernelLauncher const launch_kernel( "update-alive"); launch_kernel(1, state.stream_id(), execute_thread); diff --git a/src/celeritas/optical/action/LocateVacanciesAction.hh b/src/celeritas/optical/action/LocateVacanciesAction.hh index c407440d78..e1bc108e4a 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.hh +++ b/src/celeritas/optical/action/LocateVacanciesAction.hh @@ -8,8 +8,6 @@ #include "ActionInterface.hh" -#include "detail/UpdateAliveExecutor.hh" - namespace celeritas { namespace optical diff --git a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh index 8d293d94bd..da44875621 100644 --- a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh +++ b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh @@ -8,7 +8,9 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" +#include "corecel/sys/ThreadId.hh" #include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/CoreTrackView.hh" namespace celeritas { @@ -27,14 +29,12 @@ struct UpdateAliveExecutor { //// DATA //// - CRefPtr params; - RefPtr state; size_type state_size; //// FUNCTIONS //// // Update number of photons that are still alive - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); + CELER_FORCEINLINE_FUNCTION void operator()(CoreTrackView& track); }; 
//---------------------------------------------------------------------------// @@ -43,15 +43,13 @@ struct UpdateAliveExecutor /*! * Update number of photons that are still alive after compacting vacancies. */ -CELER_FORCEINLINE_FUNCTION void UpdateAliveExecutor::operator()(ThreadId tid) +CELER_FORCEINLINE_FUNCTION void +UpdateAliveExecutor::operator()(CoreTrackView& track) { - CELER_EXPECT(params); - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should be called with only one thread + CELER_EXPECT(track.thread_id() == ThreadId{0}); // single thread kernel - auto counters = state->init.counters.data().get(); - counters->num_alive = state_size - counters->num_vacancies; - CELER_ASSERT(state_size >= counters->num_vacancies); + track.counters().num_alive = state_size - track.counters().num_vacancies; + CELER_ASSERT(state_size >= track.counters().num_vacancies); } //---------------------------------------------------------------------------// From 746adc98329e79abc4db01e0975e99f21650059d Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 17 May 2026 17:44:08 -0400 Subject: [PATCH 66/74] Add Track Executor for optical tracks --- src/celeritas/optical/TrackExecutor.hh | 231 +++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 src/celeritas/optical/TrackExecutor.hh diff --git a/src/celeritas/optical/TrackExecutor.hh b/src/celeritas/optical/TrackExecutor.hh new file mode 100644 index 0000000000..3adf5322fc --- /dev/null +++ b/src/celeritas/optical/TrackExecutor.hh @@ -0,0 +1,231 @@ +//------------------------------- -*- C++ -*- -------------------------------// +// Copyright Celeritas contributors: see top-level COPYRIGHT file for details +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! 
\file celeritas/optical/TrackExecutor.hh +//---------------------------------------------------------------------------// +#pragma once + +#include "corecel/Assert.hh" +#include "corecel/Types.hh" +#include "corecel/math/Algorithms.hh" +#include "corecel/sys/ThreadId.hh" +#include "celeritas/track/TrackFunctors.hh" + +#include "CoreTrackData.hh" +#include "CoreTrackDataFwd.hh" +#include "CoreTrackView.hh" + +namespace celeritas +{ +namespace optical +{ +//---------------------------------------------------------------------------// +/*! + * Call an optical \c CoreTrackView executor for a given ThreadId. + * + * This class can be used to call a functor that applies to \c CoreTrackView + * using a \c ThreadId, so that the tracks can be easily looped over as a + * group on CPU or GPU. It applies a remapping from \em thread to \em slot if + * the tracks are sorted. Otherwise, thread and track slot have the same + * numerical value. + * + * This is used primarily by \c ActionLauncher . + * + * \code +void foo_kernel(CoreParamsPtr const params, + CoreStatePtr const state) +{ + TrackExecutor execute{params, state, MyTrackApplier{}}; + + for (auto tid : range(ThreadID{123})) + { + step(tid); + } +} +\endcode + * + * \todo Rename to ThreadExecutor. The template parameter, which must operate + * on a core track view, is a track executor. + */ +template +class TrackExecutor +{ + public: + //!@{ + //! \name Type aliases + using ParamsPtr = CoreParamsPtr; + using StatePtr = CoreStatePtr; + using Applier = T; + //!@} + + public: + //! Construct with core data and executor + CELER_FUNCTION + TrackExecutor(ParamsPtr params, StatePtr state, T&& execute_track) + : params_{params} + , state_{state} + , execute_track_{celeritas::forward(execute_track)} + { + } + + //! 
Call the underlying function, using indirection array if needed + CELER_FUNCTION void operator()(ThreadId thread) + { + CELER_EXPECT(thread < state_->size()); + CoreTrackView track(*params_, *state_, thread); + return execute_track_(track); + } + + private: + ParamsPtr const params_; + StatePtr const state_; + T execute_track_; +}; + +//---------------------------------------------------------------------------// +/*! + * Launch the track only when a certain condition applies to the sim state. + * + * The condition \c C must have the signature \code + * (SimTrackView const&) -> bool + \endcode + * + * see \c make_active_track_executor for an example where this is used to apply + * only to active (or killed) tracks. + */ +template +class ConditionalTrackExecutor +{ + public: + //!@{ + //! \name Type aliases + using ParamsPtr = CoreParamsPtr; + using StatePtr = CoreStatePtr; + using Applier = T; + //!@} + + public: + //! Construct with condition and operator + CELER_FUNCTION + ConditionalTrackExecutor(ParamsPtr params, + StatePtr state, + C&& applies, + T&& execute_track) + : params_{params} + , state_{state} + , applies_{celeritas::forward(applies)} + , execute_track_{celeritas::forward(execute_track)} + { + } + + //! 
Launch the given thread if the track meets the condition + CELER_FUNCTION void operator()(ThreadId thread) + { + CELER_EXPECT(thread < state_->size()); + CoreTrackView track(*params_, *state_, thread); + if (!applies_(track)) + { + return; + } + + // NOTE: "return value type" error means the executor function is + // incorrectly returning a value + return execute_track_(track); + } + + private: + ParamsPtr const params_; + StatePtr const state_; + C applies_; + T execute_track_; +}; + +//---------------------------------------------------------------------------// +// DEDUCTION GUIDES +//---------------------------------------------------------------------------// +template +CELER_FUNCTION TrackExecutor(CoreParamsPtr, + CoreStatePtr, + T&&) -> TrackExecutor; + +template +CELER_FUNCTION ConditionalTrackExecutor(CoreParamsPtr, + CoreStatePtr, + C&&, + T&&) -> ConditionalTrackExecutor; + +//---------------------------------------------------------------------------// +// FREE FUNCTIONS +//---------------------------------------------------------------------------// +/*! + * Return a track executor that applies to only active, non-errored tracks. + */ +template +inline CELER_FUNCTION decltype(auto) +make_active_track_executor(CoreParamsPtr params, + CoreStatePtr const& state, + T&& apply_track) +{ + return ConditionalTrackExecutor{ + params, state, AppliesValid{}, celeritas::forward(apply_track)}; +} + +//---------------------------------------------------------------------------// +/*! + * Return a track executor that applies to only one track. This is used + * primarily when updating state counters, as these need only one thread. 
+ */ +template +inline CELER_FUNCTION decltype(auto) +make_single_track_executor(CoreParamsPtr params, + CoreStatePtr const& state, + T&& apply_track) +{ + return ConditionalTrackExecutor{ + params, state, IsThreadZero{}, celeritas::forward(apply_track)}; +} + +//---------------------------------------------------------------------------// +/*! + * Return a track executor that only applies if the action ID matches. + * + * \note This should generally only be used for post-step actions and other + * cases where the IDs *explicitly* are set. Many explicit actions apply to all + * threads, active or not. + */ +template +inline CELER_FUNCTION decltype(auto) +make_action_track_executor(CoreParamsPtr params, + CoreStatePtr state, + ActionId action, + T&& apply_track) +{ + CELER_EXPECT(action); + return ConditionalTrackExecutor{params, + state, + IsStepActionEqual{action}, + celeritas::forward(apply_track)}; +} + +//---------------------------------------------------------------------------// +/*! + * Return a track executor that only applies for the given along-step action. 
+ */ +template +inline CELER_FUNCTION decltype(auto) +make_along_step_track_executor(CoreParamsPtr params, + CoreStatePtr state, + ActionId action, + T&& apply_track) +{ + CELER_EXPECT(action); + return ConditionalTrackExecutor{params, + state, + IsAlongStepActionEqual{action}, + celeritas::forward(apply_track)}; +} + +//---------------------------------------------------------------------------// +} // namespace optical +} // namespace celeritas From 8f91be533a2144213d4e3fc96db1f5a82df24ab1 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 17 May 2026 17:47:52 -0400 Subject: [PATCH 67/74] Change UpdateCountersExecutor to use make_single_executor --- .../track/ExtendFromPrimariesAction.cc | 12 ++++++--- .../track/ExtendFromPrimariesAction.cu | 15 ++++++++--- .../track/detail/UpdateCountersExecutor.hh | 26 ++++++------------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index b189627773..c173571e99 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -15,6 +15,7 @@ #include "celeritas/global/ActionLauncher.hh" #include "celeritas/global/CoreParams.hh" #include "celeritas/global/CoreState.hh" +#include "celeritas/global/TrackExecutor.hh" #include "TrackInitParams.hh" @@ -214,9 +215,14 @@ void ExtendFromPrimariesAction::update_counters(CoreParams const& params, CoreStateHost& state, size_type num_primaries) const { - detail::UpdateCountersExecutor execute{ - params.ptr(), state.ptr(), num_primaries}; - return launch_action(*this, 1, params, state, execute); + auto execute_thread = make_single_track_executor( + params.ptr(), + state.ptr(), + detail::UpdateCountersExecutor{num_primaries}); + launch_core(1, "update-counters", params, state, execute_thread); + // detail::UpdateCountersExecutor execute{ + // params.ptr(), state.ptr(), num_primaries}; + // return launch_action(*this, 1, 
params, state, execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index 5665ce155c..44b1c82fb1 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -9,6 +9,7 @@ #include "celeritas/global/ActionLauncher.device.hh" #include "celeritas/global/CoreParams.hh" #include "celeritas/global/CoreState.hh" +#include "celeritas/global/TrackExecutor.hh" #include "detail/ProcessPrimariesExecutor.hh" #include "detail/UpdateCountersExecutor.hh" @@ -42,10 +43,18 @@ void ExtendFromPrimariesAction::update_counters(CoreParams const& params, CoreStateDevice& state, size_type num_primaries) const { - detail::UpdateCountersExecutor execute_thread{ - params.ptr(), state.ptr(), num_primaries}; - static ActionLauncher const launch_kernel(*this); + auto execute_thread = make_single_track_executor( + params.ptr(), + state.ptr(), + detail::UpdateCountersExecutor{num_primaries}); + static KernelLauncher const launch_kernel( + "update-counters"); launch_kernel(1, state.stream_id(), execute_thread); + // detail::UpdateCountersExecutor execute_thread{ + // params.ptr(), state.ptr(), num_primaries}; + // static ActionLauncher const + // launch_kernel(*this); launch_kernel(1, state.stream_id(), + // execute_thread); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/detail/UpdateCountersExecutor.hh b/src/celeritas/track/detail/UpdateCountersExecutor.hh index de978d3438..b0745c53a7 100644 --- a/src/celeritas/track/detail/UpdateCountersExecutor.hh +++ b/src/celeritas/track/detail/UpdateCountersExecutor.hh @@ -9,6 +9,7 @@ #include "corecel/Assert.hh" #include "corecel/Macros.hh" #include "celeritas/Types.hh" +#include "celeritas/global/CoreTrackView.hh" #include "../TrackInitData.hh" @@ -22,41 +23,30 @@ namespace detail */ 
struct UpdateCountersExecutor { - //// TYPES //// - - using ParamsPtr = CRefPtr; - using StatePtr = RefPtr; - //// DATA //// - ParamsPtr params; - StatePtr state; - size_type num_primaries; //// FUNCTIONS //// // Update state counters based on the number of primaries - inline CELER_FUNCTION void operator()(ThreadId tid) const; + CELER_FORCEINLINE_FUNCTION void operator()(CoreTrackView& track); }; //---------------------------------------------------------------------------// /*! * Update state counters based on the number of primaries. */ -CELER_FUNCTION void UpdateCountersExecutor::operator()(ThreadId tid) const +CELER_FORCEINLINE_FUNCTION void +UpdateCountersExecutor::operator()(CoreTrackView& track) { - CELER_EXPECT(params); - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should call with only one thread + CELER_EXPECT(track.thread_id() == ThreadId{0}); // single thread kernel - auto* counters = state->init.counters.data().get(); // Update track initializers from primaries - counters->num_initializers += num_primaries; + track.counters().num_initializers += num_primaries; // Mark that the primaries have been processed - counters->num_generated += num_primaries; - counters->num_pending = 0; - return; + track.counters().num_generated += num_primaries; + track.counters().num_pending = 0; } //---------------------------------------------------------------------------// From 9244df917fb88e9d16153fdaec33e5d2ffa1c993 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 17 May 2026 18:06:37 -0400 Subject: [PATCH 68/74] Remove old commented code --- src/celeritas/track/ExtendFromPrimariesAction.cc | 3 --- src/celeritas/track/ExtendFromPrimariesAction.cu | 5 ----- 2 files changed, 8 deletions(-) diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cc b/src/celeritas/track/ExtendFromPrimariesAction.cc index c173571e99..0e7a08740a 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cc +++ b/src/celeritas/track/ExtendFromPrimariesAction.cc @@ -220,9 
+220,6 @@ void ExtendFromPrimariesAction::update_counters(CoreParams const& params, state.ptr(), detail::UpdateCountersExecutor{num_primaries}); launch_core(1, "update-counters", params, state, execute_thread); - // detail::UpdateCountersExecutor execute{ - // params.ptr(), state.ptr(), num_primaries}; - // return launch_action(*this, 1, params, state, execute); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/track/ExtendFromPrimariesAction.cu b/src/celeritas/track/ExtendFromPrimariesAction.cu index 44b1c82fb1..c6ab27564d 100644 --- a/src/celeritas/track/ExtendFromPrimariesAction.cu +++ b/src/celeritas/track/ExtendFromPrimariesAction.cu @@ -50,11 +50,6 @@ void ExtendFromPrimariesAction::update_counters(CoreParams const& params, static KernelLauncher const launch_kernel( "update-counters"); launch_kernel(1, state.stream_id(), execute_thread); - // detail::UpdateCountersExecutor execute_thread{ - // params.ptr(), state.ptr(), num_primaries}; - // static ActionLauncher const - // launch_kernel(*this); launch_kernel(1, state.stream_id(), - // execute_thread); } //---------------------------------------------------------------------------// From dc0934b04a6596d22de393da13d1b9893484ea9b Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Sun, 17 May 2026 18:16:33 -0400 Subject: [PATCH 69/74] Change UpdateNumActiveExecutor to use make_single_executor --- src/celeritas/track/InitializeTracksAction.cc | 10 +++++-- src/celeritas/track/InitializeTracksAction.cu | 11 ++++--- .../track/detail/UpdateNumActiveExecutor.hh | 30 +++++++------------ 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/celeritas/track/InitializeTracksAction.cc b/src/celeritas/track/InitializeTracksAction.cc index 24a3b4a2ce..94068f8633 100644 --- a/src/celeritas/track/InitializeTracksAction.cc +++ b/src/celeritas/track/InitializeTracksAction.cc @@ -14,6 +14,7 @@ #include "celeritas/global/ActionLauncher.hh" #include 
"celeritas/global/CoreParams.hh" #include "celeritas/global/CoreState.hh" +#include "celeritas/global/TrackExecutor.hh" #include "TrackInitParams.hh" @@ -95,9 +96,12 @@ void InitializeTracksAction::step_impl(CoreParams const& core_params, launch_action(*this, max_new_tracks, core_params, core_state, execute); } { - detail::UpdateNumActiveExecutor execute_thread{ - core_params.ptr(), core_state.ptr()}; - launch_action(*this, 1, core_params, core_state, execute_thread); + auto execute_thread = make_single_track_executor( + core_params.ptr(), + core_state.ptr(), + detail::UpdateNumActiveExecutor{core_state.size()}); + launch_core( + 1, "update-active", core_params, core_state, execute_thread); } } diff --git a/src/celeritas/track/InitializeTracksAction.cu b/src/celeritas/track/InitializeTracksAction.cu index ae5ba2af46..95fba10863 100644 --- a/src/celeritas/track/InitializeTracksAction.cu +++ b/src/celeritas/track/InitializeTracksAction.cu @@ -9,6 +9,7 @@ #include "celeritas/global/ActionLauncher.device.hh" #include "celeritas/global/CoreParams.hh" #include "celeritas/global/CoreState.hh" +#include "celeritas/global/TrackExecutor.hh" #include "detail/InitTracksExecutor.hh" #include "detail/UpdateNumActiveExecutor.hh" @@ -31,10 +32,12 @@ void InitializeTracksAction::step_impl(CoreParams const& params, launch_kernel(num_new_tracks, state.stream_id(), execute); } { - detail::UpdateNumActiveExecutor execute_thread{ - params.ptr(), state.ptr()}; - static ActionLauncher const launch_kernel( - *this); + auto execute_thread = make_single_track_executor( + params.ptr(), + state.ptr(), + detail::UpdateNumActiveExecutor{state.size()}); + static KernelLauncher const launch_kernel( + "update-active"); launch_kernel(1, state.stream_id(), execute_thread); } } diff --git a/src/celeritas/track/detail/UpdateNumActiveExecutor.hh b/src/celeritas/track/detail/UpdateNumActiveExecutor.hh index 72c8926a8e..cbef53fb41 100644 --- a/src/celeritas/track/detail/UpdateNumActiveExecutor.hh +++ 
b/src/celeritas/track/detail/UpdateNumActiveExecutor.hh @@ -22,45 +22,35 @@ namespace detail */ struct UpdateNumActiveExecutor { - //// TYPES //// - - using ParamsPtr = CRefPtr; - using StatePtr = RefPtr; - //// DATA //// - ParamsPtr params; - StatePtr state; + size_type state_size; //// FUNCTIONS //// // Update state counters based on the number of primaries - inline CELER_FUNCTION void operator()(ThreadId tid) const; + CELER_FORCEINLINE_FUNCTION void operator()(CoreTrackView& track); }; //---------------------------------------------------------------------------// /*! * Update number of active trackes based on the number of vacancies. */ -CELER_FUNCTION void UpdateNumActiveExecutor::operator()(ThreadId tid) const +CELER_FORCEINLINE_FUNCTION void +UpdateNumActiveExecutor::operator()(CoreTrackView& track) { - CELER_EXPECT(params); - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should call with only one thread - - auto* counters = state->init.counters.data().get(); + CELER_EXPECT(track.thread_id() == ThreadId{0}); // single thread kernel - size_type num_new_tracks - = min(counters->num_vacancies, counters->num_initializers); + size_type num_new_tracks = min(track.counters().num_vacancies, + track.counters().num_initializers); if (num_new_tracks > 0) { // Update initializers/vacancies - counters->num_initializers -= num_new_tracks; - counters->num_vacancies -= num_new_tracks; + track.counters().num_initializers -= num_new_tracks; + track.counters().num_vacancies -= num_new_tracks; } // Store number of active tracks at the start of the loop - counters->num_active = state->size() - counters->num_vacancies; - return; + track.counters().num_active = state_size - track.counters().num_vacancies; } //---------------------------------------------------------------------------// From 5bc712b1fba6f59aac91e0e3c58bb291db545d0c Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 18 May 2026 11:25:24 -0400 Subject: [PATCH 70/74] Remove unused header --- 
src/celeritas/optical/gen/DirectGeneratorAction.cc | 1 - src/celeritas/optical/gen/DirectGeneratorAction.cu | 1 - 2 files changed, 2 deletions(-) diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index 0fd1f12cef..e416d041e6 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -20,7 +20,6 @@ #include "detail/DirectGeneratorExecutor.hh" #include "detail/GeneratorAlgorithms.hh" -#include "detail/UpdatePendingExecutor.hh" namespace celeritas { diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cu b/src/celeritas/optical/gen/DirectGeneratorAction.cu index 3d6222ff94..5c91cb3846 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.cu +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cu @@ -13,7 +13,6 @@ #include "celeritas/optical/action/TrackSlotExecutor.hh" #include "detail/DirectGeneratorExecutor.hh" -#include "detail/UpdatePendingExecutor.hh" namespace celeritas { From fe6469fa31fb81fb22d37f0fff3005b49d8f6bf2 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Mon, 18 May 2026 12:00:01 -0400 Subject: [PATCH 71/74] Move header to source files --- src/celeritas/optical/Runner.cc | 1 + src/celeritas/optical/Runner.cu | 2 ++ src/celeritas/optical/Runner.hh | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/celeritas/optical/Runner.cc b/src/celeritas/optical/Runner.cc index 5cf9c3b79d..b01b5704da 100644 --- a/src/celeritas/optical/Runner.cc +++ b/src/celeritas/optical/Runner.cc @@ -19,6 +19,7 @@ #include "CoreParams.hh" #include "CoreState.hh" #include "Transporter.hh" +#include "gen/detail/UpdatePendingExecutor.hh" namespace celeritas { diff --git a/src/celeritas/optical/Runner.cu b/src/celeritas/optical/Runner.cu index 0d0fe7cae7..7a7bc93304 100644 --- a/src/celeritas/optical/Runner.cu +++ b/src/celeritas/optical/Runner.cu @@ -10,6 +10,8 @@ #include "celeritas/optical/CoreState.hh" #include 
"celeritas/optical/action/ActionLauncher.device.hh" +#include "gen/detail/UpdatePendingExecutor.hh" + namespace celeritas { namespace optical diff --git a/src/celeritas/optical/Runner.hh b/src/celeritas/optical/Runner.hh index 9b3c48eb4a..89b9436aa7 100644 --- a/src/celeritas/optical/Runner.hh +++ b/src/celeritas/optical/Runner.hh @@ -16,7 +16,6 @@ #include "gen/DirectGeneratorAction.hh" #include "gen/GeneratorAction.hh" #include "gen/PrimaryGeneratorAction.hh" -#include "gen/detail/UpdatePendingExecutor.hh" namespace celeritas { From 4fdb7aed7c894c079799eef42461fac5069ba557 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 20 May 2026 20:36:46 -0400 Subject: [PATCH 72/74] Remove unused header files --- src/celeritas/optical/action/LocateVacanciesAction.cu | 8 +++++--- .../optical/action/detail/UpdateAliveExecutor.hh | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/celeritas/optical/action/LocateVacanciesAction.cu b/src/celeritas/optical/action/LocateVacanciesAction.cu index 9979d22e72..79f63432b4 100644 --- a/src/celeritas/optical/action/LocateVacanciesAction.cu +++ b/src/celeritas/optical/action/LocateVacanciesAction.cu @@ -10,9 +10,11 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" #include "corecel/sys/KernelLauncher.device.hh" -#include "celeritas/optical/CoreParams.hh" -#include "celeritas/optical/CoreState.hh" -#include "celeritas/optical/TrackExecutor.hh" + +#include "ActionLauncher.device.hh" +#include "../CoreParams.hh" +#include "../CoreState.hh" +#include "../TrackExecutor.hh" #include "detail/UpdateAliveExecutor.hh" diff --git a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh index da44875621..52fec0cdb1 100644 --- a/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh +++ b/src/celeritas/optical/action/detail/UpdateAliveExecutor.hh @@ -9,7 +9,6 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" #include 
"corecel/sys/ThreadId.hh" -#include "celeritas/optical/CoreState.hh" #include "celeritas/optical/CoreTrackView.hh" namespace celeritas From 02d1f9a2e978732ba4d8421c94fe03b5d0ec1747 Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 20 May 2026 20:38:43 -0400 Subject: [PATCH 73/74] Remove commented function --- src/celeritas/global/Stepper.cc | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/celeritas/global/Stepper.cc b/src/celeritas/global/Stepper.cc index 25f0e99a89..f49fd6f565 100644 --- a/src/celeritas/global/Stepper.cc +++ b/src/celeritas/global/Stepper.cc @@ -210,17 +210,6 @@ void Stepper::reseed(UniqueEventId event_id) params_->init()->reset_track_ids(state_->stream_id(), &state_->ref().init); } -//---------------------------------------------------------------------------// -/*! - * Set the num_pending counter to the number of generated primaries. - * This template should always be specialized. - */ -// template -// void Stepper::set_generated() -// { -// CELER_NOT_CONFIGURED("CUDA OR HIP"); -// } - //---------------------------------------------------------------------------// /*! * Set the num_pending counter to the number of generated primaries. 
From be7f3d6ed2a8d0d8b179edda869ea976fa0e8f8c Mon Sep 17 00:00:00 2001 From: LSchwiebert Date: Wed, 20 May 2026 20:41:33 -0400 Subject: [PATCH 74/74] Revise UpdatePendingExecutor per WIP feedback --- src/accel/LocalOpticalGenOffload.cc | 12 ++++++++---- src/accel/LocalOpticalGenOffload.cu | 12 +++++++++--- src/celeritas/optical/Runner.cc | 10 +++++++--- src/celeritas/optical/Runner.cu | 15 ++++++++++----- .../optical/gen/DirectGeneratorAction.cc | 7 +++++-- .../optical/gen/DirectGeneratorAction.hh | 9 +++++++-- src/celeritas/optical/gen/GeneratorBase.cc | 19 ++++++++++++++----- src/celeritas/optical/gen/GeneratorBase.cu | 13 +++++++++---- src/celeritas/optical/gen/GeneratorBase.hh | 6 ++++-- .../optical/gen/PrimaryGeneratorAction.cc | 7 ++++--- .../optical/gen/PrimaryGeneratorAction.hh | 5 ++++- .../optical/gen/WlsGeneratorAction.cc | 3 ++- .../gen/detail/UpdatePendingExecutor.hh | 15 +++++++-------- 13 files changed, 90 insertions(+), 43 deletions(-) diff --git a/src/accel/LocalOpticalGenOffload.cc b/src/accel/LocalOpticalGenOffload.cc index 73c97da5b0..a971b2dc82 100644 --- a/src/accel/LocalOpticalGenOffload.cc +++ b/src/accel/LocalOpticalGenOffload.cc @@ -11,11 +11,12 @@ #include "corecel/io/Logger.hh" #include "corecel/sys/Device.hh" -#include "corecel/sys/KernelLauncher.hh" #include "corecel/sys/ScopedProfiling.hh" #include "geocel/GeantUtils.hh" #include "celeritas/global/CoreParams.hh" +#include "celeritas/optical/TrackExecutor.hh" #include "celeritas/optical/Transporter.hh" +#include "celeritas/optical/action/ActionLauncher.hh" #include "celeritas/optical/gen/GeneratorAction.hh" #include "celeritas/optical/gen/detail/UpdatePendingExecutor.hh" #include "celeritas/phys/GeneratorRegistry.hh" @@ -258,9 +259,12 @@ void LocalOpticalGenOffload::Finalize() void LocalOpticalGenOffload::update_primaries( optical::CoreState& state) const { - optical::detail::UpdatePendingExecutor execute_thread{state.ptr(), - num_photons_}; - launch_kernel(1, execute_thread); + auto 
const& optical_params = *transport_->params(); + auto execute_thread = make_single_track_executor( + optical_params.ptr(), + state.ptr(), + optical::detail::UpdatePendingExecutor{num_photons_}); + launch_action(1, execute_thread); } //---------------------------------------------------------------------------// diff --git a/src/accel/LocalOpticalGenOffload.cu b/src/accel/LocalOpticalGenOffload.cu index d60e53de3e..55c14f497e 100644 --- a/src/accel/LocalOpticalGenOffload.cu +++ b/src/accel/LocalOpticalGenOffload.cu @@ -6,7 +6,10 @@ //---------------------------------------------------------------------------// #include "LocalOpticalGenOffload.hh" -#include "corecel/sys/KernelLauncher.device.hh" +#include "celeritas/global/CoreParams.hh" +#include "celeritas/optical/TrackExecutor.hh" +#include "celeritas/optical/Transporter.hh" +#include "celeritas/optical/action/ActionLauncher.device.hh" #include "celeritas/optical/gen/detail/UpdatePendingExecutor.hh" namespace celeritas @@ -19,8 +22,11 @@ namespace celeritas void LocalOpticalGenOffload::update_primaries( optical::CoreState& state) const { - optical::detail::UpdatePendingExecutor execute_thread{state.ptr(), - num_photons_}; + auto const& optical_params = *transport_->params(); + auto execute_thread = make_single_track_executor( + optical_params.ptr(), + state.ptr(), + optical::detail::UpdatePendingExecutor{num_photons_}); static KernelLauncher const launch_kernel( "update-pending"); launch_kernel(1, state.stream_id(), execute_thread); diff --git a/src/celeritas/optical/Runner.cc b/src/celeritas/optical/Runner.cc index b01b5704da..8b1b280f4f 100644 --- a/src/celeritas/optical/Runner.cc +++ b/src/celeritas/optical/Runner.cc @@ -10,9 +10,10 @@ #include "corecel/io/OutputInterfaceAdapter.hh" #include "corecel/io/OutputRegistry.hh" -#include "corecel/sys/KernelLauncher.hh" #include "corecel/sys/ScopedProfiling.hh" #include "celeritas/inp/StandaloneInputIO.json.hh" +#include "celeritas/optical/TrackExecutor.hh" +#include 
"celeritas/optical/action/ActionLauncher.hh" #include "celeritas/phys/GeneratorRegistry.hh" #include "celeritas/setup/Problem.hh" @@ -179,8 +180,11 @@ void Runner::update_pending(CoreState& state, size_type num_pending) const { // Update the number of pending optical photons - detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; - launch_kernel(1, execute); + auto execute_thread = make_single_track_executor( + this->params()->ptr(), + state.ptr(), + detail::UpdatePendingExecutor{num_pending}); + launch_action(1, execute_thread); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/Runner.cu b/src/celeritas/optical/Runner.cu index 7a7bc93304..4b9168143e 100644 --- a/src/celeritas/optical/Runner.cu +++ b/src/celeritas/optical/Runner.cu @@ -7,9 +7,11 @@ #include "Runner.hh" #include "corecel/Assert.hh" -#include "celeritas/optical/CoreState.hh" -#include "celeritas/optical/action/ActionLauncher.device.hh" +#include "CoreParams.hh" +#include "CoreState.hh" +#include "TrackExecutor.hh" +#include "action/ActionLauncher.device.hh" #include "gen/detail/UpdatePendingExecutor.hh" namespace celeritas @@ -24,10 +26,13 @@ void Runner::update_pending(CoreState& state, size_type num_pending) const { // Update the number of pending optical photons - detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; - static KernelLauncher const launch_kernel( + auto execute_thread = make_single_track_executor( + this->params()->ptr(), + state.ptr(), + detail::UpdatePendingExecutor{num_pending}); + static KernelLauncher const launch_kernel( "update-pending"); - launch_kernel(1, state.stream_id(), execute); + launch_kernel(1, state.stream_id(), execute_thread); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.cc b/src/celeritas/optical/gen/DirectGeneratorAction.cc index e416d041e6..047b0a3713 100644 --- 
a/src/celeritas/optical/gen/DirectGeneratorAction.cc +++ b/src/celeritas/optical/gen/DirectGeneratorAction.cc @@ -25,6 +25,8 @@ namespace celeritas { namespace optical { +CoreParams* DirectGeneratorAction::params_ = nullptr; // Set in + // make_and_insert() namespace { //---------------------------------------------------------------------------// @@ -49,7 +51,7 @@ auto make_state(StreamId stream, size_type size) * Construct and add to core params. */ std::shared_ptr -DirectGeneratorAction::make_and_insert(CoreParams const& params) +DirectGeneratorAction::make_and_insert(CoreParams& params) { ActionRegistry& actions = *params.action_reg(); AuxParamsRegistry& aux = *params.aux_reg(); @@ -59,6 +61,7 @@ DirectGeneratorAction::make_and_insert(CoreParams const& params) actions.insert(result); aux.insert(result); gen.insert(result); + params_ = ¶ms; return result; } @@ -144,7 +147,7 @@ void DirectGeneratorAction::insert_impl(CoreState& state, // Update counters and copy distributions to aux state storage aux_state.counters.buffer_size = data.size(); aux_state.counters.num_pending = data.size(); - this->update_pending(state, data.size()); + this->update_pending(*params_, state, data.size()); Copier copy_to_aux{aux_state.initializers(), state.stream_id()}; diff --git a/src/celeritas/optical/gen/DirectGeneratorAction.hh b/src/celeritas/optical/gen/DirectGeneratorAction.hh index 683ce84e1e..9db7d44e22 100644 --- a/src/celeritas/optical/gen/DirectGeneratorAction.hh +++ b/src/celeritas/optical/gen/DirectGeneratorAction.hh @@ -42,8 +42,7 @@ class DirectGeneratorAction final : public GeneratorBase public: // Construct and add to core params - static std::shared_ptr - make_and_insert(CoreParams const&); + static std::shared_ptr make_and_insert(CoreParams&); // Construct with action ID and data IDs DirectGeneratorAction(ActionId, AuxId, GeneratorId); @@ -61,6 +60,12 @@ class DirectGeneratorAction final : public GeneratorBase void step(CoreParams const&, CoreStateDevice&) const 
final; private: + //// DATA //// + + // Core params isn't passed to insert(), so save a pointer so + // update_pending() can be called later + static CoreParams* params_; + //// HELPER FUNCTIONS //// template diff --git a/src/celeritas/optical/gen/GeneratorBase.cc b/src/celeritas/optical/gen/GeneratorBase.cc index eb327f78a5..3d6c900faf 100644 --- a/src/celeritas/optical/gen/GeneratorBase.cc +++ b/src/celeritas/optical/gen/GeneratorBase.cc @@ -8,7 +8,10 @@ #include "corecel/Assert.hh" #include "corecel/data/AuxStateVec.hh" -#include "corecel/sys/KernelLauncher.hh" +#include "celeritas/optical/CoreParams.hh" +#include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/TrackExecutor.hh" +#include "celeritas/optical/action/ActionLauncher.hh" #include "detail/UpdatePendingExecutor.hh" @@ -54,17 +57,23 @@ GeneratorStateBase const& GeneratorBase::counters(AuxStateVec const& aux) const /*! * Launch a (host) kernel to update the number of pending optical photons. */ -void GeneratorBase::update_pending(CoreStateHost& state, +void GeneratorBase::update_pending(CoreParams const& params, + CoreStateHost& state, size_type num_pending) const { // Update the number of pending optical photons - detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; - launch_kernel(1, execute); + auto execute_thread = make_single_track_executor( + params.ptr(), + state.ptr(), + detail::UpdatePendingExecutor{num_pending}); + launch_action(1, execute_thread); } //---------------------------------------------------------------------------// #if !CELER_USE_DEVICE -void GeneratorBase::update_pending(CoreStateDevice&, size_type) const +void GeneratorBase::update_pending(CoreParams const&, + CoreStateDevice&, + size_type) const { CELER_NOT_CONFIGURED("CUDA OR HIP"); } diff --git a/src/celeritas/optical/gen/GeneratorBase.cu b/src/celeritas/optical/gen/GeneratorBase.cu index 1327c2bf8a..f67a47cd18 100644 --- a/src/celeritas/optical/gen/GeneratorBase.cu +++ 
b/src/celeritas/optical/gen/GeneratorBase.cu @@ -8,6 +8,7 @@ #include "corecel/Assert.hh" #include "celeritas/optical/CoreState.hh" +#include "celeritas/optical/TrackExecutor.hh" #include "celeritas/optical/action/ActionLauncher.device.hh" #include "detail/UpdatePendingExecutor.hh" @@ -20,14 +21,18 @@ namespace optical /*! * Launch a (device) kernel to update the number of pending optical photons. */ -void GeneratorBase::update_pending(CoreStateDevice& state, +void GeneratorBase::update_pending(CoreParams const& params, + CoreStateDevice& state, size_type num_pending) const { // Update the number of pending optical photons - detail::UpdatePendingExecutor execute{state.ptr(), num_pending}; - static KernelLauncher const launch_kernel( + auto execute_thread = make_single_track_executor( + params.ptr(), + state.ptr(), + detail::UpdatePendingExecutor{num_pending}); + static KernelLauncher const launch_kernel( "update-pending"); - launch_kernel(1, state.stream_id(), execute); + launch_kernel(1, state.stream_id(), execute_thread); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/GeneratorBase.hh b/src/celeritas/optical/gen/GeneratorBase.hh index 2d4db7e39d..e9e271331f 100644 --- a/src/celeritas/optical/gen/GeneratorBase.hh +++ b/src/celeritas/optical/gen/GeneratorBase.hh @@ -16,6 +16,8 @@ namespace celeritas { +class CoreParams; + namespace optical { //---------------------------------------------------------------------------// @@ -75,8 +77,8 @@ class GeneratorBase : virtual public optical::OpticalStepActionInterface, inline void update_counters(optical::CoreState&) const; // Update the num_pending state counter - void update_pending(CoreStateHost&, size_type) const; - void update_pending(CoreStateDevice&, size_type) const; + void update_pending(CoreParams const&, CoreStateHost&, size_type) const; + void update_pending(CoreParams const&, CoreStateDevice&, size_type) const; private: StaticActionData 
sad_; diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc index e679b30ff0..3887abee2d 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.cc +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.cc @@ -29,12 +29,13 @@ namespace celeritas { namespace optical { +CoreParams* PrimaryGeneratorAction::core_params_ = nullptr; //---------------------------------------------------------------------------// /*! * Construct and add to core params. */ std::shared_ptr -PrimaryGeneratorAction::make_and_insert(CoreParams const& params, Input&& input) +PrimaryGeneratorAction::make_and_insert(CoreParams& params, Input&& input) { CELER_EXPECT(input); ActionRegistry& actions = *params.action_reg(); @@ -46,6 +47,7 @@ PrimaryGeneratorAction::make_and_insert(CoreParams const& params, Input&& input) actions.insert(result); aux.insert(result); gen.insert(result); + core_params_ = ¶ms; return result; } @@ -72,7 +74,6 @@ PrimaryGeneratorAction::PrimaryGeneratorAction(ActionId id, data_.shape = std::visit(insert, inp.shape); params_ = ParamsDataStore{std::move(host_params)}; - CELER_ENSURE(data_); CELER_ENSURE(params_); } @@ -135,7 +136,7 @@ void PrimaryGeneratorAction::insert_impl(optical::CoreState& state) const auto& aux_state = this->counters(*state.aux()); aux_state.counters.num_pending = data_.num_photons; - this->update_pending(state, data_.num_photons); + this->update_pending(*core_params_, state, data_.num_photons); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/optical/gen/PrimaryGeneratorAction.hh b/src/celeritas/optical/gen/PrimaryGeneratorAction.hh index 2b3cf537cf..60f8bea69a 100644 --- a/src/celeritas/optical/gen/PrimaryGeneratorAction.hh +++ b/src/celeritas/optical/gen/PrimaryGeneratorAction.hh @@ -47,7 +47,7 @@ class PrimaryGeneratorAction final : public GeneratorBase public: // Construct and add to core params static std::shared_ptr - 
make_and_insert(CoreParams const&, Input&&); + make_and_insert(CoreParams&, Input&&); // Construct with IDs and distributions PrimaryGeneratorAction(ActionId, AuxId, GeneratorId, Input); @@ -76,6 +76,9 @@ class PrimaryGeneratorAction final : public GeneratorBase PrimaryDistributionData data_; ParamsDataStore params_; + // Core params isn't passed to insert(), so save a pointer so + // update_pending() can be called later + static CoreParams* core_params_; //// HELPER FUNCTIONS //// diff --git a/src/celeritas/optical/gen/WlsGeneratorAction.cc b/src/celeritas/optical/gen/WlsGeneratorAction.cc index 71c00c403f..709f878803 100644 --- a/src/celeritas/optical/gen/WlsGeneratorAction.cc +++ b/src/celeritas/optical/gen/WlsGeneratorAction.cc @@ -148,7 +148,8 @@ void WlsGeneratorAction::step_impl(CoreParams const& params, } // Update the core state counters with the number of new pending tracks - this->update_pending(state, counters.num_pending - num_pending_prev); + this->update_pending( + params, state, counters.num_pending - num_pending_prev); if (counters.num_pending > 0) { diff --git a/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh index f1673dc9e6..d5eafde19d 100644 --- a/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh +++ b/src/celeritas/optical/gen/detail/UpdatePendingExecutor.hh @@ -8,7 +8,8 @@ #include "corecel/Macros.hh" #include "corecel/Types.hh" -#include "celeritas/optical/CoreState.hh" +#include "corecel/sys/ThreadId.hh" +#include "celeritas/optical/CoreTrackView.hh" namespace celeritas { @@ -27,13 +28,12 @@ struct UpdatePendingExecutor { //// DATA //// - RefPtr state; size_type num_photons; //// FUNCTIONS //// // Update number of of primaries waiting to be generated - CELER_FORCEINLINE_FUNCTION void operator()(ThreadId tid); + CELER_FORCEINLINE_FUNCTION void operator()(CoreTrackView& track); }; //---------------------------------------------------------------------------// @@ 
-43,13 +43,12 @@ struct UpdatePendingExecutor * Update number of primaries to be generated to include the buffered optical * photons. */ -CELER_FORCEINLINE_FUNCTION void UpdatePendingExecutor::operator()(ThreadId tid) +CELER_FORCEINLINE_FUNCTION void +UpdatePendingExecutor::operator()(CoreTrackView& track) { - CELER_EXPECT(state); - CELER_EXPECT(tid.get() == 0); // Should call with only one thread + CELER_EXPECT(track.thread_id() == ThreadId{0}); // single thread kernel - auto counters = state->init.counters.data().get(); - counters->num_pending += num_photons; + track.counters().num_pending += num_photons; } //---------------------------------------------------------------------------//