diff --git a/docs/src/API.md b/docs/src/API.md
index 349326449..0a6914445 100644
--- a/docs/src/API.md
+++ b/docs/src/API.md
@@ -116,6 +116,7 @@ Microphysics2M.number_decrease_for_mass_limit
 DistributionTools
 DistributionTools.generalized_gamma_quantile
 DistributionTools.generalized_gamma_cdf
+DistributionTools.gamma_inc_inv
 DistributionTools.exponential_cdf
 DistributionTools.exponential_quantile
 ```
@@ -387,6 +388,7 @@ Lightweight numerical utility functions shared across modules.
 ```@docs
 Utilities
 Utilities.clamp_to_nonneg
+Utilities.promote_typeof
 Utilities.ϵ_numerics
 Utilities.ϵ_numerics_2M_M
 Utilities.ϵ_numerics_2M_N
diff --git a/docs/src/P3Scheme.md b/docs/src/P3Scheme.md
index d7d67fa73..7a660a650 100644
--- a/docs/src/P3Scheme.md
+++ b/docs/src/P3Scheme.md
@@ -118,6 +118,54 @@ We obtain the following expression for $ρ_d$
   \frac{(β_{va} - 2)(k - 1)}{(1 - F_{rim}) k - 1} - (1 - F_{rim})
 }
 ```
+
+!!! details "Click here to see a numerically stable form"
+    Evaluated directly, this expression loses accuracy as ``F_{rim} → 0``. In that limit ``k → 1`` and
+    ``(1 - F_{rim}) k → 1``, so the ratio ``(k - 1) / ((1 - F_{rim}) k - 1)`` is ``0/0``. In `Float32` the
+    resulting cancellation drives the computed ``ρ_d``, and therefore ``ρ_g``, negative. We rewrite the
+    expression so that the cancellation is removed.
+
+    Let ``F_u = 1 - F_{rim}`` be the unrimed fraction, let ``L = \log F_u``, and let ``p = 1 / (3 - β_{va})``,
+    so that ``k = F_u^{-p}`` and ``(1 - F_{rim}) k = F_u^{1 - p}``. Every power of the form ``F_u^a - 1`` is
+    equal to ``e^{aL} - 1``. We write these through the relative exponential functions
+
+    ```math
+    φ(a) = \frac{e^{aL} - 1}{aL} = \mathrm{exprel}_1(aL), \qquad
+    ψ(a) = \frac{e^{aL} - 1 - aL}{(aL)^2} = \mathrm{exprel}_2(aL),
+    ```
+
+    both of which stay finite and accurate as ``L → 0``. Using ``β_{va} - 2 = -(1 - p) / p``, the denominator
+    of ``ρ_d`` becomes ``φ(-p) / φ(1 - p) - F_u``. Both the numerator ``ρ_{rim} F_{rim}`` and the denominator
+    are proportional to ``L`` as ``F_{rim} → 0``, so we factor ``L`` out of each. With ``F_{rim} = -L\,φ(1)``
+    and ``φ(-p) - φ(1 - p) = L\,[-p\,ψ(-p) - (1 - p)\,ψ(1 - p)]``, the denominator divided by ``L`` is
+
+    ```math
+    G = \big[-p\,ψ(-p) - (1 - p)\,ψ(1 - p)\big] - φ(1 - p)\,φ(1),
+    ```
+
+    which tends to ``-3/2`` as ``F_{rim} → 0``. The factor ``L`` cancels, and we are left with
+
+    ```math
+    ρ_d = -\frac{ρ_{rim}\,φ(1)\,φ(1 - p)}{G}.
+    ```
+
+    This form has no subtraction of nearly equal numbers, so it stays accurate in `Float32`, and ``ρ_g``
+    stays positive for every physical input without any clipping. The functions ``\mathrm{exprel}_1`` and
+    ``\mathrm{exprel}_2`` (implemented by the internal `exprel` function) use a Taylor series at small
+    arguments, where the closed forms would themselves cancel.
+
+    The figure below shows the relative error of ``ρ_g`` in `Float32`, against a reference computed in
+    `BigFloat`, for both the direct evaluation and this rewrite. The direct form is order-one wrong at small
+    rime fractions and only reaches `Float32` precision near ``F_{rim} = 0.35``, while the rewrite stays at
+    the precision floor across the whole range.
+
+    ```@example
+    include("plots/P3RhoDStability.jl")
+
+    nothing # hide
+    ```
+    ![](P3RhoDStability.svg)
+
 Given $ρ_d$, we can obtain $ρ_g$, $D_{gr}$, and $D_{cr}$ using the expressions above. Depending on the value of $ρ_{rim}$ and $F_{rim}$, 
   these thresholds and densities obtain a range of values, as shown in the plot below.
 
diff --git a/docs/src/plots/P3RhoDStability.jl b/docs/src/plots/P3RhoDStability.jl
new file mode 100644
index 000000000..e1aa8e9c7
--- /dev/null
+++ b/docs/src/plots/P3RhoDStability.jl
@@ -0,0 +1,39 @@
+import CairoMakie: Makie
+import CloudMicrophysics.Parameters as CMP
+import CloudMicrophysics.P3Scheme as P3
+
+# Direct evaluation of the analytical solution for ρ_d, which loses accuracy as F_rim → 0.
+function ρ_d_direct(β_va, F_rim, ρ_rim)
+    p = 1 / (3 - β_va)
+    Fᵤ = 1 - F_rim
+    k = Fᵤ^(-p)
+    den = (β_va - 2) * (k - 1) / (Fᵤ * k - 1) - Fᵤ
+    return ρ_rim * F_rim / den
+end
+
+mass = CMP.ParametersP3(Float32).mass
+β_va = mass.β_va
+ρ_rim = 400
+
+ρ_g(ρ_d, F_rim) = F_rim * ρ_rim + (1 - F_rim) * ρ_d
+F_rims = [10.0^e for e in -7:0.05:-0.005]
+ρ_g_ref(F_rim) = Float64(ρ_g(ρ_d_direct(big(β_va), big(F_rim), big(ρ_rim)), big(F_rim)))
+rel_err(ρ_g_f32, F_rim) = abs(Float64(ρ_g_f32) - ρ_g_ref(F_rim)) / abs(ρ_g_ref(F_rim))
+
+err_stable = [rel_err(ρ_g(P3.get_ρ_d(mass, Float32(F), Float32(ρ_rim)), Float32(F)), F) for F in F_rims]
+err_direct = [rel_err(ρ_g(ρ_d_direct(β_va, Float32(F), Float32(ρ_rim)), Float32(F)), F) for F in F_rims]
+
+fig = Makie.Figure(size = (760, 460))
+ax = Makie.Axis(
+    fig[1, 1];
+    xscale = log10,
+    yscale = log10,
+    xlabel = "F_rim",
+    ylabel = "relative error of ρ_g (Float32)",
+    title = "Direct evaluation vs. numerically stable rewrite",
+)
+Makie.lines!(ax, F_rims, max.(err_direct, 1e-16), label = "direct evaluation", linewidth = 2)
+Makie.lines!(ax, F_rims, max.(err_stable, 1e-16), label = "stable rewrite", linewidth = 2)
+Makie.hlines!(ax, [eps(Float32)], color = :gray, linestyle = :dash, label = "eps(Float32)")
+Makie.axislegend(ax; position = :rt)
+Makie.save("P3RhoDStability.svg", fig)
diff --git a/src/BulkMicrophysicsTendencies.jl b/src/BulkMicrophysicsTendencies.jl
index 958887851..86265d455 100644
--- a/src/BulkMicrophysicsTendencies.jl
+++ b/src/BulkMicrophysicsTendencies.jl
@@ -150,7 +150,7 @@ processes are pre-routed by temperature, so consumers never need `is_warm`.
     q_rai = UT.clamp_to_nonneg(q_rai)
     q_sno = UT.clamp_to_nonneg(q_sno)
 
-    FT = typeof(q_tot)
+    FT = UT.promote_typeof(ρ, T, q_tot, q_lcl, q_icl, q_rai, q_sno)
     opts = mp.options
 
     # Construct state tuples (reused across all process calls)
diff --git a/src/Common.jl b/src/Common.jl
index 6aa0d63ae..df1b2c89f 100644
--- a/src/Common.jl
+++ b/src/Common.jl
@@ -154,7 +154,8 @@ This curve smoothly transitions from y = 0 for 0 < x < x_0 to y = x - x_0 for x_
 # Returns
 - Integral of logistic function (same units as x)
 """
-@inline function logistic_function_integral(x::FT, x_0::FT, k::FT) where {FT}
+@inline function logistic_function_integral(x, x_0, k)
+    FT = UT.promote_typeof(x, x_0, k)
     # Branchless GPU-compatible implementation
     x = max(FT(0), x)
     x_safe = max(x, UT.ϵ_numerics(FT))
@@ -372,7 +373,8 @@ Assumes exponential size distribution (μ=0).
 # Returns
 - Bulk fall speed component [m/s]
 """
-@inline function Chen2022_exponential_pdf(a::FT, b::FT, c::FT, λ_inv::FT, k::Int) where {FT}
+@inline function Chen2022_exponential_pdf(a, b, c, λ_inv, k::Int)
+    FT = UT.promote_typeof(a, b, c, λ_inv)
     # μ = 0 for exponential distribution, δ = k + 1
     δ = FT(k + 1)
     return a * exp(-δ * log(λ_inv) - (b + δ) * log(1 / λ_inv + c)) * SF.gamma(b + δ) / SF.gamma(δ)
diff --git a/src/DistributionTools.jl b/src/DistributionTools.jl
index 3f5bd2efd..9cb70f922 100644
--- a/src/DistributionTools.jl
+++ b/src/DistributionTools.jl
@@ -10,8 +10,10 @@ Mostly used for the 2-moment microphysics.
 """
 module DistributionTools
 
+import ForwardDiff as FD
 import SpecialFunctions as SF
 import LogExpFunctions as LEF
+import ..Utilities as UT
 
 """
     size_distribution(args...; kwargs...)
@@ -42,10 +44,53 @@ Calculate the quantile (inverse cumulative distribution function) for a
 """
 function generalized_gamma_quantile(ν, μ, B, Y)
     # Compute the inverse of the regularized incomplete gamma function
-    z = SF.gamma_inc_inv((ν + 1) / μ, Y, 1 - Y)
+    z = gamma_inc_inv((ν + 1) / μ, Y, 1 - Y)
     return (z / B)^(1 / μ)
 end
 
+"""
+    gamma_inc_inv(a, p, q)
+
+Inverse of the regularized lower incomplete gamma function: the `x` such that
+`P(a, x) = p` (with `q = 1 - p`). Forwards to `SF.gamma_inc_inv`, and adds a
+`ForwardDiff.Dual` method (which `SpecialFunctions` lacks) by differentiating
+the defining identity, a special case of the
+[implicit function theorem](https://en.wikipedia.org/wiki/Implicit_function_theorem):
+
+    P(a, x(a, p)) = p   ⟹   ∂ₓP ⋅ ∂x/∂p = 1,   ∂ₐP + ∂ₓP ⋅ ∂x/∂a = 0
+
+so with `∂ₓP(a, x) = x^(a-1) e^(-x) / Γ(a)`,
+
+    ∂x/∂p = 1 / ∂ₓP(a, x),   ∂x/∂a = -∂ₐP / ∂ₓP
+
+`∂ₐP` has no closed form; if (and only if) `a` carries derivatives, it is
+obtained by a forward difference of the forward map in `a` against the known
+base value (`P(a, x) = p`, `Q(a, x) = q`), differencing the smaller of
+`(P, Q)`. The redundant `q` slot is treated symmetrically,
+`∂x = ½ ∂x/∂p ⋅ (∂p - ∂q)`, which composes correctly for callers passing
+`(Y, 1 - Y)` regardless of which slot carries derivatives.
+"""
+gamma_inc_inv(a::Real, p::Real, q::Real) = _gamma_inc_inv(promote(a, p, q)...)
+
+_gamma_inc_inv(a::Real, p::Real, q::Real) = SF.gamma_inc_inv(a, p, q)
+
+function _gamma_inc_inv(a::FD.Dual{T}, p::FD.Dual{T}, q::FD.Dual{T}) where {T}
+    av, pv, qv = FD.value(a), FD.value(p), FD.value(q)
+    x = SF.gamma_inc_inv(av, pv, qv)
+    dxdp = exp(SF.loggamma(av) + x - (av - 1) * log(x))
+    da, dp, dq = FD.partials(a), FD.partials(p), FD.partials(q)
+    dxda = if iszero(da)
+        zero(dxdp)
+    else
+        h = sqrt(eps(typeof(av)))
+        # forward difference the smaller of (P, Q) against its base value
+        p_ph, q_ph = SF.gamma_inc(av + h, x)
+        dPda = ifelse(pv ≤ qv, p_ph - pv, -(q_ph - qv)) / h
+        -dPda * dxdp
+    end
+    return FD.Dual{T}(x, dxda * da + dxdp / 2 * (dp - dq))
+end
+
 """
     generalized_gamma_cdf(ν, μ, B, x)
 
@@ -176,7 +221,7 @@ Calculate the nth moment of an exponential distribution parameterized in the for
  - `Mⁿ`: The nth physical moment of the distribution
 """
 function exponential_Mⁿ(D_mean, N, n)
-    return N * factorial(n) * D_mean^n
+    return N * UT.fac(n) * D_mean^n
 end
 
 end # module DistributionTools
diff --git a/src/IceNucleation.jl b/src/IceNucleation.jl
index ff4bdbc36..f89e1024f 100644
--- a/src/IceNucleation.jl
+++ b/src/IceNucleation.jl
@@ -266,7 +266,7 @@ Compute the rate of liquid water freezing into ice.
     + `∂ₜq_frz`: Specific mass freezing rate [kg(water) kg⁻¹(air) s⁻¹].
 """
 function liquid_freezing_rate(opt::CMP.RainFreezing, pdf, tps, q, ρ, N, T)
-    FT = eltype(q)
+    FT = float(UT.promote_typeof(q, ρ, N, T))
     T_freeze = TDI.TD.Parameters.T_freeze(tps)
     (; ρw) = pdf  # [kg(water) m⁻³(water)]
     n = N / ρ     # specific number concentration [kg⁻¹(air)]
@@ -298,8 +298,8 @@ function liquid_freezing_rate(opt::CMP.RainFreezing, pdf, tps, q, ρ, N, T)
     # Otherwise, return zero.
     ϵₘ, ϵₙ = UT.ϵ_numerics_2M_M(FT), UT.ϵ_numerics_2M_N(FT)
     cond = (n > ϵₙ) & (q > ϵₘ) & (T < T_freeze - 4)
-    ∂ₜn_frz = ifelse(cond, ∂ₜn_frz, zero(n))
-    ∂ₜq_frz = ifelse(cond, ∂ₜq_frz, zero(q))
+    ∂ₜn_frz = ifelse(cond, ∂ₜn_frz, zero(FT))
+    ∂ₜq_frz = ifelse(cond, ∂ₜq_frz, zero(FT))
 
     return (; ∂ₜn_frz, ∂ₜq_frz)
 end
@@ -351,7 +351,7 @@ function liquid_freezing_rate(
     opt::CMP.RainFreezing, pdf::CMP.CloudParticlePDF_SB2006,
     tps, q, ρ, N, T,
 )
-    FT = eltype(q)
+    FT = float(UT.promote_typeof(q, ρ, N, T))
     T_freeze = TDI.TD.Parameters.T_freeze(tps)
     (; ρw) = pdf
     n = N / ρ     # specific number concentration [kg⁻¹(air)]
@@ -375,8 +375,8 @@ function liquid_freezing_rate(
     # Check non-trivial number/mass and that T < -4 °C.
     ϵₘ, ϵₙ = UT.ϵ_numerics_2M_M(FT), UT.ϵ_numerics_2M_N(FT)
     cond = (n > ϵₙ) & (q > ϵₘ) & (T < T_freeze - 4)  # TODO: Make a parameter.
-    ∂ₜn_frz = ifelse(cond, ∂ₜn_frz, zero(n))
-    ∂ₜq_frz = ifelse(cond, ∂ₜq_frz, zero(q))
+    ∂ₜn_frz = ifelse(cond, ∂ₜn_frz, zero(FT))
+    ∂ₜq_frz = ifelse(cond, ∂ₜq_frz, zero(FT))
 
     return (; ∂ₜn_frz, ∂ₜq_frz)
 end
diff --git a/src/Microphysics2M.jl b/src/Microphysics2M.jl
index 986326ab0..045b8150b 100644
--- a/src/Microphysics2M.jl
+++ b/src/Microphysics2M.jl
@@ -66,9 +66,9 @@ Return the parameters of the rain drop diameter distribution
  - A `NamedTuple` with the fields `(; N₀r, Dr_mean, xr_mean)`
 """
 function pdf_rain_parameters(pdf_r::CMP.RainParticlePDF_SB2006_notlimited, qᵣ, ρₐ, Nᵣ)
-    FT = eltype(qᵣ)
+    FT = UT.promote_typeof(qᵣ, ρₐ, Nᵣ)
     (Nᵣ < UT.ϵ_numerics_2M_N(FT) || qᵣ < UT.ϵ_numerics_2M_M(FT)) &&
-        return (; N₀r = zero(Nᵣ), Dr_mean = zero(qᵣ), xr_mean = zero(qᵣ))
+        return (; N₀r = zero(FT), Dr_mean = zero(FT), xr_mean = zero(FT))
     (; ρw) = pdf_r
     Lᵣ = ρₐ * qᵣ
 
@@ -80,9 +80,9 @@ function pdf_rain_parameters(pdf_r::CMP.RainParticlePDF_SB2006_notlimited, qᵣ,
     return (; N₀r, Dr_mean, xr_mean)
 end
 function pdf_rain_parameters(pdf_r::CMP.RainParticlePDF_SB2006_limited, qᵣ, ρₐ, Nᵣ)
-    FT = eltype(qᵣ)
+    FT = UT.promote_typeof(qᵣ, ρₐ, Nᵣ)
     (Nᵣ < UT.ϵ_numerics_2M_N(FT) && qᵣ < UT.ϵ_numerics_2M_M(FT)) &&
-        return (; N₀r = zero(Nᵣ), Dr_mean = zero(qᵣ), xr_mean = zero(qᵣ))
+        return (; N₀r = zero(FT), Dr_mean = zero(FT), xr_mean = zero(FT))
     (; xr_min, xr_max, N0_min, N0_max, λ_min, λ_max, ρw) = pdf_r
     Lᵣ = ρₐ * max(0, qᵣ)
 
@@ -161,10 +161,10 @@ That is,
  - `(logA, logB)`: Log of the parameters of the generalized gamma distribution
 """
 function log_pdf_cloud_parameters_mass(pdf_c, q, ρₐ, N)
-    FT = eltype(q)
+    FT = UT.promote_typeof(q, ρₐ, N)
     L = ρₐ * q
     # If L or N are (essentially) zero, return `A=0` (no number per mass), `B=∞` (zero mass "length" scale)
-    (N < UT.ϵ_numerics_2M_N(FT) || L < UT.ϵ_numerics_2M_M(FT)) && return (log(zero(N)), log(1 / zero(q)))
+    (N < UT.ϵ_numerics_2M_N(FT) || L < UT.ϵ_numerics_2M_M(FT)) && return (log(zero(FT)), log(1 / zero(FT)))
     (; νc, μc) = pdf_c
     logx̄ = log(L / N)
     z1 = (νc + 1) / μc
@@ -209,7 +209,7 @@ where
  - `(logN₀c, λc, νcD, μcD)`: Parameters of the generalized gamma distribution in terms of diameter
 """
 function pdf_cloud_parameters(pdf_c, q, ρₐ, N)
-    FT = eltype(q)
+    FT = UT.promote_typeof(q, ρₐ, N)
     logAc, logBc = log_pdf_cloud_parameters_mass(pdf_c, q, ρₐ, N)
     (; νc, μc, ρw) = pdf_c
     k_m = ρw * π / 6
@@ -253,7 +253,11 @@ Return `n(D)`, a function that computes the size distribution for rain particles
 """
 function DT.size_distribution(pdf::CMP.RainParticlePDF_SB2006, q, ρₐ, N)
     (; N₀r, Dr_mean) = pdf_rain_parameters(pdf, q, ρₐ, N)
-    return n(D) = iszero(N₀r) ? zero(D) : N₀r * exp(-D / Dr_mean)
+    function n(D)
+        v = N₀r * exp(-D / Dr_mean)
+        return ifelse(iszero(N₀r), zero(v), v)
+    end
+    return n
 end
 
 """
@@ -274,7 +278,12 @@ The size distribution is given by:
 """
 function DT.size_distribution(pdf::CMP.CloudParticlePDF_SB2006, q, ρₐ, N)
     (; logN₀c, λc, νcD, μcD) = pdf_cloud_parameters(pdf, q, ρₐ, N)
-    return n(D) = logN₀c == -Inf ? zero(D) : exp(logN₀c + νcD * log(D) - λc * D^μcD)
+    # zero the active value (not logN₀c) so the closure is type-concrete under mixed Dual/plain D
+    function n(D)
+        v = exp(logN₀c + νcD * log(D) - λc * D^μcD)
+        return ifelse(logN₀c == -Inf, zero(v), v)
+    end
+    return n
 end
 
 """
@@ -312,7 +321,7 @@ the size distribution is within (1 - p) to (p) probability of the true size dist
 function get_size_distribution_bounds(
     pdf::CMP.RainParticlePDF_SB2006, q, ρₐ, N, p = eps(eltype(q)),
 )
-    FT = eltype(q)
+    FT = UT.promote_typeof(q, ρₐ, N)
     (; Dr_mean) = pdf_rain_parameters(pdf, q, ρₐ, N)
     iszero(Dr_mean) && return (FT(0), FT(0))
     D_min = DT.exponential_quantile(Dr_mean, p)
@@ -322,13 +331,8 @@ end
 function get_size_distribution_bounds(
     pdf::CMP.CloudParticlePDF_SB2006, q, ρₐ, N, p = eps(eltype(q)),
 )
-    FT = eltype(q)
+    FT = UT.promote_typeof(q, ρₐ, N)
     (; λc, νcD, μcD) = pdf_cloud_parameters(pdf, q, ρₐ, N)
-
-    # `generalized_gamma_quantile` calls `SF.gamma_inc_inv`, which returns
-    # Float64 regardless of input type; pin the bounds back to `eltype(q)` so
-    # downstream integration nodes stay in the working precision (matches the
-    # rain method above and the ice `integral_bounds` `FT(...)` conversion).
     D_min = FT(DT.generalized_gamma_quantile(νcD, μcD, λc, p))
     D_max = FT(DT.generalized_gamma_quantile(νcD, μcD, λc, 1 - p))
     return D_min, D_max
@@ -353,6 +357,8 @@ densities of cloud liquid water and rain water.
     "Rate of change of the rain water number density"
     dN_rai_dt::FT = FT(0)
 end
+LclRaiRates(dq_lcl_dt, dN_lcl_dt, dq_rai_dt, dN_rai_dt) =
+    LclRaiRates(promote(dq_lcl_dt, dN_lcl_dt, dq_rai_dt, dN_rai_dt)...)
 
 """
     autoconversion(acnv, pdf_c, q_lcl, q_rai, ρ, N_lcl)
@@ -374,8 +380,9 @@ Compute autoconversion rates
 function autoconversion(
     acnv::CMP.AcnvSB2006, pdf_c::CMP.CloudParticlePDF_SB2006, q_lcl, q_rai, ρ, N_lcl,
 )
-    FT = eltype(q_lcl)
-    if q_lcl < UT.ϵ_numerics_2M_M(FT) || N_lcl < UT.ϵ_numerics_2M_N(FT)
+    FT = UT.promote_typeof(q_lcl, q_rai, ρ, N_lcl)
+    ϵM, ϵN = UT.ϵ_numerics_2M_M(FT), UT.ϵ_numerics_2M_N(FT)
+    if q_lcl < ϵM || N_lcl < ϵN
         return LclRaiRates{FT}()
     end
 
@@ -386,7 +393,9 @@ function autoconversion(
     x_lcl = min(x_star, L_lcl / N_lcl)
     q_rai = max(0, q_rai)
     τ = 1 - q_lcl / (q_lcl + q_rai)  # Eq. (5) from SB2006
-    ϕ_au = A * τ^a * (1 - τ^a)^b
+    # τ^a has a vertical tangent at τ = 0; the ifelse keeps the ForwardDiff
+    # derivative w.r.t. q_rai finite at q_rai = 0 (and the code branch-free)
+    ϕ_au = ifelse(q_rai < ϵM, zero(τ), A * τ^a * (1 - τ^a)^b)
 
     dL_rai_dt =
         kcc / 20 / x_star * (νc + 2) * (νc + 4) / (νc + 1)^2 *
@@ -421,7 +430,7 @@ Compute accretion rate
 """
 function accretion((; accr)::CMP.SB2006, q_lcl, q_rai, ρ, N_lcl)
 
-    FT = eltype(q_lcl)
+    FT = UT.promote_typeof(q_lcl, q_rai, ρ, N_lcl)
     if q_lcl < UT.ϵ_numerics_2M_M(FT) || q_rai < UT.ϵ_numerics_2M_M(FT) || N_lcl < UT.ϵ_numerics_2M_N(FT)
         return LclRaiRates{FT}()
     end
@@ -434,7 +443,7 @@ function accretion((; accr)::CMP.SB2006, q_lcl, q_rai, ρ, N_lcl)
     ϕ_ac = (τ / (τ + τ0))^c          # Eq. (8) from SB2006
 
     dL_rai_dt = kcr * L_lcl * L_rai * ϕ_ac * sqrt(ρ0 / ρ)  # Eq. (7) from SB2006
-    dN_rai_dt = zero(N_lcl)
+    dN_rai_dt = zero(FT)
     dL_lcl_dt = -dL_rai_dt
     dN_lcl_dt = dL_lcl_dt / x_lcl
 
@@ -465,7 +474,7 @@ Compute cloud liquid self-collection rate
 function cloud_liquid_self_collection(
     acnv::CMP.AcnvSB2006, pdf_c::CMP.CloudParticlePDF_SB2006, q_lcl, ρ, dN_lcl_dt_au,
 )
-    FT = eltype(q_lcl)
+    FT = UT.promote_typeof(q_lcl, ρ, dN_lcl_dt_au)
     if q_lcl < UT.ϵ_numerics_2M_M(FT)
         return FT(0)
     end
@@ -525,7 +534,7 @@ Compute the rain self-collection rate
 function rain_self_collection(
     pdf::CMP.RainParticlePDF_SB2006, self::CMP.SelfColSB2006, q_rai, ρ, N_rai,
 )
-    FT = eltype(q_rai)
+    FT = UT.promote_typeof(q_rai, ρ, N_rai)
 
     if q_rai < UT.ϵ_numerics_2M_M(FT) || N_rai < UT.ϵ_numerics_2M_N(FT)
         return FT(0)
@@ -561,7 +570,7 @@ Compute the raindrops number density tendency due to breakup of raindrops
 function rain_breakup(
     pdf::CMP.RainParticlePDF_SB2006, brek::CMP.BreakupSB2006, q_rai, ρ, N_rai, dN_rai_dt_sc,
 )
-    FT = eltype(q_rai)
+    FT = UT.promote_typeof(q_rai, ρ, N_rai, dN_rai_dt_sc)
 
     if q_rai < UT.ϵ_numerics_2M_M(FT) || N_rai < UT.ϵ_numerics_2M_N(FT)
         return FT(0)
@@ -632,7 +641,7 @@ function cloud_terminal_velocity(
     (; ρw, grav, ν_air)::CMP.StokesRegimeVelType,
     q_liq, ρₐ, N_liq,
 )
-    FT = eltype(q_liq)
+    FT = UT.promote_typeof(q_liq, ρₐ, N_liq)
 
     if N_liq < UT.ϵ_numerics_2M_N(FT) || q_liq < UT.ϵ_numerics_2M_M(FT)
         return (FT(0), FT(0))
@@ -675,7 +684,7 @@ Fall velocity of individual rain drops is parameterized:
 function rain_terminal_velocity(
     (; pdf_r)::CMP.SB2006, (; ρ0, aR, bR, cR)::CMP.SB2006VelType, q_rai, ρ, N_rai,
 )
-    FT = eltype(q_rai)
+    FT = UT.promote_typeof(q_rai, ρ, N_rai)
     # TODO: Input argument list needs to be redesigned
 
     (; Dr_mean) = pdf_rain_parameters(pdf_r, q_rai, ρ, N_rai)
@@ -693,7 +702,7 @@ end
 function rain_terminal_velocity(
     (; pdf_r)::CMP.SB2006, vel::CMP.Chen2022VelTypeRain, q_rai, ρ, N_rai,
 )
-    FT = eltype(q_rai)
+    FT = UT.promote_typeof(q_rai, ρ, N_rai)
     # coefficients from Table B1 from Chen et. al. 2022
     aiu, bi, ciu = CO.Chen2022_vel_coeffs(vel, ρ)
     # size distribution parameter
@@ -734,7 +743,8 @@ end
 
 Returns the approximation of an incomplete gamma function for a ∈ {-1.0, -0.101}, and x in [0.067 1.82]
 """
-function Γ_incl(a::FT, x::FT) where {FT}
+function Γ_incl(a, x)
+    FT = UT.promote_typeof(a, x)
     #return exp(-x) / ((FT(1.5) - FT(0.54) * a) * x^(FT(0.46) - FT(0.75) * a))
     return exp(-x) / (
         (FT(0.33) - FT(0.7) * a) * x^(FT(0.08) - FT(0.93) * a) +
@@ -771,7 +781,9 @@ function rain_evaporation(
     (; pdf_r, evap)::CMP.SB2006, aps::CMP.AirProperties, tps::TDI.PS,
     q_tot, q_lcl, q_icl, q_rai, q_sno, ρ, N_rai, T,
 )
-    FT = eltype(q_tot)
+    # the early return below must match the main path's type for any mix of
+    # plain-float and Dual arguments
+    FT = UT.promote_typeof(q_tot, q_lcl, q_icl, q_rai, q_sno, ρ, N_rai, T)
     ϵₘ = UT.ϵ_numerics_2M_M(FT)
     ϵₙ = UT.ϵ_numerics_2M_N(FT)
 
@@ -879,7 +891,8 @@ The method is based on Horn (2012, DOI: [10.5194/gmd-5-345-2012](https://doi.org
 """
 function number_decrease_for_mass_limit((; τ)::CMP.NumberAdjustmentHorn2012, x_min, q, ρ, N)
     # Avoid NaN when both q and x_min are 0
-    N_max = iszero(x_min) ? oftype(q, Inf) : ρ * q / x_min
+    FT = UT.promote_typeof(q, ρ, N)
+    N_max = iszero(x_min) ? FT(Inf) : FT(ρ * q / x_min)
     return min(0, N_max - N) / τ
 end
 
@@ -915,8 +928,9 @@ function number_tendency_from_mass_limits((; x_min, x_max, τ), q, n)
     # When q == 0, the target n is zero (no mass -> no particles).
     # Otherwise, n_target is bounded between q / x_max and q / x_min.
     # This also naturally handles x_min == 0 (where q / x_min yields Inf).
-    ϵₘ = UT.ϵ_numerics_2M_M(typeof(q))
-    n_target = ifelse(q < ϵₘ, zero(n), clamp(n, q / x_max, q / x_min))
+    FT = UT.promote_typeof(q, n)
+    ϵₘ = UT.ϵ_numerics_2M_M(FT)
+    n_target = ifelse(q < ϵₘ, zero(FT), clamp(n, q / x_max, q / x_min))
     return (n_target - n) / τ
 end
 
@@ -979,7 +993,7 @@ function conv_q_lcl_to_q_rai(
     (; ρ_w, R_6C_0, E_0, k)::CMP.LD2004, q_lcl, ρ, N_d, smooth_transition = false,
 )
     if q_lcl <= UT.ϵ_numerics_2M_M(eltype(q_lcl))
-        return zero(q_lcl)
+        return zero(UT.promote_typeof(q_lcl, ρ, N_d))
     else
         # Mean volume radius in microns (assuming spherical cloud droplets)
         r_vol = cbrt(3 * q_lcl * ρ / 4 / π / ρ_w / N_d) * 1_000_000
diff --git a/src/MicrophysicsNonEq.jl b/src/MicrophysicsNonEq.jl
index 22ce29d63..e56c0342b 100644
--- a/src/MicrophysicsNonEq.jl
+++ b/src/MicrophysicsNonEq.jl
@@ -33,7 +33,7 @@ See DOI: 10.5194/acp-23-10883-2023
     (; ρᵢ)::CMP.CloudIce, (; D_vapor)::CMP.AirProperties,
     ip::CMP.Frostenberg2023, q_icl, T,
 )
-    FT = eltype(ρᵢ)
+    FT = UT.promote_typeof(q_icl, T, ρᵢ)
     # Get the estimated number of INPs
     N_icl = exp(IN.INP_concentration_mean(ip, T))
 
diff --git a/src/P3_particle_properties.jl b/src/P3_particle_properties.jl
index 3ae12f8c8..bcaf87ab8 100644
--- a/src/P3_particle_properties.jl
+++ b/src/P3_particle_properties.jl
@@ -10,7 +10,7 @@ This struct bundles the P3 parameterizations `params`, the provided rime state
 
 # Construction
 
-  - [`state_from_prognostic`](@ref): Main entry point. 
+  - [`state_from_prognostic`](@ref): Main entry point.
     Accepts the volumetric prognostic variables `(ρq_ice, ρn_ice, ρq_rim, ρb_rim)`,
     regularises them into `(F_rim, ρ_rim)`, and returns the constructed state.
 
@@ -41,14 +41,18 @@ struct P3State{FT, PARAMS <: CMP.ParametersP3}
 end
 
 function P3State(params::CMP.ParametersP3, ρq_ice, ρn_ice, F_rim, ρ_rim)
-    FT = eltype(ρq_ice)
+    FT = UT.promote_typeof(ρq_ice, ρn_ice, F_rim, ρ_rim)
     (; mass, ρ_i) = params
     ρ_d = get_ρ_d(mass, F_rim, ρ_rim)
     ρ_g = get_ρ_g(F_rim, ρ_rim, ρ_d)
     D_th = get_D_th(mass, ρ_i)
     D_gr = ifelse(iszero(F_rim), FT(Inf), get_D_gr(mass, ρ_g))
     D_cr = ifelse(iszero(F_rim), FT(Inf), get_D_cr(mass, F_rim, ρ_g))
-    return P3State(params, ρq_ice, ρn_ice, F_rim, ρ_rim, ρ_g, D_th, D_gr, D_cr)
+    return P3State(
+        params,
+        FT(ρq_ice), FT(ρn_ice), FT(F_rim), FT(ρ_rim),
+        FT(ρ_g), FT(D_th), FT(D_gr), FT(D_cr),
+    )
 end
 
 Base.show(io::IO, mime::MIME"text/plain", x::P3State) =
@@ -111,6 +115,43 @@ Return `true` if the particle is unrimed, i.e. `F_rim = 0`.
 """
 isunrimed(state::P3State) = iszero(state.F_rim)
 
+@inline exprel1(x) = expm1(x) / x            # exprel₁ = (exp(x)-1)/x
+@inline _exprel2(x) = (expm1(x) - x) / (x * x)
+@inline _exprel2_small(x) =
+    evalpoly(x, ntuple(i -> inv(oftype(x, UT.fac(i + 1))), Val(8)))
+@inline function exprel2(x)                  # exprel₂ = (exp(x)-1-x)/x²
+    abs(x) < oftype(x, 1 / 5) && return _exprel2_small(x)
+    return _exprel2(x)
+end
+
+"""
+    exprel(x, ::Val{k})
+
+Compute the relative exponential `exprelₖ(x) = Σₙ xⁿ/(n+k)!`.
+
+# Arguments
+- `x`: real argument.
+- `k`: order of the function, passed as `Val(k)`.
+
+# Details
+
+`exprelₖ` is one of the `φ`-functions `φₖ(x)` that appear in exponential integrators.
+It is implemented for `k = 1` (`(eˣ-1)/x`) and `k = 2` (`(eˣ-1-x)/x²`); other values of `k`
+throw an `ArgumentError`. For `k = 2` at small `|x|`, a Taylor series is used to avoid
+catastrophic loss of precision near `x = 0`.
+
+The value `k` is passed as `Val(k)`, so the order resolves at compile time.
+
+# References
+- [Exponential integrators](https://en.wikipedia.org/wiki/Exponential_integrator)
+- [Niesen & Wright (2009), A Krylov subspace algorithm for evaluating the φ-functions appearing in exponential integrators](https://arxiv.org/abs/0907.4631)
+"""
+@inline function exprel(x, ::Val{k}) where {k}
+    k == 1 && return exprel1(x)
+    k == 2 && return exprel2(x)
+    throw(ArgumentError("exprel is only implemented for k = 1 and k = 2"))
+end
+
 """
     get_ρ_d(mass::MassPowerLaw, F_rim, ρ_rim)
     get_ρ_d(state::P3State)
@@ -119,6 +160,9 @@ Exact solution for the density of the unrimed portion of the particle as
     function of the rime mass fraction `F_rim`, mass power law parameters `mass`,
     and rime density `ρ_rim`.
 
+For the derivation of the numerically stable form used here, see the P3 scheme
+documentation ([Assumed particle size relationships](@ref)).
+
 # Arguments
 - `mass`: [`CMP.MassPowerLaw`](@ref) parameters
 - `F_rim`: rime mass fraction
@@ -126,12 +170,32 @@ Exact solution for the density of the unrimed portion of the particle as
 
 # Returns
 - `ρ_d`: density of the unrimed portion of the particle [kg/m³]
+
+# Examples
+
+```jldoctest
+julia> import CloudMicrophysics.Parameters as CMP,
+              ClimaParams as CP,
+              CloudMicrophysics.P3Scheme as P3
+
+julia> FT = Float64;
+
+julia> mass = CMP.MassPowerLaw(CP.create_toml_dict(FT));
+
+julia> F_rim, ρ_rim = FT(0.5), FT(916.7);
+
+julia> ρ_d = P3.get_ρ_d(mass, F_rim, ρ_rim)
+488.9120789986414
+```
 """
 function get_ρ_d((; β_va)::CMP.MassPowerLaw, F_rim, ρ_rim)
-    k = (1 - F_rim)^(-1 / (3 - β_va))
-    num = ρ_rim * F_rim
-    den = (β_va - 2) * (k - 1) / ((1 - F_rim) * k - 1) - (1 - F_rim)
-    return num / den
+    p = 1 / (3 - β_va)
+    logFᵤ = log1p(-F_rim)            # = log(1 - F_rim)
+    φ₁ = exprel(logFᵤ, Val(1))
+    φ₁₋ₚ = exprel((1 - p) * logFᵤ, Val(1))
+    H = -p * exprel(-p * logFᵤ, Val(2)) - (1 - p) * exprel((1 - p) * logFᵤ, Val(2))
+    G = H - φ₁₋ₚ * φ₁
+    return -(ρ_rim * φ₁ * φ₁₋ₚ) / G
 end
 get_ρ_d((; params, F_rim, ρ_rim)::P3State) = get_ρ_d(params.mass, F_rim, ρ_rim)
 
@@ -253,22 +317,22 @@ Return the coefficients for the ice mass power law at diameter `D`.
  - `(a, b)`: coefficients for the ice mass power law, `a D^b`
 """
 function ice_mass_coeffs(state::P3State, D)
+    FT = promote_type(eltype(state), eltype(D))
     (; params, F_rim, ρ_g, D_th, D_gr, D_cr) = state
-    FT = eltype(D)
     (; ρ_i) = params
     (; α_va, β_va) = params.mass
-
-    return if D < D_th       # small spherical ice
-        (ρ_i * π / 6, FT(3))
+    a, b = if D < D_th       # small spherical ice
+        (ρ_i * π / 6, 3)
     elseif iszero(F_rim)     # large nonspherical unrimed ice
         (α_va, β_va)
     elseif D_th ≤ D < D_gr   # dense nonspherical rimed ice
         (α_va, β_va)
     elseif D_gr ≤ D < D_cr   # graupel (rimed)
-        (ρ_g * π / 6, FT(3))
+        (ρ_g * π / 6, 3)
     else # D_cr ≤ D          # partially rimed ice
         (α_va / (1 - F_rim), β_va)
     end
+    return FT(a), FT(b)
 end
 
 """
@@ -372,5 +436,5 @@ function ϕᵢ(state::P3State, D)
     ϕ_ob = min(1, 3 * sqrt(FT(π)) * mᵢ / (4 * ρᵢ * aᵢ^FT(1.5))) # κ =  1/3
     #ϕ_pr = max(1, 16 * ρᵢ^2 * aᵢ^3 / (9 * FT(π) * mᵢ^2))       # κ = -1/6
 
-    return ifelse(D == 0, FT(0), ϕ_ob)
+    return ifelse(D == 0, zero(ϕ_ob), ϕ_ob)
 end
diff --git a/src/P3_processes.jl b/src/P3_processes.jl
index 7f943522a..865dbf597 100644
--- a/src/P3_processes.jl
+++ b/src/P3_processes.jl
@@ -208,8 +208,11 @@ function compute_max_freeze_rate(aps, tps, velocity_params, ρₐ, Tₐ, state)
     # `f_frz = 1`.
     denom = L_f - cp_l * ΔT
     function max_freeze_rate(Dᵢ)
-        Tₐ ≥ T_frz && return zero(Dᵢ)  # No collisional freezing above the freezing temperature
-        denom > 0 || return floatmax(typeof(Dᵢ))
+        # fallback values typed by the promotion of the node and the captured state
+        # (mixed plain/Dual under differentiation)
+        FT = UT.promote_typeof(Dᵢ, ΔT, Δρᵥ_sat, denom)
+        Tₐ ≥ T_frz && return zero(FT)  # No collisional freezing above the freezing temperature
+        denom > 0 || return floatmax(FT)
         return 2 * (π * Dᵢ) * F_v(Dᵢ) * (K_therm * ΔT + Lᵥ * D_vapor * Δρᵥ_sat) / denom
     end
     return max_freeze_rate
@@ -374,7 +377,7 @@ function get_liquid_integrals_rain_closed(
     psd_r::CMP.RainParticlePDF_SB2006, vel::CMP.Chen2022VelType,
     n_r, ρₐ, L_r, N_r, state, ∂ₜV, m_liq, ρ′_rim, bounds_r; quad,
 )
-    FT = eltype(state)
+    FT = promote_type(eltype(state), UT.promote_typeof(ρₐ, L_r, N_r))
     ρw = psd_r.ρw
     (; N₀r, Dr_mean) = CM2.pdf_rain_parameters(psd_r, L_r / ρₐ, ρₐ, N_r)
     ai, bi, ci = CO.Chen2022_vel_coeffs(vel.rain, ρₐ)
@@ -586,7 +589,7 @@ function bulk_liquid_ice_collision_sources(
     psd_c, psd_r, L_c, N_c, L_r, N_r,
     aps, tps, vel, ρₐ, T; quad = ChebyshevGauss(100),
 )
-    FT = eltype(state)
+    FT = promote_type(eltype(state), UT.promote_typeof(L_c, N_c, L_r, N_r, ρₐ, T))
     (; τ_wet, ρ_i) = state.params
     D_shd = FT(1e-3) # 1mm  # TODO: Externalize this parameter
 
diff --git a/src/P3_size_distribution.jl b/src/P3_size_distribution.jl
index f74da48d0..f5751646f 100644
--- a/src/P3_size_distribution.jl
+++ b/src/P3_size_distribution.jl
@@ -72,7 +72,7 @@ Compute `log(Iᵏ)` where `Iᵏ` is the following integral:
 See also [`gamma_inc_moment`](@ref)
 """
 function loggamma_inc_moment(D₁, D₂, μ, logλ, k = 0, scale = 1)
-    FT = eltype(logλ)
+    FT = float(UT.promote_typeof(D₁, D₂, μ, logλ))
     D₁ < D₂ || return log(FT(0))  # return log(0) if D₁ ≥ D₂
     z = k + μ + 1
     # `λ⋅D ≡ xexpy(D, logλ) ≡ D * exp(logλ)` (numerically stable)
@@ -274,7 +274,7 @@ The P3 variables `F_rim` and `ρ_rim` are computed in a regularised way
 function get_distribution_logλ_from_prognostic(
     params, ρq_ice, ρn_ice, ρq_rim, ρb_rim, args...,
 )
-    state = get_state_from_prognostic(params, ρq_ice, ρn_ice, ρq_rim, ρb_rim)
+    state = state_from_prognostic(params, ρq_ice, ρn_ice, ρq_rim, ρb_rim)
     return get_distribution_logλ(state, args...)
 end
 
diff --git a/src/P3_terminal_velocity.jl b/src/P3_terminal_velocity.jl
index 9e120d6c6..7d058192e 100644
--- a/src/P3_terminal_velocity.jl
+++ b/src/P3_terminal_velocity.jl
@@ -57,7 +57,7 @@ function ice_terminal_velocity_number_weighted(
     (; ρn_ice, ρq_ice) = state
     # TODO - do we want to swicth to ϵ_numerics(FT)
     if ρn_ice < eps(one(ρn_ice)) || ρq_ice < eps(one(ρq_ice))
-        return zero(ρn_ice)
+        return zero(promote_type(eltype(state), UT.promote_typeof(ρₐ, logλ)))
     end
 
     v_term = ice_particle_terminal_velocity(velocity_params, ρₐ, state; use_aspect_ratio)
@@ -96,7 +96,7 @@ function ice_terminal_velocity_mass_weighted(
     (; ρn_ice, ρq_ice) = state
     # TODO - do we want to swicth to ϵ_numerics(FT)
     if ρn_ice < eps(one(ρn_ice)) || ρq_ice < eps(one(ρq_ice))
-        return zero(ρq_ice)
+        return zero(promote_type(eltype(state), UT.promote_typeof(ρₐ, logλ)))
     end
 
     v_term = ice_particle_terminal_velocity(velocity_params, ρₐ, state; use_aspect_ratio)
diff --git a/src/Utilities.jl b/src/Utilities.jl
index 62dc60f4a..932af57dd 100644
--- a/src/Utilities.jl
+++ b/src/Utilities.jl
@@ -9,6 +9,21 @@ module Utilities
 import UnrolledUtilities as UU
 
 export clamp_to_nonneg, ϵ_numerics, ϵ_numerics_2M_M, ϵ_numerics_2M_N, ϵ_numerics_P3_B
+export promote_typeof
+export fac
+
+"""
+    promote_typeof(args...)
+
+The common promoted type of the arguments' types.
+
+Use it to type early returns and fallback values from all the arguments the
+main-path result derives from. Typing them from a single argument
+(`FT = eltype(q_tot)`-style) makes the function's return a union when a
+caller mixes plain floats with `ForwardDiff.Dual`s (or float widths) across
+arguments — non-concrete, heap-boxed, and silent.
+"""
+@inline promote_typeof(args...) = Base.promote_typeof(args...)
 export unrolled_logsumexp
 export sgs_weight_function, rime_mass_fraction, rime_density
 
@@ -26,6 +41,13 @@ Compatible with dual numbers (AD) and GPUs.
 """
 @inline clamp_to_nonneg(x) = max(zero(x), x)
 
+"""
+    fac(n)
+
+Integer factorial `n!`, valid for `0 ≤ n ≤ 20`.
+"""
+@inline fac(n) = prod(1:n; init = one(n))
+
 """
     ϵ_numerics(FT)
 
@@ -164,6 +186,10 @@ Mirrors the `sgs_weight_function` in `ClimaAtmos.jl/src/utils/variable_manipulat
         zero(a)
     elseif a > min(1, 42 * a_half)   # autodiff generates NaNs when a is large
         one(a)
+    elseif 4 * a < eps(typeof(a))
+        # 1 - a rounds to 1, making atanh(-1) = -Inf: the value is 0 either
+        # way, but autodiff generates NaNs (mirrors the upper guard)
+        zero(a)
     else
         (1 + tanh(2 * atanh(1 - 2 * (1 - a)^(-1 / log2(1 - a_half))))) / 2
     end
@@ -185,10 +211,10 @@ zero when `denominator` is below machine precision.
     ϵ = eps(typeof(denominator))^2,
 )
     weight = sgs_weight_function(denominator, half)
-    return ifelse(
-        denominator < ϵ, zero(numerator),
-        weight * numerator / denominator,
-    )
+    # zero of the promoted type: a single-argument zero makes the return a
+    # union when numerator and denominator mix plain floats with Duals
+    z = zero(promote_typeof(numerator, denominator))
+    return ifelse(denominator < ϵ, z, weight * numerator / denominator)
 end
 
 """
diff --git a/test/ad_compat_tests.jl b/test/ad_compat_tests.jl
new file mode 100644
index 000000000..69363bbb0
--- /dev/null
+++ b/test/ad_compat_tests.jl
@@ -0,0 +1,212 @@
+using Test
+
+import ClimaParams as CP
+import CloudMicrophysics as CM
+import CloudMicrophysics.Parameters as CMP
+import CloudMicrophysics.BulkMicrophysicsTendencies as BMT
+import CloudMicrophysics.P3Scheme as P3
+import CloudMicrophysics.DistributionTools as DT
+import CloudMicrophysics.Microphysics2M as CM2
+import CloudMicrophysics.ThermodynamicsInterface as TDI
+import ForwardDiff as FD
+import SpecialFunctions as SF
+
+# ForwardDiff compatibility of the pointwise 2M+P3 path: `bulk_microphysics_tendencies`
+# must be differentiable w.r.t. the 8 prognostic species (with `logλ` held fixed,
+# matching the substepping semantics). Differentiating *through* the `logλ` shape
+# solve is out of scope here — it additionally requires a `∂/∂a` rule for the
+# forward `SF.gamma_inc`.
+
+function test_ad_compatibility(FT)
+    tps = TDI.TD.Parameters.ThermodynamicsParameters(FT)
+    mp = CMP.Microphysics2MParams(FT; with_ice = true, is_limited = true)
+    p3 = mp.ice.scheme
+    D(v, ∂) = FD.Dual{:ad_test}(FT(v), FT(∂))
+
+    @testset "P3State construction with Dual prognostics ($FT)" begin
+        st = P3.state_from_prognostic(p3, FT(1e-4), FT(1e4), FT(2e-5), FT(4e-8))
+        std = P3.state_from_prognostic(p3, D(1e-4, 1), D(1e4, 0), D(2e-5, 0), D(4e-8, 0))
+        @test eltype(std) <: FD.Dual
+        # primal values are unchanged by differentiation
+        @test FD.value(std.ρ_g) == st.ρ_g
+        @test FD.value(std.D_gr) == st.D_gr
+        @test FD.value(std.D_cr) == st.D_cr
+        @test FD.value(std.D_th) == st.D_th
+        # params-only field is a true constant; rime-derived fields carry sensitivity
+        @test iszero(FD.partials(std.D_th))
+        @test !iszero(FD.partials(std.ρ_g))
+        @test !iszero(FD.partials(std.D_cr))
+        # partial seeding (derivative w.r.t. a single prognostic) also constructs
+        st1 = P3.state_from_prognostic(p3, D(1e-4, 1), FT(1e4), FT(2e-5), FT(4e-8))
+        @test eltype(st1) <: FD.Dual
+        # the `F_rim = 0` branch stays intact under Duals
+        st0 = P3.state_from_prognostic(p3, D(1e-4, 1), D(1e4, 0), D(0, 0), D(0, 0))
+        @test FD.value(st0.D_gr) == FT(Inf) && FD.value(st0.D_cr) == FT(Inf)
+    end
+
+    @testset "regularised ratios stay NaN-free across tiny denominators ($FT)" begin
+        # below ~eps(FT)/4 the sgs_weight_function sigmoid hits atanh(-1):
+        # the weight is 0 either way, but the partials were NaN. Sweep
+        # denominators across that band for both regularised ratios.
+        for denom in (eps(FT)^2, eps(FT) / 8, eps(FT), sqrt(eps(FT)), FT(1e-9))
+            std = P3.state_from_prognostic(p3, D(denom, 1), D(10, 0), D(denom / 10, 1), D(denom / 10, 1))
+            # the regularised ratios must always be differentiable
+            @test all(isfinite, FD.partials(std.F_rim))
+            @test all(isfinite, FD.partials(std.ρ_rim))
+            # the cached thresholds are NaN/Inf markers in degenerate rime
+            # regimes (gated downstream); they need finite partials only where
+            # their value is finite
+            for field in (std.ρ_g, std.D_gr, std.D_cr)
+                isfinite(FD.value(field)) && @test all(isfinite, FD.partials(field))
+            end
+        end
+    end
+
+    @testset "Γ_incl accepts mixed argument types ($FT)" begin
+        # the rain-evaporation path calls Γ_incl(params_float, dual): Microphysics2M ~:808
+        g = CM2.Γ_incl(FT(-0.25), D(0.5, 1))
+        @test g isa FD.Dual
+        @test FD.value(g) ≈ CM2.Γ_incl(FT(-0.25), FT(0.5))
+        @test CM2.Γ_incl(FT(-0.25), FT(0.5)) isa FT
+    end
+
+    @testset "rain_evaporation is concretely typed for mixed arguments ($FT)" begin
+        # the early return (no rain / supersaturated) must have the same type
+        # as the main path when species are Duals over a plain-float q_tot —
+        # a union here heap-boxes every call in the Jacobian hot loop
+        sb = mp.warm_rain.seifert_beheng
+        aps = mp.warm_rain.air_properties
+        z = D(0, 0)
+        # subsaturated (main path) and supersaturated (early return) states
+        for (q_tot, T) in ((FT(0.005), FT(288)), (FT(0.02), FT(288)))
+            t = @inferred CM2.rain_evaporation(
+                sb, aps, tps, q_tot, D(2e-4, 1), z, D(1e-4, 1), z, FT(1.05), D(4e4, 1), T,
+            )
+            @test all(v -> v isa FD.Dual, values(t))
+        end
+    end
+
+    @testset "BMT 2M+P3 Jacobian w.r.t. the 8 species ($FT)" begin
+        function rhs(x, ρ, T, q_tot, logλ)
+            t = BMT.bulk_microphysics_tendencies(
+                BMT.Microphysics2Moment(), mp, tps, ρ, T, q_tot,
+                x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], logλ)
+            return [t.dq_lcl_dt, t.dn_lcl_dt, t.dq_rai_dt, t.dn_rai_dt,
+                t.dq_ice_dt, t.dn_ice_dt, t.dq_rim_dt, t.db_rim_dt]
+        end
+        function consistent_logλ(ρ, x)
+            st = P3.state_from_prognostic(p3, ρ * x[5], ρ * x[6], ρ * x[7], ρ * x[8])
+            return P3.get_distribution_logλ(st)
+        end
+        # x = [q_lcl, n_lcl, q_rai, n_rai, q_ice, n_ice, q_rim, b_rim]; interior
+        # states (all species nonzero where the regime is active)
+        regimes = (
+            (; name = "warm rain", ρ = FT(1.05), T = FT(288), q_tot = FT(0.015),
+                x = FT[4e-4, 8e7, 2.1e-3, 5e4, 0, 0, 0, 0], logλ = FT(-Inf)),
+            (; name = "mixed phase", ρ = FT(0.78), T = FT(273.5), q_tot = FT(0.009),
+                x = FT[2e-4, 5e7, 1e-4, 4e4, 1e-4, 2e5, 4e-5, 6e-8], logλ = nothing),
+            (; name = "ice heavy", ρ = FT(0.45), T = FT(233), q_tot = FT(0.003),
+                x = FT[1e-6, 1e6, 1e-12, 1e-2, 8e-4, 5e5, 5e-4, 9e-7], logλ = nothing),
+            # sub-threshold ice with b_rim in the regularised-ratio band that
+            # previously produced NaN partials via sgs_weight_function
+            (; name = "cloud edge", ρ = FT(0.7), T = FT(263), q_tot = FT(0.005),
+                x = FT[1e-5, 1e7, 1e-6, 1e3, 3e-8, 30, 1e-8, 2.5e-11], logλ = nothing),
+        )
+        for r in regimes
+            logλ = isnothing(r.logλ) ? consistent_logλ(r.ρ, r.x) : r.logλ
+            f = x -> rhs(x, r.ρ, r.T, r.q_tot, logλ)
+            v₀ = f(r.x)
+            J = FD.jacobian(f, r.x)
+            @test all(isfinite, J)
+            @test f(r.x) == v₀  # differentiation does not perturb the primal
+        end
+
+        # Jacobian against central finite differences (Float64 only — FD
+        # truncation in Float32 is not meaningful at these magnitudes)
+        if FT == Float64
+            r = regimes[2]
+            logλ = consistent_logλ(r.ρ, r.x)
+            f = x -> rhs(x, r.ρ, r.T, r.q_tot, logλ)
+            J = FD.jacobian(f, r.x)
+            J_fd = similar(J)
+            for j in 1:8
+                h = 1e-6 * r.x[j]
+                xp = copy(r.x);
+                xp[j] += h
+                xm = copy(r.x);
+                xm[j] -= h
+                J_fd[:, j] = (f(xp) - f(xm)) / 2h
+            end
+            # per-row scales: number rows dwarf mass rows by ~10 orders of
+            # magnitude, so a single global scale would leave the mass rows
+            # unconstrained
+            for i in 1:8
+                scale = max(maximum(abs, J[i, :]), maximum(abs, J_fd[i, :]))
+                iszero(scale) && continue
+                @test maximum(abs, J[i, :] - J_fd[i, :]) / scale < 1e-5
+            end
+        end
+
+        # Boundary: SB2006 autoconversion Φ_au(τ) ∝ τ^0.7 has a vertical
+        # tangent at exactly zero rain with cloud present; the ϵ-gate in
+        # `autoconversion` keeps the Jacobian finite there
+        x_boundary = FT[1e-6, 1e6, 0, 0, 8e-4, 5e5, 5e-4, 9e-7]
+        logλ_b = consistent_logλ(FT(0.45), x_boundary)
+        f_b = x -> rhs(x, FT(0.45), FT(233), FT(0.003), logλ_b)
+        @test all(isfinite, f_b(x_boundary))
+        @test all(isfinite, FD.jacobian(f_b, x_boundary))
+    end
+end
+
+@testset "gamma_inc_inv Dual rule vs finite differences" begin
+    # SF kernels are Float64-only internally; validate the rule in Float64
+    D64(v, ∂) = FD.Dual{:ad_test}(Float64(v), Float64(∂))
+    for (a, p) in ((0.5, 0.1), (1.3, 0.1), (2.5, 1e-5), (2.5, 0.5), (7.0, 1 - 1e-5), (25.0, 0.9))
+        x = SF.gamma_inc_inv(a, p, 1 - p)
+        # p-slot (callers pass (Y, 1-Y), so q carries the opposite partial)
+        g = DT.gamma_inc_inv(D64(a, 0), D64(p, 1), D64(1 - p, -1))
+        h = p * (1 - p) * 1e-4
+        ∂p_fd = (SF.gamma_inc_inv(a, p + h, 1 - p - h) - SF.gamma_inc_inv(a, p - h, 1 - p + h)) / 2h
+        @test FD.value(g) == x  # primal exactly the SF result
+        @test FD.partials(g)[1] ≈ ∂p_fd rtol = 1e-5
+        # a-slot
+        ga = DT.gamma_inc_inv(D64(a, 1), D64(p, 0), D64(1 - p, 0))
+        ha = 1e-5 * a
+        ∂a_fd = (SF.gamma_inc_inv(a + ha, p, 1 - p) - SF.gamma_inc_inv(a - ha, p, 1 - p)) / 2ha
+        @test FD.partials(ga)[1] ≈ ∂a_fd rtol = 1e-4
+        # q-slot symmetry: seeding only q composes to the negative p-derivative
+        gq = DT.gamma_inc_inv(D64(a, 0), D64(p, 0), D64(1 - p, 1))
+        @test FD.partials(gq)[1] ≈ -FD.partials(g)[1] / 2 atol = abs(FD.partials(g)[1]) * 1e-10
+        # quantile through the public wrapper stays consistent
+        @test DT.gamma_inc_inv(a, p, 1 - p) == x
+    end
+
+    # a-slot deep in the upper tail: P saturates at the resolution of one(x),
+    # so the rule must difference Q there (the repo's upper integration bounds
+    # use q as small as eps())
+    for q in (1e-8, 1e-13, eps())
+        a = 3.0
+        ga = DT.gamma_inc_inv(D64(a, 1), D64(1 - q, 0), D64(q, 0))
+        ha = 1e-5 * a
+        ∂a_fd = (SF.gamma_inc_inv(a + ha, 1 - q, q) - SF.gamma_inc_inv(a - ha, 1 - q, q)) / 2ha
+        @test FD.partials(ga)[1] ≈ ∂a_fd rtol = 1e-4
+    end
+
+    # mixed Dual/plain arguments promote inside the public wrapper
+    gm = DT.gamma_inc_inv(2.5, D64(0.5, 1), D64(0.5, -1))
+    @test gm isa FD.Dual
+    @test FD.value(gm) == SF.gamma_inc_inv(2.5, 0.5, 0.5)
+end
+
+@testset "get_distribution_logλ_from_prognostic" begin
+    for FT in (Float32, Float64)
+        p3 = CMP.Microphysics2MParams(FT; with_ice = true, is_limited = true).ice.scheme
+        logλ = P3.get_distribution_logλ_from_prognostic(p3, FT(1e-4), FT(1e4), FT(2e-5), FT(4e-8))
+        st = P3.state_from_prognostic(p3, FT(1e-4), FT(1e4), FT(2e-5), FT(4e-8))
+        @test isfinite(logλ)
+        @test logλ == P3.get_distribution_logλ(st)
+    end
+end
+
+test_ad_compatibility(Float64)
+test_ad_compatibility(Float32)
diff --git a/test/gpu_clima_core_test.jl b/test/gpu_clima_core_test.jl
index 9ccf61c55..1c026eb38 100644
--- a/test/gpu_clima_core_test.jl
+++ b/test/gpu_clima_core_test.jl
@@ -115,10 +115,10 @@ get_rcemipii_center_space(::Type{FT}) where {FT} = CC.Spaces.CenterExtrudedFinit
 
 get_p3_fields(::Type{FT}, space) where {FT} = (;
     params = CMP.ParametersP3(FT),
-    L_ice = ones(space) .* FT(1e-4),   # [kg/m³] ice mass content
-    N_ice = ones(space) .* FT(1e4),    # [1/m³]  ice number concentration
-    F_rim = zeros(space),              # [-]     rime mass fraction (no rime)
-    ρ_rim = zeros(space),              # [kg/m³] rime density      (no rime)
+    ρq_ice = ones(space) .* FT(1e-4),   # [kg/m³] ice mass content,
+    ρn_ice = ones(space) .* FT(1e4),    # [1/m³]  ice number concentration,
+    ρq_rim = zeros(space),              # [kg/m³] rime mass content (no rime)
+    ρb_rim = zeros(space),              # [-]  rime volume (no rime)
     logλ = zeros(space),               # cache field for the result
 )
 
@@ -132,10 +132,8 @@ p3_logλ_1d(::Type{FT}) where {FT} = p3_logλ(FT, make_column(FT))
 p3_logλ_3d(::Type{FT}) where {FT} = p3_logλ(FT, get_rcemipii_center_space(FT))
 
 function p3_logλ(::Type{FT}, space) where {FT}
-    (; params, L_ice, N_ice, F_rim, ρ_rim, logλ) = get_p3_fields(FT, space)
-    @. logλ = P3.get_distribution_logλ(
-        P3.P3State(params, L_ice, N_ice, F_rim, ρ_rim),
-    )
+    (; params, ρq_ice, ρn_ice, ρq_rim, ρb_rim, logλ) = get_p3_fields(FT, space)
+    @. logλ = P3.get_distribution_logλ_from_prognostic(params, ρq_ice, ρn_ice, ρq_rim, ρb_rim)
     return logλ
 end
 
diff --git a/test/gpu_tests.jl b/test/gpu_tests.jl
index f5b54e2db..9bab493d4 100644
--- a/test/gpu_tests.jl
+++ b/test/gpu_tests.jl
@@ -444,6 +444,13 @@ end
     output[i] = P3.ice_self_collection(state, logλ, vel_params, ρₐ[i])
 end
 
+@kernel inbounds = true function test_rain_freezing_kernel!(
+    rf, pdf_r, tps, output, q, ρ, N, T,
+)
+    i = @index(Global, Linear)
+    output[i] = CMI_het.liquid_freezing_rate(rf, pdf_r, tps, q[i], ρ[i], N[i], T[i])
+end
+
 """
     setup_output(dims, FT)
 Helper function for GPU tests. Allocates an array of type `FT` with dimensions
@@ -1043,6 +1050,27 @@ function test_gpu(FT)
             # test homogeneous_J_linear is callable and returns a reasonable value
             TT.@test J_linear ≈ FT(7.156568123338207e11)
         end
+
+        TT.@testset "CMI_het.liquid_freezing_rate (rain, Bigg)" begin
+            DT = @NamedTuple{∂ₜn_frz::FT, ∂ₜq_frz::FT}
+            (; output, ndrange) = setup_output(1, DT)
+
+            rf = CMP.RainFreezing(CP.create_toml_dict(FT))
+            pdf_r = CMP.RainParticlePDF_SB2006_limited(CP.create_toml_dict(FT))
+            T_freeze = TDI.TD.Parameters.T_freeze(tps)
+
+            q = ArrayType([FT(1e-4)])
+            ρ = ArrayType([FT(1)])
+            N = ArrayType([FT(1e3)])
+            T = ArrayType([T_freeze - FT(20)])
+
+            kernel! = test_rain_freezing_kernel!(backend, work_groups)
+            kernel!(rf, pdf_r, tps, output, q, ρ, N, T; ndrange)
+            out = Array(output)[1]
+
+            TT.@test isfinite(out.∂ₜn_frz) && out.∂ₜn_frz > FT(0)
+            TT.@test isfinite(out.∂ₜq_frz) && out.∂ₜq_frz > FT(0)
+        end
     end  # TT.@testset "Ice nucleation kernels"
 
     TT.@testset "Modal nucleation kernels" begin
diff --git a/test/p3_rho_d_stability.jl b/test/p3_rho_d_stability.jl
new file mode 100644
index 000000000..00d2388a3
--- /dev/null
+++ b/test/p3_rho_d_stability.jl
@@ -0,0 +1,33 @@
+using Test: @testset, @test
+import CloudMicrophysics.P3Scheme as P3
+import CloudMicrophysics.Parameters as CMP
+import ClimaParams as CP
+import ForwardDiff as FD
+
+# Direct evaluation of the analytical solution for ρ_d. Evaluated in BigFloat it is the
+# high-precision reference; a Float64 reference is not accurate enough, because the direct
+# expression itself loses about two percent at F_rim = 1e-7.
+function get_ρ_d_direct(::Type{FT}, F_rim, ρ_rim, β_va) where {FT}
+    k = (one(FT) - F_rim)^(-one(FT) / (FT(3) - β_va))
+    den = (β_va - FT(2)) * (k - one(FT)) / ((one(FT) - F_rim) * k - one(FT)) - (one(FT) - F_rim)
+    return ρ_rim * F_rim / den
+end
+
+@testset "P3 get_ρ_d Float32 stability (small rime fraction)" begin
+    mass = CMP.ParametersP3(Float32).mass
+    βva = mass.β_va
+    ρ_g_ref(F_rim, ρ_rim) =
+        P3.get_ρ_g(F_rim, ρ_rim, get_ρ_d_direct(BigFloat, F_rim, ρ_rim, BigFloat(βva)))
+    for F_rim in (1.0f-7, 1.0f-6, 1.0f-5, 1.0f-4, 1.0f-3, 1.0f-2, 1.0f-1, 4.0f-1, 9.0f-1),
+        ρ_rim in (1.0f0, 5.0f1, 4.0f2, 9.16f2)
+
+        ρ_d = P3.get_ρ_d(mass, F_rim, ρ_rim)
+        ρ_g = P3.get_ρ_g(F_rim, ρ_rim, ρ_d)
+        @test ρ_g > 0           # The direct implementation produces negative numbers for some inputs.
+        @test isfinite(ρ_d)
+        @test ρ_g ≈ Float32(ρ_g_ref(BigFloat(F_rim), BigFloat(ρ_rim))) rtol = 1.0f-5
+    end
+    # The value and its derivative with respect to F_rim stay finite under ForwardDiff in Float32.
+    g(x) = (ρ_d = P3.get_ρ_d(mass, x, 4.0f2); P3.get_ρ_g(x, 4.0f2, ρ_d))
+    @test isfinite(FD.derivative(g, 1.0f-4))
+end
diff --git a/test/return_type_tests.jl b/test/return_type_tests.jl
new file mode 100644
index 000000000..43815115d
--- /dev/null
+++ b/test/return_type_tests.jl
@@ -0,0 +1,108 @@
+using Test
+
+import ClimaParams as CP
+import CloudMicrophysics as CM
+import CloudMicrophysics.Parameters as CMP
+import CloudMicrophysics.BulkMicrophysicsTendencies as BMT
+import CloudMicrophysics.Microphysics2M as CM2
+import CloudMicrophysics.MicrophysicsNonEq as CMNonEq
+import CloudMicrophysics.DistributionTools as DT
+import CloudMicrophysics.P3Scheme as P3
+import CloudMicrophysics.Common as CO
+import CloudMicrophysics.Utilities as UT
+import CloudMicrophysics.ThermodynamicsInterface as TDI
+import ForwardDiff as FD
+
+# Functions must return CONCRETE types for every mix of plain-float and Dual
+# arguments: fallback returns typed from a single argument produce silent,
+# heap-boxed unions otherwise (see Utilities.promote_typeof). Each entry below
+# is checked with every single-Dual signature and the all-Dual signature.
+
+const FT64 = Float64
+const D8 = FD.Dual{Nothing, Float64, 8}
+
+function concrete_for_all_mixes(f, pre::Tuple, n::Int; post::Tuple = ())
+    ok = true
+    for i in 0:n  # 0 = all-Dual; i >= 1 = Dual only in slot i
+        numeric = ntuple(j -> (i == 0 || i == j) ? D8 : FT64, n)
+        rts = Base.return_types(f, Tuple{pre..., numeric..., post...})
+        ok &= length(rts) >= 1 && all(isconcretetype, rts)
+    end
+    return ok
+end
+
+@testset "early returns are concretely typed under mixed Dual/plain args" begin
+    tps = TDI.TD.Parameters.ThermodynamicsParameters(FT64)
+    mp = CMP.Microphysics2MParams(FT64; with_ice = true, is_limited = true)
+    mpnl = CMP.Microphysics2MParams(FT64; with_ice = true, is_limited = false)
+    sb = mp.warm_rain.seifert_beheng
+    sbnl = mpnl.warm_rain.seifert_beheng
+    aps = mp.warm_rain.air_properties
+
+    P = typeof
+    @test concrete_for_all_mixes(CM2.pdf_rain_parameters, (P(sb.pdf_r),), 3)
+    @test concrete_for_all_mixes(CM2.pdf_rain_parameters, (P(sbnl.pdf_r),), 3)
+    @test concrete_for_all_mixes(CM2.pdf_rain_parameters_mass, (P(sb.pdf_r),), 3)
+    @test concrete_for_all_mixes(CM2.log_pdf_cloud_parameters_mass, (P(sb.pdf_c),), 3)
+    @test concrete_for_all_mixes(CM2.pdf_cloud_parameters_mass, (P(sb.pdf_c),), 3)
+    @test concrete_for_all_mixes(CM2.pdf_cloud_parameters, (P(sb.pdf_c),), 3)
+    @test concrete_for_all_mixes(CM2.get_size_distribution_bounds, (P(sb.pdf_r),), 3)
+    @test concrete_for_all_mixes(CM2.get_size_distribution_bounds, (P(sb.pdf_c),), 3)
+    @test concrete_for_all_mixes(CM2.autoconversion, (P(sb.acnv), P(sb.pdf_c)), 4)
+    @test concrete_for_all_mixes(CM2.accretion, (P(sb),), 4)
+    @test concrete_for_all_mixes(CM2.cloud_liquid_self_collection, (P(sb.acnv), P(sb.pdf_c)), 3)
+    @test concrete_for_all_mixes(CM2.rain_self_collection, (P(sb.pdf_r), P(sb.self)), 3)
+    @test concrete_for_all_mixes(CM2.rain_breakup, (P(sb.pdf_r), P(sb.brek)), 4)
+    @test concrete_for_all_mixes(CM2.rain_self_collection_and_breakup, (P(sb),), 3)
+    @test concrete_for_all_mixes(CM2.rain_evaporation, (P(sb), P(aps), P(tps)), 8)
+    numadj = (; sb.numadj.τ, x_min = sb.pdf_c.xc_min, x_max = sb.pdf_c.xc_max)
+    @test concrete_for_all_mixes(CM2.number_tendency_from_mass_limits, (P(numadj),), 2)
+    @test concrete_for_all_mixes(
+        CM2.rain_terminal_velocity, (P(sb), P(CMP.SB2006VelType(FT64))), 3,
+    )
+    @test concrete_for_all_mixes(
+        CM2.rain_terminal_velocity, (P(sb), P(mp.ice.terminal_velocity.rain)), 3,
+    )
+    @test concrete_for_all_mixes(
+        CM2.cloud_terminal_velocity, (P(sb.pdf_c), P(CMP.StokesRegimeVelType(FT64))), 3,
+    )
+    @test concrete_for_all_mixes(
+        CO.Chen2022_exponential_pdf, (), 4; post = (Int,),
+    )
+    @test concrete_for_all_mixes(CO.logistic_function_integral, (), 3)
+    @test concrete_for_all_mixes(UT._regularised_ratio, (), 2)
+    @test concrete_for_all_mixes(
+        CMNonEq.τ_relax,
+        (P(CMP.CloudIce(FT64)), P(CMP.AirProperties(FT64)), P(mp.ice.ice_nucleation)), 2,
+    )
+    @test concrete_for_all_mixes(P3.loggamma_inc_moment, (), 4)
+    # P3 aspect ratio: plain state with Dual D and vice versa
+    st = P3.state_from_prognostic(mp.ice.scheme, FT64(1e-4), FT64(1e4), FT64(2e-5), FT64(4e-8))
+    @test concrete_for_all_mixes(P3.ϕᵢ, (P(st),), 1)
+    # ice terminal velocities: plain state, Dual ρₐ (fallback previously typed
+    # off the state alone). Keyword wrapper -> probe the positional core.
+    for f in (P3.ice_terminal_velocity_number_weighted, P3.ice_terminal_velocity_mass_weighted)
+        rts = Base.return_types(f, Tuple{P(mp.ice.terminal_velocity), D8, P(st), FT64})
+        @test all(isconcretetype, rts)
+    end
+
+    # LclRaiRates: mixed-type construction promotes instead of MethodError
+    r = CM2.LclRaiRates(D8(1.0), 0.0, D8(2.0), 0.0)
+    @test r isa CM2.LclRaiRates{D8}
+
+    # size-distribution closures: zero branch must match the main branch when
+    # the node D and the captured parameters mix plain/Dual
+    for (pdf, q) in ((sb.pdf_r, FT64(1e-4)), (sb.pdf_c, FT64(1e-3)))
+        n_plain = DT.size_distribution(pdf, q, FT64(1.1), FT64(1e7))
+        @test isconcretetype(only(Base.return_types(n_plain, Tuple{D8})))
+        n_dual = DT.size_distribution(pdf, D8(q), FT64(1.1), D8(1e7))
+        @test isconcretetype(only(Base.return_types(n_dual, Tuple{FT64})))
+        n_zero = DT.size_distribution(pdf, zero(FT64), FT64(1.1), zero(FT64))
+        @test n_zero(D8(1e-4)) isa D8  # zero branch, Dual node
+    end
+
+    # NOTE: the 1M Instantaneous entry is NOT asserted here: its process
+    # leaves (Microphysics1M.jl) carry their own single-argument-typed
+    # returns. The 1M leaves share this pattern and are out of scope for
+    # this 2M/P3 sweep (the BMT-level source-terms FT is fixed).
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 2e36f22f7..20c6c78de 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,12 +18,15 @@ TT.@testset "All tests" begin
     include("nucleation_unit_tests.jl")
     include("precipitation_susceptibility_tests.jl")
     include("p3_tests.jl")
+    include("p3_rho_d_stability.jl")
     include("p3_shape_solver_warmstart_tests.jl")
     include("aqua.jl")
     include("performance_tests.jl")
     include("aerosol_activation_calibration.jl")
     include("ice_nucleation_calibration.jl")
     include("ventilation_tests.jl")
+    include("ad_compat_tests.jl")
+    include("return_type_tests.jl")
     include("DistributionTools_tests.jl")
     include("unrolled_logsumexp.jl")
 end