From 1e61ce0bd35d57b8ec920a014ef49dea333b583e Mon Sep 17 00:00:00 2001 From: Bojan Nikolic Date: Mon, 15 Jun 2026 13:56:02 +0000 Subject: [PATCH] Threaded sum for large numpy arrays Using this makes runtime of mean and other similar calculations about 5-7 times faster on large images such as 24k x 24k that I use for the SKA reference run. Overall it improves performance by ~~15% in the current run (consistent with expectations given time in these functions and that the submaps do not gain from this optimisation) --- bdsf/functions.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/bdsf/functions.py b/bdsf/functions.py index 4352ff7..327947a 100644 --- a/bdsf/functions.py +++ b/bdsf/functions.py @@ -1,8 +1,26 @@ -# some functions from __future__ import print_function from __future__ import absolute_import + +import numpy +from concurrent.futures import ThreadPoolExecutor + +# some functions from shutil import get_terminal_size +def sum_threaded(a, N=10000000): + if a.size < N : + return a.sum() + n = int(a.size//N) + with ThreadPoolExecutor() as te: + res=te.map(lambda x: a[x*N:(x+1)*N].sum(), + list(range(n+1))) + res=list(res) + return numpy.array(res).sum() + +def mean_threaded(a): + return sum_threaded(a)/a.size + + try: # For Python 2 basestring = basestring @@ -2139,9 +2157,9 @@ def bstat(indata, mask, kappa_npixbeam): c2 = 0.0 maxiter = 200 converge_num = 1e-6 - m_raw = numpy.mean(skpix) + m_raw = mean_threaded(skpix) d2 = (skpix-m_raw)**2 - r_raw = (d2.sum()/(ct-1))**0.5 + r_raw = (sum_threaded(d2)/(ct-1))**0.5 while (c1 >= c2) and (iter < maxiter): npix = skpix.size @@ -2157,8 +2175,8 @@ def bstat(indata, mask, kappa_npixbeam): if iter == 0: sig = r_raw else: - m_new = numpy.mean(skpix) - sig = (d2.sum()/ct - (m_raw-m_new)**2)**0.5 + m_new = mean_threaded(skpix) + sig = (sum_threaded(d2)/ct - (m_raw-m_new)**2)**0.5 wsm1, wsm2 = numpy.searchsorted(skpix, [medval - kappa*sig, medval + kappa*sig]) @@ -2171,9 +2189,9 @@ def bstat(indata, mask, kappa_npixbeam): c2 = converge_num * lastct iter += 1 - mean = numpy.mean(skpix) + mean = mean_threaded(skpix) median = skpix[skpix.size//2] - sigma = (d2.sum()/(ct-1) - (m_raw-mean)**2 * ct/(ct-1) )**0.5 + sigma = (sum_threaded(d2)/(ct-1) - (m_raw-mean)**2 * ct/(ct-1) )**0.5 mode = 2.5*median - 1.5*mean if sigma > 0.0: skew_par = abs(mean - median)/sigma