From 6bad126382a14e539cdd8632adf617403d12469e Mon Sep 17 00:00:00 2001 From: Lorri Rao Date: Wed, 11 Mar 2026 19:55:40 +0000 Subject: [PATCH] Fix int32 overflow in rotating buffer size calculation When the total rotating buffer size exceeds ~2GB, the result of rotatingNum * rotatingSize no longer fits in the int32_t totalRotatingSizeNeeded and wraps negative (e.g. 838926336 * 3 = -1778188288), causing the "Insufficient rotating buffer size" abort. Use int64_t for rotatingNum and totalRotatingSizeNeeded to handle large grouped GEMM buffer allocations correctly. --- tensilelite/client/src/DataInitialization.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensilelite/client/src/DataInitialization.cpp b/tensilelite/client/src/DataInitialization.cpp index 834446c0c4..d848da9042 100644 --- a/tensilelite/client/src/DataInitialization.cpp +++ b/tensilelite/client/src/DataInitialization.cpp @@ -2504,21 +2504,21 @@ namespace TensileLite { auto castInputs = static_pointer_cast(inputs); size_t rotatingSize = getRotatingSize(*gemmProblem, *castInputs); - int32_t rotatingNum + int64_t rotatingNum = min(maxRotatingBufferNum, ceil((float)m_rotatingBuffer / rotatingSize)) - 1; // Minus the original buffer. // <= 0 means don't rotating - rotatingNum = max(0, rotatingNum); + rotatingNum = max((int64_t)0, rotatingNum); - int32_t totalRotatingSizeNeeded = rotatingNum * rotatingSize; + int64_t totalRotatingSizeNeeded = rotatingNum * rotatingSize; std::cout << "Rotating buffer set to: " << m_rotatingBuffer << ". 
Rotating num: " << rotatingNum << std::endl; if(m_rotatingMode == 0) { auto rotatingAllocatedSize = m_rm->getDataSize() - m_rm->getDataLargestUnitSize(); - if(totalRotatingSizeNeeded > rotatingAllocatedSize) + if(totalRotatingSizeNeeded > (int64_t)rotatingAllocatedSize) { std::cout << "Rotating buffer size: " << rotatingAllocatedSize << " is not enough for rotating buffer size: " << rotatingSize @@ -2566,21 +2566,21 @@ namespace TensileLite rotatingSize += getRotatingSize(groupedProblem->gemms[i], castInputs->grouped[i]); } - int32_t rotatingNum + int64_t rotatingNum = min(maxRotatingBufferNum, ceil((float)m_rotatingBuffer / rotatingSize)) - 1; // Minus the original buffer. // <= 0 means don't rotating - rotatingNum = max(0, rotatingNum); + rotatingNum = max((int64_t)0, rotatingNum); - int32_t totalRotatingSizeNeeded = rotatingNum * rotatingSize; + int64_t totalRotatingSizeNeeded = rotatingNum * rotatingSize; std::cout << "Rotating buffer set to: " << m_rotatingBuffer << ". Rotating num: " << rotatingNum << std::endl; if(m_rotatingMode == 0) { auto rotatingAllocatedSize = m_rm->getDataSize() - m_rm->getDataLargestUnitSize(); - if(totalRotatingSizeNeeded > rotatingAllocatedSize) + if(totalRotatingSizeNeeded > (int64_t)rotatingAllocatedSize) { std::cout << "Rotating buffer size: " << rotatingAllocatedSize << " is not enough for rotating buffer size: " << rotatingSize