Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 201 additions & 0 deletions system/security/LdapSecurity/failedAuthCache.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#pragma once

#include <unordered_map>
#include <string>
#include <algorithm>
#include <limits>

#include "jlog.hpp"
#include "jmutex.hpp"
#include "jutil.hpp"

struct FailedAuthEntry
{
unsigned firstFailureTick;
unsigned failedAttempts;

FailedAuthEntry() : firstFailureTick(0), failedAttempts(0) {}
FailedAuthEntry(unsigned tick, unsigned attempts)
: firstFailureTick(tick), failedAttempts(attempts) {}
};

// FailedAuthCache is primarily a load-reduction mechanism to prevent repeated failed
// authentication attempts from hammering the LDAP/Active Directory server with unnecessary
// traffic and round-trips. Once a configurable threshold of local failures is reached, the cache
// blocks further attempts for the configured timeout period, avoiding LDAP queries.
// NOTE: This is NOT the primary lockout mechanism; Active Directory's own account.lockout
// policy remains authoritative. This cache layer simply reduces network load during failure bursts.
// IMPORTANT: The TTL is measured from the FIRST failure, not continuously updated on each new failure.
// This allows automatic recovery: if the underlying AD condition (temporary outage, account unlock, etc.)
// is fixed within the timeout period, the entry expires and retries are allowed. This prevents permanent
// blocking of services that are retrying after a transient AD issue.
class FailedAuthCache
{
public:
static constexpr unsigned defaultMaxFailedAttempts = 5;
static constexpr unsigned defaultCacheTimeoutSeconds = 300; // 5 minutes
// NOTE: maxAllowedEntries caps the cache size to bound memory use. During a credential-spray
// attack with more distinct usernames than this limit, the oldest entries are evicted to make
// room for newer ones. Evicted users can retry LDAP immediately, so this cap trades off memory
// use against protection coverage. Raise this limit if spray attacks with many distinct usernames
// are a concern. The trim cost is O(N) per insert so keep this reasonably sized.
static constexpr unsigned defaultMaxAllowedEntries = 25;

FailedAuthCache(unsigned maxFailedAttempts = defaultMaxFailedAttempts,
unsigned cacheTimeoutSeconds = defaultCacheTimeoutSeconds,
unsigned maxAllowedEntries = defaultMaxAllowedEntries)
: m_maxFailedAttempts(maxFailedAttempts),
// Clamp once at construction so ms conversion cannot overflow.
m_cacheTimeoutSeconds(std::min(cacheTimeoutSeconds, std::numeric_limits<unsigned>::max() / 1000U)),

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar comment to the other PR - use a much lower threshold than 49 days, otherwise it will never expire.

m_maxAllowedEntries(maxAllowedEntries)
{
}

// Check if a user should be blocked due to failed authentication.
// Returns true if authentication should be blocked.
bool isUserLockedOut(const char* username)
{
if (!username || !*username)
return false;

CriticalBlock block(m_lock);

auto it = m_cache.find(username);
if (it == m_cache.end())
return false;

const unsigned currentTick = msTick();
if (isExpired(it->second, currentTick))
{
// Entry has expired; remove it and allow retries. No need to trim the
// full cache here — trimming happens on every insert/update in updateUserLockoutStatus.
m_cache.erase(it);
trimFailedAuthCache();

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this call necessary?

return false;
}

return it->second.failedAttempts >= m_maxFailedAttempts;
}

// Record a failed authentication attempt for username.
void updateUserLockoutStatus(const char* username)
{
if (!username || !*username)
return;

CriticalBlock block(m_lock);

const unsigned currentTick = msTick();
auto it = m_cache.find(username);
if (it == m_cache.end())
{
m_cache[username] = FailedAuthEntry(currentTick, 1);
trimFailedAuthCache();
return;
}

if (isExpired(it->second, currentTick))
{
// Timeout expired from first failure; reset to allow retries.
// If the underlying AD condition (e.g., temporary outage, account unlock) was fixed,
// the service can now attempt authentication again.
it->second.firstFailureTick = currentTick;
it->second.failedAttempts = 1;
}
else
++(it->second.failedAttempts);

if (it->second.failedAttempts == m_maxFailedAttempts)
OWARNLOG("User %s locked out for %u seconds after reaching maximum failed authentication attempts of %u", username, m_cacheTimeoutSeconds, m_maxFailedAttempts);
}

// Remove a user from the failed auth cache (e.g., on successful authentication).
void removeUser(const char* username)
{
if (!username || !*username)
return;

CriticalBlock block(m_lock);

auto it = m_cache.find(username);
if (it != m_cache.end())
{
m_cache.erase(it);
trimFailedAuthCache();

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this call here?

}
}

// Optional setters/getters
void setMaxFailedAttempts(unsigned v) { m_maxFailedAttempts = v; }
// Keep timeout-ms conversion safe even if set at runtime.
void setCacheTimeoutSeconds(unsigned v) { m_cacheTimeoutSeconds = std::min(v, std::numeric_limits<unsigned>::max() / 1000U); }
void setMaxAllowedEntries(unsigned v) { m_maxAllowedEntries = v; }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Theoretically this function and timeout should call trimFailedAuthCache(). This could break my assumption that there is at most one extra. I would resolve by clearing all.


unsigned getMaxFailedAttempts() const { return m_maxFailedAttempts; }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these needed - they're not called.

unsigned getCacheTimeoutSeconds() const { return m_cacheTimeoutSeconds; }
unsigned getMaxAllowedEntries() const { return m_maxAllowedEntries; }

// Clear entire cache
void clear()
{
CriticalBlock block(m_lock);
m_cache.clear();
}

private:
unsigned queryCacheTimeoutMs() const
{
// Safe because constructor/setter clamp m_cacheTimeoutSeconds.
return m_cacheTimeoutSeconds * 1000U;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be pedantic, but the setter in combination with this function does have a chance for a race condition to result in a value being set that would return something out of bounds. Maybe use the critical section or have some validation in the setter to keep it in bounds? This is the only thing I saw that probably really warrants a change.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed with clamping the value.

}

// Check if a cached entry has expired based on time elapsed since FIRST failure.
// NOTE: TTL is from firstFailureTick (the initial failure time), NOT updated to the latest failure.
// This design prevents permanent blocking and allows recovery if the underlying condition is resolved.
bool isExpired(const FailedAuthEntry &entry, unsigned currentTick) const
{
const unsigned elapsedMs = currentTick - entry.firstFailureTick; // Unsigned tick subtraction intentionally handles msTick wraparound.
return elapsedMs >= queryCacheTimeoutMs();
}

void trimFailedAuthCache()
{
// Expect lock is held by caller.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be simplified

  1. since it is called whenever an entry is added it can only have 1 extra entry - that can be checked by an assert.
  2. When checking for expired entries, it could also keep a pointer to the oldest non-expired. That could then be removed without a subsequent re-iteration.

const unsigned currentTick = msTick();

// Remove expired entries.
for (auto it = m_cache.begin(); it != m_cache.end();)
{
if (isExpired(it->second, currentTick))
it = m_cache.erase(it);
else
++it;
}

// If still too large, remove oldest entries by age.
while (m_cache.size() > m_maxAllowedEntries)
{
// max_element with this comparator returns the entry with the largest age (oldest).
auto oldestIt = std::max_element(
m_cache.begin(),
m_cache.end(),
[currentTick](const auto& a, const auto& b) {
const unsigned ageA = currentTick - a.second.firstFailureTick;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be a possible simplification, but not a requested change. Since msTick is monotonically increasing (until wraparound) couldn't you just compare the firstFailureTick times to get a relative age ordering (smallest value meaning oldest, ignoring error due to wraparound which I can't think of a way to compensate for anyhow). The math with currentTick gives an absolute age, but you still would have error when the two operands are on either side of a wraparound.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. We should keep subtraction here: unsigned tick subtraction is the correct wrap-safe pattern for monotonic msTick counters. Directly comparing firstFailureTick/currentTick would be less reliable across counter rollover.

const unsigned ageB = currentTick - b.second.firstFailureTick;
return ageA < ageB;
});
if (oldestIt != m_cache.end())
m_cache.erase(oldestIt);
else
break;
}
}

private:
std::unordered_map<std::string, FailedAuthEntry> m_cache;
CriticalSection m_lock;

unsigned m_maxFailedAttempts;
unsigned m_cacheTimeoutSeconds;
unsigned m_maxAllowedEntries;
};
71 changes: 64 additions & 7 deletions system/security/LdapSecurity/ldapsecurity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
limitations under the License.
############################################################################## */

#include "jlog.hpp"
#define AXA_API DECL_EXPORT

#include "ldapsecurity.ipp"
Expand All @@ -27,6 +28,22 @@
using namespace cryptohelper;

#include "workunit.hpp"
#include <ctime>

/**********************************************************
* Failed Authentication Cache (Load Reduction) *
**********************************************************/
// This cache tracks repeated authentication failures to reduce LDAP/AD traffic
// by avoiding redundant queries when a user has failed authentication multiple times
// within a short window. Active Directory's account lockout policy remains authoritative;
// this cache is purely a load-reduction layer.

#include "failedAuthCache.hpp"

// Static, class-scoped failed-auth cache instance. It will be initialized once
// (on first manager init) with values from configuration.
FailedAuthCache CLdapSecManager::s_failedAuthCache;
CriticalSection CLdapSecManager::s_failedAuthCacheInitLock;

/**********************************************************
* CLdapSecUser *
Expand Down Expand Up @@ -557,6 +574,9 @@ ISecProperty* CLdapSecResourceList::findProperty(const char* name)
}


// The failed-auth helper functions are now provided by FailedAuthCache.


/**********************************************************
* CLdapSecManager *
**********************************************************/
Expand Down Expand Up @@ -616,7 +636,18 @@ void CLdapSecManager::init(const char *serviceName, IPropertyTree* cfg)

m_passwordExpirationWarningDays = cfg->getPropInt(".//@passwordExpirationWarningDays", 10); //Default to 10 days
m_checkViewPermissions = cfg->getPropBool(".//@checkViewPermissions", false);
unsigned maxFailedAuthAttempts = cfg->getPropInt(".//@maxFailedAuthAttempts", FailedAuthCache::defaultMaxFailedAttempts);
unsigned failedAuthCacheTimeout = cfg->getPropInt(".//@failedAuthCacheTimeoutSeconds", FailedAuthCache::defaultCacheTimeoutSeconds);
unsigned maxAllowedFailedAuthEntries = cfg->getPropInt(".//@maxAllowedFailedAuthEntries", FailedAuthCache::defaultMaxAllowedEntries);
m_hpccInternalScope.set(queryDfsXmlBranchName(DXB_Internal)).append("::");//HpccInternal::

// Initialize/update the shared failed-auth cache with configured values
{
CriticalBlock block(CLdapSecManager::s_failedAuthCacheInitLock);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is breaking the encapsulation. There should probably be a single set function that internally locks the critical section.

CLdapSecManager::s_failedAuthCache.setMaxFailedAttempts(maxFailedAuthAttempts);
CLdapSecManager::s_failedAuthCache.setCacheTimeoutSeconds(failedAuthCacheTimeout);
CLdapSecManager::s_failedAuthCache.setMaxAllowedEntries(maxAllowedFailedAuthEntries);
}
};


Expand Down Expand Up @@ -675,6 +706,35 @@ bool CLdapSecManager::authenticate(ISecUser* user)
if(!user)
return false;

const char* username = user->getName();
if (isEmptyString(username))
{
DBGLOG("CLdapSecManager::authenticate username cannot be empty");
return false;
}

// Check failed-auth cache before proceeding to reduce LDAP queries on repeated failures
// (load reduction mechanism; Active Directory's own lockout policy is authoritative)
if (CLdapSecManager::s_failedAuthCache.isUserLockedOut(username))
{
user->setAuthenticateStatus(AS_INVALID_CREDENTIALS);
m_permissionsCache->removePermissions(*user);
m_permissionsCache->removeFromUserCache(*user);
return false;
}

bool rc = doUserAuthenticate(user);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if the authentication failed because the server was unavailable?

if (rc)
CLdapSecManager::s_failedAuthCache.removeUser(username);
else
CLdapSecManager::s_failedAuthCache.updateUserLockoutStatus(username);

return rc;
}

bool CLdapSecManager::doUserAuthenticate(ISecUser* user)
{
const char* username = user->getName();
bool isCaching = m_permissionsCache->isCacheEnabled() && !m_usercache_off;//caching enabled?
bool isUserCached = false;
Owned<ISecUser> cachedUser = new CLdapSecUser(user->getName(), "");
Expand Down Expand Up @@ -735,12 +795,9 @@ bool CLdapSecManager::authenticate(ISecUser* user)
}

if (isUserCached && cachedUser->getAuthenticateStatus() == AS_AUTHENTICATED)//only authenticated users will be cached
{
return true;
}

//User not in cache. Look for session token, or call LDAP to authenticate

if (0 != user->credentials().getSessionToken())//check for token existence
{
user->setAuthenticateStatus(AS_AUTHENTICATED);
Expand All @@ -759,10 +816,10 @@ bool CLdapSecManager::authenticate(ISecUser* user)
pDSM = queryDigitalSignatureManagerInstanceFromEnv();
if (pDSM && pDSM->isDigiSignerConfigured())
{
//Set user digital signature
StringBuffer b64Signature;
pDSM->digiSign(b64Signature, user->getName());
user->credentials().setSignature(b64Signature);
//Set user digital signature
StringBuffer b64Signature;
pDSM->digiSign(b64Signature, user->getName());
user->credentials().setSignature(b64Signature);
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions system/security/LdapSecurity/ldapsecurity.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
#endif
#include "seclib.hpp"

// forward declare the new FailedAuthCache used by the manager
class FailedAuthCache;

#ifndef LDAPSECURITY_EXPORTS
#define LDAPSECURITY_API DECL_IMPORT
#else
Expand Down Expand Up @@ -380,6 +383,8 @@ private:
StringBuffer m_description;
unsigned m_passwordExpirationWarningDays;
bool m_checkViewPermissions;
static FailedAuthCache s_failedAuthCache;
static CriticalSection s_failedAuthCacheInitLock;
static const SecFeatureSet s_safeFeatures = SMF_ALL_FEATURES;
static const SecFeatureSet s_implementedFeatures = s_safeFeatures & ~(SMF_RetrieveUserData | SMF_RemoveResources);
StringBuffer m_hpccInternalScope;
Expand Down Expand Up @@ -522,6 +527,9 @@ public:
virtual void removeViewMembers(const char * viewName, StringArray & viewUsers, StringArray & viewGroups) override;
virtual void queryViewMembers(const char * viewName, StringArray & viewUsers, StringArray & viewGroups) override;
virtual bool userInView(const char * user, const char* viewName) override;

private:
bool doUserAuthenticate(ISecUser* user);
};

#ifdef _MSC_VER
Expand Down
Loading