-
Notifications
You must be signed in to change notification settings - Fork 311
HPCC-35300 LDAP should handle brute force password hack attempts #21172
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: candidate-10.2.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,201 @@ | ||
| #pragma once | ||
|
|
||
| #include <unordered_map> | ||
| #include <string> | ||
| #include <algorithm> | ||
| #include <limits> | ||
|
|
||
| #include "jlog.hpp" | ||
| #include "jmutex.hpp" | ||
| #include "jutil.hpp" | ||
|
|
||
| struct FailedAuthEntry | ||
| { | ||
| unsigned firstFailureTick; | ||
| unsigned failedAttempts; | ||
|
|
||
| FailedAuthEntry() : firstFailureTick(0), failedAttempts(0) {} | ||
| FailedAuthEntry(unsigned tick, unsigned attempts) | ||
| : firstFailureTick(tick), failedAttempts(attempts) {} | ||
| }; | ||
|
|
||
| // FailedAuthCache is primarily a load-reduction mechanism to prevent repeated failed | ||
| // authentication attempts from hammering the LDAP/Active Directory server with unnecessary | ||
| // traffic and round-trips. Once a configurable threshold of local failures is reached, the cache | ||
| // blocks further attempts for the configured timeout period, avoiding LDAP queries. | ||
| // NOTE: This is NOT the primary lockout mechanism; Active Directory's own account.lockout | ||
| // policy remains authoritative. This cache layer simply reduces network load during failure bursts. | ||
| // IMPORTANT: The TTL is measured from the FIRST failure, not continuously updated on each new failure. | ||
| // This allows automatic recovery: if the underlying AD condition (temporary outage, account unlock, etc.) | ||
| // is fixed within the timeout period, the entry expires and retries are allowed. This prevents permanent | ||
| // blocking of services that are retrying after a transient AD issue. | ||
| class FailedAuthCache | ||
| { | ||
| public: | ||
| static constexpr unsigned defaultMaxFailedAttempts = 5; | ||
| static constexpr unsigned defaultCacheTimeoutSeconds = 300; // 5 minutes | ||
| // NOTE: maxAllowedEntries caps the cache size to bound memory use. During a credential-spray | ||
| // attack with more distinct usernames than this limit, the oldest entries are evicted to make | ||
| // room for newer ones. Evicted users can retry LDAP immediately, so this cap trades off memory | ||
| // use against protection coverage. Raise this limit if spray attacks with many distinct usernames | ||
| // are a concern. The trim cost is O(N) per insert so keep this reasonably sized. | ||
| static constexpr unsigned defaultMaxAllowedEntries = 25; | ||
|
|
||
| FailedAuthCache(unsigned maxFailedAttempts = defaultMaxFailedAttempts, | ||
| unsigned cacheTimeoutSeconds = defaultCacheTimeoutSeconds, | ||
| unsigned maxAllowedEntries = defaultMaxAllowedEntries) | ||
| : m_maxFailedAttempts(maxFailedAttempts), | ||
| // Clamp once at construction so ms conversion cannot overflow. | ||
| m_cacheTimeoutSeconds(std::min(cacheTimeoutSeconds, std::numeric_limits<unsigned>::max() / 1000U)), | ||
| m_maxAllowedEntries(maxAllowedEntries) | ||
| { | ||
| } | ||
|
|
||
| // Check if a user should be blocked due to failed authentication. | ||
| // Returns true if authentication should be blocked. | ||
| bool isUserLockedOut(const char* username) | ||
| { | ||
| if (!username || !*username) | ||
| return false; | ||
|
|
||
| CriticalBlock block(m_lock); | ||
|
|
||
| auto it = m_cache.find(username); | ||
| if (it == m_cache.end()) | ||
| return false; | ||
|
|
||
| const unsigned currentTick = msTick(); | ||
| if (isExpired(it->second, currentTick)) | ||
| { | ||
| // Entry has expired; remove it and allow retries. No need to trim the | ||
| // full cache here — trimming happens on every insert/update in updateUserLockoutStatus. | ||
| m_cache.erase(it); | ||
| trimFailedAuthCache(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this call necessary? |
||
| return false; | ||
| } | ||
|
|
||
| return it->second.failedAttempts >= m_maxFailedAttempts; | ||
| } | ||
|
|
||
| // Record a failed authentication attempt for username. | ||
| void updateUserLockoutStatus(const char* username) | ||
| { | ||
| if (!username || !*username) | ||
| return; | ||
|
|
||
| CriticalBlock block(m_lock); | ||
|
|
||
| const unsigned currentTick = msTick(); | ||
| auto it = m_cache.find(username); | ||
| if (it == m_cache.end()) | ||
| { | ||
| m_cache[username] = FailedAuthEntry(currentTick, 1); | ||
| trimFailedAuthCache(); | ||
| return; | ||
| } | ||
|
|
||
| if (isExpired(it->second, currentTick)) | ||
| { | ||
| // Timeout expired from first failure; reset to allow retries. | ||
| // If the underlying AD condition (e.g., temporary outage, account unlock) was fixed, | ||
| // the service can now attempt authentication again. | ||
| it->second.firstFailureTick = currentTick; | ||
| it->second.failedAttempts = 1; | ||
| } | ||
| else | ||
| ++(it->second.failedAttempts); | ||
|
|
||
| if (it->second.failedAttempts == m_maxFailedAttempts) | ||
| OWARNLOG("User %s locked out for %u seconds after reaching maximum failed authentication attempts of %u", username, m_cacheTimeoutSeconds, m_maxFailedAttempts); | ||
| } | ||
|
|
||
| // Remove a user from the failed auth cache (e.g., on successful authentication). | ||
| void removeUser(const char* username) | ||
| { | ||
| if (!username || !*username) | ||
| return; | ||
|
|
||
| CriticalBlock block(m_lock); | ||
|
|
||
| auto it = m_cache.find(username); | ||
| if (it != m_cache.end()) | ||
| { | ||
| m_cache.erase(it); | ||
| trimFailedAuthCache(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this call here? |
||
| } | ||
| } | ||
|
|
||
| // Optional setters/getters | ||
| void setMaxFailedAttempts(unsigned v) { m_maxFailedAttempts = v; } | ||
| // Keep timeout-ms conversion safe even if set at runtime. | ||
| void setCacheTimeoutSeconds(unsigned v) { m_cacheTimeoutSeconds = std::min(v, std::numeric_limits<unsigned>::max() / 1000U); } | ||
| void setMaxAllowedEntries(unsigned v) { m_maxAllowedEntries = v; } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Theoretically this function and timeout should call trimFailedAuthCache(). This could break my assumption that there is at most one extra. I would resolve by clearing all. |
||
|
|
||
| unsigned getMaxFailedAttempts() const { return m_maxFailedAttempts; } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these needed - they're not called. |
||
| unsigned getCacheTimeoutSeconds() const { return m_cacheTimeoutSeconds; } | ||
| unsigned getMaxAllowedEntries() const { return m_maxAllowedEntries; } | ||
|
|
||
| // Clear entire cache | ||
| void clear() | ||
| { | ||
| CriticalBlock block(m_lock); | ||
| m_cache.clear(); | ||
| } | ||
|
|
||
| private: | ||
| unsigned queryCacheTimeoutMs() const | ||
| { | ||
| // Safe because constructor/setter clamp m_cacheTimeoutSeconds. | ||
| return m_cacheTimeoutSeconds * 1000U; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may be pedantic, but the setter in combination with this function does have a chance for a race condition to result in a value being set that would return something out of bounds. Maybe use the critical section or have some validation in the setter to keep it in bounds? This is the only thing I saw that probably really warrants a change.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed with clamping the value. |
||
| } | ||
|
|
||
| // Check if a cached entry has expired based on time elapsed since FIRST failure. | ||
| // NOTE: TTL is from firstFailureTick (the initial failure time), NOT updated to the latest failure. | ||
| // This design prevents permanent blocking and allows recovery if the underlying condition is resolved. | ||
| bool isExpired(const FailedAuthEntry &entry, unsigned currentTick) const | ||
| { | ||
| const unsigned elapsedMs = currentTick - entry.firstFailureTick; // Unsigned tick subtraction intentionally handles msTick wraparound. | ||
| return elapsedMs >= queryCacheTimeoutMs(); | ||
| } | ||
|
|
||
| void trimFailedAuthCache() | ||
| { | ||
| // Expect lock is held by caller. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be simplified
|
||
| const unsigned currentTick = msTick(); | ||
|
|
||
| // Remove expired entries. | ||
| for (auto it = m_cache.begin(); it != m_cache.end();) | ||
| { | ||
| if (isExpired(it->second, currentTick)) | ||
| it = m_cache.erase(it); | ||
| else | ||
| ++it; | ||
| } | ||
|
|
||
| // If still too large, remove oldest entries by age. | ||
| while (m_cache.size() > m_maxAllowedEntries) | ||
| { | ||
| // max_element with this comparator returns the entry with the largest age (oldest). | ||
| auto oldestIt = std::max_element( | ||
| m_cache.begin(), | ||
| m_cache.end(), | ||
| [currentTick](const auto& a, const auto& b) { | ||
| const unsigned ageA = currentTick - a.second.firstFailureTick; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would be a possible simplification, but not a requested change. Since msTick is monotonically increasing (until wraparound) couldn't you just compare the firstFailureTick times to get a relative age ordering (smallest value meaning oldest, ignoring error due to wraparound which I can't think of a way to compensate for anyhow). The math with currentTick gives an absolute age, but you still would have error when the two operands are on either side of a wraparound.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. We should keep subtraction here: unsigned tick subtraction is the correct wrap-safe pattern for monotonic msTick counters. Directly comparing firstFailureTick/currentTick would be less reliable across counter rollover. |
||
| const unsigned ageB = currentTick - b.second.firstFailureTick; | ||
| return ageA < ageB; | ||
| }); | ||
| if (oldestIt != m_cache.end()) | ||
| m_cache.erase(oldestIt); | ||
| else | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| private: | ||
| std::unordered_map<std::string, FailedAuthEntry> m_cache; | ||
| CriticalSection m_lock; | ||
|
|
||
| unsigned m_maxFailedAttempts; | ||
| unsigned m_cacheTimeoutSeconds; | ||
| unsigned m_maxAllowedEntries; | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |
| limitations under the License. | ||
| ############################################################################## */ | ||
|
|
||
| #include "jlog.hpp" | ||
| #define AXA_API DECL_EXPORT | ||
|
|
||
| #include "ldapsecurity.ipp" | ||
|
|
@@ -27,6 +28,22 @@ | |
| using namespace cryptohelper; | ||
|
|
||
| #include "workunit.hpp" | ||
| #include <ctime> | ||
|
|
||
| /********************************************************** | ||
| * Failed Authentication Cache (Load Reduction) * | ||
| **********************************************************/ | ||
| // This cache tracks repeated authentication failures to reduce LDAP/AD traffic | ||
| // by avoiding redundant queries when a user has failed authentication multiple times | ||
| // within a short window. Active Directory's account lockout policy remains authoritative; | ||
| // this cache is purely a load-reduction layer. | ||
|
|
||
| #include "failedAuthCache.hpp" | ||
|
|
||
| // Static, class-scoped failed-auth cache instance. It will be initialized once | ||
| // (on first manager init) with values from configuration. | ||
| FailedAuthCache CLdapSecManager::s_failedAuthCache; | ||
| CriticalSection CLdapSecManager::s_failedAuthCacheInitLock; | ||
|
|
||
| /********************************************************** | ||
| * CLdapSecUser * | ||
|
|
@@ -557,6 +574,9 @@ ISecProperty* CLdapSecResourceList::findProperty(const char* name) | |
| } | ||
|
|
||
|
|
||
| // The failed-auth helper functions are now provided by FailedAuthCache. | ||
|
|
||
|
|
||
| /********************************************************** | ||
| * CLdapSecManager * | ||
| **********************************************************/ | ||
|
|
@@ -616,7 +636,18 @@ void CLdapSecManager::init(const char *serviceName, IPropertyTree* cfg) | |
|
|
||
| m_passwordExpirationWarningDays = cfg->getPropInt(".//@passwordExpirationWarningDays", 10); //Default to 10 days | ||
| m_checkViewPermissions = cfg->getPropBool(".//@checkViewPermissions", false); | ||
| unsigned maxFailedAuthAttempts = cfg->getPropInt(".//@maxFailedAuthAttempts", FailedAuthCache::defaultMaxFailedAttempts); | ||
| unsigned failedAuthCacheTimeout = cfg->getPropInt(".//@failedAuthCacheTimeoutSeconds", FailedAuthCache::defaultCacheTimeoutSeconds); | ||
| unsigned maxAllowedFailedAuthEntries = cfg->getPropInt(".//@maxAllowedFailedAuthEntries", FailedAuthCache::defaultMaxAllowedEntries); | ||
| m_hpccInternalScope.set(queryDfsXmlBranchName(DXB_Internal)).append("::");//HpccInternal:: | ||
|
|
||
| // Initialize/update the shared failed-auth cache with configured values | ||
| { | ||
| CriticalBlock block(CLdapSecManager::s_failedAuthCacheInitLock); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is breaking the encapsulation. There should probably be a single set function that internally locks the critical section. |
||
| CLdapSecManager::s_failedAuthCache.setMaxFailedAttempts(maxFailedAuthAttempts); | ||
| CLdapSecManager::s_failedAuthCache.setCacheTimeoutSeconds(failedAuthCacheTimeout); | ||
| CLdapSecManager::s_failedAuthCache.setMaxAllowedEntries(maxAllowedFailedAuthEntries); | ||
| } | ||
| }; | ||
|
|
||
|
|
||
|
|
@@ -675,6 +706,35 @@ bool CLdapSecManager::authenticate(ISecUser* user) | |
| if(!user) | ||
| return false; | ||
|
|
||
| const char* username = user->getName(); | ||
| if (isEmptyString(username)) | ||
| { | ||
| DBGLOG("CLdapSecManager::authenticate username cannot be empty"); | ||
| return false; | ||
| } | ||
|
|
||
| // Check failed-auth cache before proceeding to reduce LDAP queries on repeated failures | ||
| // (load reduction mechanism; Active Directory's own lockout policy is authoritative) | ||
| if (CLdapSecManager::s_failedAuthCache.isUserLockedOut(username)) | ||
| { | ||
| user->setAuthenticateStatus(AS_INVALID_CREDENTIALS); | ||
| m_permissionsCache->removePermissions(*user); | ||
| m_permissionsCache->removeFromUserCache(*user); | ||
| return false; | ||
| } | ||
|
|
||
| bool rc = doUserAuthenticate(user); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if the authentication failed because the server was unavailable? |
||
| if (rc) | ||
| CLdapSecManager::s_failedAuthCache.removeUser(username); | ||
| else | ||
| CLdapSecManager::s_failedAuthCache.updateUserLockoutStatus(username); | ||
|
|
||
| return rc; | ||
| } | ||
|
|
||
| bool CLdapSecManager::doUserAuthenticate(ISecUser* user) | ||
| { | ||
| const char* username = user->getName(); | ||
| bool isCaching = m_permissionsCache->isCacheEnabled() && !m_usercache_off;//caching enabled? | ||
| bool isUserCached = false; | ||
| Owned<ISecUser> cachedUser = new CLdapSecUser(user->getName(), ""); | ||
|
|
@@ -735,12 +795,9 @@ bool CLdapSecManager::authenticate(ISecUser* user) | |
| } | ||
|
|
||
| if (isUserCached && cachedUser->getAuthenticateStatus() == AS_AUTHENTICATED)//only authenticated users will be cached | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| //User not in cache. Look for session token, or call LDAP to authenticate | ||
|
|
||
| if (0 != user->credentials().getSessionToken())//check for token existence | ||
| { | ||
| user->setAuthenticateStatus(AS_AUTHENTICATED); | ||
|
|
@@ -759,10 +816,10 @@ bool CLdapSecManager::authenticate(ISecUser* user) | |
| pDSM = queryDigitalSignatureManagerInstanceFromEnv(); | ||
| if (pDSM && pDSM->isDigiSignerConfigured()) | ||
| { | ||
| //Set user digital signature | ||
| StringBuffer b64Signature; | ||
| pDSM->digiSign(b64Signature, user->getName()); | ||
| user->credentials().setSignature(b64Signature); | ||
| //Set user digital signature | ||
| StringBuffer b64Signature; | ||
| pDSM->digiSign(b64Signature, user->getName()); | ||
| user->credentials().setSignature(b64Signature); | ||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similar comment to the other PR - use a much lower threshold than 49 days, otherwise it will never expire.