-
Notifications
You must be signed in to change notification settings - Fork 4
Auto-size num.stream.threads from topology and replica count #119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
915c917
6299517
c305f24
736f37d
f8ae1b0
1c5c6ce
5522fba
c5a7bd0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,168 @@ | ||
| package org.hypertrace.core.kafkastreams.framework.threading; | ||
|
|
||
| import static java.util.stream.Collectors.toUnmodifiableSet; | ||
|
|
||
| import java.time.Duration; | ||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
| import java.util.Map.Entry; | ||
| import java.util.OptionalInt; | ||
| import java.util.Set; | ||
| import java.util.concurrent.ExecutionException; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.TimeoutException; | ||
| import org.apache.kafka.clients.admin.AdminClient; | ||
| import org.apache.kafka.clients.admin.DescribeTopicsResult; | ||
| import org.apache.kafka.clients.admin.TopicDescription; | ||
| import org.apache.kafka.common.KafkaFuture; | ||
| import org.apache.kafka.common.errors.UnknownTopicOrPartitionException; | ||
| import org.apache.kafka.streams.Topology; | ||
| import org.apache.kafka.streams.TopologyDescription; | ||
| import org.apache.kafka.streams.TopologyDescription.Source; | ||
| import org.apache.kafka.streams.TopologyDescription.Subtopology; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| /** | ||
| * Computes a per-instance {@code num.stream.threads} value from a topology and the partition count | ||
| * of every source topic. | ||
| * | ||
| * <p>For each sub-topology the maximum partition count across its source topics is the number of | ||
| * stream tasks. Summing across sub-topologies and dividing by the replica count yields the threads | ||
| * each instance should run to keep all tasks active without idle threads. | ||
| * | ||
| * <p>Returns {@link OptionalInt#empty()} when the topology contains a regex/pattern subscription | ||
| * ({@link Source#topicPattern()}) — those sub-topologies cannot be enumerated against the broker | ||
| * up-front, so dynamic sizing would silently under-count tasks. The caller falls back to its | ||
| * configured default in that case. | ||
| */ | ||
| public class DynamicStreamThreadsCountCalculator { | ||
|
|
||
| private static final long DESCRIBE_TOPICS_TIMEOUT_MILLIS = Duration.ofSeconds(5).toMillis(); | ||
| private static final Logger logger = | ||
| LoggerFactory.getLogger(DynamicStreamThreadsCountCalculator.class); | ||
|
|
||
| private static Set<String> sourceTopicsOf(final Subtopology subtopology) { | ||
| return subtopology.nodes().stream() | ||
| .filter(node -> node instanceof Source) | ||
| .map(node -> (Source) node) | ||
| .flatMap(source -> source.topicSet().stream()) | ||
| .collect(toUnmodifiableSet()); | ||
| } | ||
|
|
||
| private static boolean hasPatternSource(final Subtopology subtopology) { | ||
| return subtopology.nodes().stream() | ||
| .filter(node -> node instanceof Source) | ||
| .map(node -> (Source) node) | ||
| .anyMatch(source -> source.topicPattern() != null); | ||
| } | ||
|
|
||
| public OptionalInt compute( | ||
| final Topology topology, final AdminClient adminClient, final int replicas) { | ||
| if (replicas <= 0) { | ||
| throw new IllegalArgumentException("replicas must be positive, got " + replicas); | ||
| } | ||
|
|
||
| final TopologyDescription description = topology.describe(); | ||
|
|
||
| // Bail out if any sub-topology subscribes via regex — topicSet() is empty for those, so | ||
| // dynamic sizing would silently under-count tasks. The caller substitutes its fallback. | ||
| final boolean anyPatternSource = | ||
| description.subtopologies().stream() | ||
| .anyMatch(DynamicStreamThreadsCountCalculator::hasPatternSource); | ||
| if (anyPatternSource) { | ||
| logger.warn( | ||
| "Topology contains a regex/pattern source; dynamic num.stream.threads is not supported. " | ||
| + "Caller will fall back to its configured default."); | ||
| return OptionalInt.empty(); | ||
| } | ||
|
|
||
| final Set<String> sourceTopics = | ||
| description.subtopologies().stream() | ||
| .flatMap(subtopology -> sourceTopicsOf(subtopology).stream()) | ||
| .collect(toUnmodifiableSet()); | ||
|
|
||
| final Map<String, Integer> partitionsByTopic = describePartitions(adminClient, sourceTopics); | ||
|
|
||
| int totalTasks = 0; | ||
| int subtopologyCount = 0; | ||
| for (final Subtopology subtopology : description.subtopologies()) { | ||
| subtopologyCount++; | ||
| final Set<String> subtopologyTopics = sourceTopicsOf(subtopology); | ||
|
|
||
| final int tasksForSubtopology = | ||
| subtopologyTopics.stream() | ||
| .mapToInt(topic -> partitionsByTopic.getOrDefault(topic, 0)) | ||
| .max() | ||
| .orElse(0); | ||
|
|
||
| if (tasksForSubtopology == 0) { | ||
| logger.warn( | ||
| "Sub-topology has no resolvable partitions; topics={}. Pod restart will be needed once topics exist.", | ||
| subtopologyTopics); | ||
| } | ||
| totalTasks += tasksForSubtopology; | ||
| } | ||
|
|
||
| if (totalTasks == 0) { | ||
| logger.warn( | ||
| "No resolvable partitions across {} sub-topologies; skipping dynamic num.stream.threads.", | ||
| subtopologyCount); | ||
| return OptionalInt.empty(); | ||
| } | ||
|
|
||
| final int threads = (int) Math.ceil((double) totalTasks / replicas); | ||
| logger.info( | ||
| "Dynamic num.stream.threads: totalTasks={} across {} sub-topologies, replicas={}, computed={}", | ||
| totalTasks, | ||
| subtopologyCount, | ||
| replicas, | ||
| threads); | ||
| return OptionalInt.of(threads); | ||
| } | ||
|
|
||
| // Single-loop implementation: AdminClient.describeTopics() already fires all RPCs concurrently | ||
| // before returning futures, so iteration here only consumes a shared deadline (now+timeout) — | ||
| // total wall-clock is capped at DESCRIBE_TOPICS_TIMEOUT_MILLIS regardless of topic count. | ||
| private Map<String, Integer> describePartitions( | ||
| final AdminClient adminClient, final Set<String> topics) { | ||
| if (topics.isEmpty()) { | ||
| return Map.of(); | ||
| } | ||
| final DescribeTopicsResult result = adminClient.describeTopics(topics); | ||
| final Map<String, KafkaFuture<TopicDescription>> futures = result.topicNameValues(); | ||
| final long deadlineMillis = System.currentTimeMillis() + DESCRIBE_TOPICS_TIMEOUT_MILLIS; | ||
| final Map<String, Integer> partitions = new HashMap<>(); | ||
|
|
||
| for (final Entry<String, KafkaFuture<TopicDescription>> entry : futures.entrySet()) { | ||
| final long remainingMillis = deadlineMillis - System.currentTimeMillis(); | ||
| if (remainingMillis <= 0) { | ||
| throw new RuntimeException( | ||
| "Timed out describing topics after " + DESCRIBE_TOPICS_TIMEOUT_MILLIS + "ms"); | ||
| } | ||
| try { | ||
| partitions.put( | ||
| entry.getKey(), | ||
| entry.getValue().get(remainingMillis, TimeUnit.MILLISECONDS).partitions().size()); | ||
| } catch (final TimeoutException timeoutException) { | ||
| throw new RuntimeException( | ||
| "Timed out describing topic " + entry.getKey(), timeoutException); | ||
| } catch (final InterruptedException interruptedException) { | ||
| Thread.currentThread().interrupt(); | ||
| throw new RuntimeException( | ||
| "Interrupted while describing topic " + entry.getKey(), interruptedException); | ||
| } catch (final ExecutionException executionException) { | ||
| if (executionException.getCause() instanceof UnknownTopicOrPartitionException) { | ||
| logger.warn( | ||
| "Topic absent on broker: {}. Treating as 0 partitions; restart needed once created.", | ||
| entry.getKey()); | ||
| partitions.put(entry.getKey(), 0); | ||
| } else { | ||
| throw new RuntimeException( | ||
| "Failed to describe topic " + entry.getKey(), executionException); | ||
| } | ||
| } | ||
| } | ||
| return Map.copyOf(partitions); | ||
| } | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor suggestion — the two-phase approach ( Since private Map<String, Integer> describePartitions(AdminClient adminClient, Set<String> topics) {
if (topics.isEmpty()) {
return Map.of();
}
Map<String, KafkaFuture<TopicDescription>> futures =
adminClient.describeTopics(topics).topicNameValues();
long deadline = System.currentTimeMillis() + DESCRIBE_TOPICS_TIMEOUT_MILLIS;
Map<String, Integer> partitions = new HashMap<>();
for (Entry<String, KafkaFuture<TopicDescription>> entry : futures.entrySet()) {
long remaining = deadline - System.currentTimeMillis();
if (remaining <= 0) {
throw new RuntimeException("Timed out describing topics");
}
try {
partitions.put(
entry.getKey(),
entry.getValue().get(remaining, TimeUnit.MILLISECONDS).partitions().size());
} catch (TimeoutException e) {
throw new RuntimeException("Timed out describing topic " + entry.getKey(), e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException("Interrupted describing topics", e);
} catch (ExecutionException e) {
if (e.getCause() instanceof UnknownTopicOrPartitionException) {
logger.warn("Topic absent on broker: {}. Treating as 0 partitions.", entry.getKey());
partitions.put(entry.getKey(), 0);
} else {
throw new RuntimeException("Failed to describe topic " + entry.getKey(), e);
}
}
}
return Map.copyOf(partitions);
}This keeps exception handling in one place, removes the implicit "must call A before B" contract, and eliminates the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Verified equivalence and adopted ( |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Curious whether topic pattern subscriptions are in scope here —
source.topicSet()returns empty when topics are subscribed via regex, which would make those sub-topologies contribute 0 partitions. Is this a known limitation or something worth handling?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch —
Source.topicSet()is empty for regex subscriptions andtopicPattern()would be the way to handle them. None of the current Hypertrace Kafka Streams apps subscribe via regex, so I've documented this as a known limitation in the class javadoc rather than handling it now (would need an extraAdminClient.listTopics()+ Pattern match round-trip). Apps that adopt regex shouldn't opt intoDYNAMICuntil that's added (736f37d).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If any sub-topology uses a pattern subscription, the dynamic calculation is fundamentally incomplete — it will silently under-count tasks and under-provision threads. Rather than
documenting this and hoping apps don't hit it, the resolver should detect it and return empty:
And the caller simply doesn't override:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch — bumping this from a doc note to detection is the right call. Currently a regex-only sub-topology silently contributes 0 tasks, and an all-regex topology lands on
totalTasks == 0 → 1thread (severe under-provisioning, no signal). Switchedcompute()to returnOptionalInt, added the pattern pre-check, and the call site at the framework layer translates empty →FALLBACK_NUM_STREAM_THREADSvia a single.orElse(...)(so all "cannot resolve" paths collapse there). Test added covering the regex-source path end-to-end (1c5c6ce).