|
| 1 | +package org.hypertrace.core.kafkastreams.framework.threading; |
| 2 | + |
| 3 | +import static java.util.stream.Collectors.toUnmodifiableSet; |
| 4 | + |
| 5 | +import java.time.Duration; |
| 6 | +import java.util.HashMap; |
| 7 | +import java.util.Map; |
| 8 | +import java.util.Map.Entry; |
| 9 | +import java.util.OptionalInt; |
| 10 | +import java.util.Set; |
| 11 | +import java.util.concurrent.ExecutionException; |
| 12 | +import java.util.concurrent.TimeUnit; |
| 13 | +import java.util.concurrent.TimeoutException; |
| 14 | +import org.apache.kafka.clients.admin.AdminClient; |
| 15 | +import org.apache.kafka.clients.admin.DescribeTopicsResult; |
| 16 | +import org.apache.kafka.clients.admin.TopicDescription; |
| 17 | +import org.apache.kafka.common.KafkaFuture; |
| 18 | +import org.apache.kafka.common.errors.UnknownTopicOrPartitionException; |
| 19 | +import org.apache.kafka.streams.Topology; |
| 20 | +import org.apache.kafka.streams.TopologyDescription; |
| 21 | +import org.apache.kafka.streams.TopologyDescription.Source; |
| 22 | +import org.apache.kafka.streams.TopologyDescription.Subtopology; |
| 23 | +import org.slf4j.Logger; |
| 24 | +import org.slf4j.LoggerFactory; |
| 25 | + |
| 26 | +/** |
| 27 | + * Computes a per-instance {@code num.stream.threads} value from a topology and the partition count |
| 28 | + * of every source topic. |
| 29 | + * |
| 30 | + * <p>For each sub-topology the maximum partition count across its source topics is the number of |
| 31 | + * stream tasks. Summing across sub-topologies and dividing by the replica count yields the threads |
| 32 | + * each instance should run to keep all tasks active without idle threads. |
| 33 | + * |
| 34 | + * <p>Returns {@link OptionalInt#empty()} when the topology contains a regex/pattern subscription |
| 35 | + * ({@link Source#topicPattern()}) — those sub-topologies cannot be enumerated against the broker |
| 36 | + * up-front, so dynamic sizing would silently under-count tasks. The caller falls back to its |
| 37 | + * configured default in that case. |
| 38 | + */ |
| 39 | +public class DynamicStreamThreadsCountCalculator { |
| 40 | + |
| 41 | + private static final long DESCRIBE_TOPICS_TIMEOUT_MILLIS = Duration.ofSeconds(5).toMillis(); |
| 42 | + private static final Logger logger = |
| 43 | + LoggerFactory.getLogger(DynamicStreamThreadsCountCalculator.class); |
| 44 | + |
| 45 | + private static Set<String> sourceTopicsOf(final Subtopology subtopology) { |
| 46 | + return subtopology.nodes().stream() |
| 47 | + .filter(node -> node instanceof Source) |
| 48 | + .map(node -> (Source) node) |
| 49 | + .flatMap(source -> source.topicSet().stream()) |
| 50 | + .collect(toUnmodifiableSet()); |
| 51 | + } |
| 52 | + |
| 53 | + private static boolean hasPatternSource(final Subtopology subtopology) { |
| 54 | + return subtopology.nodes().stream() |
| 55 | + .filter(node -> node instanceof Source) |
| 56 | + .map(node -> (Source) node) |
| 57 | + .anyMatch(source -> source.topicPattern() != null); |
| 58 | + } |
| 59 | + |
| 60 | + public OptionalInt compute( |
| 61 | + final Topology topology, final AdminClient adminClient, final int replicas) { |
| 62 | + if (replicas <= 0) { |
| 63 | + throw new IllegalArgumentException("replicas must be positive, got " + replicas); |
| 64 | + } |
| 65 | + |
| 66 | + final TopologyDescription description = topology.describe(); |
| 67 | + |
| 68 | + // Bail out if any sub-topology subscribes via regex — topicSet() is empty for those, so |
| 69 | + // dynamic sizing would silently under-count tasks. The caller substitutes its fallback. |
| 70 | + final boolean anyPatternSource = |
| 71 | + description.subtopologies().stream() |
| 72 | + .anyMatch(DynamicStreamThreadsCountCalculator::hasPatternSource); |
| 73 | + if (anyPatternSource) { |
| 74 | + logger.warn( |
| 75 | + "Topology contains a regex/pattern source; dynamic num.stream.threads is not supported. " |
| 76 | + + "Caller will fall back to its configured default."); |
| 77 | + return OptionalInt.empty(); |
| 78 | + } |
| 79 | + |
| 80 | + final Set<String> sourceTopics = |
| 81 | + description.subtopologies().stream() |
| 82 | + .flatMap(subtopology -> sourceTopicsOf(subtopology).stream()) |
| 83 | + .collect(toUnmodifiableSet()); |
| 84 | + |
| 85 | + final Map<String, Integer> partitionsByTopic = describePartitions(adminClient, sourceTopics); |
| 86 | + |
| 87 | + int totalTasks = 0; |
| 88 | + int subtopologyCount = 0; |
| 89 | + for (final Subtopology subtopology : description.subtopologies()) { |
| 90 | + subtopologyCount++; |
| 91 | + final Set<String> subtopologyTopics = sourceTopicsOf(subtopology); |
| 92 | + |
| 93 | + final int tasksForSubtopology = |
| 94 | + subtopologyTopics.stream() |
| 95 | + .mapToInt(topic -> partitionsByTopic.getOrDefault(topic, 0)) |
| 96 | + .max() |
| 97 | + .orElse(0); |
| 98 | + |
| 99 | + if (tasksForSubtopology == 0) { |
| 100 | + logger.warn( |
| 101 | + "Sub-topology has no resolvable partitions; topics={}. Pod restart will be needed once topics exist.", |
| 102 | + subtopologyTopics); |
| 103 | + } |
| 104 | + totalTasks += tasksForSubtopology; |
| 105 | + } |
| 106 | + |
| 107 | + if (totalTasks == 0) { |
| 108 | + logger.warn( |
| 109 | + "No resolvable partitions across {} sub-topologies; skipping dynamic num.stream.threads.", |
| 110 | + subtopologyCount); |
| 111 | + return OptionalInt.empty(); |
| 112 | + } |
| 113 | + |
| 114 | + final int threads = (int) Math.ceil((double) totalTasks / replicas); |
| 115 | + logger.info( |
| 116 | + "Dynamic num.stream.threads: totalTasks={} across {} sub-topologies, replicas={}, computed={}", |
| 117 | + totalTasks, |
| 118 | + subtopologyCount, |
| 119 | + replicas, |
| 120 | + threads); |
| 121 | + return OptionalInt.of(threads); |
| 122 | + } |
| 123 | + |
| 124 | + // Single-loop implementation: AdminClient.describeTopics() already fires all RPCs concurrently |
| 125 | + // before returning futures, so iteration here only consumes a shared deadline (now+timeout) — |
| 126 | + // total wall-clock is capped at DESCRIBE_TOPICS_TIMEOUT_MILLIS regardless of topic count. |
| 127 | + private Map<String, Integer> describePartitions( |
| 128 | + final AdminClient adminClient, final Set<String> topics) { |
| 129 | + if (topics.isEmpty()) { |
| 130 | + return Map.of(); |
| 131 | + } |
| 132 | + final DescribeTopicsResult result = adminClient.describeTopics(topics); |
| 133 | + final Map<String, KafkaFuture<TopicDescription>> futures = result.topicNameValues(); |
| 134 | + final long deadlineMillis = System.currentTimeMillis() + DESCRIBE_TOPICS_TIMEOUT_MILLIS; |
| 135 | + final Map<String, Integer> partitions = new HashMap<>(); |
| 136 | + |
| 137 | + for (final Entry<String, KafkaFuture<TopicDescription>> entry : futures.entrySet()) { |
| 138 | + final long remainingMillis = deadlineMillis - System.currentTimeMillis(); |
| 139 | + if (remainingMillis <= 0) { |
| 140 | + throw new RuntimeException( |
| 141 | + "Timed out describing topics after " + DESCRIBE_TOPICS_TIMEOUT_MILLIS + "ms"); |
| 142 | + } |
| 143 | + try { |
| 144 | + partitions.put( |
| 145 | + entry.getKey(), |
| 146 | + entry.getValue().get(remainingMillis, TimeUnit.MILLISECONDS).partitions().size()); |
| 147 | + } catch (final TimeoutException timeoutException) { |
| 148 | + throw new RuntimeException( |
| 149 | + "Timed out describing topic " + entry.getKey(), timeoutException); |
| 150 | + } catch (final InterruptedException interruptedException) { |
| 151 | + Thread.currentThread().interrupt(); |
| 152 | + throw new RuntimeException( |
| 153 | + "Interrupted while describing topic " + entry.getKey(), interruptedException); |
| 154 | + } catch (final ExecutionException executionException) { |
| 155 | + if (executionException.getCause() instanceof UnknownTopicOrPartitionException) { |
| 156 | + logger.warn( |
| 157 | + "Topic absent on broker: {}. Treating as 0 partitions; restart needed once created.", |
| 158 | + entry.getKey()); |
| 159 | + partitions.put(entry.getKey(), 0); |
| 160 | + } else { |
| 161 | + throw new RuntimeException( |
| 162 | + "Failed to describe topic " + entry.getKey(), executionException); |
| 163 | + } |
| 164 | + } |
| 165 | + } |
| 166 | + return Map.copyOf(partitions); |
| 167 | + } |
| 168 | +} |
0 commit comments