@@ -2,15 +2,14 @@ package ch.cern.sparkmeasure
22
33import org .apache .kafka .clients .producer .{KafkaProducer , Producer , ProducerRecord }
44import org .apache .kafka .common .serialization .ByteArraySerializer
5- import org .apache .spark .{ SparkConf , TaskFailedReason , TaskKilled }
5+ import org .apache .spark .SparkConf
66import org .apache .spark .scheduler ._
77import org .apache .spark .sql .SparkSession
88import org .apache .spark .sql .execution .ui .{SparkListenerSQLExecutionEnd , SparkListenerSQLExecutionStart }
99import org .slf4j .{Logger , LoggerFactory }
1010
1111import java .nio .charset .StandardCharsets
1212import java .util .Properties
13- import scala .collection .mutable
1413import scala .util .Try
1514
1615/**
@@ -30,8 +29,6 @@ import scala.util.Try
3029 * example: --conf spark.sparkmeasure.kafkaTopic=sparkmeasure-stageinfo
3130 * spark.sparkmeasure.kafka.* = Other kafka properties
3231 * example: --conf spark.sparkmeasure.kafka.ssl.keystore.location=/var/private/ssl/kafka.server.keystore.jks
33- * spark.sparkmeasure.appLabels.* = Custom labels to include in application start and end events
34- * example: --conf spark.sparkmeasure.appLabels.environment=production
3532 *
3633 * This code depends on "kafka clients", you may need to add the dependency:
3734 * --packages org.apache.kafka:kafka-clients:3.2.1
@@ -52,29 +49,6 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
5249 case _ => " noAppId"
5350 }
5451
55- // Application tracking
56- private var appName : String = " noAppName"
57- private var startTime : Long = 0L
58-
59- // Executor tracking
60- private var executorsFailed : Int = 0
61- private var executorsKilled : Int = 0
62-
63- // Job tracking
64- private var totalJobsCompleted : Int = 0
65- private var succeededJobsCount : Int = 0
66- private var failedJobsCount : Int = 0
67-
68- // Stage tracking
69- private var totalStagesCompleted : Int = 0
70- private var succeededStagesCount : Int = 0
71- private var failedStagesCount : Int = 0
72-
73- // Task tracking
74- private var totalTaskCount : Int = 0
75- private var numTaskFailed : Int = 0
76- private var numTaskKilled : Int = 0
77-
7852 override def onExecutorAdded (executorAdded : SparkListenerExecutorAdded ): Unit = {
7953 val executorInfo = executorAdded.executorInfo
8054 val epochMillis = System .currentTimeMillis()
@@ -90,17 +64,6 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
9064 report(metrics)
9165 }
9266
93- override def onExecutorRemoved (executorRemoved : SparkListenerExecutorRemoved ): Unit = {
94- if (executorRemoved != null && executorRemoved.reason != null ) {
95- executorRemoved.reason match {
96- case reason if reason.toLowerCase.contains(" kill" ) =>
97- executorsKilled += 1
98- case _ =>
99- executorsFailed += 1
100- }
101- }
102- }
103-
10467 override def onStageSubmitted (stageSubmitted : SparkListenerStageSubmitted ): Unit = {
10568 val submissionTime = stageSubmitted.stageInfo.submissionTime.getOrElse(0L )
10669 val attemptNumber = stageSubmitted.stageInfo.attemptNumber()
@@ -118,6 +81,7 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
11881 report(metrics)
11982 }
12083
84+
12185 override def onStageCompleted (stageCompleted : SparkListenerStageCompleted ): Unit = {
12286 val stageId = stageCompleted.stageInfo.stageId.toString
12387 val submissionTime = stageCompleted.stageInfo.submissionTime.getOrElse(0L )
@@ -176,17 +140,6 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
176140 )
177141
178142 report(stageTaskMetrics)
179-
180- if (stageCompleted != null && stageCompleted.stageInfo != null ) {
181- val stageInfo = stageCompleted.stageInfo
182- totalStagesCompleted += 1
183-
184- if (stageInfo.failureReason.isDefined) {
185- failedStagesCount += 1
186- } else {
187- succeededStagesCount += 1
188- }
189- }
190143 }
191144
192145 override def onOtherEvent (event : SparkListenerEvent ): Unit = {
@@ -250,69 +203,13 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
250203 " epochMillis" -> epochMillis
251204 )
252205 report(jobEndMetrics)
253-
254- if (jobEnd != null ) {
255- totalJobsCompleted += 1
256-
257- jobEnd.jobResult match {
258- case org.apache.spark.scheduler.JobSucceeded =>
259- succeededJobsCount += 1
260- case _ =>
261- failedJobsCount += 1
262- }
263- }
264206 }
265207
266208 override def onApplicationStart (applicationStart : SparkListenerApplicationStart ): Unit = {
267209 appId = applicationStart.appId.getOrElse(" noAppId" )
268- appName = applicationStart.appName
269- startTime = applicationStart.time
270- val appLabels = extractAppLabels(conf)
271- val epochMillis = System .currentTimeMillis()
272-
273- val appStartMetrics = Map [String , Any ](
274- " name" -> " applications_started" ,
275- " appId" -> appId,
276- " appName" -> appName,
277- " startTime" -> startTime,
278- " epochMillis" -> epochMillis
279- ) ++ appLabels
280-
281- report(appStartMetrics)
282210 }
283211
284212 override def onApplicationEnd (applicationEnd : SparkListenerApplicationEnd ): Unit = {
285- val completionTime = applicationEnd.time
286- val safeEndTime = if (completionTime > 0 ) completionTime else System .currentTimeMillis()
287- val duration = if (startTime > 0 ) safeEndTime - startTime else 0L
288- val epochMillis = System .currentTimeMillis()
289- val configurations = conf.getAll.toMap
290- val appLabels = extractAppLabels(conf)
291-
292- val appEndMetrics = Map [String , Any ](
293- " name" -> " applications_ended" ,
294- " appId" -> appId,
295- " appName" -> appName,
296- " startTime" -> startTime,
297- " completionTime" -> completionTime,
298- " duration" -> duration,
299- " executorsFailed" -> executorsFailed,
300- " executorsKilled" -> executorsKilled,
301- " totalJobsCompleted" -> totalJobsCompleted,
302- " succeededJobsCount" -> succeededJobsCount,
303- " failedJobsCount" -> failedJobsCount,
304- " numStagesCompleted" -> totalStagesCompleted,
305- " numSucceededStages" -> succeededStagesCount,
306- " numFailedStages" -> failedStagesCount,
307- " totalTaskCount" -> totalTaskCount,
308- " numTaskFailed" -> numTaskFailed,
309- " numTaskKilled" -> numTaskKilled,
310- " epochMillis" -> epochMillis,
311- " configurations" -> configurations
312- ) ++ appLabels
313-
314- report(appEndMetrics)
315-
316213 logger.info(s " Spark application ended, timestamp = ${applicationEnd.time}, closing Kafka connection. " )
317214 synchronized (
318215 if (Option (producer).isDefined) {
@@ -326,22 +223,6 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
326223 )
327224 }
328225
329- override def onTaskEnd (taskEnd : SparkListenerTaskEnd ): Unit = {
330- if (taskEnd != null ) {
331- totalTaskCount += 1
332-
333- if (taskEnd.reason != null ) {
334- taskEnd.reason match {
335- case _ : TaskKilled =>
336- numTaskKilled += 1
337- case _ : TaskFailedReason =>
338- numTaskFailed += 1
339- case _ =>
340- }
341- }
342- }
343- }
344-
345226 protected def report [T <: Any ](metrics : Map [String , T ]): Unit = {
346227 val result : Unit = Try {
347228 ensureProducer()
@@ -375,14 +256,6 @@ class KafkaSink(conf: SparkConf) extends SparkListener {
375256 )
376257 }
377258
378- private def extractAppLabels (conf : SparkConf ): Map [String , String ] = {
379- Try {
380- conf.getAll
381- .filter { case (key, _) => key.startsWith(" spark.sparkmeasure.appLabels." ) }
382- .map { case (key, value) => (key.stripPrefix(" spark.sparkmeasure." ), value) }
383- .toMap
384- }.getOrElse(Map .empty[String , String ])
385- }
386259}
387260
388261/**
@@ -409,8 +282,6 @@ class KafkaSinkExtended(conf: SparkConf) extends KafkaSink(conf) {
409282 }
410283
411284 override def onTaskEnd (taskEnd : SparkListenerTaskEnd ): Unit = {
412- super .onTaskEnd(taskEnd)
413-
414285 val taskInfo = taskEnd.taskInfo
415286 val taskmetrics = taskEnd.taskMetrics
416287 val epochMillis = System .currentTimeMillis()
0 commit comments