Prepare release 0.26

LucaCanali · LucaCanali · commit 613e20e16b43 · 2025-08-29T09:45:33.000+02:00
diff --git a/README.md b/README.md
@@ -95,7 +95,7 @@ Main author and contact: Luca.Canali@cern.ch
   # Python CLI
   # pip install pyspark
   pip install sparkmeasure
-  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
   # Import sparkMeasure
   from sparkmeasure import StageMetrics
@@ -120,7 +120,7 @@ Main author and contact: Luca.Canali@cern.ch
   ```
 #### Scala CLI
   ```
-  spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+  spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
   val stageMetrics = ch.cern.sparkmeasure.StageMetrics(spark)
   stageMetrics.runAndMeasure(spark.sql("select count(*) from range(1000) cross join range(1000) cross join range(1000)").show())
@@ -203,7 +203,7 @@ Notes:
 This is similar but slightly different from the example above as it collects metrics at the Task-level rather than Stage-level
   ```
   # Scala CLI
-  spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.12:0.25
+  spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.12:0.26
 
   val taskMetrics = ch.cern.sparkmeasure.TaskMetrics(spark)
   taskMetrics.runAndMeasure(spark.sql("select count(*) from range(1000) cross join range(1000) cross join range(1000)").show())
@@ -212,7 +212,7 @@ This is similar but slightly different from the example above as it collects met
   # Python CLI
   # pip install pyspark
   pip install sparkmeasure
-  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
   from sparkmeasure import TaskMetrics
   taskmetrics = TaskMetrics(spark)
@@ -226,8 +226,8 @@ This is similar but slightly different from the example above as it collects met
 
 | Spark Version  | Recommended SparkMeasure Version | Scala Version       |
 | -------------- |----------------------------------|---------------------|
-| Spark 4.x      | 0.25 (latest)                    | Scala 2.13          |
-| Spark 3.x      | 0.25 (latest)                    | Scala 2.12 and 2.13 |
+| Spark 4.x      | 0.26 (latest)                    | Scala 2.13          |
+| Spark 3.x      | 0.26 (latest)                    | Scala 2.12 and 2.13 |
 | Spark 2.4, 2.3 | 0.19                             | Scala 2.11          |
 | Spark 2.2, 2.1 | 0.16                             | Scala 2.11          |
 
@@ -241,7 +241,7 @@ To get SparkMeasure, choose one of the following options:
 
 2. **Specific Versions:**
 
-    * Download JAR files from the [sparkMeasure release notes](https://github.com/LucaCanali/sparkMeasure/releases/tag/v0.25).
+    * Download JAR files from the [sparkMeasure release notes](https://github.com/LucaCanali/sparkMeasure/releases/tag/v0.26).
 
 3. **Latest Development Builds:**
 
@@ -255,21 +255,21 @@ To get SparkMeasure, choose one of the following options:
 
 #### Spark 4 with Scala 2.13
 
-* **Scala:** `spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25`
+* **Scala:** `spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26`
 * **Python:**
 
   ```bash
-  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
   pip install sparkmeasure
   ```
 
 #### Spark 3 with Scala 2.12
 
-* **Scala:** `spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.12:0.25`
+* **Scala:** `spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.12:0.26`
 * **Python:**
 
   ```bash
-  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.12:0.25
+  pyspark --packages ch.cern.sparkmeasure:spark-measure_2.12:0.26
   pip install sparkmeasure
   ```
 ### Including sparkMeasure in your Spark environment
@@ -279,14 +279,14 @@ Choose your preferred method:
 * Use the `--packages` option:
 
   ```bash
-  --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+  --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
   ```
 * Directly reference the JAR file:
 
   ```bash
-  --jars /path/to/spark-measure_2.13-0.25.jar
-  --jars https://github.com/LucaCanali/sparkMeasure/releases/download/v0.25/spark-measure_2.13-0.25.jar
-  --conf spark.driver.extraClassPath=/path/to/spark-measure_2.13-0.25.jar
+  --jars /path/to/spark-measure_2.13-0.26.jar
+  --jars https://github.com/LucaCanali/sparkMeasure/releases/download/v0.26/spark-measure_2.13-0.26.jar
+  --conf spark.driver.extraClassPath=/path/to/spark-measure_2.13-0.26.jar
   ```
 
 
diff --git a/build.sbt b/build.sbt
@@ -4,8 +4,8 @@
 
 name := "spark-measure"
 
-version := "0.26-SNAPSHOT"
-isSnapshot := true
+version := "0.26"
+isSnapshot := false
 
 scalaVersion       := "2.12.18"
 crossScalaVersions := Seq("2.12.18", "2.13.8")
diff --git a/docs/Flight_recorder_mode_FileSink.md b/docs/Flight_recorder_mode_FileSink.md
@@ -12,7 +12,7 @@ Metrics can also be printed to stdout.
 ## Recording metrics using the Flight Recorder mode with Stage-level granularity  
 To record metrics at the stage execution level granularity add these configurations to spark-submit: 
    ```
-   --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+   --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
    --conf spark.extraListeners=ch.cern.sparkmeasure.FlightRecorderStageMetrics
    ```
 
@@ -25,7 +25,7 @@ The usage is almost the same as for the stage metrics mode described above, just
 The configuration parameters applicable to Flight recorder mode for Task granularity are:
 
    ```
-   --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+   --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
    --conf spark.extraListeners=ch.cern.sparkmeasure.FlightRecorderTaskMetrics
    ```
 
@@ -51,7 +51,7 @@ A Python example
  - This runs the pi.py example script 
  - collects and saves the metrics to `/tmp/stageMetrics_flightRecorder` in json format:
 ```
-bin/spark-submit --master local[*] --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 \
+bin/spark-submit --master local[*] --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 \
 --conf spark.extraListeners=ch.cern.sparkmeasure.FlightRecorderStageMetrics \
 examples/src/main/python/pi.py
 
@@ -63,12 +63,12 @@ A Scala example
 - same example as above, in addition use a custom output filename
 - print metrics also to stdout
 ```
-bin/spark-submit --master local[*] --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 \
+bin/spark-submit --master local[*] --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 \
 --class org.apache.spark.examples.SparkPi \
 --conf spark.extraListeners=ch.cern.sparkmeasure.FlightRecorderStageMetrics \
 --conf spark.sparkmeasure.printToStdout=true \
 --conf spark.sparkmeasure.outputFilename="/tmp/myoutput_$(date +%s).json" \
-examples/jars/spark-examples_2.12-3.3.1.jar 10
+examples/jars/spark-examples_2.13-4.0.0.jar 10
 
 # The metrics are printed on stdout and also saved to a file
 # Find and read the flight recorder output file:
@@ -80,7 +80,7 @@ This example collected metrics with Task granularity.
 (note: source the Hadoop environment before running this)
 ```
 bin/spark-submit --master yarn --deploy-mode cluster \
---packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 \
+--packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 \
 --conf spark.extraListeners=ch.cern.sparkmeasure.FlightRecorderTaskMetrics \
 --conf spark.sparkmeasure.outputFormat=json_to_hadoop \
 --conf spark.sparkmeasure.outputFilename="hdfs://myclustername/user/luca/test/myoutput_$(date +%s).json" \
@@ -96,7 +96,7 @@ Example, use Spark 4, Kubernetes, Scala 2.13 and write output to S3:
 bin/spark-submit --master k8s://https://XXX.XXX.XXX.XXX --deploy-mode client --conf spark.executor.instances=3 \
 --conf spark.executor.cores=2 --executor-memory 6g --driver-memory 8g \
 --conf spark.kubernetes.container.image=apache/spark \
---packages org.apache.hadoop:hadoop-aws:3.4.1,ch.cern.sparkmeasure:spark-measure_2.13:0.25 \
+--packages org.apache.hadoop:hadoop-aws:3.4.1,ch.cern.sparkmeasure:spark-measure_2.13:0.26 \
 --conf spark.hadoop.fs.s3a.secret.key="YYY..." \
 --conf spark.hadoop.fs.s3a.access.key="ZZZ..." \
 --conf spark.hadoop.fs.s3a.endpoint="https://s3.cern.ch" \
@@ -105,7 +105,7 @@ bin/spark-submit --master k8s://https://XXX.XXX.XXX.XXX --deploy-mode client --c
 --conf spark.sparkmeasure.outputFormat=json_to_hadoop \
 --conf spark.sparkmeasure.outputFilename="s3a://test/myoutput_$(date +%s).json" \
 --class org.apache.spark.examples.SparkPi \
-examples/jars/spark-examples_2.13-4.4.0.jar 10
+examples/jars/spark-examples_2.13-4.0.0.jar 10
 ```
 
 
@@ -115,7 +115,7 @@ To post-process the saved metrics you will need to deserialize objects saved by
 This is an example of how to do that using the supplied helper object sparkmeasure.Utils
 
 ```
-bin/spark-shell  --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+bin/spark-shell  --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
 val myMetrics = ch.cern.sparkmeasure.IOUtils.readSerializedStageMetricsJSON("/tmp/stageMetrics_flightRecorder")
 // use ch.cern.sparkmeasure.IOUtils.readSerializedStageMetrics("/tmp/stageMetrics.serialized") for java serialization
diff --git a/docs/Flight_recorder_mode_InfluxDBSink.md b/docs/Flight_recorder_mode_InfluxDBSink.md
@@ -87,7 +87,7 @@ bin/spark-shell \
   --conf spark.sparkmeasure.influxdbURL="http://localhost:8086" \
   --conf spark.extraListeners=ch.cern.sparkmeasure.InfluxDBSink,ch.cern.sparkmeasure.InfluxDBSinkExtended \
   --conf spark.sparkmeasure.influxdbStagemetrics=true
-  --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+  --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
 // run a Spark job, this will produce metrics  
 spark.sql("select count(*) from range(1000) cross join range(1000) cross join range(1000)").show
diff --git a/docs/Flight_recorder_mode_KafkaSink.md b/docs/Flight_recorder_mode_KafkaSink.md
@@ -55,7 +55,7 @@ in spark-submit as in:
 - Start Apache Kafka. 
   - This example uses Kafka configured as in the getting started instructions at
     [Apache Kafka quickstart](https://kafka.apache.org/quickstart)
-    - for example run from Docker image: `docker run -p 9092:9092 apache/kafka:4.0.0`
+    - for example run from Docker image: `docker run -p 9092:9092 apache/kafka:latest`
 
 - Start Spark with sparkMeasure and attach the KafkaSink Listener
    
@@ -67,7 +67,7 @@ bin/spark-shell \
 --conf spark.extraListeners=ch.cern.sparkmeasure.KafkaSink \
 --conf spark.sparkmeasure.kafkaBroker=localhost:9092 \
 --conf spark.sparkmeasure.kafkaTopic=metrics
---packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+--packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 ```
 
 - Look at the metrics being written into Kafka:
diff --git a/docs/Flight_recorder_mode_PrometheusPushgatewaySink.md b/docs/Flight_recorder_mode_PrometheusPushgatewaySink.md
@@ -60,7 +60,7 @@ Examples:
 bin/spark-shell \
 --conf spark.extraListeners=ch.cern.sparkmeasure.PushGatewaySink \
 --conf spark.sparkmeasure.pushgateway=localhost:9091 \
---packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+--packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 ```
 
 - Look at the metrics being written to the Pushgateway
diff --git a/docs/Instrument_Python_code.md b/docs/Instrument_Python_code.md
@@ -11,7 +11,7 @@ You can find an example of how to instrument a Scala application running Apache
  
 How to run the example:
  ```
-bin/spark-submit --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 <path_to_examples>/test_sparkmeasure_python.py
+bin/spark-submit --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 <path_to_examples>/test_sparkmeasure_python.py
  ```
 
  Some relevant snippet of code are:
@@ -54,10 +54,10 @@ The details are discussed in the [examples for Python shell and notebook](https:
 
 - This is how to run sparkMeasure using a packaged version in Maven Central
   ```
-  bin/spark-submit --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 your_python_code.py
+  bin/spark-submit --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 your_python_code.py
 
   // alternative: just download and use the jar (it is only needed in the driver) as in:
-  bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.25.jar ...
+  bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.26.jar ...
   ```
 
 ### Download and build sparkMeasure (optional)
@@ -73,8 +73,8 @@ The details are discussed in the [examples for Python shell and notebook](https:
      pip install .
   
      # Run as in one of these examples:
-     bin/spark-submit --jars path>/spark-measure_2.13-0.26-SNAPSHOT.jar ...
+     bin/spark-submit --jars <path>/spark-measure_2.13-0.27-SNAPSHOT.jar ...
      
      # alternative, set classpath for the driver (sparkmeasure code runs only in the driver)
-     bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.26-SNAPSHOT.jar ...
+     bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.27-SNAPSHOT.jar ...
      ```
diff --git a/docs/Instrument_Scala_code.md b/docs/Instrument_Scala_code.md
@@ -13,7 +13,7 @@ How to run the example:
 # build the example jar
 sbt package
 
-bin/spark-submit --master local[*] --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 --class ch.cern.testSparkMeasure.testSparkMeasure <path_to_the_example_jar>/testsparkmeasurescala_2.13-0.1.jar
+bin/spark-submit --master local[*] --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 --class ch.cern.testSparkMeasure.testSparkMeasure <path_to_the_example_jar>/testsparkmeasurescala_2.13-0.1.jar
  ```
 
 ### Collect and save Stage Metrics
@@ -72,10 +72,10 @@ You have the option to export aggregated stage metrics and/or task metrics to:
 
 - This is how to run sparkMeasure using a packaged version in Maven Central
     ```
-    bin/spark-submit --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+    bin/spark-submit --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
     // or just download and use the jar (it is only needed in the driver) as in:
-    bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.25.jar ...
+    bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.26.jar ...
    ```
 - The alternative, see paragraph above, is to build a jar from master (See below).
 
@@ -89,8 +89,8 @@ You have the option to export aggregated stage metrics and/or task metrics to:
    ls -l target/scala-2.12/spark-measure*.jar  # location of the compiled jar
 
    # Run as in one of these examples:
-   bin/spark-submit --jars path>/spark-measure_2.13-0.26-SNAPSHOT.jar
+   bin/spark-submit --jars <path>/spark-measure_2.13-0.27-SNAPSHOT.jar
 
    # alternative, set classpath for the driver (it is only needed in the driver)
-   bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.26-SNAPSHOT.jar ...
+   bin/spark-submit --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.27-SNAPSHOT.jar ...
    ```
diff --git a/docs/Prometheus.md b/docs/Prometheus.md
@@ -35,7 +35,7 @@ https://prometheus.io/docs/instrumenting/exposition_formats/
 
 1. Measure metrics at the Stage level (example in Scala):
 ```
-bin/spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25 
+bin/spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26 
 
 val stageMetrics = ch.cern.sparkmeasure.StageMetrics(spark) 
 stageMetrics.begin()
diff --git a/docs/Python_shell_and_Jupyter.md b/docs/Python_shell_and_Jupyter.md
@@ -10,7 +10,7 @@ See also [README](../README.md) for an introduction to sparkMeasure and its arch
     ```
     pip install pyspark # Spark 4
     pip install sparkmeasure
-    bin/pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+    bin/pyspark --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
     ```
  - If you prefer to build from the latest development version:
     ```
@@ -24,10 +24,10 @@ See also [README](../README.md) for an introduction to sparkMeasure and its arch
     pip install .
 
     # Run as in one of these examples:
-    bin/pyspark --jars path>/spark-measure_2.13-0.26-SNAPSHOT.jar
+    bin/pyspark --jars <path>/spark-measure_2.13-0.27-SNAPSHOT.jar
 
     #Alternative:
-    bin/pyspark --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.26-SNAPSHOT.jar
+    bin/pyspark --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.27-SNAPSHOT.jar
     ```
 
 
diff --git a/docs/Scala_shell_and_notebooks.md b/docs/Scala_shell_and_notebooks.md
@@ -8,10 +8,10 @@ See also [README](../README.md) for an introduction to sparkMeasure and its arch
 
 - The alternative, see paragraph above, is to build a jar from master.
     ```
-    bin/spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+    bin/spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
     // or just download and use the jar (it is only needed in the driver) as in:
-    bin/spark-shell --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.25.jar
+    bin/spark-shell --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.26.jar
    ```
 
 ### Download and build sparkMeasure (optional)
@@ -24,17 +24,17 @@ See also [README](../README.md) for an introduction to sparkMeasure and its arch
     ls -l target/scala-2.13/spark-measure*.jar  # location of the compiled jar
 
     # Run as in one of these examples:
-    bin/spark-shell --jars <path>/spark-measure_2.13-0.26-SNAPSHOT.jar
+    bin/spark-shell --jars <path>/spark-measure_2.13-0.27-SNAPSHOT.jar
 
     # Alternative, set classpath for the driver (the JAR is only needed in the driver)
-    bin/spark-shell --conf spark.driver.extraClassPath=<path>/spark-measure_2.11-0.24-SNAPSHOT.jar
+    bin/spark-shell --conf spark.driver.extraClassPath=<path>/spark-measure_2.13-0.27-SNAPSHOT.jar
     ```
 
 ### Example: collect and print stage metrics with sparkMeasure
 
 1. Measure metrics at the Stage level, a basic example:
     ```
-    bin/spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.25
+    bin/spark-shell --packages ch.cern.sparkmeasure:spark-measure_2.13:0.26
 
     val stageMetrics = ch.cern.sparkmeasure.StageMetrics(spark)
     stageMetrics.runAndMeasure(spark.sql("select count(*) from range(1000) cross join range(1000) cross join range(1000)").show)
diff --git a/examples/SparkMeasure_Jupyter_Colab_Example.ipynb b/examples/SparkMeasure_Jupyter_Colab_Example.ipynb
@@ -65,7 +65,7 @@
     "  .builder\n",
     "  .master(\"local[*]\")\n",
     "  .appName(\"Test sparkmeasure instrumentation of Python/PySpark code\")\n",
-    "  .config(\"spark.jars.packages\",\"ch.cern.sparkmeasure:spark-measure_2.12:0.25\")\n",
+    "  .config(\"spark.jars.packages\",\"ch.cern.sparkmeasure:spark-measure_2.12:0.26\")\n",
     "  .getOrCreate() )"
    ]
   },
diff --git a/examples/SparkMeasure_Jupyter_Python_getting_started.ipynb b/examples/SparkMeasure_Jupyter_Python_getting_started.ipynb
diff --git a/examples/testSparkMeasureScala/README.md b/examples/testSparkMeasureScala/README.md
diff --git a/examples/testSparkMeasureScala/build.sbt b/examples/testSparkMeasureScala/build.sbt
diff --git a/examples/testSparkMeasureScala/project/build.properties b/examples/testSparkMeasureScala/project/build.properties
diff --git a/examples/testSparkMeasureScala/src/main/scala/testSparkMeasure.scala b/examples/testSparkMeasureScala/src/main/scala/testSparkMeasure.scala
diff --git a/examples/test_sparkmeasure_python.py b/examples/test_sparkmeasure_python.py